import numpy as np
import scipy.stats
import matplotlib.pyplot as plt


### Simulate data:

np.random.seed(0)

theta_true = 3
n_samples = 100

x = np.zeros(n_samples)
for i in range(n_samples):
	# Sample from N(0,1) or N(theta_true,1) with equal probability
	if np.random.rand() < 0.5:
		x[i] = np.random.normal(0, 1)
	else:
		x[i] = np.random.normal(theta_true, 1)

# Plot the data
plt.hist(x, bins=25)
plt.show()


### The EM algorithm:

n_iter = 20
theta = np.zeros(n_iter)

# Initial guess for theta
theta[0] = 0

for it in range(1, n_iter):
	# The current estimate for theta,
	# computed in the previous iteration
	theta_0 = theta[it-1]
	
	# E-step: compute the responsibilities r2 for component 2
	r1_unnorm = scipy.stats.norm.pdf(x, 0, 1)
	r2_unnorm = scipy.stats.norm.pdf(x, theta_0, 1)
	r2 = r2_unnorm / (r1_unnorm + r2_unnorm)
	
	# M-step: compute the parameter value that maximizes
	# the expectation of the complete-data log-likelihood.
	theta[it] = sum(r2 * x) / sum(r2)


# Print and plot the values of theta in each iteration
print("theta")
for theta_i in theta:
	print("%.7f" % theta_i)

plt.plot(range(n_iter), theta)
plt.show()