MatiasMaravi / ML-P2

Proyecto 2 de Machine Learning
MIT License
0 stars 0 forks source link

Implementar GMM #2

Open MatiasMaravi opened 11 months ago

CarlosFloresCS commented 10 months ago
from scipy.stats import multivariate_normal
def gmm(X, K, max_iters=100, tol=1e-6):
    # X: input (N, D)
    # K: # de clusters (7)
    # max_iters: iteraciones maximas
    # tol: convergence tolerance

    N, D = X.shape # 189,70

    # Step 1: Initialize means, covariances, and mixing coefficients
    mu = X[np.random.choice(N, K, replace=False)] # 7,70
    cov = np.array([np.eye(D) * 1e-4] * K) # 7,70,70
    pi = np.array([1.0 / K] * K) # 7,

    # Initialize log likelihood 
    ll = -np.inf
    for i in range(max_iters): # 100
        # Step 2: E-step
        gamma = np.zeros((N, K)) # 189,7
        for k in range(K): # 7
            print(multivariate_normal.pdf(X,mean=mu[k],cov=cov[k]))
            gamma[:, k] = pi[k] * multivariate_normal.pdf(X,mean=mu[k],cov=cov[k])#gaussian(X, mu[k], cov[k])
            print(gamma)
        gamma /= gamma.sum(axis=1, keepdims=True)
        print("Matrices de covarianza en e_step:")
        print(gamma)

        # Step 3: M-step
        Nk = gamma.sum(axis=0)
        mu_new = np.zeros((K, D))
        cov_new = np.zeros((K, D, D))
        pi_new = np.zeros(K)
        for k in range(K):
            mu_new[k] = 1.0 / Nk[k] * np.sum(gamma[:, k] * X.T, axis=1).T
            X_centered = X - mu_new[k]
            cov_new[k] = np.dot(gamma[:, k] * X_centered.T, X_centered) / Nk[k] + np.eye(D) * 1e-6
            pi_new[k] = Nk[k] / N

        # Step 4: Evaluate log likelihood
        ll_new = np.sum(np.log(np.dot(gamma, pi)))

        # Check for convergence
        if np.abs(ll_new - ll) < tol:
            break

        # Update parameters
        mu = mu_new
        cov = cov_new
        pi = pi_new
        ll = ll_new

    return mu, cov, pi, ll

def gaussian(X, mu, cov):
    D = X.shape[1]
    X_centered = X - mu
    exponent = -0.5 * np.sum(np.dot(X_centered, np.linalg.inv(cov)) * X_centered, axis=1)
    return (2 * np.pi) ** (-D / 2) * np.linalg.det(cov) ** (-0.5) * np.exp(exponent)

mu, cov, pi, ll = gmm(dt_tissue_pca, K=7)

Bug importante

Se cae en gamma[:, k] = pi[k] * multivariate_normal.pdf(X,mean=mu[k],cov=cov[k])#gaussian(X, mu[k], cov[k]),porque genera numeros nans