public static Sum ( Matrix m, VectorType t ) : Vector |
||
m | Matrix | Input Matrix. |
t | VectorType | Row or Column sum. |
return | Vector | Vector Sum. |
/// <summary>
/// Fits <c>Mu</c> and <c>Sigma</c> to the samples held in <paramref name="X"/>:
/// <c>Mu</c> becomes the summed samples divided by the sample count, and <c>Sigma</c>
/// becomes the accumulated outer products of the mean-centred samples scaled by 1 / (n - 1).
/// </summary>
/// <param name="X">Matrix holding the samples.</param>
/// <param name="type">
/// (Optional) Orientation of the samples. With <c>VectorType.Row</c> (the default) each row of
/// <paramref name="X"/> is one sample; otherwise each column is one sample.
/// </param>
public void Estimate(Matrix X, VectorType type = VectorType.Row)
{
    // Work out how many samples there are and how long each one is.
    int sampleCount;
    int dimension;
    if (type == VectorType.Row)
    {
        sampleCount = X.Rows;
        dimension = X.Cols;
    }
    else
    {
        sampleCount = X.Cols;
        dimension = X.Rows;
    }

    Mu = X.Sum(type) / sampleCount;
    Sigma = Matrix.Zeros(dimension);

    // Accumulate the outer product of every mean-centred sample.
    int index = 0;
    while (index < sampleCount)
    {
        var centred = X[index, type] - Mu;
        Sigma = Sigma + centred.Outer(centred);
        index++;
    }

    // Divide by (n - 1) rather than n.
    double scale = 1d / (sampleCount - 1d);
    Sigma *= scale;
}
/// <summary>
/// Computes the mean vector (<c>Mu</c>) and the scatter-based covariance matrix (<c>Sigma</c>)
/// of the samples in <paramref name="X"/>. <c>Sigma</c> is the sum of the outer products of
/// each sample's deviation from <c>Mu</c>, multiplied by 1 / (n - 1).
/// </summary>
/// <param name="X">Matrix whose rows or columns are the samples.</param>
/// <param name="type">
/// (Optional) Which axis indexes the samples: rows for <c>VectorType.Row</c> (default),
/// columns otherwise.
/// </param>
public void Estimate(Matrix X, VectorType type = VectorType.Row)
{
    var samplesAreRows = type == VectorType.Row;
    var count = samplesAreRows ? X.Rows : X.Cols;
    var size = samplesAreRows ? X.Cols : X.Rows;

    Mu = X.Sum(type) / count;
    Sigma = Matrix.Zeros(size);

    for (var sample = 0; sample < count; sample++)
    {
        // Deviation of this sample from the mean, then its contribution to the scatter.
        var deviation = X[sample, type] - Mu;
        Sigma = Sigma + deviation.Outer(deviation);
    }

    var normaliser = 1d / (count - 1d);
    Sigma *= normaliser;
}
/// <summary>
/// Runs an expectation-maximization loop that fits <paramref name="k"/> Gaussian components
/// (covariances kept as one diagonal per component) to the rows of <paramref name="X"/>,
/// seeded from a KMeans run. Progress is written to the console on every iteration.
/// The method returns nothing and, in the code shown here, the fitted parameters are held
/// only in local variables.
/// </summary>
/// <param name="X">Data matrix; n is taken from X.Rows and d from X.Cols.</param>
/// <param name="k">Number of mixture components; also the cluster count handed to KMeans.</param>
public void Generate(Matrix X, int k)
{
    int n = X.Rows;
    int d = X.Cols;

    /***********************
     * initialize parameters
     ***********************/
    // convergence params
    var log_probability = 0d;
    var probability_difference = double.MaxValue;
    var mu_difference = double.MaxValue;

    // initialize centers with KMeans; asgn holds one cluster assignment per point
    KMeans kmeans = new KMeans();
    var asgn = kmeans.Generate(X, k, new EuclidianDistance());

    // tentative centers
    var mu_k = kmeans.Centers;

    // initial covariances (stored as diag(cov) 1 of k): row i is the diagonal
    // covariance of the points KMeans assigned to cluster i
    var sg_k = new Matrix(k, d);
    for (int i = 0; i < k; i++)
    {
        // Pair every assignment (Item1) with its position (Item2), keep those assigned to
        // cluster i, and project back to the positions, i.e. the row indices of X in cluster i.
        var indices = asgn.Select((a, b) => new Tuple<int, int>(a, b)).Where(t => t.Item1 == i).Select(t => t.Item2);
        var matrix = X.Slice(indices, VectorType.Row);
        sg_k[i] = matrix.CovarianceDiag();
    }

    // mixing coefficient: fraction of points in each cluster, in ascending cluster-id order.
    // NOTE(review): a cluster with no assigned points produces no group, which would make
    // pi_k shorter than k - confirm KMeans never leaves a cluster empty.
    var pi_k = asgn
        .OrderBy(i => i)
        .GroupBy(j => j)
        .Select(g => (double)g.Count() / (double)asgn.Length)
        .ToVector();

    // Iteration budget; see the loop condition for how it is consumed.
    int max_iter = 100;
    do
    {
        /***********************
         * Expectation Step
         ***********************/
        // responsibility matrix: how much is gaussian k responsible for this point x
        var z_nk = new Matrix(n, k);
        for (int i = 0; i < n; i++)
        {
            // pi_j * N(x_n | mu_j, sigma_j)
            for (int j = 0; j < k; j++)
                z_nk[i, j] = pi_k[j] * Normal(X[i], mu_k[j], sg_k[j]);

            // dn is the row total used as the normalizer for point i.
            var dn = z_nk[i].Sum();

            // A zero total is only reported; execution continues and the division below
            // still happens.
            if(dn == 0) Console.WriteLine("Uh oh....");

            // Intended to scale row i so that it sums to 1. The return value is discarded, so
            // this only takes effect if z_nk[i] exposes the matrix storage and Each mutates
            // in place - TODO confirm.
            z_nk[i].Each(z => z / dn);
        }

        /***********************
         * Maximization Step
         ***********************/
        // Total responsibility per component; presumably a length-k vector since it is
        // indexed by component below - confirm against Matrix.Sum.
        var N_k = z_nk.Sum(VectorType.Row);

        // New centers: responsibility-weighted sum of the points divided by N_k[i].
        var mu_k_new = new Matrix(mu_k.Rows, mu_k.Cols);
        for (int i = 0; i < k; i++)
        {
            var sum = Vector.Zeros(d);
            for (int j = 0; j < n; j++)
                sum += z_nk[j, i] * X[j];
            mu_k_new[i] = sum / N_k[i];
        }

        // New diagonal covariances: responsibility-weighted sum of the element-wise squared
        // deviations from the NEW center, divided by N_k[i].
        var sg_k_new = new Matrix(k, d);
        for (int i = 0; i < k; i++)
        {
            var sum = Vector.Zeros(d);
            for (int j = 0; j < n; j++)
                sum += z_nk[j, i] * (X[j] - mu_k_new[i]).Each(s => s * s);
            sg_k_new[i] = sum / N_k[i];
        }

        // New mixing coefficients.
        var pi_k_new = N_k / n;

        /***********************
         * Convergence Check
         ***********************/
        // The log likelihood is evaluated with pi_k, mu_k and sg_k, i.e. the parameters from
        // BEFORE this iteration's maximization step; the *_new values are swapped in further down.
        var new_log_prob = 0d;
        for (int i = 0; i < n; i++)
        {
            var acc = 0d;
            // pi_j * N(x_n | mu_j, sigma_j)
            for (int j = 0; j < k; j++)
                acc += pi_k[j] * Normal(X[i], mu_k[j], sg_k[j]);

            // Natural logarithm (base passed explicitly as e).
            new_log_prob += System.Math.Log(acc, System.Math.E);
        }

        // log likelihood differences
        probability_difference = System.Math.Abs(log_probability - new_log_prob);
        Console.WriteLine("Log Likelihoods (Total Points: {0}, k={1}, d={2})\nO: {3}\nN: {4}\nDifference: {5}\n", n, k, d, log_probability, new_log_prob, probability_difference);
        log_probability = new_log_prob;

        // centers differences: sum over components of the norm of (old center - new center)
        mu_difference = mu_k.GetRows()
            .Zip(mu_k_new.GetRows(), (v1, v2) => new { V1 = v1, V2 = v2 })
            .Sum(a => (a.V1 - a.V2).Norm());
        Console.WriteLine("Centers:\nO: {0}\nN: {1}\nDifference: {2}\n", mu_k, mu_k_new, mu_difference);
        mu_k = mu_k_new;

        // covariance differences: reported only, not part of the stopping condition
        double diff = sg_k.GetRows()
            .Zip(sg_k_new.GetRows(), (v1, v2) => new { V1 = v1, V2 = v2 })
            .Sum(a => (a.V1 - a.V2).Norm());
        Console.WriteLine("Covariance:\nO: {0}\nN: {1}\nDifference: {2}\n", sg_k, sg_k_new, diff);
        sg_k = sg_k_new;

        // mixing differences: reported only, not part of the stopping condition
        diff = (pi_k - pi_k_new).Each(s => System.Math.Abs(s)).Sum();
        Console.WriteLine("Mixing Coeffs:\nO: {0}\nN: {1}\nDifference: {2}\n", pi_k, pi_k_new, diff);
        pi_k = pi_k_new;

        Console.WriteLine("-------------------------------------------------------------");

        // Keep iterating only while BOTH the log-likelihood change and the center movement
        // exceed 1e-10 and the budget is not exhausted. max_iter starts at 100 and is
        // pre-decremented and compared with zero inclusively, so the body can run up to 101 times.
    } while (probability_difference > .0000000001 && mu_difference > .0000000001 && --max_iter >= 0);
}
/// <summary>
/// Extension-method shorthand for <c>Matrix.Sum(m)</c>: reduces the matrix to a single
/// scalar (presumably the total of all its elements; see <c>Matrix.Sum</c>).
/// </summary>
/// <param name="m">Matrix to reduce.</param>
/// <returns>The scalar returned by <c>Matrix.Sum(m)</c>.</returns>
public static double Sum(this Matrix m)
{
    double total = Matrix.Sum(m);
    return total;
}
/// <summary>
/// Forwards to <c>Matrix.Sum(m, i, t)</c> and returns its scalar result (presumably the
/// total of the <paramref name="i"/>-th row or column; see <c>Matrix.Sum</c>).
/// Declared without the <c>this</c> modifier, so it is called as a plain static method.
/// </summary>
/// <param name="m">Matrix to read from.</param>
/// <param name="i">Index passed through to <c>Matrix.Sum</c>.</param>
/// <param name="t">Row or Column selector passed through to <c>Matrix.Sum</c>.</param>
/// <returns>The scalar returned by <c>Matrix.Sum(m, i, t)</c>.</returns>
public static double Sum(Matrix m, int i, VectorType t)
{
    double total = Matrix.Sum(m, i, t);
    return total;
}
/// <summary>
/// Extension-method shorthand for <c>Matrix.Sum(m, t)</c>: sums the matrix along
/// its rows or its columns and returns the totals as a vector.
/// </summary>
/// <param name="m">Matrix to sum.</param>
/// <param name="t">Row or Column sum.</param>
/// <returns>Vector of sums produced by <c>Matrix.Sum(m, t)</c>.</returns>
public static Vector Sum(this Matrix m, VectorType t)
{
    Vector totals = Matrix.Sum(m, t);
    return totals;
}
/// <summary>
/// k-nearest-neighbour vote: measures the distance from <paramref name="inX"/> to every row of
/// <paramref name="dataset"/>, takes the first <paramref name="k"/> entries of the resulting
/// index ordering, and returns the label that occurs most often among them. Intermediate
/// values are written to the console.
/// </summary>
/// <param name="inX">Point to classify.</param>
/// <param name="dataset">Reference points, one per row.</param>
/// <param name="labels">Label for each dataset row, indexed by row number.</param>
/// <param name="k">Number of neighbours that vote; must be at least 1.</param>
/// <returns>The label with the highest vote count (the first one reached on a tie).</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="k"/> is less than 1, which would leave no votes to count.
/// </exception>
private static string classify0(Vector inX, Matrix dataset, List<string> labels, int k)
{
    // With no voters the tally below would be empty and First() would fail with an
    // unhelpful "Sequence contains no elements"; reject the argument up front instead.
    if (k < 1)
    {
        throw new ArgumentOutOfRangeException("k", k, "At least one neighbour is required.");
    }

    Console.WriteLine("Input");
    Console.WriteLine(inX.ToString());
    Console.WriteLine("Data");
    Console.WriteLine(dataset);
    Console.WriteLine("Labels");
    labels.ForEach(s => Console.Write(s+" "));

    // Create difference matrix with same dimensions as the dataset:
    // every row starts as a copy of inX, then the dataset is subtracted.
    var diffMatrix = new Matrix(dataset.Rows, dataset.Cols);
    for (int i = 0; i < dataset.Rows; i++)
    {
        diffMatrix[i] = inX;
    }
    diffMatrix = diffMatrix - dataset;
    Console.WriteLine("Diff Matrix");
    Console.WriteLine(diffMatrix.ToString());

    // Square all the items. The row is written back explicitly after Each;
    // this presumably relies on Each transforming the vector in place - TODO confirm.
    for (int i = 0; i < diffMatrix.Rows; i++)
    {
        var v = diffMatrix[i];
        v.Each((d) => Math.Pow(d, 2.0));
        diffMatrix[i] = v;
    }

    // Sum of each row and then square root.
    // NOTE(review): VectorType.Col is presumably what yields one total per row in this
    // library - confirm against Matrix.Sum.
    var sqDistances = diffMatrix.Sum(VectorType.Col);

    // Return value is discarded; relies on Each mutating sqDistances in place - TODO confirm.
    sqDistances.Each(d => Math.Sqrt(d));

    // The heading still says "Squared" although the square root has been applied above.
    Console.WriteLine("Squared distances");
    Console.WriteLine(sqDistances.ToString());

    // Get the sorted indices; the SortOrder result is reversed so that, presumably,
    // the nearest rows come first - confirm SortOrder's direction.
    var sortedIndices = Vector.SortOrder(sqDistances).Reverse().ToVector();
    Console.WriteLine("Sorted Indices");
    Console.WriteLine(sortedIndices);

    // Tally the labels of the first 'k' items.
    var classCount = new Dictionary<string, double>();
    for (int i = 0; i < k; i++)
    {
        var votelabel = labels[(int)sortedIndices[i]];

        // Single lookup: votes is left at 0 when the label has not been seen yet.
        double votes;
        classCount.TryGetValue(votelabel, out votes);
        classCount[votelabel] = votes + 1.0;
    }

    // Highest count wins. OrderByDescending is a stable sort, so a tie goes to the
    // label the dictionary enumerates first.
    return classCount.OrderByDescending(kvp => kvp.Value).First().Key;
}