/// <summary>
/// Saves the current k-means model to a text file, building the model first
/// when no k-means model is available yet.
/// </summary>
/// <remarks>
/// Layout: model type, input path, variable field names, n, prop, k and the
/// labels (one line each), followed by three lines per cluster holding the
/// means, the covariance values and the proportion.
/// </remarks>
/// <param name="outPath">Destination file; also stored in <c>outmodelpath</c>.</param>
public void writeModel(string outPath)
{
    if (kmeans == null)
    {
        buildModel();
    }

    outmodelpath = outPath;
    using (System.IO.StreamWriter writer = new System.IO.StreamWriter(outmodelpath))
    {
        // Header lines.
        writer.WriteLine(dataPrepBase.modelTypes.StrataCovCorr.ToString());
        writer.WriteLine(InPath);
        writer.WriteLine(String.Join(",", VariableFieldNames));
        writer.WriteLine(n.ToString());
        writer.WriteLine(prop.ToString());
        writer.WriteLine(k.ToString());
        writer.WriteLine(String.Join(",", lbl.ToArray()));

        // One three-line record per cluster.
        KMeansClusterCollection clusters = kmeans.Clusters;
        for (int c = 0; c < clusters.Count; c++)
        {
            KMeansCluster cluster = clusters[c];

            string[] meanText = cluster.Mean.Cast<double>().Select(value => value.ToString()).ToArray();
            writer.WriteLine(String.Join(",", meanText));

            // Cast<double>() enumerates the 2-D covariance array element by element.
            string[] covText = cluster.Covariance.Cast<double>().Select(value => value.ToString()).ToArray();
            writer.WriteLine(String.Join(",", covText));

            writer.WriteLine(cluster.Proportion.ToString());
        }

        writer.Close();
    }
}
/// <summary>
/// Denoises an image: collects the non-background regions found inside the
/// padded area, clusters their areas into two groups and clears every region
/// that does not belong to the group whose centroid has the larger
/// <c>ModSqr()</c>.
/// </summary>
/// <param name="data">Flat image buffer, converted with <c>Utils.ByteArr1DTo2D</c>.</param>
/// <param name="width">Image width.</param>
/// <param name="height">Image height.</param>
/// <param name="padding">Border excluded from the region scan on every side.</param>
/// <param name="it">
/// Value reported by <c>KMeansClustering.AnnealCluster</c> (presumably its
/// iteration count - confirm); 0 when no clustering was needed.
/// </param>
/// <returns>The processed image, converted back with <c>Utils.ByteArr2DTo1D</c>.</returns>
public byte[] Denoise(byte[] data, int width, int height, int padding, out int it)
{
    it = 0;
    byte[,] arr = Utils.ByteArr1DTo2D(data, width, height);

    // Visited flags; a freshly allocated bool array is already all false.
    bool[,] vi = new bool[height, width];

    // One seed point and one area per region found by dfsSearch.
    List<Point> points = new List<Point>();
    List<int> areas = new List<int>();
    for (int i = padding; i < height - padding; ++i)
    {
        for (int j = padding; j < width - padding; ++j)
        {
            if (vi[i, j] || arr[i, j] == backgroundColor)
            {
                continue;
            }

            int sum = 0;
            dfsSearch(arr, vi, i, j, width, height, padding, ref sum);
            points.Add(new Point(j, i));
            areas.Add(sum);
        }
    }

    if (points.Count > 1)
    {
        // Cluster the regions by area (second vector component is unused).
        Vector2[] vs = areas.Select(area => new Vector2(area, 0)).ToArray();
        KMeansCluster result = KMeansClustering.AnnealCluster(vs, 2, out it);
        int bigcluster = (result[0].Centroid.ModSqr() > result[1].Centroid.ModSqr()) ? 0 : 1;
        for (int i = 0; i < points.Count; ++i)
        {
            if (result.ClusterIndex[i] != bigcluster)
            {
                dfsClear(arr, points[i].Y, points[i].X, width, height, 1);
            }
        }
    }
    else if (points.Count == 1)
    {
        dfsColor(arr, points[0].Y, points[0].X, width, height, 1);
    }
    // points.Count == 0: no region was found, so the image is returned
    // unchanged instead of indexing points[0] on an empty list.

    return Utils.ByteArr2DTo1D(arr);
}
/// <summary>
/// Reads <c>k</c> cluster records from the reader and rebuilds a KMeans model
/// from them, storing the result in <c>clusterCollection</c> and <c>model</c>.
/// </summary>
/// <remarks>
/// Each record is three lines: comma-separated means, comma-separated
/// covariance values and the proportion.
/// </remarks>
/// <param name="sr">Reader positioned at the first cluster record.</param>
private void setKMeansCluster(System.IO.StreamReader sr)
{
    KMeans rebuilt = new KMeans(k);
    KMeansClusterCollection clusters = rebuilt.Clusters;
    char[] comma = new char[] { ',' };

    for (int clusterIndex = 0; clusterIndex < k; clusterIndex++)
    {
        // Line 1: means.
        string[] meanTokens = sr.ReadLine().Split(comma);
        double[] means = new double[meanTokens.Length];
        for (int t = 0; t < meanTokens.Length; t++)
        {
            means[t] = System.Convert.ToDouble(meanTokens[t]);
        }

        // Line 2: covariance values; line 3: proportion.
        string[] covTokens = sr.ReadLine().Split(comma);
        double proportion = System.Convert.ToDouble(sr.ReadLine());

        int size = VariableFieldNames.Length;
        double[,] covariance = new double[size, size];
        for (int outer = 0; outer < size; outer++)
        {
            int offset = outer * size;
            for (int inner = 0; inner < size; inner++)
            {
                // Token (outer * size + inner) is stored at [inner, outer].
                covariance[inner, outer] = System.Convert.ToDouble(covTokens[offset + inner]);
            }
        }

        KMeansCluster cluster = new KMeansCluster(clusters, clusterIndex)
        {
            Mean = means,
            Covariance = covariance,
            Proportion = proportion
        };
    }

    clusterCollection = clusters;
    model = rebuilt;
}
/// <summary>
/// Compares two k-means models cluster by cluster (same index in both models)
/// and returns the p-values produced by <c>PairedTTestPValues</c>.
/// </summary>
/// <param name="km1">First model.</param>
/// <param name="km2">Second model.</param>
/// <param name="meanPvalues">One value per cluster from the first out argument of <c>PairedTTestPValues</c>; null when the cluster counts differ.</param>
/// <param name="varPvalues">One value per cluster from the second out argument of <c>PairedTTestPValues</c>; null when the cluster counts differ.</param>
public static void CompareStratMeansVar(KMeans km1, KMeans km2, out double[] meanPvalues, out double[] varPvalues)
{
    meanPvalues = null;
    varPvalues = null;

    int strataInFirst = km1.Clusters.Count;
    int strataInSecond = km2.Clusters.Count;
    if (strataInFirst != strataInSecond)
    {
        // Different stratum counts: tell the user and leave both outputs null.
        System.Windows.Forms.MessageBox.Show("Not the same number of strata! Models are not comparable!");
        return;
    }

    meanPvalues = new double[strataInFirst];
    varPvalues = new double[strataInSecond];

    for (int stratum = 0; stratum < strataInFirst; stratum++)
    {
        KMeansCluster first = km1.Clusters[stratum];
        KMeansCluster second = km2.Clusters[stratum];

        double meanP;
        double varP;
        PairedTTestPValues(first.Mean, first.Covariance, second.Mean, second.Covariance, out meanP, out varP);

        meanPvalues[stratum] = meanP;
        varPvalues[stratum] = varP;
    }
}
/// <summary>
/// Appends, for every cluster of <c>kmeans</c>, its means, covariance matrix
/// and the matrix returned by <c>getCorr</c> to the given dialog.
/// </summary>
/// <param name="rd">Dialog that receives the report text.</param>
private void kmeansReport(Forms.RunningProcess.frmRunningProcessDialog rd)
{
    KMeansClusterCollection clusters = kmeans.Clusters;
    for (int c = 0; c < clusters.Count; c++)
    {
        KMeansCluster cluster = clusters[c];
        double[] means = cluster.Mean;
        double[,] covariance = cluster.Covariance;
        double[,] correlation = getCorr(covariance);

        string header = "\n\nStratum " + Labels[c] + ":\nMeans: ";
        string meanList = String.Join(", ", means.Cast<double>().Select(m => m.ToString()).ToArray());
        rd.addMessage(header + meanList + "\nCovariance:");

        // Each message line holds the values [0..n-1, line] of the matrix.
        for (int line = 0; line < VariableFieldNames.Length; line++)
        {
            string[] cells = Enumerable.Range(0, VariableFieldNames.Length)
                                       .Select(pos => covariance[pos, line].ToString())
                                       .ToArray();
            rd.addMessage("\n" + String.Join(",", cells));
        }

        rd.addMessage("\nCorr:");
        for (int line = 0; line < VariableFieldNames.Length; line++)
        {
            string[] cells = Enumerable.Range(0, VariableFieldNames.Length)
                                       .Select(pos => correlation[pos, line].ToString())
                                       .ToArray();
            rd.addMessage("\n" + String.Join(",", cells));
        }
    }
}
/// <summary>
/// Loads two stratification models from disk and compares them stratum by
/// stratum, matching strata between the models by label.
/// </summary>
/// <remarks>
/// All outputs stay null when the models are not comparable: different number
/// of strata, or a label of the first model that does not occur in the second.
/// In both cases a message box is shown.
/// </remarks>
/// <param name="StratModel1">Path of the first model file.</param>
/// <param name="StratModel2">Path of the second model file.</param>
/// <param name="labels">Labels of the first model; the other outputs are indexed like this list.</param>
/// <param name="meanDiff">Per stratum, the average over variables of (mean in model 1 - mean in model 2).</param>
/// <param name="varDiff">Per stratum, the average over variables of the difference of the covariance diagonals.</param>
/// <param name="meanPvalues">Per stratum, the first out value of <c>PairedTTestPValues</c>.</param>
/// <param name="varPvalues">Per stratum, the second out value of <c>PairedTTestPValues</c>.</param>
public static void CompareStratMeansVar(string StratModel1, string StratModel2, out List<string> labels, out double[] meanDiff, out double[] varDiff, out double[] meanPvalues, out double[] varPvalues)
{
    meanPvalues = null;
    varPvalues = null;
    meanDiff = null;
    varDiff = null;
    labels = null;

    dataPrepClusterKmean dpc1 = new dataPrepClusterKmean();
    dpc1.buildModel(StratModel1);
    KMeans km1 = (KMeans)dpc1.Model;

    dataPrepClusterKmean dpc2 = new dataPrepClusterKmean();
    dpc2.buildModel(StratModel2);
    List<string> labels2 = dpc2.Labels;
    KMeans km2 = (KMeans)dpc2.Model;

    int nPv1 = km1.Clusters.Count;
    int nPv2 = km2.Clusters.Count;
    if (nPv1 != nPv2)
    {
        System.Windows.Forms.MessageBox.Show("Not the same number of strata! Models are not comparable!");
        return;
    }

    // Resolve every label of model 1 in model 2 before allocating any output.
    // IndexOf returns -1 for a missing label, which must not reach the
    // cluster indexer below.
    List<string> labels1 = dpc1.Labels;
    int[] indexInModel2 = new int[labels1.Count];
    for (int i = 0; i < labels1.Count; i++)
    {
        indexInModel2[i] = labels2.IndexOf(labels1[i]);
        if (indexInModel2[i] < 0)
        {
            System.Windows.Forms.MessageBox.Show("Stratum label '" + labels1[i] + "' was not found in the second model! Models are not comparable!");
            return;
        }
    }

    labels = labels1;
    meanPvalues = new double[nPv1];
    varPvalues = new double[nPv2];
    meanDiff = new double[nPv1];
    varDiff = new double[nPv2];

    // Iterate by position: no repeated IndexOf on the first list, and
    // duplicate labels in model 1 each fill their own slot.
    for (int ind1 = 0; ind1 < labels1.Count; ind1++)
    {
        KMeansCluster kmC1 = km1.Clusters[ind1];
        KMeansCluster kmC2 = km2.Clusters[indexInModel2[ind1]];
        double[] means1 = kmC1.Mean;
        double[] means2 = kmC2.Mean;
        double[,] cov1 = kmC1.Covariance;
        double[,] cov2 = kmC2.Covariance;

        double[] meanDiffArr = new double[means1.Length];
        double[] varDiffArr = new double[means1.Length];
        for (int i = 0; i < means1.Length; i++)
        {
            meanDiffArr[i] = means1[i] - means2[i];
            varDiffArr[i] = cov1[i, i] - cov2[i, i];
        }

        meanDiff[ind1] = meanDiffArr.Average();
        varDiff[ind1] = varDiffArr.Average();

        double m, v;
        PairedTTestPValues(means1, cov1, means2, cov2, out m, out v);
        meanPvalues[ind1] = m;
        varPvalues[ind1] = v;
    }
}
/// <summary>
/// Appends one message per cluster of <c>clusterCollection</c> to the dialog,
/// listing the cluster label and its means.
/// </summary>
/// <param name="rd">Dialog that receives the report text.</param>
private void binaryReport(Forms.RunningProcess.frmRunningProcessDialog rd)
{
    KMeansClusterCollection clusters = (KMeansClusterCollection)clusterCollection;
    for (int c = 0; c < clusters.Count; c++)
    {
        double[] means = clusters[c].Mean;

        string heading = "\n\nCluster " + Labels[c] + ":\nMeans: ";
        string[] meanText = means.Cast<double>().Select(m => m.ToString()).ToArray();
        rd.addMessage(heading + String.Join(", ", meanText));
    }
}
/// <summary>
/// Writes every cluster of <c>clusterCollection</c> to the writer as three
/// lines: comma-separated means, comma-separated covariance values and the
/// proportion.
/// </summary>
/// <param name="sw">Writer that receives the cluster records.</param>
private void writeKmeansData(System.IO.StreamWriter sw)
{
    KMeansClusterCollection clusters = (KMeansClusterCollection)clusterCollection;
    for (int c = 0; c < clusters.Count; c++)
    {
        KMeansCluster cluster = clusters[c];

        string[] meanText = cluster.Mean.Cast<double>().Select(value => value.ToString()).ToArray();
        sw.WriteLine(String.Join(",", meanText));

        // Cast<double>() enumerates the 2-D covariance array element by element.
        string[] covText = cluster.Covariance.Cast<double>().Select(value => value.ToString()).ToArray();
        sw.WriteLine(String.Join(",", covText));

        sw.WriteLine(cluster.Proportion.ToString());
    }
}
/// <summary>
/// Computes, for the stratum labelled <paramref name="category"/>, a two-sided
/// t-test p-value for every pair of variable means (j, m) with j &lt; m.
/// </summary>
/// <param name="category">Stratum label; matched through its <c>ToString()</c> against <c>lbl</c>.</param>
/// <returns>
/// An array with one p-value per variable pair, in the order (0,1), (0,2), ...,
/// (1,2), .... The array is left zero-filled when the label is unknown or when
/// the stratum's sample size (proportion * N) is not greater than 1.
/// </returns>
public double[] computeNew(object category)
{
    string cat = category.ToString();
    int catIndex = lbl.IndexOf(cat);
    int np = ((VariableFieldNames.Length * VariableFieldNames.Length) - VariableFieldNames.Length) / 2;
    double[] pValues = new double[np];
    if (catIndex == -1)
    {
        return pValues;
    }

    KMeansCluster gClust = kmeans.Clusters[catIndex];
    double[] mns = gClust.Mean;
    double nSample = gClust.Proportion * N;

    // With nSample <= 1 the degrees of freedom (2 * nSample - 2) are not
    // positive and the t distribution cannot be built. getReport skips such
    // strata; here they get the same zero-filled result as an unknown label.
    // The negated comparison also rejects NaN.
    if (!(nSample > 1))
    {
        return pValues;
    }

    double seAdjust = Math.Sqrt(2.0 / nSample);

    // Variances are the diagonal of the covariance matrix.
    double[] variances = new double[mns.Length];
    for (int j = 0; j < mns.Length; j++)
    {
        variances[j] = gClust.Covariance[j, j];
    }

    // The distribution depends only on nSample, so build it once.
    Accord.Statistics.Distributions.Univariate.TDistribution tDist =
        new Accord.Statistics.Distributions.Univariate.TDistribution(2 * nSample - 2);

    int pCnt = 0;
    for (int j = 0; j < mns.Length - 1; j++)
    {
        for (int m = j + 1; m < mns.Length; m++)
        {
            double mD = mns[j] - mns[m];
            double pSD = Math.Sqrt((variances[j] + variances[m]) / 2);
            double se = pSD * seAdjust;
            double tStat = mD / se;
            double cdf = tDist.DistributionFunction(tStat);

            // Two-sided p-value: double the tail on the side of the statistic.
            pValues[pCnt] = (tStat > 0) ? (1 - cdf) * 2 : cdf * 2;
            pCnt++;
        }
    }

    return pValues;
}
/// <summary>
/// Creates a new <c>KMeans</c> with <c>k</c> clusters and fills each cluster
/// from the matching entries of <c>meansLst</c>, <c>scovLst</c> and
/// <c>proportionsLst</c>.
/// </summary>
private void makeKMeans()
{
    kmeans = new KMeans(k);
    KMeansClusterCollection clusters = kmeans.Clusters;

    int index = 0;
    while (index < k)
    {
        // Fetch all inputs before the cluster object is created.
        double[] means = meansLst[index];
        double proportion = proportionsLst[index];
        double[,] covariance = scovLst[index];

        KMeansCluster cluster = new KMeansCluster(clusters, index)
        {
            Mean = means,
            Covariance = covariance,
            Proportion = proportion
        };

        index++;
    }
}
/// <summary>
/// Reads <c>k</c> cluster records from the reader into a new
/// <c>BinarySplit</c> model and stores it in <c>clusterCollection</c> and
/// <c>model</c>.
/// </summary>
/// <remarks>
/// Each record is three lines: comma-separated means, a line that is read and
/// discarded, and the proportion.
/// </remarks>
/// <param name="sr">Reader positioned at the first cluster record.</param>
private void setBinaryCluster(System.IO.StreamReader sr)
{
    BinarySplit split = new BinarySplit(k);
    KMeansClusterCollection clusters = split.Clusters;
    char[] comma = new char[] { ',' };

    for (int clusterIndex = 0; clusterIndex < k; clusterIndex++)
    {
        string[] meanTokens = sr.ReadLine().Split(comma);
        double[] means = new double[meanTokens.Length];
        for (int t = 0; t < meanTokens.Length; t++)
        {
            means[t] = System.Convert.ToDouble(meanTokens[t]);
        }

        // The second line of the record is not used by this model.
        sr.ReadLine();

        double proportion = System.Convert.ToDouble(sr.ReadLine());

        KMeansCluster cluster = new KMeansCluster(clusters, clusterIndex)
        {
            Mean = means,
            Proportion = proportion
        };
    }

    clusterCollection = clusters;
    model = split;
}
/// <summary>
/// Loads a T-test model from a model file into this instance.
/// </summary>
/// <remarks>
/// Lines are read in this order: model type, input path, strata field,
/// variable field names, n, prop, k, labels, then three lines per cluster
/// (means, covariance values, proportion). If the first line is not the
/// <c>TTEST</c> model type, an error box is shown and nothing else is read.
/// </remarks>
/// <param name="modelPath">Path of the model file; also stored in <c>outmodelpath</c>.</param>
public void buildModel(string modelPath)
{
    outmodelpath = modelPath;
    using (System.IO.StreamReader reader = new System.IO.StreamReader(outmodelpath))
    {
        string typeLine = reader.ReadLine();
        dataPrepBase.modelTypes declaredType = (dataPrepBase.modelTypes)Enum.Parse(typeof(dataPrepBase.modelTypes), typeLine);
        if (declaredType != dataPrepBase.modelTypes.TTEST)
        {
            System.Windows.Forms.MessageBox.Show("Not a TTest Model!!", "Error", System.Windows.Forms.MessageBoxButtons.OK, System.Windows.Forms.MessageBoxIcon.Error);
            return;
        }

        char[] comma = new char[] { ',' };

        // Header section.
        inpath = reader.ReadLine();
        stratafld = reader.ReadLine();
        VariableFieldNames = reader.ReadLine().Split(comma);
        n = System.Convert.ToInt32(reader.ReadLine());
        prop = System.Convert.ToDouble(reader.ReadLine());
        k = System.Convert.ToInt32(reader.ReadLine());
        lbl = reader.ReadLine().Split(comma).ToList();

        // Cluster section.
        kmeans = new KMeans(k);
        KMeansClusterCollection clusters = kmeans.Clusters;
        for (int clusterIndex = 0; clusterIndex < k; clusterIndex++)
        {
            string[] meanTokens = reader.ReadLine().Split(comma);
            double[] means = new double[meanTokens.Length];
            for (int t = 0; t < meanTokens.Length; t++)
            {
                means[t] = System.Convert.ToDouble(meanTokens[t]);
            }

            string[] covTokens = reader.ReadLine().Split(comma);
            double proportion = System.Convert.ToDouble(reader.ReadLine());

            int size = VariableFieldNames.Length;
            double[,] covariance = new double[size, size];
            for (int flat = 0; flat < size * size; flat++)
            {
                // Token number (a * size + b) is stored at [b, a].
                covariance[flat % size, flat / size] = System.Convert.ToDouble(covTokens[flat]);
            }

            KMeansCluster cluster = new KMeansCluster(clusters, clusterIndex)
            {
                Mean = means,
                Covariance = covariance,
                Proportion = proportion
            };
        }

        reader.Close();
    }
}
/// <summary>
/// Shows a dialog titled "Cluster Results" that summarizes the model: input
/// path, sample size, number of clusters, labels, variables and, per cluster,
/// the means and covariance matrix. The model is built first when
/// <c>kmeans</c> is null.
/// </summary>
public void getReport()
{
    if (kmeans == null)
    {
        buildModel();
    }

    Forms.RunningProcess.frmRunningProcessDialog dialog = new Forms.RunningProcess.frmRunningProcessDialog(false);
    dialog.Text = "Cluster Results";
    dialog.TopLevel = true;
    dialog.pgbProcess.Visible = false;
    dialog.FormBorderStyle = System.Windows.Forms.FormBorderStyle.Sizable;

    // Model summary.
    dialog.addMessage("Input path = " + InPath);
    dialog.addMessage("Sample size = " + n.ToString() + " proportion of total records = " + prop.ToString());
    dialog.addMessage("Number of Cluster = " + k.ToString());
    dialog.addMessage("Labels = " + String.Join(", ", lbl.ToArray()));
    dialog.addMessage("Variables: " + String.Join(" ,", VariableFieldNames));

    // Per-cluster details.
    KMeansClusterCollection clusters = kmeans.Clusters;
    for (int c = 0; c < clusters.Count; c++)
    {
        KMeansCluster cluster = clusters[c];
        double[] means = cluster.Mean;
        double[,] covariance = cluster.Covariance;

        string heading = "\n\nCluster " + Labels[c] + ":\nMeans: ";
        string meanList = String.Join(", ", means.Cast<double>().Select(m => m.ToString()).ToArray());
        dialog.addMessage(heading + meanList + "\nCovariance:");

        // Each message line holds the values [0..n-1, line] of the matrix.
        for (int line = 0; line < VariableFieldNames.Length; line++)
        {
            string[] cells = Enumerable.Range(0, VariableFieldNames.Length)
                                       .Select(pos => covariance[pos, line].ToString())
                                       .ToArray();
            dialog.addMessage("\n" + String.Join(",", cells));
        }
    }

    dialog.enableClose();
    dialog.Show();
}
/// <summary>
/// Shows a dialog titled "Independent T-Test Results" with one row per
/// stratum and variable pair: label, compared variables, sample size, mean
/// difference, t statistic and two-sided p-value. Strata whose sample size
/// (proportion * N) is not greater than 1 are skipped. The model is built
/// first when <c>kmeans</c> is null.
/// </summary>
public void getReport()
{
    if (kmeans == null)
    {
        buildModel();
    }

    rd = new Forms.RunningProcess.frmRunningProcessDialog(false);
    rd.Text = "Independent T-Test Results";
    rd.TopLevel = true;
    rd.pgbProcess.Visible = false;
    rd.FormBorderStyle = System.Windows.Forms.FormBorderStyle.Sizable;

    rd.addMessage("Input path = " + InPath);
    rd.addMessage("Total Sample size = " + n.ToString());
    rd.addMessage("\nLabel |Compare V1-V2 |N |Dif |T-Stat |P-Value ");
    rd.addMessage("-".PadRight(83, '-'));

    KMeansClusterCollection clusters = kmeans.Clusters;
    for (int c = 0; c < clusters.Count; c++)
    {
        string labelCell = getValue(Labels[c], 8);
        KMeansCluster cluster = clusters[c];
        double[] means = cluster.Mean;
        double[] variances = new double[means.Length];
        double sampleSize = cluster.Proportion * N;
        if (sampleSize <= 1)
        {
            continue;
        }

        double seScale = Math.Sqrt(2 * 1 / sampleSize);

        // Variances are the diagonal of the covariance matrix.
        for (int v = 0; v < means.Length; v++)
        {
            variances[v] = cluster.Covariance[v, v];
        }

        // Every unordered pair (first, second) with first < second.
        for (int first = 0; first < means.Length - 1; first++)
        {
            for (int second = first + 1; second < means.Length; second++)
            {
                string firstName = getValue(VariableFieldNames[first], 12);
                string secondName = getValue(VariableFieldNames[second], 12);

                double difference = means[first] - means[second];
                double pooledSd = Math.Sqrt((variances[first] + variances[second]) / 2);
                double standardError = pooledSd * seScale;
                double tStat = difference / standardError;

                Accord.Statistics.Distributions.Univariate.TDistribution tDist =
                    new Accord.Statistics.Distributions.Univariate.TDistribution(2 * sampleSize - 2);
                double cdf = tDist.DistributionFunction(tStat);

                // Two-sided p-value: double the tail on the side of the statistic.
                double pValue = (tStat > 0) ? (1 - cdf) * 2 : (cdf * 2);

                string pValueText = (pValue < 0.0001) ? "p < 0.0001" : pValue.ToString();

                string leadCell = labelCell + "|" + firstName + "-" + secondName + "| " + getValue(sampleSize.ToString(), 6);
                string row = string.Join(" | ", new string[]
                {
                    leadCell,
                    getValue(difference.ToString(), 6),
                    getValue(tStat.ToString(), 6),
                    getValue(pValueText, 10)
                });
                rd.addMessage(row);
            }
        }
    }

    rd.addMessage("-".PadRight(83, '-'));
    rd.enableClose();
    rd.Show();
}
/// <summary>
/// Loads a StrataCovCorr model from a model file into this instance.
/// </summary>
/// <remarks>
/// Lines are read in this order: model type, input path, variable field
/// names, n, prop, k, labels, then three lines per cluster (means, covariance
/// values, proportion). If the first line is not the <c>StrataCovCorr</c>
/// model type, an error box is shown and nothing else is read.
/// </remarks>
/// <param name="modelPath">Path of the model file; also stored in <c>outmodelpath</c>.</param>
public void buildModel(string modelPath)
{
    outmodelpath = modelPath;
    using (System.IO.StreamReader reader = new System.IO.StreamReader(outmodelpath))
    {
        string typeLine = reader.ReadLine();
        dataPrepBase.modelTypes declaredType = (dataPrepBase.modelTypes)Enum.Parse(typeof(dataPrepBase.modelTypes), typeLine);
        if (declaredType != dataPrepBase.modelTypes.StrataCovCorr)
        {
            System.Windows.Forms.MessageBox.Show("Not a StrataCovCorr Model!!", "Error", System.Windows.Forms.MessageBoxButtons.OK, System.Windows.Forms.MessageBoxIcon.Error);
            return;
        }

        char[] comma = new char[] { ',' };

        // Header section.
        inpath = reader.ReadLine();
        VariableFieldNames = reader.ReadLine().Split(comma);
        n = System.Convert.ToInt32(reader.ReadLine());
        prop = System.Convert.ToDouble(reader.ReadLine());
        k = System.Convert.ToInt32(reader.ReadLine());
        lbl = reader.ReadLine().Split(comma).ToList();

        // Cluster section.
        kmeans = new KMeans(k);
        KMeansClusterCollection clusters = kmeans.Clusters;
        for (int clusterIndex = 0; clusterIndex < k; clusterIndex++)
        {
            string[] meanTokens = reader.ReadLine().Split(comma);
            double[] means = new double[meanTokens.Length];
            for (int t = 0; t < meanTokens.Length; t++)
            {
                means[t] = System.Convert.ToDouble(meanTokens[t]);
            }

            string[] covTokens = reader.ReadLine().Split(comma);
            double proportion = System.Convert.ToDouble(reader.ReadLine());

            int size = VariableFieldNames.Length;
            double[,] covariance = new double[size, size];
            for (int outer = 0; outer < size; outer++)
            {
                int offset = outer * size;
                for (int inner = 0; inner < size; inner++)
                {
                    // Token (outer * size + inner) is stored at [inner, outer].
                    covariance[inner, outer] = System.Convert.ToDouble(covTokens[offset + inner]);
                }
            }

            KMeansCluster cluster = new KMeansCluster(clusters, clusterIndex)
            {
                Mean = means,
                Covariance = covariance,
                Proportion = proportion
            };
        }

        reader.Close();
    }
}