/// <summary>
/// Compares two k-means stratification models stratum by stratum.
/// </summary>
/// <remarks>
/// Strata are paired by label. For each stratum of the first model the
/// per-variable cluster means and the covariance-matrix diagonals are
/// differenced (model 1 minus model 2) and averaged, and PairedTTestPValues
/// supplies one p-value for the means and one for the variances.
/// If the two models hold a different number of clusters, or a label of the
/// first model does not occur in the second, a message box is shown and every
/// out parameter is left null.
/// </remarks>
/// <param name="StratModel1">Path of the first model; passed to buildModel.</param>
/// <param name="StratModel2">Path of the second model; passed to buildModel.</param>
/// <param name="labels">Labels of the first model; results are indexed in this order.</param>
/// <param name="meanDiff">Average difference of the cluster means, one entry per stratum.</param>
/// <param name="varDiff">Average difference of the covariance diagonals, one entry per stratum.</param>
/// <param name="meanPvalues">First value returned by PairedTTestPValues, one entry per stratum.</param>
/// <param name="varPvalues">Second value returned by PairedTTestPValues, one entry per stratum.</param>
public static void CompareStratMeansVar(string StratModel1, string StratModel2, out List<string> labels, out double[] meanDiff, out double[] varDiff, out double[] meanPvalues, out double[] varPvalues)
{
    // Outputs stay null on every failure path.
    labels = null;
    meanDiff = null;
    varDiff = null;
    meanPvalues = null;
    varPvalues = null;

    dataPrepClusterKmean dpc1 = new dataPrepClusterKmean();
    dpc1.buildModel(StratModel1);
    KMeans km1 = (KMeans)dpc1.Model;
    dataPrepClusterKmean dpc2 = new dataPrepClusterKmean();
    dpc2.buildModel(StratModel2);
    KMeans km2 = (KMeans)dpc2.Model;

    int nStrata = km1.Clusters.Count;
    if (nStrata != km2.Clusters.Count)
    {
        System.Windows.Forms.MessageBox.Show("Not the same number of strata! Models are not comparable!");
        return;
    }

    List<string> labels1 = dpc1.Labels;
    List<string> labels2 = dpc2.Labels;

    // Resolve every stratum to its counterpart before any output is allocated,
    // so a label missing from the second model is reported instead of causing
    // an out-of-range access halfway through the comparison.
    int[] matchIndex = new int[labels1.Count];
    for (int i = 0; i < labels1.Count; i++)
    {
        matchIndex[i] = labels2.IndexOf(labels1[i]);
        if (matchIndex[i] < 0)
        {
            System.Windows.Forms.MessageBox.Show("Stratum label '" + labels1[i] + "' was not found in the second model! Models are not comparable!");
            return;
        }
    }

    labels = labels1;
    meanPvalues = new double[nStrata];
    varPvalues = new double[nStrata];
    meanDiff = new double[nStrata];
    varDiff = new double[nStrata];

    // The position in labels1 is used directly as the cluster index; this
    // avoids a linear IndexOf per stratum and keeps duplicate labels from
    // collapsing onto the same result slot.
    for (int i = 0; i < labels1.Count; i++)
    {
        KMeansCluster kmC1 = km1.Clusters[i];
        KMeansCluster kmC2 = km2.Clusters[matchIndex[i]];
        double[] means1 = kmC1.Mean;
        double[] means2 = kmC2.Mean;
        double[,] cov1 = kmC1.Covariance;
        double[,] cov2 = kmC2.Covariance;

        double[] meanDiffArr = new double[means1.Length];
        double[] varDiffArr = new double[means1.Length];
        for (int j = 0; j < means1.Length; j++)
        {
            meanDiffArr[j] = means1[j] - means2[j];
            // Only the diagonal (per-variable variance) is compared.
            varDiffArr[j] = cov1[j, j] - cov2[j, j];
        }
        meanDiff[i] = meanDiffArr.Average();
        varDiff[i] = varDiffArr.Average();

        double m, v;
        PairedTTestPValues(means1, cov1, means2, cov2, out m, out v);
        meanPvalues[i] = m;
        varPvalues[i] = v;
    }
}
/// <summary>
/// Compares two k-means stratification models stratum by stratum.
/// </summary>
/// <remarks>
/// Strata are paired by label. For each stratum of the first model the
/// per-variable cluster means and the covariance-matrix diagonals are
/// differenced (model 1 minus model 2) and averaged, and PairedTTestPValues
/// supplies one p-value for the means and one for the variances.
/// If the two models hold a different number of clusters, or a label of the
/// first model does not occur in the second, a message box is shown and every
/// out parameter is left null.
/// </remarks>
/// <param name="StratModel1">Path of the first model; passed to buildModel.</param>
/// <param name="StratModel2">Path of the second model; passed to buildModel.</param>
/// <param name="labels">Labels of the first model; results are indexed in this order.</param>
/// <param name="meanDiff">Average difference of the cluster means, one entry per stratum.</param>
/// <param name="varDiff">Average difference of the covariance diagonals, one entry per stratum.</param>
/// <param name="meanPvalues">First value returned by PairedTTestPValues, one entry per stratum.</param>
/// <param name="varPvalues">Second value returned by PairedTTestPValues, one entry per stratum.</param>
public static void CompareStratMeansVar(string StratModel1, string StratModel2, out List<string> labels, out double[] meanDiff, out double[] varDiff, out double[] meanPvalues, out double[] varPvalues)
{
    // Outputs stay null on every failure path.
    labels = null;
    meanDiff = null;
    varDiff = null;
    meanPvalues = null;
    varPvalues = null;

    dataPrepClusterKmean dpc1 = new dataPrepClusterKmean();
    dpc1.buildModel(StratModel1);
    KMeans km1 = (KMeans)dpc1.Model;
    dataPrepClusterKmean dpc2 = new dataPrepClusterKmean();
    dpc2.buildModel(StratModel2);
    KMeans km2 = (KMeans)dpc2.Model;

    int nStrata = km1.Clusters.Count;
    if (nStrata != km2.Clusters.Count)
    {
        System.Windows.Forms.MessageBox.Show("Not the same number of strata! Models are not comparable!");
        return;
    }

    List<string> labels1 = dpc1.Labels;
    List<string> labels2 = dpc2.Labels;

    // Resolve every stratum to its counterpart before any output is allocated,
    // so a label missing from the second model is reported instead of causing
    // an out-of-range access halfway through the comparison.
    int[] matchIndex = new int[labels1.Count];
    for (int i = 0; i < labels1.Count; i++)
    {
        matchIndex[i] = labels2.IndexOf(labels1[i]);
        if (matchIndex[i] < 0)
        {
            System.Windows.Forms.MessageBox.Show("Stratum label '" + labels1[i] + "' was not found in the second model! Models are not comparable!");
            return;
        }
    }

    labels = labels1;
    meanPvalues = new double[nStrata];
    varPvalues = new double[nStrata];
    meanDiff = new double[nStrata];
    varDiff = new double[nStrata];

    // The position in labels1 is used directly as the cluster index; this
    // avoids a linear IndexOf per stratum and keeps duplicate labels from
    // collapsing onto the same result slot.
    for (int i = 0; i < labels1.Count; i++)
    {
        KMeansCluster kmC1 = km1.Clusters[i];
        KMeansCluster kmC2 = km2.Clusters[matchIndex[i]];
        double[] means1 = kmC1.Mean;
        double[] means2 = kmC2.Mean;
        double[,] cov1 = kmC1.Covariance;
        double[,] cov2 = kmC2.Covariance;

        double[] meanDiffArr = new double[means1.Length];
        double[] varDiffArr = new double[means1.Length];
        for (int j = 0; j < means1.Length; j++)
        {
            meanDiffArr[j] = means1[j] - means2[j];
            // Only the diagonal (per-variable variance) is compared.
            varDiffArr[j] = cov1[j, j] - cov2[j, j];
        }
        meanDiff[i] = meanDiffArr.Average();
        varDiff[i] = varDiffArr.Average();

        double m, v;
        PairedTTestPValues(means1, cov1, means2, cov2, out m, out v);
        meanPvalues[i] = m;
        varPvalues[i] = v;
    }
}
/// <summary>
/// Reads the Proportion value of every cluster in a k-means cluster model.
/// </summary>
/// <param name="clusterModelPath">Path of the cluster model; passed to buildModel.</param>
/// <returns>One proportion per cluster, in cluster-index order.</returns>
public static double[] clusterProportions(string clusterModelPath)
{
    dataPrepClusterKmean modelReader = new dataPrepClusterKmean();
    modelReader.buildModel(clusterModelPath);

    Accord.MachineLearning.KMeans kmeans = (Accord.MachineLearning.KMeans)modelReader.Model;
    double[] proportions = new double[kmeans.Clusters.Count];
    for (int c = 0; c < proportions.Length; c++)
    {
        proportions[c] = kmeans.Clusters[c].Proportion;
    }
    return proportions;
}
/// <summary>
/// Collects the Proportion value reported by each cluster of a k-means model.
/// </summary>
/// <param name="clusterModelPath">Path of the cluster model; passed to buildModel.</param>
/// <returns>Array holding one proportion per cluster, ordered by cluster index.</returns>
public static double[] clusterProportions(string clusterModelPath)
{
    dataPrepClusterKmean loader = new dataPrepClusterKmean();
    loader.buildModel(clusterModelPath);

    Accord.MachineLearning.KMeans model = (Accord.MachineLearning.KMeans)loader.Model;
    int clusterCount = model.Clusters.Count;
    double[] result = new double[clusterCount];
    for (int idx = 0; idx < clusterCount; idx++)
    {
        Accord.MachineLearning.KMeansCluster current = model.Clusters[idx];
        result[idx] = current.Proportion;
    }
    return result;
}
/// <summary>
/// Computes a sample size for every cluster of a k-means cluster model.
/// </summary>
/// <param name="clusterModelPath">Path of the cluster model; passed to buildModel.</param>
/// <param name="proportionOfMean">Forwarded unchanged to sampleSizeMaxMean.</param>
/// <param name="alpha">Forwarded unchanged to sampleSizeMaxMean.</param>
/// <returns>
/// For each cluster, element 0 of the array returned by sampleSizeMaxMean for
/// that cluster's covariance and mean.
/// </returns>
public static int[] sampleSizeMaxCluster(string clusterModelPath, double proportionOfMean = 0.1, double alpha = 0.05)
{
    dataPrepClusterKmean modelReader = new dataPrepClusterKmean();
    modelReader.buildModel(clusterModelPath);

    Accord.MachineLearning.KMeans kmeans = (Accord.MachineLearning.KMeans)modelReader.Model;
    int[] sizes = new int[kmeans.Clusters.Count];
    for (int c = 0; c < sizes.Length; c++)
    {
        Accord.MachineLearning.KMeansCluster cluster = kmeans.Clusters[c];
        sizes[c] = sampleSizeMaxMean(cluster.Covariance, cluster.Mean, proportionOfMean, alpha)[0];
    }
    return sizes;
}
/// <summary>
/// Writes a per-cluster sample-size report to a running-process dialog.
/// </summary>
/// <param name="modelPath">Path of the cluster model; used for the labels and the sample sizes.</param>
/// <param name="rp">Dialog that receives the report lines through addMessage.</param>
/// <param name="proportion">Forwarded to sampleSizeMaxCluster.</param>
/// <param name="alpha">Forwarded to sampleSizeMaxCluster.</param>
private static void fillCluserReport(string modelPath, Forms.RunningProcess.frmRunningProcessDialog rp, double proportion = 0.1, double alpha = 0.05)
{
    dataPrepClusterKmean model = new dataPrepClusterKmean();
    model.buildModel(modelPath);
    List<string> clusterLabels = model.Labels;

    // 45-character horizontal rule framing the table body.
    string rule = "-".PadRight(45, '-');

    rp.addMessage("Samples by class (Cluster; number of samples)");
    rp.addMessage(rule);

    int[] counts = sampleSizeMaxCluster(modelPath, proportion, alpha);
    int position = 0;
    foreach (int count in counts)
    {
        rp.addMessage("\t" + clusterLabels[position] + "; " + count.ToString());
        position++;
    }

    rp.addMessage(rule);
    rp.addMessage("Total number of samples = " + counts.Sum().ToString());
}
/// <summary>
/// Builds the model from the sample tables: creates the PCA and cluster
/// models from Sample1 and runs a two-sample Kolmogorov-Smirnov test per
/// variable for every key in cntDic, storing the p-values in pDic and the
/// test statistics in sDic.
/// </summary>
/// <remarks>
/// Returns without doing anything when checkTables or getSampleRatios
/// reports false.
/// </remarks>
private void buildModel()
{
    // getSampleRatios is only evaluated when checkTables succeeded.
    if (!checkTables() || !getSampleRatios())
    {
        return;
    }

    pca = new dataPrepPrincipleComponents(Sample1, Variables);
    cluster = new dataPrepClusterKmean(Sample1, Variables, numberOfBins);

    foreach (string stratum in cntDic.Keys)
    {
        buildSamples(stratum);
        double[] pValues = new double[Variables.Length];
        double[] statistics = new double[Variables.Length];

        for (int v = 0; v < Variables.Length; v++)
        {
            TwoSampleKolmogorovSmirnovTest ksTest = new TwoSampleKolmogorovSmirnovTest(
                sample1[v],
                sample2[v],
                TwoSampleKolmogorovSmirnovTestHypothesis.SamplesDistributionsAreUnequal);
            getCdfProp(stratum, v, ksTest);
            pValues[v] = ksTest.PValue;
            statistics[v] = ksTest.Statistic;
        }

        pDic.Add(stratum, pValues);
        sDic.Add(stratum, statistics);
    }
}
/// <summary>
/// Loads a KS model from a text file and then loads its companion PCA and
/// cluster models.
/// </summary>
/// <remarks>
/// File layout as read here:
/// line 1 model type (must parse to modelTypes.KS, otherwise an error box is
/// shown and nothing is loaded); line 2 comma-separated variables; line 3
/// strata field; line 4 boolean for Oridinate; line 5 comma-separated labels;
/// lines 6-7 p-values and statistics (comma-separated per label,
/// semicolon-separated within a label); lines 8-9 the two counts per label.
/// Then, for each label: min1, max1, min2, max2 lines, two bin-proportion
/// lines per variable (set 1, then set 2), a cluster-count line and a
/// cluster-sample-count line.
/// The companion files are looked up next to the model file with the suffixes
/// "_pca.mdl" and "_cluster.mdl".
/// </remarks>
/// <param name="mdlPath">Path of the KS model file.</param>
private void buildModel(string mdlPath)
{
    char[] comma = new char[] { ',' };
    char[] semicolon = new char[] { ';' };

    using (System.IO.StreamReader sr = new System.IO.StreamReader(mdlPath))
    {
        dataPrepBase.modelTypes mType = (dataPrepBase.modelTypes)Enum.Parse(typeof(dataPrepBase.modelTypes), sr.ReadLine());
        if (mType != dataPrepBase.modelTypes.KS)
        {
            System.Windows.Forms.MessageBox.Show("Not a KS Model!!", "Error", System.Windows.Forms.MessageBoxButtons.OK, System.Windows.Forms.MessageBoxIcon.Error);
            return;
        }

        Variables = sr.ReadLine().Split(comma);
        StrataField = sr.ReadLine();
        Oridinate = System.Convert.ToBoolean(sr.ReadLine());
        string[] lbl = sr.ReadLine().Split(comma);
        string[] pArr = sr.ReadLine().Split(comma);
        string[] sArr = sr.ReadLine().Split(comma);
        string[] cArr1 = sr.ReadLine().Split(comma);
        string[] cArr2 = sr.ReadLine().Split(comma);

        for (int i = 0; i < lbl.Length; i++)
        {
            string l = lbl[i];
            double[] p = (from string str in pArr[i].Split(semicolon) select System.Convert.ToDouble(str)).ToArray();
            double[] s = (from string str in sArr[i].Split(semicolon) select System.Convert.ToDouble(str)).ToArray();
            int c1 = System.Convert.ToInt32(cArr1[i]);
            int c2 = System.Convert.ToInt32(cArr2[i]);
            pDic.Add(l, p);
            sDic.Add(l, s);
            cntDic.Add(l, new int[] { c1, c2 });
        }

        // The per-label section is strictly sequential: the order of the
        // ReadLine calls below mirrors the order of the lines in the file.
        for (int i = 0; i < lbl.Length; i++)
        {
            string l = lbl[i];
            double[] min1 = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToDouble(s)).ToArray();
            double[] max1 = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToDouble(s)).ToArray();
            double[] min2 = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToDouble(s)).ToArray();
            double[] max2 = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToDouble(s)).ToArray();
            minMaxDic1.Add(l, new double[][] { min1, max1 });
            minMaxDic2.Add(l, new double[][] { min2, max2 });

            double[][] bp1 = new double[Variables.Length][];
            double[][] bp2 = new double[Variables.Length][];
            for (int j = 0; j < Variables.Length; j++)
            {
                bp1[j] = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToDouble(s)).ToArray();
                bp2[j] = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToDouble(s)).ToArray();
            }
            binPropDic1.Add(l, bp1);
            binPropDic2.Add(l, bp2);

            int[] clusCnt = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToInt32(s)).ToArray();
            clusCountDic.Add(l, clusCnt);
            int[] clusSampCnt = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToInt32(s)).ToArray();
            clusSampleCountDic.Add(l, clusSampCnt);
        }

        // Path.Combine keeps a bare file name relative; concatenating
        // directory + "\\" would turn an empty directory into a drive-root path.
        string mdlDir = System.IO.Path.GetDirectoryName(mdlPath);
        string mdlName = System.IO.Path.GetFileNameWithoutExtension(mdlPath);

        pca = new dataPrepPrincipleComponents();
        string pcPath = System.IO.Path.Combine(mdlDir, mdlName + "_pca.mdl");
        pca.buildModel(pcPath);

        cluster = new dataPrepClusterKmean();
        string clusterPath = System.IO.Path.Combine(mdlDir, mdlName + "_cluster.mdl");
        cluster.buildModel(clusterPath);
        numberOfBins = cluster.Classes;
        // No explicit Close: the using block disposes the reader.
    }
}
/// <summary>
/// Builds a cluster model from a raster or a table and writes it to disk.
/// </summary>
/// <remarks>
/// With fewer than six parameters the input is a raster:
/// [1] raster, [2] number of classes, [3] cluster type.
/// Otherwise the input is a table:
/// [1] table, [2] comma-separated field names, [3] number of classes,
/// [4] cluster type.
/// The last element is always the output model path.
/// </remarks>
/// <param name="paramArr">Parameter array laid out as described above.</param>
/// <exception cref="System.NotSupportedException">
/// The raster branch was given a cluster type it has no model for.
/// </exception>
private void createClusterModel(string[] paramArr)
{
    Statistics.dataPrepClusterBase dpC = null;
    esriUtil.Statistics.clusterType cType = Statistics.clusterType.KMEANS;
    if (paramArr.Length < 6)
    {
        IRaster rs = rsUtil.createRaster(getRaster(paramArr[1]));
        int nCls = System.Convert.ToInt32(paramArr[2]);
        cType = (esriUtil.Statistics.clusterType)Enum.Parse(typeof(esriUtil.Statistics.clusterType), paramArr[3]);
        switch (cType)
        {
            case esriUtil.Statistics.clusterType.KMEANS:
                dpC = (Statistics.dataPrepClusterBase)(new Statistics.dataPrepClusterKmean(rs, nCls));
                break;
            case esriUtil.Statistics.clusterType.BINARY:
                dpC = (Statistics.dataPrepClusterBase)(new Statistics.dataPrepClusterBinary(rs, nCls));
                break;
            case esriUtil.Statistics.clusterType.GAUSSIANMIXTURE:
                dpC = (Statistics.dataPrepClusterBase)(new Statistics.dataPrepClusterGaussian(rs, nCls));
                break;
            default:
                break;
        }
    }
    else
    {
        ITable tbl = getTable(paramArr[1]);
        string[] flds = paramArr[2].Split(new char[] { ',' });
        int nCls = System.Convert.ToInt32(paramArr[3]);
        // NOTE(review): the cluster type is parsed (so an invalid name still
        // fails here) but table input always produces a k-means model.
        cType = (esriUtil.Statistics.clusterType)Enum.Parse(typeof(esriUtil.Statistics.clusterType), paramArr[4]);
        dpC = new Statistics.dataPrepClusterKmean(tbl, flds, nCls);
    }

    // An unhandled cluster type used to surface as a NullReferenceException
    // on writeModel; fail with a message that names the cause instead.
    if (dpC == null)
    {
        throw new System.NotSupportedException("Cluster type '" + cType.ToString() + "' is not supported.");
    }
    dpC.writeModel(paramArr[paramArr.Length - 1]);
}
/// <summary>
/// Writes a per-cluster sample-size report to a running-process dialog.
/// </summary>
/// <param name="modelPath">Path of the cluster model; used for the labels and the sample sizes.</param>
/// <param name="rp">Dialog that receives the report lines through addMessage.</param>
/// <param name="proportion">Forwarded to sampleSizeMaxCluster.</param>
/// <param name="alpha">Forwarded to sampleSizeMaxCluster.</param>
private static void fillCluserReport(string modelPath, Forms.RunningProcess.frmRunningProcessDialog rp, double proportion = 0.1, double alpha = 0.05)
{
    dataPrepClusterKmean model = new dataPrepClusterKmean();
    model.buildModel(modelPath);
    List<string> clusterLabels = model.Labels;

    // 45-character horizontal rule framing the table body.
    string rule = "-".PadRight(45, '-');

    rp.addMessage("Samples by class (Cluster; number of samples)");
    rp.addMessage(rule);

    int[] counts = sampleSizeMaxCluster(modelPath, proportion, alpha);
    int position = 0;
    foreach (int count in counts)
    {
        rp.addMessage("\t" + clusterLabels[position] + "; " + count.ToString());
        position++;
    }

    rp.addMessage(rule);
    rp.addMessage("Total number of samples = " + counts.Sum().ToString());
}
/// <summary>
/// Computes a sample size for every cluster of a k-means cluster model.
/// </summary>
/// <param name="clusterModelPath">Path of the cluster model; passed to buildModel.</param>
/// <param name="proportionOfMean">Forwarded unchanged to sampleSizeMaxMean.</param>
/// <param name="alpha">Forwarded unchanged to sampleSizeMaxMean.</param>
/// <returns>
/// For each cluster, element 0 of the array returned by sampleSizeMaxMean for
/// that cluster's covariance and mean.
/// </returns>
public static int[] sampleSizeMaxCluster(string clusterModelPath, double proportionOfMean = 0.1, double alpha = 0.05)
{
    dataPrepClusterKmean loader = new dataPrepClusterKmean();
    loader.buildModel(clusterModelPath);

    Accord.MachineLearning.KMeans model = (Accord.MachineLearning.KMeans)loader.Model;
    int clusterCount = model.Clusters.Count;
    int[] result = new int[clusterCount];
    for (int idx = 0; idx < clusterCount; idx++)
    {
        Accord.MachineLearning.KMeansCluster current = model.Clusters[idx];
        int[] perVariable = sampleSizeMaxMean(current.Covariance, current.Mean, proportionOfMean, alpha);
        result[idx] = perVariable[0];
    }
    return result;
}
/// <summary>
/// Flags the rows of a table for sampling, based on a previously built cluster
/// model. Two fields are created through geoUtil.createField (requested names
/// "sample" and "weight"): the sample field receives 1 for rows that should be
/// sampled and 0 otherwise, the weight field receives the weight of the row's
/// cluster.
/// </summary>
/// <remarks>
/// Each row is selected at random with probability
/// (samples wanted for its cluster) / (rows in its cluster).
/// A cluster's weight is its model proportion divided by its share of the
/// total sample count. When <paramref name="weightsEqual"/> is true the sample
/// counts are scaled by weight / smallest weight and every weight is set to 1.
/// Nothing is written when the table cannot bypass an edit session or when
/// the number of unique cluster values differs from the number of clusters in
/// the model; exceptions raised while updating are shown in a message box.
/// </remarks>
/// <param name="inputTable">Table to update; any running edit session on its workspace is stopped (saving edits).</param>
/// <param name="clusterModelPath">Path of the cluster model.</param>
/// <param name="clusterFieldName">Field holding each row's cluster label.</param>
/// <param name="proportionOfMean">Forwarded to sampleSizeMaxCluster.</param>
/// <param name="alpha">Forwarded to sampleSizeMaxCluster.</param>
/// <param name="weightsEqual">True to rescale sample counts so that all weights become 1.</param>
public void selectClusterFeaturesToSample(ITable inputTable, string clusterModelPath, string clusterFieldName="Cluster", double proportionOfMean=0.1, double alpha=0.05, bool weightsEqual=false)
{
    IObjectClassInfo2 objInfo2 = (IObjectClassInfo2)inputTable;
    if (!objInfo2.CanBypassEditSession())
    {
        System.Windows.Forms.MessageBox.Show("Input Table participates in a composite relationship. Please export this table as a new table and try again!");
        return;
    }

    esriUtil.Statistics.dataPrepClusterKmean dpC = new Statistics.dataPrepClusterKmean();
    dpC.buildModel(clusterModelPath);
    List<string> labels = dpC.Labels;
    HashSet<string> unqVls = geoUtil.getUniqueValues(inputTable, clusterFieldName);
    System.Random rd = new Random();

    int[] samplesPerCluster = esriUtil.Statistics.dataPrepSampleSize.sampleSizeMaxCluster(clusterModelPath, proportionOfMean, alpha);
    double[] propPerCluster = esriUtil.Statistics.dataPrepSampleSize.clusterProportions(clusterModelPath);
    double[] weightsPerCluster = new double[propPerCluster.Length];
    double sSamp = System.Convert.ToDouble(samplesPerCluster.Sum());
    for (int i = 0; i < weightsPerCluster.Length; i++)
    {
        // weight = population share / sample share
        weightsPerCluster[i] = propPerCluster[i] / (samplesPerCluster[i] / sSamp);
    }
    if (weightsEqual)
    {
        double minProp = weightsPerCluster.Min();
        for (int i = 0; i < samplesPerCluster.Length; i++)
        {
            samplesPerCluster[i] = System.Convert.ToInt32(samplesPerCluster[i] * (weightsPerCluster[i] / minProp));
            weightsPerCluster[i] = 1;
        }
    }

    double[] randomRatioPerClust = new double[propPerCluster.Length];
    if (samplesPerCluster.Length != unqVls.Count)
    {
        System.Windows.Forms.MessageBox.Show("Unique Values in cluster field do not match the number of cluster models!");
        return;
    }

    string sampleFldName = geoUtil.createField(inputTable, "sample", esriFieldType.esriFieldTypeSmallInteger,false);
    string weightFldName = geoUtil.createField(inputTable, "weight", esriFieldType.esriFieldTypeDouble,false);

    IQueryFilter qf0 = new QueryFilterClass();
    qf0.SubFields = clusterFieldName;
    // String fields need their values quoted in the where clause.
    string h = "";
    IField fld = inputTable.Fields.get_Field(inputTable.FindField(clusterFieldName));
    if (fld.Type == esriFieldType.esriFieldTypeString)
    {
        h = "'";
    }
    for (int i = 0; i < samplesPerCluster.Length; i++)
    {
        qf0.WhereClause = clusterFieldName + " = " + h+labels[i]+h;
        int tCnt = inputTable.RowCount(qf0);
        randomRatioPerClust[i] = System.Convert.ToDouble(samplesPerCluster[i]) / tCnt;
    }

    IQueryFilter qf = new QueryFilterClass();
    qf.SubFields = clusterFieldName + "," + sampleFldName + "," + weightFldName;
    IWorkspace wks = ((IDataset)inputTable).Workspace;
    IWorkspaceEdit wksE = (IWorkspaceEdit)wks;
    if (wksE.IsBeingEdited())
    {
        wksE.StopEditing(true);
    }

    ICursor cur = null;
    try
    {
        cur = inputTable.Update(qf, false);
        int sIndex = cur.FindField(sampleFldName);
        int cIndex = cur.FindField(clusterFieldName);
        int wIndex = cur.FindField(weightFldName);
        IRow rw = cur.NextRow();
        while (rw != null)
        {
            string clustStr = rw.get_Value(cIndex).ToString();
            int clust = labels.IndexOf(clustStr);
            double w = weightsPerCluster[clust];
            double rNum = rd.NextDouble();
            int ss = 0;
            double r = randomRatioPerClust[clust];
            if (rNum < r)
            {
                ss = 1;
            }
            rw.set_Value(sIndex, ss);
            rw.set_Value(wIndex, w);
            cur.UpdateRow(rw);
            rw = cur.NextRow();
        }
    }
    catch(Exception e)
    {
        System.Windows.Forms.MessageBox.Show(e.ToString());
    }
    finally
    {
        // Release the update cursor on failure too, so the COM object is not
        // left alive after an exception.
        if (cur != null)
        {
            System.Runtime.InteropServices.Marshal.ReleaseComObject(cur);
        }
    }
}
/// <summary>
/// Loads a KS model from a text file and then loads its companion PCA and
/// cluster models.
/// </summary>
/// <remarks>
/// File layout as read here:
/// line 1 model type (must parse to modelTypes.KS, otherwise an error box is
/// shown and nothing is loaded); line 2 comma-separated variables; line 3
/// strata field; line 4 boolean for Oridinate; line 5 comma-separated labels;
/// lines 6-7 p-values and statistics (comma-separated per label,
/// semicolon-separated within a label); lines 8-9 the two counts per label.
/// Then, for each label: min1, max1, min2, max2 lines, two bin-proportion
/// lines per variable (set 1, then set 2), a cluster-count line and a
/// cluster-sample-count line.
/// The companion files are looked up next to the model file with the suffixes
/// "_pca.mdl" and "_cluster.mdl".
/// </remarks>
/// <param name="mdlPath">Path of the KS model file.</param>
private void buildModel(string mdlPath)
{
    char[] comma = new char[] { ',' };
    char[] semicolon = new char[] { ';' };

    using (System.IO.StreamReader sr = new System.IO.StreamReader(mdlPath))
    {
        dataPrepBase.modelTypes mType = (dataPrepBase.modelTypes)Enum.Parse(typeof(dataPrepBase.modelTypes), sr.ReadLine());
        if (mType != dataPrepBase.modelTypes.KS)
        {
            System.Windows.Forms.MessageBox.Show("Not a KS Model!!", "Error", System.Windows.Forms.MessageBoxButtons.OK, System.Windows.Forms.MessageBoxIcon.Error);
            return;
        }

        Variables = sr.ReadLine().Split(comma);
        StrataField = sr.ReadLine();
        Oridinate = System.Convert.ToBoolean(sr.ReadLine());
        string[] lbl = sr.ReadLine().Split(comma);
        string[] pArr = sr.ReadLine().Split(comma);
        string[] sArr = sr.ReadLine().Split(comma);
        string[] cArr1 = sr.ReadLine().Split(comma);
        string[] cArr2 = sr.ReadLine().Split(comma);

        for (int i = 0; i < lbl.Length; i++)
        {
            string l = lbl[i];
            double[] p = (from string str in pArr[i].Split(semicolon) select System.Convert.ToDouble(str)).ToArray();
            double[] s = (from string str in sArr[i].Split(semicolon) select System.Convert.ToDouble(str)).ToArray();
            int c1 = System.Convert.ToInt32(cArr1[i]);
            int c2 = System.Convert.ToInt32(cArr2[i]);
            pDic.Add(l, p);
            sDic.Add(l, s);
            cntDic.Add(l, new int[] { c1, c2 });
        }

        // The per-label section is strictly sequential: the order of the
        // ReadLine calls below mirrors the order of the lines in the file.
        for (int i = 0; i < lbl.Length; i++)
        {
            string l = lbl[i];
            double[] min1 = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToDouble(s)).ToArray();
            double[] max1 = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToDouble(s)).ToArray();
            double[] min2 = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToDouble(s)).ToArray();
            double[] max2 = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToDouble(s)).ToArray();
            minMaxDic1.Add(l, new double[][] { min1, max1 });
            minMaxDic2.Add(l, new double[][] { min2, max2 });

            double[][] bp1 = new double[Variables.Length][];
            double[][] bp2 = new double[Variables.Length][];
            for (int j = 0; j < Variables.Length; j++)
            {
                bp1[j] = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToDouble(s)).ToArray();
                bp2[j] = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToDouble(s)).ToArray();
            }
            binPropDic1.Add(l, bp1);
            binPropDic2.Add(l, bp2);

            int[] clusCnt = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToInt32(s)).ToArray();
            clusCountDic.Add(l, clusCnt);
            int[] clusSampCnt = (from string s in sr.ReadLine().Split(comma) select System.Convert.ToInt32(s)).ToArray();
            clusSampleCountDic.Add(l, clusSampCnt);
        }

        // Path.Combine keeps a bare file name relative; concatenating
        // directory + "\\" would turn an empty directory into a drive-root path.
        string mdlDir = System.IO.Path.GetDirectoryName(mdlPath);
        string mdlName = System.IO.Path.GetFileNameWithoutExtension(mdlPath);

        pca = new dataPrepPrincipleComponents();
        string pcPath = System.IO.Path.Combine(mdlDir, mdlName + "_pca.mdl");
        pca.buildModel(pcPath);

        cluster = new dataPrepClusterKmean();
        string clusterPath = System.IO.Path.Combine(mdlDir, mdlName + "_cluster.mdl");
        cluster.buildModel(clusterPath);
        numberOfBins = cluster.Classes;
        // No explicit Close: the using block disposes the reader.
    }
}
/// <summary>
/// Builds the model from the sample tables: creates the PCA and cluster
/// models from Sample1 and runs a two-sample Kolmogorov-Smirnov test per
/// variable for every key in cntDic, storing the p-values in pDic and the
/// test statistics in sDic.
/// </summary>
/// <remarks>
/// Returns without doing anything when checkTables or getSampleRatios
/// reports false.
/// </remarks>
private void buildModel()
{
    // getSampleRatios is only evaluated when checkTables succeeded.
    if (!checkTables() || !getSampleRatios())
    {
        return;
    }

    pca = new dataPrepPrincipleComponents(Sample1, Variables);
    cluster = new dataPrepClusterKmean(Sample1, Variables, numberOfBins);

    foreach (string stratum in cntDic.Keys)
    {
        buildSamples(stratum);
        double[] pValues = new double[Variables.Length];
        double[] statistics = new double[Variables.Length];

        for (int v = 0; v < Variables.Length; v++)
        {
            TwoSampleKolmogorovSmirnovTest ksTest = new TwoSampleKolmogorovSmirnovTest(
                sample1[v],
                sample2[v],
                TwoSampleKolmogorovSmirnovTestHypothesis.SamplesDistributionsAreUnequal);
            getCdfProp(stratum, v, ksTest);
            pValues[v] = ksTest.PValue;
            statistics[v] = ksTest.Statistic;
        }

        pDic.Add(stratum, pValues);
        sDic.Add(stratum, statistics);
    }
}