/// <summary>
/// Computes a sample size for every stratum of a stored strata model.
/// </summary>
/// <param name="strataModelPath">Path handed to <c>dataPrepStrata.buildModel</c> to load the model.</param>
/// <param name="proportionOfMean">Forwarded unchanged to <c>sampleSizeMaxMean</c>.</param>
/// <param name="alpha">Forwarded unchanged to <c>sampleSizeMaxMean</c>.</param>
/// <returns>
/// An array with one entry per stratum label; entry <c>i</c> is the first element of the
/// <c>sampleSizeMaxMean</c> result for cluster <c>i</c> of the model.
/// </returns>
public static int[] sampleSizeMaxStrata(string strataModelPath, double proportionOfMean = 0.1, double alpha = 0.05)
{
    dataPrepStrata strataModel = new dataPrepStrata();
    strataModel.buildModel(strataModelPath);

    int[] samplesPerStratum = new int[strataModel.Labels.Count];
    for (int stratum = 0; stratum < samplesPerStratum.Length; stratum++)
    {
        // The model is expected to be a k-means model; the cast throws otherwise.
        Accord.MachineLearning.KMeans kMeans = (Accord.MachineLearning.KMeans)strataModel.Model;
        Accord.MachineLearning.KMeansCluster cluster = kMeans.Clusters[stratum];

        // Only the first value returned by sampleSizeMaxMean is kept for the stratum.
        samplesPerStratum[stratum] = sampleSizeMaxMean(cluster.Covariance, cluster.Mean, proportionOfMean, alpha)[0];
    }

    return samplesPerStratum;
}
/// <summary>
/// Writes a per-stratum sample-size listing, followed by the overall total, to the
/// running-process dialog.
/// </summary>
/// <param name="modelPath">Path of the strata model; used both to read the labels and to compute the sample sizes.</param>
/// <param name="rp">Dialog that receives the report lines through <c>addMessage</c>.</param>
/// <param name="proportion">Passed to <c>sampleSizeMaxStrata</c> as its proportion-of-mean argument.</param>
/// <param name="alpha">Passed to <c>sampleSizeMaxStrata</c> as its alpha argument.</param>
private static void fillStrataReport(string modelPath, Forms.RunningProcess.frmRunningProcessDialog rp, double proportion, double alpha)
{
    dataPrepStrata strataModel = new dataPrepStrata();
    strataModel.buildModel(modelPath);
    List<string> stratumLabels = strataModel.Labels;

    // 45 dashes, used as a rule above and below the listing.
    string rule = new string('-', 45);

    rp.addMessage("Samples by strata (Stratum; number of samples)");
    rp.addMessage(rule);

    int[] sampleCounts = sampleSizeMaxStrata(modelPath, proportion, alpha);
    int position = 0;
    foreach (int count in sampleCounts)
    {
        // Labels and sample counts are matched by position.
        rp.addMessage("\t" + stratumLabels[position] + "; " + count.ToString());
        position++;
    }

    rp.addMessage(rule);
    rp.addMessage("Total number of samples = " + sampleCounts.Sum().ToString());
}
/// <summary>
/// Computes a sample size for every stratum of a stored strata model.
/// </summary>
/// <param name="strataModelPath">Path handed to <c>dataPrepStrata.buildModel</c> to load the model.</param>
/// <param name="proportionOfMean">Forwarded unchanged to <c>sampleSizeMaxMean</c>.</param>
/// <param name="alpha">Forwarded unchanged to <c>sampleSizeMaxMean</c>.</param>
/// <returns>
/// An array with one entry per stratum label; entry <c>i</c> is the first element of the
/// <c>sampleSizeMaxMean</c> result for cluster <c>i</c> of the model.
/// </returns>
public static int[] sampleSizeMaxStrata(string strataModelPath, double proportionOfMean = 0.1, double alpha = 0.05)
{
    dataPrepStrata strataModel = new dataPrepStrata();
    strataModel.buildModel(strataModelPath);

    int[] samplesPerStratum = new int[strataModel.Labels.Count];
    for (int stratum = 0; stratum < samplesPerStratum.Length; stratum++)
    {
        // The model is expected to be a k-means model; the cast throws otherwise.
        Accord.MachineLearning.KMeans kMeans = (Accord.MachineLearning.KMeans)strataModel.Model;
        Accord.MachineLearning.KMeansCluster cluster = kMeans.Clusters[stratum];

        // Only the first value returned by sampleSizeMaxMean is kept for the stratum.
        samplesPerStratum[stratum] = sampleSizeMaxMean(cluster.Covariance, cluster.Mean, proportionOfMean, alpha)[0];
    }

    return samplesPerStratum;
}
/// <summary>
/// Writes a per-stratum sample-size listing, followed by the overall total, to the
/// running-process dialog.
/// </summary>
/// <param name="modelPath">Path of the strata model; used both to read the labels and to compute the sample sizes.</param>
/// <param name="rp">Dialog that receives the report lines through <c>addMessage</c>.</param>
/// <param name="proportion">Passed to <c>sampleSizeMaxStrata</c> as its proportion-of-mean argument.</param>
/// <param name="alpha">Passed to <c>sampleSizeMaxStrata</c> as its alpha argument.</param>
private static void fillStrataReport(string modelPath, Forms.RunningProcess.frmRunningProcessDialog rp, double proportion, double alpha)
{
    dataPrepStrata strataModel = new dataPrepStrata();
    strataModel.buildModel(modelPath);
    List<string> stratumLabels = strataModel.Labels;

    // 45 dashes, used as a rule above and below the listing.
    string rule = new string('-', 45);

    rp.addMessage("Samples by strata (Stratum; number of samples)");
    rp.addMessage(rule);

    int[] sampleCounts = sampleSizeMaxStrata(modelPath, proportion, alpha);
    int position = 0;
    foreach (int count in sampleCounts)
    {
        // Labels and sample counts are matched by position.
        rp.addMessage("\t" + stratumLabels[position] + "; " + count.ToString());
        position++;
    }

    rp.addMessage(rule);
    rp.addMessage("Total number of samples = " + sampleCounts.Sum().ToString());
}
/// <summary>
/// Randomly flags rows of a table for sampling, stratum by stratum, and stores a weight on every row.
/// </summary>
/// <remarks>
/// Two fields are requested through <c>geoUtil.createField</c> ("sample", small integer, and
/// "weight", double). Every row then receives <c>sample = 1</c> when a uniform random draw is
/// below (samples wanted for the row's stratum / rows in that stratum) and <c>0</c> otherwise,
/// together with the weight computed for its stratum.
/// Problems are reported with message boxes rather than thrown to the caller. A failure in the
/// middle of the update loop leaves rows that were already written as they are.
/// </remarks>
/// <param name="inputTable">Table to update. It must be able to bypass the edit session; otherwise a message is shown and nothing is changed.</param>
/// <param name="strataModelPath">Path of the strata model; used to read the stratum labels and passed to the sample-size and proportion helpers.</param>
/// <param name="strataFieldName">Name of the field holding each row's stratum label.</param>
/// <param name="proportionOfMean">Forwarded to <c>dataPrepSampleSize.sampleSizeMaxCluster</c>.</param>
/// <param name="alpha">Forwarded to <c>dataPrepSampleSize.sampleSizeMaxCluster</c>.</param>
/// <param name="weightsEqual">When true, the per-stratum sample counts are scaled up so that every stratum ends with a weight of 1.</param>
public void selectStrataFeaturesToSample(ITable inputTable, string strataModelPath, string strataFieldName = "Cluster", double proportionOfMean = 0.1, double alpha = 0.05, bool weightsEqual = false)
{
    IObjectClassInfo2 objInfo2 = (IObjectClassInfo2)inputTable;
    if (!objInfo2.CanBypassEditSession())
    {
        System.Windows.Forms.MessageBox.Show("Input Table participates in a composite relationship. Please export this table as a new table and try again!");
        return;
    }

    esriUtil.Statistics.dataPrepStrata dpC = new Statistics.dataPrepStrata();
    dpC.buildModel(strataModelPath);
    List<string> labels = dpC.Labels;
    HashSet<string> unqVls = geoUtil.getUniqueValues(inputTable, strataFieldName);
    System.Random rd = new Random();

    // NOTE(review): the strata path is handed to the *Cluster helpers here — confirm this is intended.
    int[] samplesPerCluster = esriUtil.Statistics.dataPrepSampleSize.sampleSizeMaxCluster(strataModelPath, proportionOfMean, alpha);
    double[] propPerCluster = esriUtil.Statistics.dataPrepSampleSize.clusterProportions(strataModelPath);

    // weight = stratum proportion / share of the total sample allotted to that stratum.
    double[] weightsPerCluster = new double[propPerCluster.Length];
    double sSamp = System.Convert.ToDouble(samplesPerCluster.Sum());
    for (int i = 0; i < weightsPerCluster.Length; i++)
    {
        weightsPerCluster[i] = propPerCluster[i] / (samplesPerCluster[i] / sSamp);
    }

    if (weightsEqual)
    {
        // Scale each stratum's sample count by its weight relative to the smallest weight,
        // then reset all weights to 1.
        double minProp = weightsPerCluster.Min();
        for (int i = 0; i < samplesPerCluster.Length; i++)
        {
            samplesPerCluster[i] = System.Convert.ToInt32(samplesPerCluster[i] * (weightsPerCluster[i] / minProp));
            weightsPerCluster[i] = 1;
        }
    }

    double[] randomRatioPerClust = new double[propPerCluster.Length];
    if (samplesPerCluster.Length != unqVls.Count)
    {
        System.Windows.Forms.MessageBox.Show("Unique Values in cluster field do not match the number of cluster models!");
        return;
    }

    string sampleFldName = geoUtil.createField(inputTable, "sample", esriFieldType.esriFieldTypeSmallInteger, false);
    string weightFldName = geoUtil.createField(inputTable, "weight", esriFieldType.esriFieldTypeDouble, false);

    // Count the rows of each stratum to derive the per-row selection probability.
    IQueryFilter qf0 = new QueryFilterClass();
    qf0.SubFields = strataFieldName;
    string h = "";
    IField fld = inputTable.Fields.get_Field(inputTable.FindField(strataFieldName));
    if (fld.Type == esriFieldType.esriFieldTypeString)
    {
        // String fields need the label quoted in the where clause.
        h = "'";
    }
    for (int i = 0; i < samplesPerCluster.Length; i++)
    {
        qf0.WhereClause = strataFieldName + " = " + h + labels[i] + h;
        int tCnt = inputTable.RowCount(qf0);
        randomRatioPerClust[i] = System.Convert.ToDouble(samplesPerCluster[i]) / tCnt;
    }

    IQueryFilter qf = new QueryFilterClass();
    qf.SubFields = strataFieldName + "," + sampleFldName + "," + weightFldName;
    IWorkspace wks = ((IDataset)inputTable).Workspace;
    IWorkspaceEdit wksE = (IWorkspaceEdit)wks;
    if (wksE.IsBeingEdited())
    {
        wksE.StopEditing(true);
    }

    ICursor cur = null;
    try
    {
        cur = inputTable.Update(qf, false);
        int sIndex = cur.FindField(sampleFldName);
        int cIndex = cur.FindField(strataFieldName);
        int wIndex = cur.FindField(weightFldName);
        IRow rw = cur.NextRow();
        while (rw != null)
        {
            string clustStr = rw.get_Value(cIndex).ToString();
            // IndexOf yields -1 for a value that is not one of the model labels; the array
            // access below then throws and the error is reported by the catch block.
            int clust = labels.IndexOf(clustStr);
            double w = weightsPerCluster[clust];
            double rNum = rd.NextDouble();
            int ss = 0;
            double r = randomRatioPerClust[clust];
            if (rNum < r)
            {
                ss = 1;
            }
            rw.set_Value(sIndex, ss);
            rw.set_Value(wIndex, w);
            cur.UpdateRow(rw);
            rw = cur.NextRow();
        }
    }
    catch (Exception e)
    {
        System.Windows.Forms.MessageBox.Show(e.ToString());
    }
    finally
    {
        // Release the COM cursor on every path, including when the loop above throws.
        if (cur != null)
        {
            System.Runtime.InteropServices.Marshal.ReleaseComObject(cur);
        }
    }
}