Beispiel #1
0
 /// <summary>
 /// Writes the model to a text file: model type, input path, variable names, n, prop, k and
 /// labels, followed by three lines per cluster (means, covariance values, proportion).
 /// </summary>
 /// <remarks>
 /// Calls <c>buildModel()</c> first when <c>kmeans</c> is still null. Numbers are formatted with
 /// the invariant culture so a decimal comma can never collide with the comma delimiter.
 /// </remarks>
 /// <param name="outPath">Destination file path; also stored in <c>outmodelpath</c>.</param>
 public void writeModel(string outPath)
 {
     if (kmeans == null)
     {
         buildModel();
     }
     outmodelpath = outPath;
     System.Globalization.CultureInfo inv = System.Globalization.CultureInfo.InvariantCulture;
     // The using block flushes and closes the writer, so no explicit Close() is needed.
     using (System.IO.StreamWriter sw = new System.IO.StreamWriter(outmodelpath))
     {
         sw.WriteLine(dataPrepBase.modelTypes.StrataCovCorr.ToString());
         sw.WriteLine(InPath);
         sw.WriteLine(String.Join(",", VariableFieldNames));
         sw.WriteLine(n.ToString(inv));
         sw.WriteLine(prop.ToString(inv));
         sw.WriteLine(k.ToString(inv));
         sw.WriteLine(String.Join(",", lbl.ToArray()));
         KMeansClusterCollection gCol = kmeans.Clusters;
         for (int i = 0; i < gCol.Count; i++)
         {
             KMeansCluster gClust = gCol[i];
             sw.WriteLine(String.Join(",", (from double d in gClust.Mean select d.ToString(inv)).ToArray()));
             // Enumerating the 2-D covariance array yields its values row by row.
             sw.WriteLine(String.Join(",", (from double d in gClust.Covariance select d.ToString(inv)).ToArray()));
             sw.WriteLine(gClust.Proportion.ToString(inv));
         }
     }
 }
Beispiel #2
0
        /// <summary>
        /// Scans the padded interior of the image for regions of non-background pixels, splits
        /// the regions into two clusters by the value <c>dfsSearch</c> accumulates for each one,
        /// and passes every region outside the larger-centroid cluster to <c>dfsClear</c>.
        /// </summary>
        /// <param name="data">Flat pixel buffer; converted to 2-D with <c>Utils.ByteArr1DTo2D</c>.</param>
        /// <param name="width">Image width (second array dimension).</param>
        /// <param name="height">Image height (first array dimension).</param>
        /// <param name="padding">Border thickness excluded from the scan on every side.</param>
        /// <param name="it">Set to 0, or to the value reported by <c>KMeansClustering.AnnealCluster</c> when clustering runs.</param>
        /// <returns>The processed image flattened with <c>Utils.ByteArr2DTo1D</c>.</returns>
        public byte[] Denoise(byte[] data, int width, int height, int padding, out int it)
        {
            it = 0;
            byte[,] arr = Utils.ByteArr1DTo2D(data, width, height);
            // A freshly allocated bool array is already all-false, so no reset loop is required.
            bool[,] vi = new bool[height, width];

            List<Point> points = new List<Point>();
            List<int> areas = new List<int>();

            for (int i = padding; i < height - padding; ++i)
            {
                for (int j = padding; j < width - padding; ++j)
                {
                    if (vi[i, j] || arr[i, j] == backgroundColor)
                    {
                        continue;
                    }
                    // NOTE(review): dfsSearch presumably marks the region in vi and counts its pixels into sum - confirm.
                    int sum = 0;
                    dfsSearch(arr, vi, i, j, width, height, padding, ref sum);
                    points.Add(new Point(j, i));
                    areas.Add(sum);
                }
            }

            if (points.Count > 1)
            {
                Vector2[] vs = areas.Select(area => new Vector2(area, 0)).ToArray();
                KMeansCluster result = KMeansClustering.AnnealCluster(vs, 2, out it);

                // The cluster whose centroid has the larger squared magnitude is kept.
                int bigcluster = (result[0].Centroid.ModSqr() > result[1].Centroid.ModSqr()) ? 0 : 1;
                for (int i = 0; i < points.Count; ++i)
                {
                    if (result.ClusterIndex[i] != bigcluster)
                    {
                        dfsClear(arr, points[i].Y, points[i].X, width, height, 1);
                    }
                }
            }
            else if (points.Count == 1)
            {
                dfsColor(arr, points[0].Y, points[0].X, width, height, 1);
            }
            // With no regions found there is nothing to clear or recolor; previously this case
            // indexed points[0] on an empty list and threw.

            byte[] bytes = Utils.ByteArr2DTo1D(arr);
            return bytes;
        }
Beispiel #3
0
        /// <summary>
        /// Reads <c>k</c> cluster records from <paramref name="sr"/> (a means line, a covariance
        /// line and a proportion line each) into a new KMeans model, then stores the model in
        /// <c>model</c> and its cluster collection in <c>clusterCollection</c>.
        /// </summary>
        /// <remarks>
        /// Numbers are parsed with the invariant culture: the file is comma-delimited, so the
        /// user's regional decimal separator must not influence parsing.
        /// </remarks>
        /// <param name="sr">Reader positioned at the first cluster record.</param>
        private void setKMeansCluster(System.IO.StreamReader sr)
        {
            System.Globalization.CultureInfo inv = System.Globalization.CultureInfo.InvariantCulture;
            KMeans kmModel = new KMeans(k);
            KMeansClusterCollection kmeansColl = kmModel.Clusters;
            int nVars = VariableFieldNames.Length;

            for (int i = 0; i < k; i++)
            {
                double[] mns       = (from s in (sr.ReadLine().Split(new char[] { ',' })) select System.Convert.ToDouble(s, inv)).ToArray();
                string[] covVlsStr = sr.ReadLine().Split(new char[] { ',' });
                double   p         = System.Convert.ToDouble(sr.ReadLine(), inv);
                double[,] cov = new double[nVars, nVars];
                for (int j = 0; j < nVars; j++)
                {
                    for (int l = 0; l < nVars; l++)
                    {
                        // Flat value j * nVars + l is stored at [l, j].
                        int indexVl = (j * nVars) + l;
                        cov[l, j] = System.Convert.ToDouble(covVlsStr[indexVl], inv);
                    }
                }

                KMeansCluster kc = new KMeansCluster(kmeansColl, i);
                kc.Mean       = mns;
                kc.Covariance = cov;
                kc.Proportion = p;
            }
            clusterCollection = kmeansColl;
            model             = kmModel;
        }
Beispiel #4
0
        /// <summary>
        /// Compares two KMeans models cluster by cluster (same index in both) and returns the
        /// mean and variance p-values reported by <c>PairedTTestPValues</c>.
        /// </summary>
        /// <param name="km1">First model.</param>
        /// <param name="km2">Second model.</param>
        /// <param name="meanPvalues">One value per cluster, or null when the cluster counts differ.</param>
        /// <param name="varPvalues">One value per cluster, or null when the cluster counts differ.</param>
        public static void CompareStratMeansVar(KMeans km1, KMeans km2, out double[] meanPvalues, out double[] varPvalues)
        {
            meanPvalues = null;
            varPvalues  = null;
            int count1 = km1.Clusters.Count;
            int count2 = km2.Clusters.Count;

            if (count1 == count2)
            {
                meanPvalues = new double[count1];
                varPvalues  = new double[count2];
                int idx = 0;
                while (idx < count1)
                {
                    KMeansCluster first  = km1.Clusters[idx];
                    KMeansCluster second = km2.Clusters[idx];
                    double[] firstMeans  = first.Mean;
                    double[] secondMeans = second.Mean;
                    double[,] firstCov  = first.Covariance;
                    double[,] secondCov = second.Covariance;
                    double meanP, varP;
                    PairedTTestPValues(firstMeans, firstCov, secondMeans, secondCov, out meanP, out varP);
                    meanPvalues[idx] = meanP;
                    varPvalues[idx]  = varP;
                    idx++;
                }
            }
            else
            {
                // Models with different numbers of strata cannot be paired up.
                System.Windows.Forms.MessageBox.Show("Not the same number of strata! Models are not comparable!");
            }
        }
Beispiel #5
0
        /// <summary>
        /// Adds, for every cluster in <c>kmeans</c>, its label, means, covariance matrix and the
        /// matrix returned by <c>getCorr</c> to the supplied dialog.
        /// </summary>
        /// <param name="rd">Dialog that receives the report text through <c>addMessage</c>.</param>
        private void kmeansReport(Forms.RunningProcess.frmRunningProcessDialog rd)
        {
            KMeansClusterCollection clusters = kmeans.Clusters;

            for (int c = 0; c < clusters.Count; c++)
            {
                KMeansCluster cluster = clusters[c];
                double[] means = cluster.Mean;
                double[,] covariance  = cluster.Covariance;
                double[,] correlation = getCorr(covariance);
                var label = Labels[c];
                string meansText = String.Join(", ", means.Select(m => m.ToString()).ToArray());
                rd.addMessage("\n\nStratum " + label + ":\nMeans: " + meansText + "\nCovariance:");

                // Each message line holds the values [0..n-1, line] of the matrix.
                for (int line = 0; line < VariableFieldNames.Length; line++)
                {
                    string[] cells = Enumerable.Range(0, VariableFieldNames.Length)
                                     .Select(pos => covariance[pos, line].ToString())
                                     .ToArray();
                    rd.addMessage("\n" + String.Join(",", cells));
                }

                rd.addMessage("\nCorr:");
                for (int line = 0; line < VariableFieldNames.Length; line++)
                {
                    string[] cells = Enumerable.Range(0, VariableFieldNames.Length)
                                     .Select(pos => correlation[pos, line].ToString())
                                     .ToArray();
                    rd.addMessage("\n" + String.Join(",", cells));
                }
            }
        }
Beispiel #6
0
        /// <summary>
        /// Loads two stratification models from file and compares them stratum by stratum,
        /// matching strata by label.
        /// </summary>
        /// <remarks>
        /// When the models have different cluster counts, or a label of the first model does not
        /// exist in the second, a message box is shown and every out parameter is left null.
        /// </remarks>
        /// <param name="StratModel1">Path of the first model file.</param>
        /// <param name="StratModel2">Path of the second model file.</param>
        /// <param name="labels">Labels of the first model; results are indexed like this list.</param>
        /// <param name="meanDiff">Per stratum: average over variables of (mean1 - mean2).</param>
        /// <param name="varDiff">Per stratum: average over variables of the covariance-diagonal differences.</param>
        /// <param name="meanPvalues">Per stratum: mean p-value reported by <c>PairedTTestPValues</c>.</param>
        /// <param name="varPvalues">Per stratum: variance p-value reported by <c>PairedTTestPValues</c>.</param>
        public static void CompareStratMeansVar(string StratModel1, string StratModel2, out List<string> labels, out double[] meanDiff, out double[] varDiff, out double[] meanPvalues, out double[] varPvalues)
        {
            meanPvalues = null;
            varPvalues  = null;
            meanDiff    = null;
            varDiff     = null;
            labels      = null;
            dataPrepClusterKmean dpc1 = new dataPrepClusterKmean();

            dpc1.buildModel(StratModel1);
            KMeans km1 = (KMeans)dpc1.Model;
            dataPrepClusterKmean dpc2 = new dataPrepClusterKmean();

            dpc2.buildModel(StratModel2);
            List<string> labels2 = dpc2.Labels;
            KMeans       km2     = (KMeans)dpc2.Model;
            int          nPv1    = km1.Clusters.Count;
            int          nPv2    = km2.Clusters.Count;

            if (nPv1 != nPv2)
            {
                System.Windows.Forms.MessageBox.Show("Not the same number of strata! Models are not comparable!");
                return;
            }

            // Resolve every label's position in the second model before producing any output,
            // so a missing label is reported instead of being used as index -1.
            List<string> labels1 = dpc1.Labels;
            int[] matchIdx = new int[labels1.Count];
            for (int i = 0; i < labels1.Count; i++)
            {
                matchIdx[i] = labels2.IndexOf(labels1[i]);
                if (matchIdx[i] < 0)
                {
                    System.Windows.Forms.MessageBox.Show("Stratum label '" + labels1[i] + "' was not found in the second model! Models are not comparable!");
                    return;
                }
            }

            labels      = labels1;
            meanPvalues = new double[nPv1];
            varPvalues  = new double[nPv2];
            meanDiff    = new double[nPv1];
            varDiff     = new double[nPv2];

            // Iterate by position rather than calling IndexOf per label: linear instead of
            // quadratic, and duplicate labels no longer all resolve to the first occurrence.
            for (int ind1 = 0; ind1 < labels1.Count; ind1++)
            {
                int ind2 = matchIdx[ind1];

                KMeansCluster kmC1   = km1.Clusters[ind1];
                KMeansCluster kmC2   = km2.Clusters[ind2];
                double[]      means1 = kmC1.Mean;
                double[]      means2 = kmC2.Mean;
                double[,] cov1 = kmC1.Covariance;
                double[,] cov2 = kmC2.Covariance;
                double[] meanDiffArr = new double[means1.Length];
                double[] varDiffArr  = new double[means1.Length];
                for (int i = 0; i < means1.Length; i++)
                {
                    meanDiffArr[i] = means1[i] - means2[i];
                    // Variances are the diagonal of the covariance matrix.
                    varDiffArr[i]  = cov1[i, i] - cov2[i, i];
                }
                meanDiff[ind1] = meanDiffArr.Average();
                varDiff[ind1]  = varDiffArr.Average();
                double m, v;
                PairedTTestPValues(means1, cov1, means2, cov2, out m, out v);
                meanPvalues[ind1] = m;
                varPvalues[ind1]  = v;
            }
        }
Beispiel #7
0
        /// <summary>
        /// Adds one message per cluster in <c>clusterCollection</c> to the dialog, giving the
        /// cluster's label and its means.
        /// </summary>
        /// <param name="rd">Dialog that receives the report text through <c>addMessage</c>.</param>
        private void binaryReport(Forms.RunningProcess.frmRunningProcessDialog rd)
        {
            KMeansClusterCollection clusters = (KMeansClusterCollection)clusterCollection;

            int idx = 0;
            while (idx < clusters.Count)
            {
                double[] means = clusters[idx].Mean;
                var label = Labels[idx];
                string meansText = String.Join(", ", means.Select(m => m.ToString()).ToArray());
                rd.addMessage("\n\nCluster " + label + ":\nMeans: " + meansText);
                idx++;
            }
        }
Beispiel #8
0
        /// <summary>
        /// Writes three lines per cluster of <c>clusterCollection</c> to <paramref name="sw"/>:
        /// comma-separated means, comma-separated covariance values, and the proportion.
        /// </summary>
        /// <remarks>
        /// Numbers are formatted with the invariant culture so a decimal comma can never
        /// collide with the comma delimiter.
        /// </remarks>
        /// <param name="sw">Open writer; it is not closed here.</param>
        private void writeKmeansData(System.IO.StreamWriter sw)
        {
            System.Globalization.CultureInfo inv = System.Globalization.CultureInfo.InvariantCulture;
            KMeansClusterCollection gCol = (KMeansClusterCollection)clusterCollection;

            for (int i = 0; i < gCol.Count; i++)
            {
                KMeansCluster gClust = gCol[i];
                sw.WriteLine(String.Join(",", (from double d in gClust.Mean select d.ToString(inv)).ToArray()));
                // Enumerating the 2-D covariance array yields its values row by row.
                sw.WriteLine(String.Join(",", (from double d in gClust.Covariance select d.ToString(inv)).ToArray()));
                sw.WriteLine(gClust.Proportion.ToString(inv));
            }
        }
Beispiel #9
0
        /// <summary>
        /// Computes two-sided t-test p-values for every pair of variables within the cluster
        /// whose label equals <paramref name="category"/>.
        /// </summary>
        /// <remarks>
        /// Pairs are ordered (0,1), (0,2), ..., (1,2), ... The result stays zero-filled when
        /// the label is unknown or when the cluster's sample count (Proportion * N) is 1 or
        /// less. The latter uses the same cut-off as the T-test <c>getReport</c>, because the
        /// degrees of freedom (2 * nSample - 2) would not be positive.
        /// </remarks>
        /// <param name="category">Value whose <c>ToString()</c> is looked up in <c>lbl</c>.</param>
        /// <returns>Array of length (v * v - v) / 2, where v is the number of variable fields.</returns>
        public double[] computeNew(object category)
        {
            string cat      = category.ToString();
            int    catIndex = lbl.IndexOf(cat);
            int    np       = ((VariableFieldNames.Length * VariableFieldNames.Length) - VariableFieldNames.Length) / 2;

            double[] pValues = new double[np];
            if (catIndex == -1)
            {
                return pValues;
            }
            KMeansCluster gClust = kmeans.Clusters[catIndex];

            double[] mns     = gClust.Mean;
            double   nSample = gClust.Proportion * N;
            if (nSample <= 1)
            {
                // Too few samples for a t-test; previously this reached TDistribution with
                // non-positive degrees of freedom.
                return pValues;
            }
            double[] var      = new double[mns.Length];
            double   seAdjust = Math.Sqrt(2.0 / nSample);

            for (int j = 0; j < mns.Length; j++)
            {
                // Variances are the diagonal of the covariance matrix.
                var[j] = gClust.Covariance[j, j];
            }

            int pCnt = 0;
            for (int first = 0; first < mns.Length - 1; first++)
            {
                for (int second = first + 1; second < mns.Length; second++)
                {
                    double mD    = mns[first] - mns[second];
                    double pSD   = Math.Sqrt((var[first] + var[second]) / 2);
                    double se    = pSD * seAdjust;
                    double tStat = mD / se;
                    Accord.Statistics.Distributions.Univariate.TDistribution tDist = new Accord.Statistics.Distributions.Univariate.TDistribution(2 * nSample - 2);
                    double cdf = tDist.DistributionFunction(tStat);
                    // Two-sided p-value: twice the tail beyond the observed statistic.
                    pValues[pCnt] = (tStat > 0) ? (1 - cdf) * 2 : cdf * 2;
                    pCnt++;
                }
            }
            return pValues;
        }
Beispiel #10
0
        /// <summary>
        /// Creates a new KMeans model with <c>k</c> clusters in <c>kmeans</c> and fills each
        /// cluster's mean, covariance and proportion from <c>meansLst</c>, <c>scovLst</c>
        /// and <c>proportionsLst</c>.
        /// </summary>
        private void makeKMeans()
        {
            kmeans = new KMeans(k);
            KMeansClusterCollection clusters = kmeans.Clusters;

            int idx = 0;
            while (idx < k)
            {
                double[] mean       = meansLst[idx];
                double   proportion = proportionsLst[idx];
                double[,] covariance = scovLst[idx];
                // The cluster object writes through to the collection it is constructed on.
                KMeansCluster cluster = new KMeansCluster(clusters, idx)
                {
                    Mean       = mean,
                    Covariance = covariance,
                    Proportion = proportion
                };
                idx++;
            }
        }
Beispiel #11
0
        /// <summary>
        /// Reads <c>k</c> cluster records from <paramref name="sr"/> into a new BinarySplit
        /// model, then stores the model in <c>model</c> and its cluster collection in
        /// <c>clusterCollection</c>.
        /// </summary>
        /// <remarks>
        /// Each record is three lines: means, a line that is skipped, and the proportion.
        /// Numbers are parsed with the invariant culture because the file is comma-delimited.
        /// </remarks>
        /// <param name="sr">Reader positioned at the first cluster record.</param>
        private void setBinaryCluster(System.IO.StreamReader sr)
        {
            System.Globalization.CultureInfo inv = System.Globalization.CultureInfo.InvariantCulture;
            BinarySplit             bSplit     = new BinarySplit(k);
            KMeansClusterCollection kmeansColl = bSplit.Clusters;

            for (int i = 0; i < k; i++)
            {
                double[] mns = (from s in (sr.ReadLine().Split(new char[] { ',' })) select System.Convert.ToDouble(s, inv)).ToArray();
                // The second line of the record is not used for this model type.
                sr.ReadLine();
                double        p  = System.Convert.ToDouble(sr.ReadLine(), inv);
                KMeansCluster kc = new KMeansCluster(kmeansColl, i);
                kc.Mean       = mns;
                kc.Proportion = p;
            }
            clusterCollection = kmeansColl;
            model             = bSplit;
        }
Beispiel #12
0
        /// <summary>
        /// Loads a T-test model from a text file: model type, input path, strata field,
        /// variable names, n, prop, k and labels, followed by three lines per cluster
        /// (means, covariance values, proportion).
        /// </summary>
        /// <remarks>
        /// When the first line is not the TTEST model type, an error box is shown and nothing
        /// else is read. Numbers are parsed with the invariant culture because the file is
        /// comma-delimited.
        /// </remarks>
        /// <param name="modelPath">Path of the model file; also stored in <c>outmodelpath</c>.</param>
        public void buildModel(string modelPath)
        {
            outmodelpath = modelPath;
            System.Globalization.CultureInfo inv = System.Globalization.CultureInfo.InvariantCulture;
            // The using block disposes the reader on every exit path, including the early return.
            using (System.IO.StreamReader sr = new System.IO.StreamReader(outmodelpath))
            {
                dataPrepBase.modelTypes mType = (dataPrepBase.modelTypes)Enum.Parse(typeof(dataPrepBase.modelTypes), sr.ReadLine());
                if (mType != dataPrepBase.modelTypes.TTEST)
                {
                    System.Windows.Forms.MessageBox.Show("Not a TTest Model!!", "Error", System.Windows.Forms.MessageBoxButtons.OK, System.Windows.Forms.MessageBoxIcon.Error);
                    return;
                }
                inpath             = sr.ReadLine();
                stratafld          = sr.ReadLine();
                VariableFieldNames = sr.ReadLine().Split(new char[] { ',' });
                n      = System.Convert.ToInt32(sr.ReadLine(), inv);
                prop   = System.Convert.ToDouble(sr.ReadLine(), inv);
                k      = System.Convert.ToInt32(sr.ReadLine(), inv);
                lbl    = sr.ReadLine().Split(new char[] { ',' }).ToList();
                kmeans = new KMeans(k);
                KMeansClusterCollection kmeansColl = kmeans.Clusters;
                int nVars = VariableFieldNames.Length;
                for (int i = 0; i < k; i++)
                {
                    double[] mns       = (from s in (sr.ReadLine().Split(new char[] { ',' })) select System.Convert.ToDouble(s, inv)).ToArray();
                    string[] covVlsStr = sr.ReadLine().Split(new char[] { ',' });
                    double   p         = System.Convert.ToDouble(sr.ReadLine(), inv);
                    double[,] cov = new double[nVars, nVars];
                    for (int j = 0; j < nVars; j++)
                    {
                        for (int l = 0; l < nVars; l++)
                        {
                            // Flat value j * nVars + l is stored at [l, j].
                            int indexVl = (j * nVars) + l;
                            cov[l, j] = System.Convert.ToDouble(covVlsStr[indexVl], inv);
                        }
                    }

                    KMeansCluster kc = new KMeansCluster(kmeansColl, i);
                    kc.Mean       = mns;
                    kc.Covariance = cov;
                    kc.Proportion = p;
                }
            }
        }
Beispiel #13
0
        /// <summary>
        /// Shows a "Cluster Results" dialog listing the input path, sample size, number of
        /// clusters, labels and variables, followed by each cluster's means and covariance values.
        /// </summary>
        /// <remarks>Calls <c>buildModel()</c> first when <c>kmeans</c> is still null.</remarks>
        public void getReport()
        {
            if (kmeans == null)
            {
                buildModel();
            }
            Forms.RunningProcess.frmRunningProcessDialog dialog = new Forms.RunningProcess.frmRunningProcessDialog(false);
            dialog.Text               = "Cluster Results";
            dialog.TopLevel           = true;
            dialog.pgbProcess.Visible = false;
            dialog.FormBorderStyle    = System.Windows.Forms.FormBorderStyle.Sizable;

            // General model information.
            dialog.addMessage("Input path = " + InPath);
            dialog.addMessage("Sample size = " + n.ToString() + " proportion of total records = " + prop.ToString());
            dialog.addMessage("Number of Cluster = " + k.ToString());
            dialog.addMessage("Labels = " + String.Join(", ", lbl.ToArray()));
            dialog.addMessage("Variables: " + String.Join(" ,", VariableFieldNames));

            KMeansClusterCollection clusters = kmeans.Clusters;
            for (int c = 0; c < clusters.Count; c++)
            {
                KMeansCluster cluster = clusters[c];
                double[] means = cluster.Mean;
                double[,] covariance = cluster.Covariance;
                var label = Labels[c];
                string meansText = String.Join(", ", means.Select(m => m.ToString()).ToArray());
                dialog.addMessage("\n\nCluster " + label + ":\nMeans: " + meansText + "\nCovariance:");

                // Each message line holds the values [0..n-1, line] of the covariance matrix.
                for (int line = 0; line < VariableFieldNames.Length; line++)
                {
                    string[] cells = Enumerable.Range(0, VariableFieldNames.Length)
                                     .Select(pos => covariance[pos, line].ToString())
                                     .ToArray();
                    dialog.addMessage("\n" + String.Join(",", cells));
                }
            }
            dialog.enableClose();
            dialog.Show();
        }
 /// <summary>
 /// Reads <c>k</c> cluster records from <paramref name="sr"/> into a new BinarySplit model,
 /// then stores the model in <c>model</c> and its cluster collection in <c>clusterCollection</c>.
 /// </summary>
 /// <remarks>
 /// Each record is three lines: means, a line that is skipped, and the proportion.
 /// Numbers are parsed with the invariant culture because the file is comma-delimited.
 /// </remarks>
 /// <param name="sr">Reader positioned at the first cluster record.</param>
 private void setBinaryCluster(System.IO.StreamReader sr)
 {
     System.Globalization.CultureInfo inv = System.Globalization.CultureInfo.InvariantCulture;
     BinarySplit bSplit = new BinarySplit(k);
     KMeansClusterCollection kmeansColl = bSplit.Clusters;
     for (int i = 0; i < k; i++)
     {
         double[] mns = (from s in (sr.ReadLine().Split(new char[] { ',' })) select System.Convert.ToDouble(s, inv)).ToArray();
         // The second line of the record is not used for this model type.
         sr.ReadLine();
         double p = System.Convert.ToDouble(sr.ReadLine(), inv);
         KMeansCluster kc = new KMeansCluster(kmeansColl, i);
         kc.Mean = mns;
         kc.Proportion = p;
     }
     clusterCollection = kmeansColl;
     model = bSplit;
 }
Beispiel #15
0
        /// <summary>
        /// Shows an "Independent T-Test Results" dialog with one table row per pair of
        /// variables in every cluster: label, compared fields, sample count, mean difference,
        /// t statistic and two-sided p-value.
        /// </summary>
        /// <remarks>
        /// Calls <c>buildModel()</c> first when <c>kmeans</c> is still null. Clusters whose
        /// sample count (Proportion * N) is 1 or less are left out of the table.
        /// </remarks>
        public void getReport()
        {
            if (kmeans == null)
            {
                buildModel();
            }
            rd                    = new Forms.RunningProcess.frmRunningProcessDialog(false);
            rd.Text               = "Independent T-Test Results";
            rd.TopLevel           = true;
            rd.pgbProcess.Visible = false;
            rd.FormBorderStyle    = System.Windows.Forms.FormBorderStyle.Sizable;

            string rule = "-".PadRight(83, '-');
            rd.addMessage("Input path = " + InPath);
            rd.addMessage("Total Sample size = " + n.ToString());
            rd.addMessage("\nLabel   |Compare V1-V2            |N       |Dif     |T-Stat  |P-Value ");
            rd.addMessage(rule);

            KMeansClusterCollection clusters = kmeans.Clusters;
            for (int c = 0; c < clusters.Count; c++)
            {
                string        labelCol    = getValue(Labels[c], 8);
                KMeansCluster cluster     = clusters[c];
                double[]      means       = cluster.Mean;
                double[]      variances   = new double[means.Length];
                double        sampleCount = cluster.Proportion * N;
                if (sampleCount > 1)
                {
                    double seScale = Math.Sqrt(2 * 1 / sampleCount);
                    for (int v = 0; v < means.Length; v++)
                    {
                        // Variances are the diagonal of the covariance matrix.
                        variances[v] = cluster.Covariance[v, v];
                    }

                    // Visit every unordered pair (first, second) with first < second.
                    for (int first = 0; first < means.Length - 1; first++)
                    {
                        for (int second = first + 1; second < means.Length; second++)
                        {
                            string firstName  = getValue(VariableFieldNames[first], 12);
                            string secondName = getValue(VariableFieldNames[second], 12);
                            double diff     = means[first] - means[second];
                            double pooledSd = Math.Sqrt((variances[first] + variances[second]) / 2);
                            double stdErr   = pooledSd * seScale;
                            double t        = diff / stdErr;
                            Accord.Statistics.Distributions.Univariate.TDistribution dist = new Accord.Statistics.Distributions.Univariate.TDistribution(2 * sampleCount - 2);
                            double cdf = dist.DistributionFunction(t);
                            // Two-sided p-value: twice the tail beyond the observed statistic.
                            double p = (t > 0) ? (1 - cdf) * 2 : (cdf * 2);
                            string pText = (p < 0.0001) ? "p < 0.0001" : p.ToString();

                            string nCol    = getValue(sampleCount.ToString(), 6);
                            string diffCol = getValue(diff.ToString(), 6);
                            string tCol    = getValue(t.ToString(), 6);
                            string pCol    = getValue(pText, 10);
                            rd.addMessage(String.Concat(labelCol, "|", firstName, "-", secondName, "| ", nCol, " | ", diffCol, " | ", tCol, " | ", pCol));
                        }
                    }
                }
            }
            rd.addMessage(rule);
            rd.enableClose();
            rd.Show();
        }
        /// <summary>
        /// Reads <c>k</c> cluster records from <paramref name="sr"/> (a means line, a covariance
        /// line and a proportion line each) into a new KMeans model, then stores the model in
        /// <c>model</c> and its cluster collection in <c>clusterCollection</c>.
        /// </summary>
        /// <remarks>
        /// Numbers are parsed with the invariant culture: the file is comma-delimited, so the
        /// user's regional decimal separator must not influence parsing.
        /// </remarks>
        /// <param name="sr">Reader positioned at the first cluster record.</param>
        private void setKMeansCluster(System.IO.StreamReader sr)
        {
            System.Globalization.CultureInfo inv = System.Globalization.CultureInfo.InvariantCulture;
            KMeans kmModel = new KMeans(k);
            KMeansClusterCollection kmeansColl = kmModel.Clusters;
            int nVars = VariableFieldNames.Length;
            for (int i = 0; i < k; i++)
            {
                double[] mns = (from s in (sr.ReadLine().Split(new char[] { ',' })) select System.Convert.ToDouble(s, inv)).ToArray();
                string[] covVlsStr = sr.ReadLine().Split(new char[] { ',' });
                double p = System.Convert.ToDouble(sr.ReadLine(), inv);
                double[,] cov = new double[nVars, nVars];
                for (int j = 0; j < nVars; j++)
                {
                    for (int l = 0; l < nVars; l++)
                    {
                        // Flat value j * nVars + l is stored at [l, j].
                        int indexVl = (j * nVars) + l;
                        cov[l, j] = System.Convert.ToDouble(covVlsStr[indexVl], inv);
                    }
                }

                KMeansCluster kc = new KMeansCluster(kmeansColl, i);
                kc.Mean = mns;
                kc.Covariance = cov;
                kc.Proportion = p;
            }
            clusterCollection = kmeansColl;
            model = kmModel;
        }
        /// <summary>
        /// Loads a StrataCovCorr model from a text file: model type, input path, variable
        /// names, n, prop, k and labels, followed by three lines per cluster
        /// (means, covariance values, proportion).
        /// </summary>
        /// <remarks>
        /// When the first line is not the StrataCovCorr model type, an error box is shown and
        /// nothing else is read. Numbers are parsed with the invariant culture because the
        /// file is comma-delimited.
        /// </remarks>
        /// <param name="modelPath">Path of the model file; also stored in <c>outmodelpath</c>.</param>
        public void buildModel(string modelPath)
        {
            outmodelpath = modelPath;
            System.Globalization.CultureInfo inv = System.Globalization.CultureInfo.InvariantCulture;
            // The using block disposes the reader on every exit path, including the early return.
            using (System.IO.StreamReader sr = new System.IO.StreamReader(outmodelpath))
            {
                dataPrepBase.modelTypes mType = (dataPrepBase.modelTypes)Enum.Parse(typeof(dataPrepBase.modelTypes), sr.ReadLine());
                if (mType != dataPrepBase.modelTypes.StrataCovCorr)
                {
                    System.Windows.Forms.MessageBox.Show("Not a StrataCovCorr Model!!", "Error", System.Windows.Forms.MessageBoxButtons.OK, System.Windows.Forms.MessageBoxIcon.Error);
                    return;
                }
                inpath = sr.ReadLine();
                VariableFieldNames = sr.ReadLine().Split(new char[] { ',' });
                n = System.Convert.ToInt32(sr.ReadLine(), inv);
                prop = System.Convert.ToDouble(sr.ReadLine(), inv);
                k = System.Convert.ToInt32(sr.ReadLine(), inv);
                lbl = sr.ReadLine().Split(new char[] { ',' }).ToList();
                kmeans = new KMeans(k);
                KMeansClusterCollection kmeansColl = kmeans.Clusters;
                int nVars = VariableFieldNames.Length;
                for (int i = 0; i < k; i++)
                {
                    double[] mns = (from s in (sr.ReadLine().Split(new char[] { ',' })) select System.Convert.ToDouble(s, inv)).ToArray();
                    string[] covVlsStr = sr.ReadLine().Split(new char[] { ',' });
                    double p = System.Convert.ToDouble(sr.ReadLine(), inv);
                    double[,] cov = new double[nVars, nVars];
                    for (int j = 0; j < nVars; j++)
                    {
                        for (int l = 0; l < nVars; l++)
                        {
                            // Flat value j * nVars + l is stored at [l, j].
                            int indexVl = (j * nVars) + l;
                            cov[l, j] = System.Convert.ToDouble(covVlsStr[indexVl], inv);
                        }
                    }

                    KMeansCluster kc = new KMeansCluster(kmeansColl, i);
                    kc.Mean = mns;
                    kc.Covariance = cov;
                    kc.Proportion = p;
                }
            }
        }
 /// <summary>
 /// Creates a new KMeans model with <c>k</c> clusters in <c>kmeans</c> and fills each cluster's
 /// mean, covariance and proportion from <c>meansLst</c>, <c>scovLst</c> and <c>proportionsLst</c>.
 /// </summary>
 private void makeKMeans()
 {
     kmeans = new KMeans(k);
     KMeansClusterCollection clusters = kmeans.Clusters;
     int idx = 0;
     while (idx < k)
     {
         double[] mean = meansLst[idx];
         double proportion = proportionsLst[idx];
         double[,] covariance = scovLst[idx];
         // The cluster object writes through to the collection it is constructed on.
         KMeansCluster cluster = new KMeansCluster(clusters, idx)
         {
             Mean = mean,
             Covariance = covariance,
             Proportion = proportion
         };
         idx++;
     }
 }