Пример #1
0
        public void GetXDistanceFromSample_GiveCorrectDistance()
        {
            // Two four-dimensional samples whose squared coordinate differences are
            // 49 + 9 + 1 + 36 = 95; the expected distance 9.747 is sqrt(95) to three decimals.
            Sample sample1 = new Sample(new double[] { 0, 3, 4, 5}, 1, 0);
            Sample sample2 = new Sample(new double[] { 7, 6, 3, -1}, 1, 0);

            double distance = sample1.GetXDistanceFromSample(sample2);

            // Expected value goes first so a failure message reads correctly, and doubles are
            // compared with a tolerance instead of exact equality on a rounded value.
            Assert.AreEqual(9.747, distance, 0.001);
        }
Пример #2
0
 public void ClusterXCreate_CreateCorrectClusterX()
 {
     // A ClusterX created from one three-dimensional sample must report a mean equal to
     // that sample's input values.
     Params.inputDataDimension = 3;

     Sample singleSample = new Sample(new double[] { 1.0, 2.0, 3.0 }, 1.0, 0);
     ClusterX cluster = new ClusterX(singleSample, null);

     Vector expectedMean = new Vector(new double[] { 1.0, 2.0, 3.0 });
     bool meanMatches = cluster.Mean.EqualsToVector(expectedMean);

     Assert.IsTrue(meanMatches);
 }
Пример #3
0
        /// <summary>
        /// Creates a Y cluster seeded with the Y values of <paramref name="sample"/>.
        /// </summary>
        /// <param name="sample">Sample whose Y values become the first item and the initial mean.</param>
        /// <param name="parent">Node handed on to the base constructor.</param>
        public ClusterY(Sample sample, Node parent)
            : base(sample, parent)
        {
            dimension = Params.outputDataDimension;

            // The third constructor argument is the item count before insertion plus one.
            int itemNumber = items.Count + 1;
            Vector firstItem = new Vector(sample.Y.Values.ToArray(), sample.Label, itemNumber);
            items.Add(firstItem);

            // The mean is built from a separate ToArray() call, not from the stored item.
            mean = new Vector(sample.Y.Values.ToArray());
        }
Пример #4
0
        /// <summary>
        /// Creates a pair that links an X cluster with a Y cluster and remembers the sample it
        /// was created for.
        /// </summary>
        /// <param name="cX">X cluster of the pair.</param>
        /// <param name="cY">Y cluster of the pair.</param>
        /// <param name="sample">First entry of the pair's sample list.</param>
        public ClusterPair(ClusterX cX, ClusterY cY, Sample sample)
        {
            this.clusterX = cX;
            this.clusterY = cY;

            // Both center markers start at 0.
            PreviousCenter = 0;
            CurrentCenter = 0;

            // The sample list starts with exactly one entry: the creating sample.
            List<Sample> initialSamples = new List<Sample>();
            initialSamples.Add(sample);
            this.samples = initialSamples;
        }
Пример #5
0
        /// <summary>
        /// Creates an X cluster seeded with the X values of <paramref name="sample"/>.
        /// </summary>
        /// <param name="sample">Sample whose X values become the first item and the initial mean.</param>
        /// <param name="parent">Node handed on to the base constructor.</param>
        public ClusterX(Sample sample, Node parent)
            : base(sample, parent)
        {
            this.dimension = Params.inputDataDimension;
            this.child = null;

            // The first item is created with 1 as its third constructor argument.
            Vector firstItem = new Vector(sample.X.Values.ToArray(), sample.Label, 1);
            this.items.Add(firstItem);

            // The mean is built from a separate ToArray() call, not from the stored item.
            this.mean = new Vector(sample.X.Values.ToArray());

            // TODO: count covariance matrix (currently disabled):
            // this.covarianceMatrix = ILMath.zeros(Params.inputDataDimension, Params.inputDataDimension);
        }
Пример #6
0
        public void CountGSOManifold_ProvideCorrectCounting()
        {
            // Builds a node holding three single-sample X clusters and asks it for the manifold
            // of three scatter vectors with the same coordinates as the samples.
            // NOTE(review): the returned array is never asserted on, so this only checks that
            // GetManifold runs without throwing - confirm the expected values and add assertions.
            Params.inputDataDimension = 4;

            Node node = new Node(0.0, 0.0, 0.0, 0.0, "");

            Sample firstSample = new Sample(new double[] { 1, 0, 2, 1 }, 1, 0);
            Sample secondSample = new Sample(new double[] { -1, 1, 0, -1 }, 1, 0);
            Sample thirdSample = new Sample(new double[] { 2, 1, 1, 1 }, 1, 0);

            node.ClustersX.Add(new ClusterX(firstSample, null));
            node.ClustersX.Add(new ClusterX(secondSample, null));
            node.ClustersX.Add(new ClusterX(thirdSample, null));

            List<Vector> scatterVectors = new List<Vector>
            {
                new Vector(new double[] { 1, 0, 2, 1 }),
                new Vector(new double[] { -1, 1, 0, -1 }),
                new Vector(new double[] { 2, 1, 1, 1 })
            };

            ILArray<double> manifold = node.GetManifold(scatterVectors);
        }
Пример #7
0
 /// <summary>
 /// Initializes the common cluster state: an empty item list and the given parent node.
 /// </summary>
 /// <param name="sample">Not read by this constructor.</param>
 /// <param name="parent">Node stored in the <c>parent</c> field.</param>
 public Cluster(Sample sample, Node parent)
 {
     this.parent = parent;

     // Items are filled in later; the list starts empty.
     List<Vector> emptyItems = new List<Vector>();
     this.items = emptyItems;
 }
Пример #8
0
        /// <summary>
        /// Creates new clusters X and Y from a sample, links them through a new cluster pair
        /// and registers all three with this node.
        /// </summary>
        /// <param name="sample">new sample</param>
        /// <param name="parent">node passed to the new clusters as their parent</param>
        private void CreateNewClusters(Sample sample, Node parent)
        {
            ClusterX newClusterX = new ClusterX(sample, parent);
            this.clustersX.Add(newClusterX);
            ClusterY newClusterY = new ClusterY(sample, parent);
            this.clustersY.Add(newClusterY);

            // The ClusterPair constructor already puts the sample into the pair's sample list,
            // so it is not added a second time here (that would store the sample twice).
            ClusterPair clusterPair = new ClusterPair(newClusterX, newClusterY, sample);
            newClusterX.SetClusterPair(clusterPair);
            newClusterY.SetClusterPair(clusterPair);

            // The id is the number of pairs registered before this one is added.
            clusterPair.Id = this.clusterPairs.Count;

            this.clusterPairs.Add(clusterPair);
        }
Пример #9
0
        /// <summary>
        /// Gets the nearest cluster pair using the most discriminating features (MDF) vector
        /// of the sample, scored with <c>GetSDNLL_MDF</c> of each pair's X cluster.
        /// </summary>
        /// <param name="sample">sample whose X values are converted to an MDF vector</param>
        /// <param name="distance">smallest score found; stays double.MaxValue when no pair scores below it</param>
        /// <param name="index">position of the returned pair in the pair list; -1 when no pair scores below double.MaxValue</param>
        /// <returns>the pair with the smallest score, or the first pair when none scores below double.MaxValue</returns>
        private ClusterPair GetNearestClusterPairXBySDNLL_MDF(Sample sample, out double distance, out int index)
        {
            // convert the input vector to an MDF vector: subtract c, then multiply by the
            // transposed manifold
            ILArray<double> inputVector = sample.X.Values.ToArray();
            ILArray<double> centeredVector = inputVector - this.c;
            ILArray<double> mdfVector = ILMath.multiply(this.gSOManifold.T, centeredVector);

            ClusterPair bestPair = clusterPairs[0];
            distance = double.MaxValue;
            index = -1;

            for (int position = 0; position < clusterPairs.Count; position++)
            {
                ClusterPair candidate = clusterPairs[position];
                double candidateDistance = candidate.X.GetSDNLL_MDF(mdfVector);

                // strict comparison: on ties the earlier pair wins
                if (candidateDistance < distance)
                {
                    bestPair = candidate;
                    distance = candidateDistance;
                    index = position;
                }
            }

            return bestPair;
        }
Пример #10
0
        /// <summary>
        /// Returns the smallest distance between the X vector of a sample and the X vectors
        /// of the given centers.
        /// </summary>
        /// <param name="centers">candidate centers</param>
        /// <param name="sample">sample to measure from</param>
        /// <returns>the smallest distance found; Double.MaxValue when no center yields a smaller value (including an empty list)</returns>
        private double GetDistanceFromClosestCenter(List<Sample> centers, Sample sample)
        {
            double closest = Double.MaxValue;

            for (int position = 0; position < centers.Count; position++)
            {
                double candidate = centers[position].X.GetDistance(sample.X);

                // keep the running minimum; a candidate only replaces it when strictly smaller
                closest = candidate < closest ? candidate : closest;
            }

            return closest;
        }
Пример #11
0
 /// <summary>
 /// Used for k-means: replaces the Y vector of a center with the mean of the Y vectors
 /// of the given samples.
 /// </summary>
 /// <param name="center">center whose Y is overwritten</param>
 /// <param name="listOfSamples">samples whose Y vectors are averaged</param>
 private void UpdateCenterY(Sample center, List<Sample> listOfSamples)
 {
     var outputVectors = listOfSamples.Select(member => member.Y).ToList();
     center.Y = Vector.GetMeanOfVectors(outputVectors);
 }
Пример #12
0
        /// <summary>
        /// Measures the distance from the Y vector of a sample to the Y mean of every cluster
        /// pair and returns the pairs ordered by ascending distance.
        /// </summary>
        /// <param name="sample">sample whose Y vector is compared</param>
        /// <returns>(distance, pair) tuples, smallest distance first</returns>
        /// <exception cref="InvalidOperationException">
        /// the sample's Y has fewer than two values and there is at least one cluster pair
        /// </exception>
        private List<Tuple<double, ClusterPair>> GetDistancesAndClusterPairsY(Sample sample)
        {
            var distancesWithPairs = new List<Tuple<double, ClusterPair>>();

            for (int position = 0; position < clusterPairs.Count; position++)
            {
                // checked once per pair, so an empty pair list never triggers the exception
                if (2 > sample.Y.Values.Count())
                {
                    throw new InvalidOperationException("Bad operation");
                }

                ClusterPair pair = clusterPairs[position];
                double distanceToMean = pair.Y.Mean.GetDistance(sample.Y);
                distancesWithPairs.Add(Tuple.Create(distanceToMean, pair));
            }

            return distancesWithPairs.OrderBy(entry => entry.Item1).ToList();
        }
Пример #13
0
 /// <summary>
 /// Looks up the test result for a sample by delegating to the root node.
 /// </summary>
 /// <param name="item">sample to look up</param>
 /// <returns>the result produced by the root node's <c>GetLabelOfCategory</c></returns>
 public TestResult GetLabelOfCategory(Sample item)
 {
     TestResult rootResult = this.root.GetLabelOfCategory(item);
     return rootResult;
 }
Пример #14
0
        /// <summary>
        /// Finds the cluster pair whose X mean is closest to the X vector of a sample.
        /// </summary>
        /// <param name="sample">sample to compare against the X means</param>
        /// <param name="distance">smallest distance found; stays double.MaxValue when no pair is closer than that</param>
        /// <param name="index">position of the returned pair in the pair list; -1 when no pair is closer than double.MaxValue</param>
        /// <returns>the closest pair, or the first pair when none is closer than double.MaxValue</returns>
        public ClusterPair GetNearestClusterPairX(Sample sample, out double distance, out int index)
        {
            ClusterPair bestPair = clusterPairs[0];
            distance = double.MaxValue;
            index = -1;

            for (int position = 0; position < clusterPairs.Count; position++)
            {
                ClusterPair candidate = clusterPairs[position];
                double candidateDistance = candidate.X.Mean.GetDistance(sample.X);

                // strict comparison: on ties the earlier pair wins
                if (candidateDistance < distance)
                {
                    bestPair = candidate;
                    distance = candidateDistance;
                    index = position;
                }
            }

            return bestPair;
        }
Пример #15
0
        /// <summary>
        /// Picks the child node that owns the X cluster whose mean is closest to the X vector
        /// of a sample.
        /// </summary>
        /// <param name="sample">sample to compare against the children's X cluster means</param>
        /// <returns>
        /// null when there are no children; otherwise the child with the closest X cluster,
        /// or the first child when no cluster is closer than double.MaxValue
        /// </returns>
        public Node GetNextAByEuclidean(Sample sample)
        {
            if (children.Count <= 0)
            {
                return null;
            }

            Node nearestChild = children[0];
            double smallestDistance = double.MaxValue;

            foreach (var child in children)
            {
                foreach (var cluster in child.ClustersX)
                {
                    double candidate = cluster.Mean.GetDistance(sample.X);

                    // strict comparison: on ties the earlier cluster (and its node) wins
                    if (candidate < smallestDistance)
                    {
                        smallestDistance = candidate;
                        nearestChild = child;
                    }
                }
            }

            return nearestChild;
        }
Пример #16
0
        //public void CountClosestClusterPairByWidthSearch(Sample item, ClusterPairTestResult result)
        //{
        //    if (this.children != null && this.children.Count != 0 && this.children[0].IsLeafNode)
        //    {
        //        double distance = double.MaxValue;
        //        int index = 0;
        //        ClusterPair clPair = this.GetNearestClusterPairXBySDNLL_MDF(item, out distance, out index);
        //        if (distance < result.Distance)
        //        {
        //            result.ClusterPair = clPair;
        //            result.Distance = distance;
        //        }
        //    }
        //    else
        //    {
        //        var nodesToSearch = this.GetNodesToSearch(item);
        //        foreach (var node in nodesToSearch)
        //        {
        //            node.CountClosestClusterPairByWidthSearch(item, result);
        //        }
        //    }
        //}
        /// <summary>
        /// Searches this node's subtree for the cluster pair closest to a sample and keeps the
        /// best match in <paramref name="result"/>.
        /// </summary>
        /// <param name="item">sample being looked up</param>
        /// <param name="result">
        /// accumulator; its ClusterPair and Distance are overwritten whenever a leaf yields a
        /// distance smaller than the one already stored
        /// </param>
        public void CountClosestClusterPairByWidthSearch(Sample item, ClusterPairTestResult result)
        {
            if (!this.IsLeafNode)
            {
                // inner node: recurse into every child selected for this sample
                var nodesToSearch = this.GetNodesToSearch(item);

                foreach (var node in nodesToSearch)
                {
                    node.CountClosestClusterPairByWidthSearch(item, result);
                }

                return;
            }

            // a leaf is scored through its parent's cluster pairs; a leaf without a parent
            // contributes nothing
            if (this.Parent == null)
            {
                return;
            }

            double distance;
            int index;
            ClusterPair clPair = this.Parent.GetNearestClusterPairXBySDNLL_MDF(item, out distance, out index);

            // No console output here: the search runs once per visited leaf and the other
            // diagnostic prints in this code are disabled as well.
            if (distance < result.Distance)
            {
                result.ClusterPair = clPair;
                result.Distance = distance;
            }
        }
Пример #17
0
        /// <summary>
        /// Looks up the test result for a sample: inner nodes forward the sample to the child
        /// of their nearest cluster pair, leaf nodes answer from their nearest cluster pair.
        /// </summary>
        /// <param name="item">sample to look up</param>
        /// <returns>means, label and samples of the nearest cluster pair of the reached leaf</returns>
        public TestResult GetLabelOfCategory(Sample item)
        {
            // Only needed to satisfy the out parameters; the values are not used afterwards.
            double nearestDistance;
            int nearestIndex;

            if (!this.isLeafNode)
            {
                // inner node: route by the MDF-based score and continue in the matching child
                ClusterPair routingPair = this.GetNearestClusterPairXBySDNLL_MDF(item, out nearestDistance, out nearestIndex);
                Node nextNode = routingPair.CorrespondChild;
                return nextNode.GetLabelOfCategory(item);
            }

            // leaf node: answer from the pair whose X mean is closest
            ClusterPair leafPair = this.GetNearestClusterPairX(item, out nearestDistance, out nearestIndex);

            TestResult leafResult = new TestResult();
            leafResult.ClusterMeanX = leafPair.X.Mean;
            leafResult.ClusterMeanY = leafPair.Y.Mean;
            leafResult.Label = leafPair.X.Label;
            leafResult.Samples = leafPair.Samples;
            return leafResult;
        }
Пример #18
0
        /// <summary>
        /// Updates the clusters of this node with a new sample: either creates a new cluster
        /// pair and recomputes the MDF data of the node, or adds the sample to the nearest
        /// existing clusters.
        /// </summary>
        /// <param name="sample">sample to add</param>
        /// <exception cref="InvalidOperationException">
        /// a new cluster pair would have to be created but Params.StoreItems is false
        /// </exception>
        private void UpdateClusters(Sample sample)
        {
            // parameters: bly - bound of the number of y clusters in the node, deltaY - resolution
            // NOTE(review): nearestCluster, distance and index are never read in this method;
            // they look like leftovers of the disabled GetNearestClusterPairY call below.

            ClusterPair nearestCluster = null;
            double distance = double.MaxValue;
            int index = -1;

            //nearestCluster = this.GetNearestClusterPairY(sample, out distance, out index);

            // pairs ordered by ascending distance of their Y mean to the sample's Y
            // NOTE(review): element [0] is read below without an emptiness check - presumably
            // the node always holds at least one pair at this point; confirm.
            List<Tuple<double, ClusterPair>> orderedClusterPairs = this.GetDistancesAndClusterPairsY(sample);

            // if the pair count is below bly and the nearest distance exceeds deltaY,
            // create a new cluster pair (x,y)
            if (this.clusterPairs.Count < Params.bly && orderedClusterPairs[0].Item1 > this.deltaY)
            {
                if (!Params.StoreItems)
                {
                    throw new InvalidOperationException("Not possible create new cluster. No items available for MDF counting.");
                }

                this.CreateNewClusters(sample, this);

                // update MDF space
                this.CountC();
                this.CountGSOManifold();
                this.CountMDFOfVectors();
                this.CountMDFMeans();
                this.CountCovarianceMatricesMDF();

                // count cov matrix mean
                this.CountCovarianceMatrixMeanMDF();
            }
            // else update a portion p of the nearest clusters (amnesic average, per the original notes)
            else
            {
                #warning TODO update ceratin portion

                // NOTE(review): countOfClusters is not read after this assignment.
                int countOfClusters = orderedClusterPairs.Count;
                // truncated (count - 1) * p, plus one: at least the nearest pair is always updated
                int countOfClustersToUpdate = (int)((orderedClusterPairs.Count - 1 ) * Params.p) + 1;

                // Update a certain portion p (e.g., p = 0.2, i.e., pulling top 20%) of nearest clusters using the amnesic average
                // explained in Section III-F and return the index j
                for (int i = 0; i < countOfClustersToUpdate; i++)
                {
                    //Console.WriteLine("Y");
                    orderedClusterPairs[i].Item2.Y.AddItem(sample.Y, sample.Label);
                }

                // X side: build a labelled vector from the sample's X values and compute its
                // MDF data from the node's manifold and c
                Vector newItem = new Vector(sample.X.Values.ToArray());
                newItem.Label = sample.Label;
                newItem.CountMDF(this.gSOManifold, this.c);

                // add the vector to the X cluster of the nearest pair only
                orderedClusterPairs[0].Item2.X.AddItemNonLeaf(newItem);

                // the sample itself is kept only when sample storing is enabled
                if (Params.StoreSamples)
                {
                    orderedClusterPairs[0].Item2.Samples.Add(sample);
                }

                // update meanMDF and varianceMDF
                this.UpdateMeanAndVarianceMdf(newItem);

                // count cov matrix mean
                this.CountCovarianceMatrixMeanMDF();
            }
        }
Пример #19
0
        /// <summary>
        /// Adds a sample to this node's cluster pairs without any spawning step: creates a
        /// new pair when there is room and the sample is far enough from every existing pair,
        /// otherwise adds the sample to the nearest pair.
        /// </summary>
        /// <param name="sample">sample to add</param>
        private void UpdateClusterPairsX_ForSwapping(Sample sample)
        {
            double nearestDistance;
            int nearestIndex;
            ClusterPair nearestPair = this.GetNearestClusterPairX(sample, out nearestDistance, out nearestIndex);

            // a new pair needs both: fewer than blx pairs so far and a distance above deltaX
            bool needsNewPair = clusterPairs.Count < this.blx && nearestDistance > this.deltaX;

            if (!needsNewPair)
            {
                // add the sample to the nearest pair, which updates its clusters
                nearestPair.AddItem(sample);
                return;
            }

            this.CreateNewClusters(sample, this);
        }
Пример #20
0
        /// <summary>
        /// Adds a sample to this node's cluster pairs and, when the value of GetNSPP exceeds
        /// Params.bs, runs the swap procedure selected by Params.SwapType and recomputes the
        /// MDF data of the node.
        /// </summary>
        /// <param name="sample">sample to add</param>
        /// <exception cref="InvalidOperationException">
        /// SwapType is 3 or 4 and none of the ten clustering attempts reported success
        /// </exception>
        private void UpdateClusterPairsX(Sample sample)
        {
            // update cluster pairs

            // parameters: blx - bound of the number of micro-clusters in the node, deltaX - resolution
            // find the pair whose X mean is nearest to the sample

            double distance = 0.0;
            int index = 0;
            ClusterPair nearestCluster = this.GetNearestClusterPairX(sample, out distance, out index);

            //Console.WriteLine(distance.ToString());
            // if the pair count is below blx and the distance exceeds deltaX,
            // create a new cluster pair (x,y)
            if (clusterPairs.Count < this.blx && distance > this.deltaX)
            {
                this.CreateNewClusters(sample, this);
            }
            // else add the sample to the nearest pair (amnesic average, per the original notes)
            else
            {
                // add sample to clusters, update statistics of clusters
                nearestCluster.AddItem(sample);
            }

            // spawn if necessary
            // original note: if 2(n - q)/q2 > bs spawn to q children, use k-means alg
            if (this.GetNSPP() > Params.bs)
            {
                #region Swapping evaluation log

                //for (int i = 0; i < 10; i++)
                //{
                //    this.EvaluateSwap();

                //    Console.WriteLine("Round" + i.ToString());
                //    for (int j = 0; j < Params.q; j++)
                //    {
                //        Node node = new Node(this);
                //        List<ClusterPair> clPairs = this.clusterPairs.Where(cp => cp.CurrentCenter == j).ToList();
                //        Console.WriteLine("Region" + j.ToString() + ": " + clPairs.Count.ToString());
                //    }
                //}

                #endregion

                // Params.SwapType is re-read for every branch below; the branches are
                // independent ifs, not an else-if chain.

                // type 1: evaluate, then the plain swap
                if (Params.SwapType == 1)
                {
                    this.EvaluateSwap();
                    this.Swap();
                }
                // type 2: evaluate, then the modified swap
                if (Params.SwapType == 2)
                {
                    this.EvaluateSwap();
                    this.Swap_Modified();
                }
                // type 3: up to ten attempts of KMeansPlusPlusClustering before evaluate + swap
                if (Params.SwapType == 3)
                {
                    for (int i = 0; i < 10; i++)
                    {

                        // stop retrying as soon as an attempt reports success
                        if (this.KMeansPlusPlusClustering())
                        {
                            break;
                        }
                        // tenth failed attempt: give up
                        if (i == 9)
                        {
                            throw new InvalidOperationException("Not successfull keans clustering.");
                        }
                    }
                    this.EvaluateSwap();
                    this.Swap();
                }
                // type 4: same retry scheme as type 3, using KMeansClusteringY
                if (Params.SwapType == 4)
                {
                    for (int i = 0; i < 10; i++)
                    {
                        // stop retrying as soon as an attempt reports success
                        if (this.KMeansClusteringY())
                        {
                            break;
                        }
                        // tenth failed attempt: give up
                        if (i == 9)
                        {
                            throw new InvalidOperationException("Not successfull keans clustering.");
                        }
                    }
                    this.EvaluateSwap();
                    this.Swap();
                }

                // count most discriminating features space etc.
                this.CountC();
                this.CountGSOManifold();
                this.CountMDFOfVectors();
                this.CountMDFMeans();
                this.CountCovarianceMatricesMDF();

                // count cov matrix mean
                this.CountCovarianceMatrixMeanMDF();
                this.CountMeanAndVarianceMDF();

                // dispose cluster items unless they have to be kept
                if (!Params.StoreItems)
                {
                    this.DisposeClustersItems();
                }
            }
        }
Пример #21
0
        /// <summary>
        /// Maintains a running mean of the X vectors seen for each label and returns a copy of
        /// the current mean for the label of the given sample.
        /// </summary>
        /// <remarks>
        /// NOTE(review): despite "Output" in the name, the mean is built from sample.X, not
        /// sample.Y - confirm this is intended.
        /// </remarks>
        /// <param name="sample">sample whose label selects the entry and whose X values are folded into its mean</param>
        /// <returns>a new vector built from the values of the updated mean</returns>
        public Vector GetOutputFromKnownSamples(Sample sample)
        {
            // One lookup instead of ContainsKey followed by repeated indexer calls.
            MappedValue mapped;
            if (!this.outputs.TryGetValue(sample.Label, out mapped))
            {
                // first sample for this label: the mean is the sample itself
                mapped = new MappedValue() { Mean = new Vector(sample.X.Values.ToArray()), Count = 1 };
                this.outputs[sample.Label] = mapped;
            }
            else
            {
                // incremental mean: mean * (n - 1) / n + x / n
                mapped.Count++;
                int count = mapped.Count;
                mapped.Mean.Multiply((count - 1.0) / count);
                Vector addPart = new Vector(sample.X.Values.ToArray());
                addPart.Multiply(1 / (double)count);
                mapped.Mean.Add(addPart);
            }

            return new Vector(mapped.Mean.Values.ToArray());
        }
Пример #22
0
        /// <summary>
        /// Gets the nearest cluster pair using the most discriminating features (MDF) vector
        /// of the sample, measured with <c>GetMDFDistanceFromMDFMean</c> of each pair's X cluster.
        /// </summary>
        /// <param name="sample">sample whose X values are converted to an MDF vector</param>
        /// <param name="distance">smallest distance found; stays double.MaxValue when no pair is closer than that</param>
        /// <param name="index">position of the returned pair in the pair list; -1 when no pair is closer than double.MaxValue</param>
        /// <returns>the closest pair, or the first pair when none is closer than double.MaxValue</returns>
        private ClusterPair GetNearestClusterPairXMDF(Sample sample, out double distance, out int index)
        {
            // convert the input vector to an MDF vector: subtract C, then multiply by the
            // transposed manifold
            ILArray<double> inputVector = sample.X.Values.ToArray();
            ILArray<double> centeredVector = inputVector - C;
            ILArray<double> mdfVector = ILMath.multiply(this.gSOManifold.T, centeredVector.ToArray());

            ClusterPair bestPair = clusterPairs[0];
            distance = double.MaxValue;
            index = -1;

            for (int position = 0; position < clusterPairs.Count; position++)
            {
                ClusterPair candidate = clusterPairs[position];
                double candidateDistance = candidate.X.GetMDFDistanceFromMDFMean(mdfVector);

                // strict comparison: on ties the earlier pair wins
                if (candidateDistance < distance)
                {
                    bestPair = candidate;
                    distance = candidateDistance;
                    index = position;
                }
            }

            return bestPair;
        }
Пример #23
0
 /// <summary>
 /// Adds a sample to this pair: its X vector goes to the X cluster, its Y vector to the
 /// Y cluster (both with the sample's label), and the sample is appended to the sample list.
 /// </summary>
 /// <param name="s">sample to add</param>
 public void AddItem(Sample s)
 {
     var label = s.Label;

     X.AddItem(s.X, label);
     Y.AddItem(s.Y, label);

     samples.Add(s);
 }
Пример #24
0
        /// <summary>
        /// Runs the width search from this node and returns the closest cluster pair it found.
        /// </summary>
        /// <param name="item">sample to look up</param>
        /// <returns>
        /// the pair stored by the search, or the parameterless placeholder pair when the
        /// search stored nothing
        /// </returns>
        public ClusterPair GetTestResultByWidthSearch(Sample item)
        {
            // start from the worst possible distance and a placeholder pair
            ClusterPairTestResult best = new ClusterPairTestResult();
            best.Distance = double.MaxValue;
            best.ClusterPair = new ClusterPair();

            this.CountClosestClusterPairByWidthSearch(item, best);

            ClusterPair closestPair = best.ClusterPair;
            return closestPair;
        }
Пример #25
0
 /// <summary>
 /// Feeds a sample into the tree by handing it to the root node.
 /// </summary>
 /// <param name="sample">sample added to the tree</param>
 public void UpdateTree(Sample sample)
 {
     var entryNode = this.root;
     entryNode.UpdateNode(sample);
 }
Пример #26
0
        /// <summary>
        /// Adds a sample to this node. A leaf updates its own cluster pairs; an inner node
        /// updates its clusters and then passes the sample to the child of its nearest
        /// cluster pair.
        /// </summary>
        /// <param name="sample">sample to add; its Y must not be null</param>
        /// <exception cref="InvalidOperationException">the Y of the sample is null</exception>
        public void UpdateNode(Sample sample)
        {
            // Validate before touching any state, so a rejected sample is not left behind
            // in the stored sample list.
            if (sample.Y == null)
            {
                throw new InvalidOperationException("Output Y of sample is null");
            }

            // keep the sample (needed for counting of output) only when storing is enabled
            if (Params.StoreSamples)
            {
                this.samples.Add(sample);
            }

            this.countOfSamples++;

            if (this.isLeafNode)
            {
                if (this.countOfSamples == 1)
                {
                    // very first sample: create new clusters and cluster pair
                    this.CreateNewClusters(sample, this);
                }
                else
                {
                    // update cluster pairs
                    this.UpdateClusterPairsX(sample);
                }

                return;
            }

            // inner node: update the clusters of this node first ...
            this.UpdateClusters(sample);

            // ... then pass the sample on to the child of the nearest cluster pair
            double distance;
            int index;
            ClusterPair nearestClPair = this.GetNearestClusterPairXBySDNLL_MDF(sample, out distance, out index);

            Node next = nearestClPair.CorrespondChild;
            next.UpdateNode(sample);
        }
Пример #27
0
        /// <summary>
        /// Runs the width search from the root node and wraps the closest cluster pair in a
        /// test result.
        /// </summary>
        /// <param name="item">sample to look up</param>
        /// <returns>X mean, Y mean and X label of the cluster pair returned by the root</returns>
        public TestResult GetTestResultByWidthSearch(Sample item)
        {
            ClusterPair closestPair = root.GetTestResultByWidthSearch(item);

            // Only the means and the label are filled in; Samples is left at its default.
            TestResult outcome = new TestResult();
            outcome.ClusterMeanX = closestPair.X.Mean;
            outcome.ClusterMeanY = closestPair.Y.Mean;
            outcome.Label = closestPair.X.Label;
            return outcome;
        }
Пример #28
0
 /// <summary>
 /// Counts a sample on this node and, when the node is a leaf, adds it to the node's
 /// cluster pairs. Non-leaf nodes only have their sample counter incremented.
 /// </summary>
 /// <param name="sample">sample to add</param>
 public void UpdateNode_ForSwapping(Sample sample)
 {
     this.countOfSamples++;

     if (!this.isLeafNode)
     {
         return;
     }

     // the very first sample creates the initial clusters and cluster pair;
     // every later one updates the existing pairs
     bool isFirstSample = this.countOfSamples == 1;
     if (isFirstSample)
     {
         this.CreateNewClusters(sample, this);
     }
     else
     {
         this.UpdateClusterPairsX_ForSwapping(sample);
     }
 }
Пример #29
0
        /// <summary>
        /// Returns the distinct child nodes that correspond to the cluster pairs selected by
        /// <c>GetClosestClusterPairsAndSDNLLDistances_MDF</c> for a sample.
        /// </summary>
        /// <param name="sample">sample being looked up</param>
        /// <returns>one node per distinct child id, taken from the first pair of each id</returns>
        public List<Node> GetNodesToSearch(Sample sample)
        {
            List<ClusterPair> candidatePairs = this.GetClosestClusterPairsAndSDNLLDistances_MDF(sample);

            // several pairs can point at the same child; keep one node per child id
            var pairsByChildId = candidatePairs.GroupBy(pair => pair.CorrespondChild.Id);
            var distinctChildren = pairsByChildId.Select(sameChild => sameChild.First().CorrespondChild);

            return distinctChildren.ToList();
        }
Пример #30
0
 /// <summary>
 /// Finds the cluster pair whose Y mean is closest to the Y vector of a sample.
 /// </summary>
 /// <param name="sample">sample to compare against the Y means</param>
 /// <param name="distance">smallest distance found; stays double.MaxValue when no pair is closer than that</param>
 /// <returns>the closest pair, or the first pair when none is closer than double.MaxValue</returns>
 private ClusterPair GetNearestClusterPairY(Sample sample, out double distance)
 {
     distance = double.MaxValue;
     ClusterPair closestPair = clusterPairs[0];
     foreach (ClusterPair item in clusterPairs)
     {
         // Compute the distance once and use the same value for the comparison and the result.
         double newDistance = item.Y.Mean.GetDistance(sample.Y);
         if (newDistance < distance)
         {
             distance = newDistance;
             closestPair = item;
         }
     }
     return closestPair;
 }