/// <summary>
/// Checks that <c>GetXDistanceFromSample</c> returns 9.747 (rounded to three decimals) for two
/// four-dimensional samples. The squared differences of the inputs sum to 95, and sqrt(95) is about 9.7468.
/// </summary>
public void GetXDistanceFromSample_GiveCorrectDistance()
{
    Sample sample1 = new Sample(new double[] { 0, 3, 4, 5 }, 1, 0);
    Sample sample2 = new Sample(new double[] { 7, 6, 3, -1 }, 1, 0);

    double distance = Math.Round(sample1.GetXDistanceFromSample(sample2), 3);

    // Expected value goes first so a failure message labels the two numbers correctly;
    // the small delta avoids depending on exact floating-point equality after rounding.
    Assert.AreEqual(9.747, distance, 1e-9);
}
/// <summary>
/// Checks that a cluster X built from a single sample reports that sample's input vector as its mean.
/// </summary>
public void ClusterXCreate_CreateCorrectClusterX()
{
    // Global parameter set before the cluster is constructed.
    Params.inputDataDimension = 3;

    Sample singleSample = new Sample(new double[] { 1.0, 2.0, 3.0 }, 1.0, 0);
    ClusterX cluster = new ClusterX(singleSample, null);

    Vector expectedMean = new Vector(new double[] { 1.0, 2.0, 3.0 });
    Assert.IsTrue(cluster.Mean.EqualsToVector(expectedMean));
}
/// <summary>
/// Creates an output-space cluster seeded with a single sample.
/// </summary>
/// <param name="sample">Sample whose Y values become the first item and the initial mean.</param>
/// <param name="parent">Node passed through to the base constructor.</param>
public ClusterY(Sample sample, Node parent)
    : base(sample, parent)
{
    this.dimension = Params.outputDataDimension;

    // Third constructor argument: one more than the number of items already stored.
    int itemNumber = this.items.Count + 1;
    Vector firstItem = new Vector(sample.Y.Values.ToArray(), sample.Label, itemNumber);
    this.items.Add(firstItem);

    // The mean is built from a second ToArray() call, not from the item's array.
    Vector initialMean = new Vector(sample.Y.Values.ToArray());
    this.mean = initialMean;
}
/// <summary>
/// Pairs an X cluster with a Y cluster and records the sample they were created from.
/// </summary>
/// <param name="cX">X cluster of the pair.</param>
/// <param name="cY">Y cluster of the pair.</param>
/// <param name="sample">First entry of the pair's sample list.</param>
public ClusterPair(ClusterX cX, ClusterY cY, Sample sample)
{
    this.clusterX = cX;
    this.clusterY = cY;

    // Both center markers start at 0.
    this.PreviousCenter = 0;
    this.CurrentCenter = 0;

    List<Sample> initialSamples = new List<Sample>();
    initialSamples.Add(sample);
    this.samples = initialSamples;
}
/// <summary>
/// Creates an input-space cluster seeded with a single sample.
/// </summary>
/// <param name="sample">Sample whose X values become the first item and the initial mean.</param>
/// <param name="parent">Node passed through to the base constructor.</param>
public ClusterX(Sample sample, Node parent)
    : base(sample, parent)
{
    this.child = null;
    this.dimension = Params.inputDataDimension;

    // The seed item is always numbered 1.
    Vector firstItem = new Vector(sample.X.Values.ToArray(), sample.Label, 1);
    this.items.Add(firstItem);

    // The mean is built from a second ToArray() call, not from the item's array.
    Vector initialMean = new Vector(sample.X.Values.ToArray());
    this.mean = initialMean;

    // TODO: count the covariance matrix (not implemented here).
}
/// <summary>
/// Builds a node with three single-sample X clusters and calls <c>GetManifold</c> with the
/// same three vectors as scatter vectors.
/// </summary>
/// <remarks>
/// NOTE(review): the returned array is never asserted on, so this only checks that the call completes.
/// </remarks>
public void CountGSOManifold_ProvideCorrectCounting()
{
    Params.inputDataDimension = 4;
    Node node = new Node(0.0, 0.0, 0.0, 0.0, "");

    Sample[] samples =
    {
        new Sample(new double[] { 1, 0, 2, 1 }, 1, 0),
        new Sample(new double[] { -1, 1, 0, -1 }, 1, 0),
        new Sample(new double[] { 2, 1, 1, 1 }, 1, 0)
    };

    foreach (Sample seed in samples)
    {
        node.ClustersX.Add(new ClusterX(seed, null));
    }

    List<Vector> scatterVectors = new List<Vector>
    {
        new Vector(new double[] { 1, 0, 2, 1 }),
        new Vector(new double[] { -1, 1, 0, -1 }),
        new Vector(new double[] { 2, 1, 1, 1 })
    };

    ILArray<double> array = node.GetManifold(scatterVectors);
}
/// <summary>
/// Base cluster constructor: remembers the parent node and starts with an empty item list.
/// </summary>
/// <param name="sample">Not used by this constructor.</param>
/// <param name="parent">Node stored as this cluster's parent.</param>
public Cluster(Sample sample, Node parent)
{
    this.parent = parent;
    this.items = new List<Vector>();
}
/// <summary>
/// Creates a new X cluster and a new Y cluster from a sample, links them through a new
/// cluster pair and registers all three in this node.
/// </summary>
/// <param name="sample">New sample that seeds both clusters.</param>
/// <param name="parent">Node handed to both cluster constructors as their parent.</param>
private void CreateNewClusters(Sample sample, Node parent)
{
    ClusterX newClusterX = new ClusterX(sample, parent);
    this.clustersX.Add(newClusterX);

    ClusterY newClusterY = new ClusterY(sample, parent);
    this.clustersY.Add(newClusterY);

    // The ClusterPair constructor already puts the sample into the pair's sample list,
    // so it must not be added again here (that stored every seed sample twice).
    ClusterPair clusterPair = new ClusterPair(newClusterX, newClusterY, sample);
    newClusterX.SetClusterPair(clusterPair);
    newClusterY.SetClusterPair(clusterPair);

    // The id is the pair's position in the list it is about to be appended to.
    clusterPair.Id = clusterPairs.Count;
    this.clusterPairs.Add(clusterPair);
}
/// <summary>
/// Finds the cluster pair whose X cluster returns the smallest <c>GetSDNLL_MDF</c> value for the
/// sample's most-discriminating-features (MDF) vector.
/// </summary>
/// <param name="sample">Sample whose X values are converted to an MDF vector.</param>
/// <param name="distance">Smallest value found; stays <c>double.MaxValue</c> if no pair scored below it.</param>
/// <param name="index">Position of the returned pair in the pair list; stays -1 if no pair scored below <c>double.MaxValue</c>.</param>
/// <returns>The best-scoring pair, or the first pair if none scored below <c>double.MaxValue</c>.</returns>
private ClusterPair GetNearestClusterPairXBySDNLL_MDF(Sample sample, out double distance, out int index)
{
    // Convert the input vector to an MDF vector: subtract c, then multiply by gSOManifold.T.
    ILArray<double> inputValues = sample.X.Values.ToArray();
    ILArray<double> centered = inputValues - this.c;
    ILArray<double> mdfVector = ILMath.multiply(this.gSOManifold.T, centered);

    distance = double.MaxValue;
    index = -1;
    ClusterPair best = clusterPairs[0];

    for (int position = 0; position < clusterPairs.Count; position++)
    {
        ClusterPair candidate = clusterPairs[position];
        double score = candidate.X.GetSDNLL_MDF(mdfVector);
        if (score < distance)
        {
            distance = score;
            index = position;
            best = candidate;
        }
    }

    return best;
}
/// <summary>
/// Returns the smallest X distance between the sample and any of the given centers.
/// </summary>
/// <param name="centers">Candidate centers.</param>
/// <param name="sample">Sample to measure from.</param>
/// <returns>The minimum distance, or <c>Double.MaxValue</c> when no center is closer than that (for example an empty list).</returns>
private double GetDistanceFromClosestCenter(List<Sample> centers, Sample sample)
{
    double closest = Double.MaxValue;

    for (int i = 0; i < centers.Count; i++)
    {
        double candidate = centers[i].X.GetDistance(sample.X);
        if (candidate < closest)
        {
            closest = candidate;
        }
    }

    return closest;
}
// for k-means
/// <summary>
/// Replaces the center's Y vector with the mean of the Y vectors of the given samples.
/// </summary>
/// <param name="center">Center whose Y is overwritten.</param>
/// <param name="listOfSamples">Samples whose Y vectors are averaged.</param>
private void UpdateCenterY(Sample center, List<Sample> listOfSamples)
{
    var outputs = listOfSamples.Select(member => member.Y).ToList();
    center.Y = Vector.GetMeanOfVectors(outputs);
}
/// <summary>
/// Measures the distance from the sample's Y vector to the Y mean of every cluster pair and
/// returns the pairs ordered by that distance, nearest first.
/// </summary>
/// <param name="sample">Sample whose Y vector is compared.</param>
/// <returns>(distance, pair) tuples in ascending order of distance.</returns>
/// <exception cref="InvalidOperationException">
/// The sample's Y has fewer than two values and there is at least one cluster pair.
/// </exception>
private List<Tuple<double, ClusterPair>> GetDistancesAndClusterPairsY(Sample sample)
{
    List<Tuple<double, ClusterPair>> measured = new List<Tuple<double, ClusterPair>>();

    foreach (ClusterPair pair in clusterPairs)
    {
        // Checked per pair, so nothing is thrown when there are no pairs at all.
        if (sample.Y.Values.Count() < 2)
        {
            throw new InvalidOperationException("Bad operation");
        }

        double yDistance = pair.Y.Mean.GetDistance(sample.Y);
        measured.Add(Tuple.Create(yDistance, pair));
    }

    return measured.OrderBy(entry => entry.Item1).ToList();
}
/// <summary>
/// Classifies a sample by delegating to the root node.
/// </summary>
/// <param name="item">Sample to classify.</param>
/// <returns>The result produced by the root node.</returns>
public TestResult GetLabelOfCategory(Sample item)
{
    TestResult classification = this.root.GetLabelOfCategory(item);
    return classification;
}
/// <summary>
/// Finds the cluster pair whose X mean is closest to the sample's X vector.
/// </summary>
/// <param name="sample">Sample to compare against the X means.</param>
/// <param name="distance">Smallest distance found; stays <c>double.MaxValue</c> if no pair was closer than that.</param>
/// <param name="index">Position of the returned pair in the pair list; stays -1 if no pair was closer than <c>double.MaxValue</c>.</param>
/// <returns>The nearest pair, or the first pair if none was closer than <c>double.MaxValue</c>.</returns>
public ClusterPair GetNearestClusterPairX(Sample sample, out double distance, out int index)
{
    distance = double.MaxValue;
    index = -1;
    ClusterPair nearest = clusterPairs[0];

    for (int position = 0; position < clusterPairs.Count; position++)
    {
        ClusterPair candidate = clusterPairs[position];
        double candidateDistance = candidate.X.Mean.GetDistance(sample.X);
        if (candidateDistance < distance)
        {
            distance = candidateDistance;
            index = position;
            nearest = candidate;
        }
    }

    return nearest;
}
/// <summary>
/// Picks the child node that owns the X cluster whose mean is closest to the sample's X vector.
/// </summary>
/// <param name="sample">Sample to route.</param>
/// <returns>
/// The child with the nearest X cluster mean; the first child if no cluster is closer than
/// <c>double.MaxValue</c>; <c>null</c> when there are no children.
/// </returns>
public Node GetNextAByEuclidean(Sample sample)
{
    if (children.Count <= 0)
    {
        return null;
    }

    Node nearestChild = children[0];
    double smallestDistance = double.MaxValue;

    foreach (var child in children)
    {
        foreach (var cluster in child.ClustersX)
        {
            double candidate = cluster.Mean.GetDistance(sample.X);
            if (candidate < smallestDistance)
            {
                smallestDistance = candidate;
                nearestChild = child;
            }
        }
    }

    return nearestChild;
}
/// <summary>
/// Searches the subtree under this node for the cluster pair with the smallest SDNLL/MDF value
/// for the sample and keeps the best pair found so far in <paramref name="result"/>.
/// </summary>
/// <param name="item">Sample being looked up.</param>
/// <param name="result">
/// Accumulator shared across the recursion; its pair and distance are overwritten whenever a
/// smaller distance is found.
/// </param>
public void CountClosestClusterPairByWidthSearch(Sample item, ClusterPairTestResult result)
{
    if (!this.IsLeafNode)
    {
        // Inner node: descend into every child selected for this sample.
        var nodesToSearch = this.GetNodesToSearch(item);
        foreach (var node in nodesToSearch)
        {
            node.CountClosestClusterPairByWidthSearch(item, result);
        }

        return;
    }

    // A leaf is scored through its parent's cluster pairs; a leaf without a parent contributes nothing.
    if (this.Parent == null)
    {
        return;
    }

    double distance;
    int index;
    ClusterPair clPair = this.Parent.GetNearestClusterPairXBySDNLL_MDF(item, out distance, out index);

    // No console tracing here: this runs once per visited leaf for every lookup.
    if (distance < result.Distance)
    {
        result.ClusterPair = clPair;
        result.Distance = distance;
    }
}
/// <summary>
/// Classifies a sample. Inner nodes forward it to the child of the cluster pair chosen by the
/// SDNLL/MDF search; a leaf answers from its nearest X cluster pair.
/// </summary>
/// <param name="item">Sample to classify.</param>
/// <returns>X mean, Y mean, label and samples of the nearest cluster pair in the leaf reached.</returns>
public TestResult GetLabelOfCategory(Sample item)
{
    double distance;
    int index;

    if (!this.isLeafNode)
    {
        ClusterPair routingPair = this.GetNearestClusterPairXBySDNLL_MDF(item, out distance, out index);
        Node next = routingPair.CorrespondChild;
        return next.GetLabelOfCategory(item);
    }

    ClusterPair nearest = this.GetNearestClusterPairX(item, out distance, out index);

    TestResult answer = new TestResult();
    answer.ClusterMeanX = nearest.X.Mean;
    answer.ClusterMeanY = nearest.Y.Mean;
    answer.Label = nearest.X.Label;
    answer.Samples = nearest.Samples;
    return answer;
}
/// <summary>
/// Updates this node's clusters with a new sample: either opens a new cluster pair and recounts
/// the MDF space, or folds the sample into existing clusters.
/// </summary>
/// <param name="sample">Sample being added.</param>
/// <exception cref="InvalidOperationException">
/// A new cluster would have to be created but <c>Params.StoreItems</c> is off.
/// </exception>
private void UpdateClusters(Sample sample)
{
    // Cluster pairs ordered by the distance of their Y mean from the sample's Y, nearest first.
    List<Tuple<double, ClusterPair>> pairsByYDistance = this.GetDistancesAndClusterPairsY(sample);

    // New pair only while the node is below its bly bound and the nearest Y cluster is farther than deltaY.
    bool openNewPair = this.clusterPairs.Count < Params.bly && pairsByYDistance[0].Item1 > this.deltaY;
    if (openNewPair)
    {
        if (!Params.StoreItems)
        {
            throw new InvalidOperationException("Not possible create new cluster. No items available for MDF counting.");
        }

        this.CreateNewClusters(sample, this);

        // Recount the MDF space for the enlarged set of clusters.
        this.CountC();
        this.CountGSOManifold();
        this.CountMDFOfVectors();
        this.CountMDFMeans();
        this.CountCovarianceMatricesMDF();
        this.CountCovarianceMatrixMeanMDF();
        return;
    }

#warning TODO update ceratin portion
    // Update the nearest portion p of the Y clusters (always at least one).
    int pairsToUpdate = (int)((pairsByYDistance.Count - 1) * Params.p) + 1;
    for (int rank = 0; rank < pairsToUpdate; rank++)
    {
        pairsByYDistance[rank].Item2.Y.AddItem(sample.Y, sample.Label);
    }

    Vector newItem = new Vector(sample.X.Values.ToArray());
    newItem.Label = sample.Label;
    newItem.CountMDF(this.gSOManifold, this.c);

    // Only the X cluster of the nearest pair receives the sample.
    ClusterPair nearestPair = pairsByYDistance[0].Item2;
    nearestPair.X.AddItemNonLeaf(newItem);
    if (Params.StoreSamples)
    {
        nearestPair.Samples.Add(sample);
    }

    this.UpdateMeanAndVarianceMdf(newItem);
    this.CountCovarianceMatrixMeanMDF();
}
/// <summary>
/// Adds a sample to this node's cluster pairs without any spawning step: opens a new pair when
/// allowed, otherwise adds the sample to the nearest pair.
/// </summary>
/// <param name="sample">Sample being added.</param>
private void UpdateClusterPairsX_ForSwapping(Sample sample)
{
    double nearestDistance;
    int nearestIndex;
    ClusterPair nearest = this.GetNearestClusterPairX(sample, out nearestDistance, out nearestIndex);

    // New pair only while below the blx bound and the nearest X cluster is farther than deltaX.
    bool openNewPair = clusterPairs.Count < this.blx && nearestDistance > this.deltaX;
    if (!openNewPair)
    {
        nearest.AddItem(sample);
        return;
    }

    this.CreateNewClusters(sample, this);
}
/// <summary>
/// Adds a sample to this node's cluster pairs and, when <c>GetNSPP()</c> exceeds <c>Params.bs</c>,
/// runs the configured swap strategy and recounts the MDF space.
/// </summary>
/// <param name="sample">Sample being added.</param>
/// <exception cref="InvalidOperationException">
/// Swap type 3 or 4 is selected and the clustering step failed ten times in a row.
/// </exception>
private void UpdateClusterPairsX(Sample sample)
{
    double nearestDistance;
    int nearestIndex;
    ClusterPair nearest = this.GetNearestClusterPairX(sample, out nearestDistance, out nearestIndex);

    // New pair only while below the blx bound and the nearest X cluster is farther than deltaX.
    bool openNewPair = clusterPairs.Count < this.blx && nearestDistance > this.deltaX;
    if (openNewPair)
    {
        this.CreateNewClusters(sample, this);
    }
    else
    {
        nearest.AddItem(sample);
    }

    // Nothing more to do unless the spawn threshold is exceeded.
    if (!(this.GetNSPP() > Params.bs))
    {
        return;
    }

    if (Params.SwapType == 1)
    {
        this.EvaluateSwap();
        this.Swap();
    }

    if (Params.SwapType == 2)
    {
        this.EvaluateSwap();
        this.Swap_Modified();
    }

    if (Params.SwapType == 3)
    {
        // Up to ten attempts; the tenth failure is fatal.
        int failedAttempts = 0;
        while (!this.KMeansPlusPlusClustering())
        {
            failedAttempts++;
            if (failedAttempts == 10)
            {
                throw new InvalidOperationException("Not successfull keans clustering.");
            }
        }

        this.EvaluateSwap();
        this.Swap();
    }

    if (Params.SwapType == 4)
    {
        // Up to ten attempts; the tenth failure is fatal.
        int failedAttempts = 0;
        while (!this.KMeansClusteringY())
        {
            failedAttempts++;
            if (failedAttempts == 10)
            {
                throw new InvalidOperationException("Not successfull keans clustering.");
            }
        }

        this.EvaluateSwap();
        this.Swap();
    }

    // Count the most discriminating features space and its statistics.
    this.CountC();
    this.CountGSOManifold();
    this.CountMDFOfVectors();
    this.CountMDFMeans();
    this.CountCovarianceMatricesMDF();
    this.CountCovarianceMatrixMeanMDF();
    this.CountMeanAndVarianceMDF();

    // Cluster items are dropped afterwards unless the configuration asks to keep them.
    if (!Params.StoreItems)
    {
        this.DisposeClustersItems();
    }
}
/// <summary>
/// Maintains a running mean of sample X vectors per label and returns a copy of the mean for
/// the given sample's label after including this sample.
/// </summary>
/// <remarks>
/// NOTE(review): the mean is built from <c>sample.X</c> although the method name speaks of outputs — confirm this is intended.
/// </remarks>
/// <param name="sample">Sample to fold into the mean of its label.</param>
/// <returns>A new vector holding the updated mean values.</returns>
public Vector GetOutputFromKnownSamples(Sample sample)
{
    if (this.outputs.ContainsKey(sample.Label))
    {
        var known = this.outputs[sample.Label];
        known.Count++;
        int count = known.Count;

        // Incremental mean: mean * (n - 1) / n + x / n.
        known.Mean.Multiply(((double)count - 1.0) / (double)count);
        Vector contribution = new Vector(sample.X.Values.ToArray());
        contribution.Multiply(1 / (double)count);
        known.Mean.Add(contribution);
    }
    else
    {
        // First sample with this label: its X vector is the mean.
        MappedValue first = new MappedValue();
        first.Mean = new Vector(sample.X.Values.ToArray());
        first.Count = 1;
        this.outputs[sample.Label] = first;
    }

    return new Vector(this.outputs[sample.Label].Mean.Values.ToArray());
}
/// <summary>
/// Finds the cluster pair whose X cluster returns the smallest <c>GetMDFDistanceFromMDFMean</c>
/// value for the sample's most-discriminating-features (MDF) vector.
/// </summary>
/// <param name="sample">Sample whose X values are converted to an MDF vector.</param>
/// <param name="distance">Smallest distance found; stays <c>double.MaxValue</c> if no pair was closer than that.</param>
/// <param name="index">Position of the returned pair in the pair list; stays -1 if no pair was closer than <c>double.MaxValue</c>.</param>
/// <returns>The nearest pair, or the first pair if none was closer than <c>double.MaxValue</c>.</returns>
private ClusterPair GetNearestClusterPairXMDF(Sample sample, out double distance, out int index)
{
    // Convert the input vector to an MDF vector: subtract C, then multiply by gSOManifold.T.
    ILArray<double> inputValues = sample.X.Values.ToArray();
    ILArray<double> centered = inputValues - C;
    ILArray<double> mdfVector = ILMath.multiply(this.gSOManifold.T, centered.ToArray());

    distance = double.MaxValue;
    index = -1;
    ClusterPair nearest = clusterPairs[0];

    for (int position = 0; position < clusterPairs.Count; position++)
    {
        ClusterPair candidate = clusterPairs[position];
        double candidateDistance = candidate.X.GetMDFDistanceFromMDFMean(mdfVector);
        if (candidateDistance < distance)
        {
            distance = candidateDistance;
            index = position;
            nearest = candidate;
        }
    }

    return nearest;
}
/// <summary>
/// Adds a sample to both clusters of the pair and appends it to the pair's sample list.
/// </summary>
/// <param name="s">Sample whose X goes to the X cluster and whose Y goes to the Y cluster.</param>
public void AddItem(Sample s)
{
    var inputPart = s.X;
    this.X.AddItem(inputPart, s.Label);

    var outputPart = s.Y;
    this.Y.AddItem(outputPart, s.Label);

    this.samples.Add(s);
}
/// <summary>
/// Runs the width search from this node and returns the best cluster pair it found.
/// </summary>
/// <param name="item">Sample being looked up.</param>
/// <returns>
/// The pair left in the search accumulator; an empty <c>ClusterPair</c> if the search never
/// found a distance below <c>double.MaxValue</c>.
/// </returns>
public ClusterPair GetTestResultByWidthSearch(Sample item)
{
    // The accumulator starts with the worst possible distance and a placeholder pair.
    ClusterPairTestResult best = new ClusterPairTestResult();
    best.Distance = double.MaxValue;
    best.ClusterPair = new ClusterPair();

    this.CountClosestClusterPairByWidthSearch(item, best);

    return best.ClusterPair;
}
/// <summary>
/// Adds a sample to the tree by handing it to the root node.
/// </summary>
/// <param name="sample">Sample to add to the tree.</param>
public void UpdateTree(Sample sample)
{
    var rootNode = this.root;
    rootNode.UpdateNode(sample);
}
/// <summary>
/// Adds a sample to this node. A leaf updates its own cluster pairs; an inner node updates its
/// clusters and then passes the sample on to the child of the cluster pair chosen by the SDNLL/MDF search.
/// </summary>
/// <param name="sample">Sample to add; its Y must not be null.</param>
/// <exception cref="InvalidOperationException">The sample's Y is null.</exception>
public void UpdateNode(Sample sample)
{
    // Validate before touching any state, so a rejected sample is never left in the sample list.
    if (sample.Y == null)
    {
        throw new InvalidOperationException("Output Y of sample is null");
    }

    // Keep the sample itself only when the configuration asks for it.
    if (Params.StoreSamples)
    {
        this.samples.Add(sample);
    }

    this.countOfSamples++;

    if (this.isLeafNode)
    {
        if (this.countOfSamples == 1)
        {
            // First sample in this leaf: create the first clusters and cluster pair.
            this.CreateNewClusters(sample, this);
        }
        else
        {
            this.UpdateClusterPairsX(sample);
        }

        return;
    }

    // Inner node: update the clusters, then route the sample to the matching child.
    this.UpdateClusters(sample);

    double distance;
    int index;
    ClusterPair nearestClPair = this.GetNearestClusterPairXBySDNLL_MDF(sample, out distance, out index);
    Node next = nearestClPair.CorrespondChild;
    next.UpdateNode(sample);
}
/// <summary>
/// Runs the width search from the root and wraps the cluster pair it returns in a test result.
/// </summary>
/// <param name="item">Sample being looked up.</param>
/// <returns>X mean, Y mean and X label of the returned cluster pair.</returns>
public TestResult GetTestResultByWidthSearch(Sample item)
{
    ClusterPair found = root.GetTestResultByWidthSearch(item);

    TestResult outcome = new TestResult();
    outcome.ClusterMeanX = found.X.Mean;
    outcome.ClusterMeanY = found.Y.Mean;
    outcome.Label = found.X.Label;
    return outcome;
}
/// <summary>
/// Counts a sample and, on leaf nodes only, adds it to the cluster pairs without any spawning step.
/// </summary>
/// <param name="sample">Sample being added.</param>
public void UpdateNode_ForSwapping(Sample sample)
{
    this.countOfSamples++;

    // Inner nodes only count the sample.
    if (!this.isLeafNode)
    {
        return;
    }

    if (this.countOfSamples != 1)
    {
        this.UpdateClusterPairsX_ForSwapping(sample);
        return;
    }

    // First sample in this leaf: create the first clusters and cluster pair.
    this.CreateNewClusters(sample, this);
}
/// <summary>
/// Returns the distinct child nodes that correspond to the cluster pairs selected for the sample.
/// </summary>
/// <param name="sample">Sample being looked up.</param>
/// <returns>One node per distinct child id, in order of first appearance among the selected pairs.</returns>
public List<Node> GetNodesToSearch(Sample sample)
{
    List<ClusterPair> candidates = this.GetClosestClusterPairsAndSDNLLDistances_MDF(sample);

    // Several pairs can point at the same child; keep each child once.
    var pairsByChildId = candidates.GroupBy(pair => pair.CorrespondChild.Id);
    return pairsByChildId.Select(sameChild => sameChild.First().CorrespondChild).ToList();
}
/// <summary>
/// Finds the cluster pair whose Y mean is closest to the sample's Y vector.
/// </summary>
/// <param name="sample">Sample to compare against the Y means.</param>
/// <param name="distance">Smallest distance found; stays <c>double.MaxValue</c> if no pair was closer than that.</param>
/// <returns>The nearest pair, or the first pair if none was closer than <c>double.MaxValue</c>.</returns>
private ClusterPair GetNearestClusterPairY(Sample sample, out double distance)
{
    distance = double.MaxValue;
    ClusterPair closestPair = clusterPairs[0];

    foreach (ClusterPair item in clusterPairs)
    {
        // Measure once per pair and reuse the value for both the comparison and the result.
        double newDistance = item.Y.Mean.GetDistance(sample.Y);
        if (newDistance < distance)
        {
            distance = newDistance;
            closestPair = item;
        }
    }

    return closestPair;
}