/// <summary>
/// Checks that querying the vocabulary with the exact vector of a known word
/// returns that same vector and word as the single nearest neighbor.
/// </summary>
public void FindWordFromExactVector()
{
    const string word = "hello";
    float[] vector = _vocabulary.VectorFromLabel(word);

    // Recover the vector and its word
    LabeledVector returnVector = _vocabulary.FindNearestNeighbors(vector, 1)[0].Item2;

    Assert.True(vector.SequenceEqual(returnVector.Vector));
    // Expected value first, actual second, so a failing assertion reports the roles correctly.
    Assert.Equal(word, returnVector.Label);
}
/// <summary>
/// Builds the model by copying every training row and its label into <c>Points</c>
/// and storing the neighbor count and distance function.
/// </summary>
/// <param name="trainingData">Provides the feature rows (<c>Features</c>) and the matching labels (<c>Labels</c>).</param>
/// <param name="numNeighbors">Value stored in <c>K</c>.</param>
/// <param name="distanceFunction">Delegate stored in <c>DistanceFunction</c>.</param>
public NearestNeighborsModel(LabelledData<T> trainingData, int numNeighbors, Func<double[], double[], double> distanceFunction)
{
    K = numNeighbors;
    DistanceFunction = distanceFunction;

    int rowCount = trainingData.Features.RowCount;
    Points = new LabeledVector[rowCount];

    // NOTE(review): the elements are written field-by-field without being constructed first,
    // which presumably relies on LabeledVector being a value type — confirm.
    for (int row = 0; row < rowCount; row++)
    {
        Points[row].features = trainingData.Features[row];
        Points[row].label = trainingData.Labels[row];
    }
}
/// <summary>
/// Checks that querying the vocabulary with a vector slightly offset from a known word's
/// vector still returns that word (and its stored vector) as the single nearest neighbor.
/// </summary>
public void FindWordFromApproximateVector()
{
    const string word = "beautiful";

    // Single.Epsilon (~1.4e-45) is absorbed when added to a normal-range float, which would
    // leave the "nearby" vector identical to the original. Use an offset that is actually
    // representable next to typical component values, yet small enough not to change the
    // nearest neighbor.
    const float perturbation = 1e-4f;

    float[] vector = _vocabulary.VectorFromLabel(word);

    // Add a small vector
    float[] nearbyVector = new float[vector.Length];
    for (int i = 0; i < vector.Length; i++)
    {
        nearbyVector[i] = vector[i] + perturbation;
    }

    // Find the vector and its word from the nearby vector
    LabeledVector returnVector = _vocabulary.FindNearestNeighbors(nearbyVector, 1)[0].Item2;

    Assert.True(vector.SequenceEqual(returnVector.Vector));
    // Expected value first, actual second, so a failing assertion reports the roles correctly.
    Assert.Equal(word, returnVector.Label);
}
/// <summary>
/// Creates a new labeled vector consisting of the frame's features followed by the
/// per-partition class totals of <paramref name="queue"/>, where every partition is
/// normalized so that its entries sum to 1.0 (or are all 0.0 when the partition sum is not positive).
/// </summary>
/// <param name="frame">Vector whose label and features are carried over to the result.</param>
/// <param name="queue">
/// Rows of per-class values. Row <c>i</c> is accumulated into partition
/// <c>i / nSlotsPerQueuePartition</c>. Must contain at least one row; the length of the
/// first row is used as the class count.
/// </param>
/// <param name="nQueuePartitions">Number of partitions appended to the frame's features.</param>
/// <param name="nSlotsPerQueuePartition">Number of consecutive queue rows that share one partition; must be non-zero.</param>
/// <returns>A vector labeled like <paramref name="frame"/> with the normalized partition totals appended.</returns>
private static LabeledVector<string> CreateAugmentedVector(LabeledVector<string> frame, double[][] queue, int nQueuePartitions, int nSlotsPerQueuePartition)
{
    int nClassCount = queue[0].Length;
    double[] rgAppend = new double[nQueuePartitions * nClassCount];

    // Accumulate every queue row into the slot range of the partition it belongs to.
    for (int i = 0; i < queue.Length; i++)
    {
        int nPartitionOffset = (i / nSlotsPerQueuePartition) * nClassCount;
        for (int j = 0; j < queue[i].Length; j++)
        {
            rgAppend[nPartitionOffset + j] += queue[i][j];
        }
    }

    // Normalize each PARTITION (not the whole queue) from 0.0 to 1.0.
    // Iterating partition by partition also handles nClassCount == 1, where every
    // single element is its own partition (including the one at index 0).
    for (int nPartition = 0; nPartition < nQueuePartitions; nPartition++)
    {
        int nStart = nPartition * nClassCount;
        int nEnd = nStart + nClassCount;

        double fSum = 0.0;
        for (int j = nStart; j < nEnd; j++)
        {
            fSum += rgAppend[j];
        }

        for (int j = nStart; j < nEnd; j++)
        {
            rgAppend[j] = (fSum > 0.0) ? (rgAppend[j] / fSum) : 0.0;
        }
    }

    return new LabeledVector<string>(frame.Label, frame.Features.Concat(rgAppend));
}
/// <summary>
/// Presents a labeled vector to this sphere and, recursively, to its children. Where the
/// vector is enclosed, either a child sphere is created (regular mode) or the vector is
/// cached per label and used to build a discriminant / trigger child creation (LDA mode).
/// </summary>
/// <param name="lvector">The labeled vector being presented.</param>
/// <param name="measure">Distance function used for the enclosure tests and for sizing new children.</param>
/// <param name="parallelStrategy">
/// When <c>Multithreaded</c>, each direct child is processed in its own task; deeper levels
/// are always processed single-threaded.
/// </param>
/// <param name="spawnUsingLDA">Selects the LDA spawn rules instead of the regular spawn rule.</param>
/// <returns>The number of child spheres added in this sphere and all of its descendants.</returns>
public int Spawn(LabeledVector<L> lvector, DistanceDelegate measure, ParallelStrategy parallelStrategy, bool spawnUsingLDA)
{
    int nSpawnCount = 0;

    if (this.DoesEncloseVector(lvector, measure))
    {
        if (parallelStrategy == ParallelStrategy.Multithreaded)
        {
            List<Task> lstSpawnThreads = new List<Task>();

            foreach (SphereEx<L> child in this.Children)
            {
                lstSpawnThreads.Add(Task.Factory.StartNew(c =>
                {
                    int nChildSpawnCount = ((SphereEx<L>)c).Spawn(lvector, measure, ParallelStrategy.SingleThreaded, spawnUsingLDA);

                    // The counter is shared by all tasks; a plain "+=" is a read-modify-write
                    // that can lose updates when two tasks finish at the same time.
                    System.Threading.Interlocked.Add(ref nSpawnCount, nChildSpawnCount);
                }, child, TaskCreationOptions.LongRunning));
            }

            Task.WaitAll(lstSpawnThreads.ToArray());
        }
        else
        {
            foreach (SphereEx<L> child in this.Children)
            {
                nSpawnCount += child.Spawn(lvector, measure, ParallelStrategy.SingleThreaded, spawnUsingLDA);
            }
        }

        if (!spawnUsingLDA)
        {
            // Regular spawn: a differently-labeled vector that is not the center of this
            // sphere and is not enclosed by any child becomes a new child.
            if (!this.Label.Equals(lvector.Label) && !Vector.EqualsEx(this, lvector) && !this.DoesAtLeastOneChildEncloseVector(lvector, measure))
            {
                this.AddChild(new SphereEx<L>(this.Radius - measure(this, lvector), lvector));
                nSpawnCount++;
            }
        }
        else
        {
            // LDA spawn: the label and the exact location of lvector do not matter here,
            // only that no child already encloses it.
            if (!this.DoesAtLeastOneChildEncloseVector(lvector, measure))
            {
                bool bContains = false;
                List<LabeledVector<L>> lst;

                if (!this.LDAVectors.TryGetValue(lvector.Label, out lst))
                {
                    this.LDAVectors.Add(lvector.Label, (lst = new List<LabeledVector<L>>()));
                }
                else
                {
                    bContains = lst.Any(v => Vector.EqualsEx(v, lvector));
                }

                if (!bContains)
                {
                    lst.Add(lvector);

                    if (this.LDAVectors.Keys.Count == 2 && !this.Children.Any())
                    {
                        // Exactly 2 classes cached and no children: try to build a discriminant.
                        //  - If it separates the classes completely, keep it.
                        //  - Otherwise remove the vector that was just added and clear the cache.
                        bool bIsSeparable = false;
                        DiscriminantEx<L> discriminant = null;

                        try
                        {
                            bIsSeparable = LDAEx.IsCompletelySeparatedWithDiscriminant(this.LDAVectors.ElementAt(0).Value, this.LDAVectors.ElementAt(1).Value, out discriminant);
                        }
                        catch
                        {
                            // Deliberately consumed: any failure is treated as "not separable".
                        }

                        if (!bIsSeparable)
                        {
                            lst.RemoveAt(lst.Count - 1); // Faster than .Remove(): no linear search

                            // NOTE: creating children from the cached classes and re-presenting
                            // lvector to them is currently disabled; the cache is only cleared.
                            this.LDAVectors.Clear();
                        }
                        else
                        {
                            this.DiscriminantEx = discriminant;
                        }
                    }
                    else if (this.LDAVectors.Keys.Count > 2)
                    {
                        // 3 or more classes cached: create one child per OLDER label-set,
                        // centered on that set's centroid, then clear the cache.
                        foreach (KeyValuePair<L, List<LabeledVector<L>>> kvp in this.LDAVectors)
                        {
                            if (!kvp.Key.Equals(lvector.Label))
                            {
                                Vector vectorCentroid = Vector.Centroid(kvp.Value);

                                if (!Vector.EqualsEx(this, vectorCentroid))
                                {
                                    SphereEx<L> child = new SphereEx<L>(this.Radius - measure(this, vectorCentroid), kvp.Key, (IVector)vectorCentroid);
                                    this.AddChild(child);
                                    nSpawnCount++;
                                }
                            }
                        }

                        // NOTE: re-presenting lvector to the newly created children is
                        // currently disabled; the cache is only cleared.
                        this.LDAVectors.Clear();
                    }

                    // Note: If this.LDAVectors.Keys.Count == 1, don't do anything additional.
                }
            }
        }
    }

    return nSpawnCount;
}
/// <summary>
/// Creates a sphere from a radius and a labeled vector; all initialization is delegated
/// to the base-class constructor.
/// </summary>
/// <param name="fRadius">Radius forwarded to the base constructor.</param>
/// <param name="lvector">Labeled vector forwarded to the base constructor.</param>
public SphereEx(double fRadius, LabeledVector<L> lvector)
    : base(fRadius, lvector)
{
}
/// <summary>
/// Presents a labeled vector to this sphere and, recursively, to its children. Where the
/// vector is enclosed, either a child sphere is created (regular rule) or, on a sphere
/// without children in LDA mode, the vector is cached per label and used to build a
/// discriminant or to trigger a re-spawn of all cached vectors.
/// </summary>
/// <param name="lvector">The labeled vector being presented.</param>
/// <param name="measure">Distance function used for the enclosure tests and for sizing new children.</param>
/// <param name="parallelStrategy">
/// When <c>Multithreaded</c>, each direct child is processed in its own task; deeper levels
/// are always processed single-threaded.
/// </param>
/// <param name="spawnUsingLDA">Selects the LDA spawn rules instead of the regular spawn rule.</param>
/// <returns>The number of child spheres added in this sphere and all of its descendants.</returns>
public int Spawn(LabeledVector<L> lvector, DistanceDelegate measure, ParallelStrategy parallelStrategy, bool spawnUsingLDA)
{
    int nSpawnCount = 0;

    if (this.DoesEncloseVector(lvector, measure))
    {
        if (parallelStrategy == ParallelStrategy.Multithreaded)
        {
            List<Task> lstSpawnThreads = new List<Task>();

            foreach (SphereEx<L> child in this.Children)
            {
                lstSpawnThreads.Add(Task.Factory.StartNew(c =>
                {
                    int nChildSpawnCount = ((SphereEx<L>)c).Spawn(lvector, measure, ParallelStrategy.SingleThreaded, spawnUsingLDA);

                    // The counter is shared by all tasks; a plain "+=" is a read-modify-write
                    // that can lose updates when two tasks finish at the same time.
                    System.Threading.Interlocked.Add(ref nSpawnCount, nChildSpawnCount);
                }, child, TaskCreationOptions.LongRunning));
            }

            Task.WaitAll(lstSpawnThreads.ToArray());
        }
        else
        {
            foreach (SphereEx<L> child in this.Children)
            {
                nSpawnCount += child.Spawn(lvector, measure, ParallelStrategy.SingleThreaded, spawnUsingLDA);
            }
        }

        // LDA is only applied at nodes with no children (leaves); everywhere else the
        // regular spawn rule is used.
        if (!spawnUsingLDA || this.Children.Any())
        {
            nSpawnCount += this.SpawnRegularChild(lvector, measure);
        }
        else
        {
            // Note that LDA Spawn doesn't work with SquaredEuclideanDistance. A future
            // additional check could test whether the vector is contained by the hypersphere
            // under both SquaredEuclideanDistance and EuclideanDistance; until then this is
            // left as is.
            bool bContains = false;
            List<KeyValuePair<int, LabeledVector<L>>> lst;

            if (!this.LDAVectors.TryGetValue(lvector.Label, out lst))
            {
                this.LDAVectors.Add(lvector.Label, (lst = new List<KeyValuePair<int, LabeledVector<L>>>()));
            }
            else
            {
                bContains = lst.Any(kvp => Vector.EqualsEx(kvp.Value, lvector));
            }

            if (!bContains)
            {
                // The key is the number of vectors cached so far, i.e. the insertion order.
                int nInsertionIndex = this.LDAVectors.Sum(kvp => kvp.Value.Count);
                lst.Add(new KeyValuePair<int, LabeledVector<L>>(nInsertionIndex, lvector));

                if (this.LDAVectors.Keys.Count == 2)
                {
                    // Exactly 2 classes cached: keep a discriminant if it separates them
                    // completely, otherwise re-spawn everything that was cached.
                    bool bIsSeparable = false;
                    DiscriminantEx<L> discriminant = null;

                    try
                    {
                        bIsSeparable = LDAEx.IsCompletelySeparatedWithDiscriminant(
                            this.LDAVectors.ElementAt(0).Value.Select(kvp => kvp.Value).ToList(),
                            this.LDAVectors.ElementAt(1).Value.Select(kvp => kvp.Value).ToList(),
                            out discriminant);
                    }
                    catch
                    {
                        // Deliberately consumed: any failure is treated as "not separable".
                    }

                    if (!bIsSeparable)
                    {
                        nSpawnCount += this.RespawnCachedVectors(measure);
                    }
                    else if (discriminant != null)
                    {
                        this.DiscriminantEx = discriminant;
                    }
                }
                else if (this.LDAVectors.Keys.Count > 2)
                {
                    // 3 or more classes cached: re-spawn everything that was cached.
                    nSpawnCount += this.RespawnCachedVectors(measure);
                }

                // Note: If this.LDAVectors.Keys.Count == 1, don't do anything additional.
            }
        }
    }

    return nSpawnCount;
}

/// <summary>
/// Applies the regular spawn rule: a differently-labeled vector that is not the center of
/// this sphere and is not enclosed by any child becomes a new child whose LDA cache is
/// seeded with that vector.
/// </summary>
/// <returns>1 when a child was added, otherwise 0.</returns>
private int SpawnRegularChild(LabeledVector<L> lvector, DistanceDelegate measure)
{
    if (this.Label.Equals(lvector.Label) || Vector.EqualsEx(this, lvector) || this.DoesAtLeastOneChildEncloseVector(lvector, measure))
    {
        return 0;
    }

    SphereEx<L> child = new SphereEx<L>(this.Radius - measure(this, lvector), lvector);
    child.LDAVectors.Add(child.Label, new List<KeyValuePair<int, LabeledVector<L>>>() { new KeyValuePair<int, LabeledVector<L>>(0, lvector) });
    this.AddChild(child);
    return 1;
}

/// <summary>
/// Empties the LDA cache, drops the current discriminant and presents the cached vectors to
/// this sphere again in their original insertion order: with the regular rule until the first
/// child has been spawned, with the LDA rule afterwards.
/// </summary>
/// <returns>The number of child spheres added while re-presenting the cached vectors.</returns>
private int RespawnCachedVectors(DistanceDelegate measure)
{
    int nSpawnCount = 0;
    bool spawnedAtLeastOnce = false;

    // Snapshot the cache before clearing it; the recursive Spawn calls below refill it.
    List<LabeledVector<L>> lstCache = this.LDAVectors.SelectMany(kvp => kvp.Value).OrderBy(kvp => kvp.Key).Select(kvp => kvp.Value).ToList();
    this.LDAVectors.Clear();
    this.DiscriminantEx = null;

    foreach (LabeledVector<L> v in lstCache)
    {
        if (!spawnedAtLeastOnce)
        {
            int count = this.Spawn(v, measure, ParallelStrategy.SingleThreaded, false);
            nSpawnCount += count;
            spawnedAtLeastOnce = count > 0;
        }
        else
        {
            nSpawnCount += this.Spawn(v, measure, ParallelStrategy.SingleThreaded, true);
        }
    }

    return nSpawnCount;
}