/// <summary>
/// Enumerates the stored values as key/value pairs. The key of each pair is
/// obtained by calling <c>IndexedVector&lt;TKey, TValue&gt;.GetKey</c> on the value.
/// </summary>
/// <returns>An enumerator yielding one pair per element of the underlying vector.</returns>
public IEnumerator<KeyValuePair<TKey, TValue>> GetEnumerator()
{
    foreach (var element in this.vector)
    {
        // The key is derived from the element itself rather than stored separately.
        var elementKey = IndexedVector<TKey, TValue>.GetKey(element);
        yield return new KeyValuePair<TKey, TValue>(elementKey, element);
    }
}
/// <summary>
/// Computes the dot product of two indexed vectors by summing the products of
/// the values stored at positions where both vectors carry the same index.
/// </summary>
/// <param name="vec1">The first vector.</param>
/// <param name="vec2">The second vector.</param>
/// <returns>
/// The accumulated product. When both arguments are the same instance the
/// result of <c>CbocModel.DotSelf</c> is returned instead.
/// </returns>
public static long Dot(IndexedVector vec1, IndexedVector vec2)
{
    // Same instance: delegate to the dedicated self-product routine.
    if (ReferenceEquals(vec1, vec2))
    {
        return CbocModel.DotSelf(vec1);
    }

    long sum = 0;
    var left = 0;
    var right = 0;

    // Advance whichever side holds the smaller index until the indices match.
    // NOTE(review): this finds every shared index only if both Index sequences
    // are sorted ascending - presumably an IndexedVector invariant; confirm.
    while (left < vec1.Count && right < vec2.Count)
    {
        var leftIndex = vec1.Index[left];
        var rightIndex = vec2.Index[right];

        if (rightIndex > leftIndex)
        {
            left++;
        }
        else if (leftIndex > rightIndex)
        {
            right++;
        }
        else
        {
            sum += vec1.Values[left] * vec2.Values[right];
            left++;
            right++;
        }
    }

    return sum;
}
/// <summary>
/// Tokenizes <paramref name="data"/> with the inner word tokenizer and yields one
/// vector per token that is built from the token's immediate neighbours.
/// </summary>
/// <remarks>
/// For every token a new vector labelled with the token's label is created. The
/// previous token (if any) is added shifted by 0 and the next token (if any) is
/// added shifted by twice the word tokenizer's dimension count. When the resulting
/// vector has no components, the token itself is yielded instead, shifted by the
/// word tokenizer's dimension count.
/// </remarks>
/// <param name="data">The text to tokenize.</param>
/// <returns>A lazily evaluated sequence with one vector per word token.</returns>
public IEnumerable<IVector> Tokenize(string data)
{
    // The inner tokenizer may return a lazily evaluated sequence; a hard cast to
    // IList would then throw InvalidCastException. Reuse the list when it already
    // is one, otherwise materialise it once so it can be indexed.
    var produced = _wordTokenizer.Tokenize(data);
    var tokens = produced as IList<IVector> ?? new List<IVector>(produced);

    for (int i = 0; i < tokens.Count; i++)
    {
        var previous = i - 1;
        var next = i + 1;
        var token = tokens[i];
        var vector = new IndexedVector(NumOfDimensions, token.Label);

        if (previous >= 0)
        {
            vector.AddInPlace(tokens[previous].Shift(0, NumOfDimensions));
        }

        if (next < tokens.Count)
        {
            vector.AddInPlace(tokens[next].Shift(_wordTokenizer.NumOfDimensions * 2, NumOfDimensions));
        }

        if (vector.ComponentCount == 0)
        {
            // No context contributed anything: fall back to the token itself.
            yield return token.Shift(_wordTokenizer.NumOfDimensions, NumOfDimensions);
        }
        else
        {
            yield return vector;
        }
    }
}
/// <summary>
/// Exercises Add, Remove and Clear on an initially empty indexed vector and checks
/// that key lookups, the Count property and the enumerated count stay in agreement
/// after every mutation.
/// </summary>
public void IndexedVector_Mutations()
{
    var vector = new IndexedVector<string, VectorMember<string>>();
    var member = new VectorMember<string> { Key = "foobar" };

    // Checks both the Count property and the LINQ Count() (which enumerates).
    void AssertCount(int expected)
    {
        Assert.AreEqual(expected, vector.Count);
        Assert.AreEqual(expected, vector.Count());
    }

    void AssertMissing()
    {
        Assert.IsFalse(vector.ContainsKey(member.Key));
        Assert.IsFalse(vector.TryGetValue(member.Key, out _));
    }

    void AssertPresent()
    {
        Assert.IsTrue(vector.ContainsKey(member.Key));
        Assert.IsTrue(vector.TryGetValue(member.Key, out _));
    }

    // Empty to begin with.
    AssertMissing();
    AssertCount(0);

    // First add succeeds, adding the same item again is rejected.
    Assert.IsTrue(vector.Add(member));
    Assert.IsFalse(vector.Add(member));
    AssertPresent();
    AssertCount(1);

    // First remove succeeds, removing again reports nothing was removed.
    Assert.IsTrue(vector.Remove(member.Key));
    Assert.IsFalse(vector.Remove(member.Key));
    AssertCount(0);

    // The item can be added again after removal.
    Assert.IsTrue(vector.Add(member));
    AssertCount(1);

    // Clear empties the vector.
    vector.Clear();
    AssertCount(0);
    AssertMissing();
}
/// <summary>
/// Splits <paramref name="source"/> into tokens and returns one vector per token.
/// </summary>
/// <remarks>
/// Characters are lower-cased first. A character outside the range
/// [UnicodeStartingPoint, UnicodeStartingPoint + VectorWidth] is skipped entirely:
/// it neither contributes to the current token nor ends it. Letters and digits are
/// accumulated into the current embedding; any other in-range character ends the
/// current token. Each vector is labelled with the slice of
/// <paramref name="source"/> spanning the token.
/// </remarks>
/// <param name="source">The characters to tokenize.</param>
/// <returns>A list of token vectors; empty when <paramref name="source"/> is empty.</returns>
public IEnumerable<IVector> Tokenize(Memory<char> source)
{
    var result = new List<IVector>();

    if (source.Length == 0)
    {
        return result;
    }

    var components = new SortedList<int, float>();
    var chars = source.Span;
    int tokenStart = 0;
    int position;

    // Builds the vector for the token occupying [start, end) of the source.
    IndexedVector MakeVector(int start, int end)
    {
        return new IndexedVector(components, VectorWidth, source.Slice(start, end - start));
    }

    for (position = 0; position < source.Length; position++)
    {
        char current = char.ToLower(chars[position]);

        bool outsideWindow = current < UnicodeStartingPoint || current > UnicodeStartingPoint + VectorWidth;
        if (outsideWindow)
        {
            continue;
        }

        if (char.IsLetterOrDigit(current))
        {
            components.AddOrAppendToComponent(current);
            continue;
        }

        // Delimiter: emit the pending token (if any) and restart after it.
        if (components.Count > 0)
        {
            var completed = MakeVector(tokenStart, position);
            components.Clear();
            result.Add(completed);
        }

        tokenStart = position + 1;
    }

    // Emit the trailing token when the input does not end with a delimiter.
    if (components.Count > 0)
    {
        result.Add(MakeVector(tokenStart, position));
    }

    return result;
}
/// <summary>
/// Prints similarity diagnostics for two strings under <paramref name="model"/>.
/// </summary>
/// <remarks>
/// The first token vector of each string is compared (via <c>CosAngle</c>) against
/// every unit vector of the model's vector space; the resulting angle lists are
/// turned into two new vectors. These are then compared with each other and with
/// a reference vector whose components are 1 at position 0 and log10(i) elsewhere.
/// All results are written to the console.
/// </remarks>
/// <param name="first">The first string; must produce at least one token.</param>
/// <param name="second">The second string; must produce at least one token.</param>
/// <param name="model">The model supplying tokenization, vector width and angle computation.</param>
private static void Compare(string first, string second, IStringModel model)
{
    var referenceComponents = new List<float>(model.VectorWidth);
    var unitVectors = new List<IVector>();

    for (int axis = 0; axis < model.VectorWidth; axis++)
    {
        referenceComponents.Add(axis == 0 ? 1f : Convert.ToSingle(Math.Log10(axis)));

        // One-hot components: 1 on the current axis, 0 everywhere else.
        var oneHot = new List<float>(model.VectorWidth);
        for (int position = 0; position < model.VectorWidth; position++)
        {
            oneHot.Add(position == axis ? 1f : 0f);
        }

        unitVectors.Add(new IndexedVector(oneHot, model.VectorWidth));
    }

    var referenceVector = new IndexedVector(referenceComponents, model.VectorWidth);

    var firstNode = new VectorNode(model.Tokenize(first.ToCharArray()).First());
    var secondNode = new VectorNode(model.Tokenize(second.ToCharArray()).First());

    var firstAngles = new List<float>();
    var secondAngles = new List<float>();

    foreach (var unit in unitVectors)
    {
        firstAngles.Add(Convert.ToSingle(model.CosAngle(firstNode.Vector, unit)));
        secondAngles.Add(Convert.ToSingle(model.CosAngle(secondNode.Vector, unit)));
    }

    var firstProfile = new IndexedVector(firstAngles, model.VectorWidth);
    var secondProfile = new IndexedVector(secondAngles, model.VectorWidth);

    var angle = model.CosAngle(firstProfile, secondProfile);
    var angle1 = model.CosAngle(firstProfile, referenceVector);
    var angle2 = model.CosAngle(secondProfile, referenceVector);

    Console.WriteLine($"similarity: {angle}");
    Console.WriteLine($"bvector similarity 1: {angle1}");
    Console.WriteLine($"bvector similarity 2: {angle2}");
    Console.WriteLine($"base vector similarity: {Math.Min(angle1, angle2) / Math.Max(angle1, angle2)}");
}
/// <summary>
/// Gets an enumerator over the dictionary's entries, pairing every item of the
/// backing vector with the key returned by
/// <c>IndexedVector&lt;TKey, TValue&gt;.GetKey</c>.
/// </summary>
/// <returns>An enumerator of key/value pairs in backing-vector order.</returns>
public IEnumerator<KeyValuePair<TKey, TValue>> GetEnumerator()
{
    // The element count is read once, before the first item is produced.
    int total = this.backingVector.Count;
    int position = 0;

    while (position < total)
    {
        TValue current = this.backingVector[position];
        var currentKey = IndexedVector<TKey, TValue>.GetKey(current);

        yield return new KeyValuePair<TKey, TValue>(currentKey, current);

        ++position;
    }
}
/// <summary>
/// Lazily splits <paramref name="data"/> into tokens and yields one vector per token.
/// </summary>
/// <remarks>
/// Characters are lower-cased; letters and digits are accumulated into the current
/// embedding, and any other character ends the current token. Each vector is
/// labelled with the token's text taken from the original input.
/// </remarks>
/// <param name="data">The text to tokenize.</param>
/// <returns>A sequence of token vectors; empty when the input has no characters.</returns>
public IEnumerable<IVector> Tokenize(string data)
{
    ReadOnlyMemory<char> text = data.AsMemory();

    if (text.Length == 0)
    {
        yield break;
    }

    var components = new SortedList<int, float>();
    int tokenStart = 0;
    int position = 0;

    while (position < text.Length)
    {
        char current = char.ToLower(text.Span[position]);

        if (char.IsLetterOrDigit(current))
        {
            components.AddOrAppendToComponent(current);
        }
        else
        {
            // Delimiter: emit the pending token (if any) and restart after it.
            if (components.Count > 0)
            {
                var completed = BuildToken(tokenStart, position - tokenStart);
                components.Clear();
                yield return completed;
            }

            tokenStart = position + 1;
        }

        position++;
    }

    // Emit the trailing token when the input does not end with a delimiter.
    if (components.Count > 0)
    {
        yield return BuildToken(tokenStart, position - tokenStart);
    }

    // Builds the vector for the token of the given length starting at 'start'.
    IndexedVector BuildToken(int start, int length)
    {
        return new IndexedVector(components, NumOfDimensions, new string(text.Span.Slice(start, length)));
    }
}
/// <summary>
/// Computes the component-wise difference <c>a - b</c> of two vectors of equal size.
/// </summary>
/// <param name="a">The vector to subtract from.</param>
/// <param name="b">The vector to subtract.</param>
/// <returns>
/// A new vector with the same number of components as the inputs, whose i-th
/// component is <c>a[i] - b[i]</c>.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="a"/> or <paramref name="b"/> is <c>null</c>.
/// </exception>
/// <exception cref="ArgumentException">
/// The two vectors do not have the same number of components.
/// </exception>
public static Vector<double> Distance(Vector<double> a, Vector<double> b)
{
    if (ReferenceEquals(a, null))
    {
        throw new ArgumentNullException(nameof(a));
    }

    if (ReferenceEquals(b, null))
    {
        throw new ArgumentNullException(nameof(b));
    }

    if (a.Count != b.Count)
    {
        // ArgumentException derives from Exception, so existing catch blocks still match.
        throw new ArgumentException("Can not compute distance with different size of vectors", nameof(b));
    }

    int size = a.Count;
    IndexedVector difference = new IndexedVector(size);

    for (int i = 0; i < size; ++i)
    {
        difference[i] = a[i] - b[i];
    }

    return difference;
}
/// <summary>
/// Creates a copy of <paramref name="source"/> in which every value has been passed
/// through <paramref name="cloneItem"/>.
/// </summary>
/// <typeparam name="TKey">The key type of the vector.</typeparam>
/// <typeparam name="TValue">The value type of the vector.</typeparam>
/// <param name="source">The vector to copy; may be <c>null</c>.</param>
/// <param name="cloneItem">Produces the copy of a single value.</param>
/// <returns>
/// <c>null</c> when <paramref name="source"/> is <c>null</c>; otherwise a new
/// vector holding the cloned values.
/// </returns>
public static IIndexedVector<TKey, TValue>? Clone<TKey, TValue>(
    IIndexedVector<TKey, TValue>? source,
    Func<TValue, TValue> cloneItem)
    where TKey : notnull
    where TValue : class
{
    if (source is null)
    {
        return null;
    }

    // The copy is constructed with the source's element count.
    var copy = new IndexedVector<TKey, TValue>(source.Count);

    foreach (var entry in source)
    {
        copy.Add(cloneItem(entry.Value));
    }

    return copy;
}
/// <summary>
/// Prints how the first token vectors of two strings relate to a fixed base
/// vector whose components are 1, 2, 3, ... up to the model's number of dimensions.
/// </summary>
/// <param name="first">The first string; must produce at least one token.</param>
/// <param name="second">The second string; must produce at least one token.</param>
/// <param name="model">The model supplying tokenization, dimensions and angle computation.</param>
private static void CompareToBaseVector(string first, string second, IModel<string> model)
{
    // Base vector components: position i holds i + 1.
    var components = new float[model.NumOfDimensions];
    int position = 0;
    while (position < components.Length)
    {
        components[position] = position + 1f;
        position++;
    }

    var baseVector = new IndexedVector(components);

    var firstToken = model.Tokenize(first).First();
    var secondToken = model.Tokenize(second).First();

    var angle1 = model.CosAngle(baseVector, firstToken);
    var angle2 = model.CosAngle(baseVector, secondToken);

    Console.WriteLine($"first angle to base vector: {angle1}");
    Console.WriteLine($"second angle to base vector: {angle2}");
    Console.WriteLine($"base vector similarity: {Math.Min(angle1, angle2) / Math.Max(angle1, angle2)}");
}
public void CopyConstructorsTest() { string schema = $@" {MetadataHelpers.AllAttributes} namespace CopyConstructorTest; union Union {{ OuterTable, InnerTable, OuterStruct, InnerStruct }} // Optionally add more tables. table OuterTable ({MetadataKeys.SerializerKind}: ""Greedy"") {{ A:string (id: 0); B:byte (id: 1); C:ubyte (id: 2); D:int16 (id: 3); E:uint16 (id: 4); F:int32 (id: 5); G:uint32 (id: 6); H:int64 (id: 7); I:uint64 (id: 8); IntVector_List:[int] ({MetadataKeys.VectorKind}:""IList"", id: 9); IntVector_RoList:[int] ({MetadataKeys.VectorKind}:""IReadOnlyList"", id: 10); IntVector_Array:[int] ({MetadataKeys.VectorKind}:""Array"", id: 11); TableVector_List:[InnerTable] ({MetadataKeys.VectorKind}:""IList"", id: 12); TableVector_RoList:[InnerTable] ({MetadataKeys.VectorKind}:""IReadOnlyList"", id: 13); TableVector_Indexed:[InnerTable] ({MetadataKeys.VectorKind}:""IIndexedVector"", id: 14); TableVector_Array:[InnerTable] ({MetadataKeys.VectorKind}:""Array"", id: 15); ByteVector:[ubyte] ({MetadataKeys.VectorKind}:""Memory"", id: 16); ByteVector_RO:[ubyte] ({MetadataKeys.VectorKind}:""ReadOnlyMemory"", id: 17); UnionVal : Union (id: 19); VectorOfUnion : [Union] (id: 21); VectorOfUnion_RoList : [Union] (id: 23, {MetadataKeys.VectorKind}:""IReadOnlyList""); VectorOfUnion_Array : [Union] (id: 25, {MetadataKeys.VectorKind}:""Array""); }} struct InnerStruct {{ LongValue:int64; }} struct OuterStruct {{ Value:int; InnerStructVal:InnerStruct; }} table InnerTable {{ Name:string ({MetadataKeys.Key}); OuterStructVal:OuterStruct; }} "; OuterTable original = new OuterTable { A = "string", B = 1, C = 2, D = 3, E = 4, F = 5, G = 6, H = 7, I = 8, ByteVector = new byte[] { 1, 2, 3, }.AsMemory(), ByteVector_RO = new byte[] { 4, 5, 6, }.AsMemory(), IntVector_Array = new[] { 7, 8, 9, }, IntVector_List = new[] { 10, 11, 12, }.ToList(), IntVector_RoList = new[] { 13, 14, 15 }.ToList(), TableVector_Array = CreateInner("Rocket", "Molly", "Clementine"), TableVector_Indexed = new 
IndexedVector <string, InnerTable>(CreateInner("Pudge", "Sunshine", "Gypsy"), false), TableVector_List = CreateInner("Finnegan", "Daisy"), TableVector_RoList = CreateInner("Gordita", "Lunchbox"), UnionVal = new FlatBufferUnion <OuterTable, InnerTable, OuterStruct, InnerStruct>(new OuterStruct()), VectorOfUnion = new List <FlatBufferUnion <OuterTable, InnerTable, OuterStruct, InnerStruct> > { new(new OuterTable()), new(new InnerTable()), new(new OuterStruct()), new(new InnerStruct()), },