Example #1
0
 /// <summary>
 /// Lazily enumerates the items of the underlying vector as key/value pairs.
 /// The key of each pair is obtained from the item itself via
 /// <c>IndexedVector&lt;TKey, TValue&gt;.GetKey</c>.
 /// </summary>
 /// <returns>An enumerator yielding one pair per item in the underlying vector.</returns>
 public IEnumerator<KeyValuePair<TKey, TValue>> GetEnumerator()
 {
     foreach (var item in this.vector)
     {
         var key = IndexedVector<TKey, TValue>.GetKey(item);

         yield return new KeyValuePair<TKey, TValue>(key, item);
     }
 }
Example #2
0
        /// <summary>
        /// Computes the dot product of two sparse vectors by walking both index
        /// lists in lock-step and multiplying the values stored under equal indices.
        /// </summary>
        /// <param name="vec1">The first vector.</param>
        /// <param name="vec2">The second vector.</param>
        /// <returns>
        /// The sum of the products of the values whose indices match. When both
        /// arguments are the same instance, the result of <c>CbocModel.DotSelf</c>.
        /// </returns>
        public static long Dot(IndexedVector vec1, IndexedVector vec2)
        {
            // Same instance: delegate to the dedicated self-product routine.
            if (ReferenceEquals(vec1, vec2))
            {
                return CbocModel.DotSelf(vec1);
            }

            long sum = 0;
            int left = 0;
            int right = 0;

            // NOTE(review): the walk only pairs up every common index if both
            // index lists are in ascending order - confirm against IndexedVector.
            while (left < vec1.Count && right < vec2.Count)
            {
                var leftIndex = vec1.Index[left];
                var rightIndex = vec2.Index[right];

                if (rightIndex > leftIndex)
                {
                    // The left cursor is behind; let it catch up.
                    left++;
                    continue;
                }

                if (leftIndex > rightIndex)
                {
                    // The right cursor is behind; let it catch up.
                    right++;
                    continue;
                }

                // Both cursors point at the same index: accumulate and advance both.
                sum += vec1.Values[left] * vec2.Values[right];
                left++;
                right++;
            }

            return sum;
        }
Example #3
0
            /// <summary>
            /// Tokenizes <paramref name="data"/> with the wrapped word tokenizer and
            /// yields one vector per word token, built from that token's neighbours.
            /// </summary>
            /// <remarks>
            /// For every token a new <c>IndexedVector</c> carrying the token's label is
            /// created. The previous token (if any) is added after <c>Shift(0, ...)</c>
            /// and the next token (if any) after a shift of twice the word tokenizer's
            /// dimension count. When the resulting vector has no components, the token
            /// itself is yielded instead, shifted by one word-tokenizer dimension count.
            /// </remarks>
            /// <param name="data">The text to tokenize.</param>
            /// <returns>A lazily produced sequence with one vector per word token.</returns>
            public IEnumerable<IVector> Tokenize(string data)
            {
                // The word tokenizer may hand back a lazy sequence rather than a list;
                // a hard cast to IList would then throw InvalidCastException. Reuse the
                // list when it already is one, otherwise materialize it once so that
                // neighbours can be reached by index.
                var wordTokens = _wordTokenizer.Tokenize(data);
                var tokens = wordTokens as IList<IVector> ?? new List<IVector>(wordTokens);

                for (int i = 0; i < tokens.Count; i++)
                {
                    var context0 = i - 1;
                    var context1 = i + 1;
                    var token    = tokens[i];
                    var vector   = new IndexedVector(NumOfDimensions, token.Label);

                    // Preceding neighbour, when there is one.
                    if (context0 >= 0)
                    {
                        vector.AddInPlace(tokens[context0].Shift(0, NumOfDimensions));
                    }

                    // Following neighbour, when there is one.
                    if (context1 < tokens.Count)
                    {
                        vector.AddInPlace(tokens[context1].Shift(_wordTokenizer.NumOfDimensions * 2, NumOfDimensions));
                    }

                    if (vector.ComponentCount == 0)
                    {
                        // No context contributed any component: fall back to the token itself.
                        yield return token.Shift(_wordTokenizer.NumOfDimensions, NumOfDimensions);
                    }
                    else
                    {
                        yield return vector;
                    }
                }
            }
        /// <summary>
        /// Walks an initially empty indexed vector through add, duplicate add,
        /// remove, duplicate remove, re-add and clear, checking the reported
        /// counts and key lookups after each step.
        /// </summary>
        public void IndexedVector_Mutations()
        {
            var vector = new IndexedVector<string, VectorMember<string>>();
            var member = new VectorMember<string> { Key = "foobar" };

            // Checks both the Count property and the enumerating Count() call.
            void ExpectCount(int expected)
            {
                Assert.AreEqual(expected, vector.Count);
                Assert.AreEqual(expected, vector.Count());
            }

            // The member's key must be visible through both lookup APIs.
            void ExpectPresent()
            {
                Assert.IsTrue(vector.ContainsKey(member.Key));
                Assert.IsTrue(vector.TryGetValue(member.Key, out _));
            }

            // The member's key must be invisible through both lookup APIs.
            void ExpectAbsent()
            {
                Assert.IsFalse(vector.ContainsKey(member.Key));
                Assert.IsFalse(vector.TryGetValue(member.Key, out _));
            }

            // Freshly constructed: nothing in it.
            ExpectAbsent();
            ExpectCount(0);

            // The first add succeeds; adding the same item again is rejected.
            Assert.IsTrue(vector.Add(member));
            Assert.IsFalse(vector.Add(member));
            ExpectPresent();
            ExpectCount(1);

            // The first remove succeeds; removing the same key again is rejected.
            Assert.IsTrue(vector.Remove(member.Key));
            Assert.IsFalse(vector.Remove(member.Key));
            ExpectCount(0);

            // The item can be added again after removal.
            Assert.IsTrue(vector.Add(member));
            ExpectCount(1);

            // Clear leaves the vector empty.
            vector.Clear();
            ExpectCount(0);
            ExpectAbsent();
        }
Example #5
0
        /// <summary>
        /// Splits <paramref name="source"/> into tokens and returns one
        /// <c>IndexedVector</c> per token, labelled with the slice of
        /// <paramref name="source"/> the token was read from.
        /// </summary>
        /// <remarks>
        /// Characters are lower-cased first. A character outside the range
        /// <c>[UnicodeStartingPoint, UnicodeStartingPoint + VectorWidth]</c> is
        /// skipped entirely: it neither contributes to the current token nor ends it.
        /// In-range letters and digits are accumulated; any other in-range character
        /// ends the current token.
        /// </remarks>
        /// <param name="source">The characters to tokenize.</param>
        /// <returns>The list of token vectors; empty when <paramref name="source"/> is empty.</returns>
        public IEnumerable<IVector> Tokenize(Memory<char> source)
        {
            var result = new List<IVector>();

            if (source.Length == 0)
            {
                return result;
            }

            var components = new SortedList<int, float>();
            var chars      = source.Span;
            var tokenStart = 0;

            for (var position = 0; position < source.Length; position++)
            {
                char current = char.ToLower(chars[position]);

                // Out-of-range characters are ignored without touching the token state.
                if (current < UnicodeStartingPoint || current > UnicodeStartingPoint + VectorWidth)
                {
                    continue;
                }

                if (char.IsLetterOrDigit(current))
                {
                    components.AddOrAppendToComponent(current);
                    continue;
                }

                // A separator: flush whatever has been accumulated so far.
                if (components.Count > 0)
                {
                    var label  = source.Slice(tokenStart, position - tokenStart);
                    var vector = new IndexedVector(components, VectorWidth, label);

                    // NOTE(review): the same list is reused for the next token, which
                    // presumably relies on IndexedVector copying its input - confirm.
                    components.Clear();
                    result.Add(vector);
                }

                tokenStart = position + 1;
            }

            // Flush the trailing token, which runs to the end of the input.
            if (components.Count > 0)
            {
                var label = source.Slice(tokenStart, source.Length - tokenStart);

                result.Add(new IndexedVector(components, VectorWidth, label));
            }

            return result;
        }
Example #6
0
        /// <summary>
        /// Compares the first token of two strings by projecting each onto a set of
        /// unit vectors (one per dimension of the model) and prints the resulting
        /// similarity figures to the console.
        /// </summary>
        /// <param name="first">The first text; only its first token is used.</param>
        /// <param name="second">The second text; only its first token is used.</param>
        /// <param name="model">The model supplying tokenization, vector width and CosAngle.</param>
        private static void Compare(string first, string second, IStringModel model)
        {
            var weights = new List<float>(model.VectorWidth);
            var axes    = new List<IVector>();

            for (int axis = 0; axis < model.VectorWidth; axis++)
            {
                // Reference vector component: 1 for the first dimension, log10(axis) afterwards.
                float weight = axis == 0 ? 1 : Convert.ToSingle(Math.Log10(axis));
                weights.Add(weight);

                // Unit vector with a single 1 in position `axis`.
                var unit = new List<float>(model.VectorWidth);

                for (int slot = 0; slot < model.VectorWidth; slot++)
                {
                    unit.Add(slot == axis ? 1f : 0f);
                }

                axes.Add(new IndexedVector(unit, model.VectorWidth));
            }

            var reference = new IndexedVector(weights, model.VectorWidth);

            var firstNode  = new VectorNode(model.Tokenize(first.ToCharArray()).First());
            var secondNode = new VectorNode(model.Tokenize(second.ToCharArray()).First());

            // Project both documents onto every unit vector.
            var firstProjection  = new List<float>();
            var secondProjection = new List<float>();

            foreach (var unitVector in axes)
            {
                firstProjection.Add(Convert.ToSingle(model.CosAngle(firstNode.Vector, unitVector)));
                secondProjection.Add(Convert.ToSingle(model.CosAngle(secondNode.Vector, unitVector)));
            }

            var firstDoc  = new IndexedVector(firstProjection, model.VectorWidth);
            var secondDoc = new IndexedVector(secondProjection, model.VectorWidth);

            var similarity        = model.CosAngle(firstDoc, secondDoc);
            var firstToReference  = model.CosAngle(firstDoc, reference);
            var secondToReference = model.CosAngle(secondDoc, reference);
            var smaller           = Math.Min(firstToReference, secondToReference);
            var larger            = Math.Max(firstToReference, secondToReference);

            Console.WriteLine($"similarity: {similarity}");
            Console.WriteLine($"bvector similarity 1: {firstToReference}");
            Console.WriteLine($"bvector similarity 2: {secondToReference}");
            Console.WriteLine($"base vector similarity: {smaller / larger}");
        }
    /// <summary>
    /// Returns an enumerator over the dictionary's entries. Items are read from
    /// the backing vector by position, and the number of items to visit is
    /// captured once, when enumeration starts.
    /// </summary>
    /// <returns>An enumerator yielding each item paired with the key derived from it.</returns>
    public IEnumerator<KeyValuePair<TKey, TValue>> GetEnumerator()
    {
        int total    = this.backingVector.Count;
        int position = 0;

        while (position < total)
        {
            TValue current = this.backingVector[position];
            var key        = IndexedVector<TKey, TValue>.GetKey(current);

            yield return new KeyValuePair<TKey, TValue>(key, current);

            position++;
        }
    }
Example #8
0
        /// <summary>
        /// Lazily splits <paramref name="data"/> into tokens made of consecutive
        /// letters and digits, yielding one <c>IndexedVector</c> per token with the
        /// token's text as its label.
        /// </summary>
        /// <remarks>
        /// Characters are lower-cased before being added to the token's embedding;
        /// the label is taken from the input as written. Any character that is not a
        /// letter or digit ends the current token.
        /// </remarks>
        /// <param name="data">The text to tokenize.</param>
        /// <returns>A lazily produced sequence of token vectors.</returns>
        public IEnumerable<IVector> Tokenize(string data)
        {
            ReadOnlyMemory<char> text = data.AsMemory();

            if (text.Length == 0)
            {
                yield break;
            }

            var components = new SortedList<int, float>();
            var tokenStart = 0;

            for (var position = 0; position < text.Length; position++)
            {
                // The span is re-read on each use: span locals cannot live across a yield.
                char current = char.ToLower(text.Span[position]);

                if (char.IsLetterOrDigit(current))
                {
                    components.AddOrAppendToComponent(current);
                    continue;
                }

                // A separator: emit the token accumulated so far, if any.
                if (components.Count > 0)
                {
                    var label = new string(text.Span.Slice(tokenStart, position - tokenStart));
                    var token = new IndexedVector(components, NumOfDimensions, label);

                    // NOTE(review): the list is cleared and reused, which presumably
                    // relies on IndexedVector copying its input - confirm.
                    components.Clear();

                    yield return token;
                }

                tokenStart = position + 1;
            }

            // Emit the trailing token, which runs to the end of the input.
            if (components.Count > 0)
            {
                var label = new string(text.Span.Slice(tokenStart, text.Length - tokenStart));

                yield return new IndexedVector(components, NumOfDimensions, label);
            }
        }
Example #9
0
        /// <summary>
        /// Computes the component-wise difference <c>a - b</c> of two vectors of equal size.
        /// </summary>
        /// <param name="a">The vector to subtract from.</param>
        /// <param name="b">The vector to subtract.</param>
        /// <returns>A new vector whose i-th component is <c>a[i] - b[i]</c>.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="a"/> and <paramref name="b"/> have different counts.
        /// </exception>
        public static Vector<double> Distance(Vector<double> a, Vector<double> b)
        {
            if (a.Count != b.Count)
            {
                // ArgumentException derives from Exception, so callers that catch
                // Exception keep working while the failure becomes more specific.
                throw new ArgumentException("Can not compute distance with different size of vectors");
            }

            IndexedVector difference = new IndexedVector(b.Count);

            for (int i = 0; i < a.Count; ++i)
            {
                difference[i] = a[i] - b[i];
            }

            return difference;
        }
    /// <summary>
    /// Creates a new indexed vector holding a clone of every item in <paramref name="source"/>.
    /// </summary>
    /// <param name="source">The vector to copy; may be <c>null</c>.</param>
    /// <param name="cloneItem">Produces the copy of a single item.</param>
    /// <returns>
    /// <c>null</c> when <paramref name="source"/> is <c>null</c>; otherwise a new
    /// vector to which the result of <paramref name="cloneItem"/> has been added
    /// for each item of <paramref name="source"/>.
    /// </returns>
    public static IIndexedVector<TKey, TValue>? Clone<TKey, TValue>(IIndexedVector<TKey, TValue>? source, Func<TValue, TValue> cloneItem)
        where TKey : notnull
        where TValue : class
    {
        if (source is null)
        {
            return null;
        }

        // Created with the source's item count passed to the constructor.
        var copy = new IndexedVector<TKey, TValue>(source.Count);

        foreach (var entry in source)
        {
            copy.Add(cloneItem(entry.Value));
        }

        return copy;
    }
Example #11
0
        /// <summary>
        /// Compares the first token of two strings against a fixed reference vector
        /// whose components are 1, 2, 3, ... and prints the resulting figures to the
        /// console.
        /// </summary>
        /// <param name="first">The first text; only its first token is used.</param>
        /// <param name="second">The second text; only its first token is used.</param>
        /// <param name="model">The model supplying tokenization, dimension count and CosAngle.</param>
        private static void CompareToBaseVector(string first, string second, IModel<string> model)
        {
            // Reference components: position + 1 in every dimension.
            var components = new float[model.NumOfDimensions];

            for (int slot = 0; slot < components.Length; slot++)
            {
                components[slot] = slot + 1f;
            }

            var reference     = new IndexedVector(components);
            var firstToken    = model.Tokenize(first).First();
            var secondToken   = model.Tokenize(second).First();
            var firstToBase   = model.CosAngle(reference, firstToken);
            var secondToBase  = model.CosAngle(reference, secondToken);
            var smaller       = Math.Min(firstToBase, secondToBase);
            var larger        = Math.Max(firstToBase, secondToBase);

            Console.WriteLine($"first angle to base vector: {firstToBase}");
            Console.WriteLine($"second angle to base vector: {secondToBase}");
            Console.WriteLine($"base vector similarity: {smaller / larger}");
        }
Example #12
0
    public void CopyConstructorsTest()
    {
        string     schema   = $@"
{MetadataHelpers.AllAttributes}
namespace CopyConstructorTest;

union Union {{ OuterTable, InnerTable, OuterStruct, InnerStruct }} // Optionally add more tables.

table OuterTable ({MetadataKeys.SerializerKind}: ""Greedy"") {{
  A:string (id: 0);

  B:byte   (id: 1);
  C:ubyte  (id: 2);
  D:int16  (id: 3); 
  E:uint16 (id: 4);
  F:int32  (id: 5);
  G:uint32 (id: 6);
  H:int64  (id: 7);
  I:uint64 (id: 8);
  
  IntVector_List:[int] ({MetadataKeys.VectorKind}:""IList"", id: 9);
  IntVector_RoList:[int] ({MetadataKeys.VectorKind}:""IReadOnlyList"", id: 10);
  IntVector_Array:[int] ({MetadataKeys.VectorKind}:""Array"", id: 11);
  
  TableVector_List:[InnerTable] ({MetadataKeys.VectorKind}:""IList"", id: 12);
  TableVector_RoList:[InnerTable] ({MetadataKeys.VectorKind}:""IReadOnlyList"", id: 13);
  TableVector_Indexed:[InnerTable] ({MetadataKeys.VectorKind}:""IIndexedVector"", id: 14);
  TableVector_Array:[InnerTable] ({MetadataKeys.VectorKind}:""Array"", id: 15);

  ByteVector:[ubyte] ({MetadataKeys.VectorKind}:""Memory"", id: 16);
  ByteVector_RO:[ubyte] ({MetadataKeys.VectorKind}:""ReadOnlyMemory"", id: 17);
  UnionVal : Union (id: 19);

  VectorOfUnion : [Union] (id: 21);
  VectorOfUnion_RoList : [Union] (id: 23, {MetadataKeys.VectorKind}:""IReadOnlyList"");
  VectorOfUnion_Array : [Union] (id: 25, {MetadataKeys.VectorKind}:""Array"");
}}

struct InnerStruct {{
    LongValue:int64;
}}

struct OuterStruct {{
    Value:int;
    InnerStructVal:InnerStruct;
}}

table InnerTable {{
  Name:string ({MetadataKeys.Key});
  OuterStructVal:OuterStruct;
}}

";
        OuterTable original = new OuterTable
        {
            A = "string",
            B = 1,
            C = 2,
            D = 3,
            E = 4,
            F = 5,
            G = 6,
            H = 7,
            I = 8,

            ByteVector = new byte[] { 1, 2, 3, }.AsMemory(),
                   ByteVector_RO = new byte[] { 4, 5, 6, }.AsMemory(),

                   IntVector_Array = new[] { 7, 8, 9, },
                   IntVector_List = new[] { 10, 11, 12, }.ToList(),
                   IntVector_RoList = new[] { 13, 14, 15 }.ToList(),

                   TableVector_Array   = CreateInner("Rocket", "Molly", "Clementine"),
                   TableVector_Indexed = new IndexedVector <string, InnerTable>(CreateInner("Pudge", "Sunshine", "Gypsy"), false),
                   TableVector_List    = CreateInner("Finnegan", "Daisy"),
                   TableVector_RoList  = CreateInner("Gordita", "Lunchbox"),

                   UnionVal      = new FlatBufferUnion <OuterTable, InnerTable, OuterStruct, InnerStruct>(new OuterStruct()),
                   VectorOfUnion = new List <FlatBufferUnion <OuterTable, InnerTable, OuterStruct, InnerStruct> >
            {
                new(new OuterTable()),
                new(new InnerTable()),
                new(new OuterStruct()),
                new(new InnerStruct()),
            },