예제 #1
0
 /// <summary>
 /// Writes the header through <c>Writer</c>: the model's <c>Words</c> value,
 /// a single space, its <c>Size</c> value, and a trailing line feed.
 /// </summary>
 /// <param name="m">Model whose <c>Words</c> and <c>Size</c> values are written.</param>
 private void WriteHeader(WordModel m)
 {
     const char fieldSeparator = ' ';
     const char lineTerminator = '\n';

     Writer.Write(m.Words);
     Writer.Write(fieldSeparator);
     Writer.Write(m.Size);
     Writer.Write(lineTerminator);
 }
 /// <summary>
 /// Writes a model: the header first, then each entry of <c>m.Vectors</c>
 /// in enumeration order.
 /// </summary>
 /// <param name="m">Model to write.</param>
 public void Write(WordModel m)
 {
     WriteHeader(m);

     var vectors = m.Vectors;
     foreach (var vector in vectors)
     {
         WriteWordVector(vector);
     }
 }
예제 #3
0
        /// <summary>
        /// Serializes the given model by emitting its header and then every
        /// vector it exposes through <c>Vectors</c>, in enumeration order.
        /// </summary>
        /// <param name="m">Model to write.</param>
        public void Write(WordModel m)
        {
            // Header goes first so it precedes all vector entries.
            WriteHeader(m);

            // One WriteWordVector call per entry.
            var entries = m.Vectors;
            foreach (var entry in entries)
            {
                WriteWordVector(entry);
            }
        }
예제 #4
0
        /// <summary>
        /// Looks up every word of the given sentences in the model and returns
        /// the result of <c>Average()</c> over the vectors that were found.
        /// </summary>
        /// <remarks>
        /// For each word the lookup tries <c>Text</c> first and falls back to
        /// <c>Raw</c> only when <c>Text</c> produced no match and <c>Raw</c> is
        /// non-empty and different from <c>Text</c>.
        /// </remarks>
        /// <param name="model">Model used to resolve words via <c>Find</c>.</param>
        /// <param name="sentences">Sentences whose words are looked up; must be non-null and non-empty.</param>
        /// <returns>
        /// The averaged vector, or a zero-filled array of length <c>model.Size</c>
        /// when none of the words could be resolved.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="model"/> or <paramref name="sentences"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="sentences"/> is empty.</exception>
        public static float[] GetParagraphVector(this WordModel model, params SentenceItem[] sentences)
        {
            if (model == null)
            {
                throw new ArgumentNullException(nameof(model));
            }

            if (sentences == null)
            {
                throw new ArgumentNullException(nameof(sentences));
            }

            if (sentences.Length == 0)
            {
                throw new ArgumentException("Value cannot be an empty collection.", nameof(sentences));
            }

            // Lookups still run in parallel, but AsOrdered keeps the matches in
            // input order. Collecting into an unordered concurrent bag made the
            // order handed to Average() depend on thread scheduling.
            // NOTE(review): presumably Average() sums floats, in which case an
            // unstable order yields slightly different results per run - confirm.
            var matches = sentences
                          .SelectMany(item => item.Words)
                          .AsParallel()
                          .AsOrdered()
                          .Select(word =>
            {
                WordVector match = null;

                if (!string.IsNullOrEmpty(word.Text))
                {
                    match = model.Find(word.Text);
                }

                // Fall back to the raw form only when the text lookup failed
                // and the raw form would actually be a different query.
                if (match == null && !string.IsNullOrEmpty(word.Raw) && word.Raw != word.Text)
                {
                    match = model.Find(word.Raw);
                }

                return match;
            })
                          .Where(match => match != null)
                          .ToArray();

            if (matches.Length == 0)
            {
                return new float[model.Size];
            }

            return matches.Average();
        }
예제 #5
0
        /// <summary>
        /// Returns the other entries of the model paired with their distance to
        /// the vector of <paramref name="word"/>, ordered by ascending distance.
        /// </summary>
        /// <param name="model">Model to search.</param>
        /// <param name="word">Word whose vector is the reference point; entries with this word are excluded from the result.</param>
        /// <returns>A parallel query of <c>WordDistance</c> items sorted by <c>Distance</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="model"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="word"/> is not present in the model.</exception>
        public static IEnumerable <WordDistance> Nearest(this WordModel model, string word)
        {
            if (model == null)
            {
                throw new ArgumentNullException(nameof(model));
            }

            var vector = model.GetByWord(word);

            if (vector == null)
            {
                throw new ArgumentException($"cannot find word '{word}'");
            }

            var target = vector.Vector;

            // Filter before projecting and sorting: the query word is dropped
            // without computing its distance, and the sort handles fewer items.
            return model.Vectors.AsParallel()
                   .Where(x => x.Word != word)
                   .Select(x => new WordDistance(x.Word, x.Vector.Distance(target)))
                   .OrderBy(x => x.Distance);
        }
예제 #6
0
        /// <summary>
        /// Computes the distance between the vectors of two words in the model.
        /// </summary>
        /// <param name="model">Model to search.</param>
        /// <param name="word1">First word.</param>
        /// <param name="word2">Second word.</param>
        /// <returns>The value of <c>Distance</c> between the two words' vectors.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="model"/> is null.</exception>
        /// <exception cref="ArgumentException">Either word is not present in the model; <paramref name="word1"/> is reported first.</exception>
        public static double Distance(this WordModel model, string word1, string word2)
        {
            if (model == null)
            {
                throw new ArgumentNullException(nameof(model));
            }

            var vector1 = model.GetByWord(word1);

            if (vector1 == null)
            {
                throw new ArgumentException($"cannot find word1 '{word1}'");
            }

            // Looked up only after word1 is known to exist, so a missing word1
            // does not cost a second scan of the model.
            var vector2 = model.GetByWord(word2);

            if (vector2 == null)
            {
                throw new ArgumentException($"cannot find word2 '{word2}'");
            }

            return vector1.Vector.Distance(vector2.Vector);
        }
예제 #7
0
 /// <summary>
 /// Returns the entry of <c>model.Vectors</c> that comes first when the
 /// entries are ordered by ascending distance to <paramref name="vector"/>.
 /// </summary>
 /// <param name="model">Model to search.</param>
 /// <param name="vector">Reference vector passed to <c>Distance</c> for each entry.</param>
 /// <returns>The first entry of the distance-ordered sequence.</returns>
 public static WordVector NearestSingle(this WordModel model, float[] vector)
 {
     var byDistance = model.Vectors.OrderBy(entry => entry.Vector.Distance(vector));

     return byDistance.First();
 }
예제 #8
0
 /// <summary>
 /// Orders the entries of <c>model.Vectors</c> by ascending distance to
 /// <paramref name="vector"/>.
 /// </summary>
 /// <param name="model">Model to search.</param>
 /// <param name="vector">Reference vector passed to <c>Distance</c> for each entry.</param>
 /// <returns>The <c>OrderBy</c> query over the model's entries.</returns>
 public static IEnumerable <WordVector> Nearest(this WordModel model, float[] vector)
 {
     var candidates = model.Vectors;
     var ranked = candidates.OrderBy(candidate => candidate.Vector.Distance(vector));

     return ranked;
 }
예제 #9
0
 /// <summary>
 /// Finds the first entry of <c>model.Vectors</c> whose <c>Word</c> equals
 /// <paramref name="word"/>.
 /// </summary>
 /// <param name="model">Model to search.</param>
 /// <param name="word">Word to look for, compared with <c>==</c>.</param>
 /// <returns>The first matching entry, or the default value when nothing matches.</returns>
 public static WordVector GetByWord(this WordModel model, string word)
 {
     var entries = model.Vectors;

     return entries.FirstOrDefault(entry => entry.Word == word);
 }
 /// <summary>
 /// Writes the header via <c>WriteString</c>: the model's <c>Words</c> and
 /// <c>Size</c> values separated by a space and followed by a line feed.
 /// </summary>
 /// <param name="m">Model whose <c>Words</c> and <c>Size</c> values are written.</param>
 private void WriteHeader(WordModel m)
 {
     var header = $"{m.Words} {m.Size}\n";

     WriteString(header);
 }