Ejemplo n.º 1
0
        // Initialization: creates the vocabulary field sized by
        // NumberOfWordVectorDimensions and seeds it with a few words.
        public LanguageOutputTests()
        {
            _vocabulary = new Vocabulary(NumberOfWordVectorDimensions);

            // Ensure there are some words in the vocabulary
            foreach (string seedWord in new[] { "hello", "beautiful", "world" })
            {
                _vocabulary.Add(seedWord);
            }
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Adds every token of the given review to the vocabulary, passing the
 /// review itself along with each token.
 /// </summary>
 /// <param name="review">Review whose tokens are added.</param>
 private void AddReviewToVocabulary(Review review)
 {
     foreach (string reviewToken in review.tokens)
     {
         vocabulary.Add(reviewToken, review);
     }
 }
Ejemplo n.º 3
0
        // Adds numberOfNeighbors vectors derived from the vector of "beautiful" to the
        // vocabulary, queries FindNearestNeighbors with that vector, and checks that the
        // returned vectors match the added ones in order.
        public void FindNNeighbors()
        {
            const int numberOfNeighbors = 5;

            const string word = "beautiful";

            float[] queryVector = _vocabulary.VectorFromLabel(word);

            // Make increasingly remote neighbors
            // NOTE(review): Single.Epsilon is the smallest positive float value, so adding a
            // small multiple of it to a component of ordinary magnitude most likely rounds
            // back to the same value; the "remote" vectors may all equal the query vector.
            // Confirm whether a larger step was intended.
            float[][] nearbyVectors = new float[numberOfNeighbors][];
            int       iVector       = 0;

            for (iVector = 0; iVector < numberOfNeighbors; iVector++)
            {
                nearbyVectors[iVector] = new float[NumberOfWordVectorDimensions];
                for (int i = 0; i < queryVector.Length; i++)
                {
                    nearbyVectors[iVector][i] = queryVector[i] + Single.Epsilon * iVector;
                }
                // Each generated vector is stored under its index as the label
                _vocabulary.Add(iVector.ToString(), nearbyVectors[iVector]);
            }

            // Get nearest neighbors
            var retrievedNeighbors = _vocabulary.FindNearestNeighbors(queryVector, numberOfNeighbors);

            // Compare to expected neighbors
            iVector = 0;
            foreach (var tuple in retrievedNeighbors)
            {
                Assert.True(tuple.Item2.Vector.SequenceEqual(nearbyVectors[iVector++]));
            }

            // Without this check the loop above passes vacuously when fewer neighbors
            // than requested (or none at all) are returned.
            Assert.True(iVector == numberOfNeighbors);
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Reads the file at <paramref name="path"/> line by line, adds one Word per line
 /// to Vocabulary, then sorts Vocabulary by Power.
 /// </summary>
 /// <param name="path">Path of the dictionary file to read.</param>
 private void LoadGrimmDictionary(string path)
 {
     foreach (string entry in System.IO.File.ReadLines(path))
     {
         // The line is split into at most two parts on the HashArray separators;
         // only the first part is used to build the word.
         string headword = entry.Split(HashArray, 2)[0];
         Vocabulary.Add(new Word(headword));
     }

     // Negating CompareTo reverses the ordering that Power's CompareTo defines.
     Vocabulary.Sort((left, right) => -left.Power.CompareTo(right.Power));
 }
Ejemplo n.º 5
0
 /// <summary>
 /// Adds to Vocabulary every token key of the document that Vocabulary
 /// does not already contain.
 /// </summary>
 /// <param name="doc">Document whose token keys are merged into Vocabulary.</param>
 private void IncVocabulary(FrequencyDocument doc)
 {
     foreach (var term in doc.Tokens.Keys)
     {
         // Already known: nothing to add.
         if (Vocabulary.Contains(term))
         {
             continue;
         }

         Vocabulary.Add(term);
     }
 }
Ejemplo n.º 6
0
 /// <summary>
 /// Load a stored embedding map. Every entry of the "vocabulary" collection whose
 /// word is not yet a key of Vocabulary is added with its vector.
 /// </summary>
 /// <param name="path">Path to the database</param>
 public void Load(string path)
 {
     using (var database = new LiteDatabase(path))
     {
         var storedWords = database.GetCollection<EmbeddedWord>("vocabulary");
         foreach (var entry in storedWords.FindAll())
         {
             // Words already present keep their current vector.
             if (Vocabulary.ContainsKey(entry.Word))
             {
                 continue;
             }

             Vocabulary.Add(entry.Word, Vectors.DenseOfArray(entry.Vector));
         }
     }
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Runs every sampler over each file in the source directory and writes the
        /// sampled (label, target) pairs as "labelIndex targetIndex" lines, starting a
        /// new target file each time the per-file example count reaches a multiple of
        /// _maxDataCountPerFile. Afterwards exports the vocabulary.
        /// </summary>
        public void ExportData()
        {
            string[] files = Directory.GetFiles(_sourceDir);
            for (int i = 0; i < files.Length; i++)
            {
                string sourceFile = files[i];
                log.InfoFormat("========== Processing file {0} of {1} - {2} =============",
                               i + 1, files.Length,
                               Path.GetFileName(sourceFile));

                int    fileCounter = 0;
                uint   dataCount   = 0;
                string targetPath  = GetTargetFilePath(sourceFile, fileCounter);

                StreamWriter writer = new StreamWriter(targetPath);
                try
                {
                    Action<string, string> exportDelegate = (label, target) => {
                        writer.WriteLine(string.Format("{0} {1}", _vocabulary.IndexOf(label), _vocabulary.IndexOf(target)));
                        if (++dataCount % _maxDataCountPerFile == 0)
                        {
                            // Partition is full: finish it and continue in the next one.
                            writer.Dispose();
                            targetPath = GetTargetFilePath(sourceFile, ++fileCounter);
                            writer     = new StreamWriter(targetPath);
                        }
                    };

                    Action<List<string>> vocabMerger = (words) => {
                        _vocabulary.Add(words);
                    };

                    //write the data here
                    foreach (var sampler in _samplers)
                    {
                        sampler.Sample(sourceFile, exportDelegate, vocabMerger);
                    }
                }
                finally
                {
                    // The delegate may have replaced the writer; release whichever one is
                    // current, even when a sampler throws. Disposing twice is harmless.
                    writer.Dispose();
                }

                _count = _count + dataCount;
                log.InfoFormat("Vocabulary size: {0} words", _vocabulary.Count);
                log.InfoFormat("Saved {0} examples across {1} partitions.", dataCount, fileCounter + 1);
            }
            ExportVocabulary();
            log.InfoFormat("Finished exporting the dataset - {0} examples in total.", Count);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Reads all lines of the file, parses each with ParseString and collects the
        /// resulting entries into a new Vocabulary.
        /// </summary>
        /// <param name="filepath">Path of the file to parse.</param>
        /// <returns>A Vocabulary holding one entry per line of the file.</returns>
        /// <exception cref="ParsingException">
        /// Thrown when parsing any line fails; the message carries the file path and the
        /// original error message. Errors from reading the file itself are not wrapped.
        /// </exception>
        private static Vocabulary ParseFile(string filepath)
        {
            string[] lines = System.IO.File.ReadAllLines(filepath);

            IEnumerable<WordEntry> entries;
            try
            {
                // ToList forces parsing here so that failures surface inside the try.
                entries = lines.Select(line => ParseString(line)).ToList();
            }
            catch (Exception e)
            {
                throw new ParsingException($"When processing file {filepath} got error: {e.Message}");
            }

            var result = new Vocabulary();
            foreach (var entry in entries)
            {
                result.Add(entry);
            }

            return result;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Defines the appropriate constants for a word (W?FOO, A?FOO, ACT?FOO, PREP?FOO),
        /// creating the IWordBuilder if needed.
        /// </summary>
        /// <param name="word">The word to register in Vocabulary and Constants.</param>
        /// <exception cref="CompilerError">
        /// Thrown when the W? constant for the word already exists but is not an IWordBuilder.
        /// </exception>
        void DefineWord([NotNull] IWord word)
        {
            string rawWord = word.Atom.Text;

            if (!Vocabulary.ContainsKey(word))
            {
                var wAtom = ZilAtom.Parse("W?" + rawWord, Context);

                if (!Constants.TryGetValue(wAtom, out var existing))
                {
                    // No W? constant yet: define the word and register it in both tables.
                    var builder = Game.DefineVocabularyWord(rawWord);
                    Vocabulary.Add(word, builder);
                    Constants.Add(wAtom, builder);
                }
                else if (existing is IWordBuilder existingBuilder)
                {
                    // The constant is already a word builder: reuse it for this word.
                    Vocabulary.Add(word, existingBuilder);
                }
                else
                {
                    throw new CompilerError(CompilerMessages.Nonvocab_Constant_0_Conflicts_With_Vocab_Word_1, wAtom, word.Atom);
                }
            }

            // Define whichever vocab constants the vocab format yields for this word
            // and that are not defined yet.
            foreach (var pair in Context.ZEnvironment.VocabFormat.GetVocabConstants(word))
            {
                var atom = ZilAtom.Parse(pair.Key, Context);
                if (Constants.ContainsKey(atom))
                {
                    continue;
                }

                var operand = Game.MakeOperand(pair.Value);
                Constants.Add(atom, Game.DefineConstant(pair.Key, operand));
            }
        }