// Initialization
/// <summary>
/// Builds the vocabulary shared by the tests and pre-populates it with a few words.
/// </summary>
public LanguageOutputTests()
{
    _vocabulary = new Vocabulary(NumberOfWordVectorDimensions);

    // Guarantee the vocabulary is not empty before any test runs.
    string[] seedWords = { "hello", "beautiful", "world" };
    foreach (string seed in seedWords)
    {
        _vocabulary.Add(seed);
    }
}
/// <summary>
/// Adds every token of <paramref name="review"/> to the vocabulary, passing the
/// review itself along with each token.
/// </summary>
/// <param name="review">Review whose <c>tokens</c> are registered.</param>
private void AddReviewToVocabulary(Review review)
{
    var reviewTokens = review.tokens;

    foreach (string reviewToken in reviewTokens)
    {
        vocabulary.Add(reviewToken, review);
    }
}
/// <summary>
/// Adds a series of vectors derived from the vector of a known word to the vocabulary,
/// queries the nearest neighbours of that word's vector, and checks that the vectors
/// come back in the order in which they were generated.
/// </summary>
public void FindNNeighbors()
{
    const int numberOfNeighbors = 5;
    const string word = "beautiful";

    float[] queryVector = _vocabulary.VectorFromLabel(word);

    // Build one vector per neighbour; the n-th vector is the query vector with
    // n * Single.Epsilon added to every component.
    // NOTE(review): Single.Epsilon is the smallest positive subnormal float, so for
    // components of ordinary magnitude this offset is probably lost to rounding and
    // the generated vectors may all equal the query vector - confirm whether a
    // larger step was intended.
    float[][] nearbyVectors = new float[numberOfNeighbors][];
    for (int n = 0; n < numberOfNeighbors; n++)
    {
        float[] shifted = new float[NumberOfWordVectorDimensions];
        nearbyVectors[n] = shifted;

        for (int d = 0; d < queryVector.Length; d++)
        {
            shifted[d] = queryVector[d] + Single.Epsilon * n;
        }

        // Each generated vector is stored under its index as the label.
        _vocabulary.Add(n.ToString(), shifted);
    }

    // Query the vocabulary.
    var retrievedNeighbors = _vocabulary.FindNearestNeighbors(queryVector, numberOfNeighbors);

    // Walk the results in order and compare each against the vector generated at the same position.
    int expectedIndex = 0;
    foreach (var neighbor in retrievedNeighbors)
    {
        var actual = neighbor.Item2.Vector;
        float[] expected = nearbyVectors[expectedIndex];
        expectedIndex++;

        Assert.True(actual.SequenceEqual(expected));
    }
}
/// <summary>
/// Reads the dictionary file at <paramref name="path"/> line by line, adds one
/// <c>Word</c> per line to the vocabulary, and then sorts the vocabulary by
/// <c>Power</c> in descending order.
/// </summary>
/// <param name="path">Path of the dictionary file to read.</param>
private void LoadGrimmDictionary(string path)
{
    foreach (string line in System.IO.File.ReadLines(path))
    {
        // Split on the separators in HashArray into at most two parts;
        // only the part before the first separator is used as the word text.
        string[] parts = line.Split(HashArray, 2);
        Vocabulary.Add(new Word(parts[0]));
    }

    // Descending order by Power. The operands are swapped rather than negating the
    // result: CompareTo only guarantees the sign of its return value, and negating
    // int.MinValue overflows back to int.MinValue, which would flip the ordering.
    Vocabulary.Sort((w1, w2) => w2.Power.CompareTo(w1.Power));
}
/// <summary>
/// Extends the vocabulary with every token key of <paramref name="doc"/> that it
/// does not already contain.
/// </summary>
/// <param name="doc">Document whose <c>Tokens.Keys</c> are merged into the vocabulary.</param>
private void IncVocabulary(FrequencyDocument doc)
{
    foreach (var term in doc.Tokens.Keys)
    {
        // Already known: nothing to add.
        if (Vocabulary.Contains(term))
        {
            continue;
        }

        Vocabulary.Add(term);
    }
}
/// <summary>
/// Reads the "vocabulary" collection of the database at <paramref name="path"/> and
/// adds each stored word, with its vector, to the in-memory vocabulary. Words that
/// are already present are left untouched.
/// </summary>
/// <param name="path">Path to the database</param>
public void Load(string path)
{
    using (var database = new LiteDatabase(path))
    {
        var storedWords = database.GetCollection<EmbeddedWord>("vocabulary");

        foreach (var entry in storedWords.FindAll())
        {
            // Keep the existing vector when the word is already known.
            if (Vocabulary.ContainsKey(entry.Word))
            {
                continue;
            }

            Vocabulary.Add(entry.Word, Vectors.DenseOfArray(entry.Vector));
        }
    }
}
/// <summary>
/// Processes every file in the source directory: runs each sampler over the file,
/// writes the sampled (label, target) pairs as vocabulary-index pairs to one or more
/// target files, then exports the vocabulary once all files are done.
/// </summary>
/// <remarks>
/// Output for a source file is split into partitions: after every
/// <c>_maxDataCountPerFile</c> written pairs the current writer is closed and a new
/// target file is opened. The current writer is always released, even when a sampler
/// or the export callback throws.
/// </remarks>
public void ExportData()
{
    string[] files = Directory.GetFiles(_sourceDir);

    for (int i = 0; i < files.Length; i++)
    {
        string sourceFile = files[i];
        log.InfoFormat("========== Processing file {0} of {1} - {2} =============", i + 1, files.Length, Path.GetFileName(sourceFile));

        int fileCounter = 0;
        uint dataCount = 0;

        StreamWriter writer = new StreamWriter(GetTargetFilePath(sourceFile, fileCounter));
        try
        {
            // Writes one example and rolls over to the next partition when the
            // per-file limit is reached.
            Action<string, string> exportDelegate = (label, target) =>
            {
                writer.WriteLine(string.Format("{0} {1}", _vocabulary.IndexOf(label), _vocabulary.IndexOf(target)));

                if (++dataCount % _maxDataCountPerFile == 0)
                {
                    // Dispose flushes and closes the current partition before the next one is opened.
                    writer.Dispose();
                    writer = new StreamWriter(GetTargetFilePath(sourceFile, ++fileCounter));
                }
            };

            Action<List<string>> vocabMerger = (words) =>
            {
                _vocabulary.Add(words);
            };

            // Write the data here.
            foreach (var sampler in _samplers)
            {
                sampler.Sample(sourceFile, exportDelegate, vocabMerger);
            }
        }
        finally
        {
            // Releases whichever writer is current (the callback may have replaced it),
            // so the file handle is not leaked if sampling throws.
            writer.Dispose();
        }

        _count = _count + dataCount;
        log.InfoFormat("Vocabulary size: {0} words", _vocabulary.Count);
        log.InfoFormat("Saved {0} examples across {1} partitions.", dataCount, fileCounter + 1);
    }

    ExportVocabulary();
    log.InfoFormat("Finished exporting the dataset - {0} examples in total.", Count);
}
/// <summary>
/// Reads all lines of <paramref name="filepath"/>, parses each line into a
/// <c>WordEntry</c>, and returns a new <c>Vocabulary</c> containing the entries
/// in file order.
/// </summary>
/// <param name="filepath">Path of the file to parse.</param>
/// <returns>A vocabulary holding one entry per line of the file.</returns>
/// <exception cref="ParsingException">Thrown when parsing any line fails.</exception>
private static Vocabulary ParseFile(string filepath)
{
    string[] lines = System.IO.File.ReadAllLines(filepath);

    // Parse everything up front so a bad line fails before any vocabulary is built.
    var entries = new List<WordEntry>(lines.Length);
    try
    {
        foreach (string line in lines)
        {
            entries.Add(ParseString(line));
        }
    }
    catch (Exception e)
    {
        // NOTE(review): only the message of the original exception is carried over;
        // the exception itself is not attached as an inner exception.
        throw new ParsingException($"When processing file {filepath} got error: {e.Message}");
    }

    var result = new Vocabulary();
    foreach (var entry in entries)
    {
        result.Add(entry);
    }

    return result;
}
/// <summary>
/// Makes sure the constants belonging to a word (W?FOO, A?FOO, ACT?FOO, PREP?FOO) exist,
/// registering the word's IWordBuilder in the vocabulary first when it is not there yet.
/// </summary>
/// <param name="word">The word to define.</param>
/// <exception cref="CompilerError">
/// Thrown when a constant named W?FOO already exists but is not an IWordBuilder.
/// </exception>
void DefineWord([NotNull] IWord word)
{
    string rawWord = word.Atom.Text;

    if (!Vocabulary.ContainsKey(word))
    {
        var wAtom = ZilAtom.Parse("W?" + rawWord, Context);

        if (Constants.TryGetValue(wAtom, out var existing))
        {
            // A W? constant is already defined: it can only be reused if it is a word builder.
            if (!(existing is IWordBuilder existingBuilder))
            {
                throw new CompilerError(CompilerMessages.Nonvocab_Constant_0_Conflicts_With_Vocab_Word_1, wAtom, word.Atom);
            }

            Vocabulary.Add(word, existingBuilder);
        }
        else
        {
            // No W? constant yet: create the builder and record it in both tables.
            var newBuilder = Game.DefineVocabularyWord(rawWord);
            Vocabulary.Add(word, newBuilder);
            Constants.Add(wAtom, newBuilder);
        }
    }

    // Define whichever vocabulary constants for this word are still missing.
    foreach (var pair in Context.ZEnvironment.VocabFormat.GetVocabConstants(word))
    {
        var atom = ZilAtom.Parse(pair.Key, Context);
        if (Constants.ContainsKey(atom))
        {
            continue;
        }

        Constants.Add(atom, Game.DefineConstant(pair.Key, Game.MakeOperand(pair.Value)));
    }
}