/// <summary>
/// Get a random word weighted to occur after a specified first word.
/// </summary>
/// <param name="firstWord">The word before the word being queried for.</param>
/// <param name="secondWordPOS">The part of speech of the word being queried for.</param>
/// <returns>
/// The second word of a frequency-weighted random bigram. Bigrams whose first word equals
/// <paramref name="firstWord"/> are preferred; when there are none, the whole bigram list for
/// the two parts of speech is sampled instead. If no bigram entry is selected, a random word of
/// <paramref name="secondWordPOS"/> obtained from <c>wordDictLookup</c> is returned.
/// </returns>
public Word GetRandomSecondWord(Word firstWord, PartOfSpeech secondWordPOS)
{
    CachedBigramList bigramList = BigramDictionary.Instance.GetBigramListByPOS(firstWord.PoS, secondWordPOS);
    CachedBigramList listToSearch = bigramList;

    // Filter once and reuse the result instead of scanning the list with Any() and then
    // scanning it again with Where() using the same predicate.
    List<BigramEntry> entriesWithWord = bigramList.entries.Where(e => e.First.Equals(firstWord)).ToList();
    if (entriesWithWord.Count > 0)
    {
        // Narrow the search to bigrams that start with firstWord; the total frequency has to be
        // recomputed so the weights are relative to the narrowed list.
        listToSearch = new CachedBigramList()
        {
            entries = entriesWithWord,
            totalFrequency = (int)entriesWithWord.Sum(e => e.Frequency)
        };
    }

    BigramEntry randomEntry = GetWeightedRandomEntryFromList(listToSearch.entries, listToSearch.totalFrequency);
    if (randomEntry == null)
    {
        // No bigram entry was selected: fall back to any word with the requested part of speech.
        return wordDictLookup.GetRandomWordByPOS(secondWordPOS);
    }

    return randomEntry.Second;
}
/// <summary>
/// Serializes a bigram entry into a fixed-size byte array.
/// </summary>
/// <param name="entry">The entry to serialize.</param>
/// <returns>
/// A buffer of <c>byteSize</c> bytes containing, in order, the first word's id, the second
/// word's id and the entry's frequency as written by <see cref="BinaryWriter"/>.
/// </returns>
public static byte[] ToBytes(BigramEntry entry)
{
    byte[] buffer = new byte[byteSize];

    // The stream wraps the buffer, so everything the writer emits lands directly in it.
    using (MemoryStream stream = new MemoryStream(buffer))
    using (BinaryWriter writer = new BinaryWriter(stream))
    {
        writer.Write(entry.First.Id);
        writer.Write(entry.Second.Id);
        writer.Write(entry.Frequency);
    }

    return buffer;
}
/// <summary>
/// Picks a random first word for a bigram whose two words have the given parts of speech,
/// weighted by the frequencies of the matching bigram entries.
/// </summary>
/// <param name="firstWordPOS">The part of speech of the word being queried for.</param>
/// <param name="secondWordPOS">The part of speech of the word that would come after the word being queried for.</param>
/// <returns>
/// The first word of the selected bigram entry, or a random word of
/// <paramref name="firstWordPOS"/> obtained from <c>wordDictLookup</c> when no entry is selected.
/// </returns>
public Word GetRandomFirstWord(PartOfSpeech firstWordPOS, PartOfSpeech secondWordPOS)
{
    CachedBigramList candidates = BigramDictionary.Instance.GetBigramListByPOS(firstWordPOS, secondWordPOS);
    BigramEntry picked = GetWeightedRandomEntryFromList(candidates.entries, candidates.totalFrequency);

    if (picked != null)
    {
        return picked.First;
    }

    // Nothing could be drawn from the bigram list: use any word with the requested part of speech.
    return wordDictLookup.GetRandomWordByPOS(firstWordPOS);
}
/// <summary>
/// Deserializes a bigram entry from its binary form.
/// </summary>
/// <param name="bytes">
/// Buffer that starts with three consecutive 32-bit integers: the first word's id, the second
/// word's id and the frequency.
/// </param>
/// <returns>A new entry constructed from the two word ids and the frequency.</returns>
public static BigramEntry FromBytes(byte[] bytes)
{
    using (MemoryStream stream = new MemoryStream(bytes))
    using (BinaryReader reader = new BinaryReader(stream))
    {
        int firstWordId = reader.ReadInt32();
        int secondWordId = reader.ReadInt32();
        int entryFrequency = reader.ReadInt32();

        // By storing the word ids and delaying the grabbing of the actual words until they're
        // needed, loading in the Bigram Dictionary binary file is greatly sped up.
        return new BigramEntry(firstWordId, secondWordId, entryFrequency);
    }
}
/// <summary>
/// Gets a random BigramEntry, using the frequencies of the entries as a weight.
/// </summary>
/// <param name="entryList">The list of entries to select an entry from.</param>
/// <param name="totalFrequency">The sum of every entry's frequency in the list.</param>
/// <returns>
/// A weighted random bigram entry, or <c>null</c> when <paramref name="entryList"/> is null or
/// empty. If the running total of frequencies never exceeds the scaled random value (for example
/// because <paramref name="totalFrequency"/> is larger than the actual sum), the first entry is
/// returned.
/// </returns>
private BigramEntry GetWeightedRandomEntryFromList(List<BigramEntry> entryList, int totalFrequency)
{
    // Drawn before the guard so the generator is consumed on every call, as it always has been.
    double roll = rand.NextDouble();

    if (entryList == null || entryList.Count == 0)
    {
        return null;
    }

    // Compare in frequency units and accumulate in double precision. Summing per-entry
    // single-precision fractions loses accuracy over long lists, which can leave the running
    // total short of the roll and wrongly trigger the first-entry fallback.
    double target = roll * totalFrequency;
    double cumulativeFrequency = 0;

    foreach (BigramEntry entry in entryList)
    {
        cumulativeFrequency += (double)entry.Frequency;

        // Strict comparison: an entry with zero frequency never advances the total past the
        // target, so it cannot be selected here.
        if (cumulativeFrequency > target)
        {
            return entry;
        }
    }

    return entryList[0];
}
/// <summary>
/// Populates <c>entries</c> from the raw contents of the bigram dictionary binary file.
/// </summary>
/// <param name="byteFromFile">
/// The file contents: two 32-bit header integers followed by back-to-back records of
/// <c>BigramEntry.byteSize</c> bytes each.
/// </param>
/// <exception cref="EndOfStreamException">
/// The data is shorter than the header, or ends with an incomplete record. Entries read before
/// the incomplete record have already been added to <c>entries</c>.
/// </exception>
private void CreateEntriesFromBinary(byte[] byteFromFile)
{
    using (MemoryStream mem = new MemoryStream(byteFromFile))
    using (BinaryReader br = new BinaryReader(mem))
    {
        br.ReadInt32(); // binary file written with a "total frequency" int used for IO loading and previous debugging in console project
        br.ReadInt32(); // binary file was written with an extra int to indicate how many bytes the entries made up (for typical IO loading)

        // ReadBytes never returns null: it returns an empty array at end of stream and a short
        // array when fewer bytes remain than were requested.
        byte[] record;
        while ((record = br.ReadBytes(BigramEntry.byteSize)).Length > 0)
        {
            if (record.Length < BigramEntry.byteSize)
            {
                // Report truncation here rather than relying on the decoder to run out of bytes.
                throw new EndOfStreamException(
                    "Bigram data ends with an incomplete record: expected " + BigramEntry.byteSize +
                    " bytes but only " + record.Length + " remained.");
            }

            entries.Add(BigramEntry.FromBytes(record));
        }
    }
}