/// <summary>
/// Builds the word vocabulary (<c>vocab</c>) and the rare-word signature
/// vocabulary (<c>signitureVocab</c>) from the counts held in <c>wordCounts</c>.
/// </summary>
/// <param name="cutOff">
/// Frequency threshold: a word is treated as known only when its count is
/// strictly greater than this value; every other word is treated as rare.
/// </param>
public void Build(int cutOff)
{
    var frequentWords = new HashSet<string>();
    var infrequentWords = new HashSet<string>();

    // Partition every counted word into the known or the rare bucket.
    foreach (var entry in wordCounts)
    {
        int occurrences = entry.Value;
        var bucket = occurrences > cutOff ? frequentWords : infrequentWords;
        bucket.Add(entry.Key);
    }

    // Rare words are represented by signatures instead of their surface form.
    // A word may contribute up to two signatures, one for each of the sets
    // (initialSet / nonInitialSet) in which it is recorded.
    var rareSignitures = new HashSet<string>();
    foreach (var rare in infrequentWords)
    {
        // Whether the lower-cased form of the rare word is itself a known word.
        bool lowerCaseIsKnown = frequentWords.Contains(rare.ToLower());

        if (initialSet.Contains(rare))
        {
            rareSignitures.Add(GetSigniture(rare, true, lowerCaseIsKnown));
        }

        if (nonInitialSet.Contains(rare))
        {
            rareSignitures.Add(GetSigniture(rare, false, lowerCaseIsKnown));
        }
    }

    vocab = new CodeBook32();
    // NOTE(review): the known-word count is passed to the CodeBook32 constructor,
    // presumably as a starting offset so signature ids follow word ids - confirm
    // against CodeBook32.
    signitureVocab = new CodeBook32(frequentWords.Count);

    foreach (var known in frequentWords)
    {
        vocab.Add(known);
    }

    foreach (var signiture in rareSignitures)
    {
        signitureVocab.Add(signiture);
    }
}
/// <summary>
/// Builds the preterminal code book (<c>PTs</c>) from <c>ptset</c> and the
/// nonterminal code book (<c>NTs</c>) from <c>ntset</c>, registering
/// <paramref name="ROOT"/> as the first nonterminal added.
/// </summary>
/// <param name="ROOT">The root symbol. It must not be a member of <c>ptset</c>.</param>
/// <exception cref="InvalidOperationException">
/// <paramref name="ROOT"/> is contained in <c>ptset</c>.
/// </exception>
/// <remarks>
/// <paramref name="ROOT"/> is removed from <c>ntset</c> as a side effect.
/// </remarks>
public void Build(string ROOT)
{
    // Validate before touching any state, so a rejected ROOT leaves the
    // previously built code books and root symbol untouched.
    if (ptset.Contains(ROOT))
    {
        throw new InvalidOperationException("ROOT symbols found in preterminal set!");
    }

    base.ROOT = ROOT;

    PTs = new CodeBook32();
    foreach (var pt in ptset)
    {
        PTs.Add(pt);
    }

    // Drop ROOT from the nonterminal set so it is added exactly once, ahead of
    // every other nonterminal.
    ntset.Remove(ROOT);

    // NOTE(review): the preterminal count is passed to the CodeBook32 constructor,
    // presumably as a starting offset so nonterminal ids follow preterminal ids -
    // confirm against CodeBook32.
    NTs = new CodeBook32(PTs.Count);
    NTs.Add(ROOT);
    foreach (var nt in ntset)
    {
        NTs.Add(nt);
    }
}