コード例 #1
0
        /// <summary>
        /// Rebuilds <c>vocab</c> and <c>signitureVocab</c> from <c>wordCounts</c>.
        /// Words whose count is strictly greater than <paramref name="cutOff"/> are
        /// stored in <c>vocab</c>; every other word contributes the signatures returned
        /// by <c>GetSigniture</c>, which are stored in <c>signitureVocab</c>.
        /// </summary>
        /// <param name="cutOff">
        /// Count threshold: a word is treated as known only when its count exceeds this value.
        /// </param>
        public void Build(int cutOff)
        {
            var frequent = new HashSet<string> ();
            var infrequent = new HashSet<string> ();

            // Partition the counted words around the threshold.
            foreach (var entry in wordCounts) {
                string token = entry.Key;
                int occurrences = entry.Value;
                var bucket = occurrences > cutOff ? frequent : infrequent;
                bucket.Add (token);
            }

            // A below-threshold word can yield up to two signatures: one when it is
            // a member of initialSet and one when it is a member of nonInitialSet.
            var collected = new HashSet<string> ();
            foreach (var token in infrequent) {
                // NOTE(review): ToLower () uses the current culture — confirm that is intended.
                bool lowerCaseIsKnown = frequent.Contains (token.ToLower ());

                if (initialSet.Contains (token)) {
                    string initialSig = GetSigniture (token, true, lowerCaseIsKnown);
                    collected.Add (initialSig);
                }

                if (nonInitialSet.Contains (token)) {
                    string nonInitialSig = GetSigniture (token, false, lowerCaseIsKnown);
                    collected.Add (nonInitialSig);
                }
            }

            vocab = new CodeBook32 ();
            foreach (var known in frequent) {
                vocab.Add (known);
            }

            // The signature code book is constructed with the number of known words;
            // presumably this offsets its codes past those of vocab — TODO confirm
            // against CodeBook32.
            signitureVocab = new CodeBook32 (frequent.Count);
            foreach (var sig in collected) {
                signitureVocab.Add (sig);
            }
        }
コード例 #2
0
ファイル: TagSet.cs プロジェクト: nyanyanya/la-pcfg
 /// <summary>
 /// Sets the root symbol and rebuilds the <c>PTs</c> and <c>NTs</c> code books
 /// from <c>ptset</c> and <c>ntset</c>. <c>NTs</c> is constructed with
 /// <c>PTs.Count</c> and receives <paramref name="ROOT"/> before any other entry.
 /// </summary>
 /// <param name="ROOT">
 /// Root symbol. It must not be a member of <c>ptset</c>; it is removed from
 /// <c>ntset</c> so that it is added to <c>NTs</c> only once.
 /// </param>
 /// <exception cref="InvalidOperationException">
 /// <paramref name="ROOT"/> is a member of <c>ptset</c>. No field is modified in that case.
 /// </exception>
 public void Build(string ROOT)
 {
     // Validate before mutating anything so a rejected call leaves the object untouched.
     if (ptset.Contains(ROOT))
     {
         // InvalidOperationException derives from Exception, so existing
         // catch (Exception) handlers keep working.
         throw new InvalidOperationException("ROOT symbols found in preterminal set!");
     }

     base.ROOT = ROOT;

     PTs = new CodeBook32();
     foreach (var pt in ptset)
     {
         PTs.Add(pt);
     }

     // Drop ROOT from the set first: it is added explicitly below, ahead of the others.
     ntset.Remove(ROOT);

     // Presumably PTs.Count offsets the nonterminal codes past the preterminal
     // ones — TODO confirm against CodeBook32.
     NTs = new CodeBook32(PTs.Count);
     NTs.Add(ROOT);
     foreach (var nt in ntset)
     {
         NTs.Add(nt);
     }
 }