/// <summary>
/// Builds an ambiguity class for <paramref name="word"/>.
/// </summary>
/// <remarks>
/// When <paramref name="single"/> is true the instance keeps the word itself and an
/// empty id list. Otherwise the word is cleared and one id is added for every tag the
/// dictionary reports for the word. In both cases <c>key</c> is set from
/// <c>ToString()</c> once the other fields are populated.
/// </remarks>
/// <param name="word">The word this class is built for.</param>
/// <param name="single">Whether the class stands for this one word only.</param>
/// <param name="dict">Source of the word's tags; only consulted when <paramref name="single"/> is false.</param>
/// <param name="ttags">Maps tag strings to ids; only consulted when <paramref name="single"/> is false.</param>
protected internal AmbiguityClass(string word, bool single, Dictionary dict, TTags ttags)
{
    this.single = single;

    if (!single)
    {
        // Tag-based class: identified by the ids of the word's tags, not by the word.
        this.word = null;
        string[] wordTags = dict.GetTags(word);
        sortedIds = new List<int>(wordTags.Length);
        foreach (string wordTag in wordTags)
        {
            // NOTE(review): Add presumably keeps sortedIds ordered - confirm against its definition.
            Add(ttags.GetIndex(wordTag));
        }
    }
    else
    {
        // Single-word class: identified by the word, so no tag ids are stored.
        this.word = word;
        sortedIds = Java.Util.Collections.EmptyList();
    }

    // The Java original sketched (but never enabled) a HashSet<String> of the tag
    // strings for the collected ids; it is intentionally not built here.

    // Must come last: ToString() is evaluated after word/sortedIds are assigned.
    key = ToString();
}
/// <summary>
/// Creates an empty collection of ambiguity classes backed by a fresh
/// <c>HashIndex</c>.
/// </summary>
/// <param name="ttags">Not used by this constructor.</param>
public AmbiguityClasses(TTags ttags)
{
    // TODO: if it's rotted and not used anywhere, can we just get rid of it all?
    //       [CDM: It would be nice to keep and revive someday. It is a nice and
    //       sometimes useful idea.]
    // TODO: this isn't used anywhere, either:
    //       protected final AmbiguityClass naClass = new AmbiguityClass(null, false, null, null);
    classes = new HashIndex<AmbiguityClass>();
}
/// <summary>
/// Round-trips the tag set through <c>Save</c>/<c>Read</c> and checks that size,
/// tag indices, tag names and the closed-tag flag come back unchanged.
/// </summary>
/// <remarks>
/// The test writes a scratch file named "testoutputfile" in the working directory
/// and removes it again, even when an assertion fails.
/// </remarks>
public virtual void TestSerialization()
{
    const string outputPath = "testoutputfile";

    // Populate with fake0..fake99 plus one real-looking tag; only fake44 is closed.
    for (int i = 0; i < 100; i++)
    {
        // Plain concatenation replaces the Java-ism int.ToString(i), which is not valid C#.
        tt.Add("fake" + i);
    }
    tt.MarkClosed("fake44");
    tt.Add("boat");

    try
    {
        tt.Save(outputPath, Generics.NewHashMap<string, ICollection<string>>());

        TTags t2 = new TTags();
        t2.Read(outputPath);

        NUnit.Framework.Assert.AreEqual(tt.GetSize(), t2.GetSize());
        NUnit.Framework.Assert.AreEqual(tt.GetIndex("boat"), t2.GetIndex("boat"));
        // Expected value goes first so a failure message reads correctly.
        NUnit.Framework.Assert.AreEqual("boat", t2.GetTag(tt.GetIndex("boat")));
        NUnit.Framework.Assert.IsFalse(t2.IsClosed("fake43"));
        NUnit.Framework.Assert.IsTrue(t2.IsClosed("fake44"));
    }
    finally
    {
        // Clean up the scratch file regardless of the outcome, so a failing run
        // does not leave it behind for later runs.
        // NOTE(review): assumes File.Delete() follows java.io.File semantics
        // (returns false rather than throwing when the file is missing) - confirm.
        (new File(outputPath)).Delete();
    }
}
/// <summary>
/// Looks up the ambiguity class id for <paramref name="word"/>.
/// </summary>
/// <param name="word">The word to classify.</param>
/// <param name="dict">Dictionary queried for unknown-word status, total count and tags.</param>
/// <param name="veryCommonWordThresh">
/// A word whose <c>dict.Sum(word)</c> is strictly greater than this value gets a
/// single-word class.
/// </param>
/// <param name="ttags">Tag index passed through to the <c>AmbiguityClass</c> constructor.</param>
/// <returns>
/// -2 when the word equals <c>naWord</c>, -1 when the dictionary does not know the
/// word, otherwise the value returned by <c>Add</c> for the word's class
/// (presumably that class's index - confirm against <c>Add</c>).
/// </returns>
protected internal virtual int GetClass(string word, Dictionary dict, int veryCommonWordThresh, TTags ttags)
{
    if (word.Equals(naWord))
    {
        return -2;
    }

    if (dict.IsUnknown(word))
    {
        return -1;
    }

    bool isVeryCommon = dict.Sum(word) > veryCommonWordThresh;
    AmbiguityClass candidate = new AmbiguityClass(word, isVeryCommon, dict, ttags);

    // TODO: surely it would be faster and not too expensive to cache
    // the results of creating a whole bunch of these, since we're
    // probably constructing the same AmbiguityClass multiple times
    // for each word. Furthermore, the separation of having two
    // constructors here is pretty awful, quite frankly.
    return Add(candidate);
}
/// <summary>
/// Test fixture set-up: replaces <c>tt</c> with a freshly constructed <c>TTags</c>.
/// </summary>
protected virtual void SetUp()
{
    tt = new TTags();
}
/*
 * public void printAmbiguous() {
 *   String[] arr = dict.keySet().toArray(new String[dict.keySet().size()]);
 *   try {
 *     int countAmbiguous = 0;
 *     int countUnAmbiguous = 0;
 *     int countAmbDisamb = 0;
 *     for (String word : arr) {
 *       if (word.indexOf('|') == -1) {
 *         continue;
 *       }
 *       TagCount count = get(word);
 *       if (count.numTags() > 1) {
 *         System.out.print(word);
 *         countAmbiguous++;
 *         tC.print();
 *         System.out.println();
 *       } else {
 *         String wordA = word.substring(0, word.indexOf('|'));
 *         if (get(wordA).numTags() > 1) {
 *           System.out.print(word);
 *           countAmbDisamb++;
 *           countUnAmbiguous++;
 *           tC.print();
 *           System.out.println();
 *         } else {
 *           countUnAmbiguous++;
 *         }
 *       }// else
 *     }
 *     System.out.println(" ambg " + countAmbiguous + " unambg " + countUnAmbiguous + " disamb " + countAmbDisamb);
 *   } catch (Exception e) {
 *     e.printStackTrace();
 *   }
 * }
 */

/// <summary>
/// Assigns an ambiguity class id to every entry of this dictionary.
/// </summary>
/// <remarks>
/// For each word/count pair in <c>dict</c>, the id is obtained from
/// <c>ambClasses.GetClass</c> (with this dictionary as the lookup source) and
/// stored on the pair's <c>TagCount</c> via <c>SetAmbClassId</c>.
/// </remarks>
/// <param name="ambClasses">Supplies the class id for each word.</param>
/// <param name="veryCommonWordThresh">Threshold forwarded unchanged to <c>GetClass</c>.</param>
/// <param name="ttags">Tag index forwarded unchanged to <c>GetClass</c>.</param>
protected internal virtual void SetAmbClasses(AmbiguityClasses ambClasses, int veryCommonWordThresh, TTags ttags)
{
    foreach (KeyValuePair<string, TagCount> pair in dict)
    {
        int classId = ambClasses.GetClass(pair.Key, this, veryCommonWordThresh, ttags);
        pair.Value.SetAmbClassId(classId);
    }
}