예제 #1
0
 /// <summary>
 /// Builds an ambiguity class either for one specific word or for the set of
 /// tag ids that the dictionary reports for a word.
 /// </summary>
 /// <param name="word">The word the class is built from.</param>
 /// <param name="single">
 /// When true, the class remembers <paramref name="word"/> itself and gets an empty id list;
 /// when false, the word is not stored and the id list is filled from its tags.
 /// </param>
 /// <param name="dict">Queried for the tags of <paramref name="word"/>; only used when <paramref name="single"/> is false.</param>
 /// <param name="ttags">Translates each tag into its index; only used when <paramref name="single"/> is false.</param>
 protected internal AmbiguityClass(string word, bool single, Dictionary dict, TTags ttags)
 {
     this.single = single;
     this.word = single ? word : null;

     if (!single)
     {
         string[] wordTags = dict.GetTags(word);
         sortedIds = new List<int>(wordTags.Length);
         for (int t = 0; t < wordTags.Length; t++)
         {
             // Add is declared elsewhere; presumably it inserts the id into sortedIds.
             Add(ttags.GetIndex(wordTags[t]));
         }
     }
     else
     {
         sortedIds = Java.Util.Collections.EmptyList();
     }

     // NOTE: the Java original carried a disabled HashSet<String> of tag names
     // (filled from ttags.getTag(sortedId) for each sorted id); it was never ported.

     // Kept last: ToString() presumably reads the fields assigned above.
     key = this.ToString();
 }
예제 #2
0
 /// <summary>
 /// Initializes the index of ambiguity classes with a new <c>HashIndex</c>.
 /// </summary>
 /// <param name="ttags">Not used by this constructor.</param>
 public AmbiguityClasses(TTags ttags)
 {
     // TODO: if this has rotted and is not used anywhere, can it all be removed?
     //       [CDM: It would be nice to keep and revive someday. It is a nice and sometimes useful idea.]
     // TODO: the disabled field below is not used anywhere, either:
     //       protected final AmbiguityClass naClass = new AmbiguityClass(null, false, null, null);
     classes = new HashIndex<AmbiguityClass>();
 }
예제 #3
0
        /// <summary>
        /// Saves a tag set to disk, reads it back into a second <c>TTags</c> and checks
        /// that size, tag indices and closed-tag flags are the same after the round trip.
        /// </summary>
        public virtual void TestSerialization()
        {
            const string outputFile = "testoutputfile";

            for (int i = 0; i < 100; i++)
            {
                // int.ToString(i) is not valid C# (ToString is an instance method);
                // string concatenation formats the number directly.
                tt.Add("fake" + i);
            }
            tt.MarkClosed("fake44");
            tt.Add("boat");

            try
            {
                tt.Save(outputFile, Generics.NewHashMap<string, ICollection<string>>());
                TTags t2 = new TTags();

                t2.Read(outputFile);
                NUnit.Framework.Assert.AreEqual(tt.GetSize(), t2.GetSize());
                NUnit.Framework.Assert.AreEqual(tt.GetIndex("boat"), t2.GetIndex("boat"));
                // Expected value goes first so a failure message reads correctly.
                NUnit.Framework.Assert.AreEqual("boat", t2.GetTag(tt.GetIndex("boat")));
                NUnit.Framework.Assert.IsFalse(t2.IsClosed("fake43"));
                NUnit.Framework.Assert.IsTrue(t2.IsClosed("fake44"));
            }
            finally
            {
                // Remove the scratch file even when an assertion above fails,
                // so a failed run does not leave it behind for the next one.
                (new File(outputFile)).Delete();
            }
        }
예제 #4
0
        /// <summary>
        /// Determines the ambiguity class id for a word.
        /// </summary>
        /// <param name="word">The word to classify.</param>
        /// <param name="dict">Consulted to see whether the word is unknown and for its <c>Sum</c> count.</param>
        /// <param name="veryCommonWordThresh">
        /// A word whose <c>dict.Sum(word)</c> exceeds this value is given a single-word class.
        /// </param>
        /// <param name="ttags">Passed through to the <c>AmbiguityClass</c> constructor.</param>
        /// <returns>
        /// -2 when the word equals <c>naWord</c>, -1 when the dictionary reports it as unknown,
        /// otherwise the value returned by <c>Add</c> for a newly built <c>AmbiguityClass</c>.
        /// </returns>
        protected internal virtual int GetClass(string word, Dictionary dict, int veryCommonWordThresh, TTags ttags)
        {
            int classId;
            if (word.Equals(naWord))
            {
                classId = -2;
            }
            else if (dict.IsUnknown(word))
            {
                classId = -1;
            }
            else
            {
                // TODO: surely it would be faster and not too expensive to cache
                // the results of creating a whole bunch of these, since we're
                // probably constructing the same AmbiguityClass multiple times
                // for each word.  Furthermore, the separation of having two
                // constructors here is pretty awful, quite frankly.
                bool isVeryCommon = dict.Sum(word) > veryCommonWordThresh;
                classId = Add(new AmbiguityClass(word, isVeryCommon, dict, ttags));
            }
            return classId;
        }
예제 #5
0
 /// <summary>
 /// Assigns a newly constructed <c>TTags</c> to the <c>tt</c> field
 /// (presumably invoked by the test framework before each test).
 /// </summary>
 protected virtual void SetUp() => tt = new TTags();
예제 #6
0
 /*
  * public void printAmbiguous() {
  * String[] arr = dict.keySet().toArray(new String[dict.keySet().size()]);
  * try {
  * int countAmbiguous = 0;
  * int countUnAmbiguous = 0;
  * int countAmbDisamb = 0;
  * for (String word : arr) {
  * if (word.indexOf('|') == -1) {
  * continue;
  * }
  * TagCount count = get(word);
  * if (count.numTags() > 1) {
  * System.out.print(word);
  * countAmbiguous++;
  * tC.print();
  * System.out.println();
  * } else {
  * String wordA = word.substring(0, word.indexOf('|'));
  * if (get(wordA).numTags() > 1) {
  * System.out.print(word);
  * countAmbDisamb++;
  * countUnAmbiguous++;
  * tC.print();
  * System.out.println();
  * } else {
  * countUnAmbiguous++;
  * }
  * }// else
  * }
  * System.out.println(" ambg " + countAmbiguous + " unambg " + countUnAmbiguous + " disamb " + countAmbDisamb);
  * } catch (Exception e) {
  * e.printStackTrace();
  * }
  * }
  */
 /// <summary>
 /// Walks every word in the dictionary, asks <paramref name="ambClasses"/> for the
 /// word's ambiguity class id, and records that id on the word's <c>TagCount</c>.
 /// </summary>
 /// <param name="ambClasses">Supplies the class id for each word via <c>GetClass</c>.</param>
 /// <param name="veryCommonWordThresh">Forwarded unchanged to <c>GetClass</c>.</param>
 /// <param name="ttags">Forwarded unchanged to <c>GetClass</c>.</param>
 protected internal virtual void SetAmbClasses(AmbiguityClasses ambClasses, int veryCommonWordThresh, TTags ttags)
 {
     foreach (KeyValuePair<string, TagCount> wordEntry in dict)
     {
         int classId = ambClasses.GetClass(wordEntry.Key, this, veryCommonWordThresh, ttags);
         wordEntry.Value.SetAmbClassId(classId);
     }
 }