/// <summary>
/// Type initializer: constructs the <see cref="TokenInfoDictionary"/> and stores it in <c>INSTANCE</c>.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when constructing the dictionary fails with an <see cref="IOException"/>;
/// the original I/O failure is preserved as the inner exception.
/// </exception>
static SingletonHolder()
 {
     try
     {
         INSTANCE = new TokenInfoDictionary();
     }
     catch (IOException ioe)
     {
         // Throw a specific exception type rather than the bare System.Exception base class.
         // The runtime wraps anything escaping a static constructor in TypeInitializationException,
         // so callers still find this (with ioe chained beneath it) via InnerException.
         throw new InvalidOperationException("Cannot load TokenInfoDictionary.", ioe);
     }
 }
Example #2
0
        /// <summary>
        /// Enumerates every entry of the <see cref="TokenInfoDictionary"/> FST and sanity-checks it:
        /// the term itself must be valid UTF-16, source ids and word ids must be strictly increasing
        /// in enumeration order, and every word's base form, inflection data, part of speech,
        /// pronunciation and reading must be well-formed.
        /// </summary>
        public void TestEnumerateAll()
        {
            // Counters are only reported when VERBOSE is set.
            int termCount = 0;
            int wordCount = 0;
            int previousWordId = -1;
            int previousSourceId = -1;

            TokenInfoDictionary dictionary = TokenInfoDictionary.GetInstance();
            ConnectionCosts costs = ConnectionCosts.GetInstance();
            FST<long?> dictionaryFst = dictionary.FST.InternalFST;
            Int32sRefFSTEnum<long?> entries = new Int32sRefFSTEnum<long?>(dictionaryFst);
            Int32sRef wordIds = new Int32sRef();

            for (Int32sRefFSTEnum.InputOutput<long?> entry = entries.Next(); entry != null; entry = entries.Next())
            {
                termCount++;

                // Rebuild the term text from the FST input, one int per UTF-16 code unit.
                Int32sRef key = entry.Input;
                int keyLength = key.Length;
                char[] surface = new char[keyLength];
                for (int k = 0; k < keyLength; k++)
                {
                    surface[k] = (char)key.Int32s[key.Offset + k];
                }
                string surfaceText = new string(surface);
                assertTrue(UnicodeUtil.ValidUTF16String(surfaceText));

                long? payload = entry.Output;
                int sourceId = (int)payload.Value;

                // Entries are visited in order, so source ids must keep growing.
                assertTrue(sourceId > previousSourceId);
                previousSourceId = sourceId;

                dictionary.LookupWordIds(sourceId, wordIds);
                for (int w = 0; w < wordIds.Length; w++)
                {
                    wordCount++;

                    // Word ids must keep growing as well, across all terms.
                    int wordId = wordIds.Int32s[wordIds.Offset + w];
                    assertTrue(wordId > previousWordId);
                    previousWordId = wordId;

                    string baseForm = dictionary.GetBaseForm(wordId, surface, 0, surface.Length);
                    assertTrue(baseForm == null || UnicodeUtil.ValidUTF16String(baseForm));

                    string inflectionForm = dictionary.GetInflectionForm(wordId);
                    assertTrue(inflectionForm == null || UnicodeUtil.ValidUTF16String(inflectionForm));
                    if (inflectionForm != null)
                    {
                        // must be a known ipadic inflection form
                        assertNotNull(ToStringUtil.GetInflectedFormTranslation(inflectionForm));
                    }

                    string inflectionType = dictionary.GetInflectionType(wordId);
                    assertTrue(inflectionType == null || UnicodeUtil.ValidUTF16String(inflectionType));
                    if (inflectionType != null)
                    {
                        // must be a known ipadic inflection type
                        assertNotNull(ToStringUtil.GetInflectionTypeTranslation(inflectionType));
                    }

                    // The cost lookups are exercised only to confirm they do not throw.
                    int leftId = dictionary.GetLeftId(wordId);
                    int rightId = dictionary.GetRightId(wordId);
                    costs.Get(rightId, leftId);
                    dictionary.GetWordCost(wordId);

                    string partOfSpeech = dictionary.GetPartOfSpeech(wordId);
                    assertNotNull(partOfSpeech);
                    assertTrue(UnicodeUtil.ValidUTF16String(partOfSpeech));
                    // must be a known ipadic pos tag
                    assertNotNull(ToStringUtil.GetPOSTranslation(partOfSpeech));

                    string pronunciation = dictionary.GetPronunciation(wordId, surface, 0, surface.Length);
                    assertNotNull(pronunciation);
                    assertTrue(UnicodeUtil.ValidUTF16String(pronunciation));

                    string reading = dictionary.GetReading(wordId, surface, 0, surface.Length);
                    assertNotNull(reading);
                    assertTrue(UnicodeUtil.ValidUTF16String(reading));
                }
            }

            if (VERBOSE)
            {
                Console.WriteLine("checked " + termCount + " terms, " + wordCount + " words.");
            }
        }