Beispiel #1
0
 /// <summary>
 /// Produces a random regular expression string that is valid UTF-16 and
 /// that <see cref="RegExp"/> could be constructed from with
 /// <see cref="RegExpSyntax.NONE"/>.
 /// </summary>
 /// <param name="r">Random source handed to <c>RandomRegexpString</c>.</param>
 /// <returns>The first generated candidate that passes both checks.</returns>
 public static string RandomRegexp(Random r)
 {
     for (;;)
     {
         string candidate = RandomRegexpString(r);

         // The generator may emit undefined unicode sequences; only
         // well-formed UTF-16 candidates are handed to the parser.
         if (UnicodeUtil.ValidUTF16String(candidate))
         {
             try
             {
                 // Constructed purely as a validity probe; the instance is discarded.
                 new RegExp(candidate, RegExpSyntax.NONE);
                 return candidate;
             }
             catch (Exception e) when (e.IsException())
             {
                 // Candidate was rejected; fall through and generate another one.
             }
         }
     }
 }
        /// <summary>
        /// Returns a random regular expression string. Candidates come from
        /// <c>RandomRegexpString</c>; a candidate is only returned when it is a
        /// valid UTF-16 string and a <see cref="RegExp"/> can be constructed
        /// from it with <see cref="RegExpSyntax.NONE"/>.
        /// </summary>
        /// <param name="r">Random source handed to <c>RandomRegexpString</c>.</param>
        /// <returns>The first generated candidate that passes both checks.</returns>
        public static string RandomRegexp(Random r)
        {
            while (true)
            {
                string regexp = RandomRegexpString(r);
                // we will also generate some undefined unicode queries; skip those
                if (!UnicodeUtil.ValidUTF16String(regexp))
                {
                    continue;
                }
                try
                {
                    // Constructed only to find out whether the candidate is accepted.
                    new RegExp(regexp, RegExpSyntax.NONE);
                    return regexp;
                }
                catch (Exception)
                {
                    // Deliberately ignored: a candidate that makes the RegExp
                    // constructor throw is dropped and the loop tries again.
                }
            }
        }
        /// <summary>
        /// Runs the analyzer over random unicode strings and asserts that every
        /// term it emits is a valid UTF-16 string.
        /// </summary>
        public void TestSurrogates2()
        {
            for (int round = AtLeast(1000); round > 0; round--)
            {
                string text = TestUtil.RandomUnicodeString(Random(), 100);
                TokenStream stream = analyzer.GetTokenStream("foo", text);
                try
                {
                    ICharTermAttribute term = stream.AddAttribute<ICharTermAttribute>();

                    stream.Reset();
                    while (stream.IncrementToken())
                    {
                        bool wellFormed = UnicodeUtil.ValidUTF16String(term);
                        assertTrue(wellFormed);
                    }
                    stream.End();
                }
                finally
                {
                    // Always release the stream, whether or not an assertion failed above.
                    IOUtils.DisposeWhileHandlingException(stream);
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Enumerates every entry of the <see cref="TokenInfoDictionary"/> FST and
        /// sanity-checks the data stored for each word id: source ids and word ids
        /// must be strictly increasing, every returned string must be valid UTF-16,
        /// and inflection/part-of-speech values must have a translation in
        /// <see cref="ToStringUtil"/>.
        /// </summary>
        public void TestEnumerateAll()
        {
            // Counters are only reported at the end when VERBOSE is set.
            int termCount = 0;
            int wordCount = 0;
            int previousWordId = -1;
            int previousSourceId = -1;

            TokenInfoDictionary dictionary = TokenInfoDictionary.GetInstance();
            ConnectionCosts costs = ConnectionCosts.GetInstance();
            Int32sRefFSTEnum<long?> entries = new Int32sRefFSTEnum<long?>(dictionary.FST.InternalFST);
            Int32sRef wordIds = new Int32sRef();

            for (Int32sRefFSTEnum.InputOutput<long?> entry = entries.Next();
                 entry != null;
                 entry = entries.Next())
            {
                termCount++;

                // Rebuild the term's characters from the ints of the FST input key.
                Int32sRef key = entry.Input;
                char[] surface = new char[key.Length];
                for (int pos = 0; pos < surface.Length; pos++)
                {
                    surface[pos] = (char)key.Int32s[key.Offset + pos];
                }
                assertTrue(UnicodeUtil.ValidUTF16String(new string(surface)));

                // We walk in order: terms, source ids and word ids should always be increasing.
                int sourceId = (int)entry.Output.Value;
                assertTrue(sourceId > previousSourceId);
                previousSourceId = sourceId;

                dictionary.LookupWordIds(sourceId, wordIds);
                for (int k = 0; k < wordIds.Length; k++)
                {
                    wordCount++;

                    int wordId = wordIds.Int32s[wordIds.Offset + k];
                    assertTrue(wordId > previousWordId);
                    previousWordId = wordId;

                    string baseForm = dictionary.GetBaseForm(wordId, surface, 0, surface.Length);
                    assertTrue(baseForm == null || UnicodeUtil.ValidUTF16String(baseForm));

                    string inflectionForm = dictionary.GetInflectionForm(wordId);
                    assertTrue(inflectionForm == null || UnicodeUtil.ValidUTF16String(inflectionForm));
                    if (inflectionForm != null)
                    {
                        // Check that it is actually an ipadic inflection form.
                        assertNotNull(ToStringUtil.GetInflectedFormTranslation(inflectionForm));
                    }

                    string inflectionType = dictionary.GetInflectionType(wordId);
                    assertTrue(inflectionType == null || UnicodeUtil.ValidUTF16String(inflectionType));
                    if (inflectionType != null)
                    {
                        // Check that it is actually an ipadic inflection type.
                        assertNotNull(ToStringUtil.GetInflectionTypeTranslation(inflectionType));
                    }

                    // The next two lookups are made only to exercise them; results are unused.
                    int leftId = dictionary.GetLeftId(wordId);
                    int rightId = dictionary.GetRightId(wordId);
                    costs.Get(rightId, leftId);
                    dictionary.GetWordCost(wordId);

                    string partOfSpeech = dictionary.GetPartOfSpeech(wordId);
                    assertNotNull(partOfSpeech);
                    assertTrue(UnicodeUtil.ValidUTF16String(partOfSpeech));
                    // Check that it is actually an ipadic pos tag.
                    assertNotNull(ToStringUtil.GetPOSTranslation(partOfSpeech));

                    string pronunciation = dictionary.GetPronunciation(wordId, surface, 0, surface.Length);
                    assertNotNull(pronunciation);
                    assertTrue(UnicodeUtil.ValidUTF16String(pronunciation));

                    string reading = dictionary.GetReading(wordId, surface, 0, surface.Length);
                    assertNotNull(reading);
                    assertTrue(UnicodeUtil.ValidUTF16String(reading));
                }
            }

            if (VERBOSE)
            {
                Console.WriteLine("checked " + termCount + " terms, " + wordCount + " words.");
            }
        }