/// <summary>
/// Generates random regular expression strings until one is both valid UTF-16
/// and can be used to construct a <see cref="RegExp"/> with
/// <see cref="RegExpSyntax.NONE"/>, then returns that string.
/// </summary>
/// <param name="r">Random source passed through to <c>RandomRegexpString</c>.</param>
/// <returns>The first candidate that passes both checks.</returns>
public static string RandomRegexp(Random r)
{
    for (;;)
    {
        string candidate = RandomRegexpString(r);

        // we will also generate some undefined unicode queries,
        // so only candidates that are valid UTF-16 are tried further
        if (UnicodeUtil.ValidUTF16String(candidate))
        {
            try
            {
                // The instance is discarded; constructing it is the acceptance check.
                new RegExp(candidate, RegExpSyntax.NONE);
                return candidate;
            }
            catch (Exception ex) when (ex.IsException())
            {
                // Construction failed for this candidate; fall through and try another.
            }
        }
    }
}
/// <summary>
/// Generates random regular expression strings until one is both valid UTF-16
/// and can be used to construct a <see cref="RegExp"/> with
/// <see cref="RegExpSyntax.NONE"/>, then returns that string.
/// </summary>
/// <param name="r">Random source passed through to <c>RandomRegexpString</c>.</param>
/// <returns>The first candidate that passes both checks.</returns>
public static string RandomRegexp(Random r)
{
    while (true)
    {
        string regexp = RandomRegexpString(r);

        // we will also generate some undefined unicode queries
        if (!UnicodeUtil.ValidUTF16String(regexp))
        {
            continue;
        }

        try
        {
            // The instance is discarded; constructing it is the acceptance check.
            new RegExp(regexp, RegExpSyntax.NONE);
            return regexp;
        }
        catch (Exception)
        {
            // Deliberately ignored: a candidate that cannot be constructed is
            // simply dropped and the loop generates a new one. No exception
            // variable is declared, so no unused-variable warning needs suppressing.
        }
    }
}
/// <summary>
/// Runs random unicode strings through the analyzer and asserts that every
/// term it emits is a valid UTF-16 sequence.
/// </summary>
public void TestSurrogates2()
{
    int iterations = AtLeast(1000);
    int round = 0;
    while (round < iterations)
    {
        string text = TestUtil.RandomUnicodeString(Random(), 100);
        TokenStream stream = analyzer.GetTokenStream("foo", text);
        try
        {
            ICharTermAttribute term = stream.AddAttribute<ICharTermAttribute>();
            stream.Reset();
            while (stream.IncrementToken())
            {
                assertTrue(UnicodeUtil.ValidUTF16String(term));
            }
            stream.End();
        }
        finally
        {
            // Always release the stream, whether or not the checks above passed.
            IOUtils.DisposeWhileHandlingException(stream);
        }
        round++;
    }
}
/// <summary>
/// Enumerates every entry of the <see cref="TokenInfoDictionary"/> FST and checks
/// the data reachable from it: term text, source ids, word ids, base form,
/// inflection form/type, part of speech, pronunciation and reading.
/// </summary>
public void TestEnumerateAll()
{
    // just for debugging
    int termCount = 0;
    int wordCount = 0;
    int prevWordId = -1;
    int prevSourceId = -1;

    TokenInfoDictionary dictionary = TokenInfoDictionary.GetInstance();
    ConnectionCosts costs = ConnectionCosts.GetInstance();
    FST<long?> fst = dictionary.FST.InternalFST;
    Int32sRefFSTEnum<long?> entries = new Int32sRefFSTEnum<long?>(fst);
    Int32sRef wordIds = new Int32sRef();

    for (Int32sRefFSTEnum.InputOutput<long?> entry = entries.Next();
         entry != null;
         entry = entries.Next())
    {
        termCount++;

        // Narrow each int of the entry's input to a char to rebuild the term text.
        Int32sRef key = entry.Input;
        char[] surface = new char[key.Length];
        for (int c = 0; c < surface.Length; c++)
        {
            surface[c] = (char)key.Int32s[key.Offset + c];
        }
        assertTrue(UnicodeUtil.ValidUTF16String(new string(surface)));

        long? output = entry.Output;
        int sourceId = (int)output.Value;

        // we walk in order, terms, sourceIds, and wordIds should always be increasing
        assertTrue(sourceId > prevSourceId);
        prevSourceId = sourceId;
        dictionary.LookupWordIds(sourceId, wordIds);

        for (int w = 0; w < wordIds.Length; w++)
        {
            wordCount++;
            int wordId = wordIds.Int32s[wordIds.Offset + w];
            assertTrue(wordId > prevWordId);
            prevWordId = wordId;

            // Base form may be absent; when present it must be valid UTF-16.
            string baseForm = dictionary.GetBaseForm(wordId, surface, 0, surface.Length);
            if (baseForm != null)
            {
                assertTrue(UnicodeUtil.ValidUTF16String(baseForm));
            }

            string inflectionForm = dictionary.GetInflectionForm(wordId);
            if (inflectionForm != null)
            {
                assertTrue(UnicodeUtil.ValidUTF16String(inflectionForm));
                // check that its actually an ipadic inflection form
                assertNotNull(ToStringUtil.GetInflectedFormTranslation(inflectionForm));
            }

            string inflectionType = dictionary.GetInflectionType(wordId);
            if (inflectionType != null)
            {
                assertTrue(UnicodeUtil.ValidUTF16String(inflectionType));
                // check that its actually an ipadic inflection type
                assertNotNull(ToStringUtil.GetInflectionTypeTranslation(inflectionType));
            }

            // These lookups are only exercised; their results are not asserted on.
            int leftId = dictionary.GetLeftId(wordId);
            int rightId = dictionary.GetRightId(wordId);
            costs.Get(rightId, leftId);
            dictionary.GetWordCost(wordId);

            string pos = dictionary.GetPartOfSpeech(wordId);
            assertNotNull(pos);
            assertTrue(UnicodeUtil.ValidUTF16String(pos));
            // check that its actually an ipadic pos tag
            assertNotNull(ToStringUtil.GetPOSTranslation(pos));

            string pronunciation = dictionary.GetPronunciation(wordId, surface, 0, surface.Length);
            assertNotNull(pronunciation);
            assertTrue(UnicodeUtil.ValidUTF16String(pronunciation));

            string reading = dictionary.GetReading(wordId, surface, 0, surface.Length);
            assertNotNull(reading);
            assertTrue(UnicodeUtil.ValidUTF16String(reading));
        }
    }

    if (VERBOSE)
    {
        Console.WriteLine("checked " + termCount + " terms, " + wordCount + " words.");
    }
}