/// <summary>
/// Assembles the analysis components for one field: a tokenizer obtained from
/// <c>NewTokenizer</c> and a filter chain obtained from <c>NewFilterChain</c>,
/// both driven by a single <see cref="Random"/> freshly created from <c>seed</c>.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not consulted by this method.</param>
/// <param name="reader">Input handed to <c>NewTokenizer</c>.</param>
/// <returns>Components pairing the tokenizer with the final stream of the filter chain.</returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    // The same Random instance feeds both factory calls, tokenizer first.
    Random rnd = new Random(seed);

    TokenizerSpec source = NewTokenizer(rnd, reader);
    TokenFilterSpec chain = NewFilterChain(rnd, source.tokenizer, source.offsetsAreCorrect);

    return new TokenStreamComponents(source.tokenizer, chain.stream);
}
/// <summary>
/// Builds a chain of token filters on top of <paramref name="tokenizer"/>. The number of
/// filters is drawn from <paramref name="random"/>; each one is created from a constructor
/// picked out of <c>tokenfilters</c>, and a <c>ValidatingTokenFilter</c> is inserted before
/// every stage and once more after the last stage.
/// </summary>
/// <param name="random">Source of every random choice made while building the chain.</param>
/// <param name="tokenizer">Stream that forms the bottom of the chain.</param>
/// <param name="offsetsAreCorrect">Whether the incoming stream is known to produce correct offsets.</param>
/// <returns>
/// A spec holding the outermost stream, the accumulated <c>offsetsAreCorrect</c> flag and
/// the description text collected in the local <c>descr</c> builder.
/// </returns>
private TokenFilterSpec NewFilterChain(Random random, Tokenizer tokenizer, bool offsetsAreCorrect)
{
    TokenFilterSpec spec = new TokenFilterSpec();
    spec.offsetsAreCorrect = offsetsAreCorrect;
    spec.stream = tokenizer;
    StringBuilder descr = new StringBuilder();

    // NOTE(review): nextInt(...) and size() are Java-style member names; confirm that the
    // Random and tokenfilters types in scope actually provide them.
    int numFilters = random.nextInt(5);
    for (int i = 0; i < numFilters; i++)
    {
        // Insert ValidatingTF after each stage so we can
        // catch problems right after the TF that "caused"
        // them:
        spec.stream = new ValidatingTokenFilter(spec.stream, "stage " + i, spec.offsetsAreCorrect);

        // Keep drawing constructors until one yields a usable filter.
        while (true)
        {
            ConstructorInfo ctor = tokenfilters[random.nextInt(tokenfilters.size())];

            // hack: MockGraph/MockLookahead has assertions that will trip if they follow
            // an offsets violator. so we cant use them after e.g. wikipediatokenizer
            //
            // Both type checks must sit inside the parentheses: && binds tighter than ||,
            // so without the grouping MockRandomLookaheadTokenFilter would be rejected on
            // every draw, even while offsets are still correct.
            if (!spec.offsetsAreCorrect
                && (ctor.DeclaringType.Equals(typeof(MockGraphTokenFilter))
                    || ctor.DeclaringType.Equals(typeof(MockRandomLookaheadTokenFilter))))
            {
                continue;
            }

            object[] args = NewFilterArgs(random, spec.stream, ctor.GetParameters().Select(p => p.ParameterType).ToArray());
            if (Broken(ctor, args))
            {
                continue;
            }

            TokenFilter flt = CreateComponent<TokenFilter>(ctor, args, descr);
            if (flt != null)
            {
                // Once a stage is flagged by BrokenOffsets the flag stays false for the rest of the chain.
                spec.offsetsAreCorrect &= !BrokenOffsets(ctor, args);
                spec.stream = flt;
                break;
            }
        }
    }

    // Insert ValidatingTF after each stage so we can
    // catch problems right after the TF that "caused"
    // them:
    spec.stream = new ValidatingTokenFilter(spec.stream, "last stage", spec.offsetsAreCorrect);

    spec.toString = descr.ToString();
    return spec;
}
/// <summary>
/// Describes the chain this instance produces for its <c>seed</c> by rebuilding the char
/// filter chain, the tokenizer and the token filter chain and concatenating their
/// <c>toString</c> descriptions, followed by the final <c>offsetsAreCorrect</c> flag.
/// </summary>
/// <returns>A multi-line description starting with a newline and <c>charfilters=</c>.</returns>
public override string ToString()
{
    StringBuilder text = new StringBuilder();

    // The char filter chain is rebuilt over an empty reader with its own Random(seed).
    CharFilterSpec charSpec = NewCharFilterChain(new Random(seed), new StringReader(""));
    text.Append("\ncharfilters=").Append(charSpec.toString);

    // Intentional re-seed: initReader gets its own separate Random, so the tokenizer
    // and the token filters start over from a fresh Random(seed).
    Random rnd = new Random(seed);

    TokenizerSpec tokSpec = NewTokenizer(rnd, charSpec.reader);
    text.Append("\n").Append("tokenizer=").Append(tokSpec.toString);

    TokenFilterSpec chainSpec = NewFilterChain(rnd, tokSpec.tokenizer, tokSpec.offsetsAreCorrect);
    text.Append("\n").Append("filters=").Append(chainSpec.toString);

    text.Append("\n").Append("offsetsAreCorrect=" + chainSpec.offsetsAreCorrect);

    return text.ToString();
}