예제 #1
0
            /// <summary>
            /// Builds the tokenizer and its token filter chain for this analyzer.
            /// A single <see cref="Random"/> seeded with <c>seed</c> drives both steps,
            /// tokenizer first and filter chain second.
            /// </summary>
            /// <param name="fieldName">Not used by this implementation.</param>
            /// <param name="reader">Input handed to <c>NewTokenizer</c>.</param>
            /// <returns>Components wrapping the tokenizer and the end of the filter chain.</returns>
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                var rng = new Random(seed);

                // Order matters: the tokenizer draws from the random source before the filters do.
                var tokSpec = NewTokenizer(rng, reader);
                var chain   = NewFilterChain(rng, tokSpec.tokenizer, tokSpec.offsetsAreCorrect);

                return new TokenStreamComponents(tokSpec.tokenizer, chain.stream);
            }
예제 #2
0
            /// <summary>
            /// Builds a random chain of zero to four token filters on top of
            /// <paramref name="tokenizer"/>, inserting a <c>ValidatingTokenFilter</c>
            /// before every stage and once more at the end.
            /// </summary>
            /// <param name="random">Source of all random choices made while building the chain.</param>
            /// <param name="tokenizer">The stream the first stage wraps.</param>
            /// <param name="offsetsAreCorrect">Whether the incoming stream is known to produce correct offsets.</param>
            /// <returns>
            /// A spec holding the final stream, the accumulated <c>offsetsAreCorrect</c> flag,
            /// and the description collected in <c>descr</c> while creating the filters.
            /// </returns>
            private TokenFilterSpec NewFilterChain(Random random, Tokenizer tokenizer, bool offsetsAreCorrect)
            {
                TokenFilterSpec spec = new TokenFilterSpec();

                spec.offsetsAreCorrect = offsetsAreCorrect;
                spec.stream            = tokenizer;
                StringBuilder descr      = new StringBuilder();
                // NOTE(review): nextInt/size()/toString() are Java-style names; presumably
                // supplied by project helpers -- confirm against the rest of the file.
                int           numFilters = random.nextInt(5);

                for (int i = 0; i < numFilters; i++)
                {
                    // Insert ValidatingTF after each stage so we can
                    // catch problems right after the TF that "caused"
                    // them:
                    spec.stream = new ValidatingTokenFilter(spec.stream, "stage " + i, spec.offsetsAreCorrect);

                    // Keep drawing constructors until one yields a usable filter.
                    while (true)
                    {
                        ConstructorInfo ctor = tokenfilters[random.nextInt(tokenfilters.size())];

                        // hack: MockGraph/MockLookahead has assertions that will trip if they follow
                        // an offsets violator. so we cant use them after e.g. wikipediatokenizer
                        //
                        // The type check must be grouped as a whole: without the outer
                        // parentheses, && binds tighter than || and the lookahead filter
                        // would be rejected even when offsets are still correct.
                        if (!spec.offsetsAreCorrect &&
                            (ctor.DeclaringType.Equals(typeof(MockGraphTokenFilter)) ||
                             ctor.DeclaringType.Equals(typeof(MockRandomLookaheadTokenFilter))))
                        {
                            continue;
                        }

                        object[] args = NewFilterArgs(random, spec.stream, ctor.GetParameters().Select(p => p.ParameterType).ToArray());
                        if (Broken(ctor, args))
                        {
                            continue;
                        }
                        TokenFilter flt = CreateComponent <TokenFilter>(ctor, args, descr);
                        if (flt != null)
                        {
                            // Once any stage is flagged by BrokenOffsets, the flag stays false
                            // for the rest of the chain.
                            spec.offsetsAreCorrect &= !BrokenOffsets(ctor, args);
                            spec.stream             = flt;
                            break;
                        }
                    }
                }

                // Insert ValidatingTF after each stage so we can
                // catch problems right after the TF that "caused"
                // them:
                spec.stream = new ValidatingTokenFilter(spec.stream, "last stage", spec.offsetsAreCorrect);

                spec.toString = descr.toString();
                return(spec);
            }
예제 #3
0
            /// <summary>
            /// Describes the char filters, tokenizer and token filters that <c>seed</c>
            /// produces, by rebuilding each of them and collecting their descriptions.
            /// </summary>
            /// <returns>
            /// A multi-line string listing <c>charfilters</c>, <c>tokenizer</c>, <c>filters</c>
            /// and the final <c>offsetsAreCorrect</c> flag.
            /// </returns>
            public override string ToString()
            {
                var text = new StringBuilder();

                // The char filter chain is built over an empty reader with its own Random.
                CharFilterSpec chars = NewCharFilterChain(new Random(seed), new StringReader(""));
                text.Append("\ncharfilters=").Append(chars.toString);

                // intentional: initReader gets its own separate random
                var rng = new Random(seed);

                TokenizerSpec tok = NewTokenizer(rng, chars.reader);
                text.Append("\n").Append("tokenizer=").Append(tok.toString);

                // The filter chain continues with the same Random the tokenizer used.
                TokenFilterSpec filters = NewFilterChain(rng, tok.tokenizer, tok.offsetsAreCorrect);
                text.Append("\n").Append("filters=").Append(filters.toString);

                text.Append("\n").Append("offsetsAreCorrect=").Append(filters.offsetsAreCorrect);
                return text.ToString();
            }