Example No. 1
0
            /// <summary>
            /// Builds a token stream from the given strings. Each token's
            /// start/end offsets are derived from its length, with a fixed
            /// 2-character gap between consecutive tokens.
            /// </summary>
            public IterTokenStream(params string[] text)
            {
                ArrayList /*<Token>*/ tokens = new ArrayList/*<Token>*/ (text.Length);
                int offset = 0;

                for (int i = 0; i < text.Length; i++)
                {
                    string term = text[i];
                    tokens.add(new Token(term, offset, offset + term.Length));
                    // advance past this token plus a 2-character gap
                    offset += term.Length + 2;
                }
                this.toks = tokens.iterator();
            }
Example No. 2
0
        /*
         * Need to worry about multiple scenarios:
         *  - need to go for the longest match
         *    a b => foo      #shouldn't match if "a b" is followed by "c d"
         *    a b c d => bar
         *  - need to backtrack - retry matches for tokens already read
         *     a b c d => foo
         *       b c => bar
         *     If the input stream is "a b c x", one will consume "a b c d"
         *     trying to match the first rule... all but "a" should be
         *     pushed back so a match may be made on "b c".
         *  - don't try and match generated tokens (thus need separate queue)
         *    matching is not recursive.
         *  - handle optional generation of original tokens in all these cases,
         *    merging token streams to preserve token positions.
         *  - preserve original positionIncrement of first matched token
         */

#pragma warning disable 672
        /// <summary>
        /// Returns the next token, applying synonym rewrites.
        /// Tokens generated by a previous match are drained from
        /// <c>replacement</c> first and are never re-matched (no recursion).
        /// Otherwise one token is read; if it keys into <c>map.submap</c>,
        /// <c>match()</c> is used to find the longest match, replacement
        /// tokens are generated, and — when <c>includeOrig()</c> is set —
        /// the original matched tokens are merged back in with their
        /// position increments adjusted so positions stay consistent.
        /// Returns null when the underlying stream is exhausted.
        /// </summary>
        public override Token next(Token target)
        {
            while (true)
            {
                // if there are any generated tokens, return them... don't try any
                // matches against them, as we specifically don't want recursion.
                if (replacement != null && replacement.hasNext())
                {
                    return((Token)replacement.next());
                }

                // common case fast-path of first token not matching anything
                Token firstTok = nextTok(target);
                if (firstTok == null)
                {
                    return(null);
                }
                // look up the token's term text in the synonym map's first level;
                // a null submap or a miss means no synonym starts with this token
                SynonymMap result = (SynonymMap)(map.submap != null ? map.submap.get(firstTok.termBuffer(), 0, firstTok.termLength()) : null);
                if (result == null)
                {
                    return(firstTok);
                }

                // OK, we matched a token, so find the longest match.

                // match() fills this with the original tokens consumed while
                // extending the match beyond firstTok
                matched = new LinkedList/*<Token>*/ ();

                result = match(result);

                if (result == null)
                {
                    // no match, simply return the first token read.
                    return(firstTok);
                }

                // reuse, or create new one each time?
                ArrayList /*<Token>*/ generated = new ArrayList/*<Token>*/ (result.synonyms.Length + matched.size() + 1);

                //
                // there was a match... let's generate the new tokens, merging
                // in the matched tokens (position increments need adjusting)
                //
                // lastTok supplies the end offset for every generated synonym,
                // so each replacement spans the whole matched region
                Token lastTok     = (Token)(matched.isEmpty() ? firstTok : matched.getLast());
                bool  includeOrig = result.includeOrig();

                // when the originals are kept, origTok walks firstTok + matched
                // while the synonyms are interleaved by position
                Token origTok = includeOrig ? firstTok : null;
                int   origPos = firstTok.getPositionIncrement(); // position of origTok in the original stream
                int   repPos  = 0;                               // curr position in replacement token stream
                int   pos     = 0;                               // current position in merged token stream

                for (int i = 0; i < result.synonyms.Length; i++)
                {
                    Token repTok = result.synonyms[i];
                    // new token covers the full matched span [firstTok.start, lastTok.end]
                    Token newTok = new Token(firstTok.startOffset(), lastTok.endOffset(), firstTok.type());
                    newTok.setTermBuffer(repTok.termBuffer(), 0, repTok.termLength());
                    repPos += repTok.getPositionIncrement();
                    if (i == 0)
                    {
                        repPos = origPos; // make position of first token equal to original
                    }
                    // if necessary, insert original tokens and adjust position increment
                    while (origTok != null && origPos <= repPos)
                    {
                        origTok.setPositionIncrement(origPos - pos);
                        generated.add(origTok);
                        pos    += origTok.getPositionIncrement();
                        origTok = (Token)(matched.isEmpty() ? null : matched.removeFirst());
                        if (origTok != null)
                        {
                            origPos += origTok.getPositionIncrement();
                        }
                    }

                    newTok.setPositionIncrement(repPos - pos);
                    generated.add(newTok);
                    pos += newTok.getPositionIncrement();
                }

                // finish up any leftover original tokens
                while (origTok != null)
                {
                    origTok.setPositionIncrement(origPos - pos);
                    generated.add(origTok);
                    pos    += origTok.getPositionIncrement();
                    origTok = (Token)(matched.isEmpty() ? null : matched.removeFirst());
                    if (origTok != null)
                    {
                        origPos += origTok.getPositionIncrement();
                    }
                }

                // what if we replaced a longer sequence with a shorter one?
                // a/0 b/5 =>  foo/0
                // should I re-create the gap on the next buffered token?

                replacement = generated.iterator();
                // Now return to the top of the loop to read and return the first
                // generated token.. The reason this is done is that we may have generated
                // nothing at all, and may need to continue with more matching logic.
            }
        }