Exemple #1
0
        // Clones orig into a token whose term is the [start, end) slice of orig's
        // term buffer, shifting the character offsets to match when that is safe.
        //
        // orig  - token to clone
        // start - index of the first char of the slice within orig's term buffer
        // end   - index one past the last char of the slice
        private Token newTok(Token orig, int start, int end)
        {
            int baseOffset = orig.startOffset();
            int newStart   = baseOffset;
            int newEnd     = orig.endOffset();

            // The offsets are only narrowed when the offset span equals the term
            // length. Otherwise the term is assumed to be a synonym whose text does
            // not line up with the offsets, and the original offsets are kept.
            bool offsetsMatchTerm = orig.termLength() == (newEnd - newStart);
            if (offsetsMatchTerm)
            {
                // Both values are derived from the unshifted start offset.
                newStart = baseOffset + start;
                newEnd   = baseOffset + end;
            }

            int sliceLength = end - start;
            return (Token)orig.clone(orig.termBuffer(), start, sliceLength, newStart, newEnd);
        }
Exemple #2
0
#pragma warning restore 672

        // Queues the tokens of lst in [start, end) and/or one token holding their
        // catenated term text.
        // Example: index "a","b","c" as pos0="a", pos1="b", pos2="c", pos2="abc"
        //
        // lst              - list of Token instances
        // start, end       - range of lst to process; end is exclusive
        // generateSubwords - when true, every token in the range is queued
        // catenateSubwords - when true, a token with the joined term text is queued last
        // posOffset        - position increment given to the catenated token when
        //                    generateSubwords is false
        //
        // A range of exactly one token is queued as-is regardless of the two flags.
        private void addCombos(List /*<Token>*/ lst, int start, int end, bool generateSubwords, bool catenateSubwords, int posOffset)
        {
            bool singleWord = (end - start) == 1;
            if (singleWord)
            {
                // A lone word is always emitted, even with generateSubwords off,
                // because the catenation of all the subwords *is* that subword.
                queue.add(lst.get(start));
                return;
            }

            StringBuilder joined  = catenateSubwords ? new StringBuilder() : null;
            Token         head    = null;
            Token         current = null;

            int idx = start;
            while (idx < end)
            {
                current = (Token)lst.get(idx);

                if (catenateSubwords)
                {
                    if (idx == start)
                    {
                        // Remember the first token: it supplies the start offset
                        // and the type of the catenated token.
                        head = current;
                    }
                    joined.append(current.termBuffer(), 0, current.termLength());
                }

                if (generateSubwords)
                {
                    queue.add(current);
                }

                idx++;
            }

            if (!catenateSubwords)
            {
                return;
            }

            // Spans from the start of the first token to the end of the last one.
            Token concatTok = new Token(joined.toString(),
                                        head.startOffset(),
                                        current.endOffset(),
                                        head.type());

            // If the subwords were queued too, overlap the catenated token with the
            // last of them (increment 0). Otherwise use the caller-supplied offset.
            int increment;
            if (generateSubwords)
            {
                increment = 0;
            }
            else
            {
                increment = posOffset;
            }
            concatTok.setPositionIncrement(increment);
            queue.add(concatTok);
        }
Exemple #3
0
 // Returns the type of a token, defined as the charType of the first
 // char in its term buffer.
 private int tokType(Token t)
 {
     var firstChar = t.termBuffer()[0];
     return charType(firstChar);
 }