Ejemplo n.º 1
0
        /**
         * Combines two token lists into a single list ordered by absolute position, rewriting each
         * emitted token's positionIncrement so that it is relative to the previously emitted token.
         * Tokens from the two inputs that land on the same absolute position are stacked: the
         * later-emitted one receives a positionIncrement of 0.
         *
         * When both lists are non-null, every emitted token is a new Token built from the source
         * token's start offset, end offset, type and term buffer (only those values are copied).
         * When either list is null, the result just holds the elements of the non-null list
         * (the same objects, not copies), or is empty when both are null.
         *
         * Notation: '/' separates tokens in the same position; "a,n" means a has posInc=n and an
         * unmarked token is taken to have posInc=1.
         * Example:  [a b] merged with [c d] produces [a/c d/b]
         * Example:  [a,5 b,2] merged with [c d,4 e,4] produces [c d,4/a b,2 e,2]
         *
         */
        public static List /*<Token>*/ mergeTokens(List /*<Token>*/ lst1, List /*<Token>*/ lst2)
        {
            ArrayList /*<Token>*/ merged = new ArrayList/*<Token>*/ ();

            // Fewer than two real lists: nothing to interleave, pass the elements straight through.
            if (lst1 == null || lst2 == null)
            {
                if (lst2 != null)
                {
                    merged.addAll(lst2);
                }
                if (lst1 != null)
                {
                    merged.addAll(lst1);
                }
                return merged;
            }

            Iterator /*<Token>*/ it1 = lst1.iterator();
            Iterator /*<Token>*/ it2 = lst2.iterator();

            // Current head of each list; null once that list has been used up.
            Token head1 = null;
            if (it1.hasNext())
            {
                head1 = (Token)it1.next();
            }
            Token head2 = null;
            if (it2.hasNext())
            {
                head2 = (Token)it2.next();
            }

            // Absolute position of each head, accumulated from the position increments.
            int abs1 = 0;
            if (head1 != null)
            {
                abs1 = head1.getPositionIncrement();
            }
            int abs2 = 0;
            if (head2 != null)
            {
                abs2 = head2.getPositionIncrement();
            }

            // Absolute position of the most recently emitted token.
            int emittedPos = 0;

            while (!(head1 == null && head2 == null))
            {
                // Drain lst1 for as long as its head is not positioned after lst2's head.
                // The two drains must stay separate loops: on a position tie, the list that is
                // currently being drained keeps going, which fixes the order of stacked tokens.
                while (head1 != null && (head2 == null || abs1 <= abs2))
                {
                    Token copy = new Token(head1.startOffset(), head1.endOffset(), head1.type());
                    copy.setTermBuffer(head1.termBuffer(), 0, head1.termLength());
                    copy.setPositionIncrement(abs1 - emittedPos);
                    merged.add(copy);
                    emittedPos = abs1;

                    if (it1.hasNext())
                    {
                        head1 = (Token)it1.next();
                    }
                    else
                    {
                        head1 = null;
                    }
                    if (head1 != null)
                    {
                        abs1 += head1.getPositionIncrement();
                    }
                }

                // Then drain lst2 under the mirrored condition.
                while (head2 != null && (head1 == null || abs2 <= abs1))
                {
                    Token copy = new Token(head2.startOffset(), head2.endOffset(), head2.type());
                    copy.setTermBuffer(head2.termBuffer(), 0, head2.termLength());
                    copy.setPositionIncrement(abs2 - emittedPos);
                    merged.add(copy);
                    emittedPos = abs2;

                    if (it2.hasNext())
                    {
                        head2 = (Token)it2.next();
                    }
                    else
                    {
                        head2 = null;
                    }
                    if (head2 != null)
                    {
                        abs2 += head2.getPositionIncrement();
                    }
                }
            }
            return merged;
        }
Ejemplo n.º 2
0
        /**
         * Builds the synonym map from the file(s) named by the "synonyms" argument.
         *
         * The "ignoreCase" and "expand" settings are read through getBoolean (called with
         * false and true respectively as the second argument). If "synonyms" is absent,
         * nothing else happens. Otherwise, when the value names an existing file it is loaded as
         * a single resource; if not, it is split with StrUtils.splitFileNames and the lines of
         * every (trimmed) name are concatenated in order. The collected lines are handed to
         * parseRules together with a freshly created SynonymMap, which is stored in synMap.
         *
         * An IOException raised while loading is rethrown wrapped in a System.ApplicationException.
         */
        public void inform(ResourceLoader loader)
        {
            string synonyms   = (string)args.get("synonyms");
            bool   ignoreCase = getBoolean("ignoreCase", false);
            bool   expand     = getBoolean("expand", true);

            // Loading a TokenizerFactory from the "tokenizerFactory" argument is currently
            // disabled (that code was commented out), so parseRules receives null as its
            // last argument below.

            if (synonyms == null)
            {
                return;
            }

            List /*<String>*/ wlist = null;
            try {
                if (new File(synonyms).exists())
                {
                    // The whole value is one existing file.
                    wlist = loader.getLines(synonyms);
                }
                else
                {
                    // Treat the value as a list of file names and gather all of their lines.
                    List /*<String>*/ files = StrUtils.splitFileNames(synonyms);
                    wlist = new ArrayList/*<String>*/ ();
                    var fileIter = files.iterator();
                    while (fileIter.hasNext())
                    {
                        string name = (string)fileIter.next();
                        wlist.addAll(loader.getLines(name.Trim()));
                    }
                }
            } catch (IOException e) {
                throw new System.ApplicationException("Unexpected exception", e);
            }

            synMap = new SynonymMap(ignoreCase);
            parseRules(wlist, synMap, "=>", ",", expand, null);
        }