/**
 * Merge two lists of tokens into a single list whose positionIncrements are
 * adjusted so tokens from both inputs land at the same absolute positions.
 *
 * Example: [a b] merged with [c d] produces [a/b c/d] ('/' denotes tokens in the same position)
 * Example: [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2] (a,n means a has posInc=n)
 */
public static List /*<Token>*/ mergeTokens(List /*<Token>*/ lst1, List /*<Token>*/ lst2) {
  ArrayList /*<Token>*/ merged = new ArrayList/*<Token>*/ ();
  // Degenerate case: one (or both) inputs absent — result is a copy of whatever exists.
  if (lst1 == null || lst2 == null) {
    if (lst2 != null) {
      merged.addAll(lst2);
    }
    if (lst1 != null) {
      merged.addAll(lst1);
    }
    return merged;
  }
  int emitted = 0;  // absolute position of the most recently emitted token
  Iterator /*<Token>*/ itA = lst1.iterator();
  Iterator /*<Token>*/ itB = lst2.iterator();
  Token a = (Token)(itA.hasNext() ? itA.next() : null);
  Token b = (Token)(itB.hasNext() ? itB.next() : null);
  // posA/posB track the absolute position of the current token on each side.
  int posA = (a != null) ? a.getPositionIncrement() : 0;
  int posB = (b != null) ? b.getPositionIncrement() : 0;
  while (a != null || b != null) {
    // Drain list 1 while its current token is at or before list 2's position
    // (or list 2 is exhausted). Copies are emitted so the inputs stay untouched.
    while (a != null && (posA <= posB || b == null)) {
      Token copy = new Token(a.startOffset(), a.endOffset(), a.type());
      copy.setTermBuffer(a.termBuffer(), 0, a.termLength());
      copy.setPositionIncrement(posA - emitted);  // increment relative to last emitted token
      merged.add(copy);
      emitted = posA;
      a = (Token)(itA.hasNext() ? itA.next() : null);
      posA += (a != null) ? a.getPositionIncrement() : 0;
    }
    // Symmetric drain of list 2.
    while (b != null && (posB <= posA || a == null)) {
      Token copy = new Token(b.startOffset(), b.endOffset(), b.type());
      copy.setTermBuffer(b.termBuffer(), 0, b.termLength());
      copy.setPositionIncrement(posB - emitted);
      merged.add(copy);
      emitted = posB;
      b = (Token)(itB.hasNext() ? itB.next() : null);
      posB += (b != null) ? b.getPositionIncrement() : 0;
    }
  }
  return merged;
}
/// <summary>
/// Reads the factory's init arguments and builds the synonym map.
/// The "synonyms" argument may name a single resource, or a comma-separated
/// list of resources when no file of that exact name exists; each resource is
/// loaded line-by-line through the supplied <c>loader</c>. If "synonyms" is
/// absent, the method does nothing.
/// </summary>
public void inform(ResourceLoader loader)
{
    string synonyms = (string)args.get("synonyms");
    bool ignoreCase = getBoolean("ignoreCase", false);
    bool expand = getBoolean("expand", true);
    // NOTE(review): the Java original also reads an optional "tokenizerFactory"
    // argument here; that feature is not implemented in this port.
    if (synonyms == null)
    {
        return; // nothing configured — leave synMap untouched
    }
    List /*<String>*/ wlist = null;
    try
    {
        File synonymFile = new File(synonyms);
        if (synonymFile.exists())
        {
            // Single existing file: load it directly.
            wlist = loader.getLines(synonyms);
        }
        else
        {
            // Otherwise treat the value as a comma-separated list of file names
            // and concatenate the lines of every file.
            List /*<String>*/ files = StrUtils.splitFileNames(synonyms);
            wlist = new ArrayList/*<String>*/ ();
            var it = files.iterator();
            while (it.hasNext())
            {
                string file = (string)it.next();
                wlist.addAll(loader.getLines(file.Trim()));
            }
        }
    }
    catch (IOException e)
    {
        // Preserve the original cause when rethrowing.
        throw new System.ApplicationException("Unexpected exception", e);
    }
    synMap = new SynonymMap(ignoreCase);
    parseRules(wlist, synMap, "=>", ",", expand, null);
}