/// <summary>
/// Tokenizes <paramref name="source"/> with the tokenizer produced by the
/// given factory and returns the token texts as a list of strings.
/// </summary>
/// <param name="source">raw text to split</param>
/// <param name="tokFactory">factory used to create the tokenizer</param>
/// <returns>list of non-empty token strings, in stream order</returns>
private static List /*<String>*/ splitByTokenizer(string source, TokenizerFactory tokFactory) {
    StringReader reader = new StringReader(source);
    TokenStream ts = loadTokenizer(tokFactory, reader);
    List /*<String>*/ tokList = new ArrayList/*<String>*/ ();
    try {
        // Token-at-a-time API is deprecated in this Lucene version; suppressed.
#pragma warning disable 612
        for (Token token = ts.next(); token != null; token = ts.next()) {
#pragma warning restore 612
            string text = new string(token.termBuffer(), 0, token.termLength());
            if (text.Length > 0) {
                tokList.add(text); // zero-length tokens are dropped
            }
        }
    } catch (IOException e) {
        // Reading from an in-memory StringReader should never fail.
        throw new System.ApplicationException("Unexpected exception.", e);
    } finally {
        // NOTE(review): only the reader is closed here, not the TokenStream
        // itself -- confirm whether ts.close() is also required.
        reader.close();
    }
    return(tokList);
}
/// <summary>
/// Parses each synonym rule and registers the resulting mappings on the map.
/// A rule is either a bidirectional group ("a,b,c") or an explicit mapping
/// ("a,b => x,y"); the separators are configurable via the parameters.
/// </summary>
/// <param name="rules">raw rule strings, one rule each</param>
/// <param name="map">synonym map to populate</param>
/// <param name="mappingSep">separator between the two sides of a mapping</param>
/// <param name="synSep">separator between synonyms on one side</param>
/// <param name="expansion">true: expand groups to all members; false: reduce to the first member</param>
/// <param name="tokFactory">optional tokenizer used to split entries; null means whitespace splitting</param>
public static void parseRules(List /*<String>*/ rules, SynonymMap map, string mappingSep, string synSep, bool expansion, TokenizerFactory tokFactory) {
    // Splitting is done via StrUtils rather than regexes: backslash
    // unescaping is needed anyway, so regexes would save nothing.
    int ruleCount = 0;
    for (var ruleIter = rules.iterator(); ruleIter.hasNext();) {
        string rule = (string)ruleIter.next();
        List /*<String>*/ sides = StrUtils.splitSmart(rule, mappingSep, false);
        if (sides.size() > 2) {
            throw new System.ApplicationException("Invalid Synonym Rule:" + rule);
        }
        List /*<List<String>>*/ sources;
        List /*<List<String>>*/ targets;
        if (sides.size() == 2) {
            // Explicit mapping: lhs => rhs
            sources = getSynList((string)sides.get(0), synSep, tokFactory);
            targets = getSynList((string)sides.get(1), synSep, tokFactory);
        } else {
            sources = getSynList((string)sides.get(0), synSep, tokFactory);
            if (expansion) {
                // Expansion mode: every entry maps to the whole group.
                targets = sources;
            } else {
                // Reduction mode: everything maps to the first entry only.
                targets = new ArrayList/*<List<String>>*/ (1);
                targets.add(sources.get(0));
            }
        }
        bool keepOriginal = false;
        for (var srcIter = sources.iterator(); srcIter.hasNext();) {
            List /*<String>*/ srcTokens = (List)srcIter.next();
            ruleCount++;
            for (var tgtIter = targets.iterator(); tgtIter.hasNext();) {
                List /*<String>*/ tgtTokens = (List)tgtIter.next();
                map.add(srcTokens, SynonymMap.makeTokens(tgtTokens), keepOriginal, true);
            }
        }
    }
}
/** Splits a backslash escaped string on the separator.
 * <p>
 * Current backslash escaping supported:
 * <br> \n \t \r \b \f are escaped the same as a Java String
 * <br> Other characters following a backslash are produced verbatim (\c => c)
 *
 * @param s the string to split
 * @param separator the separator to split on
 * @param decode decode backslash escaping
 */
public static List /*<String>*/ splitSmart(string s, string separator, bool decode) {
    ArrayList /*<String>*/ result = new ArrayList/*<String>*/ (2);
    System.Text.StringBuilder buf = new System.Text.StringBuilder();
    int i = 0;
    int len = s.Length;
    while (i < len) {
        // Does the separator begin at the current position?
        if (separator.Length <= len - i
            && string.CompareOrdinal(s, i, separator, 0, separator.Length) == 0) {
            if (buf.Length > 0) {
                result.add(buf.ToString());
                buf = new System.Text.StringBuilder();
            }
            i += separator.Length;
            continue;
        }
        char c = s[i++];
        if (c == '\\') {
            if (!decode) {
                buf.Append(c); // keep the backslash verbatim when not decoding
            }
            if (i >= len) {
                break; // trailing backslash: drop it (ERROR, or let it go?)
            }
            c = s[i++];
            if (decode) {
                switch (c) {
                    case 'n': c = '\n'; break;
                    case 't': c = '\t'; break;
                    case 'r': c = '\r'; break;
                    case 'b': c = '\b'; break;
                    case 'f': c = '\f'; break;
                }
            }
        }
        buf.Append(c);
    }
    if (buf.Length > 0) {
        result.add(buf.ToString());
    }
    return(result);
}
/** Splits the string on whitespace, optionally decoding backslash escapes
 * (\n \t \r \b \f; any other escaped char is produced verbatim). */
public static List /*<String>*/ splitWS(string s, bool decode) {
    ArrayList /*<String>*/ result = new ArrayList/*<String>*/ (2);
    System.Text.StringBuilder buf = new System.Text.StringBuilder();
    int i = 0;
    int len = s.Length;
    while (i < len) {
        char c = s[i++];
        if (java.lang.Character.isWhitespace(c)) {
            // Whitespace terminates the current word, if one is pending.
            if (buf.Length > 0) {
                result.add(buf.ToString());
                buf = new System.Text.StringBuilder();
            }
            continue;
        }
        if (c == '\\') {
            if (!decode) {
                buf.Append(c); // keep the backslash verbatim when not decoding
            }
            if (i >= len) {
                break; // trailing backslash: drop it (ERROR, or let it go?)
            }
            c = s[i++];
            if (decode) {
                switch (c) {
                    case 'n': c = '\n'; break;
                    case 't': c = '\t'; break;
                    case 'r': c = '\r'; break;
                    case 'b': c = '\b'; break;
                    case 'f': c = '\f'; break;
                }
            }
        }
        buf.Append(c);
    }
    if (buf.Length > 0) {
        result.add(buf.ToString());
    }
    return(result);
}
/***
 * Return a list of tokens according to a test string format:
 * a b c => returns List<Token> [a,b,c]
 * a/b => tokens a and b share the same spot (b.positionIncrement=0)
 * a,3/b/c => a,b,c all share same position (a.positionIncrement=3, b.positionIncrement=0, c.positionIncrement=0)
 * a,1,10,11 => "a" with positionIncrement=1, startOffset=10, endOffset=11
 */
public List /*<Token>*/ tokens(string str) {
    List /*<Token>*/ output = new ArrayList/*<Token>*/ ();
    foreach (string chunk in str.Split(' ')) {
        string[] sameSpot = chunk.Split('/');
        string[] fields = sameSpot[0].Split(',');
        // Optional fields: posInc, startOffset, endOffset.
        int posInc = fields.Length > 1 ? java.lang.Integer.parseInt(fields[1]) : 1;
        int start = fields.Length > 2 ? java.lang.Integer.parseInt(fields[2]) : 0;
        int end = fields.Length > 3 ? java.lang.Integer.parseInt(fields[3]) : start + fields[0].Length;
        Token tok = new Token(fields[0], start, end, "TEST");
        tok.setPositionIncrement(posInc);
        output.add(tok);
        // Remaining '/'-separated entries occupy the same position (posInc=0).
        for (int j = 1; j < sameSpot.Length; j++) {
            tok = new Token(sameSpot[j], 0, 0, "TEST");
            tok.setPositionIncrement(0);
            output.add(tok);
        }
    }
    return(output);
}
/**
 * <p>
 * This method is converting the independent LinkHashMaps containing various
 * (silo'ed) suggestions for each mis-spelled word into individual
 * "holistic query corrections", aka. "Spell Check Possibility"
 * </p>
 * <p>
 * Rank here is the sum of each selected term's position in its respective
 * LinkedHashMap.
 * </p>
 *
 * @return the next combination of per-word corrections, with its rank
 */
private RankedSpellPossibility internalNext() {
    if (done) {
        throw new NoSuchElementException();
    }
    List /*<SpellCheckCorrection>*/ possibleCorrection = new ArrayList/*<SpellCheckCorrection>*/ ();
    int rank = 0;
    for (int i = 0; i < correctionIndex.Length; i++) {
        // Pick the currently-selected correction for word i.
        List /*<SpellCheckCorrection>*/ singleWordPossibilities = (List)possibilityList.get(i);
        SpellCheckCorrection singleWordPossibility = (SpellCheckCorrection)singleWordPossibilities.get(correctionIndex[i]);
        rank += correctionIndex[i];
        if (i == correctionIndex.Length - 1) {
            // The last index is the fastest-moving "digit": advance it and,
            // on wrap-around, carry into the earlier indexes (odometer-style).
            correctionIndex[i]++;
            if (correctionIndex[i] == singleWordPossibilities.size()) {
                correctionIndex[i] = 0;
                if (correctionIndex.Length == 1) {
                    done = true; // single word: a wrap means we are finished
                }
                for (int ii = i - 1; ii >= 0; ii--) {
                    correctionIndex[ii]++;
                    if (correctionIndex[ii] >= ((List)possibilityList.get(ii)).size() && ii > 0) {
                        correctionIndex[ii] = 0; // carry further left
                    } else {
                        break; // no further carry needed
                    }
                }
            }
        }
        possibleCorrection.add(singleWordPossibility);
    }
    // When the slowest-moving (leftmost) index has run off the end, every
    // combination has been produced.
    if (correctionIndex[0] == ((List)possibilityList.get(0)).size()) {
        done = true;
    }
    RankedSpellPossibility rsl = new RankedSpellPossibility();
    rsl.setCorrections(possibleCorrection);
    rsl.setRank(rank);
    return(rsl);
}
/// <summary>Extracts each token's term text into a list of strings.</summary>
public List /*<String>*/ tok2str(java.lang.Iterable /*<Token>*/ tokLst) {
    ArrayList /*<String>*/ texts = new ArrayList/*<String>*/ ();
    var it = tokLst.iterator();
    while (it.hasNext()) {
        Token tok = (Token)it.next();
        texts.add(new string(tok.termBuffer(), 0, tok.termLength()));
    }
    return(texts);
}
/// <summary>
/// Builds one token per input string; offsets advance by each string's
/// length plus 2 (leaving a gap between consecutive tokens).
/// </summary>
public IterTokenStream(params string[] text) {
    ArrayList /*<Token>*/ tokenList = new ArrayList/*<Token>*/ (text.Length);
    int offset = 0;
    foreach (string s in text) {
        tokenList.add(new Token(s, offset, offset + s.Length));
        offset += s.Length + 2;
    }
    this.toks = tokenList.iterator();
}
/// <summary>
/// Verifies one-way (reduction) rule parsing: with expansion=false every
/// member of a comma group maps to the first member only.
/// </summary>
public void testRead1waySynonymRules() {
    SynonymMap synMap;
    // (a)->[a]
    // (b)->[a]
    List /*<String>*/ rules = new ArrayList/*<String>*/ ();
    rules.add("a,b");
    synMap = new SynonymMap(true);
    SynonymFilterFactory.parseRules(rules, synMap, "=>", ",", false, null);
    Assert.AreEqual(2, synMap.submap.size());
    assertTokIncludes(synMap, "a", "a");
    assertTokIncludes(synMap, "b", "a");
    // (a)->[a]
    // (b)->[a]
    // (c)->[a]
    rules.clear();
    rules.add("a,b,c");
    synMap = new SynonymMap(true);
    SynonymFilterFactory.parseRules(rules, synMap, "=>", ",", false, null);
    Assert.AreEqual(3, synMap.submap.size());
    assertTokIncludes(synMap, "a", "a");
    assertTokIncludes(synMap, "b", "a");
    assertTokIncludes(synMap, "c", "a");
    // (a)->[a]
    // (b1)->(b2)->[a]
    rules.clear();
    rules.add("a,b1 b2");
    synMap = new SynonymMap(true);
    SynonymFilterFactory.parseRules(rules, synMap, "=>", ",", false, null);
    Assert.AreEqual(2, synMap.submap.size());
    assertTokIncludes(synMap, "a", "a");
    Assert.AreEqual(1, getSubSynonymMap(synMap, "b1").submap.size());
    assertTokIncludes(getSubSynonymMap(synMap, "b1"), "b2", "a");
    // (a1)->(a2)->[a1][a2]
    // (b)->[a1][a2]
    rules.clear();
    rules.add("a1 a2,b");
    synMap = new SynonymMap(true);
    SynonymFilterFactory.parseRules(rules, synMap, "=>", ",", false, null);
    Assert.AreEqual(2, synMap.submap.size());
    Assert.AreEqual(1, getSubSynonymMap(synMap, "a1").submap.size());
    assertTokIncludes(getSubSynonymMap(synMap, "a1"), "a2", "a1");
    assertTokIncludes(getSubSynonymMap(synMap, "a1"), "a2", "a2");
    assertTokIncludes(synMap, "b", "a1");
    assertTokIncludes(synMap, "b", "a2");
}
/**
 * Merge two lists of tokens, producing a single list with manipulated positionIncrements so that
 * the tokens end up at the same position.
 *
 * Example: [a b] merged with [c d] produces [a/b c/d] ('/' denotes tokens in the same position)
 * Example: [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2] (a,n means a has posInc=n)
 *
 */
public static List /*<Token>*/ mergeTokens(List /*<Token>*/ lst1, List /*<Token>*/ lst2) {
    ArrayList /*<Token>*/ result = new ArrayList/*<Token>*/ ();
    if (lst1 == null || lst2 == null) {
        // Degenerate case: one (or both) lists missing -- just copy over
        // whichever list exists, unchanged.
        if (lst2 != null) {
            result.addAll(lst2);
        }
        if (lst1 != null) {
            result.addAll(lst1);
        }
        return(result);
    }
    int pos = 0; // absolute position already emitted into the merged stream
    Iterator /*<Token>*/ iter1 = lst1.iterator();
    Iterator /*<Token>*/ iter2 = lst2.iterator();
    Token tok1 = (Token)(iter1.hasNext() ? iter1.next() : null);
    Token tok2 = (Token)(iter2.hasNext() ? iter2.next() : null);
    // pos1/pos2 track each pending token's absolute position in its own stream.
    int pos1 = tok1 != null?tok1.getPositionIncrement() : 0;
    int pos2 = tok2 != null?tok2.getPositionIncrement() : 0;
    while (tok1 != null || tok2 != null) {
        // Emit from stream 1 while it is at or before stream 2's position
        // (or stream 2 is exhausted).
        while (tok1 != null && (pos1 <= pos2 || tok2 == null)) {
            Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type());
            tok.setTermBuffer(tok1.termBuffer(), 0, tok1.termLength());
            // Re-express the absolute position as a delta from the last
            // emitted position.
            tok.setPositionIncrement(pos1 - pos);
            result.add(tok);
            pos = pos1;
            tok1 = (Token)(iter1.hasNext() ? iter1.next() : null);
            pos1 += tok1 != null?tok1.getPositionIncrement() : 0;
        }
        // Symmetric: emit from stream 2 while it is at or before stream 1.
        while (tok2 != null && (pos2 <= pos1 || tok1 == null)) {
            Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type());
            tok.setTermBuffer(tok2.termBuffer(), 0, tok2.termLength());
            tok.setPositionIncrement(pos2 - pos);
            result.add(tok);
            pos = pos2;
            tok2 = (Token)(iter2.hasNext() ? iter2.next() : null);
            pos2 += tok2 != null?tok2.getPositionIncrement() : 0;
        }
    }
    return(result);
}
/** Produces a List<Token> from a List<String> */
public static List /*<Token>*/ makeTokens(List /*<String>*/ strings) {
    List /*<Token>*/ tokens = new ArrayList/*<Token>*/ (strings.size());
    var it = strings.iterator();
    while (it.hasNext()) {
        string text = (string)it.next();
        // Offsets are unused for generated synonym tokens.
        Token tok = new Token(0, 0, "SYNONYM");
        tok.setTermBuffer(text.ToCharArray(), 0, text.Length);
        tokens.add(tok);
    }
    return(tokens);
}
/// <summary>
/// A rule containing two mapping separators ("a=>b=>c") is invalid and must
/// cause parseRules to throw a RuntimeException.
/// </summary>
public void testInvalidMappingRules() {
    SynonymMap synMap = new SynonymMap(true);
    List /*<String>*/ rules = new ArrayList/*<String>*/ (1);
    rules.add("a=>b=>c");
    try{
        SynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, null);
        Assert.Fail("RuntimeException must be thrown.");
    }
#pragma warning disable 168
    catch (java.lang.RuntimeException expected) {
        // expected: the malformed rule was rejected
    }
#pragma warning restore 168
}
/**
 * Split a string based on a separator, but don't split if it's inside
 * a string.  Assume '\' escapes the next char both inside and
 * outside strings.
 */
public static List /*<String>*/ splitSmart(string s, char separator) {
    ArrayList /*<String>*/ parts = new ArrayList/*<String>*/ (4);
    int len = s.Length;
    int i = 0;
    int segStart = 0;
    char quote = (char)0; // the quote char we are currently inside, or 0
    char cur = (char)0;
    while (i < len) {
        char prev = cur;
        cur = s[i++];
        if (cur == '\\') {
            i++; // skip the escaped character
        } else if (quote != 0 && cur == quote) {
            quote = (char)0; // closing quote ends the string
        } else if (cur == '\'' || cur == '"') {
            // A quote directly preceded by a letter/digit does not open a
            // string.  Examples: 50" TV, or can't
            if (!java.lang.Character.isLetterOrDigit(prev)) {
                quote = cur;
            }
        } else if (cur == separator && quote == 0) {
            parts.add(java.lang.String.instancehelper_substring(s, segStart, i - 1));
            segStart = i;
        }
    }
    if (segStart < len) {
        parts.add(java.lang.String.instancehelper_substring(s, segStart, len));
    }
    return(parts);
}
/**
 * Splits file names separated by comma character.
 * File names can contain comma characters escaped by backslash '\'
 *
 * @param fileNames the string containing file names
 * @return a list of file names with the escaping backslashes removed
 */
public static List /*<String>*/ splitFileNames(string fileNames) {
    if (fileNames == null) {
        return(java.util.Collections.emptyList()); // no input -> empty list, never null
    }
    List /*<String>*/ result = new ArrayList/*<String>*/ ();
    // C# System.String has no split/replaceAll; use the IKVM java.lang.String
    // helpers (same convention as splitSmart above).  Split on commas not
    // preceded by a backslash, then strip the escaping backslash from any
    // remaining "\," sequence.
    foreach (string file in java.lang.String.instancehelper_split(fileNames, "(?<!\\\\),")) {
        result.add(java.lang.String.instancehelper_replaceAll(file, "\\\\(?=,)", ""));
    }
    return(result);
}
// a , b c , d e f => [[a],[b,c],[d,e,f]]
private static List /*<List<String>>*/ getSynList(string str, string separator, TokenizerFactory tokFactory) {
    List /*<String>*/ groups = StrUtils.splitSmart(str, separator, false);
    List /*<List<String>>*/ synList = new ArrayList/*<List<String>>*/ ();
    // Each separator-delimited group is further split into individual token
    // strings: by whitespace, or by the configured tokenizer when present.
    var it = groups.iterator();
    while (it.hasNext()) {
        string group = (string)it.next();
        if (tokFactory == null) {
            synList.add(StrUtils.splitWS(group, true));
        } else {
            synList.add(splitByTokenizer(group, tokFactory));
        }
    }
    return(synList);
}
/**
 * <p>
 * We assume here that the passed-in inner LinkedHashMaps are already sorted
 * in order of "Best Possible Correction".
 * </p>
 *
 * @param suggestions map from each mis-spelled token to its ordered suggestions
 */
public PossibilityIterator(Map /*<Token, LinkedHashMap<String, Integer>>*/ suggestions) {
    // Flatten each word's suggestion map into a list of SpellCheckCorrection
    // objects, preserving the incoming (already-sorted) order.
    for (var iter = suggestions.entrySet().iterator(); iter.hasNext();) {
        Map.Entry /*<Token, LinkedHashMap<String, Integer>>*/ entry = (Map.Entry)iter.next();
        Token token = (Token)entry.getKey();
        List /*<SpellCheckCorrection>*/ possibleCorrections = new ArrayList/*<SpellCheckCorrection>*/ ();
        for (var iter1 = ((LinkedHashMap)entry.getValue()).entrySet().iterator(); iter1.hasNext();) {
            Map.Entry /*<String, Integer>*/ entry1 = (Map.Entry)iter1.next();
            SpellCheckCorrection correction = new SpellCheckCorrection();
            correction.setOriginal(token);
            correction.setCorrection((string)entry1.getKey());
            correction.setNumberOfOccurences((int)entry1.getValue());
            possibleCorrections.add(correction);
        }
        possibilityList.add(possibleCorrections);
    }
    int wrapSize = possibilityList.size();
    if (wrapSize == 0) {
        done = true; // no mis-spelled words at all
    } else {
        // correctionIndex[i] is the cursor into word i's correction list,
        // advanced odometer-style by internalNext().
        correctionIndex = new int[wrapSize];
        for (int i = 0; i < wrapSize; i++) {
            int suggestSize = ((List)possibilityList.get(i)).size();
            if (suggestSize == 0) {
                // A word with zero suggestions makes any combination impossible.
                done = true;
                break;
            }
            correctionIndex[i] = 0;
        }
    }
    // Pre-compute every combination, rank it, and sort so callers iterate
    // from best to worst.
    while (internalHasNext()) {
        rankedPossibilityList.add(internalNext());
    }
    Collections.sort(rankedPossibilityList);
    rankedPossibilityIterator = rankedPossibilityList.iterator();
}
/**
 * Converts the original query string to a collection of Lucene Tokens.
 * @param original the original query string
 * @return a Collection of Lucene Tokens
 */
public override Collection /*<Token>*/ convert(string original) {
    if (original == null) // this can happen with q.alt = and no query
    {
        return(Collections.emptyList());
    }
    Collection /*<Token>*/ result = new ArrayList/*<Token>*/ ();
    //TODO: Extract the words using a simple regex, but not query stuff, and then analyze them to produce the token stream
    Matcher matcher = QUERY_REGEX.matcher(original);
    TokenStream stream;
    while (matcher.find()) {
        string word = matcher.group(0);
        // Query operators AND/OR are passed through the regex but must not
        // be spell-checked.
        if (word.Equals("AND") == false && word.Equals("OR") == false) {
            try {
                stream = analyzer.reusableTokenStream("", new StringReader(word));
                // TODO: support custom attributes
                TermAttribute termAtt = (TermAttribute)stream.addAttribute(typeof(TermAttribute));
                FlagsAttribute flagsAtt = (FlagsAttribute)stream.addAttribute(typeof(FlagsAttribute));
                TypeAttribute typeAtt = (TypeAttribute)stream.addAttribute(typeof(TypeAttribute));
                PayloadAttribute payloadAtt = (PayloadAttribute)stream.addAttribute(typeof(PayloadAttribute));
                PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute)stream.addAttribute(typeof(PositionIncrementAttribute));
                stream.reset();
                while (stream.incrementToken()) {
                    // Copy every attribute into a standalone Token; offsets
                    // come from the regex match within the original query.
                    Token token = new Token();
                    token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
                    token.setStartOffset(matcher.start());
                    token.setEndOffset(matcher.end());
                    token.setFlags(flagsAtt.getFlags());
                    token.setType(typeAtt.type());
                    token.setPayload(payloadAtt.getPayload());
                    token.setPositionIncrement(posIncAtt.getPositionIncrement());
                    result.add(token);
                }
            }
#pragma warning disable 168
            catch (IOException e) {
                // NOTE(review): analysis failures are silently skipped so one
                // bad word doesn't abort conversion -- the word simply yields
                // no tokens.  Confirm this best-effort behavior is intended.
            }
#pragma warning restore 168
        }
    }
    return(result);
}
//------------------------------------------------------------------------
// These may be useful beyond test cases...
//------------------------------------------------------------------------
/// <summary>Drains the given TokenStream into a list of tokens.</summary>
static List /*<Token>*/ getTokens(TokenStream tstream) {
    List /*<Token>*/ collected = new ArrayList/*<Token>*/ ();
#pragma warning disable 612
    for (Token tok = tstream.next(); tok != null; tok = tstream.next()) {
        collected.add(tok);
    }
#pragma warning restore 612
    return(collected);
}
/// <summary>
/// Reads the "synonyms" init argument, loads the rule lines from the named
/// file (or comma-separated list of files), and builds this factory's
/// SynonymMap.
/// </summary>
public void inform(ResourceLoader loader) {
    string synonyms = (string)args.get("synonyms");
    bool ignoreCase = getBoolean("ignoreCase", false);
    bool expand = getBoolean("expand", true);
    if (synonyms == null) {
        return; // nothing configured; leave the map unbuilt
    }
    List /*<String>*/ wlist = null;
    try {
        File synonymFile = new File(synonyms);
        if (synonymFile.exists()) {
            // Single file: read it directly.
            wlist = loader.getLines(synonyms);
        } else {
            // Otherwise treat the value as a comma-separated file list and
            // concatenate all their lines.
            List /*<String>*/ files = StrUtils.splitFileNames(synonyms);
            wlist = new ArrayList/*<String>*/ ();
            var it = files.iterator();
            while (it.hasNext()) {
                string file = (string)it.next();
                wlist.addAll(loader.getLines(file.Trim()));
            }
        }
    } catch (IOException e) {
        throw new System.ApplicationException("Unexpected exception", e);
    }
    synMap = new SynonymMap(ignoreCase);
    parseRules(wlist, synMap, "=>", ",", expand, null);
}
/// <summary>
/// Runs the parsed test input through a SynonymFilter backed by dict and
/// collects the produced tokens (cloned, since the filter reuses a token).
/// </summary>
public List /*<Token>*/ getTokList(SynonymMap dict, string input, bool includeOrig) {
    ArrayList /*<Token>*/ collected = new ArrayList/*<Token>*/ ();
    List toks = tokens(input);
    TokenStream source = new IteratorTokenStream(toks.iterator());
    SynonymFilter filter = new SynonymFilter(source, dict);
    Token reusable = new Token(); // test with token reuse
    while (true) {
#pragma warning disable 612
        Token produced = filter.next(reusable);
#pragma warning restore 612
        if (produced == null) {
            break;
        }
        collected.add((Token)produced.clone());
    }
    return(collected);
}
/// <summary>
/// Reads the named resource line by line with the given charset, skipping
/// comment lines (leading '#', tested before trimming) and blank lines;
/// returns the trimmed remaining lines.  The reader is always closed.
/// </summary>
private List /*<String>*/ getLines(string resource, Charset charset) {
    BufferedReader reader = null;
    ArrayList /*<String>*/ lines;
    try {
        reader = new BufferedReader(new InputStreamReader(openResource(resource), charset));
        lines = new ArrayList/*<String>*/ ();
        string line;
        while ((line = reader.readLine()) != null) {
            if (line.StartsWith("#")) {
                continue; // comment line
            }
            line = line.Trim();
            if (line.Length == 0) {
                continue; // blank line
            }
            lines.add(line);
        }
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
    return(lines);
}
/*
 * Need to worry about multiple scenarios:
 *  - need to go for the longest match
 *    a b => foo      #shouldn't match if "a b" is followed by "c d"
 *    a b c d => bar
 *  - need to backtrack - retry matches for tokens already read
 *     a b c d => foo
 *       b c => bar
 *     If the input stream is "a b c x", one will consume "a b c d"
 *     trying to match the first rule... all but "a" should be
 *     pushed back so a match may be made on "b c".
 *  - don't try and match generated tokens (thus need separate queue)
 *    matching is not recursive.
 *  - handle optional generation of original tokens in all these cases,
 *    merging token streams to preserve token positions.
 *  - preserve original positionIncrement of first matched token
 */
#pragma warning disable 672
public override Token next(Token target) {
    while (true) {
        // if there are any generated tokens, return them... don't try any
        // matches against them, as we specifically don't want recursion.
        if (replacement != null && replacement.hasNext()) {
            return((Token)replacement.next());
        }

        // common case fast-path of first token not matching anything
        Token firstTok = nextTok(target);
        if (firstTok == null) {
            return(null); // end of the underlying stream
        }

        // Is there a synonym submap keyed on this token's term?
        SynonymMap result = (SynonymMap)(map.submap != null ? map.submap.get(firstTok.termBuffer(), 0, firstTok.termLength()) : null);
        if (result == null) {
            return(firstTok);
        }

        // OK, we matched a token, so find the longest match.
        matched = new LinkedList/*<Token>*/ ();
        result = match(result);
        if (result == null) {
            // no match, simply return the first token read.
            return(firstTok);
        }

        // reuse, or create new one each time?
        ArrayList /*<Token>*/ generated = new ArrayList/*<Token>*/ (result.synonyms.Length + matched.size() + 1);

        //
        // there was a match... let's generate the new tokens, merging
        // in the matched tokens (position increments need adjusting)
        //
        Token lastTok = (Token)(matched.isEmpty() ? firstTok : matched.getLast());
        bool includeOrig = result.includeOrig();

        Token origTok = includeOrig ? firstTok : null;
        int origPos = firstTok.getPositionIncrement(); // position of origTok in the original stream
        int repPos = 0; // curr position in replacement token stream
        int pos = 0;    // current position in merged token stream

        for (int i = 0; i < result.synonyms.Length; i++) {
            Token repTok = result.synonyms[i];
            // Replacement tokens span the full matched region of the input.
            Token newTok = new Token(firstTok.startOffset(), lastTok.endOffset(), firstTok.type());
            newTok.setTermBuffer(repTok.termBuffer(), 0, repTok.termLength());
            repPos += repTok.getPositionIncrement();
            if (i == 0) {
                repPos = origPos; // make position of first token equal to original
            }

            // if necessary, insert original tokens and adjust position increment
            while (origTok != null && origPos <= repPos) {
                origTok.setPositionIncrement(origPos - pos);
                generated.add(origTok);
                pos += origTok.getPositionIncrement();
                origTok = (Token)(matched.isEmpty() ? null : matched.removeFirst());
                if (origTok != null) {
                    origPos += origTok.getPositionIncrement();
                }
            }

            newTok.setPositionIncrement(repPos - pos);
            generated.add(newTok);
            pos += newTok.getPositionIncrement();
        }

        // finish up any leftover original tokens
        while (origTok != null) {
            origTok.setPositionIncrement(origPos - pos);
            generated.add(origTok);
            pos += origTok.getPositionIncrement();
            origTok = (Token)(matched.isEmpty() ? null : matched.removeFirst());
            if (origTok != null) {
                origPos += origTok.getPositionIncrement();
            }
        }

        // what if we replaced a longer sequence with a shorter one?
        // a/0 b/5 =>  foo/0
        // should I re-create the gap on the next buffered token?
        replacement = generated.iterator();

        // Now return to the top of the loop to read and return the first
        // generated token.. The reason this is done is that we may have generated
        // nothing at all, and may need to continue with more matching logic.
    }
}
/// <summary>
/// Verifies explicit "lhs=>rhs" mapping rule parsing with expansion=true,
/// including multi-token left-hand sides that nest into sub-synonym maps.
/// </summary>
public void testReadMappingRules() {
    SynonymMap synMap;
    // (a)->[b]
    List /*<String>*/ rules = new ArrayList/*<String>*/ ();
    rules.add("a=>b");
    synMap = new SynonymMap(true);
    SynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, null);
    Assert.AreEqual(1, synMap.submap.size());
    assertTokIncludes(synMap, "a", "b");
    // (a)->[c]
    // (b)->[c]
    rules.clear();
    rules.add("a,b=>c");
    synMap = new SynonymMap(true);
    SynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, null);
    Assert.AreEqual(2, synMap.submap.size());
    assertTokIncludes(synMap, "a", "c");
    assertTokIncludes(synMap, "b", "c");
    // (a)->[b][c]
    rules.clear();
    rules.add("a=>b,c");
    synMap = new SynonymMap(true);
    SynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, null);
    Assert.AreEqual(1, synMap.submap.size());
    assertTokIncludes(synMap, "a", "b");
    assertTokIncludes(synMap, "a", "c");
    // (a)->(b)->[a2]
    //      [a1]
    rules.clear();
    rules.add("a=>a1");
    rules.add("a b=>a2");
    synMap = new SynonymMap(true);
    SynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, null);
    Assert.AreEqual(1, synMap.submap.size());
    assertTokIncludes(synMap, "a", "a1");
    Assert.AreEqual(1, getSubSynonymMap(synMap, "a").submap.size());
    assertTokIncludes(getSubSynonymMap(synMap, "a"), "b", "a2");
    // (a)->(b)->[a2]
    //      (c)->[a3]
    //      [a1]
    rules.clear();
    rules.add("a=>a1");
    rules.add("a b=>a2");
    rules.add("a c=>a3");
    synMap = new SynonymMap(true);
    SynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, null);
    Assert.AreEqual(1, synMap.submap.size());
    assertTokIncludes(synMap, "a", "a1");
    Assert.AreEqual(2, getSubSynonymMap(synMap, "a").submap.size());
    assertTokIncludes(getSubSynonymMap(synMap, "a"), "b", "a2");
    assertTokIncludes(getSubSynonymMap(synMap, "a"), "c", "a3");
    // (a)->(b)->[a2]
    //      [a1]
    // (b)->(c)->[b2]
    //      [b1]
    rules.clear();
    rules.add("a=>a1");
    rules.add("a b=>a2");
    rules.add("b=>b1");
    rules.add("b c=>b2");
    synMap = new SynonymMap(true);
    SynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, null);
    Assert.AreEqual(2, synMap.submap.size());
    assertTokIncludes(synMap, "a", "a1");
    Assert.AreEqual(1, getSubSynonymMap(synMap, "a").submap.size());
    assertTokIncludes(getSubSynonymMap(synMap, "a"), "b", "a2");
    assertTokIncludes(synMap, "b", "b1");
    Assert.AreEqual(1, getSubSynonymMap(synMap, "b").submap.size());
    assertTokIncludes(getSubSynonymMap(synMap, "b"), "c", "b2");
}