Example #1
        public virtual void TestIncludeOrig()
        {
            SlowSynonymMap map = new SlowSynonymMap();

            bool orig  = true;
            bool merge = true;

            map.Add(Strings("a b"), Tokens("ab"), orig, merge);
            map.Add(Strings("a c"), Tokens("ac"), orig, merge);
            map.Add(Strings("a"), Tokens("aa"), orig, merge);
            map.Add(Strings("b"), Tokens("bb"), orig, merge);
            map.Add(Strings("z x c v"), Tokens("zxcv"), orig, merge);
            map.Add(Strings("x c"), Tokens("xc"), orig, merge);

            AssertTokenizesTo(map, "$", new string[] { "$" }, new int[] { 1 });
            AssertTokenizesTo(map, "a", new string[] { "a", "aa" }, new int[] { 1, 0 });
            AssertTokenizesTo(map, "a", new string[] { "a", "aa" }, new int[] { 1, 0 });
            AssertTokenizesTo(map, "$ a", new string[] { "$", "a", "aa" }, new int[] { 1, 1, 0 });
            AssertTokenizesTo(map, "a $", new string[] { "a", "aa", "$" }, new int[] { 1, 0, 1 });
            AssertTokenizesTo(map, "$ a !", new string[] { "$", "a", "aa", "!" }, new int[] { 1, 1, 0, 1 });
            AssertTokenizesTo(map, "a a", new string[] { "a", "aa", "a", "aa" }, new int[] { 1, 0, 1, 0 });
            AssertTokenizesTo(map, "b", new string[] { "b", "bb" }, new int[] { 1, 0 });
            AssertTokenizesTo(map, "z x c v", new string[] { "z", "zxcv", "x", "c", "v" }, new int[] { 1, 0, 1, 1, 1 });
            AssertTokenizesTo(map, "z x c $", new string[] { "z", "x", "xc", "c", "$" }, new int[] { 1, 1, 0, 1, 1 });

            // check for lack of recursion
            map.Add(Strings("zoo zoo"), Tokens("zoo"), orig, merge);
            // CHECKME: I think the previous test (with 4 zoos) was just a typo.
            AssertTokenizesTo(map, "zoo zoo $ zoo", new string[] { "zoo", "zoo", "zoo", "$", "zoo" }, new int[] { 1, 0, 1, 1, 1 });

            map.Add(Strings("zoo"), Tokens("zoo zoo"), orig, merge);
            AssertTokenizesTo(map, "zoo zoo $ zoo", new string[] { "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo" }, new int[] { 1, 0, 1, 1, 1, 0, 1 });
        }
Example #2
        public virtual void TestBigramTokenizer()
        {
            SlowSynonymMap synMap;

            // prepare bi-gram tokenizer factory
            IDictionary<string, string> args = new Dictionary<string, string>();

            args[AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM] = "4.4";
            args["minGramSize"] = "2";
            args["maxGramSize"] = "2";
            TokenizerFactory tf = new NGramTokenizerFactory(args);

            // (ab)->(bc)->(cd)->[ef][fg][gh]
            IList<string> rules = new List<string>();

            rules.Add("abcd=>efgh");
            synMap = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(rules, synMap, "=>", ",", true, tf);
            assertEquals(1, synMap.Submap.size());
            assertEquals(1, GetSubSynonymMap(synMap, "ab").Submap.size());
            assertEquals(1, GetSubSynonymMap(GetSubSynonymMap(synMap, "ab"), "bc").Submap.size());
            AssertTokIncludes(GetSubSynonymMap(GetSubSynonymMap(synMap, "ab"), "bc"), "cd", "ef");
            AssertTokIncludes(GetSubSynonymMap(GetSubSynonymMap(synMap, "ab"), "bc"), "cd", "fg");
            AssertTokIncludes(GetSubSynonymMap(GetSubSynonymMap(synMap, "ab"), "bc"), "cd", "gh");
        }
Example #3
        public virtual void TestMapMerge()
        {
            SlowSynonymMap map = new SlowSynonymMap();

            bool orig  = false;
            bool merge = true;

            map.Add(Strings("a"), Tokens("a5,5"), orig, merge);
            map.Add(Strings("a"), Tokens("a3,3"), orig, merge);

            AssertTokenizesTo(map, "a", new string[] { "a3", "a5" }, new int[] { 1, 2 });

            map.Add(Strings("b"), Tokens("b3,3"), orig, merge);
            map.Add(Strings("b"), Tokens("b5,5"), orig, merge);

            AssertTokenizesTo(map, "b", new string[] { "b3", "b5" }, new int[] { 1, 2 });

            map.Add(Strings("a"), Tokens("A3,3"), orig, merge);
            map.Add(Strings("a"), Tokens("A5,5"), orig, merge);

            AssertTokenizesTo(map, "a", new string[] { "a3", "A3", "a5", "A5" }, new int[] { 1, 0, 2, 0 });

            map.Add(Strings("a"), Tokens("a1"), orig, merge);
            AssertTokenizesTo(map, "a", new string[] { "a1", "a3", "A3", "a5", "A5" }, new int[] { 1, 2, 0, 2, 0 });

            map.Add(Strings("a"), Tokens("a2,2"), orig, merge);
            map.Add(Strings("a"), Tokens("a4,4 a6,2"), orig, merge);
            AssertTokenizesTo(map, "a", new string[] { "a1", "a2", "a3", "A3", "a4", "a5", "A5", "a6" }, new int[] { 1, 1, 1, 0, 1, 1, 0, 1 });
        }
Example #4
        internal static void AssertTokenizesTo(SlowSynonymMap dict, string input, string[] expected, int[] posIncs)
        {
            Tokenizer         tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
            SlowSynonymFilter stream    = new SlowSynonymFilter(tokenizer, dict);

            AssertTokenStreamContents(stream, expected, posIncs);
        }
Example #5
        internal static void AssertTokenizesTo(SlowSynonymMap dict, IList<Token> input, string[] expected, int[] startOffsets, int[] endOffsets, int[] posIncs)
        {
            TokenStream       tokenizer = new IterTokenStream(input);
            SlowSynonymFilter stream    = new SlowSynonymFilter(tokenizer, dict);

            AssertTokenStreamContents(stream, expected, startOffsets, endOffsets, posIncs);
        }
Example #6
        /// <param name="singleMatch">  List<String>, the sequence of strings to match </param>
        /// <param name="replacement">  List<Token> the list of tokens to use on a match </param>
        /// <param name="includeOrig">  sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens </param>
        /// <param name="mergeExisting"> merge the replacement tokens with any other mappings that exist </param>
        public virtual void Add(IList<string> singleMatch, IList<Token> replacement, bool includeOrig, bool mergeExisting)
        {
            var currMap = this;
            foreach (string str in singleMatch)
            {
                if (currMap.submap == null)
                {
                    // for now hardcode at 4.0, as it's what the old code did.
                    // would be nice to fix, but shouldn't store a version in each submap!!!
                    currMap.submap = new CharArrayMap<SlowSynonymMap>(Lucene.Net.Util.Version.LUCENE_CURRENT, 1, IgnoreCase());
                }

                var map = currMap.submap.Get(str);
                if (map == null)
                {
                    map = new SlowSynonymMap();
                    map.flags |= flags & IGNORE_CASE;
                    currMap.submap.put(str, map);
                }

                currMap = map;
            }

            if (currMap.synonyms != null && !mergeExisting)
            {
                throw new System.ArgumentException("SynonymFilter: there is already a mapping for " + singleMatch);
            }
            IList<Token> superset = currMap.synonyms == null ? replacement : MergeTokens(currMap.synonyms, replacement);
            currMap.synonyms = superset.ToArray();
            if (includeOrig)
            {
                currMap.flags |= INCLUDE_ORIG;
            }
        }
Example #7
        public virtual void TestMultiWordSynonymsOld()
        {
            IList<string> rules = new JCG.List<string>();
            rules.Add("a b c,d");
            SlowSynonymMap synMap = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(rules, synMap, "=>", ",", true, null);

            SlowSynonymFilter ts = new SlowSynonymFilter(new MockTokenizer(new StringReader("a e"), MockTokenizer.WHITESPACE, false), synMap);
            // This fails because ["e","e"] is the value of the token stream
            AssertTokenStreamContents(ts, new string[] { "a", "e" });
        }
        internal static void ParseRules(IEnumerable<string> rules, SlowSynonymMap map, string mappingSep, string synSep, bool expansion, TokenizerFactory tokFactory)
        {
            int count = 0;

            foreach (string rule in rules)
            {
                // To use regexes, we need an expression that specifies an odd number of chars.
                // This can't really be done with string.split(), and since we need to
                // do unescaping at some point anyway, we wouldn't be saving any effort
                // by using regexes.

                IList<string> mapping = SplitSmart(rule, mappingSep, false);

                IList<IList<string>> source;
                IList<IList<string>> target;

                if (mapping.Count > 2)
                {
                    throw new ArgumentException("Invalid Synonym Rule:" + rule);
                }
                else if (mapping.Count == 2)
                {
                    source = GetSynList(mapping[0], synSep, tokFactory);
                    target = GetSynList(mapping[1], synSep, tokFactory);
                }
                else
                {
                    source = GetSynList(mapping[0], synSep, tokFactory);
                    if (expansion)
                    {
                        // expand to all arguments
                        target = source;
                    }
                    else
                    {
                        // reduce to first argument
                        target = new List<IList<string>>(1)
                        {
                            source[0]
                        };
                    }
                }

                bool includeOrig = false;
                foreach (IList<string> fromToks in source)
                {
                    count++;
                    foreach (IList<string> toToks in target)
                    {
                        map.Add(fromToks, SlowSynonymMap.MakeTokens(toToks), includeOrig, true);
                    }
                }
            }
        }
        public virtual void TestMultiWordSynonymsOld()
        {
            IList<string> rules = new List<string>();
            rules.Add("a b c,d");
            SlowSynonymMap synMap = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(rules, synMap, "=>", ",", true, null);

            SlowSynonymFilter ts = new SlowSynonymFilter(new MockTokenizer(new StringReader("a e"), MockTokenizer.WHITESPACE, false), synMap);
            // This fails because ["e","e"] is the value of the token stream
            AssertTokenStreamContents(ts, new string[] { "a", "e" });
        }
Example #10
        public virtual void TestRead1waySynonymRules()
        {
            SlowSynonymMap synMap;

            // (a)->[a]
            // (b)->[a]
            IList<string> rules = new List<string>();

            rules.Add("a,b");
            synMap = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(rules, synMap, "=>", ",", false, null);
            assertEquals(2, synMap.Submap.size());
            AssertTokIncludes(synMap, "a", "a");
            AssertTokIncludes(synMap, "b", "a");

            // (a)->[a]
            // (b)->[a]
            // (c)->[a]
            rules.Clear();
            rules.Add("a,b,c");
            synMap = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(rules, synMap, "=>", ",", false, null);
            assertEquals(3, synMap.Submap.size());
            AssertTokIncludes(synMap, "a", "a");
            AssertTokIncludes(synMap, "b", "a");
            AssertTokIncludes(synMap, "c", "a");

            // (a)->[a]
            // (b1)->(b2)->[a]
            rules.Clear();
            rules.Add("a,b1 b2");
            synMap = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(rules, synMap, "=>", ",", false, null);
            assertEquals(2, synMap.Submap.size());
            AssertTokIncludes(synMap, "a", "a");
            assertEquals(1, GetSubSynonymMap(synMap, "b1").Submap.size());
            AssertTokIncludes(GetSubSynonymMap(synMap, "b1"), "b2", "a");

            // (a1)->(a2)->[a1][a2]
            // (b)->[a1][a2]
            rules.Clear();
            rules.Add("a1 a2,b");
            synMap = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(rules, synMap, "=>", ",", false, null);
            assertEquals(2, synMap.Submap.size());
            assertEquals(1, GetSubSynonymMap(synMap, "a1").Submap.size());
            AssertTokIncludes(GetSubSynonymMap(synMap, "a1"), "a2", "a1");
            AssertTokIncludes(GetSubSynonymMap(synMap, "a1"), "a2", "a2");
            AssertTokIncludes(synMap, "b", "a1");
            AssertTokIncludes(synMap, "b", "a2");
        }
Example #11
        private void AssertTokIncludes(SlowSynonymMap map, string src, string exp)
        {
            Token[] tokens = map.Submap.Get(src).Synonyms;
            bool    inc    = false;

            foreach (Token token in tokens)
            {
                if (exp.Equals(new string(token.Buffer, 0, token.Length), StringComparison.Ordinal))
                {
                    inc = true;
                }
            }
            assertTrue(inc);
        }
        public void Inform(IResourceLoader loader)
        {
            TokenizerFactory tokFactory = null;

            if (tf != null)
            {
                tokFactory = LoadTokenizerFactory(loader, tf);
            }

            IEnumerable<string> wlist = LoadRules(synonyms, loader);

            synMap = new SlowSynonymMap(ignoreCase);
            ParseRules(wlist, synMap, "=>", ",", expand, tokFactory);
        }
        private IEnumerator<AttributeSource> replacement; // iterator over generated tokens

        public SlowSynonymFilter(TokenStream @in, SlowSynonymMap map) : base(@in)
        {
            if (map == null)
            {
                throw new System.ArgumentException("map is required", "map");
            }

            this.map = map;
            // just ensuring these attributes exist...
            AddAttribute<ICharTermAttribute>();
            AddAttribute<IPositionIncrementAttribute>();
            AddAttribute<IOffsetAttribute>();
            AddAttribute<TypeAttribute>();
        }
Example #14
        private IEnumerator<AttributeSource> replacement; // iterator over generated tokens

        public SlowSynonymFilter(TokenStream @in, SlowSynonymMap map) : base(@in)
        {
            if (map == null)
            {
                throw new System.ArgumentException("map is required", "map");
            }

            this.map = map;
            // just ensuring these attributes exist...
            AddAttribute<ICharTermAttribute>();
            AddAttribute<IPositionIncrementAttribute>();
            AddAttribute<IOffsetAttribute>();
            AddAttribute<TypeAttribute>();
        }
Example #15
        public virtual void TestInvalidMappingRules()
        {
            SlowSynonymMap synMap = new SlowSynonymMap(true);
            IList<string> rules = new List<string>(1);

            rules.Add("a=>b=>c");
            try
            {
                SlowSynonymFilterFactory.ParseRules(rules, synMap, "=>", ",", true, null);
                fail("IllegalArgumentException must be thrown.");
            }
            catch (ArgumentException)
            {
            }
        }
Example #16
        public virtual void TestLoadRules()
        {
            IDictionary<string, string> args = new Dictionary<string, string>();

            args["synonyms"] = "something.txt";
            SlowSynonymFilterFactory ff = new SlowSynonymFilterFactory(args);

            ff.Inform(new ResourceLoaderAnonymousInnerClassHelper());

            SlowSynonymMap synMap = ff.SynonymMap;

            assertEquals(2, synMap.Submap.size());
            AssertTokIncludes(synMap, "a", "a");
            AssertTokIncludes(synMap, "a", "b");
            AssertTokIncludes(synMap, "b", "a");
            AssertTokIncludes(synMap, "b", "b");
        }
Example #17
        public virtual void TestOffsetBug()
        {
            // With the following rules:
            // a a=>b
            // x=>y
            // analysing "a x" causes "y" to have a bad offset (end less than start)
            // SOLR-167
            SlowSynonymMap map = new SlowSynonymMap();

            bool orig  = false;
            bool merge = true;

            map.Add(Strings("a a"), Tokens("b"), orig, merge);
            map.Add(Strings("x"), Tokens("y"), orig, merge);

            // "a a x" => "b y"
            AssertTokenizesTo(map, Tokens("a,1,0,1 a,1,2,3 x,1,4,5"), new string[] { "b", "y" }, new int[] { 0, 4 }, new int[] { 3, 5 }, new int[] { 1, 1 });
        }
Example #18
        private SlowSynonymMap Match(SlowSynonymMap map)
        {
            SlowSynonymMap result = null;

            if (map.Submap != null)
            {
                AttributeSource tok = NextTok();
                if (tok != null)
                {
                    // clone ourselves.
                    if (tok == this)
                    {
                        tok = CloneAttributes();
                    }
                    // check for positionIncrement!=1?  if>1, should not match, if==0, check multiple at this level?
                    var termAtt = tok.GetAttribute<ICharTermAttribute>();
                    SlowSynonymMap subMap = map.Submap.Get(termAtt.Buffer, 0, termAtt.Length);

                    if (subMap != null)
                    {
                        // recurse
                        result = Match(subMap);
                    }

                    if (result != null)
                    {
                        matched.AddFirst(tok);
                    }
                    else
                    {
                        // push back unmatched token
                        PushTok(tok);
                    }
                }
            }

            // no longer sequence matched, so if this node has synonyms, it's the match.
            if (result == null && map.Synonyms != null)
            {
                result = map;
            }

            return(result);
        }
Example #19
        public virtual void TestOverlap()
        {
            SlowSynonymMap map = new SlowSynonymMap();

            bool orig  = false;
            bool merge = true;

            map.Add(Strings("qwe"), Tokens("qq/ww/ee"), orig, merge);
            map.Add(Strings("qwe"), Tokens("xx"), orig, merge);
            map.Add(Strings("qwe"), Tokens("yy"), orig, merge);
            map.Add(Strings("qwe"), Tokens("zz"), orig, merge);
            AssertTokenizesTo(map, "$", new string[] { "$" });
            AssertTokenizesTo(map, "qwe", new string[] { "qq", "ww", "ee", "xx", "yy", "zz" }, new int[] { 1, 0, 0, 0, 0, 0 });

            // test merging within the map

            map.Add(Strings("a"), Tokens("a5,5 a8,3 a10,2"), orig, merge);
            map.Add(Strings("a"), Tokens("a3,3 a7,4 a9,2 a11,2 a111,100"), orig, merge);
            AssertTokenizesTo(map, "a", new string[] { "a3", "a5", "a7", "a8", "a9", "a10", "a11", "a111" }, new int[] { 1, 2, 2, 1, 1, 1, 1, 100 });
        }
Example #20
        public virtual void TestPositionIncrementsWithOrig()
        {
            SlowSynonymMap map = new SlowSynonymMap();

            bool orig  = true;
            bool merge = true;

            // test that generated tokens start at the same offset as the original
            map.Add(Strings("a"), Tokens("aa"), orig, merge);
            AssertTokenizesTo(map, Tokens("a,5"), new string[] { "a", "aa" }, new int[] { 5, 0 });
            AssertTokenizesTo(map, Tokens("b,1 a,0"), new string[] { "b", "a", "aa" }, new int[] { 1, 0, 0 });

            // test that offset of first replacement is ignored (always takes the orig offset)
            map.Add(Strings("b"), Tokens("bb,100"), orig, merge);
            AssertTokenizesTo(map, Tokens("b,5"), new string[] { "b", "bb" }, new int[] { 5, 0 });
            AssertTokenizesTo(map, Tokens("c,1 b,0"), new string[] { "c", "b", "bb" }, new int[] { 1, 0, 0 });

            // test that subsequent tokens are adjusted accordingly
            map.Add(Strings("c"), Tokens("cc,100 c2,2"), orig, merge);
            AssertTokenizesTo(map, Tokens("c,5"), new string[] { "c", "cc", "c2" }, new int[] { 5, 0, 2 });
            AssertTokenizesTo(map, Tokens("d,1 c,0"), new string[] { "d", "c", "cc", "c2" }, new int[] { 1, 0, 0, 2 });
        }
Example #21
        public virtual void TestMatching()
        {
            SlowSynonymMap map = new SlowSynonymMap();

            bool orig  = false;
            bool merge = true;

            map.Add(Strings("a b"), Tokens("ab"), orig, merge);
            map.Add(Strings("a c"), Tokens("ac"), orig, merge);
            map.Add(Strings("a"), Tokens("aa"), orig, merge);
            map.Add(Strings("b"), Tokens("bb"), orig, merge);
            map.Add(Strings("z x c v"), Tokens("zxcv"), orig, merge);
            map.Add(Strings("x c"), Tokens("xc"), orig, merge);

            AssertTokenizesTo(map, "$", new string[] { "$" });
            AssertTokenizesTo(map, "a", new string[] { "aa" });
            AssertTokenizesTo(map, "a $", new string[] { "aa", "$" });
            AssertTokenizesTo(map, "$ a", new string[] { "$", "aa" });
            AssertTokenizesTo(map, "a a", new string[] { "aa", "aa" });
            AssertTokenizesTo(map, "b", new string[] { "bb" });
            AssertTokenizesTo(map, "z x c v", new string[] { "zxcv" });
            AssertTokenizesTo(map, "z x c $", new string[] { "z", "xc", "$" });

            // repeats
            map.Add(Strings("a b"), Tokens("ab"), orig, merge);
            map.Add(Strings("a b"), Tokens("ab"), orig, merge);

            // FIXME: the below test was intended to be { "ab" }
            AssertTokenizesTo(map, "a b", new string[] { "ab", "ab", "ab" });

            // check for lack of recursion
            map.Add(Strings("zoo"), Tokens("zoo"), orig, merge);
            AssertTokenizesTo(map, "zoo zoo $ zoo", new string[] { "zoo", "zoo", "$", "zoo" });
            map.Add(Strings("zoo"), Tokens("zoo zoo"), orig, merge);
            // FIXME: the below test intended to be { "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo" }
            // maybe this was just a typo in the old test????
            AssertTokenizesTo(map, "zoo zoo $ zoo", new string[] { "zoo", "zoo", "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo" });
        }
        public virtual void TestMatching()
        {
            SlowSynonymMap map = new SlowSynonymMap();

            bool orig = false;
            bool merge = true;
            map.Add(Strings("a b"), Tokens("ab"), orig, merge);
            map.Add(Strings("a c"), Tokens("ac"), orig, merge);
            map.Add(Strings("a"), Tokens("aa"), orig, merge);
            map.Add(Strings("b"), Tokens("bb"), orig, merge);
            map.Add(Strings("z x c v"), Tokens("zxcv"), orig, merge);
            map.Add(Strings("x c"), Tokens("xc"), orig, merge);

            AssertTokenizesTo(map, "$", new string[] { "$" });
            AssertTokenizesTo(map, "a", new string[] { "aa" });
            AssertTokenizesTo(map, "a $", new string[] { "aa", "$" });
            AssertTokenizesTo(map, "$ a", new string[] { "$", "aa" });
            AssertTokenizesTo(map, "a a", new string[] { "aa", "aa" });
            AssertTokenizesTo(map, "b", new string[] { "bb" });
            AssertTokenizesTo(map, "z x c v", new string[] { "zxcv" });
            AssertTokenizesTo(map, "z x c $", new string[] { "z", "xc", "$" });

            // repeats
            map.Add(Strings("a b"), Tokens("ab"), orig, merge);
            map.Add(Strings("a b"), Tokens("ab"), orig, merge);

            // FIXME: the below test was intended to be { "ab" }
            AssertTokenizesTo(map, "a b", new string[] { "ab", "ab", "ab" });

            // check for lack of recursion
            map.Add(Strings("zoo"), Tokens("zoo"), orig, merge);
            AssertTokenizesTo(map, "zoo zoo $ zoo", new string[] { "zoo", "zoo", "$", "zoo" });
            map.Add(Strings("zoo"), Tokens("zoo zoo"), orig, merge);
            // FIXME: the below test intended to be { "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo" }
            // maybe this was just a typo in the old test????
            AssertTokenizesTo(map, "zoo zoo $ zoo", new string[] { "zoo", "zoo", "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo" });
        }
Example #23
        /// <param name="singleMatch">  List<String>, the sequence of strings to match </param>
        /// <param name="replacement">  List<Token> the list of tokens to use on a match </param>
        /// <param name="includeOrig">  sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens </param>
        /// <param name="mergeExisting"> merge the replacement tokens with any other mappings that exist </param>
        public virtual void add(IList<string> singleMatch, IList<Token> replacement, bool includeOrig, bool mergeExisting)
        {
            SlowSynonymMap currMap = this;

            foreach (string str in singleMatch)
            {
                if (currMap.submap == null)
                {
                    // for now hardcode at 4.0, as it's what the old code did.
                    // would be nice to fix, but shouldn't store a version in each submap!!!
                    currMap.submap = new CharArrayMap<SlowSynonymMap>(Version.LUCENE_CURRENT, 1, ignoreCase());
                }

                SlowSynonymMap map = currMap.submap.get(str);
                if (map == null)
                {
                    map        = new SlowSynonymMap();
                    map.flags |= flags & IGNORE_CASE;
                    currMap.submap.put(str, map);
                }

                currMap = map;
            }

            if (currMap.synonyms != null && !mergeExisting)
            {
                throw new System.ArgumentException("SynonymFilter: there is already a mapping for " + singleMatch);
            }
            IList<Token> superset = currMap.synonyms == null ? replacement : mergeTokens(currMap.synonyms, replacement);

            currMap.synonyms = superset.ToArray();
            if (includeOrig)
            {
                currMap.flags |= INCLUDE_ORIG;
            }
        }
        internal static void parseRules(IEnumerable<string> rules, SlowSynonymMap map, string mappingSep, string synSep, bool expansion, TokenizerFactory tokFactory)
        {
            int count = 0;
            foreach (string rule in rules)
            {
                // To use regexes, we need an expression that specifies an odd number of chars.
                // This can't really be done with string.split(), and since we need to
                // do unescaping at some point anyway, we wouldn't be saving any effort
                // by using regexes.

                IList<string> mapping = splitSmart(rule, mappingSep, false);

                IList<IList<string>> source;
                IList<IList<string>> target;

                if (mapping.Count > 2)
                {
                    throw new System.ArgumentException("Invalid Synonym Rule:" + rule);
                }
                else if (mapping.Count == 2)
                {
                    source = getSynList(mapping[0], synSep, tokFactory);
                    target = getSynList(mapping[1], synSep, tokFactory);
                }
                else
                {
                    source = getSynList(mapping[0], synSep, tokFactory);
                    if (expansion)
                    {
                        // expand to all arguments
                        target = source;
                    }
                    else
                    {
                        // reduce to first argument
                        target = new List<IList<string>>(1);
                        target.Add(source[0]);
                    }
                }

                bool includeOrig = false;
                foreach (IList<string> fromToks in source)
                {
                    count++;
                    foreach (IList<string> toToks in target)
                    {
                        map.add(fromToks, SlowSynonymMap.makeTokens(toToks), includeOrig, true);
                    }
                }
            }
        }
        public virtual void TestMapMerge()
        {
            SlowSynonymMap map = new SlowSynonymMap();

            bool orig = false;
            bool merge = true;
            map.Add(Strings("a"), Tokens("a5,5"), orig, merge);
            map.Add(Strings("a"), Tokens("a3,3"), orig, merge);

            AssertTokenizesTo(map, "a", new string[] { "a3", "a5" }, new int[] { 1, 2 });

            map.Add(Strings("b"), Tokens("b3,3"), orig, merge);
            map.Add(Strings("b"), Tokens("b5,5"), orig, merge);

            AssertTokenizesTo(map, "b", new string[] { "b3", "b5" }, new int[] { 1, 2 });

            map.Add(Strings("a"), Tokens("A3,3"), orig, merge);
            map.Add(Strings("a"), Tokens("A5,5"), orig, merge);

            AssertTokenizesTo(map, "a", new string[] { "a3", "A3", "a5", "A5" }, new int[] { 1, 0, 2, 0 });

            map.Add(Strings("a"), Tokens("a1"), orig, merge);
            AssertTokenizesTo(map, "a", new string[] { "a1", "a3", "A3", "a5", "A5" }, new int[] { 1, 2, 0, 2, 0 });

            map.Add(Strings("a"), Tokens("a2,2"), orig, merge);
            map.Add(Strings("a"), Tokens("a4,4 a6,2"), orig, merge);
            AssertTokenizesTo(map, "a", new string[] { "a1", "a2", "a3", "A3", "a4", "a5", "A5", "a6" }, new int[] { 1, 1, 1, 0, 1, 1, 0, 1 });
        }
        internal static void AssertTokenizesTo(SlowSynonymMap dict, IList<Token> input, string[] expected, int[] startOffsets, int[] endOffsets, int[] posIncs)
        {
            TokenStream tokenizer = new IterTokenStream(input);
            SlowSynonymFilter stream = new SlowSynonymFilter(tokenizer, dict);
            AssertTokenStreamContents(stream, expected, startOffsets, endOffsets, posIncs);
        }
        internal static void AssertTokenizesTo(SlowSynonymMap dict, string input, string[] expected, int[] posIncs)
        {
            Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
            SlowSynonymFilter stream = new SlowSynonymFilter(tokenizer, dict);
            AssertTokenStreamContents(stream, expected, posIncs);
        }
        public virtual void TestOverlap()
        {
            SlowSynonymMap map = new SlowSynonymMap();

            bool orig = false;
            bool merge = true;
            map.Add(Strings("qwe"), Tokens("qq/ww/ee"), orig, merge);
            map.Add(Strings("qwe"), Tokens("xx"), orig, merge);
            map.Add(Strings("qwe"), Tokens("yy"), orig, merge);
            map.Add(Strings("qwe"), Tokens("zz"), orig, merge);
            AssertTokenizesTo(map, "$", new string[] { "$" });
            AssertTokenizesTo(map, "qwe", new string[] { "qq", "ww", "ee", "xx", "yy", "zz" }, new int[] { 1, 0, 0, 0, 0, 0 });

            // test merging within the map

            map.Add(Strings("a"), Tokens("a5,5 a8,3 a10,2"), orig, merge);
            map.Add(Strings("a"), Tokens("a3,3 a7,4 a9,2 a11,2 a111,100"), orig, merge);
            AssertTokenizesTo(map, "a", new string[] { "a3", "a5", "a7", "a8", "a9", "a10", "a11", "a111" }, new int[] { 1, 2, 2, 1, 1, 1, 1, 100 });
        }
Example #29
        private SlowSynonymMap GetSubSynonymMap(SlowSynonymMap map, string src)
        {
            return map.Submap.Get(src);
        }
        public virtual void TestIncludeOrig()
        {
            SlowSynonymMap map = new SlowSynonymMap();

            bool orig = true;
            bool merge = true;
            map.Add(Strings("a b"), Tokens("ab"), orig, merge);
            map.Add(Strings("a c"), Tokens("ac"), orig, merge);
            map.Add(Strings("a"), Tokens("aa"), orig, merge);
            map.Add(Strings("b"), Tokens("bb"), orig, merge);
            map.Add(Strings("z x c v"), Tokens("zxcv"), orig, merge);
            map.Add(Strings("x c"), Tokens("xc"), orig, merge);

            AssertTokenizesTo(map, "$", new string[] { "$" }, new int[] { 1 });
            AssertTokenizesTo(map, "a", new string[] { "a", "aa" }, new int[] { 1, 0 });
            AssertTokenizesTo(map, "a", new string[] { "a", "aa" }, new int[] { 1, 0 });
            AssertTokenizesTo(map, "$ a", new string[] { "$", "a", "aa" }, new int[] { 1, 1, 0 });
            AssertTokenizesTo(map, "a $", new string[] { "a", "aa", "$" }, new int[] { 1, 0, 1 });
            AssertTokenizesTo(map, "$ a !", new string[] { "$", "a", "aa", "!" }, new int[] { 1, 1, 0, 1 });
            AssertTokenizesTo(map, "a a", new string[] { "a", "aa", "a", "aa" }, new int[] { 1, 0, 1, 0 });
            AssertTokenizesTo(map, "b", new string[] { "b", "bb" }, new int[] { 1, 0 });
            AssertTokenizesTo(map, "z x c v", new string[] { "z", "zxcv", "x", "c", "v" }, new int[] { 1, 0, 1, 1, 1 });
            AssertTokenizesTo(map, "z x c $", new string[] { "z", "x", "xc", "c", "$" }, new int[] { 1, 1, 0, 1, 1 });

            // check for lack of recursion
            map.Add(Strings("zoo zoo"), Tokens("zoo"), orig, merge);
            // CHECKME: I think the previous test (with 4 zoos) was just a typo.
            AssertTokenizesTo(map, "zoo zoo $ zoo", new string[] { "zoo", "zoo", "zoo", "$", "zoo" }, new int[] { 1, 0, 1, 1, 1 });

            map.Add(Strings("zoo"), Tokens("zoo zoo"), orig, merge);
            AssertTokenizesTo(map, "zoo zoo $ zoo", new string[] { "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo" }, new int[] { 1, 0, 1, 1, 1, 0, 1 });
        }
        public virtual void TestPositionIncrementsWithOrig()
        {
            SlowSynonymMap map = new SlowSynonymMap();

            bool orig = true;
            bool merge = true;

            // test that generated tokens start at the same offset as the original
            map.Add(Strings("a"), Tokens("aa"), orig, merge);
            AssertTokenizesTo(map, Tokens("a,5"), new string[] { "a", "aa" }, new int[] { 5, 0 });
            AssertTokenizesTo(map, Tokens("b,1 a,0"), new string[] { "b", "a", "aa" }, new int[] { 1, 0, 0 });

            // test that offset of first replacement is ignored (always takes the orig offset)
            map.Add(Strings("b"), Tokens("bb,100"), orig, merge);
            AssertTokenizesTo(map, Tokens("b,5"), new string[] { "b", "bb" }, new int[] { 5, 0 });
            AssertTokenizesTo(map, Tokens("c,1 b,0"), new string[] { "c", "b", "bb" }, new int[] { 1, 0, 0 });

            // test that subsequent tokens are adjusted accordingly
            map.Add(Strings("c"), Tokens("cc,100 c2,2"), orig, merge);
            AssertTokenizesTo(map, Tokens("c,5"), new string[] { "c", "cc", "c2" }, new int[] { 5, 0, 2 });
            AssertTokenizesTo(map, Tokens("d,1 c,0"), new string[] { "d", "c", "cc", "c2" }, new int[] { 1, 0, 0, 2 });
        }
        public virtual void TestOffsetBug()
        {
            // With the following rules:
            // a a=>b
            // x=>y
            // analysing "a x" causes "y" to have a bad offset (end less than start)
            // SOLR-167
            SlowSynonymMap map = new SlowSynonymMap();

            bool orig = false;
            bool merge = true;

            map.Add(Strings("a a"), Tokens("b"), orig, merge);
            map.Add(Strings("x"), Tokens("y"), orig, merge);

            // "a a x" => "b y"
            AssertTokenizesTo(map, Tokens("a,1,0,1 a,1,2,3 x,1,4,5"), new string[] { "b", "y" }, new int[] { 0, 4 }, new int[] { 3, 5 }, new int[] { 1, 1 });
        }
Example #33
        public virtual void TestReadMappingRules()
        {
            SlowSynonymMap synMap;

            // (a)->[b]
            IList<string> rules = new List<string>();

            rules.Add("a=>b");
            synMap = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(rules, synMap, "=>", ",", true, null);
            assertEquals(1, synMap.Submap.size());
            AssertTokIncludes(synMap, "a", "b");

            // (a)->[c]
            // (b)->[c]
            rules.Clear();
            rules.Add("a,b=>c");
            synMap = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(rules, synMap, "=>", ",", true, null);
            assertEquals(2, synMap.Submap.size());
            AssertTokIncludes(synMap, "a", "c");
            AssertTokIncludes(synMap, "b", "c");

            // (a)->[b][c]
            rules.Clear();
            rules.Add("a=>b,c");
            synMap = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(rules, synMap, "=>", ",", true, null);
            assertEquals(1, synMap.Submap.size());
            AssertTokIncludes(synMap, "a", "b");
            AssertTokIncludes(synMap, "a", "c");

            // (a)->(b)->[a2]
            //      [a1]
            rules.Clear();
            rules.Add("a=>a1");
            rules.Add("a b=>a2");
            synMap = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(rules, synMap, "=>", ",", true, null);
            assertEquals(1, synMap.Submap.size());
            AssertTokIncludes(synMap, "a", "a1");
            assertEquals(1, GetSubSynonymMap(synMap, "a").Submap.size());
            AssertTokIncludes(GetSubSynonymMap(synMap, "a"), "b", "a2");

            // (a)->(b)->[a2]
            //      (c)->[a3]
            //      [a1]
            rules.Clear();
            rules.Add("a=>a1");
            rules.Add("a b=>a2");
            rules.Add("a c=>a3");
            synMap = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(rules, synMap, "=>", ",", true, null);
            assertEquals(1, synMap.Submap.size());
            AssertTokIncludes(synMap, "a", "a1");
            assertEquals(2, GetSubSynonymMap(synMap, "a").Submap.size());
            AssertTokIncludes(GetSubSynonymMap(synMap, "a"), "b", "a2");
            AssertTokIncludes(GetSubSynonymMap(synMap, "a"), "c", "a3");

            // (a)->(b)->[a2]
            //      [a1]
            // (b)->(c)->[b2]
            //      [b1]
            rules.Clear();
            rules.Add("a=>a1");
            rules.Add("a b=>a2");
            rules.Add("b=>b1");
            rules.Add("b c=>b2");
            synMap = new SlowSynonymMap(true);
            SlowSynonymFilterFactory.ParseRules(rules, synMap, "=>", ",", true, null);
            assertEquals(2, synMap.Submap.size());
            AssertTokIncludes(synMap, "a", "a1");
            assertEquals(1, GetSubSynonymMap(synMap, "a").Submap.size());
            AssertTokIncludes(GetSubSynonymMap(synMap, "a"), "b", "a2");
            AssertTokIncludes(synMap, "b", "b1");
            assertEquals(1, GetSubSynonymMap(synMap, "b").Submap.size());
            AssertTokIncludes(GetSubSynonymMap(synMap, "b"), "c", "b2");
        }
        public void inform(ResourceLoader loader)
        {
            TokenizerFactory tokFactory = null;
            if (tf != null)
            {
                tokFactory = loadTokenizerFactory(loader, tf);
            }

            IEnumerable<string> wlist = loadRules(synonyms, loader);

            synMap = new SlowSynonymMap(ignoreCase);
            parseRules(wlist, synMap, "=>", ",", expand, tokFactory);
        }
Example #35
        /*
         * Need to worry about multiple scenarios:
         *  - need to go for the longest match
         *    a b => foo      #shouldn't match if "a b" is followed by "c d"
         *    a b c d => bar
         *  - need to backtrack - retry matches for tokens already read
         *     a b c d => foo
         *       b c => bar
         *     If the input stream is "a b c x", one will consume "a b c d"
         *     trying to match the first rule... all but "a" should be
         *     pushed back so a match may be made on "b c".
         *  - don't try and match generated tokens (thus need separate queue)
         *    matching is not recursive.
         *  - handle optional generation of original tokens in all these cases,
         *    merging token streams to preserve token positions.
         *  - preserve original positionIncrement of first matched token
         */
        public override bool IncrementToken()
        {
            while (true)
            {
                // if there are any generated tokens, return them... don't try any
                // matches against them, as we specifically don't want recursion.
                if (replacement != null && replacement.MoveNext())
                {
                    Copy(this, replacement.Current);
                    return(true);
                }

                // common case fast-path of first token not matching anything
                AttributeSource firstTok = NextTok();
                if (firstTok == null)
                {
                    return(false);
                }
                var termAtt = firstTok.AddAttribute<ICharTermAttribute>();
                SlowSynonymMap result = map.Submap != null ? map.Submap.Get(termAtt.Buffer, 0, termAtt.Length) : null;

                if (result == null)
                {
                    Copy(this, firstTok);
                    return(true);
                }

                // fast-path failed, clone ourselves if needed
                if (firstTok == this)
                {
                    firstTok = CloneAttributes();
                }
                // OK, we matched a token, so find the longest match.

                matched = new LinkedList<AttributeSource>();

                result = Match(result);

                if (result == null)
                {
                    // no match, simply return the first token read.
                    Copy(this, firstTok);
                    return(true);
                }

                // reuse, or create new one each time?
                IList<AttributeSource> generated = new JCG.List<AttributeSource>(result.Synonyms.Length + matched.Count + 1);

                //
                // there was a match... let's generate the new tokens, merging
                // in the matched tokens (position increments need adjusting)
                //
                AttributeSource lastTok     = matched.Count == 0 ? firstTok : matched.Last.Value;
                bool            includeOrig = result.IncludeOrig;

                AttributeSource             origTok        = includeOrig ? firstTok : null;
                IPositionIncrementAttribute firstPosIncAtt = firstTok.AddAttribute<IPositionIncrementAttribute>();
                int origPos = firstPosIncAtt.PositionIncrement; // position of origTok in the original stream
                int repPos  = 0;                                // curr position in replacement token stream
                int pos     = 0;                                // current position in merged token stream

                for (int i = 0; i < result.Synonyms.Length; i++)
                {
                    Token                       repTok       = result.Synonyms[i];
                    AttributeSource             newTok       = firstTok.CloneAttributes();
                    ICharTermAttribute          newTermAtt   = newTok.AddAttribute<ICharTermAttribute>();
                    IOffsetAttribute            newOffsetAtt = newTok.AddAttribute<IOffsetAttribute>();
                    IPositionIncrementAttribute newPosIncAtt = newTok.AddAttribute<IPositionIncrementAttribute>();

                    IOffsetAttribute lastOffsetAtt = lastTok.AddAttribute<IOffsetAttribute>();

                    newOffsetAtt.SetOffset(newOffsetAtt.StartOffset, lastOffsetAtt.EndOffset);
                    newTermAtt.CopyBuffer(repTok.Buffer, 0, repTok.Length);
                    repPos += repTok.PositionIncrement;
                    if (i == 0) // make position of first token equal to original
                    {
                        repPos = origPos;
                    }

                    // if necessary, insert original tokens and adjust position increment
                    while (origTok != null && origPos <= repPos)
                    {
                        IPositionIncrementAttribute origPosInc = origTok.AddAttribute<IPositionIncrementAttribute>();
                        origPosInc.PositionIncrement = origPos - pos;
                        generated.Add(origTok);
                        pos += origPosInc.PositionIncrement;
                        //origTok = matched.Count == 0 ? null : matched.RemoveFirst();
                        if (matched.Count == 0)
                        {
                            origTok = null;
                        }
                        else
                        {
                            origTok = matched.First.Value;
                            matched.Remove(origTok);
                        }
                        if (origTok != null)
                        {
                            origPosInc = origTok.AddAttribute<IPositionIncrementAttribute>();
                            origPos   += origPosInc.PositionIncrement;
                        }
                    }

                    newPosIncAtt.PositionIncrement = repPos - pos;
                    generated.Add(newTok);
                    pos += newPosIncAtt.PositionIncrement;
                }

                // finish up any leftover original tokens
                while (origTok != null)
                {
                    IPositionIncrementAttribute origPosInc = origTok.AddAttribute<IPositionIncrementAttribute>();
                    origPosInc.PositionIncrement = origPos - pos;
                    generated.Add(origTok);
                    pos += origPosInc.PositionIncrement;
                    if (matched.Count == 0)
                    {
                        origTok = null;
                    }
                    else
                    {
                        origTok = matched.First.Value;
                        matched.Remove(origTok);
                    }
                    if (origTok != null)
                    {
                        origPosInc = origTok.AddAttribute<IPositionIncrementAttribute>();
                        origPos   += origPosInc.PositionIncrement;
                    }
                }

                // what if we replaced a longer sequence with a shorter one?
                // a/0 b/5 =>  foo/0
                // should I re-create the gap on the next buffered token?

                replacement = generated.GetEnumerator();
                // Now return to the top of the loop to read and return the first
                // generated token. The reason this is done is that we may have generated
                // nothing at all, and may need to continue with more matching logic.
            }
        }
	  private SlowSynonymMap match(SlowSynonymMap map)
	  {
		SlowSynonymMap result = null;

		if (map.submap != null)
		{
		  AttributeSource tok = nextTok();
		  if (tok != null)
		  {
			// clone ourselves.
			if (tok == this)
			{
			  tok = CloneAttributes();
			}
			// check for positionIncrement!=1?  if>1, should not match, if==0, check multiple at this level?
			var termAtt = tok.GetAttribute<ICharTermAttribute>();
			SlowSynonymMap subMap = map.submap.Get(termAtt.buffer(), 0, termAtt.length());

			if (subMap != null)
			{
			  // recurse
			  result = match(subMap);
			}

			if (result != null)
			{
			  matched.AddFirst(tok);
			}
			else
			{
			  // push back unmatched token
			  pushTok(tok);
			}
		  }
		}

		// no longer sequence matched, so if this node has synonyms, it's the match.
		if (result == null && map.synonyms != null)
		{
		  result = map;
		}

		return result;
	  }
Example #37
        /*
         * Need to worry about multiple scenarios:
         *  - need to go for the longest match
         *    a b => foo      #shouldn't match if "a b" is followed by "c d"
         *    a b c d => bar
         *  - need to backtrack - retry matches for tokens already read
         *     a b c d => foo
         *       b c => bar
         *     If the input stream is "a b c x", one will consume "a b c d"
         *     trying to match the first rule... all but "a" should be
         *     pushed back so a match may be made on "b c".
         *  - don't try and match generated tokens (thus need separate queue)
         *    matching is not recursive.
         *  - handle optional generation of original tokens in all these cases,
         *    merging token streams to preserve token positions.
         *  - preserve original positionIncrement of first matched token
         */
        public override bool incrementToken()
        {
            while (true)
            {
                // if there are any generated tokens, return them... don't try any
                // matches against them, as we specifically don't want recursion.
                if (replacement != null && replacement.MoveNext())
                {
                    copy(this, replacement.Current);
                    return(true);
                }

                // common case fast-path of first token not matching anything
                AttributeSource firstTok = nextTok();
                if (firstTok == null)
                {
                    return(false);
                }
                CharTermAttribute termAtt = firstTok.addAttribute(typeof(CharTermAttribute));
                SlowSynonymMap result = map.submap != null ? map.submap.get(termAtt.buffer(), 0, termAtt.length()) : null;

                if (result == null)
                {
                    copy(this, firstTok);
                    return(true);
                }

                // fast-path failed, clone ourselves if needed
                if (firstTok == this)
                {
                    firstTok = cloneAttributes();
                }
                // OK, we matched a token, so find the longest match.

                matched = new LinkedList<AttributeSource>();

                result = match(result);

                if (result == null)
                {
                    // no match, simply return the first token read.
                    copy(this, firstTok);
                    return(true);
                }

                // reuse, or create new one each time?
                List<AttributeSource> generated = new List<AttributeSource>(result.synonyms.Length + matched.Count + 1);

                //
                // there was a match... let's generate the new tokens, merging
                // in the matched tokens (position increments need adjusting)
                //
                AttributeSource lastTok     = matched.Count == 0 ? firstTok : matched.Last.Value;
                bool            includeOrig = result.includeOrig();

                AttributeSource            origTok        = includeOrig ? firstTok : null;
                PositionIncrementAttribute firstPosIncAtt = firstTok.addAttribute(typeof(PositionIncrementAttribute));
                int origPos = firstPosIncAtt.PositionIncrement; // position of origTok in the original stream
                int repPos  = 0;                                // curr position in replacement token stream
                int pos     = 0;                                // current position in merged token stream

                for (int i = 0; i < result.synonyms.Length; i++)
                {
                    Token                      repTok       = result.synonyms[i];
                    AttributeSource            newTok       = firstTok.cloneAttributes();
                    CharTermAttribute          newTermAtt   = newTok.addAttribute(typeof(CharTermAttribute));
                    OffsetAttribute            newOffsetAtt = newTok.addAttribute(typeof(OffsetAttribute));
                    PositionIncrementAttribute newPosIncAtt = newTok.addAttribute(typeof(PositionIncrementAttribute));

                    OffsetAttribute lastOffsetAtt = lastTok.addAttribute(typeof(OffsetAttribute));

                    newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
                    newTermAtt.copyBuffer(repTok.buffer(), 0, repTok.length());
                    repPos += repTok.PositionIncrement;
                    if (i == 0)     // make position of first token equal to original
                    {
                        repPos = origPos;
                    }

                    // if necessary, insert original tokens and adjust position increment
                    while (origTok != null && origPos <= repPos)
                    {
                        PositionIncrementAttribute origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
                        origPosInc.PositionIncrement = origPos - pos;
                        generated.Add(origTok);
                        pos    += origPosInc.PositionIncrement;
                        origTok = matched.Count == 0 ? null : matched.RemoveFirst();
                        if (origTok != null)
                        {
                            origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
                            origPos   += origPosInc.PositionIncrement;
                        }
                    }

                    newPosIncAtt.PositionIncrement = repPos - pos;
                    generated.Add(newTok);
                    pos += newPosIncAtt.PositionIncrement;
                }

                // finish up any leftover original tokens
                while (origTok != null)
                {
                    PositionIncrementAttribute origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
                    origPosInc.PositionIncrement = origPos - pos;
                    generated.Add(origTok);
                    pos    += origPosInc.PositionIncrement;
                    origTok = matched.Count == 0 ? null : matched.RemoveFirst();
                    if (origTok != null)
                    {
                        origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
                        origPos   += origPosInc.PositionIncrement;
                    }
                }

                // what if we replaced a longer sequence with a shorter one?
                // a/0 b/5 =>  foo/0
                // should I re-create the gap on the next buffered token?

                replacement = generated.GetEnumerator();
                // Now return to the top of the loop to read and return the first
                // generated token. The reason this is done is that we may have generated
                // nothing at all, and may need to continue with more matching logic.
            }
        }