/*
 * Need to worry about multiple scenarios:
 *  - need to go for the longest match
 *      a b => foo      #shouldn't match if "a b" is followed by "c d"
 *      a b c d => bar
 *  - need to backtrack - retry matches for tokens already read
 *      a b c d => foo
 *        b c => bar
 *    If the input stream is "a b c x", one will consume "a b c d"
 *    trying to match the first rule... all but "a" should be
 *    pushed back so a match may be made on "b c".
 *  - don't try and match generated tokens (thus need separate queue)
 *    matching is not recursive.
 *  - handle optional generation of original tokens in all these cases,
 *    merging token streams to preserve token positions.
 *  - preserve original positionIncrement of first matched token
 */

/// <summary>
/// Produces the next output token. Queued replacement tokens are drained first;
/// otherwise the next input token is read and, if it starts a synonym match, the
/// rule's synonym tokens are generated (interleaved with the matched original
/// tokens when the rule has <c>IncludeOrig</c> set) and queued for the next pass
/// of the loop.
/// </summary>
/// <returns>
/// <c>true</c> after a token has been handed to <c>copy(this, ...)</c>;
/// <c>false</c> once <c>nextTok()</c> returns <c>null</c>.
/// </returns>
public override bool IncrementToken()
{
    while (true)
    {
        // if there are any generated tokens, return them... don't try any
        // matches against them, as we specifically don't want recursion.
        if (replacement != null && replacement.MoveNext())
        {
            copy(this, replacement.Current);
            return true;
        }

        // common case fast-path of first token not matching anything
        AttributeSource firstTok = nextTok();
        if (firstTok == null)
        {
            return false;
        }

        var termAtt = firstTok.AddAttribute<ICharTermAttribute>();
        SlowSynonymMap result = map.submap != null
            ? map.submap.Get(termAtt.Buffer(), 0, termAtt.Length)
            : null;
        if (result == null)
        {
            copy(this, firstTok);
            return true;
        }

        // fast-path failed, clone ourselves if needed
        if (firstTok == this)
        {
            firstTok = CloneAttributes();
        }

        // OK, we matched a token, so find the longest match.
        // (C# has no diamond operator: the element type must be spelled out.)
        matched = new LinkedList<AttributeSource>();

        result = match(result);

        if (result == null)
        {
            // no match, simply return the first token read.
            copy(this, firstTok);
            return true;
        }

        // reuse, or create new one each time?
        List<AttributeSource> generated =
            new List<AttributeSource>(result.synonyms.Length + matched.Count + 1);

        //
        // there was a match... let's generate the new tokens, merging
        // in the matched tokens (position increments need adjusting)
        //
        AttributeSource lastTok = matched.Count == 0 ? firstTok : matched.Last.Value;
        bool includeOrig = result.IncludeOrig;

        AttributeSource origTok = includeOrig ? firstTok : null;
        var firstPosIncAtt = firstTok.AddAttribute<IPositionIncrementAttribute>();
        int origPos = firstPosIncAtt.PositionIncrement; // position of origTok in the original stream
        int repPos = 0; // curr position in replacement token stream
        int pos = 0;    // current position in merged token stream

        for (int i = 0; i < result.synonyms.Length; i++)
        {
            Token repTok = result.synonyms[i];
            AttributeSource newTok = firstTok.CloneAttributes();
            var newTermAtt = newTok.AddAttribute<ICharTermAttribute>();
            var newOffsetAtt = newTok.AddAttribute<IOffsetAttribute>();
            var newPosIncAtt = newTok.AddAttribute<IPositionIncrementAttribute>();

            var lastOffsetAtt = lastTok.AddAttribute<IOffsetAttribute>();

            // The generated token keeps its own start offset and takes the end
            // offset of the last matched token.
            // NOTE(review): StartOffset()/EndOffset()/Buffer() are written as method
            // calls to mirror termAtt.Buffer() above - confirm against the
            // attribute interfaces in this code base.
            newOffsetAtt.SetOffset(newOffsetAtt.StartOffset(), lastOffsetAtt.EndOffset());
            newTermAtt.CopyBuffer(repTok.Buffer(), 0, repTok.Length);
            repPos += repTok.PositionIncrement;
            if (i == 0) // make position of first token equal to original
            {
                repPos = origPos;
            }

            // if necessary, insert original tokens and adjust position increment
            while (origTok != null && origPos <= repPos)
            {
                var origPosInc = origTok.AddAttribute<IPositionIncrementAttribute>();
                origPosInc.PositionIncrement = origPos - pos;
                generated.Add(origTok);
                pos += origPosInc.PositionIncrement;
                origTok = PollFirstMatched();
                if (origTok != null)
                {
                    origPosInc = origTok.AddAttribute<IPositionIncrementAttribute>();
                    origPos += origPosInc.PositionIncrement;
                }
            }

            newPosIncAtt.PositionIncrement = repPos - pos;
            generated.Add(newTok);
            pos += newPosIncAtt.PositionIncrement;
        }

        // finish up any leftover original tokens
        while (origTok != null)
        {
            var origPosInc = origTok.AddAttribute<IPositionIncrementAttribute>();
            origPosInc.PositionIncrement = origPos - pos;
            generated.Add(origTok);
            pos += origPosInc.PositionIncrement;
            origTok = PollFirstMatched();
            if (origTok != null)
            {
                origPosInc = origTok.AddAttribute<IPositionIncrementAttribute>();
                origPos += origPosInc.PositionIncrement;
            }
        }

        // what if we replaced a longer sequence with a shorter one?
        // a/0 b/5 =>  foo/0
        // should I re-create the gap on the next buffered token?

        replacement = generated.GetEnumerator();
        // Now return to the top of the loop to read and return the first
        // generated token.. The reason this is done is that we may have generated
        // nothing at all, and may need to continue with more matching logic.
    }
}

/// <summary>
/// Removes and returns the first token in <c>matched</c>, or <c>null</c> when the
/// list is empty. Needed because <c>LinkedList&lt;T&gt;.RemoveFirst()</c> returns
/// <c>void</c>, so the head has to be read before it is removed.
/// </summary>
private AttributeSource PollFirstMatched()
{
    if (matched.Count == 0)
    {
        return null;
    }

    AttributeSource head = matched.First.Value;
    matched.RemoveFirst();
    return head;
}