Ejemplo n.º 1
0
        /// <summary>
        /// decodes the contexts at the current position </summary>
        protected internal virtual ISet <BytesRef> DecodeContexts(BytesRef scratch, ByteArrayDataInput tmpInput)
        {
            tmpInput.Reset(scratch.Bytes);
            tmpInput.SkipBytes(scratch.Length - 2); //skip to context set size
            ushort ctxSetSize = (ushort)tmpInput.ReadInt16();

            scratch.Length -= 2;

            var contextSet = new JCG.HashSet <BytesRef>();

            for (ushort i = 0; i < ctxSetSize; i++)
            {
                tmpInput.Position = scratch.Length - 2;
                ushort curContextLength = (ushort)tmpInput.ReadInt16();
                scratch.Length   -= 2;
                tmpInput.Position = scratch.Length - curContextLength;
                BytesRef contextSpare = new BytesRef(curContextLength);
                tmpInput.ReadBytes(contextSpare.Bytes, 0, curContextLength);
                contextSpare.Length = curContextLength;
                contextSet.Add(contextSpare);
                scratch.Length -= curContextLength;
            }

            // LUCENENET TODO: We are writing the data forward.
            // Not sure exactly why, but when we read it back it
            // is reversed. So, we need to fix that before returning the result.
            // If the underlying problem is found and fixed, then this line can just be
            // return contextSet;
            return(new JCG.HashSet <BytesRef>(contextSet.Reverse()));
        }
Ejemplo n.º 2
0
        /// <summary>
        /// decodes the contexts at the current position </summary>
        protected internal virtual ISet <BytesRef> DecodeContexts(BytesRef scratch, ByteArrayDataInput tmpInput)
        {
            tmpInput.Reset(scratch.Bytes);
            tmpInput.SkipBytes(scratch.Length - 2); //skip to context set size
            ushort ctxSetSize = (ushort)tmpInput.ReadInt16();

            scratch.Length -= 2;
            var contextSet = new JCG.HashSet <BytesRef>();

            for (ushort i = 0; i < ctxSetSize; i++)
            {
                tmpInput.Position = scratch.Length - 2;
                ushort curContextLength = (ushort)tmpInput.ReadInt16();
                scratch.Length   -= 2;
                tmpInput.Position = scratch.Length - curContextLength;
                BytesRef contextSpare = new BytesRef(curContextLength);
                tmpInput.ReadBytes(contextSpare.Bytes, 0, curContextLength);
                contextSpare.Length = curContextLength;
                contextSet.Add(contextSpare);
                scratch.Length -= curContextLength;
            }
            // LUCENENET NOTE: The result was at one point reversed because of test failures, but since we are
            // using JCG.HashSet<T> now (whose Equals() implementation respects set equality),
            // we have reverted back to the original implementation.
            return(contextSet);
        }
Ejemplo n.º 3
0
            public int Compare(BytesRef a, BytesRef b)
            {
                // First by analyzed form:
                readerA.Reset(a.Bytes, a.Offset, a.Length);
                scratchA.Length = (ushort)readerA.ReadInt16();
                scratchA.Bytes  = a.Bytes;
                scratchA.Offset = readerA.Position;

                readerB.Reset(b.Bytes, b.Offset, b.Length);
                scratchB.Bytes  = b.Bytes;
                scratchB.Length = (ushort)readerB.ReadInt16();
                scratchB.Offset = readerB.Position;

                int cmp = scratchA.CompareTo(scratchB);

                if (cmp != 0)
                {
                    return(cmp);
                }
                readerA.SkipBytes(scratchA.Length);
                readerB.SkipBytes(scratchB.Length);

                // Next by cost:
                long aCost = readerA.ReadInt32();
                long bCost = readerB.ReadInt32();

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(DecodeWeight(aCost) >= 0);
                    Debugging.Assert(DecodeWeight(bCost) >= 0);
                }
                if (aCost < bCost)
                {
                    return(-1);
                }
                else if (aCost > bCost)
                {
                    return(1);
                }

                // Finally by surface form:
                if (hasPayloads)
                {
                    scratchA.Length = (ushort)readerA.ReadInt16();
                    scratchB.Length = (ushort)readerB.ReadInt16();
                    scratchA.Offset = readerA.Position;
                    scratchB.Offset = readerB.Position;
                }
                else
                {
                    scratchA.Offset = readerA.Position;
                    scratchB.Offset = readerB.Position;
                    scratchA.Length = a.Length - scratchA.Offset;
                    scratchB.Length = b.Length - scratchB.Offset;
                }

                return(scratchA.CompareTo(scratchB));
            }
Ejemplo n.º 4
0
        /// <summary>
        /// decodes the payload at the current position
        /// </summary>
        protected internal virtual BytesRef DecodePayload(BytesRef scratch, ByteArrayDataInput tmpInput)
        {
            tmpInput.Reset(scratch.Bytes);
            tmpInput.SkipBytes(scratch.Length - 2);                 // skip to payload size
            ushort payloadLength = (ushort)tmpInput.ReadInt16();    // read payload size

            tmpInput.Position = scratch.Length - 2 - payloadLength; // setPosition to start of payload
            BytesRef payloadScratch = new BytesRef(payloadLength);

            tmpInput.ReadBytes(payloadScratch.Bytes, 0, payloadLength); // read payload
            payloadScratch.Length = payloadLength;
            scratch.Length       -= 2;                                  // payload length info (short)
            scratch.Length       -= payloadLength;                      // payload
            return(payloadScratch);
        }
Ejemplo n.º 5
0
            public virtual int Compare(BytesRef a, BytesRef b)
            {
                readerA.Reset(a.Bytes, a.Offset, a.Length);
                readerB.Reset(b.Bytes, b.Offset, b.Length);

                // By token:
                scratchA.Length = (ushort)readerA.ReadInt16();
                scratchA.Bytes  = a.Bytes;
                scratchA.Offset = readerA.Position;

                scratchB.Bytes  = b.Bytes;
                scratchB.Length = (ushort)readerB.ReadInt16();
                scratchB.Offset = readerB.Position;

                int cmp = scratchA.CompareTo(scratchB);

                if (cmp != 0)
                {
                    return(cmp);
                }
                readerA.SkipBytes(scratchA.Length);
                readerB.SkipBytes(scratchB.Length);

                // By length (smaller surface forms sorted first):
                cmp = a.Length - b.Length;
                if (cmp != 0)
                {
                    return(cmp);
                }

                // By surface form:
                scratchA.Offset = readerA.Position;
                scratchA.Length = a.Length - scratchA.Offset;
                scratchB.Offset = readerB.Position;
                scratchB.Length = b.Length - scratchB.Offset;

                return(scratchA.CompareTo(scratchB));
            }
Ejemplo n.º 6
0
        public override void Build(IInputEnumerator enumerator)
        {
            if (enumerator.HasContexts)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }
            string prefix     = this.GetType().Name;
            var    directory  = OfflineSorter.DefaultTempDir();
            var    tempInput  = FileSupport.CreateTempFile(prefix, ".input", directory);
            var    tempSorted = FileSupport.CreateTempFile(prefix, ".sorted", directory);

            hasPayloads = enumerator.HasPayloads;

            var writer = new OfflineSorter.ByteSequencesWriter(tempInput);

            OfflineSorter.ByteSequencesReader reader = null;
            var scratch = new BytesRef();

            TokenStreamToAutomaton ts2a = GetTokenStreamToAutomaton();

            bool success = false;

            count = 0;
            byte[] buffer = new byte[8];
            try
            {
                var      output = new ByteArrayDataOutput(buffer);
                BytesRef surfaceForm;

                while (enumerator.MoveNext())
                {
                    surfaceForm = enumerator.Current;
                    ISet <Int32sRef> paths = ToFiniteStrings(surfaceForm, ts2a);

                    maxAnalyzedPathsForOneInput = Math.Max(maxAnalyzedPathsForOneInput, paths.Count);

                    foreach (Int32sRef path in paths)
                    {
                        Util.Fst.Util.ToBytesRef(path, scratch);

                        // length of the analyzed text (FST input)
                        if (scratch.Length > ushort.MaxValue - 2)
                        {
                            throw new ArgumentException("cannot handle analyzed forms > " + (ushort.MaxValue - 2) +
                                                        " in length (got " + scratch.Length + ")");
                        }
                        ushort analyzedLength = (ushort)scratch.Length;

                        // compute the required length:
                        // analyzed sequence + weight (4) + surface + analyzedLength (short)
                        int requiredLength = analyzedLength + 4 + surfaceForm.Length + 2;

                        BytesRef payload;

                        if (hasPayloads)
                        {
                            if (surfaceForm.Length > (ushort.MaxValue - 2))
                            {
                                throw new ArgumentException("cannot handle surface form > " + (ushort.MaxValue - 2) +
                                                            " in length (got " + surfaceForm.Length + ")");
                            }
                            payload = enumerator.Payload;
                            // payload + surfaceLength (short)
                            requiredLength += payload.Length + 2;
                        }
                        else
                        {
                            payload = null;
                        }

                        buffer = ArrayUtil.Grow(buffer, requiredLength);

                        output.Reset(buffer);

                        output.WriteInt16((short)analyzedLength);

                        output.WriteBytes(scratch.Bytes, scratch.Offset, scratch.Length);

                        output.WriteInt32(EncodeWeight(enumerator.Weight));

                        if (hasPayloads)
                        {
                            for (int i = 0; i < surfaceForm.Length; i++)
                            {
                                if (surfaceForm.Bytes[i] == PAYLOAD_SEP)
                                {
                                    throw new ArgumentException(
                                              "surface form cannot contain unit separator character U+001F; this character is reserved");
                                }
                            }
                            output.WriteInt16((short)surfaceForm.Length);
                            output.WriteBytes(surfaceForm.Bytes, surfaceForm.Offset, surfaceForm.Length);
                            output.WriteBytes(payload.Bytes, payload.Offset, payload.Length);
                        }
                        else
                        {
                            output.WriteBytes(surfaceForm.Bytes, surfaceForm.Offset, surfaceForm.Length);
                        }

                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(output.Position == requiredLength, () => output.Position + " vs " + requiredLength);
                        }

                        writer.Write(buffer, 0, output.Position);
                    }
                    count++;
                }
                writer.Dispose();

                // Sort all input/output pairs (required by FST.Builder):
                (new OfflineSorter(new AnalyzingComparer(hasPayloads))).Sort(tempInput, tempSorted);

                // Free disk space:
                tempInput.Delete();

                reader = new OfflineSorter.ByteSequencesReader(tempSorted);

                var outputs = new PairOutputs <long?, BytesRef>(PositiveInt32Outputs.Singleton,
                                                                ByteSequenceOutputs.Singleton);
                var builder = new Builder <PairOutputs <long?, BytesRef> .Pair>(FST.INPUT_TYPE.BYTE1, outputs);

                // Build FST:
                BytesRef  previousAnalyzed = null;
                BytesRef  analyzed         = new BytesRef();
                BytesRef  surface          = new BytesRef();
                Int32sRef scratchInts      = new Int32sRef();
                var       input            = new ByteArrayDataInput();

                // Used to remove duplicate surface forms (but we
                // still index the hightest-weight one).  We clear
                // this when we see a new analyzed form, so it cannot
                // grow unbounded (at most 256 entries):
                var seenSurfaceForms = new JCG.HashSet <BytesRef>();

                var dedup = 0;
                while (reader.Read(scratch))
                {
                    input.Reset(scratch.Bytes, scratch.Offset, scratch.Length);
                    ushort analyzedLength = (ushort)input.ReadInt16();
                    analyzed.Grow(analyzedLength + 2);
                    input.ReadBytes(analyzed.Bytes, 0, analyzedLength);
                    analyzed.Length = analyzedLength;

                    long cost = input.ReadInt32();

                    surface.Bytes = scratch.Bytes;
                    if (hasPayloads)
                    {
                        surface.Length = (ushort)input.ReadInt16();
                        surface.Offset = input.Position;
                    }
                    else
                    {
                        surface.Offset = input.Position;
                        surface.Length = scratch.Length - surface.Offset;
                    }

                    if (previousAnalyzed == null)
                    {
                        previousAnalyzed = new BytesRef();
                        previousAnalyzed.CopyBytes(analyzed);
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }
                    else if (analyzed.Equals(previousAnalyzed))
                    {
                        dedup++;
                        if (dedup >= maxSurfaceFormsPerAnalyzedForm)
                        {
                            // More than maxSurfaceFormsPerAnalyzedForm
                            // dups: skip the rest:
                            continue;
                        }
                        if (seenSurfaceForms.Contains(surface))
                        {
                            continue;
                        }
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }
                    else
                    {
                        dedup = 0;
                        previousAnalyzed.CopyBytes(analyzed);
                        seenSurfaceForms.Clear();
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }

                    // TODO: I think we can avoid the extra 2 bytes when
                    // there is no dup (dedup==0), but we'd have to fix
                    // the exactFirst logic ... which would be sort of
                    // hairy because we'd need to special case the two
                    // (dup/not dup)...

                    // NOTE: must be byte 0 so we sort before whatever
                    // is next
                    analyzed.Bytes[analyzed.Offset + analyzed.Length]     = 0;
                    analyzed.Bytes[analyzed.Offset + analyzed.Length + 1] = (byte)dedup;
                    analyzed.Length += 2;

                    Util.Fst.Util.ToInt32sRef(analyzed, scratchInts);
                    //System.out.println("ADD: " + scratchInts + " -> " + cost + ": " + surface.utf8ToString());
                    if (!hasPayloads)
                    {
                        builder.Add(scratchInts, outputs.NewPair(cost, BytesRef.DeepCopyOf(surface)));
                    }
                    else
                    {
                        int      payloadOffset = input.Position + surface.Length;
                        int      payloadLength = scratch.Length - payloadOffset;
                        BytesRef br            = new BytesRef(surface.Length + 1 + payloadLength);
                        Array.Copy(surface.Bytes, surface.Offset, br.Bytes, 0, surface.Length);
                        br.Bytes[surface.Length] = PAYLOAD_SEP;
                        Array.Copy(scratch.Bytes, payloadOffset, br.Bytes, surface.Length + 1, payloadLength);
                        br.Length = br.Bytes.Length;
                        builder.Add(scratchInts, outputs.NewPair(cost, br));
                    }
                }
                fst = builder.Finish();

                //Util.dotToFile(fst, "/tmp/suggest.dot");

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(reader, writer);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(reader, writer);
                }

                tempInput.Delete();
                tempSorted.Delete();
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Generates a list of stems for the provided word
        /// </summary>
        /// <param name="word"> Word to generate the stems for </param>
        /// <param name="length"> length </param>
        /// <param name="previous"> previous affix that was removed (so we dont remove same one twice) </param>
        /// <param name="prevFlag"> Flag from a previous stemming step that need to be cross-checked with any affixes in this recursive step </param>
        /// <param name="prefixFlag"> flag of the most inner removed prefix, so that when removing a suffix, its also checked against the word </param>
        /// <param name="recursionDepth"> current recursiondepth </param>
        /// <param name="doPrefix"> true if we should remove prefixes </param>
        /// <param name="doSuffix"> true if we should remove suffixes </param>
        /// <param name="previousWasPrefix"> true if the previous removal was a prefix:
        ///        if we are removing a suffix, and it has no continuation requirements, its ok.
        ///        but two prefixes (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse. </param>
        /// <param name="circumfix"> true if the previous prefix removal was signed as a circumfix
        ///        this means inner most suffix must also contain circumfix flag. </param>
        /// <param name="caseVariant"> true if we are searching for a case variant. if the word has KEEPCASE flag it cannot succeed. </param>
        /// <returns> <see cref="IList{CharsRef}"/> of stems, or empty list if no stems are found </returns>
        private IList <CharsRef> Stem(char[] word, int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, bool doPrefix, bool doSuffix, bool previousWasPrefix, bool circumfix, bool caseVariant)
        {
            // TODO: allow this stuff to be reused by tokenfilter
            JCG.List <CharsRef> stems = new JCG.List <CharsRef>();

            if (doPrefix && dictionary.prefixes != null)
            {
                FST <Int32sRef>     fst         = dictionary.prefixes;
                Outputs <Int32sRef> outputs     = fst.Outputs;
                FST.BytesReader     bytesReader = prefixReaders[recursionDepth];
                FST.Arc <Int32sRef> arc         = prefixArcs[recursionDepth];
                fst.GetFirstArc(arc);
                Int32sRef NO_OUTPUT = outputs.NoOutput;
                Int32sRef output    = NO_OUTPUT;
                int       limit     = dictionary.fullStrip ? length : length - 1;
                for (int i = 0; i < limit; i++)
                {
                    if (i > 0)
                    {
                        int ch = word[i - 1];
                        if (fst.FindTargetArc(ch, arc, arc, bytesReader) == null)
                        {
                            break;
                        }
                        else if (arc.Output != NO_OUTPUT)
                        {
                            output = fst.Outputs.Add(output, arc.Output);
                        }
                    }
                    Int32sRef prefixes; // LUCENENET: IDE0059 - Removed unnecessary value assignment
                    if (!arc.IsFinal)
                    {
                        continue;
                    }
                    else
                    {
                        prefixes = fst.Outputs.Add(output, arc.NextFinalOutput);
                    }

                    for (int j = 0; j < prefixes.Length; j++)
                    {
                        int prefix = prefixes.Int32s[prefixes.Offset + j];
                        if (prefix == previous)
                        {
                            continue;
                        }
                        affixReader.Position = 8 * prefix;
                        char flag         = (char)(affixReader.ReadInt16() & 0xffff);
                        char stripOrd     = (char)(affixReader.ReadInt16() & 0xffff);
                        int  condition    = (char)(affixReader.ReadInt16() & 0xffff);
                        bool crossProduct = (condition & 1) == 1;
                        condition = condition.TripleShift(1);
                        char append = (char)(affixReader.ReadInt16() & 0xffff);

                        bool compatible;
                        if (recursionDepth == 0)
                        {
                            if (dictionary.onlyincompound == -1)
                            {
                                compatible = true;
                            }
                            else
                            {
                                // check if affix is allowed in a non-compound word
                                dictionary.flagLookup.Get(append, scratch);
                                char[] appendFlags = Dictionary.DecodeFlags(scratch);
                                compatible = !Dictionary.HasFlag(appendFlags, (char)dictionary.onlyincompound);
                            }
                        }
                        else if (crossProduct)
                        {
                            // cross check incoming continuation class (flag of previous affix) against list.
                            dictionary.flagLookup.Get(append, scratch);
                            char[] appendFlags = Dictionary.DecodeFlags(scratch);
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(prevFlag >= 0);
                            }
                            bool allowed = dictionary.onlyincompound == -1 ||
                                           !Dictionary.HasFlag(appendFlags, (char)dictionary.onlyincompound);
                            compatible = allowed && HasCrossCheckedFlag((char)prevFlag, appendFlags, false);
                        }
                        else
                        {
                            compatible = false;
                        }

                        if (compatible)
                        {
                            int deAffixedStart  = i;
                            int deAffixedLength = length - deAffixedStart;

                            int stripStart  = dictionary.stripOffsets[stripOrd];
                            int stripEnd    = dictionary.stripOffsets[stripOrd + 1];
                            int stripLength = stripEnd - stripStart;

                            if (!CheckCondition(condition, dictionary.stripData, stripStart, stripLength, word, deAffixedStart, deAffixedLength))
                            {
                                continue;
                            }

                            char[] strippedWord = new char[stripLength + deAffixedLength];
                            Array.Copy(dictionary.stripData, stripStart, strippedWord, 0, stripLength);
                            Array.Copy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength);

                            IList <CharsRef> stemList = ApplyAffix(strippedWord, strippedWord.Length, prefix, -1, recursionDepth, true, circumfix, caseVariant);

                            stems.AddRange(stemList);
                        }
                    }
                }
            }

            if (doSuffix && dictionary.suffixes != null)
            {
                FST <Int32sRef>     fst         = dictionary.suffixes;
                Outputs <Int32sRef> outputs     = fst.Outputs;
                FST.BytesReader     bytesReader = suffixReaders[recursionDepth];
                FST.Arc <Int32sRef> arc         = suffixArcs[recursionDepth];
                fst.GetFirstArc(arc);
                Int32sRef NO_OUTPUT = outputs.NoOutput;
                Int32sRef output    = NO_OUTPUT;
                int       limit     = dictionary.fullStrip ? 0 : 1;
                for (int i = length; i >= limit; i--)
                {
                    if (i < length)
                    {
                        int ch = word[i];
                        if (fst.FindTargetArc(ch, arc, arc, bytesReader) == null)
                        {
                            break;
                        }
                        else if (arc.Output != NO_OUTPUT)
                        {
                            output = fst.Outputs.Add(output, arc.Output);
                        }
                    }
                    Int32sRef suffixes; // LUCENENET: IDE0059 - Removed unnecessary value assignment
                    if (!arc.IsFinal)
                    {
                        continue;
                    }
                    else
                    {
                        suffixes = fst.Outputs.Add(output, arc.NextFinalOutput);
                    }

                    for (int j = 0; j < suffixes.Length; j++)
                    {
                        int suffix = suffixes.Int32s[suffixes.Offset + j];
                        if (suffix == previous)
                        {
                            continue;
                        }
                        affixReader.Position = 8 * suffix;
                        char flag         = (char)(affixReader.ReadInt16() & 0xffff);
                        char stripOrd     = (char)(affixReader.ReadInt16() & 0xffff);
                        int  condition    = (char)(affixReader.ReadInt16() & 0xffff);
                        bool crossProduct = (condition & 1) == 1;
                        condition = condition.TripleShift(1);
                        char append = (char)(affixReader.ReadInt16() & 0xffff);

                        bool compatible;
                        if (recursionDepth == 0)
                        {
                            if (dictionary.onlyincompound == -1)
                            {
                                compatible = true;
                            }
                            else
                            {
                                // check if affix is allowed in a non-compound word
                                dictionary.flagLookup.Get(append, scratch);
                                char[] appendFlags = Dictionary.DecodeFlags(scratch);
                                compatible = !Dictionary.HasFlag(appendFlags, (char)dictionary.onlyincompound);
                            }
                        }
                        else if (crossProduct)
                        {
                            // cross check incoming continuation class (flag of previous affix) against list.
                            dictionary.flagLookup.Get(append, scratch);
                            char[] appendFlags = Dictionary.DecodeFlags(scratch);
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(prevFlag >= 0);
                            }
                            bool allowed = dictionary.onlyincompound == -1 ||
                                           !Dictionary.HasFlag(appendFlags, (char)dictionary.onlyincompound);
                            compatible = HasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
                        }
                        else
                        {
                            compatible = false;
                        }

                        if (compatible)
                        {
                            int appendLength    = length - i;
                            int deAffixedLength = length - appendLength;

                            int stripStart  = dictionary.stripOffsets[stripOrd];
                            int stripEnd    = dictionary.stripOffsets[stripOrd + 1];
                            int stripLength = stripEnd - stripStart;

                            if (!CheckCondition(condition, word, 0, deAffixedLength, dictionary.stripData, stripStart, stripLength))
                            {
                                continue;
                            }

                            char[] strippedWord = new char[stripLength + deAffixedLength];
                            Array.Copy(word, 0, strippedWord, 0, deAffixedLength);
                            Array.Copy(dictionary.stripData, stripStart, strippedWord, deAffixedLength, stripLength);

                            IList <CharsRef> stemList = ApplyAffix(strippedWord, strippedWord.Length, suffix, prefixFlag, recursionDepth, false, circumfix, caseVariant);

                            stems.AddRange(stemList);
                        }
                    }
                }
            }

            return(stems);
        }
Ejemplo n.º 8
0
        // ================================================= Helper Methods ================================================

        /// <summary>
        /// Generates a list of stems for the provided word
        /// </summary>
        /// <param name="word"> Word to generate the stems for </param>
        /// <param name="length"> length </param>
        /// <param name="previous"> previous affix that was removed (so we dont remove same one twice) </param>
        /// <param name="prevFlag"> Flag from a previous stemming step that need to be cross-checked with any affixes in this recursive step </param>
        /// <param name="prefixFlag"> flag of the most inner removed prefix, so that when removing a suffix, its also checked against the word </param>
        /// <param name="recursionDepth"> current recursiondepth </param>
        /// <param name="doPrefix"> true if we should remove prefixes </param>
        /// <param name="doSuffix"> true if we should remove suffixes </param>
        /// <param name="previousWasPrefix"> true if the previous removal was a prefix:
        ///        if we are removing a suffix, and it has no continuation requirements, its ok.
        ///        but two prefixes (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse. </param>
        /// <param name="circumfix"> true if the previous prefix removal was signed as a circumfix
        ///        this means inner most suffix must also contain circumfix flag. </param>
        /// <returns> <see cref="IList{CharsRef}"/> of stems, or empty list if no stems are found </returns>
        private IList <CharsRef> Stem(char[] word, int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, bool doPrefix, bool doSuffix, bool previousWasPrefix, bool circumfix)
        {
            // TODO: allow this stuff to be reused by tokenfilter
            List <CharsRef> stems = new List <CharsRef>();

            if (doPrefix && dictionary.prefixes != null)
            {
                for (int i = length - 1; i >= 0; i--)
                {
                    Int32sRef prefixes = dictionary.LookupPrefix(word, 0, i);
                    if (prefixes == null)
                    {
                        continue;
                    }

                    for (int j = 0; j < prefixes.Length; j++)
                    {
                        int prefix = prefixes.Int32s[prefixes.Offset + j];
                        if (prefix == previous)
                        {
                            continue;
                        }
                        affixReader.Position = 8 * prefix;
                        char flag         = (char)(affixReader.ReadInt16() & 0xffff);
                        char stripOrd     = (char)(affixReader.ReadInt16() & 0xffff);
                        int  condition    = (char)(affixReader.ReadInt16() & 0xffff);
                        bool crossProduct = (condition & 1) == 1;
                        condition = (int)((uint)condition >> 1);
                        char append = (char)(affixReader.ReadInt16() & 0xffff);

                        bool compatible;
                        if (recursionDepth == 0)
                        {
                            compatible = true;
                        }
                        else if (crossProduct)
                        {
                            // cross check incoming continuation class (flag of previous affix) against list.
                            dictionary.flagLookup.Get(append, scratch);
                            char[] appendFlags = Dictionary.DecodeFlags(scratch);
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(prevFlag >= 0);
                            }
                            compatible = HasCrossCheckedFlag((char)prevFlag, appendFlags, false);
                        }
                        else
                        {
                            compatible = false;
                        }

                        if (compatible)
                        {
                            int deAffixedStart  = i;
                            int deAffixedLength = length - deAffixedStart;

                            int stripStart  = dictionary.stripOffsets[stripOrd];
                            int stripEnd    = dictionary.stripOffsets[stripOrd + 1];
                            int stripLength = stripEnd - stripStart;

                            if (!CheckCondition(condition, dictionary.stripData, stripStart, stripLength, word, deAffixedStart, deAffixedLength))
                            {
                                continue;
                            }

                            char[] strippedWord = new char[stripLength + deAffixedLength];
                            Array.Copy(dictionary.stripData, stripStart, strippedWord, 0, stripLength);
                            Array.Copy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength);

                            IList <CharsRef> stemList = ApplyAffix(strippedWord, strippedWord.Length, prefix, -1, recursionDepth, true, circumfix);

                            stems.AddRange(stemList);
                        }
                    }
                }
            }

            if (doSuffix && dictionary.suffixes != null)
            {
                for (int i = 0; i < length; i++)
                {
                    Int32sRef suffixes = dictionary.LookupSuffix(word, i, length - i);
                    if (suffixes == null)
                    {
                        continue;
                    }

                    for (int j = 0; j < suffixes.Length; j++)
                    {
                        int suffix = suffixes.Int32s[suffixes.Offset + j];
                        if (suffix == previous)
                        {
                            continue;
                        }
                        affixReader.Position = 8 * suffix;
                        char flag         = (char)(affixReader.ReadInt16() & 0xffff);
                        char stripOrd     = (char)(affixReader.ReadInt16() & 0xffff);
                        int  condition    = (char)(affixReader.ReadInt16() & 0xffff);
                        bool crossProduct = (condition & 1) == 1;
                        condition = (int)((uint)condition >> 1);
                        char append = (char)(affixReader.ReadInt16() & 0xffff);

                        bool compatible;
                        if (recursionDepth == 0)
                        {
                            compatible = true;
                        }
                        else if (crossProduct)
                        {
                            // cross check incoming continuation class (flag of previous affix) against list.
                            dictionary.flagLookup.Get(append, scratch);
                            char[] appendFlags = Dictionary.DecodeFlags(scratch);
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(prevFlag >= 0);
                            }
                            compatible = HasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
                        }
                        else
                        {
                            compatible = false;
                        }

                        if (compatible)
                        {
                            int appendLength    = length - i;
                            int deAffixedLength = length - appendLength;

                            int stripStart  = dictionary.stripOffsets[stripOrd];
                            int stripEnd    = dictionary.stripOffsets[stripOrd + 1];
                            int stripLength = stripEnd - stripStart;

                            if (!CheckCondition(condition, word, 0, deAffixedLength, dictionary.stripData, stripStart, stripLength))
                            {
                                continue;
                            }

                            char[] strippedWord = new char[stripLength + deAffixedLength];
                            Array.Copy(word, 0, strippedWord, 0, deAffixedLength);
                            Array.Copy(dictionary.stripData, stripStart, strippedWord, deAffixedLength, stripLength);

                            IList <CharsRef> stemList = ApplyAffix(strippedWord, strippedWord.Length, suffix, prefixFlag, recursionDepth, false, circumfix);

                            stems.AddRange(stemList);
                        }
                    }
                }
            }

            return(stems);
        }