Exemplo n.º 1
0
            /// <summary>
            /// Feeds every entry currently held in the hash into an FST builder and wraps the
            /// finished FST in a <see cref="StemmerOverrideMap"/> for use with the
            /// <see cref="StemmerOverrideFilter"/>.
            /// </summary>
            /// <returns> a new <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </returns>
            /// <exception cref="IOException"> if an <see cref="IOException"/> occurs; </exception>
            public virtual StemmerOverrideMap Build()
            {
                ByteSequenceOutputs fstOutputs = ByteSequenceOutputs.Singleton;
                var fstBuilder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, fstOutputs);

                // Scratch buffer that receives the UTF-32 form of each key; reused across iterations.
                var codePoints = new Int32sRef();

                // Entry ids in the order produced by the UTF8SortedAsUnicode comparer.
                int[] orderedIds = hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
                int entryCount = hash.Count;

                // Bounded by the hash's entry count rather than the array's own length.
                for (int ord = 0; ord < entryCount; ord++)
                {
                    int entryId = orderedIds[ord];

                    // Decode the UTF-8 key into the scratch buffer, then add it with its output value.
                    UnicodeUtil.UTF8toUTF32(hash.Get(entryId, spare), codePoints);
                    fstBuilder.Add(codePoints, new BytesRef(outputValues[entryId]));
                }

                return new StemmerOverrideMap(fstBuilder.Finish(), ignoreCase);
            }
Exemplo n.º 2
0
            /// <summary>
            /// Adds every entry currently held in the hash to an FST builder and returns the
            /// finished FST wrapped in a <see cref="StemmerOverrideMap"/> to be used with the
            /// <see cref="StemmerOverrideFilter"/>.
            /// </summary>
            /// <returns> a new <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </returns>
            /// <exception cref="IOException"> if an <see cref="IOException"/> occurs; </exception>
            public virtual StemmerOverrideMap build()
            {
                ByteSequenceOutputs fstOutputs = ByteSequenceOutputs.Singleton;
                var fstBuilder = new org.apache.lucene.util.fst.Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, fstOutputs);

                // Scratch buffer that receives the UTF-32 form of each key; reused across iterations.
                var codePoints = new IntsRef();

                // Entry ids in the order produced by the UTF8SortedAsUnicode comparator.
                int[] orderedIds = hash.sort(BytesRef.UTF8SortedAsUnicodeComparator);
                int entryCount = hash.size();

                // Bounded by the hash's size rather than the array's own length.
                for (int ord = 0; ord < entryCount; ord++)
                {
                    int entryId = orderedIds[ord];

                    // Decode the UTF-8 key into the scratch buffer, then add it with its output value.
                    UnicodeUtil.UTF8toUTF32(hash.get(entryId, spare), codePoints);
                    fstBuilder.add(codePoints, new BytesRef(outputValues[entryId]));
                }

                return new StemmerOverrideMap(fstBuilder.finish(), ignoreCase);
            }
Exemplo n.º 3
0
            [MethodImpl(MethodImplOptions.NoOptimization)] // LUCENENET specific: comparing float equality fails in x86 on .NET Framework with optimizations enabled. Fixes TestTokenLengthOpt.
#endif
            /// <summary>
            /// Decides whether <paramref name="term"/> is accepted, based on its edit distance
            /// (and, when not in raw mode, its similarity) relative to the query text.
            /// </summary>
            /// <param name="term"> the candidate term, UTF-8 encoded </param>
            /// <returns>
            /// <c>END</c> when the term does not start with the prefix bytes; <c>NO</c> when the
            /// distance computation returned its early-stop sentinel, when raw mode is on and the
            /// distance exceeds the maximum edits, or when the similarity is not above the minimum;
            /// <c>YES</c> otherwise (after the boost attribute has been set).
            /// </returns>
            protected override sealed AcceptStatus Accept(BytesRef term)
            {
                // A term that does not carry the prefix is answered with END.
                if (!StringHelper.StartsWith(term, prefixBytesRef))
                {
                    return AcceptStatus.END;
                }

                UnicodeUtil.UTF8toUTF32(term, utf32);

                // Only the part of the term after the real prefix takes part in the comparison.
                int prefixLength = outerInstance.m_realPrefixLength;
                int suffixLength = utf32.Length - prefixLength;
                int distance = CalcDistance(utf32.Int32s, prefixLength, suffixLength);

                // int.MinValue is the sentinel that Levenshtein stopped early.
                if (distance == int.MinValue)
                {
                    return AcceptStatus.NO;
                }

                // In raw mode there is no need to calculate similarity once distance > maxEdits.
                bool raw = outerInstance.m_raw;
                if (raw && distance > outerInstance.m_maxEdits)
                {
                    return AcceptStatus.NO;
                }

                float similarity = CalcSimilarity(distance, suffixLength, text.Length);

                // In raw mode the distance is already known to be <= maxEdits thanks to the guard
                // above; otherwise the similarity must be strictly above the minimum.
                bool accepted = raw || similarity > outerInstance.MinSimilarity;
                if (!accepted)
                {
                    return AcceptStatus.NO;
                }

                boostAtt.Boost = (similarity - outerInstance.MinSimilarity) * outerInstance.m_scaleFactor;
                return AcceptStatus.YES;
            }