/// <summary>
/// Builds the <see cref="StemmerOverrideMap"/> that a <see cref="StemmerOverrideFilter"/> consumes,
/// using the entries currently held by this instance.
/// </summary>
/// <remarks>
/// The keys stored in <c>hash</c> are visited in the order produced by
/// <c>hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer)</c>. Each key is converted from UTF-8 to
/// UTF-32 and added to a <c>FST.INPUT_TYPE.BYTE4</c> builder together with a
/// <see cref="BytesRef"/> created from the matching <c>outputValues</c> entry.
/// </remarks>
/// <returns> a <see cref="StemmerOverrideMap"/> built from the finished builder and the <c>ignoreCase</c> setting </returns>
/// <exception cref="IOException"> if an <see cref="IOException"/> occurs; </exception>
public virtual StemmerOverrideMap Build()
{
    ByteSequenceOutputs byteOutputs = ByteSequenceOutputs.Singleton;
    Builder<BytesRef> fstBuilder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, byteOutputs);

    // Ids of the hashed keys, ordered by the UTF-8-sorted-as-Unicode comparer.
    int[] sortedIds = hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer);

    // Scratch buffer reused for the UTF-32 form of every key.
    Int32sRef codePoints = new Int32sRef();

    // Only the first hash.Count slots of sortedIds are read.
    for (int pos = 0, count = hash.Count; pos < count; pos++)
    {
        int entryId = sortedIds[pos];
        BytesRef utf8Key = hash.Get(entryId, spare);
        UnicodeUtil.UTF8toUTF32(utf8Key, codePoints);
        fstBuilder.Add(codePoints, new BytesRef(outputValues[entryId]));
    }

    return new StemmerOverrideMap(fstBuilder.Finish(), ignoreCase);
}
/// <summary>
/// Creates the <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/>.
/// </summary>
/// <remarks>
/// Keys from <c>hash</c> are processed in the order returned by
/// <c>hash.sort(BytesRef.UTF8SortedAsUnicodeComparator)</c>; each one is converted from UTF-8 to
/// UTF-32 and added to a <c>FST.INPUT_TYPE.BYTE4</c> builder along with a <see cref="BytesRef"/>
/// created from the corresponding <c>outputValues</c> entry.
/// The Java original declared <c>throws java.io.IOException</c>; .NET has no equivalent clause.
/// </remarks>
/// <returns> a <see cref="StemmerOverrideMap"/> built from the finished builder and the <c>ignoreCase</c> setting </returns>
/// <exception cref="IOException"> if an <see cref="IOException"/> occurs; </exception>
public virtual StemmerOverrideMap build()
{
    ByteSequenceOutputs byteOutputs = ByteSequenceOutputs.Singleton;

    // The FST builder type is spelled out in full, exactly as in the converted source.
    var fstBuilder = new org.apache.lucene.util.fst.Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, byteOutputs);

    int[] order = hash.sort(BytesRef.UTF8SortedAsUnicodeComparator);

    // Scratch buffer reused for the UTF-32 form of every key.
    IntsRef utf32Key = new IntsRef();

    int total = hash.size();
    int index = 0;
    while (index < total)
    {
        int entryId = order[index];
        BytesRef utf8Key = hash.get(entryId, spare);
        UnicodeUtil.UTF8toUTF32(utf8Key, utf32Key);
        fstBuilder.add(utf32Key, new BytesRef(outputValues[entryId]));
        index++;
    }

    return new StemmerOverrideMap(fstBuilder.finish(), ignoreCase);
}
/// <summary>
/// Decides whether <paramref name="term"/> is accepted by this enumeration.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item><description><c>AcceptStatus.END</c> when the term does not start with <c>prefixBytesRef</c>.</description></item>
/// <item><description><c>AcceptStatus.NO</c> when <c>CalcDistance</c> returns <see cref="int.MinValue"/>, when raw mode is on
/// and the distance exceeds <c>m_maxEdits</c>, or when raw mode is off and the similarity is not greater than
/// <c>MinSimilarity</c>.</description></item>
/// <item><description><c>AcceptStatus.YES</c> otherwise, after setting <c>boostAtt.Boost</c> to
/// <c>(similarity - MinSimilarity) * m_scaleFactor</c>.</description></item>
/// </list>
/// </remarks>
/// <param name="term"> the candidate term, as UTF-8 bytes </param>
/// <returns> the <c>AcceptStatus</c> for <paramref name="term"/> </returns>
[MethodImpl(MethodImplOptions.NoOptimization)] // LUCENENET specific: comparing float equality fails in x86 on .NET Framework with optimizations enabled. Fixes TestTokenLengthOpt.
// NOTE(review): the original line ended this comment with "#endif" but no matching "#if" is visible here;
// presumably the attribute was meant to be conditional on a build symbol - confirm before restoring the directive pair.
protected override sealed AcceptStatus Accept(BytesRef term)
{
    if (!StringHelper.StartsWith(term, prefixBytesRef))
    {
        return AcceptStatus.END;
    }

    UnicodeUtil.UTF8toUTF32(term, utf32);
    int editDistance = CalcDistance(
        utf32.Int32s,
        outerInstance.m_realPrefixLength,
        utf32.Length - outerInstance.m_realPrefixLength);

    // int.MinValue is the sentinel that Levenshtein stopped early.
    if (editDistance == int.MinValue)
    {
        return AcceptStatus.NO;
    }

    // In raw mode a distance above maxEdits rejects the term without computing the similarity.
    if (outerInstance.m_raw && editDistance > outerInstance.m_maxEdits)
    {
        return AcceptStatus.NO;
    }

    // The float expressions below are kept exactly as written; see the attribute comment above.
    float similarity = CalcSimilarity(editDistance, (utf32.Length - outerInstance.m_realPrefixLength), text.Length);

    // In raw mode the distance is already known to be <= maxEdits because of the guard above.
    bool accepted = outerInstance.m_raw || similarity > outerInstance.MinSimilarity;
    if (!accepted)
    {
        return AcceptStatus.NO;
    }

    boostAtt.Boost = (similarity - outerInstance.MinSimilarity) * outerInstance.m_scaleFactor;
    return AcceptStatus.YES;
}