/// <summary>
/// Convenience helper: concatenates <paramref name="words"/> into
/// <paramref name="reuse"/>, writing <c>SynonymMap.WORD_SEPARATOR</c>
/// before every word that is appended after at least one char has
/// already been written.
/// </summary>
/// <param name="words"> the terms to concatenate, in order </param>
/// <param name="reuse"> destination buffer holder; neither it nor its <c>chars</c> array may be null </param>
/// <returns> the same <paramref name="reuse"/> instance, with <c>length</c> set to the number of chars written </returns>
public static CharsRef join(string[] words, CharsRef reuse)
{
    char[] dest = reuse.chars;
    int written = 0;

    for (int w = 0; w < words.Length; w++)
    {
        string current = words[w];
        int currentLen = current.Length;

        // Capacity required after this word: everything written so far, the
        // word itself, and one extra slot for the separator when the output
        // is not empty.
        int required = currentLen;
        if (written != 0)
        {
            required = 1 + written + currentLen;
        }

        if (dest.Length < required)
        {
            reuse.grow(required);
            // grow may have swapped the backing array, so re-read it.
            dest = reuse.chars;
        }

        if (written > 0)
        {
            dest[written] = SynonymMap.WORD_SEPARATOR;
            written++;
        }

        current.CopyTo(0, dest, written, currentLen);
        written += currentLen;
    }

    reuse.length = written;
    return reuse;
}
/// <summary>
/// Registers an input string together with its stemmer override output in this builder.
/// When <c>ignoreCase</c> is set, the input is lowercased code point by code point
/// before it is converted to UTF-8 and hashed.
/// </summary>
/// <param name="input"> the input char sequence </param>
/// <param name="output"> the stemmer override output char sequence </param>
/// <returns>
/// <code>true</code> when <c>hash.add</c> reports a non-negative id (the input was not
/// present before), <code>false</code> otherwise.
/// </returns>
public virtual bool add(ICharSequence input, ICharSequence output)
{
    int inputLen = input.length();

    if (!ignoreCase)
    {
        UnicodeUtil.UTF16toUTF8(input, 0, inputLen, spare);
    }
    else
    {
        // Lowercase on the fly into the reusable char buffer.
        charsSpare.grow(inputLen);
        char[] lowered = charsSpare.chars;
        int pos = 0;
        while (pos < inputLen)
        {
            // toChars returns how many UTF-16 units it wrote for this code point.
            pos += char.toChars(char.ToLower(char.codePointAt(input, pos)), lowered, pos);
        }
        UnicodeUtil.UTF16toUTF8(lowered, 0, inputLen, spare);
    }

    // A negative id from the hash signals that the key was already present.
    if (hash.add(spare) < 0)
    {
        return false;
    }

    outputValues.Add(output);
    return true;
}
/// <summary>
/// Adds an input string and its stemmer override output to this builder.
/// With <c>ignoreCase</c> enabled the input is lowercased, one code point at a
/// time, into <c>charsSpare</c> before being encoded as UTF-8 into <c>spare</c>.
/// </summary>
/// <param name="input"> the input char sequence </param>
/// <param name="output"> the stemmer override output char sequence </param>
/// <returns>
/// <code>false</code> iff <c>hash.add</c> returns a negative value (the input has
/// already been added to this builder), otherwise <code>true</code>.
/// </returns>
public virtual bool add(CharSequence input, CharSequence output)
{
    int charCount = input.length();

    if (ignoreCase)
    {
        // Convert to lowercase on the fly.
        charsSpare.grow(charCount);
        char[] scratchChars = charsSpare.chars;

        int index = 0;
        while (index < charCount)
        {
            var codePoint = char.codePointAt(input, index);
            var lowerCodePoint = char.ToLower(codePoint);
            // Advance by the number of UTF-16 units emitted for this code point.
            index += char.toChars(lowerCodePoint, scratchChars, index);
        }

        UnicodeUtil.UTF16toUTF8(scratchChars, 0, charCount, spare);
    }
    else
    {
        UnicodeUtil.UTF16toUTF8(input, 0, charCount, spare);
    }

    bool isNew = hash.add(spare) >= 0;
    if (isNew)
    {
        outputValues.Add(output);
    }
    return isNew;
}
/// <summary>
/// Convenience helper: runs <paramref name="text"/> through the analyzer and
/// writes the resulting tokens into <paramref name="reuse"/>, separated by
/// <c>SynonymMap.WORD_SEPARATOR</c>.
/// <paramref name="reuse"/> and its <c>chars</c> array must not be null.
/// </summary>
/// <param name="text"> the text to analyze </param>
/// <param name="reuse"> destination; its <c>length</c> is reset to 0 before tokens are appended </param>
/// <returns> the same <paramref name="reuse"/> instance holding the joined tokens </returns>
/// <exception cref="System.ArgumentException">
/// if a token is zero-length, a token has a position increment other than 1,
/// or no chars were produced at all
/// </exception>
// The Java original declared "throws java.io.IOException"; .NET has no throws clauses.
public virtual CharsRef analyze(string text, CharsRef reuse)
{
    IOException priorException = null;
    TokenStream ts = analyzer.tokenStream("", text);
    try
    {
        CharTermAttribute termAtt = ts.addAttribute(typeof(CharTermAttribute));
        PositionIncrementAttribute posIncAtt = ts.addAttribute(typeof(PositionIncrementAttribute));
        ts.reset();
        reuse.length = 0;

        while (true)
        {
            if (!ts.incrementToken())
            {
                break;
            }

            int termLen = termAtt.length();
            if (termLen == 0)
            {
                throw new System.ArgumentException("term: " + text + " analyzed to a zero-length token");
            }
            if (posIncAtt.PositionIncrement != 1)
            {
                throw new System.ArgumentException("term: " + text + " analyzed to a token with posinc != 1");
            }

            // Reserve room for: current content + this token + one separator.
            reuse.grow(reuse.length + termLen + 1);

            int writeAt = reuse.offset + reuse.length;
            if (reuse.length > 0)
            {
                // Not the first chars in the buffer: put a separator in front.
                reuse.chars[writeAt] = SynonymMap.WORD_SEPARATOR;
                writeAt++;
                reuse.length++;
            }

            Array.Copy(termAtt.buffer(), 0, reuse.chars, writeAt, termLen);
            reuse.length += termLen;
        }

        ts.end();
    }
    catch (IOException e)
    {
        // Remember the failure and hand it to the close helper in the finally block.
        priorException = e;
    }
    finally
    {
        // NOTE(review): presumably closes ts and rethrows priorException when it is
        // non-null - confirm against IOUtils.
        IOUtils.closeWhileHandlingException(priorException, ts);
    }

    if (reuse.length == 0)
    {
        throw new System.ArgumentException("term: " + text + " was completely eliminated by analyzer");
    }
    return reuse;
}
/// <summary>
/// Returns up to <paramref name="num"/> completions for <paramref name="key"/>.
/// The key is first matched exactly as a prefix in the FST; the remaining
/// slots are filled from <c>Util.Fst.Util.ShortestPaths</c> starting at the arc
/// reached by that prefix.
/// </summary>
/// <param name="key"> the prefix to complete </param>
/// <param name="contexts"> must be null; this suggester does not support contexts </param>
/// <param name="onlyMorePopular"> must be <c>false</c> </param>
/// <param name="num"> maximum number of results; asserted to be greater than zero </param>
/// <returns>
/// the matching results; an empty list when no FST is present or the prefix
/// does not match
/// </returns>
/// <exception cref="System.ArgumentException">
/// if <paramref name="contexts"/> is non-null or <paramref name="onlyMorePopular"/> is <c>true</c>
/// </exception>
/// <exception cref="Exception">
/// wraps any <see cref="IOException"/> raised while reading the FST; the
/// original error is available as the inner exception
/// </exception>
public override IList<LookupResult> DoLookup(string key, HashSet<BytesRef> contexts, bool onlyMorePopular, int num)
{
    if (contexts != null)
    {
        throw new System.ArgumentException("this suggester doesn't support contexts");
    }
    Debug.Assert(num > 0);

    if (onlyMorePopular)
    {
        throw new System.ArgumentException("this suggester only works with onlyMorePopular=false");
    }

    if (fst == null)
    {
        return Collections.EmptyList<LookupResult>();
    }

    BytesRef scratch = new BytesRef(key);
    int prefixLength = scratch.Length;
    FST.Arc<long?> arc = new FST.Arc<long?>();

    // Match the prefix portion exactly.
    long? prefixOutput = null;
    try
    {
        prefixOutput = LookupPrefix(scratch, arc);
    }
    catch (IOException bogus)
    {
        // System.Exception has no (Exception) constructor; keep the cause as InnerException.
        throw new Exception(bogus.ToString(), bogus);
    }

    if (prefixOutput == null)
    {
        // The type argument cannot be inferred from the return type, so spell it out.
        return Collections.EmptyList<LookupResult>();
    }

    IList<LookupResult> results = new List<LookupResult>(num);
    CharsRef spare = new CharsRef();
    if (exactFirst && arc.Final)
    {
        // The key itself is a complete entry: report it before any other completion.
        spare.Grow(scratch.Length);
        UnicodeUtil.UTF8toUTF16(scratch, spare);
        results.Add(new LookupResult(spare.ToString(), decodeWeight(prefixOutput + arc.NextFinalOutput)));
        if (--num == 0)
        {
            return results; // that was quick
        }
    }

    // Complete top-N.
    Util.Fst.Util.TopResults<long?> completions = null;
    try
    {
        completions = Util.Fst.Util.ShortestPaths(fst, arc, prefixOutput, weightComparator, num, !exactFirst);
        Debug.Assert(completions.IsComplete);
    }
    catch (IOException bogus)
    {
        throw new Exception(bogus.ToString(), bogus);
    }

    BytesRef suffix = new BytesRef(8);
    foreach (Util.Fst.Util.Result<long?> completion in completions)
    {
        // Rewind to the shared prefix, then append this completion's suffix.
        scratch.Length = prefixLength;
        Util.Fst.Util.ToBytesRef(completion.input, suffix);
        scratch.Append(suffix);

        spare.Grow(scratch.Length);
        UnicodeUtil.UTF8toUTF16(scratch, spare);
        results.Add(new LookupResult(spare.ToString(), decodeWeight(completion.output)));
    }
    return results;
}
/// <summary>
/// Sugar: tokenizes <paramref name="text"/> with the analyzer and stores the
/// tokens in <paramref name="reuse"/>, joined by <c>SynonymMap.WORD_SEPARATOR</c>.
/// Neither <paramref name="reuse"/> nor its <c>chars</c> array may be null.
/// </summary>
/// <param name="text"> the text to run through the analyzer </param>
/// <param name="reuse"> output holder; its <c>length</c> is set to 0 before any token is appended </param>
/// <returns> <paramref name="reuse"/>, now containing the separator-joined tokens </returns>
/// <exception cref="System.ArgumentException">
/// when the analyzer yields a zero-length token, a token whose position
/// increment is not 1, or no output at all
/// </exception>
// Java signature carried "throws java.io.IOException", which has no .NET equivalent.
public virtual CharsRef analyze(string text, CharsRef reuse)
{
    IOException priorException = null;
    TokenStream ts = analyzer.tokenStream("", text);
    try
    {
        CharTermAttribute termAtt = ts.addAttribute(typeof(CharTermAttribute));
        PositionIncrementAttribute posIncAtt = ts.addAttribute(typeof(PositionIncrementAttribute));
        ts.reset();
        reuse.length = 0;

        for (bool hasToken = ts.incrementToken(); hasToken; hasToken = ts.incrementToken())
        {
            int tokenLen = termAtt.length();
            if (tokenLen == 0)
            {
                throw new System.ArgumentException("term: " + text + " analyzed to a zero-length token");
            }

            int posInc = posIncAtt.PositionIncrement;
            if (posInc != 1)
            {
                throw new System.ArgumentException("term: " + text + " analyzed to a token with posinc != 1");
            }

            // current + word + separator
            int capacity = reuse.length + tokenLen + 1;
            reuse.grow(capacity);

            int cursor = reuse.offset + reuse.length;
            bool needsSeparator = reuse.length > 0;
            if (needsSeparator)
            {
                reuse.chars[cursor] = SynonymMap.WORD_SEPARATOR;
                cursor += 1;
                reuse.length += 1;
            }

            Array.Copy(termAtt.buffer(), 0, reuse.chars, cursor, tokenLen);
            reuse.length += tokenLen;
        }

        ts.end();
    }
    catch (IOException e)
    {
        // Captured so the close helper below can take it into account.
        priorException = e;
    }
    finally
    {
        // NOTE(review): looks like this closes ts and rethrows priorException if
        // one was captured - verify against IOUtils.
        IOUtils.closeWhileHandlingException(priorException, ts);
    }

    if (reuse.length == 0)
    {
        throw new System.ArgumentException("term: " + text + " was completely eliminated by analyzer");
    }

    return reuse;
}
/// <summary>
/// Sugar: copies every entry of <paramref name="words"/> into
/// <paramref name="reuse"/>, inserting <c>SynonymMap.WORD_SEPARATOR</c> ahead of
/// each word once the output is non-empty.
/// <paramref name="reuse"/> and its <c>chars</c> array must not be null.
/// </summary>
/// <param name="words"> the terms to join, in order </param>
/// <param name="reuse"> output holder, grown as needed </param>
/// <returns> <paramref name="reuse"/>, with <c>length</c> equal to the total chars written </returns>
public static CharsRef join(string[] words, CharsRef reuse)
{
    int pos = 0;
    char[] target = reuse.chars;

    foreach (string term in words)
    {
        int termLen = term.Length;
        bool needsSeparator = pos > 0;

        // One extra slot for WORD_SEPARATOR whenever something precedes this term.
        int capacity = needsSeparator ? 1 + pos + termLen : termLen;
        if (capacity > target.Length)
        {
            reuse.grow(capacity);
            target = reuse.chars; // pick up the (possibly new) backing array
        }

        if (needsSeparator)
        {
            target[pos] = SynonymMap.WORD_SEPARATOR;
            pos += 1;
        }

        term.CopyTo(0, target, pos, termLen);
        pos += termLen;
    }

    reuse.length = pos;
    return reuse;
}