示例#1
0
            /// <summary>
            /// Convenience helper that concatenates <paramref name="words"/> into
            /// <paramref name="reuse"/>, placing <see cref="SynonymMap.WORD_SEPARATOR"/>
            /// between consecutive words. Characters are written starting at index 0
            /// of the backing array and <c>reuse.length</c> is set to the number of
            /// characters written.
            /// </summary>
            /// <param name="words"> the terms to join, in order </param>
            /// <param name="reuse"> destination; neither it nor its <c>chars</c> array may be null </param>
            /// <returns> the same <paramref name="reuse"/> instance that was passed in </returns>
            public static CharsRef join(string[] words, CharsRef reuse)
            {
                char[] dest    = reuse.chars;
                int    written = 0;

                for (int w = 0; w < words.Length; w++)
                {
                    string current    = words[w];
                    int    currentLen = current.Length;

                    // Every word after the first output needs one extra slot for WORD_SEPARATOR.
                    int required = (written == 0) ? currentLen : written + currentLen + 1;
                    if (dest.Length < required)
                    {
                        reuse.grow(required);
                        dest = reuse.chars; // growing may have swapped the backing array
                    }

                    if (written > 0)
                    {
                        dest[written] = SynonymMap.WORD_SEPARATOR;
                        written++;
                    }

                    current.CopyTo(0, dest, written, currentLen);
                    written += currentLen;
                }

                reuse.length = written;
                return reuse;
            }
示例#2
0
            /// <summary>
            /// Registers an input string together with its stemmer override output.
            /// When <c>ignoreCase</c> is set, the input is lowercased code point by
            /// code point before it is encoded and hashed.
            /// </summary>
            /// <param name="input"> the input char sequence </param>
            /// <param name="output"> the stemmer override output char sequence </param>
            /// <returns> <code>true</code> if the input was new and the output was recorded;
            /// <code>false</code> if the input had already been added to this builder. </returns>
            public virtual bool add(ICharSequence input, ICharSequence output)
            {
                int inputLength = input.length();

                if (!ignoreCase)
                {
                    UnicodeUtil.UTF16toUTF8(input, 0, inputLength, spare);
                }
                else
                {
                    // Lowercase into the scratch buffer; the encoded span below uses the
                    // original input length.
                    charsSpare.grow(inputLength);
                    char[] lowered = charsSpare.chars;
                    int    pos     = 0;
                    while (pos < inputLength)
                    {
                        pos += char.toChars(
                            char.ToLower(char.codePointAt(input, pos)),
                            lowered,
                            pos);
                    }
                    UnicodeUtil.UTF16toUTF8(lowered, 0, inputLength, spare);
                }

                // The output is only stored when hash.add reports a non-negative value.
                bool isNewEntry = hash.add(spare) >= 0;
                if (isNewEntry)
                {
                    outputValues.Add(output);
                }
                return isNewEntry;
            }
            /// <summary>
            /// Adds an input string and its stemmer override output to this builder.
            /// With <c>ignoreCase</c> enabled the input is first lowercased, one code
            /// point at a time, into a scratch buffer.
            /// </summary>
            /// <param name="input"> the input char sequence </param>
            /// <param name="output"> the stemmer override output char sequence </param>
            /// <returns> <code>false</code> iff the input has already been added to this builder, otherwise <code>true</code>. </returns>
            public virtual bool add(CharSequence input, CharSequence output)
            {
                int charCount = input.length();

                if (ignoreCase)
                {
                    charsSpare.grow(charCount);
                    char[] scratchChars = charsSpare.chars;

                    // Each step advances by however many chars toChars reports writing.
                    int cursor = 0;
                    while (cursor < charCount)
                    {
                        cursor += char.toChars(char.ToLower(char.codePointAt(input, cursor)), scratchChars, cursor);
                    }

                    UnicodeUtil.UTF16toUTF8(scratchChars, 0, charCount, spare);
                }
                else
                {
                    UnicodeUtil.UTF16toUTF8(input, 0, charCount, spare);
                }

                // A negative result from hash.add means nothing is recorded.
                if (hash.add(spare) < 0)
                {
                    return false;
                }

                outputValues.Add(output);
                return true;
            }
示例#4
0
            /// <summary>
            /// Convenience helper: runs <paramref name="text"/> through the analyzer and
            /// writes the resulting tokens into <paramref name="reuse"/>, separated by
            /// <see cref="SynonymMap.WORD_SEPARATOR"/>. <paramref name="reuse"/> and its
            /// <c>chars</c> array must not be null.
            /// </summary>
            /// <param name="text"> the text to analyze </param>
            /// <param name="reuse"> destination buffer; its length is reset to 0 before tokens are appended </param>
            /// <returns> the same <paramref name="reuse"/> instance, holding the joined tokens </returns>
            /// <exception cref="System.ArgumentException"> if a token has zero length, a token's
            /// position increment is not 1, or no tokens were produced at all </exception>
            /// <remarks>
            /// The Java original declared <c>throws java.io.IOException</c>. Here an
            /// <see cref="IOException"/> from the token stream is captured and handed to
            /// <c>IOUtils.closeWhileHandlingException</c> together with the stream
            /// (presumably rethrown by that helper - confirm against its implementation).
            /// </remarks>
            public virtual CharsRef analyze(string text, CharsRef reuse)
            {
                IOException ioFailure = null;
                TokenStream stream    = analyzer.tokenStream("", text);

                try
                {
                    CharTermAttribute          term   = stream.addAttribute(typeof(CharTermAttribute));
                    PositionIncrementAttribute posInc = stream.addAttribute(typeof(PositionIncrementAttribute));

                    stream.reset();
                    reuse.length = 0;

                    while (stream.incrementToken())
                    {
                        int termLength = term.length();

                        if (termLength == 0)
                        {
                            throw new System.ArgumentException("term: " + text + " analyzed to a zero-length token");
                        }
                        if (posInc.PositionIncrement != 1)
                        {
                            throw new System.ArgumentException("term: " + text + " analyzed to a token with posinc != 1");
                        }

                        // Room for what is already there, this token, and one separator.
                        reuse.grow(reuse.length + termLength + 1);

                        int writeAt = reuse.offset + reuse.length;
                        if (reuse.length > 0)
                        {
                            reuse.chars[writeAt] = SynonymMap.WORD_SEPARATOR;
                            writeAt++;
                            reuse.length++;
                        }

                        Array.Copy(term.buffer(), 0, reuse.chars, writeAt, termLength);
                        reuse.length += termLength;
                    }

                    stream.end();
                }
                catch (IOException caught)
                {
                    ioFailure = caught;
                }
                finally
                {
                    IOUtils.closeWhileHandlingException(ioFailure, stream);
                }

                if (reuse.length == 0)
                {
                    throw new System.ArgumentException("term: " + text + " was completely eliminated by analyzer");
                }

                return reuse;
            }
示例#5
0
        /// <summary>
        /// Returns up to <paramref name="num"/> completions for the prefix
        /// <paramref name="key"/>. The prefix is matched exactly via <c>LookupPrefix</c>;
        /// the remaining results come from <c>Util.ShortestPaths</c> over the FST using
        /// <c>weightComparator</c>.
        /// </summary>
        /// <param name="key"> the prefix to complete </param>
        /// <param name="contexts"> must be <c>null</c>; this suggester does not support contexts </param>
        /// <param name="onlyMorePopular"> must be <c>false</c> </param>
        /// <param name="num"> maximum number of results; asserted to be positive </param>
        /// <returns>
        /// An empty list when no FST has been built or the prefix is not present;
        /// otherwise the collected results. When <c>exactFirst</c> is set and the prefix
        /// itself is a complete entry, that entry is placed first.
        /// </returns>
        /// <exception cref="System.ArgumentException"> if <paramref name="contexts"/> is not
        /// <c>null</c> or <paramref name="onlyMorePopular"/> is <c>true</c> </exception>
        /// <exception cref="System.Exception"> wraps any <see cref="IOException"/> raised
        /// while walking the FST; the original is kept as the inner exception </exception>
        public override IList<LookupResult> DoLookup(string key, HashSet<BytesRef> contexts, bool onlyMorePopular, int num)
        {
            if (contexts != null)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }
            Debug.Assert(num > 0);

            if (onlyMorePopular)
            {
                throw new System.ArgumentException("this suggester only works with onlyMorePopular=false");
            }

            if (fst == null)
            {
                return Collections.EmptyList<LookupResult>();
            }

            BytesRef scratch      = new BytesRef(key);
            int      prefixLength = scratch.Length;

            FST.Arc<long?> arc = new FST.Arc<long?>();

            // match the prefix portion exactly
            long? prefixOutput = null;

            try
            {
                prefixOutput = LookupPrefix(scratch, arc);
            }
            catch (IOException bogus)
            {
                // System.Exception has no (Exception) constructor; pass message + inner exception.
                throw new Exception(bogus.Message, bogus);
            }

            if (prefixOutput == null)
            {
                // The type argument cannot be inferred from the return type, so spell it out.
                return Collections.EmptyList<LookupResult>();
            }

            IList<LookupResult> results = new List<LookupResult>(num);
            CharsRef            spare   = new CharsRef();

            if (exactFirst && arc.Final)
            {
                // Size the char buffer by the UTF-8 byte count before decoding into it.
                spare.Grow(scratch.Length);
                UnicodeUtil.UTF8toUTF16(scratch, spare);
                results.Add(new LookupResult(spare.ToString(), decodeWeight(prefixOutput + arc.NextFinalOutput)));
                if (--num == 0)
                {
                    return results; // that was quick
                }
            }

            // complete top-N
            Util.Fst.Util.TopResults<long?> completions = null;
            try
            {
                completions = Util.ShortestPaths(fst, arc, prefixOutput, weightComparator, num, !exactFirst);
                Debug.Assert(completions.IsComplete);
            }
            catch (IOException bogus)
            {
                throw new Exception(bogus.Message, bogus);
            }

            BytesRef suffix = new BytesRef(8);

            foreach (Util.Fst.Util.Result<long?> completion in completions)
            {
                // Rewind scratch to the bare prefix, then append this completion's suffix.
                scratch.Length = prefixLength;
                Util.ToBytesRef(completion.input, suffix);
                scratch.Append(suffix);
                spare.Grow(scratch.Length);
                UnicodeUtil.UTF8toUTF16(scratch, spare);
                results.Add(new LookupResult(spare.ToString(), decodeWeight(completion.output)));
            }
            return results;
        }
示例#6
0
		/// <summary>
		/// Sugar: analyzes <paramref name="text"/> with the analyzer and joins the
		/// emitted tokens with <see cref="SynonymMap.WORD_SEPARATOR"/> inside
		/// <paramref name="reuse"/>. <paramref name="reuse"/> and its <c>chars</c>
		/// array must not be null.
		/// </summary>
		/// <param name="text"> the text to analyze </param>
		/// <param name="reuse"> buffer that receives the joined tokens; its length is reset to 0 first </param>
		/// <returns> <paramref name="reuse"/>, now holding the joined tokens </returns>
		/// <exception cref="System.ArgumentException"> if any token is empty, any token has a
		/// position increment other than 1, or the analyzer produced no tokens </exception>
		/// <remarks>
		/// The Java original declared <c>throws java.io.IOException</c>. An
		/// <see cref="IOException"/> raised while consuming the stream is stored and passed
		/// to <c>IOUtils.closeWhileHandlingException</c> (presumably rethrown there - confirm).
		/// </remarks>
		public virtual CharsRef analyze(string text, CharsRef reuse)
		{
			IOException pending = null;
			TokenStream tokens = analyzer.tokenStream("", text);
			try
			{
				CharTermAttribute termText = tokens.addAttribute(typeof(CharTermAttribute));
				PositionIncrementAttribute increment = tokens.addAttribute(typeof(PositionIncrementAttribute));
				tokens.reset();
				reuse.length = 0;
				while (tokens.incrementToken())
				{
					int tokenLen = termText.length();
					if (tokenLen == 0)
					{
						throw new System.ArgumentException("term: " + text + " analyzed to a zero-length token");
					}
					if (increment.PositionIncrement != 1)
					{
						throw new System.ArgumentException("term: " + text + " analyzed to a token with posinc != 1");
					}

					// existing content + this token + one separator
					reuse.grow(reuse.length + tokenLen + 1);

					if (reuse.length > 0)
					{
						// Not the first token: put the separator right after the current content.
						reuse.chars[reuse.offset + reuse.length] = SynonymMap.WORD_SEPARATOR;
						reuse.length++;
					}

					Array.Copy(termText.buffer(), 0, reuse.chars, reuse.offset + reuse.length, tokenLen);
					reuse.length += tokenLen;
				}
				tokens.end();
			}
			catch (IOException ioe)
			{
				pending = ioe;
			}
			finally
			{
				IOUtils.closeWhileHandlingException(pending, tokens);
			}

			if (reuse.length == 0)
			{
				throw new System.ArgumentException("term: " + text + " was completely eliminated by analyzer");
			}
			return reuse;
		}
示例#7
0
		/// <summary>
		/// Sugar: joins the provided terms with <see cref="SynonymMap.WORD_SEPARATOR"/>,
		/// writing the result from index 0 of <c>reuse.chars</c> and setting
		/// <c>reuse.length</c> to the total number of characters written.
		/// </summary>
		/// <param name="words"> the terms to join, in order </param>
		/// <param name="reuse"> destination; it and its <c>chars</c> array must not be null </param>
		/// <returns> the <paramref name="reuse"/> instance that was passed in </returns>
		public static CharsRef join(string[] words, CharsRef reuse)
		{
			char[] target = reuse.chars;
			int filled = 0;

			for (int index = 0; index < words.Length; index++)
			{
				string term = words[index];
				int termLen = term.Length;

				int capacityNeeded;
				if (filled == 0)
				{
					capacityNeeded = termLen;
				}
				else
				{
					capacityNeeded = filled + termLen + 1; // +1 for WORD_SEPARATOR
				}

				if (target.Length < capacityNeeded)
				{
					reuse.grow(capacityNeeded);
					target = reuse.chars; // pick up the array again after growing
				}

				if (filled > 0)
				{
					target[filled] = SynonymMap.WORD_SEPARATOR;
					filled++;
				}

				term.CopyTo(0, target, filled, termLen);
				filled += termLen;
			}

			reuse.length = filled;
			return reuse;
		}