Beispiel #1
0
	  /// <summary>
	  /// Creates a TrimFilter over the given token stream.
	  /// </summary>
	  /// <param name="version"> compatibility version; only inspected when <paramref name="updateOffsets"/> is true </param>
	  /// <param name="in"> the token stream handed to the base constructor </param>
	  /// <param name="updateOffsets"> value stored in the <c>updateOffsets</c> field; <c>true</c> is rejected
	  ///        for versions on or after <c>Version.LUCENE_44</c> </param>
	  /// <exception cref="System.ArgumentException"> if <paramref name="updateOffsets"/> is true and
	  ///        <paramref name="version"/> is on or after <c>Version.LUCENE_44</c> </exception>
	  public TrimFilter(Version version, TokenStream @in, bool updateOffsets) : base(@in)
	  {
		if (updateOffsets)
		{
		  // The version matters only when offset updating was requested.
		  if (version.onOrAfter(Version.LUCENE_44))
		  {
			throw new System.ArgumentException("updateOffsets=true is not supported anymore as of Lucene 4.4");
		  }
		}

		this.updateOffsets = updateOffsets;
	  }
Beispiel #2
0
	  /// <summary>
	  /// Validates the tokenizer configuration, stores it, and allocates the working buffers.
	  /// </summary>
	  /// <param name="version"> compatibility version; must be on or after <c>Version.LUCENE_44</c> </param>
	  /// <param name="minGram"> smallest gram size; must be at least 1 and not greater than <paramref name="maxGram"/> </param>
	  /// <param name="maxGram"> largest gram size </param>
	  /// <param name="edgesOnly"> value stored in the <c>edgesOnly</c> field </param>
	  /// <exception cref="System.ArgumentException"> if <paramref name="version"/> is before
	  ///        <c>Version.LUCENE_44</c>, if <paramref name="minGram"/> is less than 1, or if
	  ///        <paramref name="minGram"/> is greater than <paramref name="maxGram"/> </exception>
	  private void init(Version version, int minGram, int maxGram, bool edgesOnly)
	  {
		if (!version.onOrAfter(Version.LUCENE_44))
		{
		  throw new System.ArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
		}
		// The version is known to be 4.4+ at this point, so no pre-4.4 fallback is needed.
		charUtils = CharacterUtils.getInstance(version);
		if (minGram < 1)
		{
		  throw new System.ArgumentException("minGram must be greater than zero");
		}
		if (minGram > maxGram)
		{
		  throw new System.ArgumentException("minGram must not be greater than maxGram");
		}
		this.minGram = minGram;
		this.maxGram = maxGram;
		this.edgesOnly = edgesOnly;
		// 2 * maxGram in case all code points require 2 chars, plus 1024 for buffering
		// so the Reader is not polled continuously.
		charBuffer = CharacterUtils.newCharacterBuffer(2 * maxGram + 1024);
		buffer = new int[charBuffer.Buffer.Length];
		// Make the term attribute large enough to hold the longest gram.
		termAtt.resizeBuffer(2 * maxGram);
	  }
 /// <summary>
 /// Selects the scanner implementation that corresponds to the requested
 /// compatibility version and stores it in the <c>scanner</c> field.
 /// </summary>
 /// <param name="matchVersion"> compatibility version used to pick the scanner </param>
 private void Init(Version matchVersion)
 {
     // Tested from the newest version downwards, so the first match wins;
     // anything older than 3.1 falls back to ClassicTokenizerImpl.
     if (matchVersion.OnOrAfter(Version.LUCENE_47))
     {
         this.scanner = new StandardTokenizerImpl(input);
     }
     else if (matchVersion.OnOrAfter(Version.LUCENE_40))
     {
         this.scanner = new StandardTokenizerImpl40(input);
     }
     else if (matchVersion.OnOrAfter(Version.LUCENE_34))
     {
         this.scanner = new StandardTokenizerImpl34(input);
     }
     else if (matchVersion.OnOrAfter(Version.LUCENE_31))
     {
         this.scanner = new StandardTokenizerImpl31(input);
     }
     else
     {
         this.scanner = new ClassicTokenizerImpl(input);
     }
 }
	  /// <summary>
	  /// Partially reverses the given input buffer in-place from the given offset
	  /// up to the given length. For match versions before <c>Version.LUCENE_31</c> the
	  /// work is delegated to <c>reverseUnicode3</c>. Otherwise chars are swapped from both
	  /// ends towards the middle, written so that a surrogate pair detected by
	  /// <c>char.IsSurrogatePair</c> keeps its high/low order in the result. </summary>
	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
	  /// <param name="buffer"> the input char array to reverse </param>
	  /// <param name="start"> the offset from where to reverse the buffer </param>
	  /// <param name="len"> the length in the buffer up to where the
	  ///        buffer should be reversed </param>
	  public static void reverse(Version matchVersion, char[] buffer, int start, int len)
	  {
		// Versions before 3.1 use the reverseUnicode3 routine instead.
		if (!matchVersion.onOrAfter(Version.LUCENE_31))
		{
		  reverseUnicode3(buffer, start, len);
		  return;
		}
		/* modified version of Apache Harmony AbstractStringBuilder reverse0() */
		// Fewer than two chars: nothing to swap.
		if (len < 2)
		{
		  return;
		}
		// Index of the last char in the range; moves towards the front as the loop runs.
		int end = (start + len) - 1;
		// Chars carried from one iteration to the next: the pending char at the
		// front position and the pending char at the end position.
		char frontHigh = buffer[start];
		char endLow = buffer[end];
		// Each flag is cleared when a surrogate pair was found on that side only,
		// which suppresses surrogate detection on that side for the following iteration.
		bool allowFrontSur = true, allowEndSur = true;
		// Loop bound: start plus half the length (rounded down).
		int mid = start + (len >> 1);
		for (int i = start; i < mid; ++i, --end)
		{
		  // Neighbours of the carried chars, read before anything is overwritten.
		  char frontLow = buffer[i + 1];
		  char endHigh = buffer[end - 1];
		  bool surAtFront = allowFrontSur && char.IsSurrogatePair(frontHigh, frontLow);
		  if (surAtFront && (len < 3))
		  {
			// len is 2 here and both chars form one surrogate pair: nothing to reverse
			return;
		  }
		  bool surAtEnd = allowEndSur && char.IsSurrogatePair(endHigh, endLow);
		  // The flags have been consumed for this iteration; re-enable both.
		  allowFrontSur = allowEndSur = true;
		  if (surAtFront == surAtEnd)
		  {
			if (surAtFront)
			{
			  // both surrogates: exchange the two pairs, consuming two chars on each side
			  buffer[end] = frontLow;
			  buffer[--end] = frontHigh;
			  buffer[i] = endHigh;
			  buffer[++i] = endLow;
			  frontHigh = buffer[i + 1];
			  endLow = buffer[end - 1];
			}
			else
			{
			  // neither surrogates: plain swap of one char from each side
			  buffer[end] = frontHigh;
			  buffer[i] = endLow;
			  frontHigh = frontLow;
			  endLow = endHigh;
			}
		  }
		  else
		  {
			if (surAtFront)
			{
			  // surrogate only at the front: frontHigh stays carried for the next iteration
			  buffer[end] = frontLow;
			  buffer[i] = endLow;
			  endLow = endHigh;
			  allowFrontSur = false;
			}
			else
			{
			  // surrogate only at the end: endLow stays carried for the next iteration
			  buffer[end] = frontHigh;
			  buffer[i] = endHigh;
			  frontHigh = frontLow;
			  allowEndSur = false;
			}
		  }
		}
		// Odd length and the last iteration handled a one-sided surrogate:
		// store the char that is still being carried at the current end index.
		if ((len & 0x01) == 1 && !(allowFrontSur && allowEndSur))
		{
		  // only if odd length
		  buffer[end] = allowFrontSur ? endLow : frontHigh;
		}
	  }
Beispiel #5
0
	  /// <summary>
	  /// Creates an analyzer with the stop word set selected by the compatibility version:
	  /// <c>DefaultSetHolder.DEFAULT_STOP_SET</c> for <c>Version.LUCENE_36</c> and later,
	  /// <c>StopAnalyzer.ENGLISH_STOP_WORDS_SET</c> for earlier versions.
	  /// </summary>
	  /// <param name="matchVersion"> lucene compatibility version </param>
	  public ThaiAnalyzer(Version matchVersion)
		: this(
			matchVersion,
			matchVersion.onOrAfter(Version.LUCENE_36)
			  ? DefaultSetHolder.DEFAULT_STOP_SET
			  : StopAnalyzer.ENGLISH_STOP_WORDS_SET)
	  {
	  }
	  /// <summary>
	  /// Creates a new WordDelimiterFilter.
	  /// </summary>
	  /// <param name="matchVersion"> compatibility version; must be on or after <c>Version.LUCENE_48</c> </param>
	  /// <param name="in"> TokenStream to be filtered </param>
	  /// <param name="charTypeTable"> table containing character types </param>
	  /// <param name="configurationFlags"> Flags configuring the filter </param>
	  /// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
	  /// <exception cref="System.ArgumentException"> if <paramref name="matchVersion"/> is before
	  ///        <c>Version.LUCENE_48</c> </exception>
	  public WordDelimiterFilter(Version matchVersion, TokenStream @in, sbyte[] charTypeTable, int configurationFlags, CharArraySet protWords) : base(@in)
	  {
		// Run the instance-field initializer once, before anything else happens.
		if (!InstanceFieldsInitialized)
		{
		  InitializeInstanceFields();
		  InstanceFieldsInitialized = true;
		}

		bool versionSupported = matchVersion.onOrAfter(Version.LUCENE_48);
		if (!versionSupported)
		{
		  throw new System.ArgumentException("This class only works with Lucene 4.8+. To emulate the old (broken) behavior of WordDelimiterFilter, use Lucene47WordDelimiterFilter");
		}

		this.flags = configurationFlags;
		this.protWords = protWords;

		// NOTE(review): has(...) presumably tests this.flags, so keep these calls after the assignment above.
		bool splitOnCaseChange = has(SPLIT_ON_CASE_CHANGE);
		bool splitOnNumerics = has(SPLIT_ON_NUMERICS);
		bool stemEnglishPossessive = has(STEM_ENGLISH_POSSESSIVE);
		this.iterator = new WordDelimiterIterator(charTypeTable, splitOnCaseChange, splitOnNumerics, stemEnglishPossessive);
	  }
Beispiel #7
0
	  /// <summary>
	  /// Creates an analyzer with the default stop word set for the compatibility version:
	  /// <c>DefaultSetHolder.DEFAULT_STOP_SET</c> for <c>Version.LUCENE_31</c> and later,
	  /// <c>DefaultSetHolder.DEFAULT_STOP_SET_30</c> for earlier versions.
	  /// </summary>
	  /// <param name="matchVersion"> lucene compatibility version </param>
	  public FrenchAnalyzer(Version matchVersion)
		: this(
			matchVersion,
			matchVersion.onOrAfter(Version.LUCENE_31)
			  ? DefaultSetHolder.DEFAULT_STOP_SET
			  : DefaultSetHolder.DEFAULT_STOP_SET_30)
	  {
	  }