/// <summary>
/// Creates a new TrimFilter over the given token stream.
/// </summary>
/// <param name="version"> compatibility version; consulted only when <paramref name="updateOffsets"/> is true </param>
/// <param name="in"> the token stream passed to the base constructor </param>
/// <param name="updateOffsets"> stored in the <c>updateOffsets</c> field; rejected when
/// <paramref name="version"/> is on or after <c>Version.LUCENE_44</c> </param>
/// <exception cref="System.ArgumentException"> if <paramref name="updateOffsets"/> is true and
/// <paramref name="version"/> is on or after <c>Version.LUCENE_44</c> </exception>
public TrimFilter(Version version, TokenStream @in, bool updateOffsets)
    : base(@in)
{
    if (updateOffsets)
    {
        // The version is only inspected once the caller has actually asked for offset updates.
        if (version.onOrAfter(Version.LUCENE_44))
        {
            throw new System.ArgumentException("updateOffsets=true is not supported anymore as of Lucene 4.4");
        }
    }

    this.updateOffsets = updateOffsets;
}
/// <summary>
/// Validates the tokenizer configuration, stores it, and allocates the working buffers.
/// </summary>
/// <param name="version"> compatibility version; must be on or after <c>Version.LUCENE_44</c> </param>
/// <param name="minGram"> smallest gram size; must be at least 1 </param>
/// <param name="maxGram"> largest gram size; must not be smaller than <paramref name="minGram"/> </param>
/// <param name="edgesOnly"> stored in the <c>edgesOnly</c> field </param>
/// <exception cref="System.ArgumentException"> if <paramref name="version"/> is older than
/// <c>Version.LUCENE_44</c>, if <paramref name="minGram"/> is less than 1, or if
/// <paramref name="minGram"/> is greater than <paramref name="maxGram"/> </exception>
private void init(Version version, int minGram, int maxGram, bool edgesOnly)
{
    if (!version.onOrAfter(Version.LUCENE_44))
    {
        throw new System.ArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
    }

    // The guard above already rejected every pre-4.4 version, so the former
    // fallback to CharacterUtils.Java4Instance could never be selected.
    charUtils = CharacterUtils.getInstance(version);

    if (minGram < 1)
    {
        throw new System.ArgumentException("minGram must be greater than zero");
    }
    if (minGram > maxGram)
    {
        throw new System.ArgumentException("minGram must not be greater than maxGram");
    }

    this.minGram = minGram;
    this.maxGram = maxGram;
    this.edgesOnly = edgesOnly;

    // 2 * maxGram in case all code points require 2 chars, and + 1024 for
    // buffering so the Reader is not polled constantly.
    charBuffer = CharacterUtils.newCharacterBuffer(2 * maxGram + 1024);
    buffer = new int[charBuffer.Buffer.Length];

    // Make the term attribute large enough.
    termAtt.resizeBuffer(2 * maxGram);
}
/// <summary>
/// Selects the scanner implementation that corresponds to the requested
/// compatibility version and assigns it to <c>scanner</c>.
/// </summary>
/// <param name="matchVersion"> compatibility version used to pick the scanner; the
/// checks run from newest to oldest, so the first matching threshold wins </param>
private void Init(Version matchVersion)
{
    // NOTE(review): the original mixed OnOrAfter (first call) with onOrAfter (remaining
    // calls) on the same type. Unified on the PascalCase spelling; confirm it matches
    // the member actually declared for Version.
    if (matchVersion.OnOrAfter(Version.LUCENE_47))
    {
        this.scanner = new StandardTokenizerImpl(input);
    }
    else if (matchVersion.OnOrAfter(Version.LUCENE_40))
    {
        this.scanner = new StandardTokenizerImpl40(input);
    }
    else if (matchVersion.OnOrAfter(Version.LUCENE_34))
    {
        this.scanner = new StandardTokenizerImpl34(input);
    }
    else if (matchVersion.OnOrAfter(Version.LUCENE_31))
    {
        this.scanner = new StandardTokenizerImpl31(input);
    }
    else
    {
        // Anything older than LUCENE_31 falls back to the classic scanner.
        this.scanner = new ClassicTokenizerImpl(input);
    }
}
/// <summary>
/// Partially reverses the given input buffer in-place from the given offset
/// up to the given length. Adjacent chars that form a surrogate pair
/// (per <c>char.IsSurrogatePair</c>) are moved as a unit so their
/// high/low order is kept.
/// </summary>
/// <param name="matchVersion"> compatibility version; for versions before
/// <c>Version.LUCENE_31</c> the work is delegated to <c>reverseUnicode3</c> </param>
/// <param name="buffer"> the input char array to reverse </param>
/// <param name="start"> the offset from where to reverse the buffer </param>
/// <param name="len"> the length in the buffer up to where the
/// buffer should be reversed </param>
public static void reverse(Version matchVersion, char[] buffer, int start, int len)
{
    if (!matchVersion.onOrAfter(Version.LUCENE_31))
    {
        reverseUnicode3(buffer, start, len);
        return;
    }
    /* modified version of Apache Harmony AbstractStringBuilder reverse0() */
    if (len < 2)
    {
        // zero or one char: nothing to swap
        return;
    }
    // 'end' walks backwards from the last char of the range while 'i' walks forwards.
    int end = (start + len) - 1;
    // frontHigh / endLow hold the chars still to be placed from each side; they are
    // carried across iterations rather than re-read from the (already modified) buffer.
    char frontHigh = buffer[start];
    char endLow = buffer[end];
    // When false, surrogate detection on that side is skipped for one iteration
    // (set after a one-sided surrogate swap, reset inside the loop).
    bool allowFrontSur = true, allowEndSur = true;
    int mid = start + (len >> 1);
    for (int i = start; i < mid; ++i, --end)
    {
        // Neighbours of the current front/end chars, read before anything is overwritten.
        char frontLow = buffer[i + 1];
        char endHigh = buffer[end - 1];
        bool surAtFront = allowFrontSur && char.IsSurrogatePair(frontHigh, frontLow);
        if (surAtFront && (len < 3))
        {
            // nothing to do since surAtFront is allowed and 1 char left
            return;
        }
        bool surAtEnd = allowEndSur && char.IsSurrogatePair(endHigh, endLow);
        // Re-enable detection on both sides; a one-sided branch below may disable one again.
        allowFrontSur = allowEndSur = true;
        if (surAtFront == surAtEnd)
        {
            if (surAtFront)
            {
                // both surrogates: swap two chars on each side, so i and end
                // each advance one extra position here
                buffer[end] = frontLow;
                buffer[--end] = frontHigh;
                buffer[i] = endHigh;
                buffer[++i] = endLow;
                frontHigh = buffer[i + 1];
                endLow = buffer[end - 1];
            }
            else
            {
                // neither surrogates: plain one-for-one swap
                buffer[end] = frontHigh;
                buffer[i] = endLow;
                frontHigh = frontLow;
                endLow = endHigh;
            }
        }
        else
        {
            if (surAtFront)
            {
                // surrogate only at the front: frontHigh stays pending (not written yet)
                // and front detection is suppressed for the next iteration
                buffer[end] = frontLow;
                buffer[i] = endLow;
                endLow = endHigh;
                allowFrontSur = false;
            }
            else
            {
                // surrogate only at the end: endLow stays pending (not written yet)
                // and end detection is suppressed for the next iteration
                buffer[end] = frontHigh;
                buffer[i] = endHigh;
                frontHigh = frontLow;
                allowEndSur = false;
            }
        }
    }
    if ((len & 0x01) == 1 && !(allowFrontSur && allowEndSur))
    {
        // only if odd length: write the char that is still pending from the
        // last one-sided surrogate swap into the middle slot
        buffer[end] = allowFrontSur ? endLow : frontHigh;
    }
}
/// <summary>
/// Builds an analyzer using a stop word set chosen from the compatibility version:
/// <c>DefaultSetHolder.DEFAULT_STOP_SET</c> when <paramref name="matchVersion"/> is on or
/// after <c>Version.LUCENE_36</c>, otherwise <c>StopAnalyzer.ENGLISH_STOP_WORDS_SET</c>.
/// </summary>
/// <param name="matchVersion"> lucene compatibility version </param>
public ThaiAnalyzer(Version matchVersion)
    : this(
        matchVersion,
        !matchVersion.onOrAfter(Version.LUCENE_36)
            ? StopAnalyzer.ENGLISH_STOP_WORDS_SET
            : DefaultSetHolder.DEFAULT_STOP_SET)
{
}
/// <summary>
/// Creates a new WordDelimiterFilter.
/// </summary>
/// <param name="matchVersion"> compatibility version; must be on or after <c>Version.LUCENE_48</c> </param>
/// <param name="in"> TokenStream to be filtered </param>
/// <param name="charTypeTable"> table containing character types </param>
/// <param name="configurationFlags"> Flags configuring the filter </param>
/// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
/// <exception cref="System.ArgumentException"> if <paramref name="matchVersion"/> is older than
/// <c>Version.LUCENE_48</c> </exception>
public WordDelimiterFilter(Version matchVersion, TokenStream @in, sbyte[] charTypeTable, int configurationFlags, CharArraySet protWords)
    : base(@in)
{
    if (!InstanceFieldsInitialized)
    {
        InitializeInstanceFields();
        InstanceFieldsInitialized = true;
    }

    var versionSupported = matchVersion.onOrAfter(Version.LUCENE_48);
    if (!versionSupported)
    {
        throw new System.ArgumentException("This class only works with Lucene 4.8+. To emulate the old (broken) behavior of WordDelimiterFilter, use Lucene47WordDelimiterFilter");
    }

    this.flags = configurationFlags;
    this.protWords = protWords;

    // The flag lookups run after 'flags' has been assigned, in the same
    // left-to-right order in which they are handed to the iterator.
    var splitOnCaseChange = has(SPLIT_ON_CASE_CHANGE);
    var splitOnNumerics = has(SPLIT_ON_NUMERICS);
    var stemEnglishPossessive = has(STEM_ENGLISH_POSSESSIVE);
    this.iterator = new WordDelimiterIterator(charTypeTable, splitOnCaseChange, splitOnNumerics, stemEnglishPossessive);
}
/// <summary>
/// Builds an analyzer using a stop word set chosen from the compatibility version:
/// <c>DefaultSetHolder.DEFAULT_STOP_SET</c> when <paramref name="matchVersion"/> is on or
/// after <c>Version.LUCENE_31</c>, otherwise <c>DefaultSetHolder.DEFAULT_STOP_SET_30</c>.
/// </summary>
/// <param name="matchVersion"> lucene compatibility version </param>
public FrenchAnalyzer(Version matchVersion)
    : this(
        matchVersion,
        !matchVersion.onOrAfter(Version.LUCENE_31)
            ? DefaultSetHolder.DEFAULT_STOP_SET_30
            : DefaultSetHolder.DEFAULT_STOP_SET)
{
}