Ejemplo n.º 1
 /// <summary>
 /// Creates NGramTokenFilter with the given minimum and maximum n-gram sizes.
 /// </summary>
 /// <remarks>
 /// The base class receives a <see cref="CodepointCountFilter"/> built from
 /// <paramref name="version"/>, <paramref name="input"/>, <paramref name="minGram"/> and
 /// <c>int.MaxValue</c> rather than <paramref name="input"/> itself.
 /// </remarks>
 /// <param name="version"> Lucene version to enable correct position increments.
 ///                See <a href="#version">above</a> for details. </param>
 /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
 /// <param name="minGram"> the smallest n-gram to generate; must be at least 1 </param>
 /// <param name="maxGram"> the largest n-gram to generate; must not be smaller than
 ///                <paramref name="minGram"/> </param>
 /// <exception cref="System.ArgumentException"> if <paramref name="minGram"/> is less than 1
 ///                or greater than <paramref name="maxGram"/> </exception>
 public NGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram)
     : base(new CodepointCountFilter(version, input, minGram, int.MaxValue))
     this.version = version;
     // Versions on or after LUCENE_44 use the version-specific CharacterUtils; older ones use the Java 4 instance.
     this.charUtils = version.OnOrAfter(Version.LUCENE_44) ? CharacterUtils.GetInstance(version) : CharacterUtils.Java4Instance;
     if (minGram < 1)
         throw new System.ArgumentException("minGram must be greater than zero");
     if (minGram > maxGram)
         throw new System.ArgumentException("minGram must not be greater than maxGram");
     this.minGram = minGram;
     this.maxGram = maxGram;
     // NOTE(review): the first pair of assignments obtains the attributes through AddAttribute and the
     // second pair replaces them with helper instances. Presumably these are the two arms of an if/else
     // on the version check whose braces and `else` are not visible in this listing - confirm.
     if (version.OnOrAfter(Version.LUCENE_44))
         posIncAtt = AddAttribute(typeof(PositionIncrementAttribute));
         posLenAtt = AddAttribute(typeof(PositionLengthAttribute));
         posIncAtt = new PositionIncrementAttributeAnonymousInnerClassHelper(this);
         posLenAtt = new PositionLengthAttributeAnonymousInnerClassHelper(this);
Ejemplo n.º 2
 /// <summary>
 /// Create a new ReverseStringFilter that reverses and marks all tokens in the
 /// supplied <see cref="TokenStream"/>.
 /// <para>
 /// The reversed tokens will be prepended (marked) by the <c>marker</c>
 /// character.
 /// </para>
 /// </summary>
 /// <param name="matchVersion"> See <a href="#version">above</a> </param>
 /// <param name="in"> <see cref="TokenStream"/> to filter; passed on to the base constructor </param>
 /// <param name="marker"> A character used to mark reversed tokens </param>
 public ReverseStringFilter(Version matchVersion, TokenStream @in, char marker) : base(@in)
     // Both arguments are only stored here; nothing else happens in the visible constructor body.
     this.matchVersion = matchVersion;
     this.marker       = marker;
Ejemplo n.º 3
        /// <summary>
        /// Reverses the whole input buffer in-place by delegating to the
        /// offset/length overload with offset 0 and the array's full length.
        /// </summary>
        /// <param name="matchVersion"> See <a href="#version">above</a> </param>
        /// <param name="buffer"> the input char array to reverse </param>
        public static void reverse(Version matchVersion, char[] buffer)
        {
            int wholeLength = buffer.Length;
            reverse(matchVersion, buffer, 0, wholeLength);
        }
Ejemplo n.º 4
        /// <summary>
        /// Partially reverses the given input buffer in-place from the given offset
        /// up to the given length. Two chars that <c>char.IsSurrogatePair</c> reports as a
        /// pair are written back in their original high/low order.
        /// </summary>
        /// <param name="matchVersion"> See <a href="#version">above</a>; versions before
        ///        LUCENE_31 are handed to <c>reverseUnicode3</c> </param>
        /// <param name="buffer"> the input char array to reverse </param>
        /// <param name="start"> the offset from where to reverse the buffer </param>
        /// <param name="len"> the length in the buffer up to where the
        ///        buffer should be reversed </param>
        public static void reverse(Version matchVersion, char[] buffer, int start, int len)
            // NOTE(review): no `return` is visible after the reverseUnicode3 call in this listing;
            // confirm whether the method is meant to stop here for pre-3.1 match versions.
            if (!matchVersion.OnOrAfter(Version.LUCENE_31))
                reverseUnicode3(buffer, start, len);
            /* modified version of Apache Harmony AbstractStringBuilder reverse0() */
            // NOTE(review): this check has no visible body in the listing (presumably an early
            // return for ranges shorter than two chars) - confirm against the original file.
            if (len < 2)
            int  end = (start + len) - 1; // index of the last char in the range
            char frontHigh = buffer[start];
            char endLow = buffer[end];
            // Each flag is set to false when a pair was found on that side only, and is reset to
            // true in the next iteration right after the surrogate tests have read it.
            bool allowFrontSur = true, allowEndSur = true;
            // Midpoint of the range; the loop walks i up from start and end down towards it.
            int mid = start + (len >> 1);

            for (int i = start; i < mid; ++i, --end)
                // Neighbours of the current front and end chars, used by the surrogate-pair tests.
                char frontLow = buffer[i + 1];
                char endHigh = buffer[end - 1];
                bool surAtFront = allowFrontSur && char.IsSurrogatePair(frontHigh, frontLow);
                if (surAtFront && (len < 3))
                    // nothing to do since surAtFront is allowed and 1 char left
                    // NOTE(review): no statement follows this check in the listing (presumably a return) - confirm.
                bool surAtEnd = allowEndSur && char.IsSurrogatePair(endHigh, endLow);
                allowFrontSur = allowEndSur = true;
                // NOTE(review): the `else` keywords that would separate the four cases below are not
                // visible in this listing; the case comments mark the intended branches - confirm.
                if (surAtFront == surAtEnd)
                    if (surAtFront)
                        // both surrogates: exchange the two pairs, keeping each pair's high/low order
                        buffer[end]   = frontLow;
                        buffer[--end] = frontHigh;
                        buffer[i]     = endHigh;
                        buffer[++i]   = endLow;
                        frontHigh     = buffer[i + 1];
                        endLow        = buffer[end - 1];
                        // neither surrogates: plain swap of one char from each side
                        buffer[end] = frontHigh;
                        buffer[i]   = endLow;
                        frontHigh   = frontLow;
                        endLow      = endHigh;
                    if (surAtFront)
                        // surrogate only at the front
                        buffer[end]   = frontLow;
                        buffer[i]     = endLow;
                        endLow        = endHigh;
                        allowFrontSur = false;
                        // surrogate only at the end
                        buffer[end] = frontHigh;
                        buffer[i]   = endHigh;
                        frontHigh   = frontLow;
                        allowEndSur = false;
            // After the loop, an odd-length range with one flag still false gets its remaining char written here.
            if ((len & 0x01) == 1 && !(allowFrontSur && allowEndSur))
                // only if odd length
                buffer[end] = allowFrontSur ? endLow : frontHigh;
Ejemplo n.º 5
 /// <summary>
 /// Builds a fully initialized TokenFilterFactory for the given name and key-value arguments.
 /// A <see cref="ClasspathResourceLoader"/> created from this instance's runtime type is used for
 /// loading resources, so any required ones should be on the test classpath.
 /// </summary>
 protected internal virtual TokenFilterFactory TokenFilterFactory(string name, Version version, params string[] keysAndValues)
 {
     var resourceLoader = new ClasspathResourceLoader(this.GetType());
     return TokenFilterFactory(name, version, resourceLoader, keysAndValues);
 }
 /// <summary>
 /// Builds a fully initialized CharFilterFactory: the factory class is looked up by
 /// <paramref name="name"/> and then instantiated through <c>AnalysisFactory</c> with the given
 /// version, resource loader and key-value arguments.
 /// </summary>
 protected internal virtual CharFilterFactory CharFilterFactory(string name, Version matchVersion, IResourceLoader loader, params string[] keysAndValues)
 {
     var factoryClass = Lucene.Net.Analysis.Util.CharFilterFactory.LookupClass(name);
     var created = AnalysisFactory(factoryClass, matchVersion, loader, keysAndValues);
     return (CharFilterFactory)created;
 }
Ejemplo n.º 7
 /// <summary>
 /// Returns a reversed copy of the given input string. The string is copied into a
 /// char array, reversed in place over its full length, and turned back into a string.
 /// </summary>
 /// <param name="matchVersion"> See <a href="#version">above</a> </param>
 /// <param name="input"> the string to reverse </param>
 /// <returns> the given input string in reversed order </returns>
 public static string reverse(Version matchVersion, string input)
 {
     char[] chars = input.ToCharArray();
     int count = chars.Length;
     reverse(matchVersion, chars, 0, count);
     return new string(chars);
 }
Ejemplo n.º 8
	  /// <summary>
	  /// Reads lines from a Reader and adds every non-comment line as an entry to a new CharArraySet
	  /// (omitting leading and trailing whitespace). Every line of the Reader should contain only
	  /// one word. The words need to be in lowercase if you make use of an
	  /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
	  /// </summary>
	  /// <param name="reader"> Reader containing the wordlist </param>
	  /// <param name="comment"> The string representing a comment. </param>
	  /// <param name="matchVersion"> the Lucene <see cref="Version"/> </param>
	  /// <returns> A CharArraySet with the reader's words </returns>
	  public static CharArraySet GetWordSet(TextReader reader, string comment, Version matchVersion)
	  {
		// The target set is created case sensitive (ignoreCase = false) with the default initial capacity.
		var words = new CharArraySet(matchVersion, INITIAL_CAPACITY, false);
		return GetWordSet(reader, comment, words);
	  }
 /// <summary>
 /// Creates the tokenizer from a textual side label: <paramref name="sideLabel"/> is converted
 /// with <c>GetSide</c> and everything is forwarded to the overload that takes a <c>Side</c>.
 /// </summary>
 /// <param name="version"> the Lucene match version </param>
 /// <param name="input"> the input to be tokenized </param>
 /// <param name="sideLabel"> label passed to <c>GetSide</c> </param>
 /// <param name="minGram"> the smallest n-gram to generate </param>
 /// <param name="maxGram"> the largest n-gram to generate </param>
 public Lucene43EdgeNGramTokenizer(Version version, Reader input, string sideLabel, int minGram, int maxGram)
     : this(version, input, GetSide(sideLabel), minGram, maxGram)
Ejemplo n.º 10
 /// <summary>
 /// Create a new ReverseStringFilter that reverses and marks all tokens in the
 /// supplied <see cref="TokenStream"/>.
 /// <para>
 /// The reversed tokens will be prepended (marked) by the <c>marker</c>
 /// character.
 /// </para>
 /// </summary>
 /// <param name="matchVersion"> See <a href="#version">above</a> </param>
 /// <param name="in"> <see cref="TokenStream"/> to filter; passed on to the base constructor </param>
 /// <param name="marker"> A character used to mark reversed tokens </param>
 public ReverseStringFilter(Version matchVersion, TokenStream @in, char marker)
     : base(@in)
     // Both arguments are only stored here; nothing else happens in the visible constructor body.
     this.matchVersion = matchVersion;
     this.marker = marker;
 /// <summary>
 /// Creates the tokenizer with an explicit attribute factory and side. The factory and input
 /// go to the base constructor; the remaining arguments are validated and stored by <c>init</c>.
 /// </summary>
 /// <param name="version"> the Lucene match version </param>
 /// <param name="factory"> attribute factory passed to the base constructor </param>
 /// <param name="input"> the input to be tokenized </param>
 /// <param name="side"> the <c>Side</c> value handed to <c>init</c> </param>
 /// <param name="minGram"> the smallest n-gram to generate </param>
 /// <param name="maxGram"> the largest n-gram to generate </param>
 public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, Side side, int minGram, int maxGram)
     : base(factory, input)
     init(version, side, minGram, maxGram);
 /// <summary>
 /// Creates the tokenizer with an explicit side. The input goes to the base constructor;
 /// the remaining arguments are validated and stored by <c>init</c>.
 /// </summary>
 /// <param name="version"> the Lucene match version </param>
 /// <param name="input"> the input to be tokenized </param>
 /// <param name="side"> the <c>Side</c> value handed to <c>init</c> </param>
 /// <param name="minGram"> the smallest n-gram to generate </param>
 /// <param name="maxGram"> the largest n-gram to generate </param>
 public Lucene43EdgeNGramTokenizer(Version version, Reader input, Side side, int minGram, int maxGram)
     : base(input)
     init(version, side, minGram, maxGram);
        /// <summary>
        /// Validates the constructor arguments and stores them in this instance's
        /// <c>minGram</c>, <c>maxGram</c> and <c>side</c> fields.
        /// </summary>
        /// <param name="version"> the Lucene match version; must not be null </param>
        /// <param name="side"> the <c>Side</c> value to store; must not be null </param>
        /// <param name="minGram"> the smallest n-gram to generate; must be at least 1 </param>
        /// <param name="maxGram"> the largest n-gram to generate; must not be smaller than
        ///        <paramref name="minGram"/> </param>
        /// <exception cref="System.ArgumentException"> if any of the checks in the body fails </exception>
        private void init(Version version, Side side, int minGram, int maxGram)
            if (version == null)
                throw new System.ArgumentException("version must not be null");

            if (side == null)
                throw new System.ArgumentException("sideLabel must be either front or back");

            if (minGram < 1)
                throw new System.ArgumentException("minGram must be greater than zero");

            if (minGram > maxGram)
                throw new System.ArgumentException("minGram must not be greater than maxGram");

            // For match versions on or after LUCENE_44, Side.BACK is rejected and maxGram is limited to 1024.
            // NOTE(review): braces are not visible in this listing; the indentation suggests that both the
            // Side.BACK check and the Math.Min clamp belong to the version check - confirm.
            if (version.OnOrAfter(Version.LUCENE_44))
                if (side == Side.BACK)
                    throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4");
                maxGram = Math.Min(maxGram, 1024);

            this.minGram = minGram;
            this.maxGram = maxGram;
            this.side = side;
Ejemplo n.º 14
 /// <summary>
 /// Creates a CharArraySet from a reader by passing it to
 /// <c>WordlistLoader.GetWordSet</c>.
 /// </summary>
 /// <param name="stopwords">
 ///          the stopwords reader to load
 /// </param>
 /// <param name="matchVersion">
 ///          the Lucene version for cross version compatibility </param>
 /// <returns> a CharArraySet containing the distinct stopwords from the given
 ///         reader </returns>
 /// <exception cref="IOException">
 ///           if loading the stopwords throws an <see cref="IOException"/> </exception>
 protected internal static CharArraySet loadStopwordSet(Reader stopwords, Version matchVersion)
       // NOTE(review): this return is indented deeper than a direct method body, which suggests an
       // enclosing construct (presumably try/finally that closes the reader) is missing from this
       // listing - confirm against the original file.
       return WordlistLoader.GetWordSet(stopwords, matchVersion);
Ejemplo n.º 15
 /// <summary>
 /// Reverses the whole input buffer in-place by delegating to the
 /// offset/length overload with offset 0 and the array's full length.
 /// </summary>
 /// <param name="matchVersion"> See <a href="#version">above</a> </param>
 /// <param name="buffer"> the input char array to reverse </param>
 public static void reverse(Version matchVersion, char[] buffer)
 {
     int wholeLength = buffer.Length;
     reverse(matchVersion, buffer, 0, wholeLength);
 }
Ejemplo n.º 16
 /// <summary>
 /// Creates a new Analyzer with an empty stopword set by forwarding <c>null</c>
 /// as the stopword set to the two-argument constructor.
 /// </summary>
 /// <param name="version">
 ///          the Lucene version for cross version compatibility </param>
 protected internal StopwordAnalyzerBase(Version version)
     : this(version, null)
Ejemplo n.º 17
 /// <summary>
 /// Partially reverses the given input buffer in-place, starting at offset 0 and
 /// covering <paramref name="len"/> chars, by delegating to the offset/length overload.
 /// </summary>
 /// <param name="matchVersion"> See <a href="#version">above</a> </param>
 /// <param name="buffer"> the input char array to reverse </param>
 /// <param name="len"> the length in the buffer up to where the
 ///        buffer should be reversed </param>
 public static void reverse(Version matchVersion, char[] buffer, int len)
 {
     const int fromStart = 0;
     reverse(matchVersion, buffer, fromStart, len);
 }
Ejemplo n.º 18
 /// <summary>
 /// Creates a new StandardTokenizer with a given <see cref="AttributeFactory"/>.
 /// </summary>
 /// <param name="matchVersion"> the Lucene match version </param>
 /// <param name="factory"> attribute factory passed to the base constructor </param>
 /// <param name="input"> the input reader passed to the base constructor </param>
 // NOTE(review): no constructor body is visible in this listing and matchVersion is not used in the
 // visible lines - confirm against the original file.
 public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input)
     : base(factory, input)
Ejemplo n.º 19
 /// <summary>
 /// Partially reverses the given input buffer in-place from the given offset
 /// up to the given length. Two chars that <c>char.IsSurrogatePair</c> reports as a
 /// pair are written back in their original high/low order.
 /// </summary>
 /// <param name="matchVersion"> See <a href="#version">above</a>; versions before
 ///        LUCENE_31 are handed to <c>reverseUnicode3</c> </param>
 /// <param name="buffer"> the input char array to reverse </param>
 /// <param name="start"> the offset from where to reverse the buffer </param>
 /// <param name="len"> the length in the buffer up to where the
 ///        buffer should be reversed </param>
 public static void reverse(Version matchVersion, char[] buffer, int start, int len)
     // NOTE(review): no `return` is visible after the reverseUnicode3 call in this listing;
     // confirm whether the method is meant to stop here for pre-3.1 match versions.
     if (!matchVersion.OnOrAfter(Version.LUCENE_31))
       reverseUnicode3(buffer, start, len);
     /* modified version of Apache Harmony AbstractStringBuilder reverse0() */
     // NOTE(review): this check has no visible body in the listing (presumably an early
     // return for ranges shorter than two chars) - confirm against the original file.
     if (len < 2)
     int end = (start + len) - 1; // index of the last char in the range
     char frontHigh = buffer[start];
     char endLow = buffer[end];
     // Each flag is set to false when a pair was found on that side only, and is reset to
     // true in the next iteration right after the surrogate tests have read it.
     bool allowFrontSur = true, allowEndSur = true;
     // Midpoint of the range; the loop walks i up from start and end down towards it.
     int mid = start + (len >> 1);
     for (int i = start; i < mid; ++i, --end)
       // Neighbours of the current front and end chars, used by the surrogate-pair tests.
       char frontLow = buffer[i + 1];
       char endHigh = buffer[end - 1];
       bool surAtFront = allowFrontSur && char.IsSurrogatePair(frontHigh, frontLow);
       if (surAtFront && (len < 3))
     // nothing to do since surAtFront is allowed and 1 char left
     // NOTE(review): no statement follows this check in the listing (presumably a return) - confirm.
       bool surAtEnd = allowEndSur && char.IsSurrogatePair(endHigh, endLow);
       allowFrontSur = allowEndSur = true;
       // NOTE(review): the `else` keywords that would separate the four cases below are not
       // visible in this listing; the case comments mark the intended branches - confirm.
       if (surAtFront == surAtEnd)
     if (surAtFront)
       // both surrogates: exchange the two pairs, keeping each pair's high/low order
       buffer[end] = frontLow;
       buffer[--end] = frontHigh;
       buffer[i] = endHigh;
       buffer[++i] = endLow;
       frontHigh = buffer[i + 1];
       endLow = buffer[end - 1];
       // neither surrogates: plain swap of one char from each side
       buffer[end] = frontHigh;
       buffer[i] = endLow;
       frontHigh = frontLow;
       endLow = endHigh;
     if (surAtFront)
       // surrogate only at the front
       buffer[end] = frontLow;
       buffer[i] = endLow;
       endLow = endHigh;
       allowFrontSur = false;
       // surrogate only at the end
       buffer[end] = frontHigh;
       buffer[i] = endHigh;
       frontHigh = frontLow;
       allowEndSur = false;
     // After the loop, an odd-length range with one flag still false gets its remaining char written here.
     if ((len & 0x01) == 1 && !(allowFrontSur && allowEndSur))
       // only if odd length
       buffer[end] = allowFrontSur ? endLow : frontHigh;
Ejemplo n.º 20
 /// <summary>
 /// Create set with enough capacity to hold startSize terms. The set is backed by a new
 /// CharArrayMap built from the same three arguments.
 /// </summary>
 /// <param name="matchVersion">
 ///          compatibility match version see <a href="#version">Version
 ///          note</a> above for details. </param>
 /// <param name="startSize">
 ///          the initial capacity </param>
 /// <param name="ignoreCase">
 ///          <c>false</c> if and only if the set should be case sensitive
 ///          otherwise <c>true</c>. </param>
 // NOTE(review): `CharArrayMap <>` carries no type argument, which is not valid in a C# object-creation
 // expression; it looks like a leftover Java diamond operator - confirm the intended type argument.
 public CharArraySet(Lucene.Net.Util.LuceneVersion matchVersion, int startSize, bool ignoreCase) : this(new CharArrayMap <>(matchVersion, startSize, ignoreCase))
Ejemplo n.º 21
 /// <summary>
 /// Create a new ReverseStringFilter that reverses all tokens in the
 /// supplied <see cref="TokenStream"/>.
 /// <para>
 /// The reversed tokens will not be marked: <c>NOMARKER</c> is forwarded as the marker
 /// to the three-argument constructor.
 /// </para>
 /// </summary>
 /// <param name="matchVersion"> See <a href="#version">above</a> </param>
 /// <param name="in"> <see cref="TokenStream"/> to filter </param>
 public ReverseStringFilter(Version matchVersion, TokenStream @in)
     : this(matchVersion, @in, NOMARKER)
 /// <summary>
 /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range.
 /// Forwards to the overload that takes a side, using <c>Side.FRONT</c>.
 /// </summary>
 /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
 /// <param name="factory"> <see cref="AttributeFactory"/> to use </param>
 /// <param name="input"> <see cref="Reader"/> holding the input to be tokenized </param>
 /// <param name="minGram"> the smallest n-gram to generate </param>
 /// <param name="maxGram"> the largest n-gram to generate </param>
 public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram)
     : this(version, factory, input, Side.FRONT, minGram, maxGram)
Ejemplo n.º 23
 /// <summary>
 /// Creates a new instance initialized with the given stopword set.
 /// </summary>
 /// <param name="version">
 ///          the Lucene version for cross version compatibility </param>
 /// <param name="stopwords">
 ///          the analyzer's stopword set; <c>null</c> selects <c>CharArraySet.EMPTY_SET</c> </param>
 protected internal StopwordAnalyzerBase(Version version, CharArraySet stopwords)
     matchVersion = version;
     // analyzers should use char array set for stopwords!
     // A non-null set is copied and the copy is wrapped with UnmodifiableSet before it is stored.
     this.stopwords = stopwords == null ? CharArraySet.EMPTY_SET : CharArraySet.UnmodifiableSet(CharArraySet.Copy(version, stopwords));
Ejemplo n.º 24
        /// <summary>
        /// Partially reverses the given input buffer in-place, starting at offset 0 and
        /// covering <paramref name="len"/> chars, by delegating to the offset/length overload.
        /// </summary>
        /// <param name="matchVersion"> See <a href="#version">above</a> </param>
        /// <param name="buffer"> the input char array to reverse </param>
        /// <param name="len"> the length in the buffer up to where the
        ///        buffer should be reversed </param>
        public static void reverse(Version matchVersion, char[] buffer, int len)
        {
            const int fromStart = 0;
            reverse(matchVersion, buffer, fromStart, len);
        }
Ejemplo n.º 25
 /// <summary>
 /// Creates a new Analyzer with an empty stopword set by forwarding <c>null</c>
 /// as the stopword set to the two-argument constructor.
 /// </summary>
 /// <param name="version">
 ///          the Lucene version for cross version compatibility </param>
 protected internal StopwordAnalyzerBase(Version version) : this(version, null)
Ejemplo n.º 26
 /// <summary>
 /// Create a new ReverseStringFilter that reverses all tokens in the
 /// supplied <see cref="TokenStream"/>.
 /// <para>
 /// The reversed tokens will not be marked: <c>NOMARKER</c> is forwarded as the marker
 /// to the three-argument constructor.
 /// </para>
 /// </summary>
 /// <param name="matchVersion"> See <a href="#version">above</a> </param>
 /// <param name="in"> <see cref="TokenStream"/> to filter </param>
 public ReverseStringFilter(Version matchVersion, TokenStream @in) : this(matchVersion, @in, NOMARKER)
Ejemplo n.º 27
 /// <summary>
 /// Creates a new instance of the <see cref="StandardTokenizer"/>.  Attaches
 /// the <c>input</c> to the newly created JFlex scanner.
 /// </summary>
 /// <param name="matchVersion"> the Lucene match version </param>
 /// <param name="input"> The input reader
 /// See http://issues.apache.org/jira/browse/LUCENE-1068 </param>
 // NOTE(review): no constructor body is visible in this listing and matchVersion is not used in the
 // visible lines - confirm against the original file.
 public StandardTokenizer(Version matchVersion, Reader input)
     : base(input)
Ejemplo n.º 28
 /// <summary>
 /// Creates NGramTokenFilter with default min and max n-grams.
 /// </summary>
 /// <param name="version"> Lucene version to enable correct position increments.
 ///                See <a href="#version">above</a> for details. </param>
 /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
 // NOTE(review): neither a constructor initializer nor a body is visible in this listing, so where the
 // default sizes come from cannot be seen here - confirm against the original file.
 public NGramTokenFilter(Version version, TokenStream input)
Ejemplo n.º 29
 /// <summary>
 /// Creates a new StandardTokenizer with a given <see cref="AttributeFactory"/>.
 /// </summary>
 /// <param name="matchVersion"> the Lucene match version </param>
 /// <param name="factory"> attribute factory passed to the base constructor </param>
 /// <param name="input"> the input reader passed to the base constructor </param>
 // NOTE(review): no constructor body is visible in this listing and matchVersion is not used in the
 // visible lines - confirm against the original file.
 public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input)
     : base(factory, input)
Ejemplo n.º 30
 /// <summary>
 /// Creates a CharArraySet from a file. The file is opened through
 /// <c>IOUtils.GetDecodingReader</c> with UTF-8 and the resulting reader is passed to
 /// <c>WordlistLoader.GetWordSet</c>.
 /// </summary>
 /// <param name="stopwords">
 ///          the stopwords file to load
 /// </param>
 /// <param name="matchVersion">
 ///          the Lucene version for cross version compatibility </param>
 /// <returns> a CharArraySet containing the distinct stopwords from the given
 ///         file </returns>
 /// <exception cref="IOException">
 ///           if loading the stopwords throws an <see cref="IOException"/> </exception>
 protected internal static CharArraySet LoadStopwordSet(File stopwords, Version matchVersion)
     Reader reader = null;
       // NOTE(review): the reader is never closed in the visible lines. The deeper indentation of the
       // next two statements suggests an enclosing try whose cleanup part is missing from this
       // listing - confirm against the original file.
       reader = IOUtils.GetDecodingReader(stopwords, StandardCharsets.UTF_8);
       return WordlistLoader.GetWordSet(reader, matchVersion);
Ejemplo n.º 31
        /// <summary>
        /// Creates a new instance of the <see cref="StandardTokenizer"/>.  Attaches
        /// the <c>input</c> to the newly created JFlex scanner.
        /// </summary>
        /// <param name="matchVersion"> the Lucene match version; not used in the visible lines </param>
        /// <param name="input"> The input reader
        /// See http://issues.apache.org/jira/browse/LUCENE-1068 </param>
        public StandardTokenizer(Version matchVersion, Reader input)
            : base(input)
            // Obtain the term, position-increment, offset and type attributes via AddAttribute.
            termAtt = AddAttribute<ICharTermAttribute>();
            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
            offsetAtt = AddAttribute<IOffsetAttribute>();
            typeAtt = AddAttribute<ITypeAttribute>();

Ejemplo n.º 32
 /// <summary>
 /// Creates a new instance initialized with the given stopword set.
 /// </summary>
 /// <param name="version">
 ///          the Lucene version for cross version compatibility </param>
 /// <param name="stopwords">
 ///          the analyzer's stopword set; <c>null</c> selects <c>CharArraySet.EMPTY_SET</c> </param>
 protected internal StopwordAnalyzerBase(Version version, CharArraySet stopwords)
     matchVersion = version;
     // analyzers should use char array set for stopwords!
     // A non-null set is copied and the copy is wrapped with UnmodifiableSet before it is stored.
     this.stopwords = stopwords == null ? CharArraySet.EMPTY_SET : CharArraySet.UnmodifiableSet(CharArraySet.Copy(version, stopwords));
Ejemplo n.º 33
 /// <summary>
 /// Selects the scanner implementation for the given match version, checking from the newest
 /// version (LUCENE_47) down to LUCENE_31, and stores it in <c>scanner</c>.
 /// </summary>
 /// <param name="matchVersion"> the Lucene match version that decides which scanner is created </param>
 private void Init(Version matchVersion)
     if (matchVersion.OnOrAfter(Version.LUCENE_47))
         this.scanner = new StandardTokenizerImpl(input);
     else if (matchVersion.OnOrAfter(Version.LUCENE_40))
         this.scanner = new StandardTokenizerImpl40(input);
     else if (matchVersion.OnOrAfter(Version.LUCENE_34))
         this.scanner = new StandardTokenizerImpl34(input);
     else if (matchVersion.OnOrAfter(Version.LUCENE_31))
         this.scanner = new StandardTokenizerImpl31(input);
         // NOTE(review): presumably the fallback for versions before LUCENE_31; the `else` that would
         // introduce it is not visible in this listing - confirm against the original file.
         this.scanner = new ClassicTokenizerImpl(input);
Ejemplo n.º 34
	  /// <summary>
	  /// Reads stopwords from a stopword list in Snowball format into a new CharArraySet.
	  /// <para>
	  /// The snowball format is the following:
	  /// <ul>
	  /// <li>Lines may contain multiple words separated by whitespace.</li>
	  /// <li>The comment character is the vertical line (&#124;).</li>
	  /// <li>Lines may contain trailing comments.</li>
	  /// </ul>
	  /// </para>
	  /// </summary>
	  /// <param name="reader"> Reader containing a Snowball stopword list </param>
	  /// <param name="matchVersion"> the Lucene <see cref="Version"/> </param>
	  /// <returns> A <see cref="CharArraySet"/> with the reader's words </returns>
	  public static CharArraySet getSnowballWordSet(Reader reader, Version matchVersion)
	  {
		// The target set is created case sensitive (ignoreCase = false) with the default initial capacity.
		var words = new CharArraySet(matchVersion, INITIAL_CAPACITY, false);
		return getSnowballWordSet(reader, words);
	  }
Ejemplo n.º 35
 /// <summary>
 /// Creates NGramTokenFilter with default min and max n-grams.
 /// </summary>
 /// <param name="version"> Lucene version to enable correct position increments.
 ///                See <a href="#version">above</a> for details. </param>
 /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
 // NOTE(review): neither a constructor initializer nor a body is visible in this listing, so where the
 // default sizes come from cannot be seen here - confirm against the original file.
 public NGramTokenFilter(Version version, TokenStream input)
Ejemplo n.º 36
 /// <summary>
 /// Creates a new instance of the <see cref="StandardTokenizer"/>.  Attaches
 /// the <c>input</c> to the newly created JFlex scanner.
 /// </summary>
 /// <param name="matchVersion"> the Lucene match version </param>
 /// <param name="input"> The input reader
 /// See http://issues.apache.org/jira/browse/LUCENE-1068 </param>
 // NOTE(review): no constructor body is visible in this listing and matchVersion is not used in the
 // visible lines - confirm against the original file.
 public StandardTokenizer(Version matchVersion, Reader input)
     : base(input)
Ejemplo n.º 37
 /// <summary>
 /// Builds a fully initialized TokenFilterFactory for the given name and key-value arguments.
 /// The resource loader is obtained from <c>GetCurrentTypeResourceLoader()</c> and passed to the
 /// overload that takes an explicit loader.
 /// </summary>
 protected internal virtual TokenFilterFactory TokenFilterFactory(string name, Version version, params string[] keysAndValues)
 {
     var resourceLoader = GetCurrentTypeResourceLoader();
     return TokenFilterFactory(name, version, resourceLoader, keysAndValues);
 }
Ejemplo n.º 38
        /// <summary>
        /// Selects the scanner implementation for the given match version, checking from the newest
        /// version (LUCENE_47) down to LUCENE_31, stores it in <c>scanner</c>, and then obtains the
        /// term, position-increment, offset and type attributes.
        /// </summary>
        /// <param name="matchVersion"> the Lucene match version that decides which scanner is created </param>
        private void Init(Version matchVersion)
            // Warnings 612 and 618 (use of obsolete members) are suppressed around the version checks.
#pragma warning disable 612, 618
            if (matchVersion.OnOrAfter(Version.LUCENE_47))
                this.scanner = new StandardTokenizerImpl(input);
            else if (matchVersion.OnOrAfter(Version.LUCENE_40))
                this.scanner = new StandardTokenizerImpl40(input);
            else if (matchVersion.OnOrAfter(Version.LUCENE_34))
                this.scanner = new StandardTokenizerImpl34(input);
            else if (matchVersion.OnOrAfter(Version.LUCENE_31))
                this.scanner = new StandardTokenizerImpl31(input);
#pragma warning restore 612, 618
                // NOTE(review): presumably the fallback for versions before LUCENE_31; the `else` that would
                // introduce it is not visible in this listing - confirm against the original file.
                this.scanner = new ClassicTokenizerImpl(input);

            termAtt = AddAttribute<ICharTermAttribute>();
            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
            offsetAtt = AddAttribute<IOffsetAttribute>();
            typeAtt = AddAttribute<ITypeAttribute>();
Ejemplo n.º 39
        /// <summary>
        /// Reads stopwords from a stopword list in Snowball format into a new CharArraySet.
        /// <para>
        /// The snowball format is the following:
        /// <ul>
        /// <li>Lines may contain multiple words separated by whitespace.</li>
        /// <li>The comment character is the vertical line (&#124;).</li>
        /// <li>Lines may contain trailing comments.</li>
        /// </ul>
        /// </para>
        /// </summary>
        /// <param name="reader"> Reader containing a Snowball stopword list </param>
        /// <param name="matchVersion"> the Lucene <see cref="Version"/> </param>
        /// <returns> A <see cref="CharArraySet"/> with the reader's words </returns>
        public static CharArraySet getSnowballWordSet(Reader reader, Version matchVersion)
        {
            // The target set is created case sensitive (ignoreCase = false) with the default initial capacity.
            var words = new CharArraySet(matchVersion, INITIAL_CAPACITY, false);
            return getSnowballWordSet(reader, words);
        }
Ejemplo n.º 40
 /// <summary>
 /// Builds a fully initialized CharFilterFactory: the factory class is looked up by
 /// <paramref name="name"/> and then instantiated through <c>AnalysisFactory</c> with the given
 /// version, resource loader and key-value arguments.
 /// </summary>
 protected internal virtual CharFilterFactory CharFilterFactory(string name, Version matchVersion, IResourceLoader loader, params string[] keysAndValues)
 {
     var factoryClass = Lucene.Net.Analysis.Util.CharFilterFactory.LookupClass(name);
     var created = AnalysisFactory(factoryClass, matchVersion, loader, keysAndValues);
     return (CharFilterFactory)created;
 }
Ejemplo n.º 41
 /// <summary>
 /// Reads lines from a Reader and adds every non-comment line as an entry to a new CharArraySet
 /// (omitting leading and trailing whitespace). Every line of the Reader should contain only
 /// one word. The words need to be in lowercase if you make use of an
 /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
 /// </summary>
 /// <param name="reader"> Reader containing the wordlist </param>
 /// <param name="comment"> The string representing a comment. </param>
 /// <param name="matchVersion"> the Lucene <see cref="Version"/> </param>
 /// <returns> A CharArraySet with the reader's words </returns>
 public static CharArraySet GetWordSet(TextReader reader, string comment, Version matchVersion)
 {
     // The target set is created case sensitive (ignoreCase = false) with the default initial capacity.
     var words = new CharArraySet(matchVersion, INITIAL_CAPACITY, false);
     return GetWordSet(reader, comment, words);
 }
        /// <summary>
        /// Turns the flat key/value array into an argument dictionary, adds the
        /// <c>luceneMatchVersion</c> entry, and instantiates <paramref name="clazz"/> reflectively
        /// with that dictionary as its single constructor argument.
        /// </summary>
        /// <param name="clazz"> the factory type to instantiate </param>
        /// <param name="matchVersion"> version whose <c>ToString()</c> is stored under <c>luceneMatchVersion</c> </param>
        /// <param name="loader"> resource loader; not used in the visible lines </param>
        /// <param name="keysAndValues"> alternating keys and values; must have an even length </param>
        /// <returns> the created factory </returns>
        /// <exception cref="System.ArgumentException"> if <paramref name="keysAndValues"/> has an odd
        ///        length, or if the constructor invoked through reflection throws one </exception>
        private AbstractAnalysisFactory AnalysisFactory(Type clazz, Version matchVersion, IResourceLoader loader, params string[] keysAndValues)
            if (keysAndValues.Length % 2 == 1)
                throw new System.ArgumentException("invalid keysAndValues map");
            string previous;
            IDictionary<string, string> args = new Dictionary<string, string>();
            // Consume the array pairwise: even index = key, following index = value.
            for (int i = 0; i < keysAndValues.Length; i += 2)
                if (args.TryGetValue(keysAndValues[i], out previous))
                    fail("duplicate values for key: " + keysAndValues[i]);
                args[keysAndValues[i]] = keysAndValues[i + 1];

            // The match version must not have been supplied by the caller; it is always set from matchVersion.
            if (args.TryGetValue("luceneMatchVersion", out previous))
                fail("duplicate values for key: luceneMatchVersion");
            args["luceneMatchVersion"] = matchVersion.ToString();

            AbstractAnalysisFactory factory = null;
                // Public and non-public instance constructors taking the argument dictionary are considered.
                // NOTE(review): the `try` that belongs to the catch below is not visible in this listing.
                factory = (AbstractAnalysisFactory)Activator.CreateInstance(clazz,
                    BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance,
                    null, new object[] { args }, CultureInfo.InvariantCulture);
            catch (TargetInvocationException e)
                // to simplify tests that check for illegal parameters
                if (e.InnerException is System.ArgumentException)
                    throw (System.ArgumentException)e.InnerException;
                    // NOTE(review): `throw e;` resets the stack trace of the caught exception; a bare
                    // `throw;` would preserve it (analyzer rule CA2200). The `else` introducing this
                    // statement is not visible in this listing - confirm before changing.
                    throw e;
            // NOTE(review): this check has no visible body and `loader` is otherwise unused; presumably the
            // dropped statement passes the loader to the factory - confirm against the original file.
            if (factory is IResourceLoaderAware)
            return factory;
Ejemplo n.º 43
 /// <summary>
 /// Creates the filter with an explicit position-increment setting. Construction is delegated to
 /// the (version, input) constructor; the flag is then passed to <c>CheckPositionIncrement</c>
 /// together with the version and stored in <c>enablePositionIncrements</c>.
 /// </summary>
 /// <param name="version"> the Lucene match version </param>
 /// <param name="enablePositionIncrements"> the value to check and store </param>
 /// <param name="input"> the <see cref="TokenStream"/> to consume </param>
 public FilteringTokenFilter(Lucene.Net.Util.LuceneVersion version, bool enablePositionIncrements, TokenStream input)
     : this(version, input)
     CheckPositionIncrement(version, enablePositionIncrements);
     this.enablePositionIncrements = enablePositionIncrements;
 /// <summary>
 /// Builds a fully initialized TokenFilterFactory for the given name and key-value arguments.
 /// The resource loader is obtained from <c>GetCurrentTypeResourceLoader()</c> and passed to the
 /// overload that takes an explicit loader.
 /// </summary>
 protected internal virtual TokenFilterFactory TokenFilterFactory(string name, Version version, params string[] keysAndValues)
 {
     var resourceLoader = GetCurrentTypeResourceLoader();
     return TokenFilterFactory(name, version, resourceLoader, keysAndValues);
 }
 /// <summary>
 /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range.
 /// Forwards to the overload that takes a side, using <c>Side.FRONT</c>.
 /// </summary>
 /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
 /// <param name="input"> <see cref="Reader"/> holding the input to be tokenized </param>
 /// <param name="minGram"> the smallest n-gram to generate </param>
 /// <param name="maxGram"> the largest n-gram to generate </param>
 public Lucene43EdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram)
     : this(version, input, Side.FRONT, minGram, maxGram)