Ejemplo n.º 1
0
 /// <summary>
 /// Rejects a disabled position-increment setting for Lucene 4.4 and later.
 /// </summary>
 /// <param name="version">The Lucene match version to check against.</param>
 /// <param name="enablePositionIncrements">The requested position-increment setting.</param>
 /// <exception cref="System.ArgumentException">
 /// Thrown when <paramref name="enablePositionIncrements"/> is false and
 /// <paramref name="version"/> is on or after <c>Version.LUCENE_44</c>.
 /// </exception>
 private static void CheckPositionIncrement(Version version, bool enablePositionIncrements)
 {
     // Enabled increments are always acceptable; the version is not consulted.
     if (enablePositionIncrements)
     {
         return;
     }

     if (version.OnOrAfter(Version.LUCENE_44))
     {
         throw new System.ArgumentException("enablePositionIncrements=false is not supported anymore as of Lucene 4.4 as it can create broken token streams");
     }
 }
        /// <summary>
        /// Parses <paramref name="searchQuery"/> and returns the HTML explanation produced by the
        /// index searcher for <paramref name="resultId"/>.
        /// </summary>
        /// <param name="luceneVersion">The Lucene version given to the analyzer and the query parser.</param>
        /// <param name="fsDirectory">The index directory the searcher is opened on.</param>
        /// <param name="searchQuery">The raw search query text.</param>
        /// <param name="resultId">The result identifier passed to <c>IndexSearcher.Explain</c>.</param>
        /// <returns>The explanation rendered through <c>ToHtml()</c>.</returns>
        protected virtual string PerformExplain(Version luceneVersion, FSDirectory fsDirectory, string searchQuery, int resultId)
        {
            /*
             * The obvious problem here is that we're not using the exact same search as the real one.
             */

            using (var indexSearcher = new IndexSearcher(fsDirectory, false))
            {
                var analyzer = new StandardAnalyzer(luceneVersion);

                try
                {
                    var queryParser = new MultiFieldQueryParser(luceneVersion, new[] { "Id".ToLowerInvariant() }, analyzer)
                    {
                        DefaultOperator = QueryParser.Operator.AND
                    };

                    var query = this.searchQueryParser.ParseQuery(searchQuery, queryParser);

                    return indexSearcher.Explain(query, resultId).ToHtml();
                }
                finally
                {
                    // Close the analyzer even when parsing or explaining throws.
                    analyzer.Close();
                }
            }
        }
		/// <summary>Creates an analyzer that uses the supplied stop words.</summary>
		/// <param name="matchVersion">
		/// The Lucene version to match. It determines the stop-filter position-increment default
		/// and whether invalid acronyms are replaced (on or after <c>Version.LUCENE_24</c>).
		/// </param>
		/// <param name="stopWords">The stop words to use.</param>
		public StandardAnalyzer(Version matchVersion, ISet<string> stopWords)
		{
			this.stopSet = stopWords;
			SetOverridesTokenStreamMethod<StandardAnalyzer>();

			// Version-dependent switches.
			this.replaceInvalidAcronym = matchVersion.OnOrAfter(Version.LUCENE_24);
			this.enableStopPositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);

			this.matchVersion = matchVersion;
		}
Ejemplo n.º 4
0
        /// <summary>
        /// Creates a new <see cref="CharTokenizer"/> instance.
        /// </summary>
        /// <param name="matchVersion">The Lucene version to match; it selects the <c>CharacterUtils</c> implementation.</param>
        /// <param name="input">The input to split up into tokens.</param>
        protected CharTokenizer(Version matchVersion, TextReader input)
            : base(input)
        {
            this.charUtils = CharacterUtils.GetInstance(matchVersion);

            // Attributes are registered in the same order as before: term first, then offset.
            this.termAtt = AddAttribute<ICharTermAttribute>();
            this.offsetAtt = AddAttribute<IOffsetAttribute>();
        }
 /// <summary>
 /// Creates a new <see cref="DictionaryCompoundWordTokenFilter"/> with explicit size limits.
 /// </summary>
 /// <param name="matchVersion">
 ///          Lucene version to enable correct Unicode 4.0 behavior in the
 ///          dictionaries if Version > 3.0. See <a
 ///          href="CompoundWordTokenFilterBase.html#version"
 ///          >CompoundWordTokenFilterBase</a> for details. </param>
 /// <param name="input">The <see cref="TokenStream"/> to process.</param>
 /// <param name="dictionary">The word dictionary to match against; must not be null.</param>
 /// <param name="minWordSize">Only words longer than this get processed.</param>
 /// <param name="minSubwordSize">Only subwords longer than this get to the output stream.</param>
 /// <param name="maxSubwordSize">Only subwords shorter than this get to the output stream.</param>
 /// <param name="onlyLongestMatch">Add only the longest matching subword to the stream.</param>
 /// <exception cref="System.ArgumentException">Thrown when <paramref name="dictionary"/> is null.</exception>
 public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
     : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch)
 {
     // The check runs after the base constructor has already received the dictionary.
     if (dictionary != null)
     {
         return;
     }

     throw new System.ArgumentException("dictionary cannot be null");
 }
 /// <summary>
 /// Creates a new <see cref="DictionaryCompoundWordTokenFilter"/> using the base class's
 /// three-argument constructor.
 /// </summary>
 /// <param name="matchVersion">
 ///          Lucene version to enable correct Unicode 4.0 behavior in the
 ///          dictionaries if Version > 3.0. See <a
 ///          href="CompoundWordTokenFilterBase.html#version"
 ///          >CompoundWordTokenFilterBase</a> for details. </param>
 /// <param name="input">The <see cref="TokenStream"/> to process.</param>
 /// <param name="dictionary">The word dictionary to match against; must not be null.</param>
 /// <exception cref="System.ArgumentException">Thrown when <paramref name="dictionary"/> is null.</exception>
 public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary)
     : base(matchVersion, input, dictionary)
 {
     // The check runs after the base constructor has already received the dictionary.
     if (dictionary != null)
     {
         return;
     }

     throw new System.ArgumentException("dictionary cannot be null");
 }
Ejemplo n.º 7
0
 /// <summary>
 /// Creates a search instance, storing the supplied collaborators and starting with an
 /// empty index-path map.
 /// </summary>
 /// <param name="typeName">Value assigned to <c>TypeName</c>.</param>
 /// <param name="indexDir">Value assigned to <c>Directory</c>.</param>
 /// <param name="docBuilder">Value assigned to <c>DocumentBuilder</c>.</param>
 /// <param name="pathBuilder">Value assigned to <c>IndexPathBuilder</c>.</param>
 /// <param name="facetFieldNameProvider">The facet field name provider to store.</param>
 /// <param name="version">The Lucene version to store.</param>
 public LuceneSearch(string typeName, string indexDir, IDocumentBuilder docBuilder, IIndexPathBuilder pathBuilder, IFacetFieldNameProvider facetFieldNameProvider, LN.Util.Version version)
 {
     // Assignment order is kept as-is in case any of the members are properties with setters.
     this.TypeName = typeName;
     this.Directory = indexDir;
     this.version = version;
     this.DocumentBuilder = docBuilder;
     this.IndexPathBuilder = pathBuilder;
     this.facetFieldNameProvider = facetFieldNameProvider;

     this.indexPaths = new Dictionary<string, LuceneIndexPath>();
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Construct a token stream filtering the given input using a set of common
 /// words to create bigrams. Outputs both unigrams with position increment and
 /// bigrams with position increment 0 type=gram where one or both of the words
 /// in a potential bigram are in the set of common words.
 /// </summary>
 /// <param name="matchVersion">The Lucene version; not referenced in this constructor's body.</param>
 /// <param name="input">TokenStream input in the filter chain.</param>
 /// <param name="commonWords">The set of common words.</param>
 public CommonGramsFilter(Version matchVersion, TokenStream input, CharArraySet commonWords)
     : base(input)
 {
     this.commonWords = commonWords;

     // Attributes are registered in the same order as before.
     this.termAttribute = AddAttribute<ICharTermAttribute>();
     this.offsetAttribute = AddAttribute<IOffsetAttribute>();
     this.typeAttribute = AddAttribute<ITypeAttribute>();
     this.posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
     this.posLenAttribute = AddAttribute<IPositionLengthAttribute>();
 }
        /// <summary>
        /// Optimizes the Lucene index.
        /// </summary>
        /// <param name="luceneVersion">The Lucene version used to create the analyzer.</param>
        /// <param name="fsDirectory">The index directory the writer is opened on.</param>
        /// <param name="maxFieldLength">The maximum field length handed to the writer.</param>
        public virtual void Optimize(Version luceneVersion, FSDirectory fsDirectory, IndexWriter.MaxFieldLength maxFieldLength)
        {
            var analyzer = new StandardAnalyzer(luceneVersion);

            try
            {
                using (var indexWriter = new IndexWriter(fsDirectory, analyzer, maxFieldLength))
                {
                    indexWriter.Optimize();
                }
            }
            finally
            {
                // Close the analyzer only after the writer is done with it, and also when
                // opening the writer or optimizing throws.
                analyzer.Close();
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Clears the entire index.
        /// </summary>
        /// <param name="luceneVersion">The Lucene version used to create the analyzer.</param>
        /// <param name="fsDirectory">The index directory the writer is opened on.</param>
        /// <param name="maxFieldLength">The maximum field length handed to the writer.</param>
        public virtual void ClearIndex(Version luceneVersion, FSDirectory fsDirectory, IndexWriter.MaxFieldLength maxFieldLength)
        {
            var analyzer = new StandardAnalyzer(luceneVersion);

            try
            {
                using (var indexWriter = new IndexWriter(fsDirectory, analyzer, maxFieldLength))
                {
                    indexWriter.DeleteAll();
                }
            }
            finally
            {
                // Close the analyzer even when opening the writer or deleting throws.
                analyzer.Close();
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Creates a new Base Lucene Search Provider and builds its index searcher and query parser.
        /// </summary>
        /// <param name="ver">Lucene version, also passed to the query parser.</param>
        /// <param name="indexDir">Directory the index searcher is opened on.</param>
        /// <param name="analyzer">Analyzer passed to the query parser.</param>
        /// <param name="schema">Index schema; its <c>IndexField</c> is passed to the query parser.</param>
        public BaseLuceneSearchProvider(LucUtil.Version ver, Directory indexDir, Analyzer analyzer, IFullTextIndexSchema schema)
        {
            _version = ver;
            _indexDir = indexDir;
            _analyzer = analyzer;
            _schema = schema;

            // Searcher and parser are built from the same values that were just stored.
            _searcher = new LucSearch.IndexSearcher(indexDir, true);
            _parser = new QueryParser(ver, schema.IndexField, analyzer);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Builds an analyzer with the default stop words, read line by line from the embedded
        /// resource named <c>"Lucene.Net.Analyzers.AR." + DEFAULT_STOPWORD_FILE</c>.
        /// </summary>
        /// <param name="matchVersion">The Lucene version, stored on this instance.</param>
        public ArabicAnalyzer(Version matchVersion)
        {
            this.matchVersion = matchVersion;

            var resourceName = "Lucene.Net.Analyzers.AR." + DEFAULT_STOPWORD_FILE;
            var resourceStream = System.Reflection.Assembly.GetAssembly(this.GetType()).GetManifestResourceStream(resourceName);

            using (StreamReader reader = new StreamReader(resourceStream))
            {
                // ReadLine returns null once the stream is exhausted.
                string word;
                while ((word = reader.ReadLine()) != null)
                {
                    stoptable.Add(word, word);
                }
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Clears an item from the index by deleting the documents whose <c>"Key"</c> term
        /// equals <paramref name="id"/>.
        /// </summary>
        /// <param name="id">The identifier matched against the <c>"Key"</c> field.</param>
        /// <param name="luceneVersion">The Lucene version used to create the analyzer.</param>
        /// <param name="fsDirectory">The index directory the writer is opened on.</param>
        /// <param name="maxFieldLength">The maximum field length handed to the writer.</param>
        public void ClearIndex(string id, Version luceneVersion, FSDirectory fsDirectory, IndexWriter.MaxFieldLength maxFieldLength)
        {
            var analyzer = new StandardAnalyzer(luceneVersion);

            try
            {
                using (var indexWriter = new IndexWriter(fsDirectory, analyzer, maxFieldLength))
                {
                    var searchQuery = new TermQuery(new Term("Key", id));

                    indexWriter.DeleteDocuments(searchQuery);
                }
            }
            finally
            {
                // Close the analyzer even when opening the writer or deleting throws.
                analyzer.Close();
            }
        }
        /// <summary>
        /// Initializes a new instance of the <see cref="SearchServiceConfiguration" /> class,
        /// substituting a fallback for every argument that is missing.
        /// </summary>
        /// <param name="luceneVersion">The Lucene version; falls back to <c>Version.LUCENE_30</c> when <c>IsNull()</c> reports it missing.</param>
        /// <param name="maxFieldLength">Maximum length of the field; falls back to <c>IndexWriter.MaxFieldLength.UNLIMITED</c>.</param>
        /// <param name="indexFolder">The index folder; falls back to <c>C:\SearchIndex\</c> when null or empty.</param>
        /// <param name="writeLockSemaphoreFileName">Name of the write lock semaphore file; falls back to <c>write.lock</c> inside the index folder.</param>
        /// <param name="hitLimit">The hit limit; falls back to 1000 when left at its default value of 0.</param>
        public SearchServiceConfiguration(
            Version luceneVersion,
            IndexWriter.MaxFieldLength maxFieldLength,
            string indexFolder,
            string writeLockSemaphoreFileName,
            int hitLimit)
        {
            this.luceneVersion = luceneVersion.IsNull() ? Version.LUCENE_30 : luceneVersion;

            this.maxFieldLength = maxFieldLength.IsNull() ? IndexWriter.MaxFieldLength.UNLIMITED : maxFieldLength;

            this.indexFolder = indexFolder.IsNullOrEmpty() ? @"C:\SearchIndex\" : indexFolder;

            // Uses the resolved folder (this.indexFolder), not the raw argument.
            this.writeLockSemaphoreFileName = writeLockSemaphoreFileName.IsNullOrEmpty() ? Path.Combine(this.indexFolder, "write.lock") : writeLockSemaphoreFileName;

            // The previous check compared the int against a boxed bool via Equals(object),
            // which is always false, so the 1000 fallback was never applied.
            this.hitLimit = hitLimit == default(int) ? 1000 : hitLimit;

            this.fsDirectory = this.GetDirectory();
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Opens the Lucene index mapped from <c>~/App_Data/LuceneIndex</c> and fills the
        /// controller's reader, searcher, version, field lists and faceted-search fields.
        /// </summary>
        public HomeController()
        {
            var indexPath = HostingEnvironment.MapPath("~/App_Data/LuceneIndex");
            var indexDirectory = FSDirectory.Open(new DirectoryInfo(indexPath));

            // Reader and searcher are both opened on the same directory instance.
            _indexReader = IndexReader.Open(indexDirectory, true);
            _indexSearcher = new IndexSearcher(indexDirectory, true);
            _luceneVersion = Version.LUCENE_29;

            _searchFields = new string[] { LuceneIndexFieldMap.FirstNameField, LuceneIndexFieldMap.CityField, LuceneIndexFieldMap.TimeZoneField };
            _facetFields = new string[] { LuceneIndexFieldMap.TimeZoneField };

            _simpleFacetedSearch = new SimpleFacetedSearch(_indexReader, _facetFields);
        }
	  /// <summary>
	  /// Validates the size limits and stores all settings on the filter.
	  /// </summary>
	  /// <param name="matchVersion">The Lucene version to store.</param>
	  /// <param name="input">The token stream passed to the base constructor.</param>
	  /// <param name="dictionary">The dictionary to store; it is not validated here.</param>
	  /// <param name="minWordSize">Minimum word size; must not be negative.</param>
	  /// <param name="minSubwordSize">Minimum subword size; must not be negative.</param>
	  /// <param name="maxSubwordSize">Maximum subword size; must not be negative.</param>
	  /// <param name="onlyLongestMatch">The longest-match flag to store.</param>
	  /// <exception cref="System.ArgumentException">Thrown when any size argument is negative.</exception>
	  protected internal CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch) : base(input)
	  {
		// Validate in the same order as before so the first offending argument is the one reported.
		if (minWordSize < 0)
		{
		  throw new System.ArgumentException("minWordSize cannot be negative");
		}
		if (minSubwordSize < 0)
		{
		  throw new System.ArgumentException("minSubwordSize cannot be negative");
		}
		if (maxSubwordSize < 0)
		{
		  throw new System.ArgumentException("maxSubwordSize cannot be negative");
		}

		this.matchVersion = matchVersion;
		this.tokens = new LinkedList<CompoundToken>();
		this.minWordSize = minWordSize;
		this.minSubwordSize = minSubwordSize;
		this.maxSubwordSize = maxSubwordSize;
		this.onlyLongestMatch = onlyLongestMatch;
		this.dictionary = dictionary;
	  }
 /// <summary>
 /// Initializes this analyzer with the Analyzer object that actually produces the tokens.
 /// </summary>
 /// <param name="matchVersion">The Lucene version, stored on this instance.</param>
 /// <param name="_delegate">The choice of <see cref="Analyzer"/> that is used to produce the token stream which needs filtering.</param>
 public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer _delegate)
 {
     this._delegate = _delegate;
     SetOverridesTokenStreamMethod<QueryAutoStopWordAnalyzer>();
     this.matchVersion = matchVersion;
 }
Ejemplo n.º 18
0
 /// <summary>
 /// Creates a search instance, storing the supplied collaborators and starting with an
 /// empty index-path map.
 /// </summary>
 /// <param name="typeName">Value assigned to <c>TypeName</c>.</param>
 /// <param name="indexDir">Value assigned to <c>Directory</c>.</param>
 /// <param name="docBuilder">Value assigned to <c>DocumentBuilder</c>.</param>
 /// <param name="pathBuilder">Value assigned to <c>IndexPathBuilder</c>.</param>
 /// <param name="facetFieldNameProvider">The facet field name provider to store.</param>
 /// <param name="version">The Lucene version to store.</param>
 public LuceneSearch(string typeName, string indexDir, IDocumentBuilder docBuilder, IIndexPathBuilder pathBuilder, IFacetFieldNameProvider facetFieldNameProvider, LN.Util.Version version)
 {
     // Assignment order is kept as-is in case any of the members are properties with setters.
     this.TypeName = typeName;
     this.Directory = indexDir;
     this.version = version;
     this.DocumentBuilder = docBuilder;
     this.IndexPathBuilder = pathBuilder;
     this.facetFieldNameProvider = facetFieldNameProvider;

     this.indexPaths = new Dictionary<string, LuceneIndexPath>();
 }
        /// <summary>
        /// Creates a new <see cref="HyphenationCompoundWordTokenFilter"/> instance. All arguments
        /// except <paramref name="hyphenator"/> are forwarded to the base constructor.
        /// </summary>
        /// <param name="matchVersion">
        ///          Lucene version to enable correct Unicode 4.0 behavior in the
        ///          dictionaries if Version > 3.0. See <a
        ///          href="CompoundWordTokenFilterBase.html#version"
        ///          >CompoundWordTokenFilterBase</a> for details. </param>
        /// <param name="input">The <see cref="TokenStream"/> to process.</param>
        /// <param name="hyphenator">The hyphenation pattern tree to use for hyphenation; stored on this instance.</param>
        /// <param name="dictionary">The word dictionary to match against.</param>
        /// <param name="minWordSize">Only words longer than this get processed.</param>
        /// <param name="minSubwordSize">Only subwords longer than this get to the output stream.</param>
        /// <param name="maxSubwordSize">Only subwords shorter than this get to the output stream.</param>
        /// <param name="onlyLongestMatch">Add only the longest matching subword to the stream.</param>
        public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
            : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch)
        {

            this.hyphenator = hyphenator;
        }
Ejemplo n.º 20
0
 /// <summary>
 /// Constructs a new LowerCaseTokenizer; both arguments are forwarded to the base constructor.
 /// </summary>
 /// <param name="matchVersion">The Lucene version to match.</param>
 /// <param name="in">The input to split up into tokens.</param>
 public LowerCaseTokenizer(Version matchVersion, TextReader @in)
     : base(matchVersion, @in)
 {
 }
Ejemplo n.º 21
0
 /// <summary>
 /// Constructs a new WhitespaceTokenizer using a given attribute factory; all arguments
 /// are forwarded to the base constructor.
 /// </summary>
 /// <param name="matchVersion">The Lucene version to match.</param>
 /// <param name="factory">The attribute factory to use for this <see cref="Tokenizer"/>.</param>
 /// <param name="in">The input to split up into tokens.</param>
 public WhitespaceTokenizer(Version matchVersion, AttributeFactory factory, TextReader @in)
     : base(matchVersion, factory, @in)
 {
 }
 /// <summary>
 /// Create a HyphenationCompoundWordTokenFilter with no dictionary.
 /// <para>
 /// Delegates to the overload that takes explicit sizes, passing
 /// <c>DEFAULT_MIN_WORD_SIZE</c>, <c>DEFAULT_MIN_SUBWORD_SIZE</c> and
 /// <c>DEFAULT_MAX_SUBWORD_SIZE</c>.
 /// </para>
 /// </summary>
 /// <param name="matchVersion">The Lucene version, forwarded unchanged.</param>
 /// <param name="input">The token stream, forwarded unchanged.</param>
 /// <param name="hyphenator">The hyphenation tree, forwarded unchanged.</param>
 public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator)
     : this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE)
 {
 }
Ejemplo n.º 23
0
        /// <summary>
        /// Explains the search score for a result, or returns an empty string when there is no input.
        /// </summary>
        /// <param name="luceneVersion">The Lucene version, forwarded to <c>PerformExplain</c>.</param>
        /// <param name="fsDirectory">The index directory, forwarded to <c>PerformExplain</c>.</param>
        /// <param name="input">The search input; null or empty short-circuits to an empty result.</param>
        /// <param name="resultId">The result identifier, forwarded to <c>PerformExplain</c>.</param>
        /// <returns><c>string.Empty</c> for null or empty input; otherwise the result of <c>PerformExplain</c>.</returns>
        public virtual string Explain(Version luceneVersion, FSDirectory fsDirectory, string input, int resultId)
        {
            if (string.IsNullOrEmpty(input))
            {
                return string.Empty;
            }

            return this.PerformExplain(luceneVersion, fsDirectory, input, resultId);
        }
Ejemplo n.º 24
0
 /// <summary>
 /// Creates a new <see cref="WhitespaceAnalyzer"/>.
 /// </summary>
 /// <param name="matchVersion">The Lucene version to match; stored on this instance.</param>
 public WhitespaceAnalyzer(Version matchVersion)
 {
     this.matchVersion = matchVersion;
 }
Ejemplo n.º 25
0
 /// <summary>
 /// Creates a new Lucene Search Provider
 /// </summary>
 /// <param name="ver">Version</param>
 /// <param name="indexDir">Directory</param>
 /// <param name="analyzer">Analyzer</param>
 /// <param name="autoSync">Whether to keep the search provider in sync with the index</param>
 /// <remarks>
 /// Uses the <see cref="DefaultIndexSchema">DefaultIndexSchema</see> as the schema: a new
 /// instance is created and passed to the overload that accepts a schema.
 /// </remarks>
 public LuceneSearchProvider(LucUtil.Version ver, Directory indexDir, Analyzer analyzer, bool autoSync)
     : this(ver, indexDir, analyzer, new DefaultIndexSchema(), autoSync)
 {
 }
Ejemplo n.º 26
0
 /// <summary>
 /// Initializes this analyzer with the Analyzer object that actually produces the tokens.
 /// </summary>
 /// <param name="matchVersion">The Lucene version, stored on this instance.</param>
 /// <param name="_delegate">The choice of <see cref="Analyzer"/> that is used to produce the token stream which needs filtering.</param>
 public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer _delegate)
 {
     this._delegate = _delegate;
     SetOverridesTokenStreamMethod <QueryAutoStopWordAnalyzer>();
     this.matchVersion = matchVersion;
 }
Ejemplo n.º 27
0
 /// <summary>
 /// Builds an analyzer with the default stop words (<see cref="STOP_WORDS_SET" />) by
 /// delegating to the overload that accepts a stop word set.
 /// </summary>
 /// <param name="matchVersion">The Lucene version to match.</param>
 public StandardAnalyzer(Version matchVersion)
     : this(matchVersion, STOP_WORDS_SET)
 {
 }
Ejemplo n.º 28
0
 /// <summary>
 /// Creates a new <see cref="FilteringTokenFilter"/> with position increments enabled.
 /// </summary>
 /// <param name="version">The Lucene match version; stored on this instance.</param>
 /// <param name="in">The <see cref="TokenStream"/> to consume.</param>
 public FilteringTokenFilter(Version version, TokenStream @in)
     : base(@in)
 {
     // This overload always enables position increments.
     this.enablePositionIncrements = true;
     this.version = version;
 }
Ejemplo n.º 29
0
 /// <summary>
 /// Creates a filter with an explicit position-increment setting. Delegates to the
 /// two-argument constructor first, then validates and applies the setting.
 /// </summary>
 /// <param name="version">The Lucene match version.</param>
 /// <param name="enablePositionIncrements">The position-increment setting to apply after validation.</param>
 /// <param name="input">The token stream to consume.</param>
 public FilteringTokenFilter(Version version, bool enablePositionIncrements, TokenStream input)
     : this(version, input)
 {
     // Validation runs before the value set by the delegated constructor is overwritten.
     CheckPositionIncrement(version, enablePositionIncrements);
     this.enablePositionIncrements = enablePositionIncrements;
 }
Ejemplo n.º 30
0
 /// <summary>
 /// Creates a new Lucene Search Provider
 /// </summary>
 /// <param name="ver">Version</param>
 /// <param name="indexDir">Directory</param>
 /// <param name="analyzer">Analyzer</param>
 /// <remarks>
 /// Uses the <see cref="DefaultIndexSchema">DefaultIndexSchema</see> as the schema.
 /// Delegates to the overload taking a boolean, passing <c>true</c>.
 /// </remarks>
 public LuceneSearchProvider(LucUtil.Version ver, Directory indexDir, Analyzer analyzer)
     : this(ver, indexDir, analyzer, true)
 {
 }
Ejemplo n.º 31
0
 /// <summary>
 /// Returns the version-dependent default for enablePositionIncrements. Analyzers
 /// that embed StopFilter use this method when creating the StopFilter.
 /// </summary>
 /// <param name="matchVersion">The Lucene version to test.</param>
 /// <returns>True when <paramref name="matchVersion"/> is on or after <c>Version.LUCENE_29</c>; otherwise false.</returns>
 public static bool GetEnablePositionIncrementsVersionDefault(Version matchVersion)
 {
     bool isLucene29OrLater = matchVersion.OnOrAfter(Version.LUCENE_29);
     return isLucene29OrLater;
 }
		/// <summary>
		/// Creates a new StandardTokenizer with a given
		/// <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />.
		/// </summary>
		/// <param name="matchVersion">The Lucene version, passed on to <c>Init</c>.</param>
		/// <param name="factory">The attribute factory handed to the base constructor.</param>
		/// <param name="input">The reader given to both the scanner and <c>Init</c>.</param>
		public StandardTokenizer(Version matchVersion, AttributeFactory factory, System.IO.TextReader input):base(factory)
		{
			InitBlock();
			this.scanner = new StandardTokenizerImpl(input);
			Init(input, matchVersion);
		}
 /// <summary>
 /// Creates a ComplexPhraseQueryParser; all arguments are forwarded to the base constructor.
 /// </summary>
 /// <param name="matchVersion">The Lucene version.</param>
 /// <param name="f">Second base-constructor argument (presumably the default field name; confirm against the base class).</param>
 /// <param name="a">The analyzer.</param>
 public ComplexPhraseQueryParser(Version matchVersion, String f, Analyzer a) : base(matchVersion, f, a)
 {
 }
Ejemplo n.º 34
0
 /// <summary>
 /// Constructs a new WhitespaceTokenizer; both arguments are forwarded to the base constructor.
 /// </summary>
 /// <param name="matchVersion">The Lucene version to match.</param>
 /// <param name="in">The input to split up into tokens.</param>
 public WhitespaceTokenizer(Version matchVersion, TextReader @in)
     : base(matchVersion, @in)
 {
 }
Ejemplo n.º 35
0
 /// <summary>
 /// Creates a new Base Lucene Search Provider. Delegates to the five-argument overload,
 /// passing <c>true</c> as the last argument, then creates the URI comparer.
 /// </summary>
 /// <param name="ver">Lucene Version.</param>
 /// <param name="indexDir">Directory.</param>
 /// <param name="analyzer">Analyzer.</param>
 /// <param name="schema">Index Schema.</param>
 public BaseLuceneSearchProvider(LucUtil.Version ver, Directory indexDir, Analyzer analyzer, IFullTextIndexSchema schema)
     : this(ver, indexDir, analyzer, schema, true)
 {
     _uriComparer = new UriComparer();
 }
 /// <summary>
 /// Create a HyphenationCompoundWordTokenFilter with no dictionary.
 /// <para>
 /// Delegates to the full overload, passing <c>null</c> as the dictionary and
 /// <c>false</c> for <c>onlyLongestMatch</c>.
 /// </para>
 /// </summary>
 /// <param name="matchVersion">The Lucene version, forwarded unchanged.</param>
 /// <param name="input">The token stream, forwarded unchanged.</param>
 /// <param name="hyphenator">The hyphenation tree, forwarded unchanged.</param>
 /// <param name="minWordSize">Minimum word size, forwarded unchanged.</param>
 /// <param name="minSubwordSize">Minimum subword size, forwarded unchanged.</param>
 /// <param name="maxSubwordSize">Maximum subword size, forwarded unchanged.</param>
 public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator, int minWordSize, int minSubwordSize, int maxSubwordSize)
     : this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize, maxSubwordSize, false)
 {
 }
Ejemplo n.º 37
0
 /// <summary>
 /// Creates a new Lucene Search Provider
 /// </summary>
 /// <param name="ver">Version; also used to construct the analyzer.</param>
 /// <param name="indexDir">Directory</param>
 /// <remarks>
 /// Uses the <see cref="DefaultIndexSchema">DefaultIndexSchema</see> as the schema and the <see cref="StandardAnalyzer">StandardAnalyzer</see> as the analyzer
 /// </remarks>
 public LuceneSearchProvider(LucUtil.Version ver, Directory indexDir)
     : this(ver, indexDir, new StandardAnalyzer(ver), new DefaultIndexSchema())
 {
 }
 /// <summary>
 /// Creates a new <see cref="HyphenationCompoundWordTokenFilter"/> instance using the default
 /// size limits. Delegates to the full overload with <c>DEFAULT_MIN_WORD_SIZE</c>,
 /// <c>DEFAULT_MIN_SUBWORD_SIZE</c>, <c>DEFAULT_MAX_SUBWORD_SIZE</c> and
 /// <c>onlyLongestMatch</c> set to <c>false</c>.
 /// </summary>
 /// <param name="matchVersion">
 ///          Lucene version to enable correct Unicode 4.0 behavior in the
 ///          dictionaries if Version > 3.0. See <a
 ///          href="CompoundWordTokenFilterBase.html#version"
 ///          >CompoundWordTokenFilterBase</a> for details. </param>
 /// <param name="input">The <see cref="TokenStream"/> to process.</param>
 /// <param name="hyphenator">The hyphenation pattern tree to use for hyphenation.</param>
 /// <param name="dictionary">The word dictionary to match against.</param>
 public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator, CharArraySet dictionary)
     : this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
 {
 }
Ejemplo n.º 39
0
 /// <summary>
 /// Creates a new Lucene Search Provider
 /// </summary>
 /// <param name="ver">Version; also used to construct the analyzer.</param>
 /// <param name="indexDir">Directory</param>
 /// <param name="schema">Index Schema</param>
 /// <param name="autoSync">Whether to keep the search provider in sync with the index</param>
 /// <remarks>
 /// Uses the <see cref="StandardAnalyzer">StandardAnalyzer</see> as the analyzer
 /// </remarks>
 public LuceneSearchProvider(LucUtil.Version ver, Directory indexDir, IFullTextIndexSchema schema, bool autoSync)
     : this(ver, indexDir, new StandardAnalyzer(ver), schema, autoSync)
 {
 }
Ejemplo n.º 40
0
 /// <summary>
 /// Creates a PorterStemAnalyzer; the version is forwarded to the base constructor.
 /// </summary>
 /// <param name="version">The Lucene version.</param>
 public PorterStemAnalyzer(Version version) : base(version)
 {
 }
Ejemplo n.º 41
0
 /// <summary>
 /// Creates a new Lucene Search Provider; all arguments are forwarded to the base constructor.
 /// </summary>
 /// <param name="ver">Version</param>
 /// <param name="indexDir">Directory</param>
 /// <param name="analyzer">Analyzer</param>
 /// <param name="schema">Index Schema</param>
 /// <param name="autoSync">Whether to keep the search provider in sync with the index</param>
 public LuceneSearchProvider(LucUtil.Version ver, Directory indexDir, Analyzer analyzer, IFullTextIndexSchema schema, bool autoSync)
     : base(ver, indexDir, analyzer, schema, autoSync)
 {
 }
Ejemplo n.º 42
0
 /// <summary>
 /// Constructs a new LowerCaseTokenizer using a given attribute factory; all arguments
 /// are forwarded to the base constructor.
 /// </summary>
 /// <param name="matchVersion">The Lucene version to match.</param>
 /// <param name="factory">The attribute factory to use for this <see cref="Tokenizer"/>.</param>
 /// <param name="in">The input to split up into tokens.</param>
 public LowerCaseTokenizer(Version matchVersion, AttributeFactory factory, TextReader @in)
     : base(matchVersion, factory, @in)
 {
 }
Ejemplo n.º 43
0
 /// <summary>
 /// Creates a new Lucene Search Provider. Delegates to the five-argument overload,
 /// passing <c>true</c> as the last argument.
 /// </summary>
 /// <param name="ver">Version</param>
 /// <param name="indexDir">Directory</param>
 /// <param name="analyzer">Analyzer</param>
 /// <param name="schema">Index Schema</param>
 public LuceneSearchProvider(LucUtil.Version ver, Directory indexDir, Analyzer analyzer, IFullTextIndexSchema schema)
     : this(ver, indexDir, analyzer, schema, true)
 {
 }
Ejemplo n.º 44
0
 /// <summary>
 /// Returns a <see cref="CharacterUtils"/> implementation according to the given
 /// <see cref="Version"/> instance.
 /// </summary>
 /// <param name="matchVersion">A version instance.</param>
 /// <returns>
 /// <c>JAVA_5</c> when <paramref name="matchVersion"/> is on or after
 /// <c>Version.LUCENE_31</c>; otherwise <c>JAVA_4</c>.
 /// </returns>
 public static CharacterUtils GetInstance(Version matchVersion)
 {
     if (matchVersion.OnOrAfter(Version.LUCENE_31))
     {
         return JAVA_5;
     }

     return JAVA_4;
 }
Ejemplo n.º 45
0
 /// <summary>
 /// Builds an analyzer with the stop words from the given reader. The words are loaded with
 /// <c>WordlistLoader.GetWordSet</c> and passed to the overload that accepts a stop word set.
 /// </summary>
 /// <param name="matchVersion">The Lucene version to match.</param>
 /// <param name="stopwords">Reader to read stop words from.</param>
 public StandardAnalyzer(Version matchVersion, System.IO.TextReader stopwords)
     : this(matchVersion, WordlistLoader.GetWordSet(stopwords))
 {
 }
		/// <summary>
		/// Stores the input reader, sets the invalid-acronym flag from the version and
		/// registers the tokenizer's attributes.
		/// </summary>
		/// <param name="input">The reader to tokenize.</param>
		/// <param name="matchVersion">The Lucene version; the flag is enabled on or after <c>Version.LUCENE_24</c>.</param>
		private void  Init(System.IO.TextReader input, Version matchVersion)
		{
			// The flag simply mirrors the version check.
			replaceInvalidAcronym = matchVersion.OnOrAfter(Version.LUCENE_24);

			this.input = input;

			// Attributes are registered in the same order as before.
			termAtt = AddAttribute<ITermAttribute>();
			offsetAtt = AddAttribute<IOffsetAttribute>();
			posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
			typeAtt = AddAttribute<ITypeAttribute>();
		}
Ejemplo n.º 47
0
 /// <summary>
 /// Creates a new StandardTokenizer with a given
 /// <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />.
 /// </summary>
 /// <param name="matchVersion">The Lucene version, passed on to <c>Init</c>.</param>
 /// <param name="factory">The attribute factory handed to the base constructor.</param>
 /// <param name="input">The reader given to both the scanner and <c>Init</c>.</param>
 public StandardTokenizer(Version matchVersion, AttributeFactory factory, System.IO.TextReader input) : base(factory)
 {
     InitBlock();
     this.scanner = new StandardTokenizerImpl(input);
     Init(input, matchVersion);
 }