/// <summary>
/// Resolves an <see cref="Analyzer"/> instance for the given type code.
/// 0 / unknown → StockFooAnalyzer (rooted at the "Data" app setting),
/// 1 → StandardAnalyzer, 2 → SimpleAnalyzer, 3 → StopAnalyzer,
/// 4 → KeywordAnalyzer, 5 → WhitespaceAnalyzer.
/// </summary>
/// <param name="type">Analyzer type code; anything outside 0-5 falls back to StockFooAnalyzer.</param>
/// <returns>A newly constructed analyzer; the caller owns its lifetime.</returns>
/// <exception cref="InvalidOperationException">The "Data" app setting is missing.</exception>
private Analyzer GetAnalyer(int type) // NOTE(review): name is a typo for "GetAnalyzer"; kept so existing callers keep compiling
{
    // The AppSettings indexer already returns a string; the previous
    // ".ToString()" call turned a missing "Data" key into an opaque
    // NullReferenceException. Fail fast with a clear message instead.
    string path = System.Configuration.ConfigurationSettings.AppSettings["Data"];
    if (path == null)
        throw new InvalidOperationException("Missing required appSetting 'Data'.");

    switch (type)
    {
        case 0: return new StockFooAnalyzer(path);
        case 1: return new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
        case 2: return new SimpleAnalyzer();
        case 3: return new StopAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
        case 4: return new KeywordAnalyzer();
        case 5: return new WhitespaceAnalyzer();
        default: return new StockFooAnalyzer(path);
    }
}
/// <summary>
/// Parses a raw query string into a Lucene <see cref="Query"/>, applying the
/// untokenized-term, search-term and date-term preprocessing passes before
/// handing the rewritten text to the range-aware parser.
/// </summary>
/// <param name="query">The raw query text as supplied by the caller.</param>
/// <param name="indexQuery">Supplies the default field and default operator for parsing.</param>
/// <param name="analyzer">Per-field analyzer wrapper used by the parser; preprocessing may register additional field analyzers on it.</param>
/// <returns>The parsed (and post-processed) query.</returns>
/// <exception cref="ParseException">
/// Wraps parser failures; the message distinguishes whether the failing text
/// was the original query or a preprocessed rewrite of it.
/// </exception>
public static Query BuildQuery(string query, IndexQuery indexQuery, PerFieldAnalyzerWrapper analyzer)
{
    // NOTE(review): a KeywordAnalyzer was previously allocated here and
    // closed in a finally block but never used by anything in the method —
    // dead allocation removed.
    var originalQuery = query;
    try
    {
        var queryParser = new RangeQueryParser(Version.LUCENE_29, indexQuery.DefaultField ?? string.Empty, analyzer)
        {
            DefaultOperator = indexQuery.DefaultOperator == QueryOperator.Or
                ? QueryParser.Operator.OR
                : QueryParser.Operator.AND,
            AllowLeadingWildcard = true
        };

        // Each pass rewrites the query text; keep the original around so a
        // parse failure can report which version actually failed.
        query = PreProcessUntokenizedTerms(query, queryParser);
        query = PreProcessSearchTerms(query);
        query = PreProcessDateTerms(query, queryParser);

        var generatedQuery = queryParser.Parse(query);
        generatedQuery = HandleMethods(generatedQuery);
        return generatedQuery;
    }
    catch (ParseException pe)
    {
        if (originalQuery == query)
            throw new ParseException("Could not parse: '" + query + "'", pe);
        throw new ParseException("Could not parse modified query: '" + query + "' original was: '" + originalQuery + "'", pe);
    }
}
public virtual void TestOffsets()
{
    // KeywordAnalyzer emits the entire input as a single token, so the
    // first token of "abcd" must span offsets [0, 4).
    var input = new System.IO.StringReader("abcd");
    TokenStream tokens = new KeywordAnalyzer().TokenStream("field", input);
    var offsets = tokens.AddAttribute<IOffsetAttribute>();

    Assert.IsTrue(tokens.IncrementToken());
    Assert.AreEqual(0, offsets.StartOffset);
    Assert.AreEqual(4, offsets.EndOffset);
}
/// <summary>
/// Looks up the single best-matching document whose "id" field equals the
/// given id and returns it as a field-name/value dictionary.
/// </summary>
/// <param name="id">The identifier to look up; treated as a literal value, not query syntax.</param>
/// <returns>
/// The matching document's fields, or an empty dictionary when nothing matches.
/// </returns>
public IDictionary<string, string> Find(string id)
{
    var keywordAnalyzer = new KeywordAnalyzer(); // fixed local-name typo ("keyworldAnalyzer")
    var parser = new QueryParser(Version.LUCENE_30, "id", keywordAnalyzer);

    // Escape Lucene query syntax so an id containing characters such as
    // ':', '+', '*' or '~' cannot throw a ParseException or be
    // misinterpreted as a structured query.
    var query = parser.Parse(QueryParser.Escape(id));

    var scoreDocs = _searcher.Search(query, 1).ScoreDocs;
    if (scoreDocs != null && scoreDocs.Length > 0)
        return Docs.DocToDict(_searcher.Doc(scoreDocs[0].Doc));
    return new Dictionary<string, string>();
}
/// <summary>
/// Builds a Lucene <see cref="Query"/> from the given text, first rewriting
/// untokenized ([[...]]) terms and registering a keyword analyzer for the
/// affected fields on the per-field wrapper.
/// </summary>
/// <param name="query">Raw query text.</param>
/// <param name="analyzer">Per-field analyzer wrapper; preprocessing may add field analyzers to it.</param>
/// <returns>The parsed query.</returns>
public static Query BuildQuery(string query, PerFieldAnalyzerWrapper analyzer)
{
    var keywordAnalyzer = new KeywordAnalyzer();
    try
    {
        query = PreProcessUntokenizedTerms(analyzer, query, keywordAnalyzer);
        var queryParser = new RangeQueryParser(Version.LUCENE_29, "", analyzer);
        queryParser.SetAllowLeadingWildcard(true);
        return queryParser.Parse(query); // was "Parse(query);;" — stray empty statement removed
    }
    finally
    {
        // Close the keyword analyzer even when parsing throws.
        keywordAnalyzer.Close();
    }
}
/// <summary>
/// Builds a Lucene <see cref="Query"/> from the given text after running the
/// untokenized-term and search-term preprocessing passes.
/// </summary>
/// <param name="query">Raw query text.</param>
/// <param name="analyzer">Per-field analyzer wrapper used by the parser.</param>
/// <returns>The parsed query.</returns>
public static Query BuildQuery(string query, PerFieldAnalyzerWrapper analyzer)
{
    // NOTE(review): a KeywordAnalyzer was previously allocated here and
    // closed in a finally block but never referenced by the method body —
    // dead allocation removed.
    var queryParser = new RangeQueryParser(Version.LUCENE_29, string.Empty, analyzer);
    query = PreProcessUntokenizedTerms(query, queryParser);
    query = PreProcessSearchTerms(query);
    queryParser.SetAllowLeadingWildcard(true); // not the recommended approach, should rather use ReverseFilter
    return queryParser.Parse(query);
}
/// <summary>
/// Detects untokenized fields and sets as NotAnalyzed in analyzer
/// </summary>
/// <remarks>
/// Scans <paramref name="query"/> for matches of the <c>untokenizedQuery</c>
/// regex (group 1 = field name, group 2 = the <c>[[...]]</c>-wrapped value),
/// registers <paramref name="keywordAnalyzer"/> for each such field on
/// <paramref name="analyzer"/>, and rewrites each <c>[[value]]</c> span to a
/// quoted <c>"value"</c> in the returned query text. Returns the input
/// unchanged when no untokenized terms are present.
/// NOTE(review): when at least one match exists, the <c>ref</c> parameter is
/// overwritten with a freshly constructed KeywordAnalyzer — presumably so the
/// caller can close it later; confirm the caller disposes any analyzer it
/// passed in originally.
/// </remarks>
private static string PreProcessUntokenizedTerms(PerFieldAnalyzerWrapper analyzer, string query, ref Analyzer keywordAnalyzer)
{
    var untokenizedMatches = untokenizedQuery.Matches(query);
    if (untokenizedMatches.Count < 1)
        return query;

    var sb = new StringBuilder(query);

    // Initialize a KeywordAnalyzer
    // KeywordAnalyzer will not tokenize the values
    keywordAnalyzer = new KeywordAnalyzer();

    // process in reverse order to leverage match string indexes
    // (edits later in the string do not shift the indexes of earlier matches)
    for (var i = untokenizedMatches.Count; i > 0; i--)
    {
        var match = untokenizedMatches[i - 1];

        // specify that term for this field should not be tokenized
        analyzer.AddAnalyzer(match.Groups[1].Value, keywordAnalyzer);

        var term = match.Groups[2];

        // introduce " " around the term
        var startIndex = term.Index;
        // length - 2: group 2 includes the leading "[[" which is stripped below
        var length = term.Length - 2;
        // append a closing quote unless the value already ends with one
        if (sb[startIndex + length - 1] != '"')
        {
            sb.Insert(startIndex + length, '"');
            length += 1;
        }
        // insert an opening quote just after the leading "[[" unless present
        if (sb[startIndex + 2] != '"')
        {
            sb.Insert(startIndex + 2, '"');
            length += 1;
        }
        // remove enclosing "[[" "]]" from term value (again in reverse order)
        sb.Remove(startIndex + length, 2);
        sb.Remove(startIndex, 2);
    }
    return sb.ToString();
}
/// <summary>
/// Builds a per-field analyzer wrapper from the index definition: the
/// supplied default (optionally overridden by an analyzer configured for
/// Constants.AllFields), the explicitly configured per-field analyzers, and
/// shared Keyword/Standard analyzers for NotAnalyzed/Analyzed field modes.
/// Every analyzer created here has its Close registered in
/// <paramref name="toDispose"/>.
/// </summary>
public RavenPerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection<Action> toDispose, bool forQuerying = false)
{
    toDispose.Add(defaultAnalyzer.Close);

    // An analyzer configured for the catch-all field replaces the default.
    string allFieldsAnalyzerTypeName;
    if (indexDefinition.Analyzers.TryGetValue(Constants.AllFields, out allFieldsAnalyzerTypeName))
    {
        defaultAnalyzer = IndexingExtensions.CreateAnalyzerInstance(Constants.AllFields, allFieldsAnalyzerTypeName);
        toDispose.Add(defaultAnalyzer.Close);
    }

    var wrapper = new RavenPerFieldAnalyzerWrapper(defaultAnalyzer);

    // Register the explicitly configured per-field analyzers. Instances are
    // always created (and scheduled for disposal) even when skipped below.
    foreach (var configured in indexDefinition.Analyzers)
    {
        var instance = IndexingExtensions.CreateAnalyzerInstance(configured.Key, configured.Value);
        toDispose.Add(instance.Close);

        // Analyzers marked [NotForQuerying] are index-time only.
        if (forQuerying && instance.GetType().GetCustomAttributes(typeof(NotForQueryingAttribute), false).Length > 0)
            continue;

        wrapper.AddAnalyzer(configured.Key, instance);
    }

    // Lazily created, shared across all fields that need them.
    StandardAnalyzer sharedStandard = null;
    KeywordAnalyzer sharedKeyword = null;
    foreach (var fieldMode in indexDefinition.Indexes)
    {
        if (fieldMode.Value == FieldIndexing.NotAnalyzed)
        {
            if (sharedKeyword == null)
            {
                sharedKeyword = new KeywordAnalyzer();
                toDispose.Add(sharedKeyword.Close);
            }
            wrapper.AddAnalyzer(fieldMode.Key, sharedKeyword);
        }
        else if (fieldMode.Value == FieldIndexing.Analyzed
                 && !indexDefinition.Analyzers.ContainsKey(fieldMode.Key))
        {
            // An explicitly configured analyzer wins over the standard one.
            if (sharedStandard == null)
            {
                sharedStandard = new StandardAnalyzer(Version.LUCENE_29);
                toDispose.Add(sharedStandard.Close);
            }
            wrapper.AddAnalyzer(fieldMode.Key, sharedStandard);
        }
    }
    return wrapper;
}
public virtual void TestOffsets()
{
    // KeywordAnalyzer emits the whole input as a single token, so the first
    // (and only) token of "abcd" must span offsets [0, 4).
    var input = new System.IO.StringReader("abcd");
    TokenStream tokens = new KeywordAnalyzer().TokenStream("field", input);
    var offsets = (OffsetAttribute) tokens.AddAttribute(typeof(OffsetAttribute));

    Assert.IsTrue(tokens.IncrementToken());
    Assert.AreEqual(0, offsets.StartOffset());
    Assert.AreEqual(4, offsets.EndOffset());
}
/// <summary>
/// Builds a per-field analyzer wrapper around the supplied default: the
/// explicitly configured per-field analyzers from the index definition, plus
/// shared Keyword/Standard analyzers for NotAnalyzed/Analyzed field modes.
/// Every analyzer created here has its Close registered in
/// <paramref name="toDispose"/>.
/// </summary>
public PerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection<Action> toDispose)
{
    toDispose.Add(defaultAnalyzer.Close);
    var wrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer);

    // Register the explicitly configured analyzers first.
    foreach (var configured in indexDefinition.Analyzers)
    {
        var instance = IndexingExtensions.CreateAnalyzerInstance(configured.Key, configured.Value);
        if (instance == null)
            continue;
        toDispose.Add(instance.Close);
        wrapper.AddAnalyzer(configured.Key, instance);
    }

    // Lazily created, shared across all fields that need them.
    StandardAnalyzer sharedStandard = null;
    KeywordAnalyzer sharedKeyword = null;
    foreach (var fieldMode in indexDefinition.Indexes)
    {
        if (fieldMode.Value == FieldIndexing.NotAnalyzed)
        {
            if (sharedKeyword == null)
            {
                sharedKeyword = new KeywordAnalyzer();
                toDispose.Add(sharedKeyword.Close);
            }
            wrapper.AddAnalyzer(fieldMode.Key, sharedKeyword);
        }
        else if (fieldMode.Value == FieldIndexing.Analyzed
                 && !indexDefinition.Analyzers.ContainsKey(fieldMode.Key))
        {
            // An explicitly configured analyzer wins over the standard one.
            if (sharedStandard == null)
            {
                sharedStandard = new StandardAnalyzer(Version.LUCENE_29);
                toDispose.Add(sharedStandard.Close);
            }
            wrapper.AddAnalyzer(fieldMode.Key, sharedStandard);
        }
    }
    return wrapper;
}
/// <summary>
/// Parses a query string using a bare KeywordAnalyzer as the default for
/// every field; both analyzers are disposed once parsing completes.
/// </summary>
private static Query Parse(string q)
{
    using (var keywordAnalyzer = new KeywordAnalyzer())
    {
        using (var wrapper = new PerFieldAnalyzerWrapper(keywordAnalyzer))
        {
            return QueryBuilder.BuildQuery(q, wrapper);
        }
    }
}
/// <summary>
/// Creates an IndexWriter over the entity's Lucene directory, using a
/// per-field analyzer wrapper whose unconfigured fields fall back to
/// exact-match (keyword) analysis.
/// </summary>
public static IndexWriter Create(AbstractConnection connection, Entity entity)
{
    // Fields without an explicit analyzer get the keyword default.
    var perFieldAnalyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer());
    foreach (var fieldAnalyzer in GetFields(entity, connection.Version, connection.Logger))
    {
        perFieldAnalyzer.AddAnalyzer(fieldAnalyzer.Key, fieldAnalyzer.Value);
    }

    var directory = LuceneDirectoryFactory.Create(connection, entity);
    return new IndexWriter(directory, perFieldAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED);
}
/// <summary>
/// Creates an IndexWriter over the entity's Lucene directory. The default
/// analyzer is KeywordAnalyzer unless the process configures a "default"
/// search type, and per-field analyzers are layered on top.
/// </summary>
/// <returns>A writer over a live directory; the caller owns both.</returns>
public static IndexWriter Create(AbstractConnection connection, Process process, Entity entity)
{
    // BUG FIX: the directory was previously created inside a using block, so
    // it was disposed as soon as this method returned — leaving the returned
    // IndexWriter holding a closed directory. The writer (and ultimately the
    // caller) owns the directory; dispose it here only if construction fails.
    var dir = LuceneDirectoryFactory.Create(connection, entity);
    try
    {
        Analyzer defaultAnalyzer = new KeywordAnalyzer();
        if (process.SearchTypes.ContainsKey("default"))
        {
            defaultAnalyzer = LuceneAnalyzerFactory.Create(process.SearchTypes["default"].Analyzer, connection.Version);
        }

        var analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer);
        foreach (var field in GetFields(entity, connection.Version, connection.Logger))
        {
            analyzer.AddAnalyzer(field.Key, field.Value);
        }

        return new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
    }
    catch
    {
        dir.Dispose();
        throw;
    }
}
/// <summary>
/// Loads the data into the Lucene index
/// </summary>
/// <param name="directory">
/// Directory where the index is located.
/// </param>
private void LoadLuceneIndex(SimpleFSDirectory directory)
{
    Analyzer analyzer = new KeywordAnalyzer();

    // -----------
    // Store products into Lucene.
    // This will create a new index. Other requests will still be able to read the existing index.

    // Create writer that will overwrite the existing index
    using (var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
    {
        foreach (var productSuggestion in _productRepository.GetAllSuggestions())
        {
            AddSuggestionDocument(writer, productSuggestion.ToString(), productSuggestion.ProductCode, ProductSuggestionCode);
        }

        foreach (var categorySuggestion in _categoryRepository.GetAllSuggestions())
        {
            AddSuggestionDocument(writer, categorySuggestion.ToString(), categorySuggestion.CategoryName, CategorySuggestionCode);
        }
    }
}

/// <summary>
/// Adds one suggestion document to the index (shared by product and
/// category suggestions, which previously duplicated this code inline).
/// </summary>
/// <param name="writer">Open index writer to add the document to.</param>
/// <param name="serialized">Stored, non-indexed serialized form of the suggestion.</param>
/// <param name="name">Display name; indexed upper-cased (not stored) for case-insensitive search.</param>
/// <param name="suggestionTypeCode">Stored, non-indexed type discriminator.</param>
private static void AddSuggestionDocument(IndexWriter writer, string serialized, string name, string suggestionTypeCode)
{
    // Storing all names in upper case, so we can do case insensitive search easily
    var doc = new Document();
    doc.Add(new Field("Object", serialized, Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("UcName", name.ToUpper(), Field.Store.NO, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("SuggestionType", suggestionTypeCode, Field.Store.YES, Field.Index.NO));
    writer.AddDocument(doc);
}
/// <summary>
/// Builds a per-field analyzer wrapper with StandardAnalyzer as the default,
/// the explicitly configured per-field analyzers from the index definition,
/// and one shared KeywordAnalyzer for every NotAnalyzed/NotAnalyzedNoNorms
/// field. Every analyzer created here has its Close registered in
/// <paramref name="toDispose"/>.
/// </summary>
private PerFieldAnalyzerWrapper CreateAnalyzer(ICollection<Action> toDispose)
{
    var defaultAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
    toDispose.Add(defaultAnalyzer.Close);
    var wrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer);

    // Register the explicitly configured analyzers first.
    foreach (var configured in indexDefinition.Analyzers)
    {
        var instance = indexDefinition.CreateAnalyzerInstance(configured.Key, configured.Value);
        if (instance == null)
            continue;
        toDispose.Add(instance.Close);
        wrapper.AddAnalyzer(configured.Key, instance);
    }

    // One shared KeywordAnalyzer, created lazily, serves every
    // not-analyzed field.
    KeywordAnalyzer sharedKeyword = null;
    foreach (var fieldMode in indexDefinition.Indexes)
    {
        var mode = fieldMode.Value;
        if (mode != FieldIndexing.NotAnalyzedNoNorms && mode != FieldIndexing.NotAnalyzed)
            continue;

        if (sharedKeyword == null)
        {
            sharedKeyword = new KeywordAnalyzer();
            toDispose.Add(sharedKeyword.Close);
        }
        wrapper.AddAnalyzer(fieldMode.Key, sharedKeyword);
    }
    return wrapper;
}