/// <summary>
/// Writes one document holding a line-string shape into an in-memory index, then checks
/// that an "intersects" query built from a second line-string reports exactly one hit.
/// </summary>
public void PureLucene()
{
    using (var ramDirectory = new RAMDirectory())
    {
        // Indexing phase: the analyzer and writer only live for the duration of the write.
        using (var analyzer = new KeywordAnalyzer())
        using (var indexWriter = new IndexWriter(ramDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            var document = new Lucene.Net.Documents.Document();
            var indexedShape = NtsSpatialContext.GEO.ReadShape("LINESTRING (0 0, 1 1, 2 1)");
            var indexingStrategy = SpatialIndex.CreateStrategy(
                "WKT",
                SpatialSearchStrategy.GeohashPrefixTree,
                GeohashPrefixTree.GetMaxLevelsPossible());

            foreach (var indexableField in indexingStrategy.CreateIndexableFields(indexedShape))
            {
                document.Add(indexableField);
            }

            indexWriter.AddDocument(document);
            indexWriter.Commit();
        }

        // Query phase: a fresh strategy with the same settings builds the spatial query.
        var queryShape = NtsSpatialContext.GEO.ReadShape("LINESTRING (1 0, 1 1, 1 2)");
        var spatialArgs = new SpatialArgs(SpatialOperation.Intersects, queryShape);
        var queryStrategy = SpatialIndex.CreateStrategy(
            "WKT",
            SpatialSearchStrategy.GeohashPrefixTree,
            GeohashPrefixTree.GetMaxLevelsPossible());
        var spatialQuery = queryStrategy.MakeQuery(spatialArgs);

        using (var indexSearcher = new IndexSearcher(ramDirectory))
        {
            var results = indexSearcher.Search(spatialQuery, 5);
            Assert.Equal(1, results.TotalHits);
        }
    }
}
/// <summary>
/// Builds two in-memory indexes (two documents in the first, three in the second), then
/// prepares the shared prefix query and one read-only searcher per index.
/// </summary>
public void Setup()
{
    _directory1 = new RAMDirectory();
    _directory2 = new RAMDirectory();

    // The using blocks release the analyzer and both writers even when AddDocument/Commit throws;
    // previously a failure left the writer (and its write lock) open.
    using (var analyzer = new KeywordAnalyzer())
    {
        using (var indexWriter = new IndexWriter(_directory1, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            indexWriter.AddDocument(CreateDocument(10, "Anders"));
            indexWriter.AddDocument(CreateDocument(30, "Anne"));
            indexWriter.Commit();
        }

        using (var indexWriter2 = new IndexWriter(_directory2, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            indexWriter2.AddDocument(CreateDocument(40, "Andreas"));
            indexWriter2.AddDocument(CreateDocument(20, "Anja"));
            indexWriter2.AddDocument(CreateDocument(50, "Abe"));
            indexWriter2.Commit();
        }
    }

    // Every name written above starts with "A", so this prefix query matches all of them.
    _query = new PrefixQuery(new Term(NameFieldName, "A"));

    // Second constructor argument requests read-only searchers.
    _searcher1 = new IndexSearcher(_directory1, true);
    _searcher2 = new IndexSearcher(_directory2, true);
}
/// <summary>
/// Turns a raw query string into a Lucene <see cref="Query"/>. The string is run through
/// the untokenized-term, search-term and date-term pre-processors before being parsed.
/// </summary>
/// <param name="query">The raw query text.</param>
/// <param name="indexQuery">Supplies the default field and the default boolean operator.</param>
/// <param name="analyzer">Per-field analyzer handed to the query parser.</param>
/// <returns>The parsed query.</returns>
/// <exception cref="ParseException">
/// Thrown when parsing fails; the message contains the query text and, if pre-processing
/// changed it, the original text as well.
/// </exception>
public static Query BuildQuery(string query, IndexQuery indexQuery, PerFieldAnalyzerWrapper analyzer)
{
    // A KeywordAnalyzer used to be created and closed here without ever being used; it has been removed.
    var originalQuery = query;
    try
    {
        var queryParser = new RangeQueryParser(Version.LUCENE_29, indexQuery.DefaultField ?? string.Empty, analyzer)
        {
            DefaultOperator = indexQuery.DefaultOperator == QueryOperator.Or
                ? QueryParser.Operator.OR
                : QueryParser.Operator.AND,
            AllowLeadingWildcard = true
        };

        query = PreProcessUntokenizedTerms(query, queryParser);
        query = PreProcessSearchTerms(query);
        query = PreProcessDateTerms(query, queryParser);
        return queryParser.Parse(query);
    }
    catch (ParseException pe)
    {
        // `query` still equals the original when no pre-processor changed it (or parsing
        // of the parser itself failed before any rewrite happened).
        if (originalQuery == query)
        {
            throw new ParseException("Could not parse: '" + query + "'", pe);
        }

        throw new ParseException("Could not parse modified query: '" + query + "' original was: '" + originalQuery + "'", pe);
    }
}
/// <summary>
/// Searches the on-disk index with an optional free-text query (parsed against the
/// "_GLOM_" field with a standard analyzer) and an optional non-textual query (parsed with
/// a keyword analyzer). When both are given, a document must satisfy both.
/// </summary>
/// <param name="textualSearch">Free-text query, or null/empty to skip.</param>
/// <param name="nonTextualSearch">Keyword-style query, or null/empty to skip.</param>
/// <returns>
/// The content addresses of up to 1000 matching documents; an empty list when neither
/// query string was supplied.
/// </returns>
public List<ContentAddress> Search(string textualSearch, string nonTextualSearch)
{
    string indexFileLocation = GetIndexFilePath();
    Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation);
    IndexSearcher searcher = null;
    Analyzer textualAnal = null;
    Analyzer nonTextualAnal = null;
    try
    {
        // Created inside the try so the directory is still closed if opening the searcher fails.
        searcher = new IndexSearcher(dir);

        Query textualQuery = null;
        Query nonTextualQuery = null;

        if (!string.IsNullOrEmpty(textualSearch))
        {
            textualAnal = new StandardAnalyzer();
            QueryParser parser = new QueryParser("_GLOM_", textualAnal);
            textualQuery = parser.Parse(textualSearch);
        }

        if (!string.IsNullOrEmpty(nonTextualSearch))
        {
            nonTextualAnal = new KeywordAnalyzer();
            QueryParser parser = new QueryParser("", nonTextualAnal);
            nonTextualQuery = parser.Parse(nonTextualSearch);
        }

        Query query;
        if (textualQuery != null && nonTextualQuery != null)
        {
            BooleanQuery combined = new BooleanQuery();
            combined.Add(textualQuery, BooleanClause.Occur.MUST);
            combined.Add(nonTextualQuery, BooleanClause.Occur.MUST);
            query = combined;
        }
        else
        {
            query = textualQuery ?? nonTextualQuery;
        }

        if (query == null)
        {
            // Neither search string was supplied; previously a null query was handed to the searcher.
            return new List<ContentAddress>();
        }

        TopDocs hits = searcher.Search(query, 1000);

        // Iterate the returned score docs rather than 0..totalHits: totalHits counts every
        // match in the index while scoreDocs is capped by the limit passed to Search, so
        // indexing by totalHits ran past the end of the array for large result sets.
        List<ContentAddress> addrs = new List<ContentAddress>(hits.scoreDocs.Length);
        foreach (var scoreDoc in hits.scoreDocs)
        {
            addrs.Add(ContentAddress.FromString(searcher.Doc(scoreDoc.doc).Get("_CONTENTADDRESS_")));
        }

        return addrs;
    }
    finally
    {
        if (searcher != null)
        {
            searcher.Close();
        }

        if (textualAnal != null)
        {
            textualAnal.Close();
        }

        if (nonTextualAnal != null)
        {
            nonTextualAnal.Close();
        }

        dir.Close();
    }
}
/// <summary>
/// Looks up the index called <paramref name="name"/>; when it is not registered, clears any
/// stale lock on the Lucene directory obtained from <c>examineIndex</c>.
/// The visible code does not itself create an index and does not use
/// <paramref name="fields"/>; <paramref name="analyzer"/> is only defaulted.
/// </summary>
/// <param name="name">Name of the index to look up.</param>
/// <param name="fields">Field definitions (not read by the visible code).</param>
/// <param name="analyzer">Analyzer to use; a <see cref="KeywordAnalyzer"/> is substituted for null.</param>
public void CreateIndex(string name, FieldDefinitionCollection fields, Analyzer analyzer = null)
{
    analyzer = analyzer ?? new KeywordAnalyzer();

    // examineIndex.WaitForIndexQueueOnShutdown = false;
    _examineManager.TryGetIndex(name, out var index);
    if (index != null)
    {
        // Already registered: nothing to unlock.
        return;
    }

    var luceneDirectory = examineIndex.GetLuceneDirectory();

    var hasLockId = !string.IsNullOrEmpty(luceneDirectory.GetLockID());
    if (hasLockId)
    {
        // _loggingService.Info("Forcing index {IndexerName} to be unlocked since it was left in a locked state", examineIndex.Name);
        luceneDirectory.ClearLock("write.lock");
    }

    var stillLocked = IndexWriter.IsLocked(luceneDirectory);
    if (stillLocked)
    {
        IndexWriter.Unlock(luceneDirectory);
    }
}
/// <summary>
/// Deletes every document whose "Cin" field equals <paramref name="cin"/> from the index
/// stored in the folder configured under the "LuceneDirectory" app setting. The folder is
/// created when it does not exist yet.
/// </summary>
/// <param name="cin">Value of the "Cin" field identifying the document(s) to remove.</param>
public void DeleteLuceneIndexRecord(string cin)
{
    var path = ConfigurationManager.AppSettings["LuceneDirectory"];
    var indexDirInfo = new DirectoryInfo(path);
    if (!System.IO.Directory.Exists(path))
    {
        System.IO.Directory.CreateDirectory(path);
    }

    // Disposal order matters: the writer is disposed first (innermost), then the analyzer it
    // was using, then the directory. The original closed the analyzer and the writer by hand
    // *inside* the writer's using block (analyzer first, writer disposed twice) and never
    // released the directory.
    using (Directory directory = FSDirectory.Open(indexDirInfo, new SimpleFSLockFactory(indexDirInfo)))
    using (var analyzer = new KeywordAnalyzer())
    {
        // Forcefully clear a write lock that an earlier run may have left behind.
        IndexWriter.Unlock(directory);

        using (var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            // remove older index entry; pending deletes are committed when the writer is disposed
            var searchQuery = new TermQuery(new Term("Cin", cin));
            writer.DeleteDocuments(searchQuery);
        }
    }
}
/// <summary>
/// Insert the record into the ElasticSearch Server now
/// </summary>
/// <param name="recordToIndex">Record carrying the target index name, type name and JSON body.</param>
/// <param name="filePath">Source file path, used only in log messages.</param>
/// <param name="runLog">Log that receives recovery and error messages.</param>
/// <param name="checkIndex">When true, the index is created first if it does not exist.</param>
/// <returns>True when the final indexing response reports success.</returns>
private async Task<bool> InsertAsync(Record recordToIndex, string filePath, Log runLog, bool checkIndex = true)
{
    //If the index doesn't exist, we'll create it
    if (checkIndex && !ElasticSearchConnection.ESClient.IndexExists(recordToIndex.indexName).Exists)
    {
        var keywordA = new KeywordAnalyzer();
        //We set the Default analyzer here to be "Keyword" type - so it will treat strings as "whole strings" and not as separate keywords
        //very important for strings like "C#" or "Some long string"
        ElasticSearchConnection.ESClient.CreateIndex(recordToIndex.indexName, x => x.Analysis(a => a.Analyzers(an => an.Add("default", keywordA))));
    }

    Elasticsearch.Net.ElasticsearchResponse<Elasticsearch.Net.DynamicDictionary> response = null;

    // Set when anything in the "smart" path throws; the plain fallback then runs AFTER the
    // catch block so it can be awaited instead of blocking the calling thread with the
    // synchronous Raw.Index call that used to live inside the catch.
    bool fallBackToPlainIndex = false;
    try
    {
        // See if we can parse the JSON and try to retrieve the _id field out of it
        JObject myObj = JObject.Parse(recordToIndex.jsonString);
        if (myObj["_id"] != null)
        {
            //If _id was specified then we'll just update the previous record automatically if there was one
            string id = myObj["_id"].Value<String>();
            response = await ElasticSearchConnection.ESClient.Raw.IndexAsync(recordToIndex.indexName, recordToIndex.typeName, id, recordToIndex.jsonString);
            if (!response.Success)
            {
                //The _id field of the JSON document is often problematic, so letting ES generate its own ID field instead here might fix it
                // NOTE(review): this textual replace also rewrites "_id" inside other names/values (e.g. "user_id") - confirm that is acceptable.
                recordToIndex.jsonString = recordToIndex.jsonString.Replace("_id", "old_id");

                // Try one more time to index now after renaming the _id field
                response = await ElasticSearchConnection.ESClient.Raw.IndexAsync(recordToIndex.indexName, recordToIndex.typeName, recordToIndex.jsonString);
                if (response.Success)
                {
                    //renaming the _id field fixed the problem
                    runLog.LogMessage("Recovered from ERROR - But had to rename _id field to old_id to index record in " + filePath);
                }
            }
        }
        else
        {
            // If they didn't specify an "_id" then that's fine just index it anyway
            response = await ElasticSearchConnection.ESClient.Raw.IndexAsync(recordToIndex.indexName, recordToIndex.typeName, recordToIndex.jsonString);
        }
    }
    catch
    {
        // Deliberate best-effort: typically the JSON could not be parsed; try to index it anyway below.
        fallBackToPlainIndex = true;
    }

    if (fallBackToPlainIndex)
    {
        response = await ElasticSearchConnection.ESClient.Raw.IndexAsync(recordToIndex.indexName, recordToIndex.typeName, recordToIndex.jsonString);
    }

    if (!response.Success)
    {
        //Failed to index a record
        runLog.LogMessage("Error: Failed to index a record in " + filePath, Log.MessageType.Error);
    }

    return response.Success;
}
/// <summary>
/// Demo entry point: indexes two documents with titles "t1" and "t2" into an in-memory
/// index, runs a match-all query through <c>LuceneCustomFilter</c> sorted on "title"
/// (reverse flag set), and prints the title of every hit.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Directory index = new RAMDirectory();
    Analyzer analyzer = new KeywordAnalyzer();

    IndexWriter writer = new IndexWriter(index, analyzer, true);
    try
    {
        foreach (string title in new string[] { "t1", "t2" })
        {
            Document doc = new Document();
            doc.Add(new Field("title", title, Field.Store.YES, Field.Index.TOKENIZED));
            writer.AddDocument(doc);
        }
    }
    finally
    {
        // Close even when adding a document fails so the writer does not stay open.
        writer.Close();
    }

    Searcher searcher = new IndexSearcher(index);
    try
    {
        Query query = new MatchAllDocsQuery();
        Filter filter = new LuceneCustomFilter();
        Sort sort = new Sort("title", true);

        Hits hits = searcher.Search(query, filter, sort);
        IEnumerator hitsEnumerator = hits.Iterator();
        while (hitsEnumerator.MoveNext())
        {
            Hit hit = (Hit)hitsEnumerator.Current;
            Console.WriteLine(hit.GetDocument().GetField("title").StringValue());
        }
    }
    finally
    {
        // The searcher was never closed before.
        searcher.Close();
    }
}
/// <summary>
/// Returns the selector of every element whose normalized inner text contains at least one
/// of the supplied keywords.
/// </summary>
/// <param name="elementsList">Holds the candidate elements and the keywords to look for.</param>
/// <returns>Selectors of the matching elements, in element order (one entry per element).</returns>
public List<string> GetSimilarElementsByKeywords(SimilarElementsSearch elementsList)
{
    var selectors = new List<string>();

    foreach (var element in elementsList.Elements)
    {
        // The normalized text does not depend on the keyword, so it is computed once per
        // element (lazily, so elements are untouched when there are no keywords) instead of
        // once per element/keyword pair.
        string content = null;

        foreach (var keyword in elementsList.Keywords)
        {
            if (content == null)
            {
                content = KeywordAnalyzer.ReplaceSpecialCharacters(element.InnerText);
            }

            if (content.Contains(keyword))
            {
                // One match is enough; no need to count the remaining keywords.
                selectors.Add(element.Selector);
                break;
            }
        }
    }

    return selectors;
}
/// <summary>
/// Creates the analyzer used by this component: an <c>SnPerFieldAnalyzerWrapper</c> whose
/// default analyzer is a <see cref="KeywordAnalyzer"/>.
/// </summary>
/// <returns>A new wrapper instance on every call.</returns>
internal static Analyzer GetAnalyzer()
{
    return new Indexing.SnPerFieldAnalyzerWrapper(new KeywordAnalyzer());
}
/// <summary>
/// Builds a per-field analyzer wrapper around <paramref name="defaultAnalyzer"/>:
/// fields indexed as <c>NotAnalyzed</c> share one <see cref="KeywordAnalyzer"/>, fields
/// indexed as <c>Analyzed</c> share one <see cref="StandardAnalyzer"/> unless the index
/// definition names a custom analyzer for them.
/// </summary>
/// <param name="defaultAnalyzer">Analyzer for every field without a specific entry.</param>
/// <param name="toDispose">Receives a close action for every analyzer this method registers or creates.</param>
/// <param name="forQuerying">Currently not consulted by this implementation.</param>
/// <returns>The configured wrapper.</returns>
public PerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection<Action> toDispose, bool forQuerying = false)
{
    toDispose.Add(defaultAnalyzer.Close);

    var perFieldAnalyzerWrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer);

    // NOTE: custom analyzers listed in indexDefinition.Analyzers are NOT instantiated here.
    // The loop that used to iterate them had an entirely commented-out body (a no-op) and
    // has been removed; fields with a custom analyzer entry therefore fall back to the
    // default analyzer (see the Analyzed case below).

    StandardAnalyzer standardAnalyzer = null;
    KeywordAnalyzer keywordAnalyzer = null;
    foreach (var fieldIndexing in indexDefinition.Indexes)
    {
        switch (fieldIndexing.Value)
        {
            case FieldIndexing.NotAnalyzed:
                // Created lazily and shared by all not-analyzed fields.
                if (keywordAnalyzer == null)
                {
                    keywordAnalyzer = new KeywordAnalyzer();
                    toDispose.Add(keywordAnalyzer.Close);
                }

                perFieldAnalyzerWrapper.AddAnalyzer(fieldIndexing.Key, keywordAnalyzer);
                break;

            case FieldIndexing.Analyzed:
                if (indexDefinition.Analyzers.ContainsKey(fieldIndexing.Key))
                {
                    continue; // a custom analyzer is configured for this field
                }

                // Created lazily and shared by all analyzed fields.
                if (standardAnalyzer == null)
                {
                    standardAnalyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
                    toDispose.Add(standardAnalyzer.Close);
                }

                perFieldAnalyzerWrapper.AddAnalyzer(fieldIndexing.Key, standardAnalyzer);
                break;
        }
    }

    return perFieldAnalyzerWrapper;
}
/// <summary>
/// Searches the "All" field of the practitioner index for <paramref name="q"/> as an exact
/// term, a contained term, a suffix and a prefix, and maps up to 100 hits to models.
/// Results are de-duplicated on <c>Cin</c>. <c>Index(directory)</c> is invoked before every search.
/// </summary>
/// <param name="q">URI-escaped search text supplied by the caller.</param>
/// <returns>The distinct matching practitioners (at most 100).</returns>
public List<PraticienToIndexModel> Search(string q)
{
    q = Uri.UnescapeDataString(q);

    // Resolve the configured index folder relative to the web application root.
    var path = ConfigurationManager.AppSettings["LuceneDirectory"];
    var chemin = HttpContext.Current.Server.MapPath("~");
    path = Path.Combine(chemin, path);

    var result = new List<PraticienToIndexModel>();
    var indexDirInfo = new DirectoryInfo(path);
    if (!System.IO.Directory.Exists(path))
    {
        System.IO.Directory.CreateDirectory(path);
    }

    // NOTE(review): the directory is intentionally left open as before, because Index(directory)
    // is defined elsewhere and may keep a reference to it - confirm before disposing it here.
    Directory directory = FSDirectory.Open(indexDirInfo, new SimpleFSLockFactory(indexDirInfo));
    Index(directory);

    // using blocks release the analyzer and the searcher even when parsing or searching throws.
    using (var analyzer = new KeywordAnalyzer())
    using (var searcher = new IndexSearcher(directory))
    {
        var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "All", analyzer)
        {
            AllowLeadingWildcard = true,
            LowercaseExpandedTerms = false
        };

        // The text comes straight from the request: escape query-syntax characters so input
        // such as ':' or '(' is matched literally instead of raising a parse error or
        // altering the structure of the query.
        var escaped = QueryParser.Escape(q);
        var queryString = string.Format("{0} OR *{0}* OR *{0} OR {0}*", escaped);
        var query = parser.Parse(queryString);

        const int maxResult = 100;
        var hits = searcher.Search(query, maxResult);
        var mapper = new ReflectionDocumentMapper<PraticienToIndexModel>(Lucene.Net.Util.Version.LUCENE_30);

        // ScoreDocs already holds min(TotalHits, maxResult) entries.
        foreach (var scoreDoc in hits.ScoreDocs)
        {
            var praticien = new PraticienToIndexModel();
            var doc = searcher.Doc(scoreDoc.Doc);
            mapper.ToObject(doc, null, praticien);

            if (!result.Any(r => r.Cin.Equals(praticien.Cin)))
            {
                result.Add(praticien);
            }
        }
    }

    return result;
}
/// <summary>
/// A keywords meta tag in the HTML must produce a "Found meta keyword" entry that quotes
/// the tag's content.
/// </summary>
public void AnalyzeMetaKeyword_KeywordMeta_Found_Test()
{
    // Arrange
    var html = @"<meta name='keywords' content='Pet Insurance, Car Insurance, Vehicle Insurance'>";
    const string expected = "Found meta keyword: 'Pet Insurance, Car Insurance, Vehicle Insurance'";
    var analyzer = new KeywordAnalyzer(html);

    // Act
    var actual = analyzer.AnalyzeHtml();

    // Assert
    var containsExpected = actual.Contains(expected);
    Assert.IsTrue(containsExpected, $"Expected results to contain: '{expected}'. Actual: {string.Join(", ",actual)}");
}
/// <summary>
/// HTML that has a meta tag, but not a keywords one, must yield the
/// "No keyword meta tag found" entry.
/// </summary>
public void AnalyzeMetaKeyword_NoKeywordMeta_Test()
{
    // Arrange
    var html = @"<meta name='someMeta' content='bla'>";
    const string expected = "No keyword meta tag found";
    var analyzer = new KeywordAnalyzer(html);

    // Act
    var actual = analyzer.AnalyzeHtml();

    // Assert
    var containsExpected = actual.Contains(expected);
    Assert.IsTrue(containsExpected, $"Expected results to contain: '{expected}'");
}
/// <summary>
/// Empty HTML must yield the "No meta tags found" entry.
/// </summary>
public void AnalyzeMetaKeyword_NoMeta_Test()
{
    // Arrange
    var html = string.Empty;
    const string expected = "No meta tags found";
    var analyzer = new KeywordAnalyzer(html);

    // Act
    var actual = analyzer.AnalyzeHtml();

    // Assert
    var containsExpected = actual.Contains(expected);
    Assert.IsTrue(containsExpected, $"Expected results to contain: '{expected}'");
}
/// <summary>
/// Assembles the per-field analyzer for this index definition.
/// Custom analyzers from <c>indexDefinition.Analyzers</c> are instantiated first; then each
/// entry of <c>indexDefinition.Indexes</c> is mapped to a shared keyword analyzer
/// (<c>NotAnalyzed</c>) or a shared standard analyzer (<c>Analyzed</c> without a custom entry).
/// </summary>
/// <param name="defaultAnalyzer">Analyzer for fields without a specific entry.</param>
/// <param name="toDispose">Collects a close action for every analyzer registered or created here.</param>
/// <param name="forQuerying">
/// When true, custom analyzers whose type carries <c>NotForQueryingAttribute</c> are not added to the wrapper.
/// </param>
/// <returns>The configured wrapper.</returns>
public PerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection<Action> toDispose, bool forQuerying = false)
{
    toDispose.Add(defaultAnalyzer.Close);
    var wrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer);

    foreach (var customEntry in indexDefinition.Analyzers)
    {
        var instance = IndexingExtensions.CreateAnalyzerInstance(customEntry.Key, customEntry.Value);
        if (instance == null)
        {
            continue;
        }

        // Registered for disposal even when it ends up not being added to the wrapper.
        toDispose.Add(instance.Close);

        var excludedFromQuerying =
            forQuerying &&
            instance.GetType().GetCustomAttributes(typeof(NotForQueryingAttribute), false).Length > 0;
        if (excludedFromQuerying)
        {
            continue;
        }

        wrapper.AddAnalyzer(customEntry.Key, instance);
    }

    // Both shared analyzers are created on first use only.
    KeywordAnalyzer sharedKeywordAnalyzer = null;
    StandardAnalyzer sharedStandardAnalyzer = null;

    foreach (var indexEntry in indexDefinition.Indexes)
    {
        var fieldName = indexEntry.Key;
        var mode = indexEntry.Value;

        if (mode == FieldIndexing.NotAnalyzed)
        {
            if (sharedKeywordAnalyzer == null)
            {
                sharedKeywordAnalyzer = new KeywordAnalyzer();
                toDispose.Add(sharedKeywordAnalyzer.Close);
            }

            wrapper.AddAnalyzer(fieldName, sharedKeywordAnalyzer);
        }
        else if (mode == FieldIndexing.Analyzed)
        {
            // A field with an entry in Analyzers was handled (or deliberately skipped) above.
            if (!indexDefinition.Analyzers.ContainsKey(fieldName))
            {
                if (sharedStandardAnalyzer == null)
                {
                    sharedStandardAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
                    toDispose.Add(sharedStandardAnalyzer.Close);
                }

                wrapper.AddAnalyzer(fieldName, sharedStandardAnalyzer);
            }
        }
    }

    return wrapper;
}
//
// POST: /Xml2Model/upvote
/*public ActionResult upvote()
 * {
 *     return View(db.likes.Find(id)); // going to the upvote page, calling .AntiForgeryToken and then returning to the Index page is cumbersome
 * }
 * [HttpPost, ActionName("Index")]
 * [ValidateAntiForgeryToken]
 * public ActionResult upvoteConfirmed(int Uid, int Iid)
 * {
 *     //Models.like Li = db.likes.Find(Uid,Iid);
 *     Models.like Li = new Models.like( Uid, Iid );
 *     db.likes.Add(Li);
 *     db.SaveChanges();
 *     return RedirectToAction("Index");
 * }
 */

/// <summary>
/// Renders the "About" view after setting the page description message in the view bag.
/// </summary>
/// <returns>The default view for this action.</returns>
public ActionResult About()
{
    // A KeywordAnalyzer used to be instantiated here and never used; it has been removed.
    ViewBag.Message = "Your app description page.";
    return View();
}
/// <summary>
/// Writes a batch of test objects plus one extra object, replaces the extra object through
/// the expression-based <c>Update</c> overload (passing an explicit analyzer), and verifies
/// that the replacement is searchable, the replaced object is gone, and the batch is intact.
/// </summary>
public void UpdateWithExpressionAndAnalyzerSuccess()
{
    const int NumObjects = 10;
    Analyzer analyzer = new KeywordAnalyzer();

    // Seed the index.
    WriteTestObjects(NumObjects, o => o.ToDocument(), analyzer);
    Assert.AreEqual(NumObjects, writer.NumDocs);

    // Add the object that will later be replaced.
    var original = new TestObject
    {
        Number = 1234,
        String = "Test Object 1234",
    };
    writer.Add(original, analyzer);
    writer.Commit();
    Assert.AreEqual(NumObjects + 1, writer.NumDocs);

    // Replace it; the document count must stay the same.
    var replacement = new TestObject
    {
        Number = 2345,
        String = "Something Else 2345",
    };
    writer.Update(replacement, MappingSettings.Default, o => o.String == "Test Object 1234", analyzer);
    writer.Commit();
    Assert.AreEqual(NumObjects + 1, writer.NumDocs);

    using (DirectoryReader reader = DirectoryReader.Open(dir))
    {
        var searcher = new IndexSearcher(reader);

        // The replacement is found and carries the new values.
        TestObject found = searcher.AsQueryable<TestObject>().Single(o => o.Number == 2345);
        Assert.AreEqual(replacement.Number, found.Number);
        Assert.AreEqual(replacement.String, found.String);

        // The replaced object no longer exists.
        TestObject stale = searcher.AsQueryable<TestObject>().SingleOrDefault(o => o.Number == 1234);
        Assert.IsNull(stale);

        // Everything else is untouched.
        TestObject[] remaining = searcher.AsQueryable<TestObject>()
            .Where(o => o.Number != 2345)
            .ToArray();
        Assert.IsNotNull(remaining);
        Assert.AreEqual(NumObjects, remaining.Length);
        foreach (TestObject item in remaining)
        {
            Assert.AreNotEqual(replacement.Number, item.Number);
            Assert.AreNotEqual(replacement.String, item.String);
        }
    }
}
/// <summary>
/// Demo driver: reads a text file, runs the keyword analyzer over it, prints every keyword
/// with its rank, then prints the first 50 keywords, and waits for Enter.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main1(string[] args)
{
    //Note: you will have to supply your own text files
    // NOTE(review): the path points at a .docx file but is read as plain text - confirm this is intended.
    string gettys = File.ReadAllText(@"C:\Users\LaKissMe\Desktop\First.docx");

    KeywordAnalyzer ka = new KeywordAnalyzer();
    var g = ka.Analyze(gettys, "hello");

    Console.WriteLine("first");
    foreach (var key in g.Keywords)
    {
        // Use an explicit format string: the original passed key.Word AS the format string,
        // which silently dropped the rank and throws FormatException for words containing braces.
        Console.WriteLine(" key: {0}, rank: {1}", key.Word, key.Rank);
    }

    Console.WriteLine("first");
    foreach (var key in g.Keywords.Take(50))
    {
        Console.WriteLine(" {0}", key.Word);
    }

    Console.ReadLine();
}
/// <summary>
/// Builds the per-field analyzer for a set of index fields. <c>Exact</c> fields share one
/// <see cref="KeywordAnalyzer"/>; <c>Search</c> fields use the analyzer returned by
/// <c>GetAnalyzer</c> or, when that returns null, one shared <c>RavenStandardAnalyzer</c>.
/// Fields with any other indexing mode keep the default analyzer.
/// </summary>
/// <param name="createDefaultAnalyzer">Factory for the wrapper's default analyzer.</param>
/// <param name="fields">Index fields keyed by name; must not contain the "all fields" entry.</param>
/// <param name="forQuerying">Forwarded to <c>GetAnalyzer</c>.</param>
/// <returns>The configured wrapper.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when <paramref name="fields"/> contains the "all fields" key.
/// </exception>
protected static RavenPerFieldAnalyzerWrapper CreateAnalyzer(Func<Analyzer> createDefaultAnalyzer, Dictionary<string, IndexField> fields, bool forQuerying = false)
{
    if (fields.ContainsKey(Constants.Documents.Indexing.Fields.AllFields))
    {
        throw new InvalidOperationException($"Detected '{Constants.Documents.Indexing.Fields.AllFields}'. This field should not be present here, because inheritance is done elsewhere.");
    }

    var wrapper = new RavenPerFieldAnalyzerWrapper(createDefaultAnalyzer());

    // Shared instances, created on first use.
    KeywordAnalyzer sharedKeywordAnalyzer = null;
    RavenStandardAnalyzer sharedStandardAnalyzer = null;

    foreach (var entry in fields)
    {
        var indexField = entry.Value;
        var fieldName = indexField.Name;

        if (indexField.Indexing == FieldIndexing.Exact)
        {
            if (sharedKeywordAnalyzer == null)
            {
                sharedKeywordAnalyzer = new KeywordAnalyzer();
            }

            wrapper.AddAnalyzer(fieldName, sharedKeywordAnalyzer);
        }
        else if (indexField.Indexing == FieldIndexing.Search)
        {
            var fieldAnalyzer = GetAnalyzer(fieldName, indexField, forQuerying);
            if (fieldAnalyzer != null)
            {
                wrapper.AddAnalyzer(fieldName, fieldAnalyzer);
            }
            else
            {
                if (sharedStandardAnalyzer == null)
                {
                    sharedStandardAnalyzer = new RavenStandardAnalyzer(global::Lucene.Net.Util.Version.LUCENE_29);
                }

                wrapper.AddAnalyzer(fieldName, sharedStandardAnalyzer);
            }
        }
    }

    return wrapper;
}
/// <summary>
/// Creates an <see cref="IndexWriter"/> for the entity's Lucene directory. The writer uses a
/// per-field analyzer whose default is a <see cref="KeywordAnalyzer"/>, with one override
/// per entry returned by <c>GetFields</c>.
/// </summary>
/// <param name="connection">Connection supplying the directory location, version and logger.</param>
/// <param name="entity">Entity whose index is being written.</param>
/// <returns>A new writer with unlimited field length; the caller owns it.</returns>
public static IndexWriter Create(AbstractConnection connection, Entity entity)
{
    var directory = LuceneDirectoryFactory.Create(connection, entity);

    var perFieldAnalyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer());
    var fieldAnalyzers = GetFields(entity, connection.Version, connection.Logger);
    foreach (var pair in fieldAnalyzers)
    {
        perFieldAnalyzer.AddAnalyzer(pair.Key, pair.Value);
    }

    return new IndexWriter(directory, perFieldAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED);
}
/// <summary>
/// Looks up a single document by parsing <paramref name="id"/> as a query against the "id"
/// field (keyword analyzer) and returning the top hit.
/// </summary>
/// <param name="id">Query text parsed against the "id" field.</param>
/// <returns>The top hit converted by <c>Docs.DocToDict</c>, or null when nothing matches.</returns>
public IDictionary<string, string> Find(string id)
{
    // The analyzer is only needed while parsing; the using block releases it on every path
    // (it was never closed before).
    using (var keywordAnalyzer = new KeywordAnalyzer())
    {
        var parser = new QueryParser(Version.LUCENE_30, "id", keywordAnalyzer);
        var query = parser.Parse(id);

        var scoreDocs = _searcher.Search(query, 1).ScoreDocs;
        if (scoreDocs != null && scoreDocs.Length > 0)
        {
            return Docs.DocToDict(_searcher.Doc(scoreDocs[0].Doc), 1f);
        }

        return null;
    }
}
/// <summary>
/// Sample walkthrough of the Lucene API: instantiates the stock analyzers, wires a
/// per-field analyzer, opens a writer/reader/searcher on <c>_directory</c>, runs a boosted
/// multi-field query, defines (but does not enumerate) a projection of the hits to
/// <c>Book</c>, then optimizes, commits and calls <c>DeleteAll</c> on the writer.
/// </summary>
public void Code()
{
    // One instance of each stock analyzer; only the keyword and standard ones are used below.
    Analyzer keywordAnalyzer = new KeywordAnalyzer();
    Analyzer simpleAnalyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    Analyzer stopAnalyzer = new Lucene.Net.Analysis.StopAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    Analyzer whitespaceAnalyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
    Analyzer standardAnalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

    // Standard analysis by default, exact matching for the two name fields.
    var perFieldAnalyzer = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(standardAnalyzer);
    perFieldAnalyzer.AddAnalyzer("firstname", keywordAnalyzer);
    perFieldAnalyzer.AddAnalyzer("lastname", keywordAnalyzer);

    IndexWriter indexWriter = new IndexWriter(_directory, perFieldAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED);
    IndexReader indexReader = indexWriter.GetReader();
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    // Single-field alternative:
    // QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", _standardanalyzer);
    string[] searchFields = new[] { "text", "title", "author" };
    var fieldBoosts = new Dictionary<string, float>
    {
        { "text", 2.0f },
        { "title", 1.5f }
    };
    QueryParser queryParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, searchFields, standardAnalyzer, fieldBoosts);
    Query parsedQuery = queryParser.Parse("lucene is great");

    TopDocs topDocs = indexSearcher.Search(parsedQuery, 1000);

    // Deferred LINQ pipelines: nothing is loaded until they are enumerated.
    IEnumerable<Document> documents = topDocs.ScoreDocs.Select(scoreDoc => indexSearcher.Doc(scoreDoc.Doc));
    var books = documents.Select(document => new Book()
    {
        Text = document.Get("text"),
        Title = document.Get("title"),
        Author = document.Get("author"),
        Length = Int32.Parse(document.Get("length"))
    });

    indexWriter.Optimize();
    indexWriter.Commit();
    indexWriter.DeleteAll();
}
/// <summary>
/// Assembles the per-field analyzer for this index definition.
/// Custom analyzers from <c>indexDefinition.Analyzers</c> are added first; then each entry of
/// <c>indexDefinition.Indexes</c> is mapped to a shared keyword analyzer
/// (<c>NotAnalyzed</c> / <c>NotAnalyzedNoNorms</c>) or a shared standard analyzer
/// (<c>Analyzed</c> without a custom entry).
/// </summary>
/// <param name="defaultAnalyzer">Analyzer for fields without a specific entry.</param>
/// <param name="toDispose">Collects a close action for every analyzer registered or created here.</param>
/// <returns>The configured wrapper.</returns>
public PerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection<Action> toDispose)
{
    toDispose.Add(defaultAnalyzer.Close);
    var wrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer);

    foreach (var customEntry in indexDefinition.Analyzers)
    {
        var instance = IndexingExtensions.CreateAnalyzerInstance(customEntry.Key, customEntry.Value);
        if (instance != null)
        {
            toDispose.Add(instance.Close);
            wrapper.AddAnalyzer(customEntry.Key, instance);
        }
    }

    // Both shared analyzers are created on first use only.
    KeywordAnalyzer sharedKeywordAnalyzer = null;
    StandardAnalyzer sharedStandardAnalyzer = null;

    foreach (var indexEntry in indexDefinition.Indexes)
    {
        var fieldName = indexEntry.Key;
        var mode = indexEntry.Value;

        if (mode == FieldIndexing.NotAnalyzed || mode == FieldIndexing.NotAnalyzedNoNorms)
        {
            if (sharedKeywordAnalyzer == null)
            {
                sharedKeywordAnalyzer = new KeywordAnalyzer();
                toDispose.Add(sharedKeywordAnalyzer.Close);
            }

            wrapper.AddAnalyzer(fieldName, sharedKeywordAnalyzer);
        }
        else if (mode == FieldIndexing.Analyzed)
        {
            // Fields with an entry in Analyzers were handled by the first loop.
            if (!indexDefinition.Analyzers.ContainsKey(fieldName))
            {
                if (sharedStandardAnalyzer == null)
                {
                    sharedStandardAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
                    toDispose.Add(sharedStandardAnalyzer.Close);
                }

                wrapper.AddAnalyzer(fieldName, sharedStandardAnalyzer);
            }
        }
    }

    return wrapper;
}
/// <summary>
/// Creates the analyzer used by this component: an <c>SnPerFieldAnalyzerWrapper</c> whose
/// default analyzer is a <see cref="KeywordAnalyzer"/>.
/// </summary>
/// <returns>A new wrapper instance on every call.</returns>
internal static Analyzer GetAnalyzer()
{
    // Field-to-analyzer table as recorded by the original author:
    //
    //   Field      Analyzer
    //   ---------  ------------------------------------------------
    //   Name       Lucene.Net.Analysis.KeywordAnalyzer
    //   Path       Lucene.Net.Analysis.KeywordAnalyzer
    //   Keywords   Lucene.Net.Analysis.StopAnalyzer
    //   _Text      Lucene.Net.Analysis.Standard.StandardAnalyzer
    //   ---------  ------------------------------------------------
    //   Default    Lucene.Net.Analysis.WhitespaceAnalyzer
    //
    // NOTE(review): the table names WhitespaceAnalyzer as the default, but the code below
    // passes a KeywordAnalyzer. The per-field rows are presumably applied inside
    // SnPerFieldAnalyzerWrapper - verify which of the two is current.
    return new Indexing.SnPerFieldAnalyzerWrapper(new KeywordAnalyzer());
}
/// <summary>
/// Parses a query string into a Lucene <see cref="Query"/>. The string is first passed
/// through <c>PreProcessUntokenizedTerms</c> together with a temporary keyword analyzer;
/// the parser allows leading wildcards and uses an empty default field.
/// </summary>
/// <param name="query">The raw query text.</param>
/// <param name="analyzer">Per-field analyzer used for pre-processing and parsing.</param>
/// <returns>The parsed query.</returns>
public static Query BuildQuery(string query, PerFieldAnalyzerWrapper analyzer)
{
    var untokenizedAnalyzer = new KeywordAnalyzer();
    try
    {
        var processedQuery = PreProcessUntokenizedTerms(analyzer, query, untokenizedAnalyzer);

        var parser = new RangeQueryParser(Version.LUCENE_29, "", analyzer);
        parser.SetAllowLeadingWildcard(true);

        return parser.Parse(processedQuery);
    }
    finally
    {
        // Closed on every path, including a parse failure.
        untokenizedAnalyzer.Close();
    }
}
/// <summary>
/// Parses a query string into a Lucene <see cref="Query"/> after running the
/// untokenized-term and search-term pre-processors. Leading wildcards are allowed and the
/// default field is empty.
/// </summary>
/// <param name="query">The raw query text.</param>
/// <param name="analyzer">Per-field analyzer handed to the query parser.</param>
/// <returns>The parsed query.</returns>
public static Query BuildQuery(string query, PerFieldAnalyzerWrapper analyzer)
{
    // A KeywordAnalyzer used to be created and closed here without ever being used; it has
    // been removed together with the try/finally that existed only to close it.
    var queryParser = new RangeQueryParser(Version.LUCENE_29, string.Empty, analyzer);

    query = PreProcessUntokenizedTerms(query, queryParser);
    query = PreProcessSearchTerms(query);

    queryParser.SetAllowLeadingWildcard(true); // not the recommended approach, should rather use ReverseFilter
    return queryParser.Parse(query);
}
/// <summary>
/// Creates an <see cref="IndexWriter"/> for the entity's Lucene directory. The default
/// analyzer is the process's "default" search type when one is configured, otherwise a
/// <see cref="KeywordAnalyzer"/>; per-field overrides come from <c>GetFields</c>.
/// </summary>
/// <param name="connection">Connection supplying the directory location, version and logger.</param>
/// <param name="process">Process whose search types may define the default analyzer.</param>
/// <param name="entity">Entity whose index is being written.</param>
/// <returns>A new writer with unlimited field length; the caller owns it.</returns>
public static IndexWriter Create(AbstractConnection connection, Process process, Entity entity)
{
    // The directory must stay open for as long as the returned writer is in use. The
    // original wrapped it in a using block, which disposed the directory on return and
    // handed the caller a writer backed by a closed directory. It is now disposed only
    // when building the writer fails.
    var dir = LuceneDirectoryFactory.Create(connection, entity);
    try
    {
        Analyzer defaultAnalyzer;
        if (process.SearchTypes.ContainsKey("default"))
        {
            defaultAnalyzer = LuceneAnalyzerFactory.Create(process.SearchTypes["default"].Analyzer, connection.Version);
        }
        else
        {
            defaultAnalyzer = new KeywordAnalyzer();
        }

        var analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer);
        foreach (var field in GetFields(entity, connection.Version, connection.Logger))
        {
            analyzer.AddAnalyzer(field.Key, field.Value);
        }

        return new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
    }
    catch
    {
        dir.Dispose();
        throw;
    }
}
/// <summary>
/// Indexes every line of <paramref name="filePath"/> as its own document (one unstored
/// <c>FieldName</c> field per document) into <paramref name="dir"/> and commits.
/// </summary>
/// <param name="dir">Target Lucene directory.</param>
/// <param name="filePath">Text file with one value per line.</param>
private static void LoadLuceneIndex(Directory dir, string filePath)
{
    // using blocks release the writer and the analyzer even when reading the file or adding
    // a document throws; before, a failure left the writer open.
    using (Analyzer analyzer = new KeywordAnalyzer()) // alternative: new SimpleAnalyzer()
    using (var indexWriter = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
    {
        foreach (var name in System.IO.File.ReadLines(filePath))
        {
            var document = new Document();
            document.Add(new Field(FieldName, name, Field.Store.NO, Field.Index.ANALYZED));
            indexWriter.AddDocument(document);
        }

        indexWriter.Commit();
    }
}
/// <summary>
/// Finds untokenized terms in the query, registers a keyword analyzer for their fields and
/// rewrites each term from its "[[...]]" form into a double-quoted value.
/// </summary>
/// <param name="analyzer">Per-field analyzer that receives the keyword analyzer for each matched field.</param>
/// <param name="query">The query text to scan.</param>
/// <param name="keywordAnalyzer">
/// Assigned a new <see cref="KeywordAnalyzer"/> when at least one match is found; left untouched otherwise.
/// </param>
/// <returns>The rewritten query, or the input unchanged when nothing matched.</returns>
private static string PreProcessUntokenizedTerms(PerFieldAnalyzerWrapper analyzer, string query, ref Analyzer keywordAnalyzer)
{
    var matches = untokenizedQuery.Matches(query);
    if (matches.Count < 1)
    {
        return query;
    }

    var buffer = new StringBuilder(query);

    // A keyword analyzer leaves the value as a single, untokenized term.
    keywordAnalyzer = new KeywordAnalyzer();

    // Walk the matches back to front so edits never shift the indexes of matches still to come.
    for (var matchIndex = matches.Count - 1; matchIndex >= 0; matchIndex--)
    {
        var match = matches[matchIndex];

        // Group 1 is the field name: its terms must not be tokenized.
        analyzer.AddAnalyzer(match.Groups[1].Value, keywordAnalyzer);

        // Group 2 is the term including its enclosing "[[" and "]]".
        var termGroup = match.Groups[2];
        var termStart = termGroup.Index;

        // Offset (from termStart) of the closing "]]"; grows as quotes are inserted.
        var closingOffset = termGroup.Length - 2;

        // Add a closing quote unless the value already ends with one.
        if (buffer[termStart + closingOffset - 1] != '"')
        {
            buffer.Insert(termStart + closingOffset, '"');
            closingOffset += 1;
        }

        // Add an opening quote unless the value already starts with one.
        if (buffer[termStart + 2] != '"')
        {
            buffer.Insert(termStart + 2, '"');
            closingOffset += 1;
        }

        // Strip the brackets, closing pair first so the opening pair's index stays valid.
        buffer.Remove(termStart + closingOffset, 2);
        buffer.Remove(termStart, 2);
    }

    return buffer.ToString();
}