Example #1
0
        /// <summary>
        /// Indexes one line-string shape into an in-memory directory using a geohash prefix-tree
        /// strategy, then asserts that an intersecting line-string query reports exactly one hit.
        /// </summary>
        public void PureLucene()
        {
            using (var ramDirectory = new RAMDirectory())
            {
                // Write phase: a single document carrying the indexable fields of the shape.
                using (var analyzer = new KeywordAnalyzer())
                {
                    using (var indexWriter = new IndexWriter(ramDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
                    {
                        var document      = new Lucene.Net.Documents.Document();
                        var indexedShape  = NtsSpatialContext.GEO.ReadShape("LINESTRING (0 0, 1 1, 2 1)");
                        var indexStrategy = SpatialIndex.CreateStrategy("WKT", SpatialSearchStrategy.GeohashPrefixTree, GeohashPrefixTree.GetMaxLevelsPossible());

                        foreach (var indexableField in indexStrategy.CreateIndexableFields(indexedShape))
                        {
                            document.Add(indexableField);
                        }

                        indexWriter.AddDocument(document);
                        indexWriter.Commit();
                    }
                }

                // Read phase: build an Intersects query with an identically configured strategy.
                var queryShape    = NtsSpatialContext.GEO.ReadShape("LINESTRING (1 0, 1 1, 1 2)");
                var spatialArgs   = new SpatialArgs(SpatialOperation.Intersects, queryShape);
                var queryStrategy = SpatialIndex.CreateStrategy("WKT", SpatialSearchStrategy.GeohashPrefixTree, GeohashPrefixTree.GetMaxLevelsPossible());
                var spatialQuery  = queryStrategy.MakeQuery(spatialArgs);

                using (var indexSearcher = new IndexSearcher(ramDirectory))
                {
                    var results = indexSearcher.Search(spatialQuery, 5);
                    Assert.Equal(1, results.TotalHits);
                }
            }
        }
        /// <summary>
        /// Prepares the fixture: fills two in-memory directories with documents built by
        /// <c>CreateDocument</c>, creates the shared prefix query on <c>NameFieldName</c> and opens
        /// one read-only searcher per directory.
        /// </summary>
        public void Setup()
        {
            _directory1 = new RAMDirectory();
            _directory2 = new RAMDirectory();
            var analyzer = new KeywordAnalyzer();

            // `using` releases each writer even when adding a document or committing throws.
            using (var indexWriter = new IndexWriter(_directory1, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                indexWriter.AddDocument(CreateDocument(10, "Anders"));
                indexWriter.AddDocument(CreateDocument(30, "Anne"));
                indexWriter.Commit();
            }

            using (var indexWriter2 = new IndexWriter(_directory2, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                indexWriter2.AddDocument(CreateDocument(40, "Andreas"));
                indexWriter2.AddDocument(CreateDocument(20, "Anja"));
                indexWriter2.AddDocument(CreateDocument(50, "Abe"));
                indexWriter2.Commit();
            }

            _query = new PrefixQuery(new Term(NameFieldName, "A"));

            // Second argument `true` opens the searchers read-only.
            _searcher1 = new IndexSearcher(_directory1, true);
            _searcher2 = new IndexSearcher(_directory2, true);
        }
Example #3
0
        /// <summary>
        /// Pre-processes the raw query text and parses it into a Lucene <c>Query</c>.
        /// </summary>
        /// <param name="query">Raw query text; rewritten by the pre-processing steps before parsing.</param>
        /// <param name="indexQuery">Supplies the default field (empty string when null) and the default operator.</param>
        /// <param name="analyzer">Analyzer handed to the query parser.</param>
        /// <returns>The parsed query.</returns>
        /// <exception cref="ParseException">
        /// Thrown when parsing fails; the message contains the processed query and, when
        /// pre-processing changed it, the original text as well.
        /// </exception>
        public static Query BuildQuery(string query, IndexQuery indexQuery, PerFieldAnalyzerWrapper analyzer)
        {
            // Kept so the error message can show what the caller actually sent.
            var originalQuery = query;

            try
            {
                var queryParser = new RangeQueryParser(Version.LUCENE_29, indexQuery.DefaultField ?? string.Empty, analyzer)
                {
                    DefaultOperator = indexQuery.DefaultOperator == QueryOperator.Or
                                                                ? QueryParser.Operator.OR
                                                                : QueryParser.Operator.AND,
                    AllowLeadingWildcard = true
                };
                query = PreProcessUntokenizedTerms(query, queryParser);
                query = PreProcessSearchTerms(query);
                query = PreProcessDateTerms(query, queryParser);
                return(queryParser.Parse(query));
            }
            catch (ParseException pe)
            {
                if (originalQuery == query)
                {
                    throw new ParseException("Could not parse: '" + query + "'", pe);
                }
                throw new ParseException("Could not parse modified query: '" + query + "' original was: '" + originalQuery + "'", pe);
            }
        }
Example #4
0
        /// <summary>
        /// Searches the index at <c>GetIndexFilePath()</c> and returns the content addresses of the
        /// matching documents (at most 1000).
        /// </summary>
        /// <param name="textualSearch">Query parsed with a standard analyzer against the "_GLOM_" field; ignored when null or empty.</param>
        /// <param name="nonTextualSearch">Query parsed with a keyword analyzer and an empty default field; ignored when null or empty.</param>
        /// <returns>
        /// Addresses read from the "_CONTENTADDRESS_" field of each hit. When both queries are given
        /// a document must match both. Empty when neither query is given.
        /// </returns>
        public List <ContentAddress> Search(string textualSearch, string nonTextualSearch)
        {
            string    indexFileLocation = GetIndexFilePath();
            Directory dir =
                Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation);
            IndexSearcher searcher       = null;
            Analyzer      textualAnal    = null;
            Analyzer      nonTextualAnal = null;

            try
            {
                // Opened inside the try so the directory is still closed if this throws.
                searcher = new IndexSearcher(dir);

                Query textualQuery    = null;
                Query nonTextualQuery = null;

                if (!string.IsNullOrEmpty(textualSearch))
                {
                    textualAnal = new StandardAnalyzer();
                    QueryParser parser = new QueryParser("_GLOM_", textualAnal);
                    textualQuery = parser.Parse(textualSearch);
                }
                if (!string.IsNullOrEmpty(nonTextualSearch))
                {
                    nonTextualAnal = new KeywordAnalyzer();
                    QueryParser parser = new QueryParser("", nonTextualAnal);
                    nonTextualQuery = parser.Parse(nonTextualSearch);
                }

                Query query;
                if (textualQuery != null && nonTextualQuery != null)
                {
                    BooleanQuery combined = new BooleanQuery();
                    combined.Add(textualQuery, BooleanClause.Occur.MUST);
                    combined.Add(nonTextualQuery, BooleanClause.Occur.MUST);
                    query = combined;
                }
                else
                {
                    query = textualQuery ?? nonTextualQuery;
                }

                if (query == null)
                {
                    // Nothing to search for; avoid handing a null query to the searcher.
                    return(new List <ContentAddress>());
                }

                TopDocs hits = searcher.Search(query, 1000);

                // Iterate the returned scoreDocs rather than 0..totalHits: totalHits counts every
                // match in the index and can exceed the 1000 documents actually returned.
                // ToList() materializes the results before the searcher is closed in `finally`.
                List <ContentAddress> addrs =
                    hits.scoreDocs
                    .Select(scoreDoc => ContentAddress.FromString(searcher.Doc(scoreDoc.doc).Get("_CONTENTADDRESS_")))
                    .ToList();
                return(addrs);
            }
            finally
            {
                if (searcher != null)
                {
                    searcher.Close();
                }
                if (textualAnal != null)
                {
                    textualAnal.Close();
                }
                if (nonTextualAnal != null)
                {
                    nonTextualAnal.Close();
                }
                dir.Close();
            }
        }
Example #5
0
        /// <summary>
        /// When no index named <paramref name="name"/> is registered with the examine manager,
        /// clears any lock left on the Lucene directory of <c>examineIndex</c>.
        /// NOTE(review): nothing in the visible body creates an index, and neither
        /// <paramref name="fields"/> nor <paramref name="analyzer"/> is used beyond defaulting — confirm intent.
        /// </summary>
        /// <param name="name">Name looked up in the examine manager.</param>
        /// <param name="fields">Field definitions (currently unused).</param>
        /// <param name="analyzer">Analyzer; defaults to a keyword analyzer when null (currently unused afterwards).</param>
        public void CreateIndex(string name, FieldDefinitionCollection fields, Analyzer analyzer = null)
        {
            analyzer = analyzer ?? new KeywordAnalyzer();

            //  examineIndex.WaitForIndexQueueOnShutdown = false;
            _examineManager.TryGetIndex(name, out var existingIndex);
            if (existingIndex != null)
            {
                return;
            }

            var luceneDirectory = examineIndex.GetLuceneDirectory();
            var lockId          = luceneDirectory.GetLockID();

            if (!string.IsNullOrEmpty(lockId))
            {
                //  _loggingService.Info("Forcing index {IndexerName} to be unlocked since it was left in a locked state", examineIndex.Name);
                luceneDirectory.ClearLock("write.lock");
            }

            if (IndexWriter.IsLocked(luceneDirectory))
            {
                IndexWriter.Unlock(luceneDirectory);
            }
        }
        /// <summary>
        /// Deletes every document whose "Cin" field equals <paramref name="cin"/> from the index
        /// stored in the folder named by the "LuceneDirectory" app setting.
        /// </summary>
        /// <param name="cin">Exact "Cin" term value of the record(s) to remove.</param>
        public void DeleteLuceneIndexRecord(string cin)
        {
            var path         = ConfigurationManager.AppSettings["LuceneDirectory"];
            var indexDirInfo = new DirectoryInfo(path);

            if (!System.IO.Directory.Exists(path))
            {
                System.IO.Directory.CreateDirectory(path);
            }

            using (Directory directory = FSDirectory.Open(indexDirInfo,
                                                          new SimpleFSLockFactory(indexDirInfo)))
            {
                // Forcibly removes any existing write lock before opening the writer.
                IndexWriter.Unlock(directory);

                // Nested usings dispose in reverse order: the writer first, then the analyzer
                // it was using, then the directory. Each is disposed exactly once.
                using (var analyzer = new KeywordAnalyzer())
                using (var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    // remove older index entry
                    var searchQuery = new TermQuery(new Term("Cin", cin));
                    writer.DeleteDocuments(searchQuery);

                    // Commit explicitly so the delete is persisted before the writer is released.
                    writer.Commit();
                }
            }
        }
Example #7
0
        /// <summary>
        /// Inserts the record into the ElasticSearch server, creating the target index first when
        /// requested and missing.
        /// </summary>
        /// <param name="recordToIndex">Record carrying the index name, type name and JSON body. Its JSON may be rewritten ("_id" renamed to "old_id") when the first attempt fails.</param>
        /// <param name="filePath">Source file path, used only in log messages.</param>
        /// <param name="runLog">Log that receives recovery and failure messages.</param>
        /// <param name="checkIndex">When true, the index is created with a keyword default analyzer if it does not exist.</param>
        /// <returns>The <c>Success</c> flag of the last indexing response.</returns>
        private async Task <bool> InsertAsync(Record recordToIndex, string filePath, Log runLog, bool checkIndex = true)
        {
            //If the index doesn't exist, we'll create it
            if (checkIndex)
            {
                if (!ElasticSearchConnection.ESClient.IndexExists(recordToIndex.indexName).Exists)
                {
                    var keywordA = new KeywordAnalyzer();
                    //We set the Default analyzer here to be "Keyword" type - so it will treat strings as "whole strings" and not as separate keywords
                    //very important for strings like "C#" or "Some long string"
                    ElasticSearchConnection.ESClient.CreateIndex(recordToIndex.indexName, x => x.Analysis(a => a.Analyzers(an => an.Add("default", keywordA))));
                }
            }

            Elasticsearch.Net.ElasticsearchResponse <Elasticsearch.Net.DynamicDictionary> response = null;
            bool indexWithoutParsing = false;

            try
            {
                // See if we can parse the JSON and try to retrieve the _id field out of it
                JObject myObj = JObject.Parse(recordToIndex.jsonString);
                if (myObj["_id"] != null)
                {
                    //If _id was specified then we'll just update the previous record automatically if there was one
                    string id = myObj["_id"].Value <String>();
                    response = await ElasticSearchConnection.ESClient.Raw.IndexAsync(recordToIndex.indexName, recordToIndex.typeName, id, recordToIndex.jsonString);

                    if (response.Success == false)
                    {
                        //The _id field of the JSON document is often problematic, so letting ES generate its own ID field instead here might fix it
                        recordToIndex.jsonString = recordToIndex.jsonString.Replace("_id", "old_id");
                        // Try one more time to index now after renaming the _id field
                        response = await ElasticSearchConnection.ESClient.Raw.IndexAsync(recordToIndex.indexName, recordToIndex.typeName, recordToIndex.jsonString);

                        if (response.Success == true)
                        {
                            //renaming the _id field fixed the problem
                            runLog.LogMessage("Recovered from ERROR - But had to rename _id field to old_id to index record in " + filePath);
                        }
                    }
                }
                else
                {
                    // If they didn't specify an "_id" then that's fine just index it anyway
                    response = await ElasticSearchConnection.ESClient.Raw.IndexAsync(recordToIndex.indexName, recordToIndex.typeName, recordToIndex.jsonString);
                }
            }
            catch
            {
                // Deliberately broad: any failure above (typically unparsable JSON) falls back to
                // indexing the raw string. The retry is performed after the catch block so it can
                // be awaited instead of blocking the calling thread.
                indexWithoutParsing = true;
            }

            if (indexWithoutParsing)
            {
                //just couldn't parse the JSON, try to index it anyway
                response = await ElasticSearchConnection.ESClient.Raw.IndexAsync(recordToIndex.indexName, recordToIndex.typeName, recordToIndex.jsonString);
            }

            if (response.Success == false)
            {
                //Failed to index a record
                runLog.LogMessage("Error: Failed to index a record in " + filePath, Log.MessageType.Error);
            }
            return(response.Success);
        }
    /// <summary>
    /// Demo entry point: indexes two documents with a "title" field into an in-memory directory,
    /// then prints the titles of all documents that pass <c>LuceneCustomFilter</c>, sorted by
    /// "title" with the reverse flag set.
    /// </summary>
    static void Main(string[] args)
    {
        Directory   index    = new RAMDirectory();
        Analyzer    analyzer = new KeywordAnalyzer();
        IndexWriter writer   = new IndexWriter(index, analyzer, true);

        try
        {
            Document doc = new Document();
            doc.Add(new Field("title", "t1", Field.Store.YES,
                              Field.Index.TOKENIZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("title", "t2", Field.Store.YES,
                              Field.Index.TOKENIZED));
            writer.AddDocument(doc);
        }
        finally
        {
            // Release the writer even when adding a document fails.
            writer.Close();
        }

        Searcher searcher = new IndexSearcher(index);

        try
        {
            Query       query          = new MatchAllDocsQuery();
            Filter      filter         = new LuceneCustomFilter();
            Sort        sort           = new Sort("title", true);
            Hits        hits           = searcher.Search(query, filter, sort);
            IEnumerator hitsEnumerator = hits.Iterator();

            while (hitsEnumerator.MoveNext())
            {
                Hit hit = (Hit)hitsEnumerator.Current;
                Console.WriteLine(hit.GetDocument().GetField("title").
                                  StringValue());
            }
        }
        finally
        {
            // Closed only after iteration; presumably the hits still read from the open searcher.
            searcher.Close();
        }
    }
        /// <summary>
        /// Returns the selectors of all elements whose normalized inner text contains at least one
        /// of the given keywords.
        /// </summary>
        /// <param name="elementsList">Supplies the candidate <c>Elements</c> and the <c>Keywords</c> to look for.</param>
        /// <returns>Selectors of the matching elements, in element order; empty when nothing matches.</returns>
        public List <string> GetSimilarElementsByKeywords(SimilarElementsSearch elementsList)
        {
            var selectors = new List <string>();

            foreach (var element in elementsList.Elements)
            {
                // Normalized lazily and only once per element: the text does not depend on the
                // keyword, and it is not needed at all when there are no keywords.
                string content = null;

                foreach (var keyword in elementsList.Keywords)
                {
                    if (content == null)
                    {
                        content = KeywordAnalyzer.ReplaceSpecialCharacters(element.InnerText);
                    }

                    if (content.Contains(keyword))
                    {
                        // One match is enough; no need to test the remaining keywords.
                        selectors.Add(element.Selector);
                        break;
                    }
                }
            }

            return(selectors);
        }
Example #10
0
        /// <summary>
        /// Creates a per-field analyzer wrapper whose default analyzer is a keyword analyzer.
        /// </summary>
        /// <returns>A new <c>Indexing.SnPerFieldAnalyzerWrapper</c> instance.</returns>
        internal static Analyzer GetAnalyzer()
        {
            return(new Indexing.SnPerFieldAnalyzerWrapper(new KeywordAnalyzer()));
        }
Example #11
0
        /// <summary>
        /// Builds a per-field analyzer wrapper around <paramref name="defaultAnalyzer"/>, assigning a
        /// shared keyword analyzer to not-analyzed fields and a shared standard analyzer to analyzed
        /// fields that have no entry in <c>indexDefinition.Analyzers</c>.
        /// </summary>
        /// <param name="defaultAnalyzer">Analyzer used for fields without a specific assignment.</param>
        /// <param name="toDispose">Receives the <c>Close</c> action of every analyzer this method owns or creates.</param>
        /// <param name="forQuerying">Not used by the active code in this version.</param>
        /// <returns>The configured wrapper.</returns>
        public PerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection <Action> toDispose, bool forQuerying = false)
        {
            toDispose.Add(defaultAnalyzer.Close);
            var wrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer);

            // Registration of custom per-field analyzers (CreateAnalyzerInstance plus the
            // NotForQueryingAttribute filter) and the Constants.AllFields default-analyzer
            // override are disabled in this version; the loop only enumerates the entries.
            foreach (var analyzer in indexDefinition.Analyzers)
            {
            }

            KeywordAnalyzer  sharedKeywordAnalyzer  = null;
            StandardAnalyzer sharedStandardAnalyzer = null;

            foreach (var fieldIndexing in indexDefinition.Indexes)
            {
                var indexingMode = fieldIndexing.Value;

                if (indexingMode == FieldIndexing.NotAnalyzed)
                {
                    // Created on first use and reused for every not-analyzed field.
                    if (sharedKeywordAnalyzer == null)
                    {
                        sharedKeywordAnalyzer = new KeywordAnalyzer();
                        toDispose.Add(sharedKeywordAnalyzer.Close);
                    }
                    wrapper.AddAnalyzer(fieldIndexing.Key, sharedKeywordAnalyzer);
                }
                else if (indexingMode == FieldIndexing.Analyzed)
                {
                    // Fields with an explicit analyzer entry are left alone.
                    if (indexDefinition.Analyzers.ContainsKey(fieldIndexing.Key))
                    {
                        continue;
                    }
                    if (sharedStandardAnalyzer == null)
                    {
                        sharedStandardAnalyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
                        toDispose.Add(sharedStandardAnalyzer.Close);
                    }
                    wrapper.AddAnalyzer(fieldIndexing.Key, sharedStandardAnalyzer);
                }
            }

            return(wrapper);
        }
        /// <summary>
        /// Re-indexes via <c>Index</c>, then searches the "All" field for <paramref name="q"/> as an
        /// exact term or as a prefix, suffix or infix wildcard, and maps the hits to models.
        /// </summary>
        /// <param name="q">URI-escaped search text; it is unescaped and inserted into the query string as is.</param>
        /// <returns>At most 100 models, de-duplicated by <c>Cin</c>, in hit order.</returns>
        public List <PraticienToIndexModel> Search(string q)
        {
            q = Uri.UnescapeDataString(q);

            var path = ConfigurationManager.AppSettings["LuceneDirectory"];

            //get path
            var chemin = HttpContext.Current.Server.MapPath("~");

            path = Path.Combine(chemin, path);
            var result = new List <PraticienToIndexModel>();

            var indexDirInfo = new DirectoryInfo(path);

            if (!System.IO.Directory.Exists(path))
            {
                System.IO.Directory.CreateDirectory(path);
            }

            // NOTE(review): the directory is intentionally not disposed here because it is handed
            // to Index(directory), which may retain it — confirm before adding a using.
            Directory directory = FSDirectory.Open(indexDirInfo,
                                                   new SimpleFSLockFactory(indexDirInfo));

            Index(directory);

            // `using` releases the analyzer and the searcher even when parsing or searching throws.
            using (var analyzer = new KeywordAnalyzer())
            using (var searcher = new IndexSearcher(directory))
            {
                var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "All", analyzer)
                {
                    AllowLeadingWildcard   = true,
                    LowercaseExpandedTerms = false
                };
                var queryString = string.Format("{0} OR *{0}* OR *{0} OR {0}*", q);

                var query     = parser.Parse(queryString);
                var maxResult = 100;
                var hits      = searcher.Search(query, maxResult);

                var mapper = new ReflectionDocumentMapper <PraticienToIndexModel>(Lucene.Net.Util.Version.LUCENE_30);

                // ScoreDocs already holds min(TotalHits, maxResult) entries.
                foreach (var scoreDoc in hits.ScoreDocs)
                {
                    var praticien = new PraticienToIndexModel();

                    var doc = searcher.Doc(scoreDoc.Doc);

                    mapper.ToObject(doc, null, praticien);
                    if (!result.Any(r => r.Cin.Equals(praticien.Cin)))
                    {
                        result.Add(praticien);
                    }
                }
            }

            return(result);
        }
        /// <summary>
        /// A keywords meta tag must produce a "Found meta keyword" entry listing its content.
        /// </summary>
        public void AnalyzeMetaKeyword_KeywordMeta_Found_Test()
        {
            // Arrange
            const string expectedMessage = "Found meta keyword: 'Pet Insurance, Car Insurance, Vehicle Insurance'";
            const string markup          = @"<meta name='keywords' content='Pet Insurance, Car Insurance, Vehicle Insurance'>";

            // Act
            var results = new KeywordAnalyzer(markup).AnalyzeHtml();

            // Assert
            var containsExpected = results.Contains(expectedMessage);
            Assert.IsTrue(containsExpected, $"Expected results to contain: '{expectedMessage}'. Actual: {string.Join(", ",results)}");
        }
        /// <summary>
        /// Markup with a meta tag other than keywords must report "No keyword meta tag found".
        /// </summary>
        public void AnalyzeMetaKeyword_NoKeywordMeta_Test()
        {
            // Arrange
            const string expectedMessage = "No keyword meta tag found";
            const string markup          = @"<meta name='someMeta' content='bla'>";

            // Act
            var results = new KeywordAnalyzer(markup).AnalyzeHtml();

            // Assert
            var containsExpected = results.Contains(expectedMessage);
            Assert.IsTrue(containsExpected, $"Expected results to contain: '{expectedMessage}'");
        }
        /// <summary>
        /// Empty markup must report "No meta tags found".
        /// </summary>
        public void AnalyzeMetaKeyword_NoMeta_Test()
        {
            // Arrange
            const string expectedMessage = "No meta tags found";
            var          markup          = string.Empty;

            // Act
            var results = new KeywordAnalyzer(markup).AnalyzeHtml();

            // Assert
            var containsExpected = results.Contains(expectedMessage);
            Assert.IsTrue(containsExpected, $"Expected results to contain: '{expectedMessage}'");
        }
Example #16
0
        /// <summary>
        /// Builds a per-field analyzer wrapper around <paramref name="defaultAnalyzer"/>. Custom
        /// analyzers from <c>indexDefinition.Analyzers</c> are registered first; remaining
        /// not-analyzed fields get a shared keyword analyzer and remaining analyzed fields a shared
        /// standard analyzer.
        /// </summary>
        /// <param name="defaultAnalyzer">Analyzer used for fields without a specific assignment.</param>
        /// <param name="toDispose">Receives the <c>Close</c> action of every analyzer this method owns or creates.</param>
        /// <param name="forQuerying">When true, custom analyzers marked with <c>NotForQueryingAttribute</c> are not registered.</param>
        /// <returns>The configured wrapper.</returns>
        public PerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection <Action> toDispose, bool forQuerying = false)
        {
            toDispose.Add(defaultAnalyzer.Close);
            var wrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer);

            foreach (var configured in indexDefinition.Analyzers)
            {
                Analyzer customAnalyzer = IndexingExtensions.CreateAnalyzerInstance(configured.Key, configured.Value);
                if (customAnalyzer == null)
                {
                    continue;
                }

                // Scheduled for disposal even when it ends up excluded from querying below.
                toDispose.Add(customAnalyzer.Close);

                var excludedFromQuerying =
                    forQuerying &&
                    customAnalyzer.GetType().GetCustomAttributes(typeof(NotForQueryingAttribute), false).Length > 0;

                if (!excludedFromQuerying)
                {
                    wrapper.AddAnalyzer(configured.Key, customAnalyzer);
                }
            }

            KeywordAnalyzer  sharedKeywordAnalyzer  = null;
            StandardAnalyzer sharedStandardAnalyzer = null;

            foreach (var fieldIndexing in indexDefinition.Indexes)
            {
                var indexingMode = fieldIndexing.Value;

                if (indexingMode == FieldIndexing.NotAnalyzed)
                {
                    // Created on first use and reused for every not-analyzed field.
                    if (sharedKeywordAnalyzer == null)
                    {
                        sharedKeywordAnalyzer = new KeywordAnalyzer();
                        toDispose.Add(sharedKeywordAnalyzer.Close);
                    }
                    wrapper.AddAnalyzer(fieldIndexing.Key, sharedKeywordAnalyzer);
                }
                else if (indexingMode == FieldIndexing.Analyzed)
                {
                    // Fields with an explicit analyzer entry are left alone.
                    if (indexDefinition.Analyzers.ContainsKey(fieldIndexing.Key))
                    {
                        continue;
                    }
                    if (sharedStandardAnalyzer == null)
                    {
                        sharedStandardAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
                        toDispose.Add(sharedStandardAnalyzer.Close);
                    }
                    wrapper.AddAnalyzer(fieldIndexing.Key, sharedStandardAnalyzer);
                }
            }

            return(wrapper);
        }
Example #17
0
        //
        // POST: /Xml2Model/upvote

        /*public ActionResult upvote()
         * {
         *  return View(db.likes.Find(id));     // Goes to the upvote page, calls .AntiForgeryToken, then returns to the Index page - too much hassle
         * }
         * [HttpPost, ActionName("Index")]
         * [ValidateAntiForgeryToken]
         * public ActionResult upvoteConfirmed(int Uid, int Iid)
         * {
         *  //Models.like Li = db.likes.Find(Uid,Iid);
         *  Models.like Li = new Models.like( Uid, Iid );
         *  db.likes.Add(Li);
         *  db.SaveChanges();
         *  return RedirectToAction("Index");
         * }
         */



        /// <summary>
        /// Renders the About page after setting its description message.
        /// </summary>
        /// <returns>The default view for this action.</returns>
        public ActionResult About()
        {
            ViewBag.Message = "Your app description page.";

            return(View());
        }
Example #18
0
        /// <summary>
        /// Verifies that updating by predicate with an explicit analyzer replaces the matching
        /// object, keeps the document count unchanged and leaves all other objects untouched.
        /// </summary>
        public void UpdateWithExpressionAndAnalyzerSuccess()
        {
            const int NumObjects      = 10;
            Analyzer  keywordAnalyzer = new KeywordAnalyzer();

            WriteTestObjects(NumObjects, o => o.ToDocument(), keywordAnalyzer);

            // Add the object that will later be replaced.
            var initialObject = new TestObject
            {
                Number = 1234,
                String = "Test Object 1234",
            };

            Assert.AreEqual(NumObjects, writer.NumDocs);
            writer.Add(initialObject, keywordAnalyzer);
            writer.Commit();
            Assert.AreEqual(NumObjects + 1, writer.NumDocs);

            // Replace it, selecting the target by its String value.
            var replacement = new TestObject
            {
                Number = 2345,
                String = "Something Else 2345",
            };

            writer.Update(replacement, MappingSettings.Default, o => o.String == "Test Object 1234", keywordAnalyzer);
            writer.Commit();
            Assert.AreEqual(NumObjects + 1, writer.NumDocs);

            using (DirectoryReader indexReader = DirectoryReader.Open(dir))
            {
                var indexSearcher = new IndexSearcher(indexReader);

                // Verify that the updated item can be found.
                TestObject found = indexSearcher.AsQueryable <TestObject>().Single(o => o.Number == 2345);

                Assert.AreEqual(replacement.Number, found.Number);
                Assert.AreEqual(replacement.String, found.String);

                // Verify that the old item cannot be found anymore.
                TestObject stale = indexSearcher.AsQueryable <TestObject>().SingleOrDefault(o => o.Number == 1234);
                Assert.IsNull(stale);

                // Verify that all other items remain untouched.
                TestObject[] remaining = indexSearcher.AsQueryable <TestObject>()
                                         .Where(o => o.Number != 2345)
                                         .ToArray();
                Assert.IsNotNull(remaining);
                Assert.AreEqual(NumObjects, remaining.Length);

                foreach (TestObject other in remaining)
                {
                    Assert.AreNotEqual(replacement.Number, other.Number);
                    Assert.AreNotEqual(replacement.String, other.String);
                }
            }
        }
Example #19
0
        /// <summary>
        /// Demo entry point: analyzes a text file for keywords, prints every keyword with its rank,
        /// then prints the first 50 keywords again and waits for a key press.
        /// </summary>
        static void Main1(string[] args)
        {
            //Note: you will have to supply your own text files
            // NOTE(review): the path points at a .docx, which File.ReadAllText reads as raw
            // container bytes rather than document text — confirm a plain-text file is intended.
            string gettys = File.ReadAllText(@"C:\Users\LaKissMe\Desktop\First.docx");

            KeywordAnalyzer ka = new KeywordAnalyzer();

            var g = ka.Analyze(gettys, "hello");

            Console.WriteLine("first");
            foreach (var key in g.Keywords)
            {
                // Use an explicit format string: passing key.Word as the first argument made it
                // the format, so the rank was never printed and a word containing braces would
                // throw a FormatException.
                Console.WriteLine("   key: {0}, rank: {1}", key.Word, key.Rank);
            }

            Console.WriteLine("first");
            foreach (var key in g.Keywords.Take(50))
            {
                Console.WriteLine("   {0}", key.Word);
            }

            Console.ReadLine();
        }
Example #20
0
        /// <summary>
        /// Builds a per-field analyzer wrapper: exact fields share one keyword analyzer; search
        /// fields use the analyzer returned by <c>GetAnalyzer</c> or, when that is null, a shared
        /// standard analyzer. Fields with any other indexing mode keep the default analyzer.
        /// </summary>
        /// <param name="createDefaultAnalyzer">Factory for the wrapper's default analyzer.</param>
        /// <param name="fields">Index fields to configure; must not contain the all-fields key.</param>
        /// <param name="forQuerying">Forwarded to <c>GetAnalyzer</c>.</param>
        /// <returns>The configured wrapper.</returns>
        /// <exception cref="InvalidOperationException">Thrown when <paramref name="fields"/> contains the all-fields key.</exception>
        protected static RavenPerFieldAnalyzerWrapper CreateAnalyzer(Func <Analyzer> createDefaultAnalyzer, Dictionary <string, IndexField> fields, bool forQuerying = false)
        {
            if (fields.ContainsKey(Constants.Documents.Indexing.Fields.AllFields))
            {
                throw new InvalidOperationException($"Detected '{Constants.Documents.Indexing.Fields.AllFields}'. This field should not be present here, because inheritance is done elsewhere.");
            }

            var wrapper = new RavenPerFieldAnalyzerWrapper(createDefaultAnalyzer());

            // Both shared analyzers are created on first use only.
            KeywordAnalyzer       sharedKeywordAnalyzer  = null;
            RavenStandardAnalyzer sharedStandardAnalyzer = null;

            foreach (var entry in fields)
            {
                var indexField = entry.Value;
                var fieldName  = indexField.Name;
                var indexing   = indexField.Indexing;

                if (indexing == FieldIndexing.Exact)
                {
                    if (sharedKeywordAnalyzer == null)
                    {
                        sharedKeywordAnalyzer = new KeywordAnalyzer();
                    }

                    wrapper.AddAnalyzer(fieldName, sharedKeywordAnalyzer);
                }
                else if (indexing == FieldIndexing.Search)
                {
                    var fieldAnalyzer = GetAnalyzer(fieldName, indexField, forQuerying);
                    if (fieldAnalyzer != null)
                    {
                        wrapper.AddAnalyzer(fieldName, fieldAnalyzer);
                    }
                    else
                    {
                        if (sharedStandardAnalyzer == null)
                        {
                            sharedStandardAnalyzer = new RavenStandardAnalyzer(global::Lucene.Net.Util.Version.LUCENE_29);
                        }

                        wrapper.AddAnalyzer(fieldName, sharedStandardAnalyzer);
                    }
                }
            }

            return(wrapper);
        }
Example #21
0
        /// <summary>
        /// Creates an index writer for the entity's Lucene directory, using a keyword analyzer by
        /// default and the per-field analyzers returned by <c>GetFields</c>.
        /// </summary>
        /// <param name="connection">Connection supplying the directory, version and logger.</param>
        /// <param name="entity">Entity whose index is written.</param>
        /// <returns>A new writer with unlimited field length.</returns>
        public static IndexWriter Create(AbstractConnection connection, Entity entity)
        {
            var luceneDirectory  = LuceneDirectoryFactory.Create(connection, entity);
            var perFieldAnalyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer());

            foreach (var fieldAnalyzer in GetFields(entity, connection.Version, connection.Logger))
            {
                perFieldAnalyzer.AddAnalyzer(fieldAnalyzer.Key, fieldAnalyzer.Value);
            }

            return(new IndexWriter(luceneDirectory, perFieldAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED));
        }
Example #22
0
        /// <summary>
        /// Looks up a single document by parsing <paramref name="id"/> as a query against the "id" field.
        /// </summary>
        /// <param name="id">Query text for the "id" field; it is passed through the query parser, not matched literally.</param>
        /// <returns>The top hit converted by <c>Docs.DocToDict</c>, or null when nothing matches.</returns>
        public IDictionary <string, string> Find(string id)
        {
            // `using` releases the analyzer once the lookup is done, including on a parse failure.
            using (var keywordAnalyzer = new KeywordAnalyzer())
            {
                var parser    = new QueryParser(Version.LUCENE_30, "id", keywordAnalyzer);
                var query     = parser.Parse(id);
                var scoreDocs = _searcher.Search(query, 1).ScoreDocs;

                if (scoreDocs != null && scoreDocs.Length > 0)
                {
                    return(Docs.DocToDict(_searcher.Doc(scoreDocs[0].Doc), 1f));
                }

                return(null);
            }
        }
Example #23
0
        /// <summary>
        /// Sample walkthrough of the Lucene API: builds several analyzers, opens a writer with a
        /// per-field analyzer on <c>_directory</c>, runs a boosted multi-field query through a
        /// searcher on the writer's reader, projects the hits to <c>Book</c> objects, and finally
        /// optimizes, commits and calls DeleteAll on the writer.
        /// </summary>
        public void Code()
        {
            // Only the keyword and standard analyzers are used below; the others are just constructed.
            Analyzer keywordAnalyzer    = new KeywordAnalyzer();
            Analyzer simpleAnalyzer     = new Lucene.Net.Analysis.SimpleAnalyzer();
            Analyzer stopAnalyzer       = new Lucene.Net.Analysis.StopAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            Analyzer whitespaceAnalyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
            Analyzer standardAnalyzer   = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

            // Name fields use the keyword analyzer; everything else uses the standard analyzer.
            var perFieldAnalyzer = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(standardAnalyzer);
            perFieldAnalyzer.AddAnalyzer("firstname", keywordAnalyzer);
            perFieldAnalyzer.AddAnalyzer("lastname", keywordAnalyzer);

            IndexWriter   indexWriter   = new IndexWriter(_directory, perFieldAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED);
            IndexReader   indexReader   = indexWriter.GetReader();
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            //QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", _standardanalyzer);

            string[] searchFields = new[] { "text", "title", "author" };
            var      fieldBoosts  = new Dictionary <string, float>();
            fieldBoosts.Add("text", 2.0f);
            fieldBoosts.Add("title", 1.5f);

            QueryParser queryParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, searchFields, standardAnalyzer, fieldBoosts);
            Query       parsedQuery = queryParser.Parse("lucene is great");

            TopDocs topDocs = indexSearcher.Search(parsedQuery, 1000);

            IEnumerable <Document> matchedDocuments = topDocs.ScoreDocs.Select(scoreDoc => indexSearcher.Doc(scoreDoc.Doc));

            // The projection is never enumerated within this method.
            var books = matchedDocuments.Select(document => new Book()
            {
                Text   = document.Get("text"),
                Title  = document.Get("title"),
                Author = document.Get("author"),
                Length = Int32.Parse(document.Get("length"))
            });

            indexWriter.Optimize();
            indexWriter.Commit();
            indexWriter.DeleteAll();
        }
Example #24
0
        /// <summary>
        /// Builds a per-field analyzer wrapper around <paramref name="defaultAnalyzer"/>
        /// from the analyzers and field indexing options of <c>indexDefinition</c>.
        /// </summary>
        /// <param name="defaultAnalyzer">Analyzer used for fields without a specific mapping.</param>
        /// <param name="toDispose">
        /// Receives the <c>Close</c> delegate of every analyzer this method registers,
        /// including <paramref name="defaultAnalyzer"/>.
        /// </param>
        /// <returns>The configured wrapper.</returns>
        public PerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection <Action> toDispose)
        {
            toDispose.Add(defaultAnalyzer.Close);
            var wrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer);

            // Explicitly configured analyzers; entries that yield no instance are skipped.
            foreach (var configured in indexDefinition.Analyzers)
            {
                var instance = IndexingExtensions.CreateAnalyzerInstance(configured.Key, configured.Value);
                if (instance != null)
                {
                    toDispose.Add(instance.Close);
                    wrapper.AddAnalyzer(configured.Key, instance);
                }
            }

            // Created on first use and then shared by every field that needs them.
            KeywordAnalyzer  sharedKeyword  = null;
            StandardAnalyzer sharedStandard = null;

            foreach (var indexed in indexDefinition.Indexes)
            {
                var mode = indexed.Value;

                if (mode == FieldIndexing.NotAnalyzed || mode == FieldIndexing.NotAnalyzedNoNorms)
                {
                    if (sharedKeyword == null)
                    {
                        sharedKeyword = new KeywordAnalyzer();
                        toDispose.Add(sharedKeyword.Close);
                    }
                    wrapper.AddAnalyzer(indexed.Key, sharedKeyword);
                }
                else if (mode == FieldIndexing.Analyzed && !indexDefinition.Analyzers.ContainsKey(indexed.Key))
                {
                    // Fields listed in indexDefinition.Analyzers are left as configured above.
                    if (sharedStandard == null)
                    {
                        sharedStandard = new StandardAnalyzer(Version.LUCENE_29);
                        toDispose.Add(sharedStandard.Close);
                    }
                    wrapper.AddAnalyzer(indexed.Key, sharedStandard);
                }
            }

            return wrapper;
        }
Example #25
0
        /// <summary>
        /// Creates the analyzer: an <c>Indexing.SnPerFieldAnalyzerWrapper</c> constructed
        /// over a <c>KeywordAnalyzer</c>.
        /// </summary>
        /// <returns>The per-field wrapper.</returns>
        internal static Analyzer GetAnalyzer()
        {
            // Field-to-analyzer mapping as recorded by the original author
            // (not verifiable from this method alone):
            //
            //  Field          Analyzer
            //  -----------------------------------------------------------------
            //  Name           Lucene.Net.Analysis.KeywordAnalyzer
            //  Path           Lucene.Net.Analysis.KeywordAnalyzer
            //  Keywords       Lucene.Net.Analysis.StopAnalyzer
            //  _Text          Lucene.Net.Analysis.Standard.StandardAnalyzer
            //  -----------------------------------------------------------------
            //  Default        Lucene.Net.Analysis.WhitespaceAnalyzer
            //
            // NOTE(review): the table names WhitespaceAnalyzer as the default, but the
            // wrapper below is given a KeywordAnalyzer - confirm which one is intended.
            return new Indexing.SnPerFieldAnalyzerWrapper(new KeywordAnalyzer());
        }
Example #26
0
        /// <summary>
        /// Parses <paramref name="query"/> into a Lucene <c>Query</c> with a
        /// <c>RangeQueryParser</c>, after running it through
        /// <c>PreProcessUntokenizedTerms</c>.
        /// </summary>
        /// <param name="query">The raw query text.</param>
        /// <param name="analyzer">
        /// Per-field analyzer given to both the pre-processing step and the parser.
        /// NOTE(review): the pre-processing step presumably registers extra field
        /// analyzers on it - confirm against that method.
        /// </param>
        /// <returns>The parsed query.</returns>
        public static Query BuildQuery(string query, PerFieldAnalyzerWrapper analyzer)
        {
            var keywordAnalyzer = new KeywordAnalyzer();

            try
            {
                query = PreProcessUntokenizedTerms(analyzer, query, keywordAnalyzer);
                // An empty default field: terms without a field prefix resolve against "".
                var queryParser = new RangeQueryParser(Version.LUCENE_29, "", analyzer);
                queryParser.SetAllowLeadingWildcard(true);
                return queryParser.Parse(query);
            }
            finally
            {
                // Always release the keyword analyzer, including when parsing throws.
                keywordAnalyzer.Close();
            }
        }
Example #27
0
        /// <summary>
        /// Parses <paramref name="query"/> into a Lucene <c>Query</c> with a
        /// <c>RangeQueryParser</c>, after running it through
        /// <c>PreProcessUntokenizedTerms</c> and <c>PreProcessSearchTerms</c>.
        /// </summary>
        /// <param name="query">The raw query text.</param>
        /// <param name="analyzer">Per-field analyzer used by the parser.</param>
        /// <returns>The parsed query.</returns>
        public static Query BuildQuery(string query, PerFieldAnalyzerWrapper analyzer)
        {
            // No separate KeywordAnalyzer is created here: the previous local was only
            // allocated and closed, never handed to the parser or either pre-processing step.
            var queryParser = new RangeQueryParser(Version.LUCENE_29, string.Empty, analyzer);
            query = PreProcessUntokenizedTerms(query, queryParser);
            query = PreProcessSearchTerms(query);
            queryParser.SetAllowLeadingWildcard(true); // not the recommended approach, should rather use ReverseFilter
            return queryParser.Parse(query);
        }
Example #28
0
        /// <summary>
        /// Creates an <c>IndexWriter</c> for <paramref name="entity"/> using a per-field
        /// analyzer built from the entity's fields.
        /// </summary>
        /// <param name="connection">Connection supplying the directory, Lucene version and logger.</param>
        /// <param name="process">Process whose "default" search type, when present, selects the default analyzer.</param>
        /// <param name="entity">Entity whose fields are mapped to analyzers.</param>
        /// <returns>
        /// A writer over the entity's directory. The directory stays open for the
        /// lifetime of the writer; it is disposed here only when creation fails.
        /// </returns>
        public static IndexWriter Create(AbstractConnection connection, Process process, Entity entity)
        {
            // The directory must outlive this method: wrapping it in a using block would
            // dispose it as soon as the writer is returned, leaving the caller with a
            // writer over a disposed directory.
            var dir = LuceneDirectoryFactory.Create(connection, entity);
            try
            {
                // Only allocate the KeywordAnalyzer fallback when no "default" search type
                // overrides it.
                Analyzer defaultAnalyzer;
                if (process.SearchTypes.ContainsKey("default"))
                {
                    defaultAnalyzer = LuceneAnalyzerFactory.Create(process.SearchTypes["default"].Analyzer, connection.Version);
                }
                else
                {
                    defaultAnalyzer = new KeywordAnalyzer();
                }

                var analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer);
                foreach (var field in GetFields(entity, connection.Version, connection.Logger))
                {
                    analyzer.AddAnalyzer(field.Key, field.Value);
                }

                return new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
            }
            catch
            {
                // No writer took ownership of the directory, so release it before rethrowing.
                dir.Dispose();
                throw;
            }
        }
        /// <summary>
        /// Populates the index in <paramref name="dir"/> with one document per line of
        /// the file at <paramref name="filePath"/>; each line is indexed (not stored)
        /// under <c>FieldName</c>.
        /// </summary>
        /// <param name="dir">Directory that receives the index.</param>
        /// <param name="filePath">Path of the text file to read, one value per line.</param>
        private static void LoadLuceneIndex(Directory dir, string filePath)
        {
            // Both the analyzer and the writer are scoped with using so they are released
            // even when reading the file or adding a document throws; previously a failure
            // left the writer open and the analyzer was never released at all.
            using (Analyzer analyzer = new KeywordAnalyzer()) // new SimpleAnalyzer();
            using (var indexWriter = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (var name in System.IO.File.ReadLines(filePath))
                {
                    var document = new Document();
                    document.Add(new Field(FieldName, name, Field.Store.NO, Field.Index.ANALYZED));

                    indexWriter.AddDocument(document);
                }

                indexWriter.Commit();
            }
        }
Example #30
0
        /// <summary>
        /// Detects untokenized fields and sets as NotAnalyzed in analyzer.
        /// Each match of <c>untokenizedQuery</c> has its field registered with a
        /// <c>KeywordAnalyzer</c>, and its term is rewritten in the query text: the
        /// two-character wrappers at both ends are dropped and the value is wrapped
        /// in double quotes when it is not already quoted.
        /// </summary>
        /// <param name="analyzer">Per-field analyzer that receives the keyword analyzer for every matched field.</param>
        /// <param name="query">The query text to inspect and rewrite.</param>
        /// <param name="keywordAnalyzer">
        /// Set to a new <c>KeywordAnalyzer</c> only when at least one match is found;
        /// left untouched otherwise. NOTE(review): the caller is presumably
        /// responsible for closing it - confirm at the call site.
        /// </param>
        /// <returns>The rewritten query, or <paramref name="query"/> unchanged when nothing matches.</returns>
        private static string PreProcessUntokenizedTerms(PerFieldAnalyzerWrapper analyzer, string query, ref Analyzer keywordAnalyzer)
        {
            var untokenizedMatches = untokenizedQuery.Matches(query);

            // Nothing to rewrite: return early without allocating an analyzer.
            if (untokenizedMatches.Count < 1)
            {
                return(query);
            }

            var sb = new StringBuilder(query);

            // Initialize a KeywordAnalyzer
            // KeywordAnalyzer will not tokenize the values
            keywordAnalyzer = new KeywordAnalyzer();

            // process in reverse order to leverage match string indexes
            // (edits near the end of the text do not shift the indexes of earlier matches)
            for (var i = untokenizedMatches.Count; i > 0; i--)
            {
                var match = untokenizedMatches[i - 1];

                // specify that term for this field should not be tokenized
                // (group 1 is used as the field name; the regex itself is defined elsewhere)
                analyzer.AddAnalyzer(match.Groups[1].Value, keywordAnalyzer);

                // group 2 is the term including its enclosing "[[" and "]]"
                var term = match.Groups[2];

                // introduce " " around the term
                var startIndex = term.Index;
                // offset from startIndex to the closing two-character wrapper
                var length     = term.Length - 2;
                // closing quote: inserted just before the closing wrapper unless the value already ends with one
                if (sb[startIndex + length - 1] != '"')
                {
                    sb.Insert(startIndex + length, '"');
                    length += 1;
                }
                // opening quote: inserted just after the opening wrapper; this shifts the
                // closing wrapper one position to the right, hence the length adjustment
                if (sb[startIndex + 2] != '"')
                {
                    sb.Insert(startIndex + 2, '"');
                    length += 1;
                }
                // remove enclosing "[[" "]]" from term value (again in reverse order)
                sb.Remove(startIndex + length, 2);
                sb.Remove(startIndex, 2);
            }

            return(sb.ToString());
        }