Ejemplo n.º 1
0
 /// <summary>
 /// Builds the expected query ("old" followed by either "dogs" or "dog") and checks that
 /// the phrase query created for "old dogs" with a <c>MockSynonymAnalyzer</c> equals it.
 /// </summary>
 public virtual void TestSynonymsPhrase()
 {
     // Both variants share the second phrase position.
     var dogVariants = new Term[] { new Term("field", "dogs"), new Term("field", "dog") };

     var wanted = new MultiPhraseQuery();
     wanted.Add(new Term("field", "old"));
     wanted.Add(dogVariants);

     var queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
     var actual       = queryBuilder.CreatePhraseQuery("field", "old dogs");

     Assert.AreEqual(wanted, actual);
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Demo entry point: adds one sample document to the index stored in the "Index"
        /// directory, then runs a two-term phrase search ("brown", "fox") against the
        /// "favoritePhrase" field and prints every hit to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        private static void Main(string[] args)
        {
            // Ensures index backwards compatibility
            var AppLuceneVersion = LuceneVersion.LUCENE_48;

            var indexLocation = @"Index";

            // The directory, analyzer and writer are disposed deterministically so the
            // index files and the write lock are released when the demo finishes.
            // Disposing the writer also commits the document added below.
            using (var dir = FSDirectory.Open(indexLocation))
            using (var analyzer = new StandardAnalyzer(AppLuceneVersion))
            using (var writer = new IndexWriter(dir, new IndexWriterConfig(AppLuceneVersion, analyzer)))
            {
                var source = new
                {
                    Name           = "Kermit the Frog",
                    FavoritePhrase = "The quick brown fox jumps over the lazy dog"
                };
                Document doc = new Document
                {
                    // StringField indexes but doesn't tokenize
                    new StringField("name",
                                    source.Name,
                                    Field.Store.YES),
                    new TextField("favoritePhrase",
                                  source.FavoritePhrase,
                                  Field.Store.YES)
                };

                writer.AddDocument(doc);
                writer.Flush(triggerMerge: false, applyAllDeletes: false);

                // search with a phrase
                var phrase = new MultiPhraseQuery();

                phrase.Add(new Term("favoritePhrase", "brown"));
                phrase.Add(new Term("favoritePhrase", "fox"));

                // re-use the writer to get real-time updates; the reader is owned here
                // and therefore disposed once the hits have been printed
                using (var reader = writer.GetReader(applyAllDeletes: true))
                {
                    var searcher = new IndexSearcher(reader);
                    var hits     = searcher.Search(phrase, 20 /* top 20 */).ScoreDocs;

                    // 1-based position of the hit; previously the literal 1 was printed for every hit
                    var resultNumber = 0;

                    foreach (var hit in hits)
                    {
                        resultNumber++;

                        var   foundDoc = searcher.Doc(hit.Doc);
                        float score    = hit.Score;
                        Console.WriteLine("--结果 num {0}, 耗时 {1}", resultNumber, score);
                        Console.WriteLine("--ID: {0}", foundDoc.Get("name"));
                        Console.WriteLine("--Text found: {0}" + Environment.NewLine, foundDoc.Get("favoritePhrase"));
                    }
                }
            }
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Builds the expected query ("中" followed by either "国" or "國") and compares it with
 /// the phrase queries created for "中国": first without an explicit slop, then with slop 3.
 /// </summary>
 public virtual void TestCJKSynonymsPhrase()
 {
     // Both written forms share the second phrase position.
     var secondCharForms = new Term[] { new Term("field", "国"), new Term("field", "國") };

     var wanted = new MultiPhraseQuery();
     wanted.Add(new Term("field", "中"));
     wanted.Add(secondCharForms);

     var queryBuilder = new QueryBuilder(new MockCJKSynonymAnalyzer());

     var withoutSlop = queryBuilder.CreatePhraseQuery("field", "中国");
     Assert.AreEqual(wanted, withoutSlop);

     // The same expected instance is reused, now carrying the slop.
     wanted.Slop = 3;
     var withSlop = queryBuilder.CreatePhraseQuery("field", "中国", 3);
     Assert.AreEqual(wanted, withSlop);
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Flattens a two-position <see cref="MultiPhraseQuery"/> (a1|a2 followed by b1|b2)
        /// through <c>FieldQuery.flatten</c> and asserts that the collected queries are the
        /// four queries returned by <c>Tq</c> for "a1", "a2", "b1" and "b2".
        /// </summary>
        public void TestFlattenMultiPhraseQuery()
        {
            Term[] firstPosition  = { new Term(F, "a1"), new Term(F, "a2") };
            Term[] secondPosition = { new Term(F, "b1"), new Term(F, "b2") };

            var multiPhrase = new MultiPhraseQuery();
            multiPhrase.Add(firstPosition);
            multiPhrase.Add(secondPosition);

            var flattener = new FieldQuery(multiPhrase, true, true);
            var collected = new HashSet <Query>();
            flattener.flatten(multiPhrase, collected);

            AssertCollectionQueries(collected, Tq("a1"), Tq("a2"), Tq("b1"), Tq("b2"));
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Parses the quoted phrase "中国" with a <c>MockCJKSynonymAnalyzer</c> and the AND
        /// default operator, and compares the result with an expected
        /// <see cref="MultiPhraseQuery"/> ("中" followed by "国" or "國"): plain, then with
        /// boost 2, then with boost 2 and slop 3.
        /// </summary>
        public virtual void TestCJKSynonymsPhrase()
        {
            var secondCharForms = new Index.Term[] { new Index.Term("field", "国"), new Index.Term("field", "國") };

            var wanted = new MultiPhraseQuery();
            wanted.Add(new Index.Term("field", "中"));
            wanted.Add(secondCharForms);

            var parser = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
            parser.DefaultOperator = Operator.AND;

            assertEquals(wanted, parser.Parse("\"中国\""));

            // The expected query is mutated step by step: boost first, slop second.
            wanted.Boost = 2.0f;
            assertEquals(wanted, parser.Parse("\"中国\"^2"));

            wanted.Slop = 3;
            assertEquals(wanted, parser.Parse("\"中国\"~3^2"));
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Searches the ".luceneindex" index under <c>options.DataDir</c> for documents whose
        /// "contents" field contains <c>options.SearchTerm</c> as a single exact term.
        /// </summary>
        /// <param name="options">
        /// Supplies the search term, the data directory and the maximum number of hits (<c>DocCount</c>).
        /// </param>
        /// <returns>One <c>IndexFile</c> (score, "path", "modified") per hit, in hit order.</returns>
        public override IList <IndexFile> Search(SearchingOptions options)
        {
            var phrase = new MultiPhraseQuery();

            phrase.Add(new Term("contents", options.SearchTerm));

            var indexDir = Path.Combine(options.DataDir, ".luceneindex");

            // Search through the directory reader directly. The previous code opened this
            // reader without using or disposing it and additionally opened an IndexWriter
            // (taking the index write lock) only to obtain a second reader from it.
            using (var dir = FSDirectory.Open(indexDir))
            using (var dirReader = DirectoryReader.Open(dir))
            {
                var searcher = new IndexSearcher(dirReader);
                var hits     = searcher.Search(phrase, options.DocCount).ScoreDocs;

                // Materialize inside the using block: stored fields cannot be read
                // once the reader has been disposed.
                var result = new List <IndexFile>(hits.Length);
                foreach (var hit in hits)
                {
                    var doc = searcher.Doc(hit.Doc);
                    result.Add(new IndexFile
                    {
                        Score    = hit.Score,
                        Path     = doc.Get("path"),
                        Modified = doc.Get("modified")
                    });
                }
                return(result);
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Deserializes a <c>SearchRequest</c> from <paramref name="json"/>, searches the "text"
        /// field for its keyword (as a single exact term, top 20 hits) and returns the hits
        /// serialized as a JSON array of <c>SearchArtTextRequest</c>.
        /// </summary>
        /// <param name="json">JSON-encoded search request.</param>
        /// <returns>
        /// The serialized hit list; "[]" when the request has no keyword or when the code is
        /// compiled without the <c>LUCENE</c> symbol.
        /// </returns>
        public string FetchResults(string json)
        {
#if LUCENE
            List <SearchArtTextRequest> resultList = new List <SearchArtTextRequest>();

            var searchRequest = JsonConvert.DeserializeObject <SearchRequest>(json);
            var keyword       = searchRequest?.keyword;

            // A missing request or keyword cannot match anything; answer with an empty
            // array instead of failing while building the term.
            if (string.IsNullOrEmpty(keyword))
            {
                return("[]");
            }

            // search with a phrase
            var phrase = new MultiPhraseQuery();
            phrase.Add(new Term("text", keyword));

            int hitCount;

            // re-use the writer to get real-time updates; the reader obtained from it is
            // owned by this call and is disposed once the hits have been read
            using (var reader = writer.GetReader(applyAllDeletes: true))
            {
                var searcher = new IndexSearcher(reader);
                var hits     = searcher.Search(phrase, 20 /* top 20 */).ScoreDocs;
                hitCount = hits.Length;

                foreach (var hit in hits)
                {
                    var foundDoc = searcher.Doc(hit.Doc);
                    resultList.Add(new SearchArtTextRequest {
                        ArtId = foundDoc.Get("artId"), Text = HighlightText(keyword, foundDoc.Get("text"))
                    });
                }
            }

            // Serialize once and reuse the string for both the diagnostics and the result.
            var serialized = JsonConvert.SerializeObject(resultList);

            Console.Error.WriteLine("---");
            Console.Error.WriteLine(hitCount);
            Console.Error.WriteLine("---");
            Console.Error.WriteLine(json);
            Console.Error.WriteLine("---");
            Console.Error.WriteLine(serialized);
            Console.Error.WriteLine("---");
            return(serialized);
#else
            return("[]");
#endif
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Lazily searches the terms field for the given terms as one phrase (one term per
        /// position) and yields, for each hit, the stored term text and the deserialized data.
        /// </summary>
        /// <param name="searchTerms">Terms of the phrase, in order.</param>
        /// <param name="maxHits">Maximum number of hits to return.</param>
        /// <returns>A deferred sequence of (term, data) pairs; nothing runs until it is enumerated.</returns>
        public IEnumerable <(string Term, T Data)> Search(string[] searchTerms, int maxHits = 100)
        {
            if (!_reader.IsCurrent())
            {
                // OpenIfChanged returns null when there is nothing newer to open (which can
                // still happen after the IsCurrent check). Only swap and dispose the old
                // reader when a new one was actually obtained; otherwise keep the current one.
                var refreshed = DirectoryReader.OpenIfChanged(_reader);
                if (refreshed != null)
                {
                    var stale = _reader;
                    _reader   = refreshed;
                    _searcher = new IndexSearcher(refreshed);
                    stale.Dispose();
                }
            }

            MultiPhraseQuery query = new MultiPhraseQuery();

            foreach (string searchTerm in searchTerms)
            {
                query.Add(new Term(TermsFieldName, searchTerm));
            }

            TopDocs hits = _searcher.Search(query, maxHits);

            foreach (ScoreDoc topDoc in hits.ScoreDocs)
            {
                Document document       = _reader.Document(topDoc.Doc);
                string   serializedData = document.GetField(DataFieldName).GetStringValue();
                T        t    = _serializer.Deserialize <T>(serializedData);
                string   term = document.GetField(TermsFieldName).GetStringValue();
                yield return(term, t);
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Parses the quoted phrase "old dogs" with a <c>MockSynonymAnalyzer</c> and compares
        /// the result with an expected <see cref="MultiPhraseQuery"/> ("old" followed by
        /// "dogs" or "dog"): with the parser's initial operator, with AND, then with boost 2,
        /// then with boost 2 and slop 3.
        /// </summary>
        public void TestSynonymsPhrase()
        {
            var dogVariants = new Index.Term[] { new Index.Term("field", "dogs"), new Index.Term("field", "dog") };

            var wanted = new MultiPhraseQuery();
            wanted.Add(new Index.Term("field", "old"));
            wanted.Add(dogVariants);

            var parser = new QueryParser(TEST_VERSION_CURRENT, "field", new MockSynonymAnalyzer());

            // Same expectation before and after switching the default operator.
            assertEquals(wanted, parser.Parse("\"old dogs\""));
            parser.DefaultOperator = QueryParserBase.Operator.AND;
            assertEquals(wanted, parser.Parse("\"old dogs\""));

            // The expected query is mutated step by step: boost first, slop second.
            wanted.Boost = 2.0f;
            assertEquals(wanted, parser.Parse("\"old dogs\"^2"));

            wanted.Slop = 3;
            assertEquals(wanted, parser.Parse("\"old dogs\"~3^2"));
        }
        /// <summary>
        /// Searches the "data" field for the space-separated words of <paramref name="search"/>
        /// as one phrase (one word per position).
        /// </summary>
        /// <param name="search">Search text; null, empty or whitespace-only yields no results.</param>
        /// <param name="take">Maximum number of hits to request from the index.</param>
        /// <returns>The hits ordered by descending score, de-duplicated by <c>Id</c>.</returns>
        public IEnumerable <LuceneDto> Search(string search, int take = 20)
        {
            var lucene = new List <LuceneDto>();

            if (string.IsNullOrWhiteSpace(search))
            {
                return(lucene);
            }

            var phrase = new MultiPhraseQuery();

            // Drop empty entries: consecutive, leading or trailing spaces would otherwise add
            // empty terms as phrase positions.
            foreach (var x in search.Split(new[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries))
            {
                phrase.Add(new Term("data", x));
            }

            // The reader obtained from the writer is owned by this call; dispose it once the
            // hits have been copied into DTOs.
            using (var reader = _writer.GetReader(applyAllDeletes: true))
            {
                var searcher = new IndexSearcher(reader);
                var hits     = searcher.Search(phrase, take).ScoreDocs;

                foreach (var hit in hits)
                {
                    var foundDoc = searcher.Doc(hit.Doc);
                    lucene.Add(
                        new LuceneDto
                    {
                        Score      = hit.Score,
                        IdInternal = foundDoc.Get("id"),
                        Name       = foundDoc.Get("name"),
                        Data       = foundDoc.Get("data")
                    }
                        );
                }
            }

            return(lucene.OrderByDescending(x => x.Score).DistinctBy(x => x.Id));
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Lazily yields matches for <paramref name="searchTerm"/> from the "data" field: first
        /// the hits of an exact phrase search over its space-separated words, then, unless more
        /// than 10 direct hits were found, the hits of a fuzzy search on the whole term.
        /// </summary>
        /// <param name="searchTerm">Text to look for; null or empty yields nothing.</param>
        /// <param name="maxDifference">Not used by this implementation; the fuzzy search uses a fixed maximum of 2 edits.</param>
        /// <returns>A deferred sequence of converted hits (each search is limited to its top 20).</returns>
        public IEnumerable <string> FindMatches(string searchTerm, int maxDifference)
        {
            if (String.IsNullOrEmpty(searchTerm))
            {
                yield break;
            }

            // Directory and reader hold file handles; the using blocks release them when the
            // enumeration completes or the enumerator is disposed early.
            using (var dir = FSDirectory.Open(IndexLocation))
            using (var reader = DirectoryReader.Open(dir))
            {
                var searcher     = new IndexSearcher(reader);
                var directPhrase = new MultiPhraseQuery();

                // Drop empty entries so repeated spaces do not add empty terms to the phrase.
                foreach (var item in searchTerm.Split(new[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries))
                {
                    directPhrase.Add(new Term("data", item));
                }
                var directHits = searcher.Search(directPhrase, 20 /* top 20 */).ScoreDocs;

                foreach (var hit in directHits)
                {
                    yield return(ConvertHits(searcher, hit));
                }
                // don't do expensive fuzzyQuery if results are enough
                if (directHits.Length > 10)
                {
                    yield break;
                }

                var phrase = new FuzzyQuery(new Term("data", searchTerm), 2);
                var hits   = searcher.Search(phrase, 20 /* top 20 */).ScoreDocs;

                foreach (var hit in hits)
                {
                    yield return(ConvertHits(searcher, hit));
                }
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Searches the "name" field for the whole query text as a single term and converts
        /// each of the top 10 hits with <c>getFoundDocResult</c>.
        /// </summary>
        /// <param name="query">Full name to look for.</param>
        /// <param name="searcher">Searcher used to run the query and load the hit documents.</param>
        /// <returns>The converted hits, in hit order.</returns>
        private static List <string> searchFull_Lucene(string query, IndexSearcher searcher)
        {
            // Search by the full name
            var fullNameQuery = new MultiPhraseQuery();
            fullNameQuery.Add(new Term("name", query));

            var topHits = searcher.Search(fullNameQuery, 10).ScoreDocs;
            var found   = new List <string>();

            for (int i = 0; i < topHits.Length; i++)
            {
                found.Add(getFoundDocResult(searcher.Doc(topHits[i].Doc)));
            }

            return(found);
        }
        /// <summary>
        /// Parses a quoted phrase through a <c>CannedAnalyzer</c> fed with fixed tokens
        /// (a/1 at position 0, b/1 at position 1, c at position 2) and checks that the result
        /// is a <see cref="MultiPhraseQuery"/> equal to one built with the term groups at
        /// positions -1, 0 and 1.
        /// </summary>
        public virtual void TestMultiPhraseQueryParsing()
        {
            TokenAndPos[] cannedTokens =
            {
                new TokenAndPos("a", 0),
                new TokenAndPos("1", 0),
                new TokenAndPos("b", 1),
                new TokenAndPos("1", 1),
                new TokenAndPos("c", 2)
            };

            var   parser = new QueryParser(TEST_VERSION_CURRENT, "field", new CannedAnalyzer(cannedTokens));
            Query parsed = parser.Parse("\"this text is acually ignored\"");

            assertTrue("wrong query type!", parsed is MultiPhraseQuery);

            var wanted = new MultiPhraseQuery();
            wanted.Add(new Term[] { new Term("field", "a"), new Term("field", "1") }, -1);
            wanted.Add(new Term[] { new Term("field", "b"), new Term("field", "1") }, 0);
            wanted.Add(new Term[] { new Term("field", "c") }, 1);

            assertEquals(wanted, parsed);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Runs <paramref name="text"/> through <paramref name="analyzer"/> and turns the
        /// resulting tokens into a query on <paramref name="field"/>.
        /// </summary>
        /// <remarks>
        /// Tokens with a positive position increment start a new position; tokens with a zero
        /// increment join the current position (and are dropped if no position exists yet).
        /// The result is a <c>TermQuery</c> for a single term, a <c>PhraseQuery</c> when every
        /// position holds exactly one term (this includes the case of no tokens at all), and a
        /// <c>MultiPhraseQuery</c> otherwise.
        /// </remarks>
        /// <param name="analyzer">Analyzer used to tokenize the text.</param>
        /// <param name="field">Field name used for every term.</param>
        /// <param name="text">Text to analyze.</param>
        /// <returns>The query built from the analyzed tokens.</returns>
        static Query ExecuteAnalyzer(Analyzer analyzer, string field, string text)
        {
            List <List <Term> > terms   = new List <List <Term> >();
            List <Term>         current = null;

            using var reader = new StringReader(text);

            // The token stream is a resource as well: close it once all tokens are consumed
            // (previously it was never closed).
            using TokenStream tokenStream = analyzer.TokenStream(field, reader);

            ITermAttribute termAttribute = tokenStream.AddAttribute <ITermAttribute>();
            IPositionIncrementAttribute positionIncrementAttribute = tokenStream.AddAttribute <IPositionIncrementAttribute>();

            while (tokenStream.IncrementToken())
            {
                if (positionIncrementAttribute.PositionIncrement > 0)
                {
                    current = new List <Term>();
                    terms.Add(current);
                }
                if (current != null)
                {
                    current.Add(new Term(field, termAttribute.Term));
                }
            }

            if (terms.Count == 1 && terms[0].Count == 1)
            {
                return(new TermQuery(terms[0][0]));
            }

            // Every position list receives its first term in the iteration that creates it,
            // so "all lists have exactly one term" is the same test as "sum of counts equals
            // number of lists".
            if (terms.All(l => l.Count == 1))
            {
                PhraseQuery phraseQuery = new PhraseQuery();
                foreach (var positionList in terms)
                {
                    phraseQuery.Add(positionList[0]);
                }
                return(phraseQuery);
            }

            MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery();
            foreach (var positionList in terms)
            {
                multiPhraseQuery.Add(positionList.ToArray());
            }
            return(multiPhraseQuery);
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Adds a clause for <paramref name="term"/> on the field named by
        /// <paramref name="selector"/> to the current query.
        /// </summary>
        /// <remarks>
        /// A term containing a space is split into words and added, between
        /// <c>Open(occur)</c> and <c>Close()</c>, as a <c>MultiPhraseQuery</c> with slop 8
        /// and <c>Occur.MUST</c>. A term without a space is added as a
        /// <c>PermutermWildcardQuery</c> with <paramref name="occur"/>. In both cases the
        /// wildcard flag is set.
        /// </remarks>
        /// <param name="term">Search text.</param>
        /// <param name="selector">Expression selecting the entity member whose name is the field.</param>
        /// <param name="occur">Occurrence used for the group (phrase case) or the clause (single-word case).</param>
        /// <returns>This provider, for chaining.</returns>
        private QueryProvider <TLuceneEntity> TokenizedIncludePhrase <TResult>(string term, Expression <Func <TLuceneEntity, TResult> > selector, Occur occur)
        {
            // Resolve the field name once instead of once per word.
            var fieldName = selector.GetName();

            if (term.Contains(" "))
            {
                this.Open(occur);

                var termsList = RemoveEmpty(term.Split(' '));

                // NOTE: a list of PermutermWildcardQuery objects used to be built here but
                // was never added to any query; that dead code has been removed.
                var multiPhrase = new MultiPhraseQuery();

                foreach (var item in termsList)
                {
                    multiPhrase.Add(new Term(fieldName, item));
                }

                multiPhrase.Slop = 8;

                _current.Add(multiPhrase, Occur.MUST);

                this.Close();
            }
            else
            {
                var query = new PermutermWildcardQuery(new Term(fieldName, term));

                _current.Add(query, occur);
            }

            _containsWildCard = true;

            return(this);
        }
        /// <summary>
        /// Searches the "content" field of the index stored in the "Index" folder under the
        /// web root for <paramref name="query"/> as a single exact term.
        /// </summary>
        /// <param name="hostEnvironment">Provides the web root path that contains the index folder.</param>
        /// <param name="query">Term to look for.</param>
        /// <returns>Title and URL of each of the top 20 hits, in hit order.</returns>
        public static List <ResultModel> Search(IWebHostEnvironment hostEnvironment, string query)
        {
            var AppLuceneVersion = LuceneVersion.LUCENE_48;

            // Path.Combine instead of appending "\\Index": the backslash is a directory
            // separator on Windows only.
            var indexLocation = System.IO.Path.Combine(hostEnvironment.WebRootPath, "Index");

            // All four objects hold index resources (file handles, the write lock). The using
            // blocks release them on every path, including when the search throws; before,
            // only the writer was disposed, and only on success.
            using (var dir = FSDirectory.Open(indexLocation))
            //create an analyzer to process the text
            using (var analyzer = new StandardAnalyzer(AppLuceneVersion))
            //create an index writer
            using (var writer = new IndexWriter(dir, new IndexWriterConfig(AppLuceneVersion, analyzer)))
            // re-use the writer to get real-time updates
            using (var reader = writer.GetReader(applyAllDeletes: true))
            {
                var phrase = new MultiPhraseQuery();

                phrase.Add(new Term("content", query));

                var searcher = new IndexSearcher(reader);
                var hits     = searcher.Search(phrase, 20 /* top 20 */).ScoreDocs;

                var resultList = new List <ResultModel>();

                foreach (var hit in hits)
                {
                    var foundDoc = searcher.Doc(hit.Doc);

                    resultList.Add(new ResultModel()
                    {
                        Title = foundDoc.Get("title"),
                        Url   = foundDoc.Get("url")
                    });
                }
                return(resultList);
            }
        }
        /// <summary>
        /// Builds a <see cref="MultiPhraseQuery"/> from a <c>MultiPhraseQueryNode</c>.
        /// </summary>
        /// <remarks>
        /// Each child is expected to be a <c>FieldQueryNode</c> tagged with an already built
        /// <c>TermQuery</c>. The terms are grouped by the child's <c>PositionIncrement</c> and
        /// each group is added to the query at that position, in ascending key order.
        /// </remarks>
        /// <param name="queryNode">Node to convert; cast to <c>MultiPhraseQueryNode</c>.</param>
        /// <returns>The assembled query; empty when the node has no children list.</returns>
        public virtual Query Build(IQueryNode queryNode)
        {
            var multiPhraseNode = (MultiPhraseQueryNode)queryNode;
            var result          = new MultiPhraseQuery();

            IList <IQueryNode> childNodes = multiPhraseNode.GetChildren();

            if (childNodes == null)
            {
                return(result);
            }

            // Sorted so that the groups are added in ascending position order.
            IDictionary <int?, List <Term> > termsByPosition = new SortedDictionary <int?, List <Term> >();

            foreach (IQueryNode childNode in childNodes)
            {
                var fieldNode  = (FieldQueryNode)childNode;
                var builtQuery = (TermQuery)fieldNode.GetTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID);

                List <Term> bucket;
                bool        known = termsByPosition.TryGetValue(fieldNode.PositionIncrement, out bucket);

                if (!known || bucket == null)
                {
                    bucket = new List <Term>();
                    termsByPosition[fieldNode.PositionIncrement] = bucket;
                }

                bucket.Add(builtQuery.Term);
            }

            foreach (var entry in termsByPosition)
            {
                result.Add(entry.Value.ToArray(), (int)entry.Key);
            }

            return(result);
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Splits <paramref name="query"/> on spaces and, for every non-empty word, searches
        /// the "name_word" field for that word (top 10 hits), converting each hit with
        /// <c>getFoundDocResult</c>.
        /// </summary>
        /// <param name="query">Space-separated words to look for.</param>
        /// <param name="searcher">Searcher used to run the queries and load the hit documents.</param>
        /// <returns>The converted hits of all words, concatenated in word order (duplicates are kept).</returns>
        private static List <string> searchOneWord_Lucene(string query, IndexSearcher searcher)
        {
            // Search one word at a time
            var collected = new List <string>();

            foreach (var token in query.Split(' '))
            {
                if (String.IsNullOrEmpty(token))
                {
                    continue;
                }

                // A fresh single-term query per word.
                var singleWord = new MultiPhraseQuery();
                singleWord.Add(new Term("name_word", token));

                var topHits = searcher.Search(singleWord, 10).ScoreDocs;
                for (int i = 0; i < topHits.Length; i++)
                {
                    collected.Add(getFoundDocResult(searcher.Doc(topHits[i].Doc)));
                }
            }

            return(collected);
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Searches the "FullRequest" field of the index in <paramref name="directoryInfo"/>
        /// in two ways (a slop-3 phrase of the words of <paramref name="textSearcher"/>, and
        /// the text parsed by a <c>QueryParser</c>) and appends the distinct non-empty
        /// "ObjetcId" values of the hits to <paramref name="lReturn"/>.
        /// </summary>
        /// <param name="directoryInfo">Directory that contains the index.</param>
        /// <param name="textSearcher">Text to search for; it is only read.</param>
        /// <param name="lReturn">List that receives the object ids found; it is only appended to.</param>
        private void GetDataIndexId(DirectoryInfo directoryInfo, ref string textSearcher, ref List <string> lReturn)
        {
            using (Directory directory = FSDirectory.Open(directoryInfo))
            using (Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30))
            using (IndexReader indexReader = IndexReader.Open(directory, true))
            using (Searcher indexSearcher = new IndexSearcher(indexReader))
            {
                List <string> listTemp       = new List <string>();
                char[]        delimiterChars = { ' ', ',', '.', ':', '\t' };

                // Drop empty entries: adjacent delimiters (e.g. ", ") would otherwise add
                // empty terms as phrase positions.
                string[] words = textSearcher.Split(delimiterChars, System.StringSplitOptions.RemoveEmptyEntries);

                if (words.Length > 0)
                {
                    MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery();

                    // Allow the words to be up to 3 positions apart
                    multiPhraseQuery.Slop = 3;

                    foreach (var word in words)
                    {
                        multiPhraseQuery.Add(new Term("FullRequest", word));
                    }

                    CollectObjectIds(indexSearcher, multiPhraseQuery, listTemp);
                }

                // Second pass: let the query parser interpret the text
                var queryParser = new QueryParser(Version.LUCENE_30, "FullRequest", analyzer);
                var query       = queryParser.Parse(textSearcher);

                CollectObjectIds(indexSearcher, query, listTemp);

                lReturn.AddRange(listTemp.Distinct().ToList());
            }
        }

        // Runs the query for its top 100 hits and appends each non-empty stored "ObjetcId" value to target.
        private static void CollectObjectIds(Searcher searcher, Query query, List <string> target)
        {
            TopScoreDocCollector collector = TopScoreDocCollector.Create(100, true);

            searcher.Search(query, collector);

            foreach (var scoreDoc in collector.TopDocs().ScoreDocs)
            {
                string objectId = searcher.Doc(scoreDoc.Doc).Get("ObjetcId");

                if (!string.IsNullOrEmpty(objectId))
                {
                    target.Add(objectId);
                }
            }
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Click handler: runs several searches for the lower-cased text of
        /// <c>search_field</c> and fills the <c>results</c> grid with the distinct documents found.
        /// </summary>
        /// <remarks>
        /// Searches, in order: the text parsed by a <c>QueryParser</c> on "name"; the whole
        /// text as a single exact "name" term; a <c>*word*</c> wildcard on "name" for each
        /// word; and, when one word is a number greater than 1800 and at most 9999, a
        /// "wildcard OR year" query for each remaining word. Every search takes its top 10
        /// hits, and documents are de-duplicated by their "id" field.
        /// </remarks>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        private void lucene_serach_Click(object sender, EventArgs e)
        {
            results.Rows.Clear();
            var query = search_field.Text.ToLower();
            var array = query.Split(' ').ToList();

            var totalResults = new List <Document>();
            // Ids already collected; replaces a linear scan of totalResults per hit.
            var seenIds = new HashSet <int?>();

            // The reader obtained from the writer is owned by this handler; dispose it when done.
            using (var reader = writer1.GetReader(applyAllDeletes: true))
            {
                var searcher = new IndexSearcher(reader);

                // Runs one search (top 10) and keeps the documents whose id is new.
                void Collect(Query q)
                {
                    foreach (var hit in searcher.Search(q, 10).ScoreDocs)
                    {
                        var foundDoc = searcher.Doc(hit.Doc);
                        if (seenIds.Add(foundDoc.GetField("id").GetInt32Value()))
                        {
                            totalResults.Add(foundDoc);
                        }
                    }
                }

                // Parsed query. It used to be re-parsed and re-run once per word with
                // identical input, which could only produce the same hits again.
                QueryParser parser = new QueryParser(AppLuceneVersion, "name", analyzer);
                var         parsed = parser.Parse(query);
                if (array.Any(word => !String.IsNullOrEmpty(word)))
                {
                    Collect(parsed);
                }

                // Full name as a single term
                var phrase = new MultiPhraseQuery();
                phrase.Add(new Term("name", query));
                Collect(phrase);

                // Parts of words
                foreach (var word in array)
                {
                    if (!string.IsNullOrEmpty(word))
                    {
                        Collect(new WildcardQuery(new Term("name", "*" + word + "*")));
                    }
                }

                // Year plus part of a word. Track explicitly whether a year was found:
                // testing "number != 0" was also true when the last word was a number
                // outside the accepted range.
                var number    = 0;
                var yearFound = false;

                foreach (var word in array)
                {
                    if (TryParse(word, out number) && number > 1800 && number <= 9999)
                    {
                        yearFound = true;
                        array.Remove(word); // safe: the loop is left immediately
                        break;
                    }
                }

                Console.WriteLine(yearFound);

                if (yearFound)
                {
                    foreach (var word in array)
                    {
                        if (!string.IsNullOrEmpty(word))
                        {
                            var booleanQuery = new BooleanQuery();
                            var wild         = new WildcardQuery(new Term("name", "*" + word + "*"));
                            var num          = NumericRangeQuery.NewInt32Range("year", 1, number, number, true, true);

                            booleanQuery.Add(wild, Occur.SHOULD);
                            booleanQuery.Add(num, Occur.SHOULD);
                            Collect(booleanQuery);
                        }
                    }
                }

                foreach (var doc in totalResults)
                {
                    results.Rows.Add(doc.GetField("id").GetInt32Value().ToString(),
                                     doc.GetValues("name")[0],
                                     doc.GetField("year").GetInt32Value().ToString());
                }
            }
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Builds the content query for a "match anywhere" search.
        /// </summary>
        /// <remarks>
        /// The search string (lower-cased, and directed at the case-insensitive field, when
        /// <paramref name="matchCase"/> is false) is split on spaces. A single word becomes
        /// a <c>TermQuery</c> if it is one punctuation character, otherwise a <c>*word*</c>
        /// <c>WildcardQuery</c>. Several words become a <c>MultiPhraseQuery</c> in which the
        /// first and last positions are replaced by the term sets that
        /// <c>CollectFirstAndLastTermMatches</c> fills in, whenever those sets are non-empty.
        /// </remarks>
        /// <param name="indexReader">Reader passed on to <c>CollectFirstAndLastTermMatches</c>.</param>
        /// <param name="expandedSearchString">Text to search for.</param>
        /// <param name="matchCase">Whether the search is case-sensitive.</param>
        /// <returns>The query to run against the content field.</returns>
        private Query BuildMatchAnywhereQuery(IndexReader indexReader, string expandedSearchString, bool matchCase)
        {
            string field  = Constants.IndexFields.Content;
            string needle = expandedSearchString;

            if (!matchCase)
            {
                field  = Constants.IndexFields.ContentCaseInsensitive;
                needle = needle.ToLower();
            }

            List <string> words = needle.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).ToList();

            var head = words.FirstOrDefault();
            var tail = words.LastOrDefault();

            if (words.Count == 1)
            {
                // A lone punctuation character is matched exactly; anything else as a substring.
                bool lonePunctuation = head.Length == 1 && LuceneHelper.IsPunctuation(head[0]);
                if (lonePunctuation)
                {
                    return(new TermQuery(new Term(field, head)));
                }
                return(new WildcardQuery(new Term(field, "*" + head + "*")));
            }

            var phrase      = new MultiPhraseQuery();
            var headMatches = new List <Term>();
            var tailMatches = new List <Term>();

            CollectFirstAndLastTermMatches(indexReader, field, headMatches, tailMatches, head, tail);

            bool hasHeadMatches = headMatches.Count > 0;
            bool hasTailMatches = tailMatches.Count > 0;

            if (hasHeadMatches)
            {
                phrase.Add(headMatches.ToArray());
            }

            // The literal first/last word is used only when no match set replaces it.
            int from = hasHeadMatches ? 1 : 0;
            int to   = hasTailMatches ? words.Count - 1 : words.Count;

            for (int position = from; position < to; position++)
            {
                phrase.Add(new Term(field, words[position]));
            }

            if (hasTailMatches)
            {
                phrase.Add(tailMatches.ToArray());
            }

            return(phrase);
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Click handler that searches the Lucene index for the text held in <c>find_text</c>
        /// and appends every distinct matching document to <c>textBox1</c>.
        /// </summary>
        /// <remarks>
        /// Four passes are run, each limited to the top 10 hits per query:
        /// every single word as a term, the whole lower-cased input as one term,
        /// every word as a <c>*word*</c> wildcard, and - when the input contains a
        /// number in the range 1801..9999 - wildcard-or-year queries for the remaining words.
        /// Documents are de-duplicated by the string form of their "id" field.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button4_Click(object sender, EventArgs e)
        {
            var query        = find_text.ToLower();
            var array        = query.Split(' ').ToList();
            var totalResults = new List <Document>();

            // The near-real-time reader is opened per click, so it must be released per click too.
            using (var reader = writer.GetReader())
            {
                var searcher = new IndexSearcher(reader);

                // Runs one query and keeps the hits whose "id" has not been collected yet.
                Action <Query> collect = luceneQuery =>
                {
                    foreach (var hit in searcher.Search(luceneQuery, 10).ScoreDocs)
                    {
                        var foundDoc = searcher.Doc(hit.Doc);
                        var foundId  = foundDoc.GetField("id").ToString();
                        if (!totalResults.Any(f => f.GetField("id").ToString() == foundId))
                        {
                            totalResults.Add(foundDoc);
                        }
                    }
                };

                // Pass 1: each single word of the title.
                foreach (var word in array)
                {
                    if (!String.IsNullOrEmpty(word))
                    {
                        var single = new MultiPhraseQuery();
                        single.Add(new Term("name", word));
                        collect(single);
                    }
                }

                // Pass 2: the whole input as one term.
                var phrase = new MultiPhraseQuery();
                phrase.Add(new Term("name", query));
                collect(phrase);

                // Pass 3: each word as a substring of the title.
                foreach (var word in array)
                {
                    if (!String.IsNullOrEmpty(word))
                    {
                        collect(new WildcardQuery(new Term("name", "*" + word + "*")));
                    }
                }

                // Pass 4: year plus (part of) the title.
                // Find the first word that looks like a year and take it out of the word list.
                // A dedicated flag is used because a trailing number outside the accepted range
                // must not be mistaken for a year.
                int  number    = 0;
                bool yearFound = false;
                for (int i = 0; i < array.Count; i++)
                {
                    int candidate;
                    if (Int32.TryParse(array[i], out candidate) && candidate > 1800 && candidate <= 9999)
                    {
                        number    = candidate;
                        yearFound = true;
                        array.RemoveAt(i);
                        break;
                    }
                }

                if (yearFound)
                {
                    foreach (var word in array)
                    {
                        if (!String.IsNullOrEmpty(word))
                        {
                            BooleanQuery booleanQuery = new BooleanQuery();

                            var wild = new WildcardQuery(new Term("name", "*" + word + "*"));
                            var num  = NumericRangeQuery.NewIntRange("year", 1, number, number, true, true);

                            // NOTE(review): both clauses are optional, so this matches the year OR the
                            // word; confirm whether "year AND word" (Occur.MUST) was intended.
                            booleanQuery.Add(wild, Occur.SHOULD);
                            booleanQuery.Add(num, Occur.SHOULD);
                            collect(booleanQuery);
                        }
                    }
                }
            }

            foreach (var doc in totalResults)
            {
                textBox1.AppendText(doc.ToString());
            }
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Click handler that searches movies for the text in <c>TextSearch</c> and lists the
        /// distinct results in <c>ResultBox</c>.
        /// </summary>
        /// <remarks>
        /// When <c>luceneCheck</c> is unchecked the search runs against PostgreSQL (at most 10 rows
        /// in total across all statements); otherwise it runs against the Lucene index (top 10 hits
        /// per query). Any exception is reported in a message box. The cursor and the search button
        /// are always restored.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void SearchButton_Click(object sender, EventArgs e)
        {
            // Total number of SQL rows collected so far; shared by all SQL passes.
            int counter = 0;

            Cursor.Current       = Cursors.WaitCursor;
            SearchButton.Enabled = false;
            ResultBox.Items.Clear();
            var           query    = TextSearch.Text;
            var           array    = query.Split(' ').ToList();
            List <string> res_list = new List <string>();

            try
            {
                if (!luceneCheck.Checked)
                {
                    using (var conn = new NpgsqlConnection(connString))
                    {
                        conn.Open();

                        // All user text is passed as parameters, never concatenated into the SQL.

                        // Pass 1: exact title.
                        using (var command = new NpgsqlCommand("SELECT * FROM movies WHERE name = @name", conn))
                        {
                            command.Parameters.AddWithValue("name", query);
                            AppendMovieRows(command, res_list, ref counter);
                        }

                        // Pass 2: year taken from the query plus each remaining word as a substring.
                        int number = ExtractYear(array);
                        if (number != 0)
                        {
                            foreach (var word in array)
                            {
                                if (!String.IsNullOrEmpty(word))
                                {
                                    using (var command = new NpgsqlCommand(
                                               "SELECT * FROM movies WHERE year = @year AND name ILIKE @pattern", conn))
                                    {
                                        command.Parameters.AddWithValue("year", number);
                                        command.Parameters.AddWithValue("pattern", "%" + word + "%");
                                        AppendMovieRows(command, res_list, ref counter);
                                    }
                                }
                            }
                        }

                        // Pass 3: the word as the first word, the whole title, or the last word.
                        foreach (var word in array)
                        {
                            if (!String.IsNullOrEmpty(word))
                            {
                                using (var command = new NpgsqlCommand(
                                           "SELECT * FROM movies " +
                                           "WHERE name ILIKE @prefix OR name = @word OR name ILIKE @suffix", conn))
                                {
                                    command.Parameters.AddWithValue("prefix", word + " %");
                                    command.Parameters.AddWithValue("word", word);
                                    command.Parameters.AddWithValue("suffix", "% " + word);
                                    AppendMovieRows(command, res_list, ref counter);
                                }
                            }
                        }

                        // Pass 4: the word anywhere in the title, so that at least something is found.
                        foreach (var word in array)
                        {
                            if (!String.IsNullOrEmpty(word))
                            {
                                using (var command = new NpgsqlCommand("SELECT * FROM movies WHERE name ILIKE @pattern", conn))
                                {
                                    command.Parameters.AddWithValue("pattern", "%" + word + "%");
                                    AppendMovieRows(command, res_list, ref counter);
                                }
                            }
                        }
                    }
                }
                else
                {
                    // Pass 1: the query as understood by the query parser.
                    // The parsed query does not depend on the individual word, so it is run once
                    // (and only when the input contains at least one non-empty word).
                    QueryParser parser = new QueryParser(AppLuceneVersion, "name", analyzer);
                    if (array.Any(w => !String.IsNullOrEmpty(w)))
                    {
                        AppendLuceneHits(parser.Parse(query), res_list);
                    }

                    // Pass 2: the full input as a single term.
                    var phrase = new MultiPhraseQuery();
                    phrase.Add(new Term("name", query));
                    AppendLuceneHits(phrase, res_list);

                    // Pass 3: each word as a wildcard query.
                    // NOTE(review): no '*' is added around the word, so this only matches partial
                    // words when the user typed the wildcards themselves - confirm intent.
                    foreach (var word in array)
                    {
                        if (!String.IsNullOrEmpty(word))
                        {
                            AppendLuceneHits(new WildcardQuery(new Term("name", word)), res_list);
                        }
                    }

                    // Pass 4: year taken from the query AND each remaining word.
                    int number = ExtractYear(array);
                    if (number != 0)
                    {
                        foreach (var word in array)
                        {
                            if (!String.IsNullOrEmpty(word))
                            {
                                BooleanQuery booleanQuery = new BooleanQuery();
                                var          wild         = new WildcardQuery(new Term("name", word));
                                var          num          = NumericRangeQuery.NewInt32Range("year", 1, number, number, true, true);
                                booleanQuery.Add(wild, Occur.MUST);
                                booleanQuery.Add(num, Occur.MUST);
                                AppendLuceneHits(booleanQuery, res_list);
                            }
                        }
                    }
                }

                // No duplicates wanted.
                res_list = res_list.Distinct().ToList();
                ResultBox.Items.Clear();
                foreach (var item in res_list)
                {
                    ResultBox.Items.Add(item);
                }

                // Nothing found at all.
                if (ResultBox.Items.Count == 0)
                {
                    ResultBox.Items.Add("Нет результатов. Попробуйте расширить поисковый запрос");
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show("Error occured while searching: " + ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
            finally
            {
                Cursor.Current       = Cursors.Default;
                SearchButton.Enabled = true;
            }
        }

        /// <summary>
        /// Removes the first word that parses as an integer in the range 1801..9999 from
        /// <paramref name="words"/> and returns it; returns 0 (list untouched) when there is none.
        /// </summary>
        private static int ExtractYear(List <string> words)
        {
            for (int i = 0; i < words.Count; i++)
            {
                int candidate;
                if (Int32.TryParse(words[i], out candidate) && candidate > 1800 && candidate <= 9999)
                {
                    words.RemoveAt(i);
                    return(candidate);
                }
            }
            return(0);
        }

        /// <summary>
        /// Executes <paramref name="command"/> and appends one formatted line per row
        /// (columns 0, 1, 2 read as id, year, name) until <paramref name="counter"/> reaches 10.
        /// </summary>
        private static void AppendMovieRows(NpgsqlCommand command, List <string> results, ref int counter)
        {
            using (var reader = command.ExecuteReader())
            {
                while (reader.Read() && counter < 10)
                {
                    counter += 1;
                    var id   = reader.GetInt32(0);
                    var year = reader.GetInt32(1);
                    var name = reader.GetString(2);
                    results.Add("ID: " + id.ToString() + " YEAR: " + year.ToString() + " NAME: " + name);
                }
            }
        }

        /// <summary>
        /// Runs <paramref name="luceneQuery"/> (top 10 hits) on the form's searcher and appends one
        /// formatted line per hit to <paramref name="results"/>.
        /// </summary>
        private void AppendLuceneHits(Query luceneQuery, List <string> results)
        {
            foreach (var hit in searcher.Search(luceneQuery, 10).ScoreDocs)
            {
                var foundDoc = searcher.Doc(hit.Doc);
                var score    = hit.Score;
                results.Add("Score: " + score + " ID: " + foundDoc.GetField("id").GetInt32Value().ToString() +
                            " YEAR: " + foundDoc.GetField("year").GetInt32Value().ToString() + " NAME: " + foundDoc.GetValues("name")[0]);
            }
        }
Ejemplo n.º 24
0
 /// <summary>
 /// Checks that a phrase query built over CJK text with the mock CJK synonym analyzer is a
 /// MultiPhraseQuery holding both variants at the second position, with and without slop.
 /// </summary>
 public virtual void TestCJKSynonymsPhrase()
 {
     var builder = new QueryBuilder(new MockCJKSynonymAnalyzer());

     // Position 0 holds a single term; position 1 holds the two alternatives.
     var expected = new MultiPhraseQuery();
     expected.Add(new Term("field", "中"));
     expected.Add(new[] { new Term("field", "国"), new Term("field", "國") });

     // Default slop.
     Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "中国"));

     // Explicit slop must be carried over to the built query.
     expected.Slop = 3;
     Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "中国", 3));
 }
        /// <summary>
        /// Searches bills using the parameters of the current HTTP request
        /// ("q", "session", "start", "count", "chamber", "status", "sponsor", "cosponsor", "sort").
        /// </summary>
        /// <param name="api">
        /// When false, the default page size is 30 and exact bill-number / public-law matches
        /// redirect the response to the bill page; when true, the default page size is 1000 and
        /// no redirect is issued for bill numbers.
        /// </param>
        /// <returns>
        /// A table with "count", "method" and "results" (rows of "billstatus" sorted by
        /// <c>BillHitComparer</c>); a table with only "count" = 0 when nothing matched, the query
        /// contains '*', or the query could not be parsed; or <c>null</c> after a redirect.
        /// </returns>
        public static Hashtable Search(bool api)
        {
            BillType type;
            int      number;
            int      session = -1;

            if (HttpContext.Current.Request["session"] != null && HttpContext.Current.Request["session"] != "")
            {
                session = int.Parse(HttpContext.Current.Request["session"]);
            }

            string q = HttpContext.Current.Request["q"];

            int start = 0, count = (!api ? 30 : 1000);

            if (HttpContext.Current.Request["start"] != null)
            {
                start = int.Parse(HttpContext.Current.Request["start"]);
            }
            if (HttpContext.Current.Request["count"] != null)
            {
                count = int.Parse(HttpContext.Current.Request["count"]);
            }

            // A negative offset from the request would index the hit list out of range below.
            if (start < 0)
            {
                start = 0;
            }

            BooleanQuery query = new BooleanQuery();

            Hashtable no_results = new Hashtable();

            no_results["count"] = 0;

            // Wildcard queries are rejected outright.
            if (q != null && q.IndexOf("*") > -1)
            {
                return(no_results);
            }

            // "text/NNN": a short numeric suffix after a slash is taken as the session number.
            if (!api && session == -1 && q != null)
            {
                int slash = q.IndexOf('/');
                if (slash >= q.Length - 4 && slash > 2)
                {
                    try {
                        session = int.Parse(q.Substring(slash + 1));                       // and if that worked...
                        q       = q.Substring(0, slash);
                        HttpContext.Current.Response.Redirect("billsearch.xpd?session=" + session + "&q=" + HttpUtility.UrlEncode(q));
                        return(null);
                    } catch { }
                }
            }

            if (session == -1)
            {
                session = Util.CurrentSession;
            }

            //Console.Error.WriteLine("Find: " + q);

            string    search_method = "search";
            ArrayList specs         = new ArrayList();   // one database spec per matched bill
            Hashtable scores        = new Hashtable();   // session+type+number -> score used for sorting

            // Match a bill number exactly
            if (q != null && Bills.ParseID(q, out type, out number))
            {
                if (!api)
                {
                    // Redirect the user right to the bill page.
                    // Don't even check if bill exists.
                    HttpContext.Current.Response.Redirect(
                        Bills.BillLink2(session, type, number));
                    return(null);
                }
                else
                {
                    search_method = "search by bill number";
                    scores[session + EnumsConv.BillTypeToString(type) + number] = 1.0F;
                    specs.Add(new Database.AndSpec(
                                  new Database.SpecEQ("session", session),
                                  new Database.SpecEQ("type", EnumsConv.BillTypeToString(type)),
                                  new Database.SpecEQ("number", number)));
                }
            }

            // Match public law number exactly
            // NOTE(review): the outer condition requires !api, so the inner "else" branch below
            // can never run - confirm whether API callers were meant to reach it.
            if (!api && q != null && (q.StartsWith("P.L.") || q.StartsWith("PL")))
            {
                try {
                    string num = null;
                    if (q.StartsWith("P.L."))
                    {
                        num = q.Substring(4);
                    }
                    if (q.StartsWith("PL"))
                    {
                        num = q.Substring(2);
                    }
                    num = num.Replace(" ", "");

                    int dash = num.IndexOf('-');
                    int s    = int.Parse(num.Substring(0, dash));

                    TableRow bill = Util.Database.DBSelectFirst("billindex", "session, type, number",
                                                                new Database.SpecEQ("idx", "publiclawnumber"),
                                                                new Database.SpecEQ("session", s),
                                                                new Database.SpecEQ("value", num));

                    if (bill != null)
                    {
                        if (!api)
                        {
                            HttpContext.Current.Response.Redirect(Bills.BillLink3((int)bill["session"], (string)bill["type"], (int)bill["number"]));
                            return(null);
                        }
                        else
                        {
                            search_method = "search by public law number";
                            scores[(int)bill["session"] + (string)bill["type"] + (int)bill["number"]] = 1.0F;
                            specs.Add(new Database.AndSpec(
                                          new Database.SpecEQ("session", (int)bill["session"]),
                                          new Database.SpecEQ("type", (string)bill["type"]),
                                          new Database.SpecEQ("number", (int)bill["number"])));
                        }
                    }
                } catch {
                    // Malformed public law numbers are ignored; the search falls through to the
                    // other strategies.
                }
            }

            // Match USC reference
            Regex uscexp = new Regex(@"(\d[0-9A-Za-z\-]*)\s+U\.?S\.?C\.?\s+(\d[0-9A-Za-z\-]*)((\s*\([^\) ]+\))*)",
                                     RegexOptions.IgnoreCase);
            Match uscmc = (q == null ? null : uscexp.Match(q));

            if (uscmc != null && uscmc.Success)
            {
                string title     = uscmc.Groups[1].Value;
                string section   = uscmc.Groups[2].Value;
                string paragraph = uscmc.Groups[3].Value;

                // Turn "(a)(1)" into "_a__1": split on the brackets and join the pieces between
                // the first and last non-empty ones with underscores.
                string[] ps  = paragraph.Split('[', '(', ')', ' ');
                int      psi = 0; while (psi < ps.Length - 1 && ps[psi] == "")
                {
                    psi++;
                }
                int pse = ps.Length - 1; while (pse > 0 && ps[pse] == "")
                {
                    pse--;
                }
                if (ps.Length != 0)
                {
                    paragraph = "_" + String.Join("_", ps, psi, pse - psi + 1);
                }

                Table table = Util.Database.DBSelect("billusc", "session, type, number",
                                                     new Database.SpecEQ("session", session),
                                                     new Database.OrSpec(
                                                         new Database.SpecEQ("ref", title + "_" + section + paragraph),
                                                         new Database.SpecStartsWith("ref", title + "_" + section + paragraph + "_")));
                foreach (TableRow bill in table)
                {
                    search_method = "search by U.S.C. section";
                    scores[(int)bill["session"] + (string)bill["type"] + (int)bill["number"]] = 1.0F;
                    specs.Add(new Database.AndSpec(
                                  new Database.SpecEQ("session", (int)bill["session"]),
                                  new Database.SpecEQ("type", (string)bill["type"]),
                                  new Database.SpecEQ("number", (int)bill["number"])));
                }
            }

            int total_count = -1;

            // No exact match so far: fall back to the full-text index.
            if (specs.Count == 0)
            {
                if (q != null && q.Trim() != "")
                {
                    BooleanQuery query1 = new BooleanQuery();
                    query.Add(query1, BooleanClause.Occur.MUST);
                    try {
                        /*if (!q.StartsWith("-")) {
                         *      PhraseQuery pq = new PhraseQuery();
                         *      pq.Add( new Term("shorttitles", q) );
                         *      pq.SetBoost((float)4);
                         *      query1.Add(pq, false, false);
                         * }*/

                        Query query_titles2 = new QueryParser("shorttitles", new StandardAnalyzer()).Parse(q);
                        query_titles2.SetBoost((float)3);
                        query1.Add(query_titles2, BooleanClause.Occur.SHOULD);

                        Query query_titles1 = new QueryParser("officialtitles", new StandardAnalyzer()).Parse(q);
                        query_titles1.SetBoost((float)2);
                        query1.Add(query_titles1, BooleanClause.Occur.SHOULD);

                        Query query_summary = new QueryParser("summary", new StandardAnalyzer()).Parse(q);
                        query1.Add(query_summary, BooleanClause.Occur.SHOULD);

                        Query query_text = new QueryParser("fulltext", new StandardAnalyzer()).Parse(q);
                        query1.Add(query_text, BooleanClause.Occur.SHOULD);
                    } catch (Exception) {
                        // An unparsable query yields an empty result rather than an error page.
                        return(no_results);
                    }
                }

                string   chamber   = HttpContext.Current.Request["chamber"];
                string[] status    = HttpContext.Current.Request["status"] == null ? null : HttpContext.Current.Request["status"].Split(',');
                string   sponsor   = HttpContext.Current.Request["sponsor"];
                string   cosponsor = HttpContext.Current.Request["cosponsor"];

                if (chamber != null && (chamber == "s" || chamber == "h"))
                {
                    query.Add(new WildcardQuery(new Term("type", chamber + "*")), BooleanClause.Occur.MUST);
                }
                if (status != null && status[0] != "")
                {
                    // All requested states are added at one position of a MultiPhraseQuery,
                    // i.e. any one of them may match.
                    List <Term> terms = new List <Term>();
                    foreach (string s in status)
                    {
                        terms.Add(new Term("state", s));
                    }
                    MultiPhraseQuery mpq = new MultiPhraseQuery();
                    mpq.Add(terms.ToArray());
                    query.Add(mpq, BooleanClause.Occur.MUST);
                }
                if (sponsor != null && sponsor != "")
                {
                    query.Add(new TermQuery(new Term("sponsor", sponsor)), BooleanClause.Occur.MUST);
                }
                if (cosponsor != null && cosponsor != "")
                {
                    query.Add(new TermQuery(new Term("cosponsor", cosponsor)), BooleanClause.Occur.MUST);
                }

                Sort sort = null;
                if (HttpContext.Current.Request["sort"] != null && HttpContext.Current.Request["sort"] == "introduced")
                {
                    sort = new Sort(new SortField("introduced", SortField.STRING, true));
                }
                if (HttpContext.Current.Request["sort"] != null && HttpContext.Current.Request["sort"] == "lastaction")
                {
                    sort = new Sort(new SortField("lastaction", SortField.STRING, true));
                }

                IndexSearcher searcher = new IndexSearcher(Util.DataPath + Path.DirectorySeparatorChar + session + Path.DirectorySeparatorChar + "index.bills.lucene");
                try
                {
                    Hits hits = searcher.Search(query, sort == null ? new Sort() : sort);

                    int end = hits.Length();
                    if (start + count < end)
                    {
                        end = start + count;
                    }
                    total_count = hits.Length();

                    for (int i = start; i < end; i++)
                    {
                        Document doc         = hits.Doc(i);
                        string   billsession = doc.Get("session");
                        string   billtype    = doc.Get("type");
                        string   billnumber  = doc.Get("number");

                        int istatus = (int)EnumsConv.BillStatusFromString(doc.Get("status"));

                        float score;
                        if (sort == null)             // readjust the score based on status
                        {
                            score = hits.Score(i) + istatus / (float)8 * (float).2;
                        }
                        else             // keep order from Lucene
                        {
                            score = -i;
                        }

                        scores[billsession + billtype + billnumber] = score;
                        specs.Add(new Database.AndSpec(
                                      new Database.SpecEQ("session", billsession),
                                      new Database.SpecEQ("type", billtype),
                                      new Database.SpecEQ("number", billnumber)));
                    }
                }
                finally
                {
                    // The searcher is opened per request, so it has to be released per request;
                    // this happens only after the hit documents have been read.
                    searcher.Close();
                }

                // "sort=hits": replace the scores with the stored hit counters.
                if (HttpContext.Current.Request["sort"] != null && HttpContext.Current.Request["sort"] == "hits" && specs.Count > 0)
                {
                    Table hitsinfo = Util.Database.DBSelect("billhits", "*",
                                                            Database.OrSpec.New((Database.Spec[])specs.ToArray(typeof(Database.Spec))));
                    foreach (TableRow billhits in hitsinfo)
                    {
                        scores["" + billhits["session"] + billhits["type"] + billhits["number"]] = (float)(int)billhits["hits1"];
                    }
                }
            }

            if (specs.Count == 0)
            {
                return(no_results);
            }

            Table billinfo = Util.Database.DBSelect("billstatus", "*",
                                                    Database.OrSpec.New((Database.Spec[])specs.ToArray(typeof(Database.Spec))));

            // Exact-match strategies do not compute a total; use the number of rows found.
            if (total_count == -1)
            {
                total_count = billinfo.Rows;
            }

            ArrayList ret = new ArrayList();

            foreach (TableRow r in billinfo)
            {
                ret.Add(r);
            }

            BillHitComparer bhc = new BillHitComparer();

            bhc.scores = scores;
            ret.Sort(bhc);

            Hashtable ret2 = new Hashtable();

            ret2["count"]   = total_count;
            ret2["method"]  = search_method;
            ret2["results"] = ret;

            return(ret2);
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Highlights a MultiPhraseQuery whose terms are added at non-adjacent, out-of-order
        /// positions (2, then 0) and expects two highlights in a single fragment.
        /// </summary>
        public void TestQueryScorerMultiPhraseQueryHighlightingWithGap()
        {
            const int fragmentLimit  = 1;
            const int highlightCount = 2;

            /*
             * MultiPhraseQuery.ToString copes poorly with positions added out of
             * order like this, yet the query itself appears to match correctly.
             */
            var gappedQuery = new MultiPhraseQuery();
            gappedQuery.Add(new[] { new Term(FIELD_NAME, "wordz") }, 2);
            gappedQuery.Add(new[] { new Term(FIELD_NAME, "wordx") }, 0);

            DoSearching(gappedQuery);

            AssertExpectedHighlightCount(fragmentLimit, highlightCount);
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Demo entry point: indexes three JSON records into an in-memory Lucene index
        /// (one <c>app_data</c> field instance per array element, plus the raw record as a
        /// stored <c>payload</c> field), then prints the hit count and payloads of every
        /// record whose <c>app_data</c> contains "12355" or "89700".
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var jsonProps = "[{\"name\":\"Ondrej\",\"surname\":\"Kubicek\",\"app_data\":[\"112233\", \"56612\"]},"
                            + "{\"name\":\"Lukas\",\"surname\":\"Bily\",\"app_data\":[\"12355\", \"112233\", \"89466\"]}," +
                            "{\"name\":\"Lenak\",\"surname\":\"Nejaka\",\"app_data\":[\"89700\"]}]";

            var version = LuceneVersion.LUCENE_48;

            // Directory, analyzer and writer are all disposable; stacking the usings
            // releases them in reverse order (writer first, directory last).
            using (var dir = new RAMDirectory())
            using (var analyzer = new StandardAnalyzer(version))
            using (var writer = new IndexWriter(dir, new IndexWriterConfig(version, analyzer)))
            {
                // JsonDocument rents pooled buffers, so it must be disposed as well.
                using (var parsed = JsonDocument.Parse(jsonProps))
                {
                    foreach (var line in parsed.RootElement.EnumerateArray())
                    {
                        var doc = new Document();

                        doc.Add(new StringField("name", line.GetProperty("name").GetString(), Field.Store.NO));
                        doc.Add(new StringField("surname", line.GetProperty("surname").GetString(), Field.Store.NO));

                        // Multi-valued field: one StringField instance per array element.
                        foreach (var f in line.GetProperty("app_data").EnumerateArray())
                        {
                            doc.Add(new StringField("app_data", f.GetString(), Field.Store.NO));
                        }

                        // Keep the whole record retrievable from search hits.
                        doc.Add(new StringField("payload", line.ToString(), Field.Store.YES));

                        writer.AddDocument(doc);
                    }
                }

                writer.Flush(false, false);

                // The near-real-time reader is owned by the caller and must be disposed.
                using (var reader = writer.GetReader(true))
                {
                    var searcher = new IndexSearcher(reader);

                    // Either app_data value is sufficient for a match.
                    var booleanQuery = new BooleanQuery();
                    booleanQuery.Add(new TermQuery(new Term("app_data", "12355")), Occur.SHOULD);
                    booleanQuery.Add(new TermQuery(new Term("app_data", "89700")), Occur.SHOULD);

                    var res = searcher.Search(booleanQuery, 100);

                    Console.WriteLine(res.TotalHits);
                    foreach (var hit in res.ScoreDocs)
                    {
                        var item = searcher.Doc(hit.Doc);
                        Console.WriteLine(item.Get("payload"));
                    }
                }
            }
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Creates a query from the analysis chain.
        /// <para/>
        /// Expert: this is more useful for subclasses such as queryparsers.
        /// If using this class directly, just use <see cref="CreateBooleanQuery(string, string)"/>
        /// and <see cref="CreatePhraseQuery(string, string)"/>. </summary>
        /// <remarks>
        /// The analyzed tokens are consumed twice through a <see cref="CachingTokenFilter"/>:
        /// a first pass counts tokens and positions, then the buffer is rewound and a second
        /// pass builds the query. The shape of the result depends on those counts:
        /// no tokens yields <c>null</c>; one token yields a term query; several tokens yield
        /// a boolean query (unquoted, or all tokens at one position), a
        /// <see cref="MultiPhraseQuery"/> (quoted, with some tokens sharing a position), or a
        /// <see cref="PhraseQuery"/> (quoted, every token at its own position).
        /// </remarks>
        /// <param name="analyzer"> Analyzer used for this query. </param>
        /// <param name="operator"> Default boolean operator used for this query. </param>
        /// <param name="field"> Field to create queries against. </param>
        /// <param name="queryText"> Text to be passed to the analysis chain. </param>
        /// <param name="quoted"> <c>true</c> if phrases should be generated when terms occur at more than one position. </param>
        /// <param name="phraseSlop"> Slop factor for phrase/multiphrase queries. </param>
        /// <returns> The generated query, or <c>null</c> if analysis produced no tokens. </returns>
        protected Query CreateFieldQuery(Analyzer analyzer, Occur @operator, string field, string queryText, bool quoted, int phraseSlop)
        {
            // Only SHOULD and MUST are accepted as the default operator (checked in debug builds only).
            Debug.Assert(@operator == Occur.SHOULD || @operator == Occur.MUST);
            // Use the analyzer to get all the tokens, and then build a TermQuery,
            // PhraseQuery, or nothing based on the term count
            CachingTokenFilter          buffer     = null;
            ITermToBytesRefAttribute    termAtt    = null;
            IPositionIncrementAttribute posIncrAtt = null;
            int  numTokens     = 0;
            int  positionCount = 0;
            bool severalTokensAtSamePosition = false;
            bool hasMoreTokens = false;

            TokenStream source = null;

            // ---- Pass 1: count tokens and positions ----
            try
            {
                source = analyzer.GetTokenStream(field, new StringReader(queryText));
                source.Reset();
                buffer = new CachingTokenFilter(source);
                buffer.Reset();

                // Both attributes are optional; they stay null when the stream does not expose them.
                if (buffer.HasAttribute <ITermToBytesRefAttribute>())
                {
                    termAtt = buffer.GetAttribute <ITermToBytesRefAttribute>();
                }
                if (buffer.HasAttribute <IPositionIncrementAttribute>())
                {
                    posIncrAtt = buffer.GetAttribute <IPositionIncrementAttribute>();
                }

                // Without a term attribute nothing is counted, so numTokens stays 0 and null is returned below.
                if (termAtt != null)
                {
                    try
                    {
                        hasMoreTokens = buffer.IncrementToken();
                        while (hasMoreTokens)
                        {
                            numTokens++;
                            // A missing position-increment attribute is treated as an increment of 1 per token.
                            int positionIncrement = (posIncrAtt != null) ? posIncrAtt.PositionIncrement : 1;
                            if (positionIncrement != 0)
                            {
                                positionCount += positionIncrement;
                            }
                            else
                            {
                                // Increment 0 means this token shares the previous token's position.
                                severalTokensAtSamePosition = true;
                            }
                            hasMoreTokens = buffer.IncrementToken();
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        // ignore
                        // (counting simply stops; tokens counted so far are still used below)
                    }
                }
            }
            catch (System.IO.IOException e)
            {
                // NOTE(review): wraps the I/O failure in a plain Exception — callers cannot catch anything more specific.
                throw new Exception("Error analyzing query text", e);
            }
            finally
            {
                // Disposes the underlying stream; the second pass below reads from `buffer` after a Reset.
                IOUtils.DisposeWhileHandlingException(source);
            }

            // ---- Pass 2: replay the buffered tokens and build the query ----
            // rewind the buffer stream
            buffer.Reset();

            // Fetched once and reused; every Term below takes a deep copy of it after FillBytesRef()
            // (presumably FillBytesRef refreshes this instance for the current token — confirm).
            BytesRef bytes = termAtt == null ? null : termAtt.BytesRef;

            if (numTokens == 0)
            {
                return(null);
            }
            else if (numTokens == 1)
            {
                // Exactly one token: a plain term query.
                try
                {
                    bool hasNext = buffer.IncrementToken();
                    Debug.Assert(hasNext == true);
                    termAtt.FillBytesRef();
                }
                catch (System.IO.IOException)
                {
                    // safe to ignore, because we know the number of tokens
                }
                return(NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes))));
            }
            else
            {
                if (severalTokensAtSamePosition || (!quoted))
                {
                    if (positionCount == 1 || (!quoted))
                    {
                        // no phrase query:

                        if (positionCount == 1)
                        {
                            // simple case: only one position, with synonyms
                            BooleanQuery q = NewBooleanQuery(true);
                            for (int i = 0; i < numTokens; i++)
                            {
                                try
                                {
                                    bool hasNext = buffer.IncrementToken();
                                    Debug.Assert(hasNext == true);
                                    termAtt.FillBytesRef();
                                }
                                catch (System.IO.IOException)
                                {
                                    // safe to ignore, because we know the number of tokens
                                }
                                // Every token becomes an optional (SHOULD) clause, regardless of @operator.
                                Query currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                                q.Add(currentQuery, Occur.SHOULD);
                            }
                            return(q);
                        }
                        else
                        {
                            // multiple positions
                            // `currentQuery` accumulates the clause for the position being read; it is
                            // flushed into `q` with @operator each time a new position starts.
                            BooleanQuery q            = NewBooleanQuery(false);
                            Query        currentQuery = null;
                            for (int i = 0; i < numTokens; i++)
                            {
                                try
                                {
                                    bool hasNext = buffer.IncrementToken();
                                    Debug.Assert(hasNext == true);
                                    termAtt.FillBytesRef();
                                }
                                catch (System.IO.IOException)
                                {
                                    // safe to ignore, because we know the number of tokens
                                }
                                if (posIncrAtt != null && posIncrAtt.PositionIncrement == 0)
                                {
                                    // Same position as the previous token: promote the pending clause to a
                                    // nested boolean query of SHOULD alternatives (once), then append this term.
                                    if (!(currentQuery is BooleanQuery))
                                    {
                                        Query t = currentQuery;
                                        currentQuery = NewBooleanQuery(true);
                                        ((BooleanQuery)currentQuery).Add(t, Occur.SHOULD);
                                    }
                                    ((BooleanQuery)currentQuery).Add(NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes))), Occur.SHOULD);
                                }
                                else
                                {
                                    // New position: emit the clause built for the previous position, if any.
                                    if (currentQuery != null)
                                    {
                                        q.Add(currentQuery, @operator);
                                    }
                                    currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                                }
                            }
                            // Emit the clause for the final position.
                            q.Add(currentQuery, @operator);
                            return(q);
                        }
                    }
                    else
                    {
                        // phrase query:
                        // Quoted text where some tokens share a position: terms at the same position are
                        // collected in `multiTerms` and added to the MultiPhraseQuery as one slot.
                        MultiPhraseQuery mpq = NewMultiPhraseQuery();
                        mpq.Slop = phraseSlop;
                        IList <Term> multiTerms = new List <Term>();
                        int          position   = -1;
                        for (int i = 0; i < numTokens; i++)
                        {
                            int positionIncrement = 1;
                            try
                            {
                                bool hasNext = buffer.IncrementToken();
                                Debug.Assert(hasNext == true);
                                termAtt.FillBytesRef();
                                if (posIncrAtt != null)
                                {
                                    positionIncrement = posIncrAtt.PositionIncrement;
                                }
                            }
                            catch (System.IO.IOException)
                            {
                                // safe to ignore, because we know the number of tokens
                            }

                            // A positive increment starts a new slot: flush the terms gathered for the
                            // previous one. `position` is still the previous slot's position here because
                            // it is only advanced after the flush.
                            if (positionIncrement > 0 && multiTerms.Count > 0)
                            {
                                // enablePositionIncrements is a member declared outside this method; when set,
                                // the explicit position is passed to Add, otherwise Add is called without one.
                                if (enablePositionIncrements)
                                {
                                    mpq.Add(multiTerms.ToArray(), position);
                                }
                                else
                                {
                                    mpq.Add(multiTerms.ToArray());
                                }
                                multiTerms.Clear();
                            }
                            position += positionIncrement;
                            multiTerms.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
                        }
                        // Flush the terms of the last slot.
                        if (enablePositionIncrements)
                        {
                            mpq.Add(multiTerms.ToArray(), position);
                        }
                        else
                        {
                            mpq.Add(multiTerms.ToArray());
                        }
                        return(mpq);
                    }
                }
                else
                {
                    // Quoted text with every token at its own position: a plain phrase query.
                    PhraseQuery pq = NewPhraseQuery();
                    pq.Slop = phraseSlop;
                    int position = -1;

                    for (int i = 0; i < numTokens; i++)
                    {
                        int positionIncrement = 1;

                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            termAtt.FillBytesRef();
                            if (posIncrAtt != null)
                            {
                                positionIncrement = posIncrAtt.PositionIncrement;
                            }
                        }
                        catch (System.IO.IOException)
                        {
                            // safe to ignore, because we know the number of tokens
                        }

                        // With position increments enabled the term is added at its explicit position;
                        // otherwise Add is called without a position.
                        if (enablePositionIncrements)
                        {
                            position += positionIncrement;
                            pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)), position);
                        }
                        else
                        {
                            pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
                        }
                    }
                    return(pq);
                }
            }
        }
Ejemplo n.º 29
0
 /// <summary>
 /// Checks that building a phrase query for "old dogs" through a synonym-producing
 /// analyzer yields a <see cref="MultiPhraseQuery"/> with "old" followed by the
 /// alternatives "dogs" / "dog".
 /// </summary>
 public virtual void TestSynonymsPhrase()
 {
     // Expected shape: a single term, then a slot holding both synonym variants.
     Term[] dogVariants = { new Term("field", "dogs"), new Term("field", "dog") };
     var expected = new MultiPhraseQuery();
     expected.Add(new Term("field", "old"));
     expected.Add(dogVariants);

     var builder = new QueryBuilder(new MockSynonymAnalyzer());
     var actual = builder.CreatePhraseQuery("field", "old dogs");

     Assert.AreEqual(expected, actual);
 }
Ejemplo n.º 30
0
        /// <summary>
        /// On base of https://lucenenet.apache.org
        /// <para/>
        /// Runs several searches for <paramref name="query"/> in turn (exact word on
        /// <c>name_word</c>, the whole text on <c>full_name</c>, substring wildcard on
        /// <c>name_word</c>, and, when a number is present, wildcard-or-year), merges the hits
        /// in that order without repeating a document <c>id</c>, and yields the first ten.
        /// </summary>
        /// <param name="query">Space-separated search text; empty words are ignored.</param>
        /// <returns>Up to ten (<c>full_name</c>, <c>year</c>) pairs in discovery order.</returns>
        public IEnumerable <(string name, int year)> SearchWithLucy(string query)
        {
            var words        = query.Split(' ').ToList();
            var totalResults = new List <Document>();
            // Ids already collected; replaces a linear scan of totalResults for every hit.
            var seenIds = new HashSet <int?>();

            // The near-real-time reader is owned by the caller and must be disposed; in this
            // iterator that happens when enumeration finishes or the enumerator is disposed.
            using (var reader = lucyAdapter.lucyWriter.GetReader(applyAllDeletes: true))
            {
                var searcher = new IndexSearcher(reader);

                // Runs one query (top 10) and appends every document whose id is new.
                void Collect(Query luceneQuery)
                {
                    foreach (var hit in searcher.Search(luceneQuery, 10).ScoreDocs)
                    {
                        var document = searcher.Doc(hit.Doc);
                        if (seenIds.Add(document.GetField("id").GetInt32Value()))
                        {
                            totalResults.Add(document);
                        }
                    }
                }

                // word
                foreach (var word in words)
                {
                    if (string.IsNullOrEmpty(word))
                    {
                        continue;
                    }
                    var wordQuery = new MultiPhraseQuery();
                    wordQuery.Add(new Term("name_word", word));
                    Collect(wordQuery);
                }

                // full name
                var fullNameQuery = new MultiPhraseQuery();
                fullNameQuery.Add(new Term("full_name", query));
                Collect(fullNameQuery);

                // word parts
                foreach (var word in words)
                {
                    if (string.IsNullOrEmpty(word))
                    {
                        continue;
                    }
                    Collect(new WildcardQuery(new Term("name_word", "*" + word + "*")));
                }

                // year and word part
                // Take the first word that parses as an integer as the year and drop it from the
                // word list. An index loop is used so the list is not modified while a foreach
                // enumerator is active. TryParse leaves 0 behind on failure, so 0 doubles as
                // "no year found" below.
                var number = 0;
                for (var i = 0; i < words.Count; i++)
                {
                    if (int.TryParse(words[i], out number))
                    {
                        words.RemoveAt(i);
                        break;
                    }
                }

                if (number != 0)
                {
                    foreach (var word in words)
                    {
                        if (string.IsNullOrEmpty(word))
                        {
                            continue;
                        }

                        // NOTE(review): both clauses are SHOULD, so a document matching either the
                        // word part or the year is returned — confirm whether MUST was intended.
                        var booleanQuery = new BooleanQuery();
                        booleanQuery.Add(new WildcardQuery(new Term("name_word", "*" + word + "*")), Occur.SHOULD);
                        booleanQuery.Add(NumericRangeQuery.NewInt32Range("year", 1, number, number, true, true), Occur.SHOULD);
                        Collect(booleanQuery);
                    }
                }

                foreach (var doc in totalResults.Take(10))
                {
                    yield return(doc.GetValues("full_name")[0], (int)doc.GetField("year").GetInt32Value());
                }
            }
        }
Ejemplo n.º 31
0
        /// <summary>
        /// Searches the index under the reader lock and returns the inflated hits, the total
        /// hit count and, when requested, facet counts.
        /// </summary>
        /// <remarks>
        /// When <c>queryDefinition.TermQueries</c> is empty every document matches. Otherwise
        /// each term query contributes one slot to a <see cref="MultiPhraseQuery"/>, holding the
        /// lower-cased, whitespace-separated words of its value.
        /// </remarks>
        /// <param name="queryDefinition">Term queries, result limit and facet settings.</param>
        /// <param name="cancellationToken">Cancels waiting for the reader lock.</param>
        /// <returns>The search result for <paramref name="queryDefinition"/>.</returns>
        public async Task <SearchResult <T> > SearchAsync(SearchQuery queryDefinition, CancellationToken cancellationToken = default)
        {
            using (await writerLock.ReaderLockAsync(cancellationToken))
            {
                var      result = new SearchResult <T>();
                List <T> hits   = new List <T>();

                using (var writer = getWriter())
                {
                    Query query = new MatchAllDocsQuery();

                    // Term queries
                    if (queryDefinition.TermQueries.Any())
                    {
                        var phraseQuery = new MultiPhraseQuery();
                        foreach (var termQuery in queryDefinition.TermQueries)
                        {
                            // Invariant lower-casing keeps the terms independent of the current
                            // thread culture (e.g. the Turkish dotless i).
                            var terms = termQuery.value
                                        .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
                                        .Select(phrase => new Term(termQuery.field, phrase.ToLowerInvariant()))
                                        .ToArray();

                            // A blank value yields no terms; MultiPhraseQuery cannot take an empty
                            // array as its first slot, so such entries are skipped.
                            if (terms.Length > 0)
                            {
                                phraseQuery.Add(terms);
                            }
                        }
                        query = phraseQuery;
                    }

                    // The near-real-time reader is owned by the caller and must be disposed.
                    using (var reader = writer.DocsWriter.GetReader(applyAllDeletes: true))
                    {
                        var searcher     = new IndexSearcher(reader);
                        var luceneResult = searcher.Search(query, queryDefinition.Limit);

                        foreach (var doc in luceneResult.ScoreDocs)
                        {
                            var foundDoc = searcher.Doc(doc.Doc);
                            hits.Add(await inflateDocument(foundDoc));
                        }

                        result.TotalHits = luceneResult.TotalHits;
                        result.Hits      = hits;

                        // Facets
                        if (queryDefinition.Facets.Any())
                        {
                            FacetsConfig    facetsConfig = new FacetsConfig();
                            FacetsCollector fc           = new FacetsCollector();
                            FacetsCollector.Search(searcher, query, queryDefinition.FacetMax, fc);
                            using (var taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.Open(Path.Combine(options.IndexPath, indexType, "taxonomy"))))
                            {
                                var facets = new FastTaxonomyFacetCounts(taxonomyReader, facetsConfig, fc);
                                foreach (var facet in queryDefinition.Facets)
                                {
                                    var facetGroup = new FacetGroup {
                                        Field = facet
                                    };
                                    facetGroup.Facets =
                                        facets.GetTopChildren(queryDefinition.FacetMax, facet).LabelValues
                                        .Select(x => new Facet {
                                        Key = x.Label, Count = (long)x.Value
                                    })
                                        .ToArray();
                                    result.FacetGroups.Add(facetGroup);
                                }
                            }
                        }
                    }
                }

                return(result);
            }
        }
Ejemplo n.º 32
0
        /// <summary>
        /// Runs a <see cref="MultiPhraseQuery"/> of ("wordx" or "wordb") followed by "wordy"
        /// and checks the resulting highlight count.
        /// </summary>
        public void TestQueryScorerMultiPhraseQueryHighlighting()
        {
            const int maxNumFragmentsRequired = 2;
            const int expectedHighlights = 6;

            // First slot accepts either alternative; second slot is a single term.
            Term[] firstSlot = { new Term(FIELD_NAME, "wordx"), new Term(FIELD_NAME, "wordb") };
            var phrase = new MultiPhraseQuery();
            phrase.Add(firstSlot);
            phrase.Add(new Term(FIELD_NAME, "wordy"));

            DoSearching(phrase);

            AssertExpectedHighlightCount(maxNumFragmentsRequired, expectedHighlights);
        }
Ejemplo n.º 33
0
        /// <summary>
        /// Click handler for the Lucene search button: clears the results grid, then appends
        /// the hits (top 10 per query, duplicates not removed) of four searches on the
        /// lower-cased search text: exact word on <c>name_word</c>, the whole text on
        /// <c>full_name</c>, substring wildcard on <c>name_word</c>, and, when the text
        /// contains a year, wildcard-or-year.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void lucene_serach_Click(object sender, EventArgs e)
        {
            this.results.Rows.Clear();

            // Invariant lower-casing keeps the search terms independent of the UI culture.
            var query = this.search_field.Text.ToLowerInvariant();
            var words = query.Split(' ').ToList();

            // The near-real-time reader is owned by the caller and must be disposed.
            using (var reader = writer1.GetReader(applyAllDeletes: true))
            {
                var searcher = new IndexSearcher(reader);

                // Runs one query (top 10) and appends a grid row (id, full name, year) per hit.
                Action<Query> showHits = luceneQuery =>
                {
                    foreach (var hit in searcher.Search(luceneQuery, 10).ScoreDocs)
                    {
                        var foundDoc = searcher.Doc(hit.Doc);
                        this.results.Rows.Add(foundDoc.GetField("id").GetInt32Value().ToString(), foundDoc.GetValues("full_name")[0], foundDoc.GetField("year").GetInt32Value().ToString());
                    }
                };

                // single word
                foreach (var word in words)
                {
                    if (!String.IsNullOrEmpty(word))
                    {
                        var wordQuery = new MultiPhraseQuery();
                        wordQuery.Add(new Term("name_word", word));
                        showHits(wordQuery);
                    }
                }

                // full name
                var fullNameQuery = new MultiPhraseQuery();
                fullNameQuery.Add(new Term("full_name", query));
                showHits(fullNameQuery);

                // word parts
                foreach (var word in words)
                {
                    if (!String.IsNullOrEmpty(word))
                    {
                        showHits(new WildcardQuery(new Term("name_word", "*" + word + "*")));
                    }
                }

                // year and word part
                // The first word that is an integer in 1801..9999 is taken as the year and removed
                // from the word list. The result is tracked explicitly: the parse target alone is
                // not reliable, because an out-of-range number in the last word would leave a
                // non-zero value behind without a year having been accepted.
                int? year = null;
                for (int i = 0; i < words.Count; i++)
                {
                    int candidate;
                    if (Int32.TryParse(words[i], out candidate) && candidate > 1800 && candidate <= 9999)
                    {
                        year = candidate;
                        words.RemoveAt(i);
                        break;
                    }
                }

                if (year.HasValue)
                {
                    foreach (var word in words)
                    {
                        if (!String.IsNullOrEmpty(word))
                        {
                            // NOTE(review): both clauses are SHOULD, so a document matching either
                            // the word part or the year is listed — confirm whether MUST was intended.
                            BooleanQuery booleanQuery = new BooleanQuery();
                            booleanQuery.Add(new WildcardQuery(new Term("name_word", "*" + word + "*")), Occur.SHOULD);
                            booleanQuery.Add(NumericRangeQuery.NewInt32Range("year", 1, year.Value, year.Value, true, true), Occur.SHOULD);
                            showHits(booleanQuery);
                        }
                    }
                }
            }
        }