/// <summary>
        /// Builds a <see cref="MultiPhraseQuery"/> from a <see cref="MultiPhraseQueryNode"/>.
        /// Every child is expected to be a <see cref="FieldQueryNode"/> tagged (under
        /// <see cref="QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID"/>) with an already-built
        /// <see cref="TermQuery"/>. Terms whose nodes share a position increment are grouped
        /// and added to the query together at that position.
        /// </summary>
        /// <param name="queryNode">The node to convert; cast to <see cref="MultiPhraseQueryNode"/>.</param>
        /// <returns>The resulting query; it has no terms when the node reports no children.</returns>
        public virtual Query Build(IQueryNode queryNode)
        {
            var multiPhraseNode = (MultiPhraseQueryNode)queryNode;
            var result          = new MultiPhraseQuery();

            IList<IQueryNode> childNodes = multiPhraseNode.GetChildren();
            if (childNodes == null)
            {
                return result;
            }

            // Group the terms by position increment; the sorted dictionary is then
            // enumerated in key order when the groups are added to the query.
            IDictionary<int, JCG.List<Term>> termsByPosition = new JCG.SortedDictionary<int, JCG.List<Term>>();

            foreach (IQueryNode childNode in childNodes)
            {
                var fieldNode  = (FieldQueryNode)childNode;
                var builtQuery = (TermQuery)fieldNode.GetTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID);
                int position   = fieldNode.PositionIncrement;

                bool known = termsByPosition.TryGetValue(position, out JCG.List<Term> bucket);
                if (!known || bucket is null)
                {
                    bucket = new JCG.List<Term>();
                    termsByPosition[position] = bucket;
                }

                bucket.Add(builtQuery.Term);
            }

            foreach (var entry in termsByPosition)
            {
                result.Add(entry.Value.ToArray(), entry.Key);
            }

            return result;
        }
Esempio n. 2
0
            /// <summary>
            /// Creates the weight for <paramref name="outerInstance"/>: stores the searcher's
            /// similarity, gathers <see cref="TermStatistics"/> for every term in every term
            /// array of the query, and asks the similarity to compute the weight from them.
            /// </summary>
            /// <param name="outerInstance">The query this weight belongs to.</param>
            /// <param name="searcher">Searcher supplying the similarity, reader context and statistics.</param>
            public MultiPhraseWeight(MultiPhraseQuery outerInstance, IndexSearcher searcher)
            {
                this.outerInstance = outerInstance;
                this.similarity    = searcher.Similarity;

                IndexReaderContext topContext = searcher.TopReaderContext;

                // Collect statistics for all terms (input to the idf computation).
                var collected = new JCG.List<TermStatistics>();

                foreach (Term[] alternatives in outerInstance.termArrays)
                {
                    for (int i = 0; i < alternatives.Length; i++)
                    {
                        Term current = alternatives[i];

                        // Reuse a previously built context for this term when one is stored.
                        bool cached = termContexts.TryGetValue(current, out TermContext state);
                        if (!cached || state is null)
                        {
                            state                 = TermContext.Build(topContext, current);
                            termContexts[current] = state;
                        }

                        collected.Add(searcher.TermStatistics(current, state));
                    }
                }

                stats = similarity.ComputeWeight(
                    outerInstance.Boost,
                    searcher.CollectionStatistics(outerInstance.field),
                    collected.ToArray());
            }
Esempio n. 3
0
        /// <summary>
        /// Searches the "name_word" field once for every space-separated word of
        /// <paramref name="query"/> and collects the formatted hits of all searches.
        /// </summary>
        /// <param name="query">Raw user query; a null or empty value yields an empty result.</param>
        /// <param name="searcher">Searcher used to run the queries and load the matching documents.</param>
        /// <returns>
        /// The formatted hits (at most 10 per word) in search order. The same document may
        /// appear more than once when several words match it.
        /// </returns>
        private static List <string> searchOneWord_Lucene(string query, IndexSearcher searcher)
        {
            List <string> result_lucene = new List <string>();

            if (String.IsNullOrEmpty(query))
            {
                return(result_lucene);
            }

            // Search one word at a time; empty entries (from consecutive spaces) are skipped.
            foreach (var word in query.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
            {
                // A fresh query per word, so terms from earlier words do not accumulate.
                var phrase = new MultiPhraseQuery();
                phrase.Add(new Term("name_word", word));

                foreach (var hit in searcher.Search(phrase, 10).ScoreDocs)
                {
                    var foundDoc = searcher.Doc(hit.Doc);
                    result_lucene.Add(getFoundDocResult(foundDoc));
                }
            }

            return(result_lucene);
        }
        /// <summary>
        /// Parses a quoted phrase through a <c>CannedAnalyzer</c> fed with a fixed token list
        /// and checks that the parser produces a <see cref="MultiPhraseQuery"/> equal to the
        /// hand-built expected query.
        /// </summary>
        public virtual void TestMultiPhraseQueryParsing()
        {
            // Fixed tokens handed to the analyzer; "a"/"1" and "b"/"1" share a position value.
            TokenAndPos[] cannedTokens =
            {
                new TokenAndPos("a", 0),
                new TokenAndPos("1", 0),
                new TokenAndPos("b", 1),
                new TokenAndPos("1", 1),
                new TokenAndPos("c", 2)
            };

            var   analyzer = new CannedAnalyzer(cannedTokens);
            var   parser   = new QueryParser(TEST_VERSION_CURRENT, "field", analyzer);
            Query parsed   = parser.Parse("\"this text is acually ignored\"");

            assertTrue("wrong query type!", parsed is MultiPhraseQuery);

            // Expected query: one term group per position.
            Term[] firstGroup  = { new Term("field", "a"), new Term("field", "1") };
            Term[] secondGroup = { new Term("field", "b"), new Term("field", "1") };
            Term[] thirdGroup  = { new Term("field", "c") };

            var expected = new MultiPhraseQuery();
            expected.Add(firstGroup, -1);
            expected.Add(secondGroup, 0);
            expected.Add(thirdGroup, 1);

            assertEquals(expected, parsed);
        }
Esempio n. 5
0
        /// <summary>
        /// Handles the search button: runs the text from the search box either against the
        /// PostgreSQL "movies" table or against the Lucene index (depending on
        /// <c>luceneCheck</c>), removes duplicate result lines and shows them in
        /// <c>ResultBox</c>. Any exception is reported in a message box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void SearchButton_Click(object sender, EventArgs e)
        {
            Cursor.Current       = Cursors.WaitCursor;
            SearchButton.Enabled = false;
            ResultBox.Items.Clear();

            var           query    = TextSearch.Text;
            var           array    = query.Split(' ').ToList();
            List <string> res_list = new List <string>();

            try
            {
                if (!luceneCheck.Checked)
                {
                    SearchMoviesInDatabase(query, array, res_list);
                }
                else
                {
                    SearchMoviesInLucene(query, array, res_list);
                }

                // Drop duplicates (first occurrence wins, so the search order is kept).
                res_list = res_list.Distinct().ToList();
                ResultBox.Items.Clear();
                foreach (var item in res_list)
                {
                    ResultBox.Items.Add(item);
                }

                // Nothing found at all.
                if (ResultBox.Items.Count == 0)
                {
                    ResultBox.Items.Add("Нет результатов. Попробуйте расширить поисковый запрос");
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show("Error occured while searching: " + ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
            finally
            {
                // Always restore the UI, whatever happened during the search.
                Cursor.Current       = Cursors.Default;
                SearchButton.Enabled = true;
            }
        }

        /// <summary>
        /// Runs the SQL search cascade: exact title, year plus title fragment, whole word in
        /// the title, and finally any title fragment. At most 10 rows are read in total
        /// across all statements. User input is always passed as parameters.
        /// </summary>
        /// <param name="query">The full search text.</param>
        /// <param name="words">Words of the query; a detected year word is removed from this list.</param>
        /// <param name="results">List receiving the formatted rows.</param>
        private void SearchMoviesInDatabase(string query, List <string> words, List <string> results)
        {
            const string selectMovies = "SELECT * FROM movies WHERE ";
            int          counter      = 0;

            using (var conn = new NpgsqlConnection(connString))
            {
                conn.Open();

                // Exact title match.
                AppendMovieRows(conn, selectMovies + "name = @name", results, ref counter,
                                new NpgsqlParameter("name", query));

                // Year plus title fragment, when the query contains a year.
                int year = ExtractYear(words);
                if (year != 0)
                {
                    foreach (var word in words)
                    {
                        if (!String.IsNullOrEmpty(word))
                        {
                            AppendMovieRows(conn, selectMovies + "year = @year AND name ILIKE @pattern", results, ref counter,
                                            new NpgsqlParameter("year", (object)year),
                                            new NpgsqlParameter("pattern", "%" + word + "%"));
                        }
                    }
                }

                // Word at the start of the title, as the whole title, or at the end of the title.
                foreach (var word in words)
                {
                    if (!String.IsNullOrEmpty(word))
                    {
                        AppendMovieRows(conn, selectMovies + "name ILIKE @first OR name = @exact OR name ILIKE @last", results, ref counter,
                                        new NpgsqlParameter("first", word + " %"),
                                        new NpgsqlParameter("exact", word),
                                        new NpgsqlParameter("last", "% " + word));
                    }
                }

                // Any fragment of the title, so that at least something is found.
                foreach (var word in words)
                {
                    if (!String.IsNullOrEmpty(word))
                    {
                        AppendMovieRows(conn, selectMovies + "name ILIKE @pattern", results, ref counter,
                                        new NpgsqlParameter("pattern", "%" + word + "%"));
                    }
                }
            }
        }

        /// <summary>
        /// Executes one parameterized statement and appends its rows (columns 0, 1, 2 read as
        /// id, year, name) to <paramref name="results"/> until the shared row limit is reached.
        /// </summary>
        /// <param name="conn">Open connection to run the statement on.</param>
        /// <param name="sql">Statement text with named parameters.</param>
        /// <param name="results">List receiving the formatted rows.</param>
        /// <param name="counter">Rows read so far across all statements; incremented per row.</param>
        /// <param name="parameters">Values for the named parameters in <paramref name="sql"/>.</param>
        private static void AppendMovieRows(NpgsqlConnection conn, string sql, List <string> results, ref int counter, params NpgsqlParameter[] parameters)
        {
            const int maxRows = 10;

            // Once the limit is reached no further row would be kept, so skip the round trip.
            if (counter >= maxRows)
            {
                return;
            }

            using (var command = new NpgsqlCommand(sql, conn))
            {
                command.Parameters.AddRange(parameters);
                using (var reader = command.ExecuteReader())
                {
                    while (counter < maxRows && reader.Read())
                    {
                        counter += 1;
                        int    id   = reader.GetInt32(0);
                        int    year = reader.GetInt32(1);
                        string name = reader.GetString(2);
                        results.Add("ID: " + id.ToString() + " YEAR: " + year.ToString() + " NAME: " + name);
                    }
                }
            }
        }

        /// <summary>
        /// Runs the Lucene search cascade: the parsed full query, the full text as a single
        /// term, each word on its own, and finally each word combined with a detected year.
        /// </summary>
        /// <param name="query">The full search text.</param>
        /// <param name="words">Words of the query; a detected year word is removed from this list.</param>
        /// <param name="results">List receiving the formatted hits.</param>
        private void SearchMoviesInLucene(string query, List <string> words, List <string> results)
        {
            // Parsed full query. It used to be parsed and executed once per word with
            // identical input; since duplicates are removed afterwards, a single run
            // produces the same result list.
            // NOTE(review): the original comment said "search by one word" - confirm whether
            // parsing each word separately was intended.
            QueryParser parser = new QueryParser(AppLuceneVersion, "name", analyzer);
            var         parsed = parser.Parse(query);
            if (words.Any(w => !String.IsNullOrEmpty(w)))
            {
                AppendLuceneHits(searcher.Search(parsed, 10).ScoreDocs, results);
            }

            // The complete text as one term.
            var phrase = new MultiPhraseQuery();
            phrase.Add(new Term("name", query));
            AppendLuceneHits(searcher.Search(phrase, 10).ScoreDocs, results);

            // Each word on its own.
            // NOTE(review): the word is passed to WildcardQuery without any wildcard
            // characters added - confirm whether "*word*" was intended.
            foreach (var word in words)
            {
                if (!String.IsNullOrEmpty(word))
                {
                    var wild = new WildcardQuery(new Term("name", word));
                    AppendLuceneHits(searcher.Search(wild, 10).ScoreDocs, results);
                }
            }

            // Year plus word, when the query contains a year.
            int year = ExtractYear(words);
            if (year != 0)
            {
                foreach (var word in words)
                {
                    if (!String.IsNullOrEmpty(word))
                    {
                        BooleanQuery booleanQuery = new BooleanQuery();
                        var          wild         = new WildcardQuery(new Term("name", word));
                        var          num          = NumericRangeQuery.NewInt32Range("year", 1, year, year, true, true);
                        booleanQuery.Add(wild, Occur.MUST);
                        booleanQuery.Add(num, Occur.MUST);
                        AppendLuceneHits(searcher.Search(booleanQuery, 10).ScoreDocs, results);
                    }
                }
            }
        }

        /// <summary>
        /// Loads the document of every hit and appends a formatted line (score, id, year,
        /// first "name" value) to <paramref name="results"/>.
        /// </summary>
        /// <param name="hits">Hits returned by a search.</param>
        /// <param name="results">List receiving the formatted hits.</param>
        private void AppendLuceneHits(ScoreDoc[] hits, List <string> results)
        {
            foreach (var hit in hits)
            {
                var foundDoc = searcher.Doc(hit.Doc);
                var score    = hit.Score;
                results.Add("Score: " + score + " ID: " + foundDoc.GetField("id").GetInt32Value().ToString() +
                            " YEAR: " + foundDoc.GetField("year").GetInt32Value().ToString() + " NAME: " + foundDoc.GetValues("name")[0]);
            }
        }

        /// <summary>
        /// Finds the first word that parses as an integer in the range 1801..9999, removes it
        /// from <paramref name="words"/> and returns it.
        /// </summary>
        /// <param name="words">Words of the query; modified when a year is found.</param>
        /// <returns>The detected year, or 0 when no word qualifies.</returns>
        private static int ExtractYear(List <string> words)
        {
            for (int i = 0; i < words.Count; i++)
            {
                int candidate;
                if (Int32.TryParse(words[i], out candidate) && candidate > 1800 && candidate <= 9999)
                {
                    words.RemoveAt(i);
                    return candidate;
                }
            }

            return 0;
        }
Esempio n. 6
0
 /// <summary>
 /// Visits a <see cref="MultiPhraseQuery"/>. This implementation does not handle such
 /// queries and always throws; being virtual, a derived class may override it.
 /// </summary>
 /// <param name="multiPhraseq">The query being visited; it is not inspected here.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="SnNotSupportedException">Always thrown.</exception>
 public virtual Query VisitMultiPhraseQuery(MultiPhraseQuery multiPhraseq)
 {
     throw new SnNotSupportedException();
 }
        /// <summary>
        /// Searches bills using the parameters of the current HTTP request ("q", "session",
        /// "start", "count", "chamber", "status", "sponsor", "cosponsor", "sort").
        /// The query is first tried as an exact bill number, a public law number and a
        /// U.S.C. reference; when none of these yields anything, the per-session Lucene
        /// index is searched. Matching bills are then loaded from the "billstatus" table
        /// and sorted by score.
        /// </summary>
        /// <param name="api">
        /// When false (interactive use) the method may redirect the response to a bill page
        /// or a normalized search URL, and the default page size is 30 instead of 1000.
        /// </param>
        /// <returns>
        /// A table with "count", "method" and "results"; a table containing only
        /// "count" = 0 when nothing matched, the query contains '*' or could not be parsed;
        /// or null after a redirect has been issued.
        /// </returns>
        public static Hashtable Search(bool api)
        {
            var request = HttpContext.Current.Request;

            BillType type;
            int      number;
            int      session = -1;

            if (request["session"] != null && request["session"] != "")
            {
                session = int.Parse(request["session"]);
            }

            string q = request["q"];

            int start = 0, count = (!api ? 30 : 1000);

            if (request["start"] != null)
            {
                start = int.Parse(request["start"]);
            }
            if (request["count"] != null)
            {
                count = int.Parse(request["count"]);
            }

            BooleanQuery query = new BooleanQuery();

            Hashtable no_results = new Hashtable();

            no_results["count"] = 0;

            // Queries containing a wildcard character are rejected outright.
            if (q != null && q.IndexOf("*") > -1)
            {
                return(no_results);
            }

            // Interactive query ending in "/<number>": treat the suffix as the session
            // and redirect to the normalized search URL.
            if (!api && session == -1 && q != null)
            {
                int slash = q.IndexOf('/');
                if (slash >= q.Length - 4 && slash > 2)
                {
                    try {
                        session = int.Parse(q.Substring(slash + 1));                       // and if that worked...
                        q       = q.Substring(0, slash);
                        HttpContext.Current.Response.Redirect("billsearch.xpd?session=" + session + "&q=" + HttpUtility.UrlEncode(q));
                        return(null);
                    } catch {
                        // Deliberate best effort: a suffix that is not a number simply
                        // means the query is not in "text/session" form.
                    }
                }
            }

            if (session == -1)
            {
                session = Util.CurrentSession;
            }

            //Console.Error.WriteLine("Find: " + q);

            string    search_method = "search";
            ArrayList specs         = new ArrayList();
            Hashtable scores        = new Hashtable();

            // Match a bill number exactly
            if (q != null && Bills.ParseID(q, out type, out number))
            {
                if (!api)
                {
                    // Redirect the user right to the bill page.
                    // Don't even check if bill exists.
                    HttpContext.Current.Response.Redirect(
                        Bills.BillLink2(session, type, number));
                    return(null);
                }
                else
                {
                    search_method = "search by bill number";
                    scores[session + EnumsConv.BillTypeToString(type) + number] = 1.0F;
                    specs.Add(new Database.AndSpec(
                                  new Database.SpecEQ("session", session),
                                  new Database.SpecEQ("type", EnumsConv.BillTypeToString(type)),
                                  new Database.SpecEQ("number", number)));
                }
            }

            // Match public law number exactly
            if (!api && q != null && (q.StartsWith("P.L.") || q.StartsWith("PL")))
            {
                try {
                    string num = null;
                    if (q.StartsWith("P.L."))
                    {
                        num = q.Substring(4);
                    }
                    if (q.StartsWith("PL"))
                    {
                        num = q.Substring(2);
                    }
                    num = num.Replace(" ", "");

                    int dash = num.IndexOf('-');
                    int s    = int.Parse(num.Substring(0, dash));

                    TableRow bill = Util.Database.DBSelectFirst("billindex", "session, type, number",
                                                                new Database.SpecEQ("idx", "publiclawnumber"),
                                                                new Database.SpecEQ("session", s),
                                                                new Database.SpecEQ("value", num));

                    if (bill != null)
                    {
                        // NOTE(review): this block is only entered when !api, so the
                        // else branch below cannot run as written - confirm the intent.
                        if (!api)
                        {
                            HttpContext.Current.Response.Redirect(Bills.BillLink3((int)bill["session"], (string)bill["type"], (int)bill["number"]));
                            return(null);
                        }
                        else
                        {
                            search_method = "search by public law number";
                            scores[(int)bill["session"] + (string)bill["type"] + (int)bill["number"]] = 1.0F;
                            specs.Add(new Database.AndSpec(
                                          new Database.SpecEQ("session", (int)bill["session"]),
                                          new Database.SpecEQ("type", (string)bill["type"]),
                                          new Database.SpecEQ("number", (int)bill["number"])));
                        }
                    }
                } catch {
                    // Deliberate best effort: a malformed public law number falls
                    // through to the remaining search strategies.
                }
            }

            // Match USC reference
            Regex uscexp = new Regex(@"(\d[0-9A-Za-z\-]*)\s+U\.?S\.?C\.?\s+(\d[0-9A-Za-z\-]*)((\s*\([^\) ]+\))*)",
                                     RegexOptions.IgnoreCase);
            Match uscmc = (q == null ? null : uscexp.Match(q));

            if (uscmc != null && uscmc.Success)
            {
                string title     = uscmc.Groups[1].Value;
                string section   = uscmc.Groups[2].Value;
                string paragraph = uscmc.Groups[3].Value;

                // Turn "(a)(1)" into "_a_1": split on the delimiters, trim the empty
                // entries at both ends and join the rest with underscores.
                string[] ps  = paragraph.Split('[', '(', ')', ' ');
                int      psi = 0;
                while (psi < ps.Length - 1 && ps[psi] == "")
                {
                    psi++;
                }
                int pse = ps.Length - 1;
                while (pse > 0 && ps[pse] == "")
                {
                    pse--;
                }
                // NOTE(review): Split always returns at least one element, so this
                // condition always holds and an empty paragraph becomes "_" - confirm
                // that the stored "ref" values expect that.
                if (ps.Length != 0)
                {
                    paragraph = "_" + String.Join("_", ps, psi, pse - psi + 1);
                }

                Table table = Util.Database.DBSelect("billusc", "session, type, number",
                                                     new Database.SpecEQ("session", session),
                                                     new Database.OrSpec(
                                                         new Database.SpecEQ("ref", title + "_" + section + paragraph),
                                                         new Database.SpecStartsWith("ref", title + "_" + section + paragraph + "_")));
                foreach (TableRow bill in table)
                {
                    search_method = "search by U.S.C. section";
                    scores[(int)bill["session"] + (string)bill["type"] + (int)bill["number"]] = 1.0F;
                    specs.Add(new Database.AndSpec(
                                  new Database.SpecEQ("session", (int)bill["session"]),
                                  new Database.SpecEQ("type", (string)bill["type"]),
                                  new Database.SpecEQ("number", (int)bill["number"])));
                }
            }

            int total_count = -1;

            // None of the exact lookups matched: fall back to the Lucene index.
            if (specs.Count == 0)
            {
                if (q != null && q.Trim() != "")
                {
                    BooleanQuery query1 = new BooleanQuery();
                    query.Add(query1, BooleanClause.Occur.MUST);
                    try {
                        /*if (!q.StartsWith("-")) {
                         *      PhraseQuery pq = new PhraseQuery();
                         *      pq.Add( new Term("shorttitles", q) );
                         *      pq.SetBoost((float)4);
                         *      query1.Add(pq, false, false);
                         * }*/

                        Query query_titles2 = new QueryParser("shorttitles", new StandardAnalyzer()).Parse(q);
                        query_titles2.SetBoost((float)3);
                        query1.Add(query_titles2, BooleanClause.Occur.SHOULD);

                        Query query_titles1 = new QueryParser("officialtitles", new StandardAnalyzer()).Parse(q);
                        query_titles1.SetBoost((float)2);
                        query1.Add(query_titles1, BooleanClause.Occur.SHOULD);

                        Query query_summary = new QueryParser("summary", new StandardAnalyzer()).Parse(q);
                        query1.Add(query_summary, BooleanClause.Occur.SHOULD);

                        Query query_text = new QueryParser("fulltext", new StandardAnalyzer()).Parse(q);
                        query1.Add(query_text, BooleanClause.Occur.SHOULD);
                    } catch (Exception) {
                        // Text that cannot be parsed is treated as "no results".
                        return(no_results);
                    }
                }

                string   chamber   = request["chamber"];
                string[] status    = request["status"] == null ? null : request["status"].Split(',');
                string   sponsor   = request["sponsor"];
                string   cosponsor = request["cosponsor"];
                string   sortBy    = request["sort"];

                if (chamber != null && (chamber == "s" || chamber == "h"))
                {
                    query.Add(new WildcardQuery(new Term("type", chamber + "*")), BooleanClause.Occur.MUST);
                }
                if (status != null && status[0] != "")
                {
                    List <Term> terms = new List <Term>();
                    foreach (string s in status)
                    {
                        terms.Add(new Term("state", s));
                    }
                    MultiPhraseQuery mpq = new MultiPhraseQuery();
                    mpq.Add(terms.ToArray());
                    query.Add(mpq, BooleanClause.Occur.MUST);
                }
                if (sponsor != null && sponsor != "")
                {
                    query.Add(new TermQuery(new Term("sponsor", sponsor)), BooleanClause.Occur.MUST);
                }
                if (cosponsor != null && cosponsor != "")
                {
                    query.Add(new TermQuery(new Term("cosponsor", cosponsor)), BooleanClause.Occur.MUST);
                }

                IndexSearcher searcher = new IndexSearcher(Util.DataPath + Path.DirectorySeparatorChar + session + Path.DirectorySeparatorChar + "index.bills.lucene");
                try
                {
                    Sort sort = null;
                    if (sortBy == "introduced")
                    {
                        sort = new Sort(new SortField("introduced", SortField.STRING, true));
                    }
                    if (sortBy == "lastaction")
                    {
                        sort = new Sort(new SortField("lastaction", SortField.STRING, true));
                    }

                    Hits hits = searcher.Search(query, sort == null ? new Sort() : sort);

                    int end = hits.Length();
                    if (start + count < end)
                    {
                        end = start + count;
                    }
                    total_count = hits.Length();

                    for (int i = start; i < end; i++)
                    {
                        Document doc         = hits.Doc(i);
                        string   billsession = doc.Get("session");
                        string   billtype    = doc.Get("type");
                        string   billnumber  = doc.Get("number");

                        int istatus = (int)EnumsConv.BillStatusFromString(doc.Get("status"));

                        float score;
                        if (sort == null)             // readjust the score based on status
                        {
                            score = hits.Score(i) + istatus / (float)8 * (float).2;
                        }
                        else             // keep order from Lucene
                        {
                            score = -i;
                        }

                        scores[billsession + billtype + billnumber] = score;
                        specs.Add(new Database.AndSpec(
                                      new Database.SpecEQ("session", billsession),
                                      new Database.SpecEQ("type", billtype),
                                      new Database.SpecEQ("number", billnumber)));
                    }
                }
                finally
                {
                    // The searcher is opened per call; release its index files even
                    // when the search or document loading throws.
                    searcher.Close();
                }

                // Sorting by popularity: replace the scores with the stored hit counts.
                if (sortBy == "hits" && specs.Count > 0)
                {
                    Table hitsinfo = Util.Database.DBSelect("billhits", "*",
                                                            Database.OrSpec.New((Database.Spec[])specs.ToArray(typeof(Database.Spec))));
                    foreach (TableRow billhits in hitsinfo)
                    {
                        scores["" + billhits["session"] + billhits["type"] + billhits["number"]] = (float)(int)billhits["hits1"];
                    }
                }
            }

            if (specs.Count == 0)
            {
                return(no_results);
            }

            Table billinfo = Util.Database.DBSelect("billstatus", "*",
                                                    Database.OrSpec.New((Database.Spec[])specs.ToArray(typeof(Database.Spec))));

            // The exact lookups do not go through Lucene, so take the count from the rows found.
            if (total_count == -1)
            {
                total_count = billinfo.Rows;
            }

            ArrayList ret = new ArrayList();

            foreach (TableRow r in billinfo)
            {
                ret.Add(r);
            }

            BillHitComparer bhc = new BillHitComparer();

            bhc.scores = scores;
            ret.Sort(bhc);

            Hashtable ret2 = new Hashtable();

            ret2["count"]   = total_count;
            ret2["method"]  = search_method;
            ret2["results"] = ret;

            return(ret2);
        }
        /// <summary>
        /// Populates <paramref name="terms"/> with <see cref="WeightedSpanTerm"/>s derived from the terms of
        /// <paramref name="query"/>. Composite queries are walked recursively; phrase-style queries are first
        /// converted to an equivalent <see cref="SpanNearQuery"/>. Query types not listed below are ignored.
        /// </summary>
        /// <param name="query">Query to extract terms from.</param>
        /// <param name="terms">Map that receives the created <see cref="WeightedSpanTerm"/>s.</param>
        private void Extract(Query query, IDictionary<String, WeightedSpanTerm> terms)
        {
            switch (query)
            {
                case BooleanQuery booleanQuery:
                {
                    // Prohibited clauses are skipped; every other clause is extracted recursively.
                    foreach (BooleanClause clause in booleanQuery.GetClauses())
                    {
                        if (clause.IsProhibited)
                        {
                            continue;
                        }
                        Extract(clause.Query, terms);
                    }
                    break;
                }

                case PhraseQuery phrase:
                {
                    Term[] phraseTerms = phrase.GetTerms();
                    var spanClauses = new SpanQuery[phraseTerms.Length];
                    for (int k = 0; k < phraseTerms.Length; k++)
                    {
                        spanClauses[k] = new SpanTermQuery(phraseTerms[k]);
                    }

                    int slop = phrase.Slop;
                    int[] positions = phrase.GetPositions();

                    // Add the largest position increment to the slop, but only when it exceeds 1.
                    int widestGap = 0;
                    for (int k = 1; k < positions.Length; k++)
                    {
                        widestGap = Math.Max(widestGap, positions[k] - positions[k - 1]);
                    }
                    if (widestGap > 1)
                    {
                        slop += widestGap;
                    }

                    // A slop of zero requests an in-order span match.
                    var near = new SpanNearQuery(spanClauses, slop, slop == 0) { Boost = query.Boost };
                    ExtractWeightedSpanTerms(terms, near);
                    break;
                }

                case TermQuery _:
                {
                    ExtractWeightedTerms(terms, query);
                    break;
                }

                case SpanQuery spanQuery:
                {
                    ExtractWeightedSpanTerms(terms, spanQuery);
                    break;
                }

                case FilteredQuery filtered:
                {
                    Extract(filtered.Query, terms);
                    break;
                }

                case DisjunctionMaxQuery disjunctionMax:
                {
                    foreach (var disjunct in disjunctionMax)
                    {
                        Extract(disjunct, terms);
                    }
                    break;
                }

                case MultiTermQuery multiTerm when expandMultiTermQuery:
                {
                    // Work on a clone so the caller's query keeps its own rewrite method.
                    if (multiTerm.RewriteMethod != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
                    {
                        multiTerm = (MultiTermQuery)multiTerm.Clone();
                        multiTerm.RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
                        query = multiTerm;
                    }

                    // Rewrite once against a FakeReader; its Field is then used to pick the real reader.
                    var fieldProbe = new FakeReader();
                    MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.Rewrite(fieldProbe, multiTerm);
                    if (fieldProbe.Field != null)
                    {
                        Extract(query.Rewrite(GetReaderForField(fieldProbe.Field)), terms);
                    }
                    break;
                }

                case MultiPhraseQuery multiPhrase:
                {
                    IList<Term[]> termArrays = multiPhrase.GetTermArrays();
                    int[] positions = multiPhrase.GetPositions();
                    if (positions.Length == 0)
                    {
                        break;
                    }

                    // Highest position decides how many per-position buckets are needed.
                    int highest = positions[positions.Length - 1];
                    foreach (int candidate in positions)
                    {
                        if (candidate > highest)
                        {
                            highest = candidate;
                        }
                    }

                    var buckets = new List<SpanQuery>[highest + 1];
                    int occupied = 0;
                    for (int k = 0; k < termArrays.Count; ++k)
                    {
                        Term[] alternatives = termArrays[k];
                        int slot = positions[k];
                        List<SpanQuery> bucket = buckets[slot];
                        if (bucket == null)
                        {
                            bucket = new List<SpanQuery>(alternatives.Length);
                            buckets[slot] = bucket;
                            ++occupied;
                        }
                        foreach (Term alternative in alternatives)
                        {
                            bucket.Add(new SpanTermQuery(alternative));
                        }
                    }

                    // One SpanOrQuery per occupied position, in ascending position order.
                    var clauses = new SpanQuery[occupied];
                    int filled = 0;
                    foreach (List<SpanQuery> bucket in buckets)
                    {
                        if (bucket == null)
                        {
                            continue;
                        }
                        clauses[filled++] = new SpanOrQuery(bucket.ToArray());
                    }

                    // Every unoccupied position is a gap that widens the allowed slop by one.
                    int positionGaps = buckets.Length - occupied;
                    int slop = multiPhrase.Slop;

                    var near = new SpanNearQuery(clauses, slop + positionGaps, slop == 0) { Boost = query.Boost };
                    ExtractWeightedSpanTerms(terms, near);
                    break;
                }
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Translates <paramref name="query"/> into span queries and passes each one, together with
        /// <paramref name="payloads"/>, to <c>GetPayloads</c>. Boolean, filtered and disjunction-max
        /// queries are walked recursively; query types not handled below are ignored.
        /// </summary>
        /// <param name="query">Query to translate.</param>
        /// <param name="payloads">Collection handed on to <c>GetPayloads</c> for every span query produced.</param>
        private void QueryToSpanQuery(Query query, ICollection<byte[]> payloads)
        {
            if (query is BooleanQuery)
            {
                BooleanClause[] queryClauses = ((BooleanQuery)query).GetClauses();

                for (int i = 0; i < queryClauses.Length; i++)
                {
                    // Prohibited clauses are skipped.
                    if (!queryClauses[i].IsProhibited)
                    {
                        QueryToSpanQuery(queryClauses[i].Query, payloads);
                    }
                }
            }
            else if (query is PhraseQuery)
            {
                Term[]      phraseQueryTerms = ((PhraseQuery)query).GetTerms();
                SpanQuery[] clauses          = new SpanQuery[phraseQueryTerms.Length];
                for (int i = 0; i < phraseQueryTerms.Length; i++)
                {
                    clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
                }

                // A slop of zero requests an in-order span match.
                int  slop    = ((PhraseQuery)query).Slop;
                bool inorder = (slop == 0);

                SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
                sp.Boost = query.Boost;
                GetPayloads(payloads, sp);
            }
            else if (query is TermQuery)
            {
                SpanTermQuery stq = new SpanTermQuery(((TermQuery)query).Term);
                stq.Boost = query.Boost;
                GetPayloads(payloads, stq);
            }
            else if (query is SpanQuery)
            {
                GetPayloads(payloads, (SpanQuery)query);
            }
            else if (query is FilteredQuery)
            {
                QueryToSpanQuery(((FilteredQuery)query).Query, payloads);
            }
            else if (query is DisjunctionMaxQuery)
            {
                // foreach (rather than a hand-rolled enumerator loop) also disposes the enumerator.
                foreach (Query disjunct in (DisjunctionMaxQuery)query)
                {
                    QueryToSpanQuery(disjunct, payloads);
                }
            }
            else if (query is MultiPhraseQuery)
            {
                MultiPhraseQuery mpq        = (MultiPhraseQuery)query;
                IList<Term[]>    termArrays = mpq.GetTermArrays();
                int[]            positions  = mpq.GetPositions();
                if (positions.Length > 0)
                {
                    // Highest position decides how many per-position lists are needed.
                    int maxPosition = positions[positions.Length - 1];
                    for (int i = 0; i < positions.Length - 1; ++i)
                    {
                        if (positions[i] > maxPosition)
                        {
                            maxPosition = positions[i];
                        }
                    }

                    // The lists are typed as SpanQuery so that ToArray() yields a real SpanQuery[].
                    // Casting a Query[] to SpanQuery[] compiles (array covariance) but throws
                    // InvalidCastException at runtime.
                    List<SpanQuery>[] disjunctLists     = new List<SpanQuery>[maxPosition + 1];
                    int               distinctPositions = 0;

                    for (int i = 0; i < termArrays.Count; ++i)
                    {
                        Term[]          termArray = termArrays[i];
                        List<SpanQuery> disjuncts = disjunctLists[positions[i]];
                        if (disjuncts == null)
                        {
                            disjuncts = (disjunctLists[positions[i]] = new List<SpanQuery>(termArray.Length));
                            ++distinctPositions;
                        }
                        foreach (Term term in termArray)
                        {
                            disjuncts.Add(new SpanTermQuery(term));
                        }
                    }

                    // One SpanOrQuery per occupied position; unoccupied positions are counted as gaps.
                    int         positionGaps = 0;
                    int         position     = 0;
                    SpanQuery[] clauses      = new SpanQuery[distinctPositions];
                    for (int i = 0; i < disjunctLists.Length; ++i)
                    {
                        List<SpanQuery> disjuncts = disjunctLists[i];
                        if (disjuncts != null)
                        {
                            clauses[position++] = new SpanOrQuery(disjuncts.ToArray());
                        }
                        else
                        {
                            ++positionGaps;
                        }
                    }

                    // Each gap widens the allowed slop by one; a zero query slop requests in-order matching.
                    int  slop    = mpq.Slop;
                    bool inorder = (slop == 0);

                    SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                    sp.Boost = query.Boost;
                    GetPayloads(payloads, sp);
                }
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Demo entry point: indexes a small JSON array of people into an in-memory Lucene index
        /// (one document per array element, one <c>app_data</c> field per value) and prints the hit
        /// count and stored payload of every document whose <c>app_data</c> contains
        /// <c>12355</c> or <c>89700</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var jsonProps = "[{\"name\":\"Ondrej\",\"surname\":\"Kubicek\",\"app_data\":[\"112233\", \"56612\"]},"
                            + "{\"name\":\"Lukas\",\"surname\":\"Bily\",\"app_data\":[\"12355\", \"112233\", \"89466\"]}," +
                            "{\"name\":\"Lenak\",\"surname\":\"Nejaka\",\"app_data\":[\"89700\"]}]";

            var version = LuceneVersion.LUCENE_48;

            // Directory, analyzer, writer, reader and the parsed JSON document are all disposable;
            // the using blocks release them even if indexing or searching throws.
            using (var dir = new RAMDirectory())
            using (var analyzer = new StandardAnalyzer(version))
            using (var writer = new IndexWriter(dir, new IndexWriterConfig(version, analyzer)))
            {
                using (var parsed = JsonDocument.Parse(jsonProps))
                {
                    foreach (var line in parsed.RootElement.EnumerateArray())
                    {
                        var doc = new Document();

                        doc.Add(new StringField("name", line.GetProperty("name").GetString(), Field.Store.NO));
                        doc.Add(new StringField("surname", line.GetProperty("surname").GetString(), Field.Store.NO));
                        foreach (var f in line.GetProperty("app_data").EnumerateArray())
                        {
                            doc.Add(new StringField("app_data", f.GetString(), Field.Store.NO));
                        }

                        // The only stored field: the raw JSON of the element, printed for each hit below.
                        doc.Add(new StringField("payload", line.ToString(), Field.Store.YES));

                        writer.AddDocument(doc);
                    }
                }

                writer.Flush(false, false);

                using (var reader = writer.GetReader(true))
                {
                    var searcher = new IndexSearcher(reader);

                    var booleanQuery = new BooleanQuery();

                    booleanQuery.Add(new TermQuery(new Term("app_data", "12355")), Occur.SHOULD);
                    booleanQuery.Add(new TermQuery(new Term("app_data", "89700")), Occur.SHOULD);

                    var res = searcher.Search(booleanQuery, 100);

                    Console.WriteLine(res.TotalHits);
                    foreach (var hit in res.ScoreDocs)
                    {
                        var item = searcher.Doc(hit.Doc);
                        Console.WriteLine(item.Get("payload"));
                    }
                }
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Searches the index in <paramref name="directoryInfo"/> for <paramref name="textSearcher"/> in the
        /// <c>FullRequest</c> field, once as a sloppy phrase (slop 3) and once through the query parser,
        /// and appends the distinct stored <c>ObjetcId</c> values of the hits to <paramref name="lReturn"/>.
        /// </summary>
        /// <param name="directoryInfo">Directory containing the Lucene index.</param>
        /// <param name="textSearcher">Text to search for; a null or blank value produces no results.</param>
        /// <param name="lReturn">List that receives the distinct object ids found by either search.</param>
        private void GetDataIndexId(DirectoryInfo directoryInfo, ref string textSearcher, ref List<string> lReturn)
        {
            // Nothing to search for: avoid a NullReferenceException on Split and a pointless index open.
            if (textSearcher == null || textSearcher.Trim().Length == 0)
            {
                return;
            }

            char[] delimiterChars = { ' ', ',', '.', ':', '\t' };

            // Consecutive delimiters (e.g. ", ") must not yield empty words: an empty-string term
            // in the phrase would stop the phrase from ever matching.
            string[] words = textSearcher.Split(delimiterChars, System.StringSplitOptions.RemoveEmptyEntries);

            List<string> listTemp = new List<string>();

            using (Directory directory = FSDirectory.Open(directoryInfo))
            using (Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30))
            using (IndexReader indexReader = IndexReader.Open(directory, true))
            using (Searcher indexSearcher = new IndexSearcher(indexReader))
            {
                // Search 1: the words as a phrase, allowing a slop of 3.
                if (words.Length > 0)
                {
                    MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery();
                    multiPhraseQuery.Slop = 3;
                    foreach (string word in words)
                    {
                        multiPhraseQuery.Add(new Term("FullRequest", word));
                    }

                    TopScoreDocCollector collectorMultiPhraseQuery = TopScoreDocCollector.Create(100, true);
                    indexSearcher.Search(multiPhraseQuery, collectorMultiPhraseQuery);
                    CollectObjectIds(indexSearcher, collectorMultiPhraseQuery.TopDocs().ScoreDocs, listTemp);
                }

                // Search 2: the raw text parsed by the query parser against the same field.
                var queryParser = new QueryParser(Version.LUCENE_30, "FullRequest", analyzer);
                var query       = queryParser.Parse(textSearcher);

                TopScoreDocCollector collectorQueryParser = TopScoreDocCollector.Create(100, true);
                indexSearcher.Search(query, collectorQueryParser);
                CollectObjectIds(indexSearcher, collectorQueryParser.TopDocs().ScoreDocs, listTemp);
            }

            // Both searches can return the same document, so de-duplicate before handing back.
            lReturn.AddRange(listTemp.Distinct());
        }

        /// <summary>
        /// Loads the document of every hit and appends its non-empty <c>ObjetcId</c> stored value to
        /// <paramref name="target"/>.
        /// </summary>
        /// <param name="searcher">Searcher used to load the hit documents.</param>
        /// <param name="hits">Hits whose documents are inspected.</param>
        /// <param name="target">List that receives the ids.</param>
        private static void CollectObjectIds(Searcher searcher, ScoreDoc[] hits, List<string> target)
        {
            foreach (ScoreDoc hit in hits)
            {
                // NOTE(review): "ObjetcId" is spelled exactly as the original code reads it;
                // presumably it matches the field name written at index time - confirm before renaming.
                string objectId = searcher.Doc(hit.Doc).Get("ObjetcId");
                if (!string.IsNullOrEmpty(objectId))
                {
                    target.Add(objectId);
                }
            }
        }
Esempio n. 12
0
        /// <summary>
        /// Creates a query from the analysis chain.
        /// <para/>
        /// Expert: this is more useful for subclasses such as queryparsers.
        /// If using this class directly, just use <see cref="CreateBooleanQuery(string, string)"/>
        /// and <see cref="CreatePhraseQuery(string, string)"/>.
        /// <para/>
        /// The text is analyzed once through a <see cref="CachingTokenFilter"/> to count tokens and
        /// positions; the cached tokens are then replayed to build a term, boolean, multi-phrase or
        /// phrase query depending on those counts and on <paramref name="quoted"/>.
        /// </summary>
        /// <param name="analyzer"> Analyzer used for this query. </param>
        /// <param name="operator"> Default boolean operator used for this query. </param>
        /// <param name="field"> Field to create queries against. </param>
        /// <param name="queryText"> Text to be passed to the analysis chain. </param>
        /// <param name="quoted"> <c>true</c> if phrases should be generated when terms occur at more than one position. </param>
        /// <param name="phraseSlop"> Slop factor for phrase/multiphrase queries. </param>
        /// <returns>
        /// <c>null</c> when analysis yields no tokens; otherwise the query produced by
        /// <c>NewTermQuery</c>, <c>NewBooleanQuery</c>, <c>NewMultiPhraseQuery</c> or <c>NewPhraseQuery</c>.
        /// </returns>
        /// <exception cref="Exception">
        /// Wraps a <see cref="System.IO.IOException"/> raised while obtaining or resetting the token stream.
        /// </exception>
        protected Query CreateFieldQuery(Analyzer analyzer, Occur @operator, string field, string queryText, bool quoted, int phraseSlop)
        {
            Debug.Assert(@operator == Occur.SHOULD || @operator == Occur.MUST);
            // Use the analyzer to get all the tokens, and then build a TermQuery,
            // PhraseQuery, or nothing based on the term count
            CachingTokenFilter          buffer     = null;
            ITermToBytesRefAttribute    termAtt    = null;
            IPositionIncrementAttribute posIncrAtt = null;
            int  numTokens     = 0;
            int  positionCount = 0;
            bool severalTokensAtSamePosition = false;
            bool hasMoreTokens = false;

            TokenStream source = null;

            // Pass 1: run the analyzer once, caching the tokens in 'buffer' while counting
            // tokens (numTokens) and summing the non-zero position increments (positionCount).
            try
            {
                source = analyzer.GetTokenStream(field, new StringReader(queryText));
                source.Reset();
                buffer = new CachingTokenFilter(source);
                buffer.Reset();

                // Either attribute may be missing from the stream; both stay null in that case.
                if (buffer.HasAttribute <ITermToBytesRefAttribute>())
                {
                    termAtt = buffer.GetAttribute <ITermToBytesRefAttribute>();
                }
                if (buffer.HasAttribute <IPositionIncrementAttribute>())
                {
                    posIncrAtt = buffer.GetAttribute <IPositionIncrementAttribute>();
                }

                // Without a term attribute nothing is counted, so numTokens stays 0 and null is returned below.
                if (termAtt != null)
                {
                    try
                    {
                        hasMoreTokens = buffer.IncrementToken();
                        while (hasMoreTokens)
                        {
                            numTokens++;
                            // A stream without a position-increment attribute is treated as increment 1 per token.
                            int positionIncrement = (posIncrAtt != null) ? posIncrAtt.PositionIncrement : 1;
                            if (positionIncrement != 0)
                            {
                                positionCount += positionIncrement;
                            }
                            else
                            {
                                // Increment 0 means this token shares the position of the previous one.
                                severalTokensAtSamePosition = true;
                            }
                            hasMoreTokens = buffer.IncrementToken();
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        // ignore: counting stops here and the tokens counted so far are used
                    }
                }
            }
            catch (System.IO.IOException e)
            {
                throw new Exception("Error analyzing query text", e);
            }
            finally
            {
                // The underlying stream is disposed here; pass 2 below only replays 'buffer'.
                IOUtils.DisposeWhileHandlingException(source);
            }

            // rewind the buffer stream
            buffer.Reset();

            // NOTE(review): 'bytes' is read once and every term below is built from
            // BytesRef.DeepCopyOf(bytes) after termAtt.FillBytesRef(); presumably the attribute
            // refills this same instance for each token - confirm against ITermToBytesRefAttribute.
            BytesRef bytes = termAtt == null ? null : termAtt.BytesRef;

            // Pass 2: replay the cached tokens and build the query shape chosen by the counts above.
            if (numTokens == 0)
            {
                return(null);
            }
            else if (numTokens == 1)
            {
                // Exactly one token: a plain term query.
                try
                {
                    bool hasNext = buffer.IncrementToken();
                    Debug.Assert(hasNext == true);
                    termAtt.FillBytesRef();
                }
                catch (System.IO.IOException)
                {
                    // safe to ignore, because we know the number of tokens
                }
                return(NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes))));
            }
            else
            {
                if (severalTokensAtSamePosition || (!quoted))
                {
                    if (positionCount == 1 || (!quoted))
                    {
                        // no phrase query:

                        if (positionCount == 1)
                        {
                            // simple case: only one position, with synonyms
                            BooleanQuery q = NewBooleanQuery(true);
                            for (int i = 0; i < numTokens; i++)
                            {
                                try
                                {
                                    bool hasNext = buffer.IncrementToken();
                                    Debug.Assert(hasNext == true);
                                    termAtt.FillBytesRef();
                                }
                                catch (System.IO.IOException)
                                {
                                    // safe to ignore, because we know the number of tokens
                                }
                                Query currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                                q.Add(currentQuery, Occur.SHOULD);
                            }
                            return(q);
                        }
                        else
                        {
                            // multiple positions
                            // 'currentQuery' holds the clause for the position being assembled; it is
                            // added to 'q' with the caller's operator once the next position starts.
                            BooleanQuery q            = NewBooleanQuery(false);
                            Query        currentQuery = null;
                            for (int i = 0; i < numTokens; i++)
                            {
                                try
                                {
                                    bool hasNext = buffer.IncrementToken();
                                    Debug.Assert(hasNext == true);
                                    termAtt.FillBytesRef();
                                }
                                catch (System.IO.IOException)
                                {
                                    // safe to ignore, because we know the number of tokens
                                }
                                if (posIncrAtt != null && posIncrAtt.PositionIncrement == 0)
                                {
                                    // Same position as the previous token: fold both into a nested
                                    // boolean query of SHOULD clauses, creating it on first use.
                                    if (!(currentQuery is BooleanQuery))
                                    {
                                        Query t = currentQuery;
                                        currentQuery = NewBooleanQuery(true);
                                        ((BooleanQuery)currentQuery).Add(t, Occur.SHOULD);
                                    }
                                    ((BooleanQuery)currentQuery).Add(NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes))), Occur.SHOULD);
                                }
                                else
                                {
                                    // New position: emit the clause built for the previous position, if any.
                                    if (currentQuery != null)
                                    {
                                        q.Add(currentQuery, @operator);
                                    }
                                    currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                                }
                            }
                            // Emit the clause for the last position.
                            q.Add(currentQuery, @operator);
                            return(q);
                        }
                    }
                    else
                    {
                        // phrase query:
                        // Terms sharing a position are collected in 'multiTerms' and added to the
                        // multi-phrase query as one group when the next position starts.
                        MultiPhraseQuery mpq = NewMultiPhraseQuery();
                        mpq.Slop = phraseSlop;
                        IList <Term> multiTerms = new List <Term>();
                        int          position   = -1;
                        for (int i = 0; i < numTokens; i++)
                        {
                            int positionIncrement = 1;
                            try
                            {
                                bool hasNext = buffer.IncrementToken();
                                Debug.Assert(hasNext == true);
                                termAtt.FillBytesRef();
                                if (posIncrAtt != null)
                                {
                                    positionIncrement = posIncrAtt.PositionIncrement;
                                }
                            }
                            catch (System.IO.IOException)
                            {
                                // safe to ignore, because we know the number of tokens
                            }

                            // A positive increment closes the current group; flush it before advancing.
                            if (positionIncrement > 0 && multiTerms.Count > 0)
                            {
                                // The explicit position is passed only when position increments are enabled.
                                if (enablePositionIncrements)
                                {
                                    mpq.Add(multiTerms.ToArray(), position);
                                }
                                else
                                {
                                    mpq.Add(multiTerms.ToArray());
                                }
                                multiTerms.Clear();
                            }
                            position += positionIncrement;
                            multiTerms.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
                        }
                        // Flush the final group.
                        if (enablePositionIncrements)
                        {
                            mpq.Add(multiTerms.ToArray(), position);
                        }
                        else
                        {
                            mpq.Add(multiTerms.ToArray());
                        }
                        return(mpq);
                    }
                }
                else
                {
                    // Quoted text with every token at its own position: a plain phrase query.
                    PhraseQuery pq = NewPhraseQuery();
                    pq.Slop = phraseSlop;
                    int position = -1;

                    for (int i = 0; i < numTokens; i++)
                    {
                        int positionIncrement = 1;

                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            termAtt.FillBytesRef();
                            if (posIncrAtt != null)
                            {
                                positionIncrement = posIncrAtt.PositionIncrement;
                            }
                        }
                        catch (System.IO.IOException)
                        {
                            // safe to ignore, because we know the number of tokens
                        }

                        // The explicit position is passed only when position increments are enabled.
                        if (enablePositionIncrements)
                        {
                            position += positionIncrement;
                            pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)), position);
                        }
                        else
                        {
                            pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
                        }
                    }
                    return(pq);
                }
            }
        }
Esempio n. 13
0
 /// <summary>
 /// Visits a <see cref="MultiPhraseQuery"/>. This overridable implementation does not handle
 /// the query and always throws.
 /// </summary>
 /// <param name="multiPhraseq">The query being visited; not used.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public virtual Query VisitMultiPhraseQuery(MultiPhraseQuery multiPhraseq)
     => throw new NotImplementedException();
Esempio n. 14
0
 /// <summary>
 /// Visits a <see cref="MultiPhraseQuery"/>. This override rejects the query and always throws.
 /// </summary>
 /// <param name="multiPhraseq">The query being visited; not used.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="SnNotSupportedException">Always thrown.</exception>
 public override Query VisitMultiPhraseQuery(MultiPhraseQuery multiPhraseq)
     => throw new SnNotSupportedException();
Esempio n. 15
0
 /// <summary>
 /// Checks that a phrase query built for "中国" with <c>MockCJKSynonymAnalyzer</c> equals a
 /// <see cref="MultiPhraseQuery"/> of "中" followed by the alternatives "国" / "國", both with
 /// the default slop and with a slop of 3.
 /// </summary>
 public virtual void TestCJKSynonymsPhrase()
 {
     var builder = new QueryBuilder(new MockCJKSynonymAnalyzer());

     var expected = new MultiPhraseQuery();
     expected.Add(new Term("field", "中"));
     expected.Add(new[] { new Term("field", "国"), new Term("field", "國") });

     // Default slop.
     Query withDefaultSlop = builder.CreatePhraseQuery("field", "中国");
     Assert.AreEqual(expected, withDefaultSlop);

     // Same expected query, now with slop 3, against the explicit-slop overload.
     expected.Slop = 3;
     Query withSlopThree = builder.CreatePhraseQuery("field", "中国", 3);
     Assert.AreEqual(expected, withSlopThree);
 }
Esempio n. 16
0
 /// <summary>
 /// Checks that a phrase query built for "old dogs" with <c>MockSynonymAnalyzer</c> equals a
 /// <see cref="MultiPhraseQuery"/> of "old" followed by the alternatives "dogs" / "dog".
 /// </summary>
 public virtual void TestSynonymsPhrase()
 {
     var builder = new QueryBuilder(new MockSynonymAnalyzer());

     var expected = new MultiPhraseQuery();
     expected.Add(new Term("field", "old"));
     expected.Add(new[] { new Term("field", "dogs"), new Term("field", "dog") });

     Query actual = builder.CreatePhraseQuery("field", "old dogs");
     Assert.AreEqual(expected, actual);
 }
Esempio n. 17
0
        /// <summary>
        /// Builds the content query for a "match anywhere" search. The search string is split on
        /// spaces. A single word becomes a <see cref="TermQuery"/> when it is one punctuation
        /// character, otherwise a <c>*word*</c> <see cref="WildcardQuery"/>. Several words become a
        /// <see cref="MultiPhraseQuery"/> in which the first and last positions use the term sets
        /// gathered by <c>CollectFirstAndLastTermMatches</c> when those sets are non-empty, and the
        /// literal word otherwise.
        /// </summary>
        /// <param name="indexReader">Reader passed on to <c>CollectFirstAndLastTermMatches</c>.</param>
        /// <param name="expandedSearchString">Search text; lower-cased first when <paramref name="matchCase"/> is false.</param>
        /// <param name="matchCase">Selects the case-sensitive content field when true, the case-insensitive one otherwise.</param>
        /// <returns>The query described above.</returns>
        private Query BuildMatchAnywhereQuery(IndexReader indexReader, string expandedSearchString, bool matchCase)
        {
            string fieldToSearch = matchCase
                ? Constants.IndexFields.Content
                : Constants.IndexFields.ContentCaseInsensitive;
            string normalized = matchCase ? expandedSearchString : expandedSearchString.ToLower();

            List<string> words = normalized.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).ToList();
            string head = words.FirstOrDefault();
            string tail = words.LastOrDefault();

            // Single word: exact term for a lone punctuation character, wildcard otherwise.
            if (words.Count == 1)
            {
                bool isLonePunctuation = head.Length == 1 && LuceneHelper.IsPunctuation(head.First());
                if (isLonePunctuation)
                {
                    return new TermQuery(new Term(fieldToSearch, head));
                }
                return new WildcardQuery(new Term(fieldToSearch, "*" + head + "*"));
            }

            // Several words: gather the term sets for the first and last word.
            var headMatches = new List<Term>();
            var tailMatches = new List<Term>();
            CollectFirstAndLastTermMatches(indexReader, fieldToSearch, headMatches, tailMatches, head, tail);

            bool hasHeadMatches = headMatches.Count > 0;
            bool hasTailMatches = tailMatches.Count > 0;

            var phrase = new MultiPhraseQuery();

            if (hasHeadMatches)
            {
                phrase.Add(headMatches.ToArray());
            }

            // The literal first/last word is added only when no match set replaces it.
            int from = hasHeadMatches ? 1 : 0;
            int to   = hasTailMatches ? words.Count - 1 : words.Count;
            for (int k = from; k < to; k++)
            {
                phrase.Add(new Term(fieldToSearch, words[k]));
            }

            if (hasTailMatches)
            {
                phrase.Add(tailMatches.ToArray());
            }

            return phrase;
        }
Esempio n. 18
0
 /// <summary>
 /// Override for <see cref="MultiPhraseQuery"/> instances; not supported by this implementation.
 /// </summary>
 /// <param name="multiPhraseq">The multi-phrase query passed in (never inspected).</param>
 /// <returns>Nothing; the method always throws.</returns>
 /// <exception cref="NotImplementedException">Thrown unconditionally.</exception>
 public override Query VisitMultiPhraseQuery(MultiPhraseQuery multiPhraseq)
     => throw new NotImplementedException();
Esempio n. 19
0
        /// <summary>
        /// Click handler that searches the index for the lower-cased text of <c>search_field</c>
        /// and fills the <c>results</c> grid with the de-duplicated matches (columns: id, name, year).
        /// </summary>
        /// <remarks>
        /// Four searches are run in order, each limited to 10 hits, and their results are merged by "id":
        /// the parsed input, the whole input as a single "name" term, a <c>*word*</c> wildcard per word,
        /// and - only when one of the words is a year (1801..9999) - a wildcard-or-year query per
        /// remaining word.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void lucene_serach_Click(object sender, EventArgs e)
        {
            results.Rows.Clear();
            var query = search_field.Text.ToLower();
            var array = query.Split(' ').ToList();

            var totalResults = new List<Document>();

            // The reader is opened for this click only, so it is released as soon as the search is done.
            using (var reader = writer1.GetReader(applyAllDeletes: true))
            {
                var searcher = new IndexSearcher(reader);

                // Parsed input. The parsed query does not depend on the individual word, so it is
                // built and executed once instead of once per word, and only when the input holds
                // at least one non-empty word (parsing blank text is what previously failed).
                if (array.Any(word => !String.IsNullOrEmpty(word)))
                {
                    QueryParser parser = new QueryParser(AppLuceneVersion, "name", analyzer);
                    AddHitsWithNewIds(searcher, parser.Parse(query), totalResults);
                }

                // Full name: the whole input as one term of the "name" field.
                var phrase = new MultiPhraseQuery();
                phrase.Add(new Term("name", query));
                AddHitsWithNewIds(searcher, phrase, totalResults);

                // Word parts: substring match for every non-empty word.
                foreach (var word in array)
                {
                    if (!string.IsNullOrEmpty(word))
                    {
                        var wild = new WildcardQuery(new Term("name", "*" + word + "*"));
                        AddHitsWithNewIds(searcher, wild, totalResults);
                    }
                }

                // Year plus word part: take the first word that looks like a year out of the list.
                var yearFound = false;
                var number    = 0;

                foreach (var word in array)
                {
                    var result = TryParse(word, out number);
                    if (result && number > 1800 && number <= 9999)
                    {
                        yearFound = true;
                        array.RemoveAt(array.IndexOf(word));
                        // The list was just modified, so the enumeration must stop right here.
                        break;
                    }
                }

                Console.WriteLine(yearFound);

                // Gate on the explicit flag: "number" also ends up non-zero when the last word is
                // a number outside the year range, which must not trigger the year search.
                if (yearFound)
                {
                    foreach (var word in array)
                    {
                        if (!string.IsNullOrEmpty(word))
                        {
                            var booleanQuery = new BooleanQuery();
                            var wild         = new WildcardQuery(new Term("name", "*" + word + "*"));
                            var num          = NumericRangeQuery.NewInt32Range("year", 1, number, number, true, true);

                            booleanQuery.Add(wild, Occur.SHOULD);
                            booleanQuery.Add(num, Occur.SHOULD);
                            AddHitsWithNewIds(searcher, booleanQuery, totalResults);
                        }
                    }
                }

                foreach (var doc in totalResults)
                {
                    results.Rows.Add(doc.GetField("id").GetInt32Value().ToString(),
                                     doc.GetValues("name")[0],
                                     doc.GetField("year").GetInt32Value().ToString());
                }
            }
        }

        /// <summary>
        /// Runs <paramref name="query"/> for its top 10 hits and appends to <paramref name="collected"/>
        /// every hit whose "id" field value is not already present there.
        /// </summary>
        private static void AddHitsWithNewIds(IndexSearcher searcher, Query query, List<Document> collected)
        {
            foreach (var hit in searcher.Search(query, 10).ScoreDocs)
            {
                var foundDoc = searcher.Doc(hit.Doc);
                if (!collected.Any(f =>
                                   f.GetField("id").GetInt32Value() == foundDoc.GetField("id").GetInt32Value()))
                {
                    collected.Add(foundDoc);
                }
            }
        }
Esempio n. 20
0
        /// <summary>
        /// Translates <paramref name="query"/> into one or more span queries and passes each of them,
        /// together with <paramref name="payloads"/>, to <c>GetPayloads</c>.
        /// </summary>
        /// <remarks>
        /// Boolean (non-prohibited clauses only), filtered and disjunction-max queries are unwrapped
        /// recursively. Phrase, term and multi-phrase queries are converted to span queries carrying the
        /// original boost. Span queries are passed through as they are. Any other query type is ignored.
        /// </remarks>
        /// <param name="query">Query to translate.</param>
        /// <param name="payloads">Collection handed on to <c>GetPayloads</c>.</param>
        private void QueryToSpanQuery(Query query, ICollection<byte[]> payloads)
        {
            if (query is BooleanQuery)
            {
                BooleanClause[] queryClauses = ((BooleanQuery)query).GetClauses();

                for (int i = 0; i < queryClauses.Length; i++)
                {
                    if (!queryClauses[i].IsProhibited())
                    {
                        QueryToSpanQuery(queryClauses[i].GetQuery(), payloads);
                    }
                }
            }
            else if (query is PhraseQuery)
            {
                Term[]      phraseQueryTerms = ((PhraseQuery)query).GetTerms();
                SpanQuery[] clauses          = new SpanQuery[phraseQueryTerms.Length];
                for (int i = 0; i < phraseQueryTerms.Length; i++)
                {
                    clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
                }

                // An exact phrase (slop 0) must match in order; a sloppy one may match in any order.
                int  slop    = ((PhraseQuery)query).GetSlop();
                bool inorder = (slop == 0);

                SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
                sp.SetBoost(query.GetBoost());
                GetPayloads(payloads, sp);
            }
            else if (query is TermQuery)
            {
                SpanTermQuery stq = new SpanTermQuery(((TermQuery)query).GetTerm());
                stq.SetBoost(query.GetBoost());
                GetPayloads(payloads, stq);
            }
            else if (query is SpanQuery)
            {
                GetPayloads(payloads, (SpanQuery)query);
            }
            else if (query is FilteredQuery)
            {
                QueryToSpanQuery(((FilteredQuery)query).GetQuery(), payloads);
            }
            else if (query is DisjunctionMaxQuery)
            {
                for (System.Collections.IEnumerator iterator = ((DisjunctionMaxQuery)query).Iterator(); iterator.MoveNext();)
                {
                    QueryToSpanQuery((Query)iterator.Current, payloads);
                }
            }
            else if (query is MultiPhraseQuery)
            {
                MultiPhraseQuery         mpq        = (MultiPhraseQuery)query;
                System.Collections.IList termArrays = mpq.GetTermArrays();
                int[] positions = mpq.GetPositions();
                if (positions.Length > 0)
                {
                    // Largest position used by the query; it sizes the per-position buckets below.
                    int maxPosition = positions[positions.Length - 1];
                    for (int i = 0; i < positions.Length - 1; ++i)
                    {
                        if (positions[i] > maxPosition)
                        {
                            maxPosition = positions[i];
                        }
                    }

                    // One bucket of alternative SpanTermQuery instances per position; unused positions stay null.
                    System.Collections.ArrayList[] disjunctLists = new System.Collections.ArrayList[maxPosition + 1];
                    int distinctPositions = 0;

                    for (int i = 0; i < termArrays.Count; ++i)
                    {
                        Term[] termArray = (Term[])termArrays[i];
                        System.Collections.IList disjuncts = disjunctLists[positions[i]];
                        if (disjuncts == null)
                        {
                            disjuncts = (disjunctLists[positions[i]] = new System.Collections.ArrayList(termArray.Length));
                            ++distinctPositions;
                        }
                        for (int j = 0; j < termArray.Length; ++j)
                        {
                            disjuncts.Add(new SpanTermQuery(termArray[j]));
                        }
                    }

                    int         positionGaps = 0;
                    int         position     = 0;
                    SpanQuery[] clauses      = new SpanQuery[distinctPositions];
                    for (int i = 0; i < disjunctLists.Length; ++i)
                    {
                        System.Collections.ArrayList disjuncts = disjunctLists[i];
                        if (disjuncts != null)
                        {
                            // ArrayList.ToArray(Type) expects the ELEMENT type. Passing typeof(SpanQuery[])
                            // would build a SpanQuery[][] and fail while copying the SpanTermQuery items.
                            clauses[position++] = new SpanOrQuery((SpanQuery[])(disjuncts.ToArray(typeof(SpanQuery))));
                        }
                        else
                        {
                            ++positionGaps;
                        }
                    }

                    int  slop    = mpq.GetSlop();
                    bool inorder = (slop == 0);

                    // Empty positions are added to the slop so the span query tolerates the gaps.
                    SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                    sp.SetBoost(query.GetBoost());
                    GetPayloads(payloads, sp);
                }
            }
        }
Esempio n. 21
0
        /// <summary>
        /// Executes <paramref name="queryDefinition"/> against the index and returns the inflated hits,
        /// the total hit count and, when requested, facet counts.
        /// </summary>
        /// <remarks>
        /// Without term queries every document matches. Otherwise each term query contributes one
        /// position to a <see cref="MultiPhraseQuery"/>, whose alternatives are the lower-cased,
        /// space-separated words of its value. Term queries whose value contains no words are
        /// skipped; if none contributes a word, the search falls back to matching every document.
        /// </remarks>
        /// <param name="queryDefinition">Term queries, hit limit and facet settings of the search.</param>
        /// <param name="cancellationToken">Token used while waiting for the reader lock.</param>
        /// <returns>The search result with hits, total hit count and facet groups.</returns>
        public async Task<SearchResult<T>> SearchAsync(SearchQuery queryDefinition, CancellationToken cancellationToken = default)
        {
            using (await writerLock.ReaderLockAsync(cancellationToken))
            {
                var     result = new SearchResult<T>();
                List<T> hits   = new List<T>();

                using (var writer = getWriter())
                {
                    Query query = new MatchAllDocsQuery();

                    // Term queries
                    if (queryDefinition.TermQueries.Any())
                    {
                        var phraseQuery = new MultiPhraseQuery();
                        var hasTerms    = false;
                        foreach (var termQuery in queryDefinition.TermQueries)
                        {
                            var alternatives = termQuery.value
                                               .Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries)
                                               .Select(phrase => new Term(termQuery.field, phrase.ToLower()))
                                               .ToArray();

                            // MultiPhraseQuery.Add needs at least one term per position; a blank
                            // value yields an empty array, so it is left out of the query.
                            if (alternatives.Length == 0)
                            {
                                continue;
                            }

                            phraseQuery.Add(alternatives);
                            hasTerms = true;
                        }

                        if (hasTerms)
                        {
                            query = phraseQuery;
                        }
                    }

                    // The reader is opened for this search only, so it is released when the search is done.
                    using (var reader = writer.DocsWriter.GetReader(applyAllDeletes: true))
                    {
                        var searcher     = new IndexSearcher(reader);
                        var luceneResult = searcher.Search(query, queryDefinition.Limit);

                        foreach (var doc in luceneResult.ScoreDocs)
                        {
                            var foundDoc = searcher.Doc(doc.Doc);
                            hits.Add(await inflateDocument(foundDoc));
                        }

                        result.TotalHits = luceneResult.TotalHits;
                        result.Hits      = hits;

                        // Facets
                        if (queryDefinition.Facets.Any())
                        {
                            FacetsConfig    facetsConfig = new FacetsConfig();
                            FacetsCollector fc           = new FacetsCollector();
                            FacetsCollector.Search(searcher, query, queryDefinition.FacetMax, fc);
                            using (var taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.Open(Path.Combine(options.IndexPath, indexType, "taxonomy"))))
                            {
                                var facets = new FastTaxonomyFacetCounts(taxonomyReader, facetsConfig, fc);
                                foreach (var facet in queryDefinition.Facets)
                                {
                                    var facetGroup = new FacetGroup {
                                        Field = facet
                                    };
                                    facetGroup.Facets =
                                        facets.GetTopChildren(queryDefinition.FacetMax, facet).LabelValues
                                        .Select(x => new Facet {
                                        Key = x.Label, Count = (long)x.Value
                                    })
                                        .ToArray();
                                    result.FacetGroups.Add(facetGroup);
                                }
                            }
                        }
                    }
                }

                return(result);
            }
        }
Esempio n. 22
0
        /// <summary>
        /// Rewrites <paramref name="query"/> as span queries and forwards each of them, along with
        /// <paramref name="payloads"/>, to <c>GetPayloads</c>.
        /// </summary>
        /// <remarks>
        /// Boolean (non-prohibited clauses only), filtered and disjunction-max queries are unwrapped
        /// recursively. Phrase, term and multi-phrase queries become span queries with the original
        /// boost. Span queries are forwarded unchanged. Every other query type is ignored.
        /// </remarks>
        /// <param name="query">Query to rewrite.</param>
        /// <param name="payloads">Collection handed on to <c>GetPayloads</c>.</param>
        private void QueryToSpanQuery(Query query, ICollection<byte[]> payloads)
        {
            if (query is BooleanQuery boolQ)
            {
                foreach (BooleanClause clause in boolQ.GetClauses())
                {
                    if (clause.IsProhibited)
                    {
                        continue;
                    }

                    QueryToSpanQuery(clause.Query, payloads);
                }
            }
            else if (query is PhraseQuery phraseQ)
            {
                Term[]      phraseTerms = phraseQ.GetTerms();
                SpanQuery[] termSpans   = new SpanQuery[phraseTerms.Length];
                for (int t = 0; t < phraseTerms.Length; t++)
                {
                    termSpans[t] = new SpanTermQuery(phraseTerms[t]);
                }

                // Exact phrases (slop 0) are matched in order, sloppy ones in any order.
                int phraseSlop = phraseQ.Slop;

                SpanNearQuery nearSpans = new SpanNearQuery(termSpans, phraseSlop, phraseSlop == 0);
                nearSpans.Boost = query.Boost;
                GetPayloads(payloads, nearSpans);
            }
            else if (query is TermQuery termQ)
            {
                SpanTermQuery termSpan = new SpanTermQuery(termQ.Term);
                termSpan.Boost = query.Boost;
                GetPayloads(payloads, termSpan);
            }
            else if (query is SpanQuery spanQ)
            {
                GetPayloads(payloads, spanQ);
            }
            else if (query is FilteredQuery filteredQ)
            {
                QueryToSpanQuery(filteredQ.Query, payloads);
            }
            else if (query is DisjunctionMaxQuery disMaxQ)
            {
                foreach (var disjunct in disMaxQ)
                {
                    QueryToSpanQuery(disjunct, payloads);
                }
            }
            else if (query is MultiPhraseQuery multiPhraseQ)
            {
                IList<Term[]> termArrays = multiPhraseQ.GetTermArrays();
                int[]         positions  = multiPhraseQ.GetPositions();
                if (positions.Length > 0)
                {
                    // Highest position in use; it determines how many buckets are needed.
                    int highestPosition = positions[positions.Length - 1];
                    for (int p = positions.Length - 2; p >= 0; --p)
                    {
                        if (positions[p] > highestPosition)
                        {
                            highestPosition = positions[p];
                        }
                    }

                    // LUCENENET: SpanQuery (not Query) element type, so SpanOrQuery can be
                    // instantiated below without an O(n) cast.
                    IList<SpanQuery>[] buckets = new List<SpanQuery>[highestPosition + 1];
                    int usedBuckets = 0;

                    for (int a = 0; a < termArrays.Count; ++a)
                    {
                        Term[] alternatives = termArrays[a];
                        int    bucketIndex  = positions[a];

                        IList<SpanQuery> bucket = buckets[bucketIndex];
                        if (bucket == null)
                        {
                            bucket               = new List<SpanQuery>(alternatives.Length);
                            buckets[bucketIndex] = bucket;
                            ++usedBuckets;
                        }

                        for (int t = 0; t < alternatives.Length; ++t)
                        {
                            bucket.Add(new SpanTermQuery(alternatives[t]));
                        }
                    }

                    // One SpanOrQuery per used position; unused positions are counted as gaps.
                    int         gapCount   = 0;
                    int         nextClause = 0;
                    SpanQuery[] orClauses  = new SpanQuery[usedBuckets];
                    foreach (IList<SpanQuery> bucket in buckets)
                    {
                        if (bucket == null)
                        {
                            ++gapCount;
                        }
                        else
                        {
                            orClauses[nextClause++] = new SpanOrQuery(bucket);
                        }
                    }

                    int multiSlop = multiPhraseQ.Slop;

                    // The gaps widen the slop so the span query still spans the empty positions.
                    SpanNearQuery nearSpans = new SpanNearQuery(orClauses, multiSlop + gapCount, multiSlop == 0);
                    nearSpans.Boost = query.Boost;
                    GetPayloads(payloads, nearSpans);
                }
            }
        }
Esempio n. 23
0
        /// <summary>
        /// Click handler that searches the index for the lower-cased <c>find_text</c> and appends the
        /// de-duplicated matching documents to <c>textBox1</c>.
        /// </summary>
        /// <remarks>
        /// Four searches are run in order, each limited to 10 hits, and their results are merged by the
        /// string form of the "id" field: every word as a "name" term, the whole input as a single
        /// "name" term, a <c>*word*</c> wildcard per word, and - only when one of the words is a year
        /// (1801..9999) - a wildcard-or-year query per remaining word.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void button4_Click(object sender, EventArgs e)
        {
            var query = find_text.ToLower();
            var array = query.Split(' ').ToList();

            var totalResults = new List<Document>();

            // The reader is opened for this click only, so it is released as soon as the search is done.
            using (var reader = writer.GetReader())
            {
                var searcher = new IndexSearcher(reader);

                // Search by each single word of the name.
                MultiPhraseQuery phrase;
                foreach (var word in array)
                {
                    if (!String.IsNullOrEmpty(word))
                    {
                        phrase = new MultiPhraseQuery();
                        phrase.Add(new Term("name", word));
                        AppendUnseenDocuments(searcher, phrase, totalResults);
                    }
                }

                // Search by all words of the name: the whole input as one term.
                phrase = new MultiPhraseQuery();
                phrase.Add(new Term("name", query));
                AppendUnseenDocuments(searcher, phrase, totalResults);

                // Search by partial words of the name.
                foreach (var word in array)
                {
                    if (!String.IsNullOrEmpty(word))
                    {
                        var wild = new WildcardQuery(new Term("name", "*" + word + "*"));
                        AppendUnseenDocuments(searcher, wild, totalResults);
                    }
                }

                // Search by year and name (or part of the name): take the first word that looks
                // like a year out of the list.
                bool yearFound = false;
                int  number    = 0;

                foreach (var word in array)
                {
                    bool result = Int32.TryParse(word, out number);
                    if (result && number > 1800 && number <= 9999)
                    {
                        yearFound = true;
                        array.RemoveAt(array.IndexOf(word));
                        // The list was just modified, so the enumeration must stop right here.
                        break;
                    }
                }
                Console.WriteLine(yearFound);

                // Gate on the explicit flag: "number" also ends up non-zero when the last word is
                // a number outside the year range, which must not trigger the year search.
                if (yearFound)
                {
                    foreach (var word in array)
                    {
                        if (!String.IsNullOrEmpty(word))
                        {
                            BooleanQuery booleanQuery = new BooleanQuery();

                            var wild = new WildcardQuery(new Term("name", "*" + word + "*"));
                            var num  = NumericRangeQuery.NewIntRange("year", 1, number, number, true, true);

                            booleanQuery.Add(wild, Occur.SHOULD);
                            booleanQuery.Add(num, Occur.SHOULD);
                            AppendUnseenDocuments(searcher, booleanQuery, totalResults);
                        }
                    }
                }

                foreach (var doc in totalResults)
                {
                    textBox1.AppendText(doc.ToString());
                }
            }
        }

        /// <summary>
        /// Runs <paramref name="query"/> for its top 10 hits and appends to <paramref name="collected"/>
        /// every hit whose "id" field (compared by its string form) is not already present there.
        /// </summary>
        private static void AppendUnseenDocuments(IndexSearcher searcher, Query query, List<Document> collected)
        {
            foreach (var hit in searcher.Search(query, 10).ScoreDocs)
            {
                var foundDoc = searcher.Doc(hit.Doc);
                if (!collected.Any(f =>
                                   f.GetField("id").ToString() == foundDoc.GetField("id").ToString()))
                {
                    collected.Add(foundDoc);
                }
            }
        }
        /// <summary>
        /// Walks <paramref name="query"/> and adds <see cref="WeightedSpanTerm"/>s for its terms to the supplied <see cref="T:IDictionary{string, WeightedSpanTerm}"/>.
        /// </summary>
        /// <param name="query"><see cref="Query"/> whose terms are extracted</param>
        /// <param name="terms">Dictionary that receives the created <see cref="WeightedSpanTerm"/>s</param>
        /// <exception cref="System.IO.IOException">If there is a low-level I/O error</exception>
        protected virtual void Extract(Query query, IDictionary<string, WeightedSpanTerm> terms)
        {
            switch (query)
            {
                case BooleanQuery booleanQuery:
                {
                    // Recurse into every clause that is not prohibited.
                    IList<BooleanClause> clauseList = booleanQuery.Clauses;
                    for (int c = 0; c < clauseList.Count; c++)
                    {
                        BooleanClause clause = clauseList[c];
                        if (clause.IsProhibited)
                        {
                            continue;
                        }

                        Extract(clause.Query, terms);
                    }
                    break;
                }

                case PhraseQuery phraseQuery:
                {
                    Term[]      phraseTerms = phraseQuery.GetTerms();
                    SpanQuery[] termSpans   = new SpanQuery[phraseTerms.Length];
                    for (int t = 0; t < phraseTerms.Length; t++)
                    {
                        termSpans[t] = new SpanTermQuery(phraseTerms[t]);
                    }

                    int   phraseSlop      = phraseQuery.Slop;
                    int[] phrasePositions = phraseQuery.GetPositions();

                    // The largest increment between consecutive positions is added to the slop
                    // (only when it exceeds 1).
                    if (phrasePositions.Length > 0)
                    {
                        int widestIncrement = 0;
                        for (int p = 1; p < phrasePositions.Length; p++)
                        {
                            int increment = phrasePositions[p] - phrasePositions[p - 1];
                            if (increment > widestIncrement)
                            {
                                widestIncrement = increment;
                            }
                        }

                        if (widestIncrement > 1)
                        {
                            phraseSlop += widestIncrement;
                        }
                    }

                    SpanNearQuery nearSpans = new SpanNearQuery(termSpans, phraseSlop, phraseSlop == 0);
                    nearSpans.Boost = query.Boost;
                    ExtractWeightedSpanTerms(terms, nearSpans);
                    break;
                }

                case TermQuery _:
                    ExtractWeightedTerms(terms, query);
                    break;

                case SpanQuery spanQuery:
                    ExtractWeightedSpanTerms(terms, spanQuery);
                    break;

                case FilteredQuery filteredQuery:
                    Extract(filteredQuery.Query, terms);
                    break;

                case ConstantScoreQuery constantScoreQuery:
                {
                    Query wrapped = constantScoreQuery.Query;
                    if (wrapped != null)
                    {
                        Extract(wrapped, terms);
                    }
                    break;
                }

                case CommonTermsQuery _:
                    // Handled specially because rewriting would change the result query;
                    // this query is TermContext sensitive.
                    ExtractWeightedTerms(terms, query);
                    break;

                case DisjunctionMaxQuery disMaxQuery:
                    foreach (var disjunct in disMaxQuery)
                    {
                        Extract(disjunct, terms);
                    }
                    break;

                case MultiPhraseQuery multiPhraseQuery:
                {
                    IList<Term[]> termArrays = multiPhraseQuery.GetTermArrays();
                    int[]         positions  = multiPhraseQuery.GetPositions();
                    if (positions.Length > 0)
                    {
                        // Highest position in use; it determines how many buckets are needed.
                        int highestPosition = positions[positions.Length - 1];
                        for (int p = positions.Length - 2; p >= 0; --p)
                        {
                            if (positions[p] > highestPosition)
                            {
                                highestPosition = positions[p];
                            }
                        }

                        // One bucket of alternative span term queries per position; unused ones stay null.
                        var buckets     = new List<SpanQuery>[highestPosition + 1];
                        int usedBuckets = 0;

                        for (int a = 0; a < termArrays.Count; ++a)
                        {
                            Term[] alternatives = termArrays[a];
                            int    bucketIndex  = positions[a];

                            List<SpanQuery> bucket = buckets[bucketIndex];
                            if (bucket == null)
                            {
                                bucket               = new List<SpanQuery>(alternatives.Length);
                                buckets[bucketIndex] = bucket;
                                ++usedBuckets;
                            }

                            for (int t = 0; t < alternatives.Length; ++t)
                            {
                                bucket.Add(new SpanTermQuery(alternatives[t]));
                            }
                        }

                        // One SpanOrQuery per used position; unused positions are counted as gaps.
                        int         gapCount   = 0;
                        int         nextClause = 0;
                        SpanQuery[] orClauses  = new SpanQuery[usedBuckets];
                        for (int b = 0; b < buckets.Length; ++b)
                        {
                            List<SpanQuery> bucket = buckets[b];
                            if (bucket == null)
                            {
                                ++gapCount;
                            }
                            else
                            {
                                orClauses[nextClause++] = new SpanOrQuery(bucket.ToArray());
                            }
                        }

                        int multiSlop = multiPhraseQuery.Slop;

                        SpanNearQuery nearSpans = new SpanNearQuery(orClauses, multiSlop + gapCount, multiSlop == 0);
                        nearSpans.Boost = query.Boost;
                        ExtractWeightedSpanTerms(terms, nearSpans);
                    }
                    break;
                }

                default:
                {
                    Query toRewrite = query;
                    if (query is MultiTermQuery)
                    {
                        if (!expandMultiTermQuery)
                        {
                            // Multi-term expansion is disabled: nothing is extracted at all.
                            return;
                        }

                        MultiTermQuery scoringCopy = (MultiTermQuery)query.Clone();
                        scoringCopy.MultiTermRewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
                        toRewrite = scoringCopy;
                    }

                    IndexReader leafReader = GetLeafContext().Reader;
                    Query       rewritten  = toRewrite.Rewrite(leafReader);
                    if (rewritten != toRewrite)
                    {
                        // Rewrite only once and then flatten again - the rewritten query could get special
                        // treatment if this method is overridden in a subclass or in the next recursion.
                        Extract(rewritten, terms);
                    }
                    break;
                }
            }

            ExtractUnknownQuery(query, terms);
        }
Esempio n. 25
0
        /// <summary>
        /// Searches with a <see cref="MultiPhraseQuery"/> of ("wordx" or "wordb") followed by "wordy"
        /// and checks the highlight count through <c>AssertExpectedHighlightCount(2, 6)</c>.
        /// </summary>
        public void TestQueryScorerMultiPhraseQueryHighlighting()
        {
            Term[] firstPositionAlternatives =
            {
                new Term(FIELD_NAME, "wordx"),
                new Term(FIELD_NAME, "wordb")
            };
            Term secondPositionTerm = new Term(FIELD_NAME, "wordy");

            MultiPhraseQuery multiPhrase = new MultiPhraseQuery();
            multiPhrase.Add(firstPositionAlternatives);
            multiPhrase.Add(secondPositionTerm);

            DoSearching(multiPhrase);

            // First argument: maximum number of fragments required.
            AssertExpectedHighlightCount(2, 6);
        }
Esempio n. 26
0
        /// <summary>
        /// Searches the Lucene index for <paramref name="query"/> in several passes and
        /// yields at most ten distinct matches. Based on the examples at
        /// https://lucenenet.apache.org.
        /// </summary>
        /// <remarks>
        /// Passes run in this order; every individual query is limited to its top 10 hits:
        /// <list type="number">
        /// <item><description>term match of each non-empty word against "name_word";</description></item>
        /// <item><description>term match of the whole query string against "full_name";</description></item>
        /// <item><description>"*word*" wildcard match of each non-empty word against "name_word";</description></item>
        /// <item><description>the first word that parses as an integer is removed from the word list and,
        /// when it is non-zero, each remaining word is searched as "*word*" OR year == that number.</description></item>
        /// </list>
        /// Documents are de-duplicated by their "id" field; the first occurrence wins, so
        /// earlier passes take precedence. This is an iterator: nothing runs until the
        /// result is enumerated.
        /// </remarks>
        /// <param name="query">Space-separated search text.</param>
        /// <returns>Up to ten (full_name, year) tuples in the order the documents were first found.</returns>
        public IEnumerable <(string name, int year)> SearchWithLucy(string query)
        {
            var words        = query.Split(' ').ToList();
            var totalResults = new List <Document>();
            // Ids already collected; replaces a linear scan of totalResults for every hit.
            var seenIds = new HashSet <int?>();

            // GetReader hands out a reader owned by the caller; dispose it once all
            // documents have been loaded so it does not leak on every search.
            using (var reader = lucyAdapter.lucyWriter.GetReader(applyAllDeletes: true))
            {
                var searcher = new IndexSearcher(reader);

                // Runs one query and appends every hit whose "id" has not been seen yet.
                void Collect(Query luceneQuery)
                {
                    foreach (var scoreDoc in searcher.Search(luceneQuery, 10).ScoreDocs)
                    {
                        var document = searcher.Doc(scoreDoc.Doc);
                        if (seenIds.Add(document.GetField("id").GetInt32Value()))
                        {
                            totalResults.Add(document);
                        }
                    }
                }

                //word
                foreach (var word in words)
                {
                    if (string.IsNullOrEmpty(word))
                    {
                        continue;
                    }
                    var wordQuery = new MultiPhraseQuery();
                    wordQuery.Add(new Term("name_word", word));
                    Collect(wordQuery);
                }

                // full name
                var fullNameQuery = new MultiPhraseQuery();
                fullNameQuery.Add(new Term("full_name", query));
                Collect(fullNameQuery);

                //word parts
                foreach (var word in words)
                {
                    if (string.IsNullOrEmpty(word))
                    {
                        continue;
                    }
                    Collect(new WildcardQuery(new Term("name_word", "*" + word + "*")));
                }

                //year and word part
                // Take the first word that parses as an integer and drop it from the
                // word list; number stays 0 when no word parses.
                var number = 0;
                for (var i = 0; i < words.Count; i++)
                {
                    if (int.TryParse(words[i], out number))
                    {
                        words.RemoveAt(i);
                        break;
                    }
                }

                if (number != 0)
                {
                    foreach (var word in words)
                    {
                        if (string.IsNullOrEmpty(word))
                        {
                            continue;
                        }
                        var booleanQuery = new BooleanQuery();

                        // Both clauses are optional: a document matches on the word part,
                        // on the exact year, or on both.
                        booleanQuery.Add(new WildcardQuery(new Term("name_word", "*" + word + "*")), Occur.SHOULD);
                        booleanQuery.Add(NumericRangeQuery.NewInt32Range("year", 1, number, number, true, true), Occur.SHOULD);
                        Collect(booleanQuery);
                    }
                }
            }

            foreach (var doc in totalResults.Take(10))
            {
                yield return(doc.GetValues("full_name")[0], (int)doc.GetField("year").GetInt32Value());
            }
        }
Esempio n. 27
0
        public void TestQueryScorerMultiPhraseQueryHighlightingWithGap()
        {
            // Two single-term positions with a gap between them (0 and 2), deliberately
            // added out of order. MultiPhraseQuery's ToString does not cope well with
            // out-of-order additions, but the query itself seems to match accurately.
            var gapQuery = new MultiPhraseQuery();
            gapQuery.Add(new[] { new Term(FIELD_NAME, "wordz") }, 2);
            gapQuery.Add(new[] { new Term(FIELD_NAME, "wordx") }, 0);

            DoSearching(gapQuery);

            const int maxNumFragmentsRequired = 1;
            const int expectedHighlights      = 2;

            AssertExpectedHighlightCount(maxNumFragmentsRequired, expectedHighlights);
        }
Esempio n. 28
0
        /// <summary>
        /// End-to-end Lucene.NET walkthrough: (re)creates an on-disk index, adds one
        /// document, runs a two-term phrase query against it and prints the hits as a
        /// table to the console.
        /// </summary>
        public void LuceneMinimalExampleTest()
        {
            //X) Create an index and define a text analyzer ------------------------
            // Ensures index backward compatibility
            const LuceneVersion AppLuceneVersion = LuceneVersion.LUCENE_48;

            // Construct a machine-independent path for the index
            var basePath  = Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData);
            var indexPath = Path.Combine(basePath, "index");

            // Start from a clean index. The directory does not exist on a first run and
            // holds index files afterwards, so check for existence and delete
            // recursively; a plain Delete(path) throws in both situations.
            if (System.IO.Directory.Exists(indexPath))
            {
                System.IO.Directory.Delete(indexPath, recursive: true);
            }

            using var dir = FSDirectory.Open(indexPath);

            // Create an analyzer to process the text
            var analyzer = new StandardAnalyzer(AppLuceneVersion);

            // Create an index writer
            var indexConfig = new IndexWriterConfig(AppLuceneVersion, analyzer);

            using var writer = new IndexWriter(dir, indexConfig);

            //X) Add to the index ------------------------
            var source = new
            {
                Name           = "Kermit the Frog",
                FavoritePhrase = "The quick brown fox jumps over the lazy dog"
            };

            var doc = new Document
            {
                // StringField indexes but doesn't tokenize
                new StringField("name", source.Name, Field.Store.YES),
                new TextField("favoritePhrase", source.FavoritePhrase, Field.Store.YES)
            };

            writer.AddDocument(doc);
            writer.Flush(triggerMerge: false, applyAllDeletes: false);

            //X) Construct a query ------------------------

            // Search with a phrase
            var phrase = new MultiPhraseQuery
            {
                new Term("favoritePhrase", "brown"),
                new Term("favoritePhrase", "fox")
            };

            //X) Fetch the results ------------------------

            // Re-use the writer to get real-time updates
            using var reader = writer.GetReader(applyAllDeletes: true);
            var searcher = new IndexSearcher(reader);

            ScoreDoc[] hits = searcher.Search(phrase, 20 /* top 20 */).ScoreDocs;

            // Display the output in a table
            string strDisp = $"{"Score",10}" + $" {"Name",-15}" + $" {"Favorite Phrase",-40}";

            Console.WriteLine(strDisp);

            foreach (var hit in hits)
            {
                var    foundDoc = searcher.Doc(hit.Doc);
                string str      = $"{hit.Score:f8}" + $" {foundDoc.Get("name"),-15}" + $" {foundDoc.Get("favoritePhrase"),-40}";
                Console.WriteLine(str);
            }
        }
Esempio n. 29
0
        /// <summary>
        /// Click handler for the search button: runs the lower-cased text of
        /// <c>search_field</c> against the Lucene index and fills the <c>results</c>
        /// grid with one (id, full_name, year) row per hit.
        /// </summary>
        /// <remarks>
        /// Rows are appended pass by pass, each query limited to its top 10 hits;
        /// duplicates across passes are not filtered:
        /// single words on "name_word", the whole query on "full_name", "*word*"
        /// wildcards on "name_word", and finally, only if the query contains a number
        /// in the range 1801..9999, "*word*" OR year == that number for every other word.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void lucene_serach_Click(object sender, EventArgs e)
        {
            this.results.Rows.Clear();
            var query = this.search_field.Text.ToLower();
            var array = query.Split(' ').ToList();

            // GetReader returns a reader owned by the caller; dispose it when the
            // search is finished so it does not leak on every click.
            using (var reader = writer1.GetReader(applyAllDeletes: true))
            {
                var searcher = new IndexSearcher(reader);

                // single word
                foreach (var word in array)
                {
                    if (!String.IsNullOrEmpty(word))
                    {
                        var wordQuery = new MultiPhraseQuery();
                        wordQuery.Add(new Term("name_word", word));
                        AddHitsToResults(searcher, wordQuery);
                    }
                }

                // full title
                var fullTitleQuery = new MultiPhraseQuery();
                fullTitleQuery.Add(new Term("full_name", query));
                AddHitsToResults(searcher, fullTitleQuery);

                // word parts
                foreach (var word in array)
                {
                    if (!String.IsNullOrEmpty(word))
                    {
                        AddHitsToResults(searcher, new WildcardQuery(new Term("name_word", "*" + word + "*")));
                    }
                }

                // year and word part
                // Find the first word that is a plausible year and remove it from the
                // word list. A dedicated flag is used because the TryParse output is
                // overwritten by every later word and therefore cannot tell whether a
                // valid year was actually found.
                int  year      = 0;
                bool yearFound = false;

                for (int i = 0; i < array.Count; i++)
                {
                    int number;
                    if (Int32.TryParse(array[i], out number) && number > 1800 && number <= 9999)
                    {
                        year      = number;
                        yearFound = true;
                        array.RemoveAt(i);
                        break;
                    }
                }

                if (yearFound)
                {
                    foreach (var word in array)
                    {
                        if (!String.IsNullOrEmpty(word))
                        {
                            BooleanQuery booleanQuery = new BooleanQuery();

                            // Both clauses are optional: a document matches on the word
                            // part, on the exact year, or on both.
                            booleanQuery.Add(new WildcardQuery(new Term("name_word", "*" + word + "*")), Occur.SHOULD);
                            booleanQuery.Add(NumericRangeQuery.NewInt32Range("year", 1, year, year, true, true), Occur.SHOULD);
                            AddHitsToResults(searcher, booleanQuery);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Runs <paramref name="luceneQuery"/> (top 10 hits) and appends one
        /// (id, full_name, year) row to the <c>results</c> grid for every hit.
        /// </summary>
        private void AddHitsToResults(IndexSearcher searcher, Query luceneQuery)
        {
            foreach (var hit in searcher.Search(luceneQuery, 10).ScoreDocs)
            {
                var foundDoc = searcher.Doc(hit.Doc);
                this.results.Rows.Add(foundDoc.GetField("id").GetInt32Value().ToString(), foundDoc.GetValues("full_name")[0], foundDoc.GetField("year").GetInt32Value().ToString());
            }
        }