IScorer implementation which scores text fragments by the number of unique query terms found. This class converts suitable Query objects into SpanQuery objects and attempts to score only those terms that participated in generating the 'hit' on the document.
Inheritance: IScorer
コード例 #1
0
ファイル: SearchEngine.cs プロジェクト: riadyb/totalrecall
        /// <summary>
        /// Parses <paramref name="query"/> against the "contents" field, runs it through
        /// the searcher and returns one <see cref="Hit"/> per scored document.
        /// </summary>
        /// <param name="query">Query text handed to the <see cref="QueryParser"/>.</param>
        /// <param name="maxResults">Maximum number of top documents to request.</param>
        /// <returns>
        /// The hits in the order the searcher returned them. <c>Excerpt</c> is the best
        /// highlighted fragment of the stored "contents" value, or <c>null</c> when that
        /// value is not stored or the highlighter finds no matching fragment.
        /// </returns>
        public IEnumerable<Hit> Search(string query, int maxResults)
        {
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);

            QueryParser qp = new QueryParser(
                Lucene.Net.Util.Version.LUCENE_29,
                "contents",
                analyzer
            );
            Query q = qp.Parse(query);

            TopDocs top = searcher.Search(q, maxResults);
            List<Hit> result = new List<Hit>(top.ScoreDocs.Length);

            // The scorer and highlighter depend only on the query, not on the
            // individual document, so build them once instead of once per hit.
            var scorer = new QueryScorer(q, searcher.IndexReader, "contents");
            var highlighter = new Highlighter(scorer);

            foreach (var scoreDoc in top.ScoreDocs)
            {
                var doc = searcher.Doc(scoreDoc.Doc);
                string contents = doc.Get("contents");

                // A document without a stored "contents" value cannot be highlighted;
                // passing null on would throw when the text is wrapped in a reader.
                string excerpt = contents == null
                    ? null
                    : highlighter.GetBestFragment(analyzer, "contents", contents);

                result.Add(new Hit()
                {
                    Relevance = scoreDoc.Score,
                    Title = doc.Get("title"),
                    Url = doc.Get("path"),
                    Excerpt = excerpt
                });
            }

            return result;
        }
コード例 #2
0
ファイル: HtmlHighlighter.cs プロジェクト: sdluxeon/MyPhoto
 /// <summary>
 /// Builds a highlighted preview of <paramref name="text"/> for the query
 /// <paramref name="q"/>: up to 4 fragments of 250 characters each (the
 /// <see cref="SimpleFragmenter"/> size), joined with "&lt;br/&gt;".
 /// </summary>
 /// <param name="q">Query whose terms are scored and highlighted.</param>
 /// <param name="text">Raw text that is tokenized as the "html_content" field.</param>
 /// <returns>The joined best fragments as produced by the highlighter.</returns>
 private string GeneratePreviewText(Query q, string text)
 {
     var previewHighlighter = new Highlighter(htmlFormatter, new QueryScorer(q))
     {
         TextFragmenter = new SimpleFragmenter(250)
     };

     TokenStream tokens = SearchEnvironment.DefaultAnalyzer.TokenStream(
         "html_content",
         new StringReader(text));

     return previewHighlighter.GetBestFragments(tokens, text, 4, "<br/>");
 }
コード例 #3
0
 /// <summary>
 /// Highlights <paramref name="searchTerm"/> inside <paramref name="text"/> and writes
 /// the resulting fragments to the console, one after another without separators.
 /// </summary>
 /// <param name="searchTerm">Term looked up in the "mainText" field.</param>
 /// <param name="text">Text to tokenize with a <see cref="SimpleAnalyzer"/> and highlight.</param>
 public static void RealHighlighter(string searchTerm, string text)
 {
     var termQuery = new TermQuery(new Term("mainText", searchTerm));
     var fragmentHighlighter = new Highlighter(new QueryScorer(termQuery));

     TokenStream tokens = new SimpleAnalyzer().TokenStream("mainText", new System.IO.StringReader(text));

     // 5 is the maximum number of fragments requested from the highlighter.
     string[] fragments = fragmentHighlighter.GetBestFragments(tokens, text, 5);
     for (int index = 0; index < fragments.Length; index++)
     {
         Console.Write(fragments[index]);
     }
 }
コード例 #4
0
 /// <summary>
 /// Manual test for highlighting: highlights the term "hell" in a fixed sentence
 /// and prints each returned fragment on its own console line.
 /// </summary>
 public static void Highlighter()
 {
     const string sampleText = "I am a man that follows hell.";

     var termQuery = new TermQuery(new Term("", "hell"));
     var fragmentHighlighter = new Highlighter(new QueryScorer(termQuery));

     TokenStream tokens = new SimpleAnalyzer().TokenStream("field", new System.IO.StringReader(sampleText));

     // 1 is the maximum number of fragments requested from the highlighter.
     string[] fragments = fragmentHighlighter.GetBestFragments(tokens, sampleText, 1);
     for (int index = 0; index < fragments.Length; index++)
     {
         Console.WriteLine(fragments[index]);
     }
 }
コード例 #5
0
        /// <summary>
        /// Returns the best highlighted fragments of a field value for the current query.
        /// </summary>
        /// <param name="fieldName">Field name used to pick the analyzer and to tokenize the value.</param>
        /// <param name="fieldValue">Text of the field to highlight.</param>
        /// <param name="startTag">Markup emitted before each highlighted term.</param>
        /// <param name="endTag">Markup emitted after each highlighted term.</param>
        /// <param name="fragmentLength">Fragment size handed to the <see cref="SimpleFragmenter"/>.</param>
        /// <param name="numberOfFragments">Maximum number of fragments to return.</param>
        /// <returns>The fragments produced by the highlighter.</returns>
        protected string[] HighlightField(string fieldName, string fieldValue, string startTag = "<strong>", string endTag = "</strong>", int fragmentLength = 150, int numberOfFragments = 1)
        {
            var queryScorer = new Lucene.Net.Search.Highlight.QueryScorer(Query);
            var highlighter = new Highlighter(new SimpleHTMLFormatter(startTag, endTag), queryScorer);
            highlighter.TextFragmenter = new SimpleFragmenter(fragmentLength);

            var valueReader = new StringReader(fieldValue);
            TokenStream tokens = GetAnalyzer(fieldName).TokenStream(fieldName, valueReader);

            string[] fragments = highlighter.GetBestFragments(tokens, fieldValue, numberOfFragments);
            return fragments;
        }
コード例 #6
0
        /// <summary>
        /// Searches the index in <c>IndexDirectory</c> across the "text", "title",
        /// "urlkey" and "searchterms" fields and returns up to 200 results, each with a
        /// highlighted sample built from the stored "text" value.
        /// </summary>
        /// <param name="terms">Query text handed to the <see cref="MultiFieldQueryParser"/>.</param>
        /// <returns>
        /// One <see cref="IndexResult"/> per returned document, in searcher order.
        /// <c>Sample</c> is an empty string when the document has no stored "text" value.
        /// </returns>
        public List<IndexResult> Search(string terms)
        {
            List<IndexResult> retObj = new List<IndexResult>();
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

            using (var searcher = new IndexSearcher(FSDirectory.Open(IndexDirectory)))
            {
                // parse the query across all searchable fields
                var parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new[] { "text", "title", "urlkey", "searchterms" }, analyzer);

                Query query = parser.Parse(terms);

                TopDocs hits = searcher.Search(query, 200);

                SimpleFragmenter fragmenter = new SimpleFragmenter(80);
                QueryScorer scorer = new QueryScorer(query);
                Highlighter highlighter = new Highlighter(scorer);
                highlighter.TextFragmenter = fragmenter;

                // Iterate over the documents actually returned. TotalHits counts every
                // match in the index and can exceed the 200 entries held in ScoreDocs,
                // so using it as the loop bound would index past the end of the array.
                foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                {
                    // get the document from index
                    Document doc = searcher.Doc(scoreDoc.Doc);

                    string text = doc.Get("text");
                    string sample = string.Empty;
                    if (text != null)
                    {
                        TokenStream stream = analyzer.TokenStream("", new StringReader(text));
                        sample = highlighter.GetBestFragments(stream, text, 2, "...");
                    }

                    retObj.Add(new IndexResult()
                    {
                        Sample = sample,
                        Title = doc.Get("title"),
                        Type = doc.Get("type"),
                        UrlKey = doc.Get("urlkey")
                    });
                }

                return retObj;
            }
        }
コード例 #7
0
    /// <summary>
    /// Prints the highlighted fragments of a document's "mainText" field for the given
    /// search term, followed by a separator line, then waits for a key press.
    /// </summary>
    /// <remarks>
    /// The "mainText" field must be stored at indexing time, otherwise there is no
    /// text to read back from the document. The same holds for any other field read here.
    /// </remarks>
    /// <param name="d">Index document whose "mainText" value is displayed.</param>
    /// <param name="searchTerm">Term to highlight in the "mainText" field.</param>
    private static void DisplayMessage(Document d, string searchTerm)
    {
        string mainText = d.Get("mainText");

        var termQuery = new TermQuery(new Term("mainText", searchTerm));
        var fragmentHighlighter = new Highlighter(new QueryScorer(termQuery));

        TokenStream tokens = new SimpleAnalyzer().TokenStream("mainText", new System.IO.StringReader(mainText));

        // 5 is the maximum number of fragments requested from the highlighter.
        string[] fragments = fragmentHighlighter.GetBestFragments(tokens, mainText, 5);
        for (int index = 0; index < fragments.Length; index++)
        {
            Console.Write(fragments[index]);
        }

        Console.WriteLine("=====================");
        Console.ReadKey();
    }
コード例 #8
0
        /// <summary>
        /// Searches the "Pitanja" index across the "Naslov", "Sadrzaj" and "Tagovi"
        /// fields and returns the top 5 hits as rows of a <see cref="DataTable"/>.
        /// </summary>
        /// <remarks>
        /// The "Sadrzaj" column holds the highlighted sample (up to 3 fragments joined
        /// with "..."), not the full stored value. The columns "DatumZatvaranjaPosta",
        /// "VlasnikID", "PromijenioID", "RoditeljskiPostID", "temp", "Broj.Razgovora"
        /// and "sample" are declared but never populated by this method.
        /// </remarks>
        /// <param name="pretraga">Query text handed to the <see cref="MultiFieldQueryParser"/>.</param>
        /// <returns>A table with one row per returned document.</returns>
        public static DataTable searchPitanja(string pretraga)
        {
            DataTable ResultsPitanja = new DataTable();

            // create the result DataTable
            ResultsPitanja.Columns.Add("id", typeof(Int32));
            ResultsPitanja.Columns.Add("Naslov", typeof(string));
            ResultsPitanja.Columns.Add("Sadrzaj", typeof(string));
            ResultsPitanja.Columns.Add("Tagovi", typeof(string));
            ResultsPitanja.Columns.Add("DatumKreiranja", typeof(DateTime));
            ResultsPitanja.Columns.Add("DatumZadnjeIzmjene", typeof(DateTime));
            ResultsPitanja.Columns.Add("DatumZadnjeAktivnosti", typeof(DateTime));
            ResultsPitanja.Columns.Add("DatumZatvaranjaPosta", typeof(DateTime));
            ResultsPitanja.Columns.Add("PrihvaceniOdgovori", typeof(Int32));
            ResultsPitanja.Columns.Add("BrojOdgovora", typeof(Int32));
            ResultsPitanja.Columns.Add("BrojKomentara", typeof(Int32));
            ResultsPitanja.Columns.Add("BrojOmiljenih", typeof(Int32));
            ResultsPitanja.Columns.Add("BrojPregleda", typeof(Int32));
            ResultsPitanja.Columns.Add("BrojPoena", typeof(Int32));
            ResultsPitanja.Columns.Add("VlasnikID", typeof(Int32));
            ResultsPitanja.Columns.Add("VlasnikNadimak", typeof(string));
            ResultsPitanja.Columns.Add("PromijenioID", typeof(Int32));
            ResultsPitanja.Columns.Add("RoditeljskiPostID", typeof(Int32));
            ResultsPitanja.Columns.Add("PostVrsta", typeof(Int32));
            ResultsPitanja.Columns.Add("temp", typeof(string));
            ResultsPitanja.Columns.Add("Likes", typeof(Int32));
            ResultsPitanja.Columns.Add("Unlikes", typeof(Int32));
            ResultsPitanja.Columns.Add("Sazetak", typeof(string));
            ResultsPitanja.Columns.Add("BrojRangiranja", typeof(Int32));
            ResultsPitanja.Columns.Add("PrihvacenaIzmjena", typeof(Int32));
            ResultsPitanja.Columns.Add("Podnaslov", typeof(string));
            ResultsPitanja.Columns.Add("Broj.Razgovora", typeof(Int32));
            ResultsPitanja.Columns.Add("sample", typeof(string));

            string indexDirectory = "J:/Triglav_Web_App/Triglav/Web/Lucene/Pitanja";
            var analyzer = new StandardAnalyzer(Version.LUCENE_30);

            // The using block releases the searcher even when parsing, searching or
            // row conversion throws; previously it was disposed only on success.
            using (IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory)))
            {
                // parse the query across the searchable fields
                var parser = new MultiFieldQueryParser(Version.LUCENE_30, new[] { "Naslov", "Sadrzaj", "Tagovi" }, analyzer);
                Query query = parser.Parse(pretraga);

                // search
                TopDocs hits = searcher.Search(query, 5);

                // create highlighter
                IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold; background-color: #e5ecf9; \">", "</span>");
                SimpleFragmenter fragmenter = new SimpleFragmenter(80);
                QueryScorer scorer = new QueryScorer(query);
                Highlighter highlighter = new Highlighter(formatter, scorer);
                highlighter.TextFragmenter = fragmenter;

                foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                {
                    // get the document from index
                    Document doc = searcher.Doc(scoreDoc.Doc);

                    // Highlight only when the content is stored; wrapping a null value
                    // in a StringReader would throw.
                    string sadrzaj = doc.Get("Sadrzaj");
                    string sample = string.Empty;
                    if (sadrzaj != null)
                    {
                        TokenStream stream = analyzer.TokenStream("", new StringReader(sadrzaj));
                        sample = highlighter.GetBestFragments(stream, sadrzaj, 3, "...");
                    }

                    // create a new row with the result data
                    DataRow rowPitanja = ResultsPitanja.NewRow();

                    rowPitanja["id"] = doc.Get("id");
                    rowPitanja["Naslov"] = doc.Get("Naslov");
                    rowPitanja["Sadrzaj"] = sample;
                    rowPitanja["Tagovi"] = doc.Get("Tagovi");
                    rowPitanja["DatumKreiranja"] = doc.Get("DatumKreiranja");
                    rowPitanja["DatumZadnjeIzmjene"] = doc.Get("DatumZadnjeIzmjene");
                    rowPitanja["DatumZadnjeAktivnosti"] = doc.Get("DatumZadnjeAktivnosti");
                    rowPitanja["PrihvaceniOdgovori"] = doc.Get("PrihvaceniOdgovori");
                    rowPitanja["BrojOdgovora"] = doc.Get("BrojOdgovora");
                    rowPitanja["BrojKomentara"] = doc.Get("BrojKomentara");
                    rowPitanja["BrojOmiljenih"] = doc.Get("BrojOmiljenih");
                    rowPitanja["BrojPregleda"] = doc.Get("BrojPregleda");
                    rowPitanja["BrojPoena"] = doc.Get("BrojPoena");
                    rowPitanja["VlasnikNadimak"] = doc.Get("VlasnikNadimak");
                    rowPitanja["PostVrsta"] = doc.Get("PostVrsta");
                    rowPitanja["Likes"] = doc.Get("Likes");
                    rowPitanja["Unlikes"] = doc.Get("Unlikes");
                    rowPitanja["Sazetak"] = doc.Get("Sazetak");
                    rowPitanja["BrojRangiranja"] = doc.Get("BrojRangiranja");
                    rowPitanja["PrihvacenaIzmjena"] = doc.Get("PrihvacenaIzmjena");
                    rowPitanja["Podnaslov"] = doc.Get("Podnaslov");

                    ResultsPitanja.Rows.Add(rowPitanja);
                }
            }

            return ResultsPitanja;
        }
コード例 #9
0
 /// <summary>
 /// Creates a fragmenter that remembers the given scorer and fragment size.
 /// </summary>
 /// <param name="queryScorer"><see cref="QueryScorer"/> that was used to score hits</param>
 /// <param name="fragmentSize">size in bytes of each fragment</param>
 public SimpleSpanFragmenter(QueryScorer queryScorer, int fragmentSize)
 {
     this.queryScorer = queryScorer;
     this.fragmentSize = fragmentSize;
 }
コード例 #10
0
 /// <summary>
 /// Creates a fragmenter for the given scorer using <c>DEFAULT_FRAGMENT_SIZE</c>
 /// as the fragment size; delegates to the two-argument constructor.
 /// </summary>
 /// <param name="queryScorer"><see cref="QueryScorer"/> that was used to score hits</param>
 public SimpleSpanFragmenter(QueryScorer queryScorer)
     : this(queryScorer, DEFAULT_FRAGMENT_SIZE)
 {
 }
コード例 #11
0
        /// <summary>
        /// Runs the current query against the index under "~/App_Data/index", fills
        /// <c>Results</c> with the rows of the current page and binds them to
        /// <c>Repeater1</c>. Also updates the paging and timing members
        /// (<c>total</c>, <c>startAt</c>, <c>fromItem</c>, <c>toItem</c>, <c>duration</c>).
        /// </summary>
        /// <remarks>
        /// The query is a <see cref="BooleanQuery"/>: an exact, boosted term match on
        /// "title" plus low-boost fuzzy matches on the other search fields. When the
        /// "Page" query-string value is one of "A", "B" or "C", documents whose "EXTPRP"
        /// matches either of the other two types are excluded. If the session holds a
        /// non-empty "ProjectList", hits are further restricted to documents whose
        /// directory is in that list.
        /// </remarks>
        private void search()
        {
            DateTime start = DateTime.Now;

            // create the result DataTable
            this.Results.Columns.Add("title", typeof(string));
            this.Results.Columns.Add("sample", typeof(string));
            this.Results.Columns.Add("path", typeof(string));
            this.Results.Columns.Add("url", typeof(string));
            this.Results.Columns.Add("Type", typeof(string));

            // index is placed in the "index" subdirectory of App_Data
            string indexDirectory = Server.MapPath("~/App_Data/index");
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

            List<string> searchFields = new List<string> { "text", "path", "title", "Extension", "EXTPRP" };
            List<string> Projects = new List<string>();
            if (Session["ProjectList"] != null)
            {
                Projects = (List<string>)Session["ProjectList"];
            }

            BooleanQuery bquery = new BooleanQuery();

            // Exclude every document type other than the one requested via ?Page=...
            List<string> allType = new List<string> { "A", "B", "C" };
            string requestedType = this.Request.QueryString["Page"];
            if (requestedType != null && allType.Contains(requestedType))
            {
                allType.Remove(requestedType);
                foreach (string type in allType)
                {
                    bquery.Add(new TermQuery(new Term("EXTPRP", type)), Occur.MUST_NOT);
                    bquery.Add(new FuzzyQuery(new Term("EXTPRP", type), 0.5f, 0), Occur.MUST_NOT);
                }
            }

            // Exact match on the title is boosted; every other field gets a
            // low-weight fuzzy match.
            foreach (string field in searchFields)
            {
                if (field == "title")
                {
                    TermQuery titleQuery = new TermQuery(new Term(field, this.Query));
                    titleQuery.Boost = 5f;
                    bquery.Add(titleQuery, Occur.SHOULD);
                }
                else
                {
                    FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term(field, this.Query), 0.5f, 0);
                    fuzzyQuery.Boost = 0.1f;
                    bquery.Add(fuzzyQuery, Occur.SHOULD);
                }
            }

            // The using block releases the searcher even when searching, highlighting
            // or data binding throws; previously it was disposed only on success.
            using (IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory)))
            {
                TopDocs hits = searcher.Search(bquery, null, 10000);

                if (Projects.Count != 0)
                {
                    hits.ScoreDocs = hits.ScoreDocs.Where(obj => Projects.Contains(Path.GetDirectoryName(searcher.Doc(obj.Doc).Get("path")))).Distinct().ToArray();
                }

                this.total = hits.ScoreDocs.Length;

                this.startAt = InitStartAt();

                int resultsCount = Math.Min(total, this.maxResults + this.startAt);

                // create highlighter
                IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;background-color:yellow;\">", "</span>");
                SimpleFragmenter fragmenter = new SimpleFragmenter(200);
                QueryScorer scorer = new QueryScorer(bquery);
                Highlighter highlighter = new Highlighter(formatter, scorer);
                highlighter.TextFragmenter = fragmenter;

                for (int i = startAt; i < resultsCount; i++)
                {
                    Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
                    String path = doc.Get("path");
                    string extension = doc.Get("Extension");
                    string text = doc.Get("text");

                    // Ordinal, case-insensitive comparison avoids culture-dependent
                    // lower-casing and tolerates a missing "Extension" value.
                    bool isImage =
                        string.Equals(extension, ".png", StringComparison.OrdinalIgnoreCase) ||
                        string.Equals(extension, ".jpg", StringComparison.OrdinalIgnoreCase) ||
                        string.Equals(extension, ".gif", StringComparison.OrdinalIgnoreCase) ||
                        string.Equals(extension, ".bmp", StringComparison.OrdinalIgnoreCase);

                    // Images and documents without stored text get an empty sample;
                    // the token stream is only created when there is text to highlight.
                    String sample = "";
                    if (!isImage && text != null)
                    {
                        try
                        {
                            TokenStream stream = analyzer.TokenStream("", new StringReader(text));
                            sample = highlighter.GetBestFragment(stream, text);
                        }
                        catch (Exception ex)
                        {
                            // Highlighting is best-effort: keep the row with an empty
                            // sample, but leave a trace instead of hiding the failure.
                            System.Diagnostics.Trace.TraceWarning("Highlighting failed for '{0}': {1}", path, ex);
                        }
                    }

                    string url = "http://sagitec-1629/KNBASE/" + path.Replace(@"\", "/").Replace("//SAGITEC-1629/Soogle/", "");

                    // create a new row with the result data
                    DataRow row = this.Results.NewRow();
                    row["title"] = doc.Get("title");
                    row["path"] = url;
                    row["url"] = url;
                    row["sample"] = sample;
                    if (path.Contains('.'))
                    {
                        row["Type"] = GetMIMEType(path);
                    }

                    this.Results.Rows.Add(row);
                }

                Repeater1.DataSource = Results;
                Repeater1.DataBind();
            }

            // result information
            this.duration = DateTime.Now - start;
            this.fromItem = startAt + 1;
            this.toItem = Math.Min(startAt + maxResults, total);
        }
コード例 #12
0
ファイル: MainSearch.aspx.cs プロジェクト: TushChandak/Moogle
        //********************************************************************************************************************************
        private void search()
        {
            if (TextBoxQuery.Text != "")
            {
                DateTime start = DateTime.Now;
                // create the result DataTable
                this.Results.Columns.Add("title", typeof(string));
                this.Results.Columns.Add("sample", typeof(string));
                this.Results.Columns.Add("path", typeof(string));
                this.Results.Columns.Add("url", typeof(string));
                this.Results.Columns.Add("Type", typeof(string));

                // create the searcher
                // index is placed in "index" subdirectory
                string indexDirectory = Server.MapPath(IndexDirPath);
                var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
                //   List<string> STOP_WORDS =  StopAnalyzer.ENGLISH_STOP_WORDS_SET.ToList<string>();
                IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory));
                BooleanQuery bquery = new BooleanQuery();
                //var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "text", analyzer);
                List<string> SearchTerm = new List<string> { "text", "path", "title", "Extension", "EXTPRP" };
                List<string> Projects = new List<string>();
                if (Session["ProjectList"] != null)
                {
                    Projects = (List<string>)Session["ProjectList"];
                }

                List<string> allType = null;
                if (hnkClickLink.Value == "")
                {
                    allType = new List<string>();
                }
                else
                {
                    allType = new List<string> { "Doc", "Code", "Images", "Other" };
                }

                if (this.Request.QueryString["Page"] != null)
                {
                    if (allType.Contains(Convert.ToString(hnkClickLink.Value)))
                    {
                        allType.Remove(Convert.ToString(hnkClickLink.Value));
                        foreach (string type in allType)
                        {
                            TermQuery termq1 = new TermQuery(new Term("EXTPRP", type));
                            bquery.Add(termq1, Occur.MUST_NOT);
                            FuzzyQuery termq = new FuzzyQuery(new Term("EXTPRP", type), 0.5f, 0);
                            bquery.Add(termq, Occur.MUST_NOT);
                        }
                    }
                }

                //Query query = parser.Parse(this.Query);
                foreach (string term in SearchTerm)
                {
                    if (term == "title")
                    {
                        TermQuery termq = new TermQuery(new Term(term, this.Query.ToLower()));
                        termq.Boost = 5f;
                        bquery.Add(termq, Occur.SHOULD);
                    }
                    else
                    {
                        TermQuery termq = new TermQuery(new Term(term, this.Query.ToLower()));
                        termq.Boost = 0.1f;
                        bquery.Add(termq, Occur.SHOULD);
                    }
                }

                foreach (string term in SearchTerm)
                {
                    if (this.Query.Contains("."))
                    {
                        string SearchKeyword = this.Query.Replace(".", "");
                        if (term == "Extension")
                        {
                            TermQuery termq = new TermQuery(new Term(term, SearchKeyword.ToLower()));
                            termq.Boost = 5f;
                            bquery.Add(termq, Occur.SHOULD);
                        }
                    }
                    else
                    {
                        if (term == "title")
                        {
                            FuzzyQuery termq = new FuzzyQuery(new Term(term, this.Query.ToLower()));
                            termq.Boost = 5f;
                            bquery.Add(termq, Occur.SHOULD);
                        }
                        else
                        {
                            //FuzzyQuery termq = new FuzzyQuery(new Term(term, this.Query), 0.5f, 0);
                            //termq.Boost = 0.1f;
                            //bquery.Add(termq, Occur.SHOULD);
                        }
                    }
                }

                TopDocs hits = searcher.Search(bquery, null, 10000);

                //TopDocs hits = new TopDocs(TempArrList.Count(), TempArrList.ToArray(), hitsWithText.MaxScore);
                //hits.ScoreDocs.CopyTo(hits.ScoreDocs, 0);
                //hits.ScoreDocs = hits.ScoreDocs.OrderBy(obj => searcher.Doc(obj.Doc).Get("path")).ToArray();

                if (Projects.Count() != 0)
                {
                    hits.ScoreDocs = hits.ScoreDocs.Where(obj => Projects.Contains(SplitPath(Path.GetDirectoryName(searcher.Doc(obj.Doc).Get("path"))))).Distinct().ToArray();
                }

                this.total = hits.ScoreDocs.Count();

                this.startAt = InitStartAt();

                int resultsCount = Math.Min(total, this.maxResults + this.startAt);

                // create highlighter
                IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;background-color:yellow;\">", "</span>");
                SimpleFragmenter fragmenter = new SimpleFragmenter(200);
                QueryScorer scorer = new QueryScorer(bquery);
                Highlighter highlighter = new Highlighter(formatter, scorer);
                highlighter.TextFragmenter = fragmenter;
                //highlighter.MaxDocCharsToAnalyze=200;

                //for (int i = startAt; i < resultsCount; i++)
                //{
                //    Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
                //    String path = doc.Get("path");
                //    string getExtension = doc.Get("Extension");

                //    TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
                //    String sample = "";
                //    try
                //    {
                //        string document = doc.Get("text");
                //        if (getExtension.ToLower() == ".png" || getExtension.ToLower() == ".jpg" || getExtension.ToLower() == ".gif" || getExtension.ToLower() == ".bmp" || getExtension.ToLower() == ".jpeg")
                //        {
                //            sample = "";
                //        }
                //        else
                //        {
                //            string outp = highlighter.GetBestFragment(stream, document);
                //            if (outp != null)
                //                sample = ReplaceSpecialChar(outp.Trim()); //, 2, "...");
                //            else
                //                sample = Limit(doc.Get("text").Trim(), 200);
                //        }

                //    }
                //    catch (Exception ex)
                //    {
                //    }

                //    // create a new row with the result data
                //    DataRow row = this.Results.NewRow();
                //    row["title"] = doc.Get("title");
                //    row["path"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
                //    row["url"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
                //    row["sample"] = sample;
                //    if (path.Contains('.'))
                //    {
                //        row["Type"] = GetMIMEType(path);
                //    }

                //    this.Results.Rows.Add(row);
                //}

                for (int i = 0; i < this.total; i++)
                {
                    Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
                    String path = doc.Get("path");
                    string getExtension = doc.Get("Extension");

                    TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
                    String sample = "";
                    try
                    {
                        string document = doc.Get("text");
                        if (getExtension.ToLower() == ".png" || getExtension.ToLower() == ".jpg" || getExtension.ToLower() == ".gif" || getExtension.ToLower() == ".bmp" || getExtension.ToLower() == ".jpeg")
                        {
                            sample = "";
                        }
                        else
                        {
                            string outp = highlighter.GetBestFragment(stream, document);
                            if (outp != null)
                                sample = Limit(outp.Trim(), 200); //, 2, "...");
                            else
                                sample = Limit(doc.Get("text").Trim(), 200);
                        }

                    }
                    catch (Exception ex)
                    {
                    }

                    // create a new row with the result data
                    DataRow row = Results.NewRow();
                    row["title"] = doc.Get("title");
                    row["path"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
                    row["url"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
                    row["sample"] = sample;
                    if (path.Contains('.'))
                    {
                        row["Type"] = GetMIMEType(path);
                    }

                    Results.Rows.Add(row);
                }

                //****************************** Logic for Paging for Repeater Control****************************************
                PagedDataSource pgitems = new PagedDataSource();
                DataView dv = new DataView(Results);
                pgitems.DataSource = dv;

                pgitems.AllowPaging = true;

                pgitems.PageSize = 10;//You can set the number of items here using some logic.

                pgitems.CurrentPageIndex = PageNumber;

                btnPrev.Visible = !pgitems.IsFirstPage;
                btnNext.Visible = !pgitems.IsLastPage;

                if (pgitems.PageCount > 1)
                {
                    rptPages.Visible = true;
                    ArrayList pages = new ArrayList();
                    for (int i = PageNumber; i < 5 + PageNumber; i++)
                        pages.Add((i + 1).ToString());
                    rptPages.DataSource = pages;
                    rptPages.DataBind();
                }
                else
                    rptPages.Visible = false;

                Repeater1.DataSource = pgitems;
                Repeater1.DataBind();
                //*************************************************************************************************************

                //Repeater1.DataSource = Results;
                //Repeater1.DataBind();

                searcher.Dispose();

                // result information
                this.duration = DateTime.Now - start;
                this.fromItem = startAt + 1;
                this.toItem = Math.Min(startAt + maxResults, total);
            }
        }
コード例 #13
0
ファイル: BexisIndexSearcher.cs プロジェクト: BEXIS2/Core
        /// <summary>
        /// Builds auto-complete suggestions: searches the main index for documents that match both
        /// <paramref name="origQuery"/> and <paramref name="searchtext"/>, then reads the matching
        /// "value" fields of those documents from the auto-complete index.
        /// </summary>
        /// <remarks>
        /// At most 50 documents are taken from the main index and at most 100 auto-complete entries
        /// are read per document. The document loop stops once more than 7 distinct values have been
        /// collected; the check runs after each document, so the result may hold more entries than that.
        /// </remarks>
        /// <param name="origQuery">Query that every candidate document must satisfy.</param>
        /// <param name="queryFilter">
        /// Field to search. It is prefixed with "ng_" (when not already present) to form the default
        /// field of the main-index parser; its lower-cased form is matched against the "field" term
        /// of the auto-complete index.
        /// </param>
        /// <param name="searchtext">Text to complete; it is lower-cased before being parsed.</param>
        /// <returns>The distinct suggestion values, in the order they were found.</returns>
        public static IEnumerable<TextValue> doTextSearch(Query origQuery, String queryFilter, String searchtext)
        {
            String filter = queryFilter;
            if (!filter.StartsWith("ng_", StringComparison.OrdinalIgnoreCase))
            {
                filter = "ng_" + filter;
            }
            if (filter.Equals("ng_all", StringComparison.OrdinalIgnoreCase))
            {
                // Normalise every spelling of "all" so both indexes see the same field name.
                filter = "ng_all";
                queryFilter = "ng_all";
            }

            searchtext = searchtext.ToLower();

            QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, filter, new KeywordAnalyzer());
            parser.DefaultOperator = QueryParser.Operator.AND;

            BooleanQuery query = new BooleanQuery();
            query.Add(origQuery, Occur.MUST);
            query.Add(parser.Parse(searchtext), Occur.MUST);

            TopDocs tds = searcher.Search(query, 50);

            // These clauses do not depend on the current document, so build them once
            // instead of re-parsing the search text for every hit.
            QueryParser valueParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "value", new KeywordAnalyzer());
            valueParser.DefaultOperator = QueryParser.Operator.AND;
            Query valueQuery = valueParser.Parse(searchtext);
            TermQuery fieldQuery = new TermQuery(new Term("field", queryFilter.ToLower()));

            HashSet<string> uniqueText = new HashSet<string>();
            List<TextValue> autoCompleteTextList = new List<TextValue>();

            foreach (ScoreDoc sd in tds.ScoreDocs)
            {
                Document doc = searcher.Doc(sd.Doc);
                String docId = doc.GetField("doc_id").StringValue;

                BooleanQuery lookup = new BooleanQuery();
                lookup.Add(new TermQuery(new Term("id", docId.ToLower())), Occur.MUST);
                lookup.Add(valueQuery, Occur.MUST);
                lookup.Add(fieldQuery, Occur.MUST);

                TopDocs tdAutoComp = autoCompleteSearcher.Search(lookup, 100);
                foreach (ScoreDoc sdAutoComp in tdAutoComp.ScoreDocs)
                {
                    Document docAutoComp = autoCompleteSearcher.Doc(sdAutoComp.Doc);
                    String toAdd = docAutoComp.GetField("value").StringValue;

                    // HashSet.Add reports whether the value was new, so no separate Contains lookup is needed.
                    if (uniqueText.Add(toAdd))
                    {
                        autoCompleteTextList.Add(new TextValue { Name = toAdd, Value = toAdd });
                    }
                }

                if (autoCompleteTextList.Count > 7)
                {
                    break;
                }
            }

            return autoCompleteTextList;
        }
コード例 #14
0
 /// <summary>
 /// Creates a fragmenter for the given scorer with an explicit fragment size.
 /// </summary>
 /// <param name="queryScorer">QueryScorer that was used to score hits</param>
 /// <param name="fragmentSize">size in bytes of each fragment</param>
 public SimpleSpanFragmenter(QueryScorer queryScorer, int fragmentSize)
 {
     // Both arguments are stored as given; no validation happens here.
     this.queryScorer = queryScorer;
     this.fragmentSize = fragmentSize;
 }
コード例 #15
0
 /// <summary>
 /// Creates a fragmenter for the given scorer using <c>DEFAULT_FRAGMENT_SIZE</c>.
 /// </summary>
 /// <param name="queryScorer">QueryScorer that was used to score hits</param>
 public SimpleSpanFragmenter(QueryScorer queryScorer) : this(queryScorer, DEFAULT_FRAGMENT_SIZE) { }
コード例 #16
0
ファイル: IndexHelper.cs プロジェクト: joedavis01/mojoportal
        /// <summary>
        /// Runs a paged full-text search against the Lucene index of a site and, when requested,
        /// attaches a highlighted excerpt of the stored "contents" field to each returned hit.
        /// </summary>
        /// <param name="siteId">Site whose index directory and analyzer provider are used.</param>
        /// <param name="isAdminContentAdminOrSiteEditor">When true, the role filters are not applied.</param>
        /// <param name="userRoles">Roles handed to the role filters.</param>
        /// <param name="featureGuids">Optional feature ids; when any are given a hit must match at least one.</param>
        /// <param name="modifiedBeginDate">Lower bound (date part only) of the "LastModUtc" filter.</param>
        /// <param name="modifiedEndDate">Upper bound (date part only) of the "LastModUtc" filter.</param>
        /// <param name="queryText">Raw query text; a null or empty value yields an empty result.</param>
        /// <param name="highlightResults">When true, a highlighted fragment is stored in each item's Intro.</param>
        /// <param name="highlightedFragmentSize">Fragment size passed to the highlighter's fragmenter.</param>
        /// <param name="pageNumber">Page to return; values of 1 or less select the first page.</param>
        /// <param name="pageSize">Number of hits per page.</param>
        /// <param name="maxClauseCount">Assigned to <c>BooleanQuery.MaxClauseCount</c> unless it equals 1024.</param>
        /// <param name="totalHits">Receives the total number of matching documents (0 when no search ran).</param>
        /// <param name="invalidQuery">
        /// Set to true when a ParseException, BooleanQuery.TooManyClauses or IOException was caught.
        /// </param>
        /// <returns>
        /// The hits of the requested page. The collection is empty when the query text is empty or
        /// the index does not exist; after a handled error it holds whatever had been added so far.
        /// </returns>
        public static IndexItemCollection Search(
            int siteId,
            bool isAdminContentAdminOrSiteEditor,
            List<string> userRoles,
            Guid[] featureGuids,
            DateTime modifiedBeginDate,
            DateTime modifiedEndDate,
            string queryText,
            bool highlightResults,
            int highlightedFragmentSize,
            int pageNumber,
            int pageSize,
            int maxClauseCount,
            out int totalHits,
            out bool invalidQuery)
        {
            invalidQuery = false;
            totalHits = 0;

            IndexItemCollection results = new IndexItemCollection();

            if (string.IsNullOrEmpty(queryText))
            {
                return results;
            }

            using (Lucene.Net.Store.Directory searchDirectory = GetDirectory(siteId))
            {
                if (!IndexReader.IndexExists(searchDirectory)) { return results; }

                // UTC so the measured duration is not affected by local clock (DST) adjustments.
                long startTicks = DateTime.UtcNow.Ticks;

                try
                {
                    if (maxClauseCount != 1024)
                    {
                        BooleanQuery.MaxClauseCount = maxClauseCount;
                    }

                    // there are different analyzers for different languages
                    // see LuceneSettings.config in the root of the web
                    LuceneSettingsProvider provider = LuceneSettingsManager.Providers[GetSiteProviderName(siteId)];
                    Analyzer analyzer = provider.GetAnalyzer();

                    // The same text is searched in every field; wildcards are stripped for "Keyword" only.
                    Query searchQuery = MultiFieldQueryParser.Parse(
                        Lucene.Net.Util.Version.LUCENE_30,
                        new string[] { queryText, queryText, queryText, queryText, queryText, queryText.Replace("*", string.Empty) },
                        new string[] { "Title", "ModuleTitle", "contents", "PageName", "PageMetaDesc", "Keyword" },
                        analyzer);

                    BooleanQuery filterQuery = new BooleanQuery(); // won't be used to score the results

                    if (!isAdminContentAdminOrSiteEditor) // skip role filters for these users
                    {
                        AddRoleFilters(userRoles, filterQuery);
                        AddModuleRoleFilters(userRoles, filterQuery);
                    }

                    // Only content whose publish window contains the current UTC time is returned.
                    TermRangeQuery beginDateFilter = new TermRangeQuery(
                        "PublishBeginDate",
                        DateTime.MinValue.ToString("s"),
                        DateTime.UtcNow.ToString("s"),
                        true,
                        true);

                    filterQuery.Add(beginDateFilter, Occur.MUST);

                    TermRangeQuery endDateFilter = new TermRangeQuery(
                        "PublishEndDate",
                        DateTime.UtcNow.ToString("s"),
                        DateTime.MaxValue.ToString("s"),
                        true,
                        true);

                    filterQuery.Add(endDateFilter, Occur.MUST);

                    if ((modifiedBeginDate.Date > DateTime.MinValue.Date) || (modifiedEndDate.Date < DateTime.MaxValue.Date))
                    {
                        TermRangeQuery lastModifiedDateFilter = new TermRangeQuery(
                            "LastModUtc",
                            modifiedBeginDate.Date.ToString("s"),
                            modifiedEndDate.Date.ToString("s"),
                            true,
                            true);

                        filterQuery.Add(lastModifiedDateFilter, Occur.MUST);
                    }

                    if ((featureGuids != null) && (featureGuids.Length > 0))
                    {
                        // SHOULD clauses inside a MUST group: the hit has to belong to any one of the features.
                        BooleanQuery featureFilter = new BooleanQuery();

                        foreach (Guid featureGuid in featureGuids)
                        {
                            featureFilter.Add(new TermQuery(new Term("FeatureId", featureGuid.ToString())), Occur.SHOULD);
                        }

                        filterQuery.Add(featureFilter, Occur.MUST);
                    }

                    Filter filter = new QueryWrapperFilter(filterQuery); // filterQuery won't affect result scores

                    using (IndexSearcher searcher = new IndexSearcher(searchDirectory))
                    {
                        TopDocs hits = searcher.Search(searchQuery, filter, int.MaxValue);

                        int startHit = 0;
                        if (pageNumber > 1)
                        {
                            startHit = ((pageNumber - 1) * pageSize);
                        }

                        totalHits = hits.TotalHits;

                        // Exclusive index of the last hit on this page, clamped to the number of hits.
                        int end = startHit + pageSize;
                        if (totalHits <= end)
                        {
                            end = totalHits;
                        }

                        int itemsAdded = 0;

                        QueryScorer scorer = new QueryScorer(searchQuery);
                        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='searchterm'>", "</span>");
                        Highlighter highlighter = new Highlighter(formatter, scorer);

                        highlighter.TextFragmenter = new SimpleFragmenter(highlightedFragmentSize);

                        for (int i = startHit; i < end; i++)
                        {
                            Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
                            IndexItem indexItem = new IndexItem(doc, hits.ScoreDocs[i].Score);

                            if (highlightResults)
                            {
                                string contents = doc.Get("contents");

                                // A hit without stored contents would make StringReader throw
                                // ArgumentNullException, which no handler in this method catches,
                                // so such hits simply keep their default Intro.
                                if (!string.IsNullOrEmpty(contents))
                                {
                                    try
                                    {
                                        TokenStream stream = analyzer.TokenStream("contents", new StringReader(contents));
                                        string highlightedResult = highlighter.GetBestFragment(stream, contents);

                                        if (highlightedResult != null) { indexItem.Intro = highlightedResult; }
                                    }
                                    catch (NullReferenceException)
                                    {
                                        // Kept from the original: highlighting is best effort and
                                        // must not prevent the hit from being returned.
                                    }
                                }
                            }

                            results.Add(indexItem);
                            itemsAdded += 1;
                        }

                        results.ItemCount = itemsAdded;
                        results.PageIndex = pageNumber;

                        results.ExecutionTime = DateTime.UtcNow.Ticks - startTicks;
                    }
                }
                catch (ParseException ex)
                {
                    invalidQuery = true;
                    log.Error("handled error for search terms " + queryText, ex);
                    // these parser exceptions are generally caused by
                    // spambots posting too much junk into the search form
                    // heres an option to automatically ban the ip address
                    HandleSpam(queryText, ex);
                }
                catch (BooleanQuery.TooManyClauses ex)
                {
                    invalidQuery = true;
                    log.Error("handled error for search terms " + queryText, ex);
                }
                catch (System.IO.IOException ex)
                {
                    invalidQuery = true;
                    log.Error("handled error for search terms " + queryText, ex);
                }

                return results;
            }
        }
コード例 #17
0
        /// <summary>
        /// Executes <paramref name="query"/> restricted to documents whose publish window contains
        /// the current time, and returns the requested slice of hits with highlighted excerpts.
        /// </summary>
        /// <param name="metaData">Names of additional stored fields to copy into each hit; may be null.</param>
        /// <param name="resultOffset">Index of the first hit to return.</param>
        /// <param name="resultLength">Maximum number of hits to return.</param>
        /// <param name="query">The user query; also drives the highlighting.</param>
        /// <returns>
        /// A result whose NumberOfHits is the number of retrieved documents and whose Hits holds the
        /// requested slice (empty when <paramref name="resultOffset"/> is past the last hit).
        /// </returns>
        private SearchResult ExecuteQuery(string[] metaData, int resultOffset, int resultLength, Query query) {
            // One UTC timestamp serves both the publish-window bounds and the duration measurement.
            var startTime = DateTime.UtcNow;
            var ticks = startTime.Ticks;

            Query publishStartQuery = NumericRangeQuery.NewLongRange("publishStart", null, ticks, true, false);
            Query publishStopQuery = NumericRangeQuery.NewLongRange("publishStop", ticks, null, false, true);

            var booleanQuery = new BooleanQuery {
                {query, Occur.MUST},
                {publishStartQuery, Occur.MUST},
                {publishStopQuery, Occur.MUST}
            };

            var scoreDocs = _searcher.Search(booleanQuery, null, MaxHits, Sort.RELEVANCE).ScoreDocs;
            var result = new SearchResult {NumberOfHits = scoreDocs.Length};

            // Create highlighter
            // NOTE(review): the class attribute value ends with ';' ("search-highlight;") - this looks
            // like a typo, but it is emitted markup, so confirm against the stylesheet before changing it.
            IFormatter formatter = new SimpleHTMLFormatter("<span class=\"search-highlight;\">", "</span>");
            var fragmenter = new SimpleFragmenter(120);
            var scorer = new QueryScorer(query);
            var highlighter = new Highlighter(formatter, scorer) {TextFragmenter = fragmenter};

            if (resultOffset < scoreDocs.Length) {
                var resultUpperOffset = resultOffset + resultLength;
                if (resultUpperOffset > scoreDocs.Length) {
                    resultUpperOffset = scoreDocs.Length;
                }

                for (var i = resultOffset; i < resultUpperOffset; i++) {
                    var document = _searcher.Doc(scoreDocs[i].Doc);
                    var content = document.Get("content");
                    var excerpt = "";

                    if (content != null) {
                        // Reuse the value read above instead of fetching the stored field again.
                        var stream = _analyzer.TokenStream("", new StringReader(content));
                        excerpt = highlighter.GetBestFragments(stream, content, 2, "...");
                    }

                    Guid pageId;
                    (document.Get("pageId") ?? string.Empty).TryParseGuid(out pageId);

                    var hit = new SearchHit {
                        PageId = pageId,
                        Path = document.Get("path"),
                        Title = document.Get("title"),
                        Excerpt = excerpt
                    };

                    // A caller that wants no extra fields may pass null.
                    if (metaData != null) {
                        foreach (var key in metaData) {
                            hit.MetaData.Add(key, document.Get(key));
                        }
                    }

                    result.Hits.Add(hit);
                }
            }

            var timeTaken = DateTime.UtcNow - startTime;
            result.SecondsTaken = timeTaken.TotalSeconds;

            return result;
        }
コード例 #18
0
ファイル: SearchEngine.cs プロジェクト: tbrito/salus
        /// <summary>
        /// Searches the index for <paramref name="text"/> within the given document type and date
        /// range, newest first, and prepares the highlighter state stored on this instance.
        /// </summary>
        /// <param name="text">Search text; it is also tokenized into <c>this.Stream</c>.</param>
        /// <param name="tipodocumentoId">Document type id passed to <c>BuildQuery</c>.</param>
        /// <param name="startDate">Start of the date range passed to <c>BuildDateFilter</c>.</param>
        /// <param name="endDate">End of the date range passed to <c>BuildDateFilter</c>.</param>
        /// <returns>The value produced by <c>BuildSearchResult</c> for the matching documents.</returns>
        private IList<int> Search(string text, int tipodocumentoId, string startDate, string endDate)
        {
            // Stacked using statements release searcher, reader and directory in that order, as before,
            // and also clean up when opening the reader or searcher fails part-way.
            using (var directory = this.GetDirectory())
            using (var indexReader = this.GetIndexReader(directory))
            using (var searcher = new IndexSearcher(indexReader))
            {
                var query = this.BuildQuery(text, tipodocumentoId);
                var filter = this.BuildDateFilter(startDate, endDate);

                // Descending sort on the creation date field.
                var sort = new Sort(new SortField("dataCriacao", SortField.LONG, true));

                var docs = searcher.Search(query, filter, this.configuracoesDaAplicacao.ResultadoMaximoConsulta, sort);

                // create highlighter
                var formatter = new SimpleHTMLFormatter("<span class=\"result-highlight\">", "</span>");
                var scorer = new QueryScorer(query);
                this.Highlighter = new Highlighter(formatter, scorer);
                this.Stream = LuceneEngineBase.GetAnalyzer().TokenStream(string.Empty, new StringReader(text));

                // The result must be built while the searcher is still open.
                return this.BuildSearchResult(docs, searcher);
            }
        }