Ejemplo n.º 1
0
        /// <summary>
        /// Parses <paramref name="text"/> against <paramref name="defaultField"/>, runs it on the
        /// index stored under <c>_path</c> and lazily yields one tuple per hit.
        /// </summary>
        /// <param name="text">Query text; <c>null</c> is passed to the parser as an empty string.</param>
        /// <param name="defaultField">Field used both as the parser default and as the source of the highlighted fragments.</param>
        /// <param name="maxResultCount">Maximum number of hits requested from the searcher.</param>
        /// <returns>
        /// Tuples of (score, stored document, up to five best fragments of <paramref name="defaultField"/>).
        /// The fragment array is empty when the document has no stored value for that field.
        /// </returns>
        /// <remarks>
        /// This is an iterator: nothing (including query parsing) runs until the result is enumerated,
        /// and the directory, reader and searcher stay open until enumeration completes or the
        /// enumerator is disposed.
        /// </remarks>
        public IEnumerable <Tuple <float, Document, string[]> > Search(string text, string defaultField = "title", int maxResultCount = 500)
        {
            var   parser = new QueryParser(Constants.Version, defaultField, _analyzer);
            Query query  = parser.Parse(text ?? string.Empty);

            // Empty pre/post tags: fragments are selected by relevance but returned without markup.
            var highlighter = new Highlighter(new SimpleHTMLFormatter(string.Empty, string.Empty), new QueryScorer(query))
            {
                TextFragmenter = new SimpleFragmenter(120)
            };

            using (var directory = FSDirectory.Open(new DirectoryInfo(_path), new NoLockFactory()))
            // A searcher built from an existing reader does not close that reader, so own it explicitly.
            using (var reader = IndexReader.Open(directory, ReadonlyMode))
            using (var searcher = new IndexSearcher(reader))
            {
                TopDocs hits = searcher.Search(query, maxResultCount);

                foreach (var scoreDoc in hits.ScoreDocs)
                {
                    Document doc = searcher.Doc(scoreDoc.Doc);

                    // Get() returns null when the field is not stored; StringReader rejects null.
                    string   fieldText = doc.Get(defaultField);
                    string[] fragments = fieldText == null
                        ? new string[0]
                        : highlighter.GetBestFragments(
                            _analyzer.TokenStream(defaultField, new StringReader(fieldText)),
                            fieldText,
                            5);

                    yield return(new Tuple <float, Document, string[]>(scoreDoc.Score, doc, fragments));
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds a short excerpt of <paramref name="text"/> with the terms matched by
        /// <paramref name="query"/> wrapped in <c>&lt;span class="search-highlight"&gt;</c>.
        /// </summary>
        /// <param name="query">Query whose terms are scored and highlighted.</param>
        /// <param name="text">Full text to excerpt from.</param>
        /// <param name="fileName">Passed to the analyzer as the field name when tokenizing <paramref name="text"/>.</param>
        /// <returns>
        /// Up to two best fragments joined with "...", or <paramref name="text"/> itself
        /// when the highlighter produces nothing.
        /// </returns>
        public string GetSummaryWithHighlight(Query query, string text, string fileName)
        {
            var analyzer = _getAnalyzer();

            var highlighter = new Highlighter(
                new SimpleHTMLFormatter("<span class=\"search-highlight\">", "</span>"),
                new QueryScorer(query))
            {
                TextFragmenter = new SimpleFragmenter(250)
            };

            var tokens  = analyzer.TokenStream(fileName, new StringReader(text));
            var excerpt = highlighter.GetBestFragments(tokens, text, 2, "...");

            // Fall back to the unmodified text when no fragment could be produced.
            return(string.IsNullOrEmpty(excerpt) ? text : excerpt);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Searches the index, either in a single field or across the message, topic and author fields.
        /// </summary>
        /// <param name="totalHits">
        /// Number of hits returned by the index query (at most the configured hit limit);
        /// 0 when the query is empty or no searcher is available.
        /// </param>
        /// <param name="forumId">The forum identifier; values greater than 0 restrict the user's access list to that forum.</param>
        /// <param name="userId">The user identifier whose access list is loaded.</param>
        /// <param name="searchQuery">The search query. A query consisting only of <c>*</c> and <c>?</c> is treated as empty.</param>
        /// <param name="searchField">The search field; when not set, "Message", "Topic" and the author field are searched.</param>
        /// <param name="pageIndex">Index of the page (forwarded to the result mapper).</param>
        /// <param name="pageSize">Size of the page; also used as the hit limit when the board setting is 0.</param>
        /// <returns>
        /// Returns the Search results
        /// </returns>
        private List <SearchMessage> SearchIndex(
            out int totalHits,
            int forumId,
            int userId,
            string searchQuery,
            string searchField = "",
            int pageIndex      = 1,
            int pageSize       = 1000)
        {
            if (searchQuery.Replace("*", string.Empty).Replace("?", string.Empty).IsNotSet())
            {
                totalHits = 0;
                return(new List <SearchMessage>());
            }

            // Access rows of the user; handed to the result mapper below.
            var userAccessList = this.GetRepository <vaccess>().Get(v => v.UserID == userId);

            // filter forum
            if (forumId > 0)
            {
                userAccessList = userAccessList.FindAll(v => v.ForumID == forumId);
            }

            var searcher = this.GetSearcher();

            if (searcher == null)
            {
                totalHits = 0;
                return(new List <SearchMessage>());
            }

            // From here on the searcher must be handed back on every path (both branches and
            // exceptions). Previously only the multi-field branch released it.
            // NOTE(review): assumes GetSearcher() acquires from this.searcherManager - confirm.
            try
            {
                var hitsLimit = this.Get <BoardSettings>().ReturnSearchMax;

                // 0 => Lucene error;
                if (hitsLimit == 0)
                {
                    hitsLimit = pageSize;
                }

                // Disposed on every path; previously only the single-field branch disposed it.
                using (var analyzer = new StandardAnalyzer(MatchVersion))
                {
                    var formatter = new SimpleHTMLFormatter("<mark>", "</mark>");

                    // NOTE(review): the fragment size is tied to the hit limit - confirm this is intended.
                    var fragmenter = new SimpleFragmenter(hitsLimit);

                    // search by single field
                    if (searchField.IsSet())
                    {
                        var parser = new QueryParser(MatchVersion, searchField, analyzer);
                        var query  = ParseQuery(searchQuery, parser);

                        var hits = searcher.Search(query, hitsLimit).ScoreDocs;
                        totalHits = hits.Length;

                        var highlighter = new Highlighter(formatter, new QueryScorer(query))
                        {
                            TextFragmenter = fragmenter
                        };

                        return(this.MapSearchToDataList(
                                   highlighter,
                                   analyzer,
                                   searcher,
                                   hits,
                                   pageIndex,
                                   pageSize,
                                   userAccessList));
                    }
                    else
                    {
                        var parser = new MultiFieldQueryParser(
                            MatchVersion,
                            new[]
                        {
                            "Message", "Topic",
                            this.Get <BoardSettings>().EnableDisplayName ? "AuthorDisplay" : "Author"
                        },
                            analyzer);

                        var query = ParseQuery(searchQuery, parser);

                        // Newest first: descending sort on the MessageId field, compared as strings.
                        var sort = new Sort(new SortField("MessageId", SortFieldType.STRING, true));
                        var hits = searcher.Search(query, null, hitsLimit, sort).ScoreDocs;

                        totalHits = hits.Length;

                        var highlighter = new Highlighter(formatter, new QueryScorer(query))
                        {
                            TextFragmenter = fragmenter
                        };

                        return(this.MapSearchToDataList(
                                   highlighter,
                                   analyzer,
                                   searcher,
                                   hits,
                                   pageIndex,
                                   pageSize,
                                   userAccessList));
                    }
                }
            }
            finally
            {
                this.searcherManager.Release(searcher);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Searches the article index stored in the "Clanci" folder under the application base
        /// directory and returns the top six hits as a <see cref="DataTable"/>.
        /// </summary>
        /// <param name="pretraga">Query text, parsed against the fields Naslov, Sazetak, Sadrzaj and Tagovi.</param>
        /// <returns>
        /// A table with the full article schema. The "Sazetak" column holds the highlighted
        /// excerpt of the stored summary instead of the raw value. A null or blank query
        /// yields the empty table.
        /// </returns>
        public static DataTable searchClanci(string pretraga)
        {
            DataTable ResultsClanci = new DataTable();

            // Result schema. Column order is kept stable for callers that bind by position.
            ResultsClanci.Columns.Add("id", typeof(Int32));
            ResultsClanci.Columns.Add("Naslov", typeof(string));
            ResultsClanci.Columns.Add("Sadrzaj", typeof(string));
            ResultsClanci.Columns.Add("Tagovi", typeof(string));
            ResultsClanci.Columns.Add("DatumKreiranja", typeof(DateTime));
            ResultsClanci.Columns.Add("DatumZadnjeIzmjene", typeof(DateTime));
            ResultsClanci.Columns.Add("DatumZadnjeAktivnosti", typeof(DateTime));
            ResultsClanci.Columns.Add("DatumZatvaranjaPosta", typeof(DateTime));
            ResultsClanci.Columns.Add("PrihvaceniOdgovori", typeof(Int32));
            ResultsClanci.Columns.Add("BrojOdgovora", typeof(Int32));
            ResultsClanci.Columns.Add("BrojKomentara", typeof(Int32));
            ResultsClanci.Columns.Add("BrojOmiljenih", typeof(Int32));
            ResultsClanci.Columns.Add("BrojPregleda", typeof(Int32));
            ResultsClanci.Columns.Add("BrojPoena", typeof(Int32));
            ResultsClanci.Columns.Add("VlasnikID", typeof(Int32));
            ResultsClanci.Columns.Add("VlasnikNadimak", typeof(string));
            ResultsClanci.Columns.Add("PromijenioID", typeof(Int32));
            ResultsClanci.Columns.Add("RoditeljskiPostID", typeof(Int32));
            ResultsClanci.Columns.Add("PostVrsta", typeof(Int32));
            ResultsClanci.Columns.Add("SlikaURL", typeof(string));
            ResultsClanci.Columns.Add("temp", typeof(string));
            ResultsClanci.Columns.Add("Likes", typeof(Int32));
            ResultsClanci.Columns.Add("Unlikes", typeof(Int32));
            ResultsClanci.Columns.Add("Sazetak", typeof(string));
            ResultsClanci.Columns.Add("BrojRangiranja", typeof(Int32));
            ResultsClanci.Columns.Add("PrihvacenaIzmjena", typeof(Int32));
            ResultsClanci.Columns.Add("Podnaslov", typeof(string));
            ResultsClanci.Columns.Add("Broj.Razgovora", typeof(Int32));
            ResultsClanci.Columns.Add("sample", typeof(string));
            ResultsClanci.Columns.Add("sampleNaslov", typeof(string));

            // Nothing to search for: return the empty table instead of letting the parser throw.
            if (pretraga == null || pretraga.Trim().Length == 0)
            {
                return(ResultsClanci);
            }

            // Stored fields copied verbatim into the column of the same name. The remaining
            // columns (DatumZatvaranjaPosta, VlasnikID, PromijenioID, RoditeljskiPostID, temp,
            // Broj.Razgovora, sample, sampleNaslov) are intentionally left unset.
            string[] copiedFields =
            {
                "id", "Naslov", "Sadrzaj", "Tagovi",
                "DatumKreiranja", "DatumZadnjeIzmjene", "DatumZadnjeAktivnosti",
                "PrihvaceniOdgovori", "BrojOdgovora", "BrojKomentara", "BrojOmiljenih",
                "BrojPregleda", "BrojPoena", "VlasnikNadimak", "PostVrsta", "SlikaURL",
                "Likes", "Unlikes", "BrojRangiranja", "PrihvacenaIzmjena", "Podnaslov"
            };

            // The index lives in the "Clanci" folder next to the application binaries.
            string indexDirectory = AppDomain.CurrentDomain.BaseDirectory + "Clanci";
            var    analyzer       = new StandardAnalyzer(Version.LUCENE_30);

            // using-blocks make sure the directory and searcher are released even when
            // parsing or searching throws.
            using (var directory = FSDirectory.Open(indexDirectory))
            using (var searcher = new IndexSearcher(directory))
            {
                // Parse the query against all searchable article fields.
                var   parser = new MultiFieldQueryParser(Version.LUCENE_30, new[] { "Naslov", "Sazetak", "Sadrzaj", "Tagovi" }, analyzer);
                Query query  = parser.Parse(pretraga);

                // Only the six best hits are requested.
                TopDocs hits = searcher.Search(query, 6);

                // The highlighter wraps matched terms in a bold, tinted span.
                IFormatter  formatter   = new SimpleHTMLFormatter("<span style=\"font-weight:bold; background-color: #e5ecf9; \">", "</span>");
                Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));

                highlighter.TextFragmenter = new SimpleFragmenter(80);

                foreach (var scoreDoc in hits.ScoreDocs)
                {
                    // get the document from index
                    Document doc = searcher.Doc(scoreDoc.Doc);

                    // create a new row with the result data
                    DataRow row = ResultsClanci.NewRow();

                    foreach (string fieldName in copiedFields)
                    {
                        // A missing stored field comes back as null, which typed value
                        // columns (Int32/DateTime) reject; DBNull is the accepted marker.
                        row[fieldName] = (object)doc.Get(fieldName) ?? DBNull.Value;
                    }

                    // Replace the summary with its three best highlighted fragments.
                    string summary = doc.Get("Sazetak");
                    if (summary == null)
                    {
                        row["Sazetak"] = DBNull.Value;
                    }
                    else
                    {
                        TokenStream stream = analyzer.TokenStream("", new StringReader(summary));
                        row["Sazetak"] = highlighter.GetBestFragments(stream, summary, 3, "...");
                    }

                    ResultsClanci.Rows.Add(row);
                }
            }

            return(ResultsClanci); // the caller binds this table as its data source
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Searches the lucene index with the search text.
        /// </summary>
        /// <param name="searchText">The text to search with; matched against the "Text" and "Title" fields.</param>
        /// <returns>
        /// A model holding the trimmed query and the matching posts ordered by descending score,
        /// each with a highlighted excerpt of its "RawText" field. Index entries without a numeric
        /// "GlobalPostID", or whose post is not found in the database, are skipped.
        /// </returns>
        /// <remarks>Syntax reference: http://lucene.apache.org/java/2_3_2/queryparsersyntax.html#Wildcard</remarks>
        /// <exception cref="SearchException">An error occurred searching the lucene.net index.</exception>
        public SearchResultsModel SearchIndex(string searchText)
        {
            // This check is for the benefit of the CI builds
            if (!Directory.Exists(_indexPath))
            {
                CreateIndex();
            }

            StandardAnalyzer analyzer = new StandardAnalyzer();
            IndexSearcher    searcher = null;

            try
            {
                searcher = new IndexSearcher(_indexPath);

                // Build query
                var parser      = new MultiFieldQueryParser(new string[] { "Text", "Title" }, analyzer);
                var searchQuery = parser.Parse(searchText);

                // Execute search
                var hits = searcher.Search(searchQuery);

                // Highlight the parts that are matched:
                var formatter   = new SimpleHTMLFormatter("<span style='background:yellow;font-weight:bold;'>", "</span>");
                var fragmenter  = new SimpleFragmenter(400);
                var scorer      = new QueryScorer(searchQuery);
                var highlighter = new Highlighter(formatter, scorer);
                highlighter.SetTextFragmenter(fragmenter);

                var finalResults = new List <DisplayedResult>();
                var db           = Legato.Current.DB;

                for (int i = 0; i < hits.Length(); i++)
                {
                    var doc = hits.Doc(i);

                    // Cheap validity checks first, so no highlighting work is spent on entries
                    // that will be dropped anyway.
                    int postID;
                    if (!int.TryParse(doc.Get("GlobalPostID"), out postID))                     // If GlobalPostID is null or not a number, this isn't a valid search entry, so we skip it.
                    {
                        continue;
                    }
                    var post = db.GlobalPostIDs.Where(p => p.GlobalPostID1 == postID).SingleOrDefault();
                    if (post == null)
                    {
                        continue;
                    }
                    // TODO: privacy checks?
                    post.FillProperties();

                    // A document without stored RawText gets an empty excerpt instead of
                    // failing the whole search on StringReader(null).
                    string rawText     = doc.Get("RawText");
                    string highlighted = string.Empty;
                    if (rawText != null)
                    {
                        var stream = analyzer.TokenStream("", new StringReader(rawText));
                        highlighted = highlighter.GetBestFragments(stream, rawText, 1, "...").Replace("'", "''");
                        if (highlighted == "")                         // sometimes the highlighter fails to emit text...
                        {
                            highlighted = rawText.Replace("'", "''");
                        }
                    }

                    // NOTE(review): a hard cut at 1000 characters can land inside the highlight
                    // markup and leave an unclosed <span>; confirm how the view handles that.
                    if (highlighted.Length > 1000)
                    {
                        highlighted = highlighted.Substring(0, 1000);
                    }

                    finalResults.Add(new DisplayedResult()
                    {
                        ResultPost      = post,
                        Score           = hits.Score(i),
                        HighlightedHTML = highlighted
                    });
                }

                return(new SearchResultsModel()
                {
                    Results = finalResults.OrderByDescending(r => r.Score),
                    Query   = searchText.Trim()
                });
            }
            catch (Exception ex)
            {
                throw new SearchException(ex, "An error occured while searching the index");
            }
            finally
            {
                // The searcher holds index files open; release it on success and failure alike.
                if (searcher != null)
                {
                    searcher.Close();
                }
            }
        }
Ejemplo n.º 6
0
    /// <summary>
    /// Executes <paramref name="query"/> against <c>IndexSearcher</c> and appends one row per hit
    /// of the current page to <c>_Results</c>, including a highlighted sample of the "text" field.
    /// </summary>
    /// <param name="query">The already-built Lucene query to run.</param>
    /// <exception cref="InvalidOperationException">The index searcher has not been created.</exception>
    /// <remarks>
    /// The searcher is disposed when the method finishes, whether it succeeds or throws.
    /// NOTE(review): <c>IndexSearcher</c> is not reset to null here, so a second call would
    /// pass the null check with a disposed searcher - confirm how the property is managed.
    /// </remarks>
    public void Search(Query query)
    {
        if (IndexSearcher == null)
        {
            throw new InvalidOperationException("IndexSearcher not created");
        }

        trace("search {0}", query.ToString());

        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

        // Stopwatch is monotonic; subtracting two TimeOfDay values breaks across midnight.
        var watch = System.Diagnostics.Stopwatch.StartNew();

        try
        {
            // do the query
            TopDocs SearchResult = IndexSearcher.Search(query, _MAXRESULTS);

            _totalItems = SearchResult.TotalHits;
            if (_totalItems > _MAXRESULTS)
            {
                _totalItems = _MAXRESULTS;
            }

            // create highlighter
            IFormatter  formatter   = new SimpleHTMLFormatter("<span style=\"font-weight:bold;\">", "</span>");
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));

            highlighter.TextFragmenter = new SimpleFragmenter(80);

            // initialize startAt
            _startAt = InitStartAt();

            // End (exclusive) of the current page. _totalItems is already capped at
            // _MAXRESULTS, so no second clamp is needed.
            int resultsCount = Math.Min(_totalItems, _resultsPerPage + _startAt);

            for (int i = _startAt; i < resultsCount; i++)
            {
                // get the document from index
                Document doc = IndexSearcher.Doc(SearchResult.ScoreDocs[i].Doc);

                // Documents without a stored "text" field get an empty sample rather than
                // crashing on StringReader(null).
                String text   = doc.Get("text");
                String sample = text == null
                    ? string.Empty
                    : highlighter.GetBestFragments(analyzer.TokenStream("", new StringReader(text)), text, 2, "...");

                String path = doc.Get("path");

                // create a new row with the result data
                DataRow row = _Results.NewRow();
                row["title"]  = doc.Get("title");
                row["path"]   = path;
                row["url"]    = _baseURL + path;
                row["sample"] = sample;
                row["score"]  = Convert.ToInt16(SearchResult.ScoreDocs[i].Score * 100);
                row["id"]     = doc.Get("id");
                row["type"]   = doc.Get("type");

                _Results.Rows.Add(row);
            }

            watch.Stop();

            trace("Search completed in {0}ms", watch.Elapsed.TotalMilliseconds);

            // Must run while the searcher is still open; it used to be called after Dispose().
            trace(SearchResult, IndexSearcher);
        }
        finally
        {
            IndexSearcher.Dispose();
        }
    }
Ejemplo n.º 7
0
    /// <summary>
    /// Runs a fuzzy search for every whitespace-separated word of <paramref name="Query"/> on the
    /// "text" field and appends one row per hit of the current page to <c>_Results</c>.
    /// Also updates the paging/result fields (<c>_totalItems</c>, <c>_startAt</c>,
    /// <c>_fromItem</c>, <c>_toItem</c>, <c>_duration</c>).
    /// </summary>
    /// <param name="Query">Raw user input; a null value is searched as an empty query (no hits).</param>
    /// <exception cref="InvalidOperationException">The index searcher has not been created.</exception>
    /// <remarks>
    /// The searcher is disposed when the method finishes, whether it succeeds or throws.
    /// NOTE(review): words are put into the fuzzy terms exactly as typed (no analysis or
    /// lower-casing) - confirm this matches how the "text" field was indexed.
    /// </remarks>
    public void Search(string Query)
    {
        if (IndexSearcher == null)
        {
            throw new InvalidOperationException("IndexSearcher not created");
        }

        _query = Query;

        // Stopwatch is unaffected by system clock changes, unlike DateTime.Now differences.
        var watch = System.Diagnostics.Stopwatch.StartNew();

        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

        var fieldName         = "text";
        var minimumSimilarity = 0.5f;
        var prefixLength      = 3;
        var query             = new BooleanQuery();

        // One optional fuzzy clause per word; null input yields an empty query.
        var segments = (_query ?? string.Empty).Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

        foreach (string segment in segments)
        {
            Term       term       = new Term(fieldName, segment);
            FuzzyQuery fuzzyQuery = new FuzzyQuery(term, minimumSimilarity, prefixLength);
            query.Add(fuzzyQuery, Occur.SHOULD);
        }

        try
        {
            // search
            TopDocs hits = IndexSearcher.Search(query, 200);

            // Never report more items than were actually fetched: only 200 are requested
            // above, so capping by _MAXRESULTS alone let the paging loop index past the
            // end of ScoreDocs.
            _totalItems = Math.Min(Math.Min(hits.TotalHits, _MAXRESULTS), hits.ScoreDocs.Length);

            // create highlighter
            IFormatter  formatter   = new SimpleHTMLFormatter("<span style=\"font-weight:bold;\">", "</span>");
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));

            highlighter.TextFragmenter = new SimpleFragmenter(80);

            // initialize startAt
            _startAt = InitStartAt();

            // End (exclusive) of the current page.
            int resultsCount = Math.Min(_totalItems, _resultsPerPage + _startAt);

            for (int i = _startAt; i < resultsCount; i++)
            {
                // get the document from index
                Document doc = IndexSearcher.Doc(hits.ScoreDocs[i].Doc);

                // Documents without a stored "text" field get an empty sample rather than
                // crashing on StringReader(null).
                String text   = doc.Get("text");
                String sample = text == null
                    ? string.Empty
                    : highlighter.GetBestFragments(analyzer.TokenStream("", new StringReader(text)), text, 2, "...");

                String path = doc.Get("path");

                // create a new row with the result data
                DataRow row = _Results.NewRow();
                row["title"]  = doc.Get("title");
                row["path"]   = path;
                row["url"]    = _baseURL + path;
                row["sample"] = sample;
                row["score"]  = Convert.ToInt16(hits.ScoreDocs[i].Score * 100);

                _Results.Rows.Add(row);
            }
        }
        finally
        {
            IndexSearcher.Dispose();
        }

        // result information
        watch.Stop();
        _duration = watch.Elapsed;
        _fromItem = _startAt + 1;
        _toItem   = Math.Min(_startAt + this.ResultsPerPage, _totalItems);
    }
Ejemplo n.º 8
0
        /// <summary>
        /// Searches the index in <c>_directory</c>, either in one named field or across the
        /// name/description fields of all languages combined with a role filter.
        /// </summary>
        /// <param name="searchQuery">User query; null, empty, or consisting only of <c>*</c>/<c>?</c> returns no results.</param>
        /// <param name="searchField">When set, only this field is searched and no role filter or highlighter is applied.</param>
        /// <param name="role">
        /// "trading", "member", or anything else (treated as visitor); selects which
        /// <c>is_*</c> flag fields the mandatory role clause is parsed against.
        /// </param>
        /// <returns>The mapped hits (at most 1000), as produced by <c>_mapLuceneToDataList</c>.</returns>
        /// <remarks>
        /// NOTE(review): the single-field path does not apply the role clause - confirm that
        /// callers of that path do not expose role-restricted items.
        /// NOTE(review): the searcher is disposed on return, so <c>_mapLuceneToDataList</c> is
        /// assumed to return a materialized collection - verify.
        /// </remarks>
        private static IEnumerable <LuceneSearchData> _search
            (string searchQuery, string searchField = "", string role = null)
        {
            // validation (null-safe: the wildcard stripping used to run before the null check)
            if (string.IsNullOrEmpty(searchQuery) ||
                string.IsNullOrEmpty(searchQuery.Replace("*", "").Replace("?", "")))
            {
                return(new List <LuceneSearchData>());
            }

            const int hitsLimit = 1000;

            // The using-block owns the searcher; no explicit Dispose() calls are needed.
            using (var searcher = new IndexSearcher(_directory, false))
            {
                var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

                try
                {
                    // search by single field
                    if (!string.IsNullOrEmpty(searchField))
                    {
                        var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, searchField, analyzer);
                        var query  = parseQuery(searchQuery, parser);
                        var hits   = searcher.Search(query, hitsLimit).ScoreDocs;

                        return(_mapLuceneToDataList(hits, searcher));
                    }

                    // search by multiple fields (ordered by RELEVANCE)
                    var textParser = new MultiFieldQueryParser
                                         (Lucene.Net.Util.Version.LUCENE_30, new[] {
                        "Name",
                        "name_en",
                        "name_zh",
                        "name_cn",
                        "Description",
                        "desc_en",
                        "desc_zh",
                        "desc_cn",
                    }, analyzer);

                    BooleanQuery bq = new BooleanQuery();
                    bq.Add(parseQuery(searchQuery, textParser), Occur.MUST);

                    // Flag fields consulted for the caller's role; each role also covers
                    // the flags of the roles listed after it.
                    string[] roleFields;
                    switch (role)
                    {
                    case "trading":
                        roleFields = new[] { "is_trading", "is_member", "is_visitor" };
                        break;

                    case "member":
                        roleFields = new[] { "is_member", "is_visitor" };
                        break;

                    default:
                        roleFields = new[] { "is_visitor" };
                        break;
                    }

                    var roleParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, roleFields, analyzer);
                    roleParser.DefaultOperator = QueryParser.AND_OPERATOR;

                    // The role clause is mandatory: the term "1" parsed against the role's flag fields.
                    bq.Add(parseQuery("1", roleParser), Occur.MUST);

                    var        scorer     = new QueryScorer(bq);
                    var        rankedHits = searcher.Search(bq, null, hitsLimit, Sort.RELEVANCE).ScoreDocs;
                    IFormatter formatter  = new SimpleHTMLFormatter("<span style=\"font-weight:bold; background-color:yellow;\">", "</span>");

                    Highlighter highlighter = new Highlighter(formatter, scorer);
                    highlighter.TextFragmenter = new SimpleFragmenter(1000);

                    return(_mapLuceneToDataList(rankedHits, searcher, highlighter, analyzer));
                }
                finally
                {
                    // Runs on success and on failure; previously skipped when anything threw.
                    analyzer.Close();
                }
            }
        }
Ejemplo n.º 9
0
        public virtual async Task <SearchResult> SearchAsync(string term,
                                                             int?filterByCategory         = null,
                                                             int languageId               = -1,
                                                             PostType?postType            = null,
                                                             SearchPlace searchPlace      = SearchPlace.Anywhere,
                                                             SearchResultSortType orderBy = SearchResultSortType.Score,
                                                             int maxResult    = 1000,
                                                             bool exactSearch = false)
        {
            var result = new SearchResult();

            term = term.Trim();

            //replace multiple spaces with a single space
            RegexOptions options = RegexOptions.None;
            Regex        regex   = new Regex("[ ]{2,}", options);

            term = regex.Replace(term, " ");

            if (string.IsNullOrWhiteSpace(term))
            {
                return(result);
            }

            var watch = new System.Diagnostics.Stopwatch();

            watch.Start();
            try
            {
                await Task.Run(() =>
                {
                    using (var directory = FSDirectory.Open(new DirectoryInfo(_indexFilesPath)))
                    {
                        using (var searcher = new IndexSearcher(directory, readOnly: true))
                        {
                            var searchInFields = new List <string>();
                            if (searchPlace == SearchPlace.Anywhere)
                            {
                                searchInFields.AddRange(new string[] { "Title", "Description", "Keywords", "Tags" });
                            }
                            else
                            {
                                if (searchPlace.HasFlagFast(SearchPlace.Title))
                                {
                                    searchInFields.Add("Title");
                                }

                                if (searchPlace.HasFlagFast(SearchPlace.Description))
                                {
                                    searchInFields.Add("Description");
                                }

                                if (searchPlace.HasFlagFast(SearchPlace.Keywords))
                                {
                                    searchInFields.Add("Keywords");
                                }

                                if (searchPlace.HasFlagFast(SearchPlace.Tags))
                                {
                                    searchInFields.Add("Tags");
                                }
                            }

                            BooleanFilter filter = null;
                            if (languageId > -1 || filterByCategory != null || postType != null)
                            {
                                filter = new BooleanFilter();
                                if (languageId > -1)
                                {
                                    filter.Add(new FilterClause(
                                                   new QueryWrapperFilter(new TermQuery(new Term("LanguageId", languageId.ToString()))),
                                                   Occur.MUST));
                                }
                                if (filterByCategory != null)
                                {
                                    filter.Add(new FilterClause(
                                                   new QueryWrapperFilter(new TermQuery(new Term("Categories",
                                                                                                 filterByCategory.Value.ToString()))), Occur.MUST));
                                }
                                if (postType != null)
                                {
                                    filter.Add(new FilterClause(
                                                   new QueryWrapperFilter(new TermQuery(new Term("PostType",
                                                                                                 postType.Value.ToString()))), Occur.MUST));
                                }
                            }

                            var currentSettings = _settingService.LoadSetting <SiteSettings>();
                            if (!currentSettings.EnableBlog)
                            {
                                //Filter Blog Posts if Blog is disabled
                                if (filter == null)
                                {
                                    filter = new BooleanFilter();
                                }
                                filter.Add(new FilterClause(
                                               new QueryWrapperFilter(new TermQuery(new Term("PostType",
                                                                                             PostType.BlogPost.ToString()))), Occur.MUST_NOT));
                            }

                            Sort sort = new Sort(SortField.FIELD_SCORE);

                            switch (orderBy)
                            {
                            case SearchResultSortType.NumberOfVisits:
                                sort = new Sort(new SortField("NumberOfVisit", SortField.INT, true));
                                break;

                            case SearchResultSortType.PublishDate:
                                sort = new Sort(new SortField("PublishDate", SortField.LONG, true));
                                break;

                            case SearchResultSortType.LastUpDate:
                                sort = new Sort(new SortField("LastUpDate", SortField.LONG, true));
                                break;
                            }

                            var analyzer       = new StandardAnalyzer(Version);
                            var parser         = new MultiFieldQueryParser(Version, searchInFields.ToArray(), analyzer);
                            QueryScorer scorer = null;
                            var hits           = new List <ScoreDoc>();
                            Query query        = null;
                            if (exactSearch)
                            {
                                query = ParseQuery(term, parser);
                                hits.AddRange(searcher.Search(query, filter, maxResult, sort).ScoreDocs);
                            }
                            else
                            {
                                query = ParseQuery($"(\"{term}\")", parser);
                                hits.AddRange(searcher.Search(query, filter, maxResult, sort).ScoreDocs);
                                query = ParseQuery($"({term.Replace(" ", "*")})", parser);
                                hits.AddRange(searcher.Search(query, filter, maxResult, sort).ScoreDocs);
                                query = ParseQuery($"(+{term.Trim().Replace(" ", " +")})", parser);
                                hits.AddRange(searcher.Search(query, filter, maxResult, sort).ScoreDocs);
                                query = ParseQuery(term, parser);
                                hits.AddRange(searcher.Search(query, filter, maxResult, sort).ScoreDocs);
                            }

                            scorer = new QueryScorer(query);

                            if (hits.Count == 0)
                            {
                                term   = SearchByPartialWords(term);
                                query  = ParseQuery(term, parser);
                                scorer = new QueryScorer(query);
                                hits.AddRange(searcher.Search(query, filter, maxResult, sort).ScoreDocs);
                            }

                            var formatter = new SimpleHTMLFormatter(
                                "<span class='badge badge-warning'>",
                                "</span>");
                            var fragmenter  = new SimpleFragmenter(300);
                            var highlighter = new Highlighter(formatter, scorer)
                            {
                                TextFragmenter = fragmenter
                            };

                            foreach (var scoreDoc in hits)
                            {
                                var doc = searcher.Doc(scoreDoc.Doc);
                                result.Documents.Add(new SearchResultDocument()
                                {
                                    DocumentId       = int.Parse(doc.Get("ID")),
                                    LanguageId       = int.Parse(doc.Get("LanguageId")),
                                    LanguageIsoCode  = doc.Get("LanguageCode"),
                                    Score            = scoreDoc.Score,
                                    DocumentTitle    = GetHighlight("Title", highlighter, analyzer, doc.Get("Title"), false),
                                    DocumentBody     = GetHighlight("Description", highlighter, analyzer, doc.Get("Description"), true),
                                    DocumentKeywords = doc.Get("Keywords"),
                                    DocumentTags     = doc.Get("Tags"),
                                });
                            }

                            result.Documents = result.Documents.DistinctBy(p => new { p.DocumentId })
                                               .ToList();

                            analyzer.Close();

                            //SuggestSimilar
                            using (var spellDirectory = FSDirectory.Open(new DirectoryInfo(_spellFilesPath)))
                            {
                                using (var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellDirectory))
                                {
                                    result.SuggestSimilar.AddRange(spellChecker.SuggestSimilar(term, 10, null, null, true));
                                }
                            }
                        }
                    }
                });
            }
            catch (Exception ex)
            {
                result.Error    = ex;
                result.HasError = true;
            }

            watch.Stop();
            result.ElapsedMilliseconds = watch.ElapsedMilliseconds;

            _eventPublisher.Publish(new SearchEvent(term, filterByCategory, languageId, postType, searchPlace, maxResult, result));

            return(result);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Searches the index.
        /// </summary>
        /// <remarks>
        /// When <paramref name="searchField"/> is given, only that field is searched; otherwise the
        /// "Message", "Topic" and "Author" fields are searched and hits are returned in index order.
        /// A query that is <c>null</c>, empty, or made up solely of the wildcards <c>*</c> and
        /// <c>?</c> yields an empty list without touching the index.
        /// </remarks>
        /// <param name="totalHits">The total hits.</param>
        /// <param name="forumId">The forum identifier.</param>
        /// <param name="userId">The user identifier.</param>
        /// <param name="searchQuery">The search query.</param>
        /// <param name="searchField">The search field.</param>
        /// <param name="pageIndex">Index of the page.</param>
        /// <param name="pageSize">Size of the page.</param>
        /// <returns>
        /// Returns the Search results
        /// </returns>
        private List <SearchMessage> SearchIndex(
            out int totalHits,
            int forumId,
            int userId,
            string searchQuery,
            string searchField = "",
            int pageIndex      = 1,
            int pageSize       = 1000)
        {
            totalHits = 0;

            // Nothing to search for when the query is missing or consists only of wildcards.
            if (searchQuery == null
                || string.IsNullOrEmpty(searchQuery.Replace("*", string.Empty).Replace("?", string.Empty)))
            {
                return(new List <SearchMessage>());
            }

            // Insert forum access here
            var userAccessList = this.GetRepository <vaccess>().Get(v => v.UserID == userId);

            // filter forum
            if (forumId > 0)
            {
                userAccessList = userAccessList.FindAll(v => v.ForumID == forumId);
            }

            // The using statement owns the searcher; no explicit Dispose() call is needed.
            using (var searcher = new IndexSearcher(Directory, true))
            {
                var hitsLimit = this.Get <YafBoardSettings>().ReturnSearchMax;
                var analyzer  = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

                try
                {
                    var formatter = new SimpleHTMLFormatter(
                        "<mark>",
                        "</mark>");
                    var         fragmenter = new SimpleFragmenter(hitsLimit);
                    QueryScorer scorer;
                    ScoreDoc[]  hits;

                    if (!string.IsNullOrEmpty(searchField))
                    {
                        // search by single field
                        var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, searchField, analyzer);
                        var query  = this.ParseQuery(searchQuery, parser);

                        scorer = new QueryScorer(query);
                        hits   = searcher.Search(query, hitsLimit).ScoreDocs;
                    }
                    else
                    {
                        // search across the default set of fields, keeping index order
                        var parser = new MultiFieldQueryParser(
                            Lucene.Net.Util.Version.LUCENE_30,
                            new[] { "Message", "Topic", "Author" },
                            analyzer);
                        var query = this.ParseQuery(searchQuery, parser);

                        scorer = new QueryScorer(query);
                        hits   = searcher.Search(query, null, hitsLimit, Sort.INDEXORDER).ScoreDocs;
                    }

                    totalHits = hits.Length;

                    var highlighter = new Highlighter(formatter, scorer)
                    {
                        TextFragmenter = fragmenter
                    };

                    return(this.MapSearchToDataList(
                               highlighter,
                               analyzer,
                               searcher,
                               hits,
                               pageIndex,
                               pageSize,
                               userAccessList));
                }
                finally
                {
                    // Release the analyzer even when parsing, searching or mapping throws.
                    analyzer.Close();
                }
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Highlights the <c>HighlighterTest.FIELD_NAME</c> field of every retrieved hit and writes
        /// the best fragments to the console, one tab-indented line per document.
        /// </summary>
        /// <param name="analyzer">Analyzer used to tokenize the stored field text.</param>
        /// <param name="searcher">Searcher used to load each hit's document.</param>
        /// <param name="hits">Search results whose <c>ScoreDocs</c> are highlighted.</param>
        /// <param name="query">Query the scorer is built from.</param>
        /// <param name="formatter">Formatter that marks up the matched terms.</param>
        /// <param name="expandMT">Not used by this method.</param>
        public void DoStandardHighlights(Analyzer analyzer, IndexSearcher searcher,
                                         TopDocs hits, Query query, IFormatter formatter, bool expandMT)
        {
            IFragmenter frag = new SimpleFragmenter(20);

            // TotalHits counts every matching document, whereas ScoreDocs holds only the hits
            // that were actually retrieved; bounding the loop by TotalHits can index past the
            // end of the array, so iterate over the array itself.
            var scoreDocs = hits.ScoreDocs;

            for (int i = 0; i < scoreDocs.Length; i++)
            {
                String text = searcher.Doc(scoreDocs[i].Doc).Get(HighlighterTest.FIELD_NAME);
                int maxNumFragmentsRequired = 2;
                String fragmentSeparator = "...";
                // NOTE(review): scorer stays null when Mode is neither QUERY nor QUERY_TERM - confirm
                // those are the only modes in use.
                IScorer scorer = null;
                TokenStream tokenStream = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
                if (Mode == QUERY)
                {
                    scorer = new QueryScorer(query);
                }
                else if (Mode == QUERY_TERM)
                {
                    scorer = new QueryTermScorer(query);
                }
                var highlighter = new Highlighter(formatter, scorer) {TextFragmenter = frag};

                String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired,
                                                             fragmentSeparator);
                Console.WriteLine("\t" + result);
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Builds a digest from the summary, the bullet points and, when required, the leading
        /// part of the HTML content, and returns the remaining plain text as the body.
        /// </summary>
        /// <param name="summaryText">Text placed at the start of the digest.</param>
        /// <param name="bulletPointsText">Optional bullet points; null or empty entries are skipped.</param>
        /// <param name="contentHtml">HTML content that is converted to plain text.</param>
        /// <param name="includeContent">
        /// When false and the digest is already non-empty, no content extract is added.
        /// </param>
        /// <param name="digestText">Receives the assembled digest.</param>
        /// <param name="bodyText">
        /// Receives the whole plain-text content when no extract was taken, otherwise the content
        /// that follows the extract.
        /// </param>
        public void Summarize(string summaryText, IList <string> bulletPointsText, string contentHtml,
                              bool includeContent, out string digestText, out string bodyText)
        {
            var digest = new StringBuilder(DigestMaxSize);

            // The digest always opens with the summary.
            digest.Append(summaryText);

            // Followed by every non-empty bullet point, trimmed and prefixed with a bullet marker.
            if (bulletPointsText != null)
            {
                foreach (var point in bulletPointsText)
                {
                    if (string.IsNullOrEmpty(point))
                    {
                        continue;
                    }

                    digest.AppendFormat(" \x2022\x00a0{0}", point.Trim()); // &bull;&nbsp;text
                }
            }

            // Convert the HTML content to plain text.
            // TODO: consider using MS Search HTML IFilter instead
            var withApostrophes = HtmlUtil.AposToHtml(contentHtml);
            var withoutTags     = HtmlUtil.StripHtmlTags(withApostrophes);
            var plainText       = HttpUtility.HtmlDecode(withoutTags);

            // Swap the Windows Western bullet for the Unicode bullet.
            plainText = plainText.Replace('\x0095', '\x2022');

            // The content extract is only needed when requested or when the digest is still empty.
            var needsExtract = includeContent || digest.Length == 0;
            if (!needsExtract)
            {
                digestText = digest.ToString();
                bodyText   = plainText;
                return;
            }

            // Tokenize the content and let the fragmenter decide where the first fragment
            // (sized to the space left in the digest) ends.
            Analyzer   analyzer = new SimpleAnalyzer();
            var        tokens   = analyzer.tokenStream(Field, new java.io.StringReader(plainText));
            var        offsets  = (OffsetAttribute)tokens.addAttribute(typeof(OffsetAttribute));
            Fragmenter splitter = new SimpleFragmenter(DigestMaxSize - digest.Length);

            splitter.start(plainText, tokens);

            var cutoff = 0;

            for (var hasToken = tokens.incrementToken(); hasToken; hasToken = tokens.incrementToken())
            {
                // Record the end of the current token before asking whether it opened a new fragment.
                cutoff = offsets.endOffset();
                if (splitter.isNewFragment())
                {
                    break;
                }
            }

            if (digest.Length > 0)
            {
                digest.Append(' ');
            }

            digest.Append(plainText, 0, cutoff)
                  .Append(" ...");

            digestText = digest.ToString();
            bodyText   = plainText.Substring(cutoff);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Extracts the text of <paramref name="obj"/> and returns its best fragments with the
        /// terms of <paramref name="query"/> wrapped in <paramref name="preTag"/> /
        /// <paramref name="postTag"/>.
        /// </summary>
        /// <param name="obj">
        /// A <c>GxSilentTrnSdt</c>, a <c>GxFile</c>, or any other object whose <c>ToString()</c>
        /// yields the text (parsed as HTML when <paramref name="textType"/> starts with "htm").
        /// </param>
        /// <param name="query">Query text; when null or empty the plain text is returned.</param>
        /// <param name="textType">Type of the text; a null value is treated as plain text.</param>
        /// <param name="preTag">Markup inserted before each highlighted term.</param>
        /// <param name="postTag">Markup inserted after each highlighted term.</param>
        /// <param name="fragmentSize">Fragment size passed to the fragmenter.</param>
        /// <param name="maxNumFragments">Maximum number of fragments joined with "...".</param>
        /// <returns>The highlighted fragments, or the extracted text when there is nothing to highlight.</returns>
        public static string HtmlPreview(Object obj, string query, string textType, string preTag, string postTag, int fragmentSize, int maxNumFragments)
        {
            string         text;
            GxSilentTrnSdt silent = obj as GxSilentTrnSdt;
            GxFile         file   = obj as GxFile;

            if (silent != null)
            {
                text = (silent).Transaction.ToString();
            }
            else if (file != null)
            {
                text = DocumentHandler.GetText(file.GetAbsoluteName(), System.IO.Path.GetExtension(file.GetAbsoluteName()));
            }
            else if (textType != null && textType.ToLower().StartsWith("htm"))
            {
                text = new NTidyHTMLHandler().GetTextFromString(obj.ToString());
            }
            else
            {
                text = obj.ToString();
            }

            // Nothing to highlight without both a query and some text.
            if (string.IsNullOrEmpty(query) || string.IsNullOrEmpty(text))
            {
                return(text);
            }

            if (qp == null)
            {
                qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_24, IndexRecord.CONTENTFIELD, Indexer.CreateAnalyzer());
                qp.DefaultOperator        = QueryParser.Operator.AND;
                qp.MultiTermRewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
            }

            Query unReWrittenQuery = qp.Parse(query);

            // Default to the un-rewritten query so highlighting still works when the rewrite fails.
            // The cache lookup uses its own local: passing q as the out argument would reset it to
            // null on a cache miss and lose this fallback.
            Query q = unReWrittenQuery;
            try
            {
                if (reader == null)
                {
                    reader = Indexer.Reader;
                }

                Query cached;
                if (queries.TryGetValue(query, out cached) && cached != null)
                {
                    q = cached;
                }
                else
                {
                    Query rewritten = unReWrittenQuery.Rewrite(reader);//required to expand search terms (for the usage of highlighting with wildcards)

                    if (queries.Count == int.MaxValue)
                    {
                        queries.Clear();
                    }
                    queries[query] = rewritten;
                    q = rewritten;
                }
            }
            catch (Exception ex)
            {
                GXLogging.Error(log, "HTMLPreview error", ex);
            }
            QueryScorer scorer = new QueryScorer(q);

            SimpleHTMLFormatter formatter   = new SimpleHTMLFormatter(preTag, postTag);
            Highlighter         highlighter = new Highlighter(formatter, scorer);
            IFragmenter         fragmenter  = new SimpleFragmenter(fragmentSize);

            highlighter.TextFragmenter = fragmenter;
            TokenStream tokenStream = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_24).TokenStream("Content", new StringReader(text));

            String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragments, "...");
            return(result);
        }
        /// <summary>
        /// Searches the Lucene index stored under "/Index-lucene" for the text taken from the
        /// request's query string and renders the highlighted results.
        /// </summary>
        /// <returns>
        /// The default view with <c>ViewBag.YourSearch</c>, <c>ViewBag.SearchQuery</c> and
        /// <c>ViewBag.SearchResult</c> populated, or a redirect to <c>Home/UploadFile</c> when the
        /// index folder contains no files or no search text was supplied. A failure while
        /// searching is logged and the view is rendered without results.
        /// </returns>
        public ActionResult Search()
        {
            var    path          = Server.MapPath("/Index-lucene");
            int    numberOfFiles = System.IO.Directory.GetFiles(path).Length;
            var    searchText    = Request.QueryString.ToString();
            string output        = searchText.Substring(searchText.IndexOf('=') + 1);
            string searchWord    = output.Replace('+', ' ');

            ViewBag.YourSearch = searchWord;
            if (numberOfFiles == 0 || output.Length == 0)
            {
                return(RedirectToAction("UploadFile", "Home"));
            }

            // The using statements release the directory, analyzer, reader and searcher on every
            // path, including when the search throws.
            using (Lucene.Net.Store.Directory dir = FSDirectory.Open(path))
            using (Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29))
            using (IndexReader indexReader = IndexReader.Open(dir, true))
            using (Searcher indexSearch = new IndexSearcher(indexReader))
            {
                try
                {
                    var queryParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, new string[] { "metaTag", "prevewContent", "fileNameWithoutExtension" }, analyzer);
                    var query       = queryParser.Parse(searchWord);

                    // One collector pass provides both the total hit count and the top documents.
                    TopScoreDocCollector collector = TopScoreDocCollector.Create(20000, true);
                    indexSearch.Search(query, collector);
                    TopDocs resultDocs = collector.TopDocs();
                    ViewBag.SearchQuery = resultDocs.TotalHits + " result(s) found for \"" + searchWord + "\"";

                    ScoreDoc[]       hits        = resultDocs.ScoreDocs;
                    IFormatter       formatter   = new SimpleHTMLFormatter("<span style=\"color: black; font-weight: bold;\">", "</span>");
                    SimpleFragmenter fragmenter  = new SimpleFragmenter(160);
                    QueryScorer      scorer      = new QueryScorer(query);
                    Highlighter      highlighter = new Highlighter(formatter, scorer);
                    highlighter.TextFragmenter = fragmenter;

                    List <ListofResult> parts = new List <ListofResult>();
                    for (int i = 0; i < hits.Length; i++)
                    {
                        Document doc     = indexSearch.Doc(hits[i].Doc);
                        string   url     = doc.Get("URL");
                        string   title   = doc.Get("filename");
                        string   preview = doc.Get("prevewContent");
                        string   content = string.Empty;

                        // A document without preview text is listed with empty content instead of
                        // failing the whole result list.
                        if (!string.IsNullOrEmpty(preview))
                        {
                            TokenStream stream = analyzer.TokenStream("", new StringReader(preview));
                            content = highlighter.GetBestFragments(stream, preview, 3, "...");

                            // No fragment matched in the preview: fall back to its first 480 characters.
                            if (string.IsNullOrEmpty(content))
                            {
                                content = preview.Substring(0, Math.Min(480, preview.Length));
                            }
                        }

                        parts.Add(new ListofResult()
                        {
                            FileName = title, Content = content, URL = url
                        });
                    }

                    //assigning list into ViewBag
                    ViewBag.SearchResult = parts;
                }
                catch (Exception ex)
                {
                    // Keep rendering the page as before, but leave a trace of the failure instead
                    // of discarding it silently.
                    System.Diagnostics.Trace.TraceError("Lucene search for \"{0}\" failed: {1}", searchWord, ex);
                }
            }

            return(View());
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Runs <paramref name="query"/> restricted to documents whose publish window contains the
        /// current UTC time and returns one page of hits with highlighted excerpts.
        /// </summary>
        /// <param name="metaData">Names of stored fields copied into each hit's meta data; may be null.</param>
        /// <param name="resultOffset">Zero-based index of the first hit to return; negative values are treated as 0.</param>
        /// <param name="resultLength">Maximum number of hits to return.</param>
        /// <param name="query">The user query; also used to score the highlighted excerpts.</param>
        /// <returns>
        /// The requested page of hits, the number of documents found and the time taken in seconds.
        /// </returns>
        private SearchResult ExecuteQuery(string[] metaData, int resultOffset, int resultLength, Query query) {
            // Stopwatch is unaffected by wall-clock adjustments, unlike subtracting DateTime.Now values.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();
            var ticks = DateTime.UtcNow.Ticks;

            // publishStart must lie before now and publishStop at or after now.
            Query publishStartQuery = NumericRangeQuery.NewLongRange("publishStart", null, ticks, true, false);
            Query publishStopQuery = NumericRangeQuery.NewLongRange("publishStop", ticks, null, false, true);

            var booleanQuery = new BooleanQuery {
                {query, Occur.MUST},
                {publishStartQuery, Occur.MUST},
                {publishStopQuery, Occur.MUST}
            };

            var scoreDocs = _searcher.Search(booleanQuery, null, MaxHits, Sort.RELEVANCE).ScoreDocs;
            var result = new SearchResult {NumberOfHits = scoreDocs.Length};

            // Create highlighter
            // NOTE(review): the class attribute ends with a semicolon ("search-highlight;") - confirm
            // this is what the style sheet expects.
            IFormatter formatter = new SimpleHTMLFormatter("<span class=\"search-highlight;\">", "</span>");
            var fragmenter = new SimpleFragmenter(120);
            var scorer = new QueryScorer(query);
            var highlighter = new Highlighter(formatter, scorer) {TextFragmenter = fragmenter};

            // Clamp the requested window to the available hits. The upper bound is computed in
            // 64-bit arithmetic so a very large resultLength cannot overflow into a negative bound.
            var firstIndex = Math.Max(0, resultOffset);
            var upperIndex = (int)Math.Min((long)firstIndex + resultLength, scoreDocs.Length);

            for (var i = firstIndex; i < upperIndex; i++) {
                var document = _searcher.Doc(scoreDocs[i].Doc);
                var content = document.Get("content");
                var excerpt = "";

                if (content != null) {
                    var stream = _analyzer.TokenStream("", new StringReader(content));
                    excerpt = highlighter.GetBestFragments(stream, content, 2, "...");
                }

                Guid pageId;
                (document.Get("pageId") ?? string.Empty).TryParseGuid(out pageId);

                var hit = new SearchHit {
                    PageId = pageId,
                    Path = document.Get("path"),
                    Title = document.Get("title"),
                    Excerpt = excerpt
                };

                if (metaData != null) {
                    foreach (var key in metaData) {
                        hit.MetaData.Add(key, document.Get(key));
                    }
                }

                result.Hits.Add(hit);
            }

            stopwatch.Stop();
            result.SecondsTaken = stopwatch.Elapsed.TotalSeconds;

            return result;
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Runs the lower-cased query text against the index, fills the searched-list view model
        /// with the ranked, highlighted passages and logs the ranking for the given query id.
        /// </summary>
        /// <param name="querytext">The text to search the index</param>
        /// <param name="qid">Query identifier passed to the TREC logger.</param>
        /// <param name="relevantList">Passage ids counted as relevant for this query.</param>
        /// <returns>The string form of the parsed query.</returns>
        private string SearchAndDisplayResults(string querytext, long qid, List <long> relevantList)
        {
            System.Console.WriteLine("Searching for " + querytext);
            querytext = querytext.ToLower();

            Query parsed = parser.Parse(querytext);
            System.Console.WriteLine($"Searching for { parsed.ToString()}");

            TopDocs found = searcher.Search(parsed, MAX_QUERY);

            // Highlighter that wraps matches in a bold, yellow-background span; a fragment may be
            // up to 3000 characters long.
            IFormatter markup = new SimpleHTMLFormatter("<span style=\"font-weight:bold;background-color:yellow;\">", "</span>");
            var highlighter   = new Highlighter(markup, new QueryScorer(parsed))
            {
                TextFragmenter = new SimpleFragmenter(3000)
            };

            long  position      = 0;
            float bestScore     = 0f;
            long  relevantCount = 0;
            var   trecItems     = new List <TrecItem>();

            SearchedListViewModel.DeleteAll();
            foreach (ScoreDoc hit in found.ScoreDocs)
            {
                // The first hit carries the score every other hit is normalised against.
                position++;
                if (position == 1)
                {
                    bestScore = hit.Score;
                }

                Lucene.Net.Documents.Document stored = searcher.Doc(hit.Doc);
                long passageId            = Convert.ToInt64(stored.Get(PID_FN).ToString());
                CollectionPassage passage = collectionProvider.Passages[passageId];

                // Logging Trec
                trecItems.Add(new TrecItem(0, passageId, position, hit.Score));

                // Highlight the passage text; use the plain text when nothing could be highlighted.
                TokenStream tokens = analyzer.TokenStream("", new StringReader(passage.passage_text));
                string      marked = highlighter.GetBestFragment(tokens, passage.passage_text);
                string      shown  = marked == null ? passage.passage_text : marked;

                bool isRelevant = relevantList.Contains(passageId);
                if (isRelevant)
                {
                    relevantCount++;
                }

                SearchedListViewModel.Add(hit.Score / bestScore, passageId, passage.GetTitle(), passage.url, shown, isRelevant);
            }

            StatusBarViewModel.Instance.NumRelevants = "Num Relevants : " + relevantCount.ToString() + "/" + relevantList.Count.ToString();
            StatusBarViewModel.Instance.NumSearch    = "Num Searched :" + found.ScoreDocs.Length.ToString();

            // Logging Trec
            trecLogger.Logging(qid, trecItems);

            return(parsed.ToString());
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Searches the index for the query text and returns one dictionary per hit,
        /// in the order the searcher ranked them.
        /// </summary>
        /// <param name="querytext">The text to search the index for.</param>
        /// <returns>
        /// A list with one entry per returned hit (the search asks for at most 100).
        /// Each entry contains the keys "rank", "passId", "score", "title", "url",
        /// "text", "queryId" and "highlighter". The list is empty when nothing matches.
        /// </returns>
        public List <Dictionary <string, string> > SearchIndext(string querytext)
        {
            List <Dictionary <string, string> > resultListDict = new List <Dictionary <string, string> >();

            Query query = DisplayQueries(querytext);

            Console.WriteLine("query is " + query);
            TopDocs results = searcher.Search(query, 100);

            System.Console.WriteLine("Number of results is " + results.TotalHits);

            // Set up the highlighter that wraps matching terms in a styled <span>.
            IFormatter       formatter   = new SimpleHTMLFormatter("<span style=\"font-weight:bold; background-color:yellow;\">", "</span>");
            SimpleFragmenter fragmenter  = new SimpleFragmenter(2000);
            QueryScorer      scorer      = new QueryScorer(query);
            Highlighter      highlighter = new Highlighter(formatter, scorer);

            highlighter.TextFragmenter = fragmenter;

            int rank = 0;

            // Each ScoreDoc carries the internal document id and the score of that
            // document for the query. Iterating an empty array is a no-op, so no
            // separate "are there any hits" check is needed.
            foreach (ScoreDoc scoreDoc in results.ScoreDocs)
            {
                rank++;
                Lucene.Net.Documents.Document doc = searcher.Doc(scoreDoc.Doc);
                string myFieldValue = doc.Get(TEXT_FN_PASS_TEXT);
                string myURL        = doc.Get(TEXT_FN_URL);
                string passId       = doc.Get(TEXT_FN_PASS_ID);
                string score        = scoreDoc.Score.ToString();
                string queryId      = doc.Get(TEXT_FN_QUERY_ID);

                int jsonId = Int32.Parse(doc.Get(TEXT_FN_JSON_ARRAY_ID));

                // The original note says the passage text is indexed without being
                // stored, so the text is looked up in the source JSON by passage id.
                // No early exit: if several passages share the id, the last one wins.
                foreach (var itemP in jArr[jsonId][PASSAGES])
                {
                    if (itemP[TEXT_FN_PASS_ID].ToString() == passId)
                    {
                        myFieldValue = itemP[TEXT_FN_PASS_TEXT].ToString();
                    }
                }

                // Neither the index nor the JSON may have supplied a text; fall back
                // to an empty string instead of passing null to StringReader.
                if (myFieldValue == null)
                {
                    myFieldValue = string.Empty;
                }

                // Add the highlighter markup to the passage text. With no text there
                // is nothing to highlight, so the value is left null.
                string HLmyFieldValue = null;
                if (myFieldValue.Length > 0)
                {
                    TokenStream HLstream = analyzer.TokenStream("", new StringReader(myFieldValue));
                    HLmyFieldValue = highlighter.GetBestFragment(HLstream, myFieldValue);
                }

                string title = ExtractTitleFromUrl(myURL);

                resultListDict.Add(new Dictionary <string, string> {
                    { "rank", rank.ToString() }, { "passId", passId },
                    { "score", score }, { "title", title }, { "url", myURL }, { "text", myFieldValue }, { "queryId", queryId }, { "highlighter", HLmyFieldValue }
                });
            }

            return(resultListDict);
        }

        /// <summary>
        /// Derives a title from a URL: the last '/'-separated segment, or the one
        /// before it when the URL ends with a slash.
        /// </summary>
        /// <param name="url">The URL to inspect; may be null or empty.</param>
        /// <returns>The chosen segment, or an empty string when the URL is null or empty.</returns>
        private static string ExtractTitleFromUrl(string url)
        {
            if (string.IsNullOrEmpty(url))
            {
                return string.Empty;
            }

            string[] segments = url.Split('/');
            string   last     = segments[segments.Length - 1];

            // A trailing slash leaves an empty final segment; use the previous one.
            if (last.Length == 0 && segments.Length > 1)
            {
                return segments[segments.Length - 2];
            }

            return last;
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Does the search and stores the information about the results.
        /// </summary>
        /// <remarks>
        /// Fills <c>Results</c> with one row per hit on the current page and sets
        /// <c>total</c>, <c>startAt</c>, <c>duration</c>, <c>fromItem</c> and <c>toItem</c>.
        /// Only the "title" column is populated; "sample", "path" and "url" are
        /// created but left empty.
        /// </remarks>
        private void search()
        {
            // UTC avoids skewed durations when the local clock shifts (e.g. DST).
            DateTime start = DateTime.UtcNow;

            // index is placed in the "index" subdirectory of App_Data
            string indexDirectory = Server.MapPath("~/App_Data/index");

            var analyzer = new Lucene.Net.Analysis.PanGu.PanGuAnalyzer();

            // Number of the last hit that can actually be shown on this page.
            int resultsCount;

            // using blocks release the index directory and the searcher even when
            // parsing or searching throws.
            using (var directory = FSDirectory.Open(indexDirectory))
            using (var searcher = new IndexSearcher(directory))
            {
                // parse the query, "name" is the default field to search
                var parser = new QueryParser(Version.LUCENE_30, "name", analyzer);

                parser.AllowLeadingWildcard = true;

                Query query = parser.Parse(this.Query);

                // create the result DataTable
                this.Results.Columns.Add("title", typeof(string));
                this.Results.Columns.Add("sample", typeof(string));
                this.Results.Columns.Add("path", typeof(string));
                this.Results.Columns.Add("url", typeof(string));

                // search
                TopDocs hits = searcher.Search(query, 200);

                this.total = hits.TotalHits;

                // initialize startAt
                this.startAt = InitStartAt();

                // How many items we should show. TotalHits counts every match, but
                // ScoreDocs only holds the hits that were actually collected, so the
                // loop bound must respect the array length as well.
                resultsCount = Math.Min(
                    Math.Min(total, this.maxResults + this.startAt),
                    hits.ScoreDocs.Length);

                for (int i = startAt; i < resultsCount; i++)
                {
                    // get the document from index
                    Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);

                    // create a new row with the result data
                    DataRow row = this.Results.NewRow();
                    row["title"] = doc.Get("name");

                    this.Results.Rows.Add(row);
                }
            }

            // result information
            this.duration = DateTime.UtcNow - start;
            this.fromItem = startAt + 1;
            // Same value as before whenever every counted hit was collected; otherwise
            // it reflects the last row that could really be displayed.
            this.toItem   = resultsCount;
        }