Exemplo n.º 1
0
        /// <summary>
        /// Parses <paramref name="keyword"/> against the "text" field, runs the query through the
        /// shared index searcher and returns the best highlighted fragment of each distinct hit.
        /// </summary>
        /// <param name="keyword">Query text in Lucene query-parser syntax.</param>
        /// <returns>
        /// For each distinct document id (at most 100): the highlighted fragment with apostrophes
        /// doubled, truncated to 3000 characters, followed by <c>')</c> and a newline.
        /// An empty string when the keyword is null/empty, cannot be parsed, or the search fails.
        /// </returns>
        public string Query(string keyword)
        {
            const int maxDistinctResults = 100;
            const int maxFragmentLength = 3000;

            // Nothing to search for: previously this path ended in a NullReferenceException.
            if (string.IsNullOrEmpty(keyword))
            {
                return string.Empty;
            }

            Lucene.Net.Search.Query query;
            try
            {
                Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("text", analyzer);
                query = parser.Parse(keyword);
            }
            catch (Exception e)
            {
                // Malformed user input is expected; report it instead of silently continuing with a null query.
                System.Diagnostics.Trace.TraceWarning("Could not parse search keyword: " + e.Message);
                return string.Empty;
            }

            lock (locker)
            {
                Lucene.Net.Search.Hits hits;
                try
                {
                    // The searcher is created lazily on first use and then reused.
                    if (searcher == null)
                    {
                        searcher = new Lucene.Net.Search.IndexSearcher(DBNLConfigurationManager.LuceneElement.IndexingFolder);
                    }

                    hits = searcher.Search(query);
                }
                catch (Exception e)
                {
                    System.Diagnostics.Trace.TraceWarning("Index search failed: " + e.Message);
                    return string.Empty;
                }

                if (hits == null)
                {
                    return string.Empty;
                }

                Lucene.Net.Highlight.Formatter formatter = new Lucene.Net.Highlight.SimpleHTMLFormatter(
                    "<span style=\"background:yellow;color:red;\">",
                    "</span>");

                Lucene.Net.Highlight.SimpleFragmenter fragmenter = new Lucene.Net.Highlight.SimpleFragmenter(400);
                Lucene.Net.Highlight.QueryScorer scorer = new Lucene.Net.Highlight.QueryScorer(query);
                Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formatter, scorer);
                highlighter.SetTextFragmenter(fragmenter);

                StringBuilder sb = new StringBuilder();
                Dictionary<string, int> seenIds = new Dictionary<string, int>();

                // Emit one fragment per distinct document id, stopping once enough ids were collected.
                int hitCount = hits.Length();
                for (int i = 0; i < hitCount && seenIds.Count < maxDistinctResults; i++)
                {
                    Lucene.Net.Documents.Document doc = hits.Doc(i);
                    string id = doc.Get("id");

                    // Document.Get returns null for a field that is not stored; such a hit cannot be de-duplicated.
                    if (id == null || seenIds.ContainsKey(id))
                    {
                        continue;
                    }
                    seenIds[id] = 1;

                    string rawText = doc.Get("raw_text") ?? string.Empty;

                    Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(rawText));
                    string highlightedText = highlighter.GetBestFragments(stream, rawText, 1, "...").Replace("'", "''");

                    if (highlightedText == "") // sometimes the highlighter fails to emit text...
                    {
                        highlightedText = rawText.Replace("'", "''");
                    }
                    if (highlightedText.Length > maxFragmentLength)
                    {
                        highlightedText = highlightedText.Substring(0, maxFragmentLength);
                    }

                    // NOTE(review): the doubled apostrophes and the trailing "')" look like leftovers of
                    // building a SQL VALUES clause; kept unchanged because callers may rely on this format.
                    sb.Append(highlightedText);
                    sb.Append("'");
                    sb.Append(")\n");
                }

                return sb.ToString();
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Searches the "content" field for <paramref name="keyword"/> as a phrase and returns one page of
        /// matching content items, each carrying a highlighted excerpt in <c>HighlightText</c>.
        /// </summary>
        /// <param name="keyword">Space-separated words; each word is lower-cased and added to a single phrase query.</param>
        /// <param name="page">1-based page number (the first returned hit is <c>(page - 1) * pageSize</c>).</param>
        /// <param name="pageSize">Number of hits per page.</param>
        /// <param name="totals">Receives the total number of hits; 0 when the keyword contains no words.</param>
        /// <returns>The content items of the requested page; empty when nothing matches.</returns>
        public IEnumerable<Content> Search(string keyword, int page, int pageSize, out int totals)
        {
            const int maxExcerptLength = 500;

            totals = 0;
            List<Content> result = new List<Content>();

            // Drop empty entries so repeated spaces do not add empty terms that can never match.
            string[] words = (keyword ?? string.Empty).Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            if (words.Length == 0)
            {
                return result;
            }

            // Build the phrase from the individual words; invariant lower-casing keeps terms culture-independent.
            PhraseQuery phrase = new PhraseQuery();
            foreach (string word in words)
            {
                phrase.Add(new Term("content", word.ToLowerInvariant()));
            }

            BooleanQuery myquery = new BooleanQuery();
            myquery.Add(phrase, BooleanClause.Occur.MUST);

            lock (locker)
            {
                IndexReader reader = IndexReader.Open(DBNLConfigurationManager.LuceneElement.IndexingFolder);
                try
                {
                    IndexSearcher indexSearcher = new IndexSearcher(reader);
                    try
                    {
                        Hits hits = indexSearcher.Search(myquery);
                        totals = hits.Length();

                        Lucene.Net.Highlight.Formatter formatter = new Lucene.Net.Highlight.SimpleHTMLFormatter(
                            "<span class=\"Highlight\">",
                            "</span>");

                        Lucene.Net.Highlight.SimpleFragmenter fragmenter = new Lucene.Net.Highlight.SimpleFragmenter(400);
                        Lucene.Net.Highlight.QueryScorer scorer = new Lucene.Net.Highlight.QueryScorer(myquery);
                        Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formatter, scorer);
                        highlighter.SetTextFragmenter(fragmenter);

                        // Clamp the window so a page below 1 yields an empty page instead of a negative hit index.
                        int first = Math.Max(0, (page - 1) * pageSize);
                        int last = Math.Min(page * pageSize, totals);

                        for (int i = first; i < last; i++)
                        {
                            Document doc = hits.Doc(i);

                            // Document.Get returns null when the field is not stored.
                            string rawText = doc.Get("content") ?? string.Empty;

                            Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(rawText));

                            // NOTE(review): doubling apostrophes looks like SQL escaping carried over from
                            // elsewhere; kept because consumers of HighlightText may expect it - confirm.
                            string highlightedText = highlighter.GetBestFragments(stream, rawText, 1, "...").Replace("'", "''");

                            if (highlightedText == "") // sometimes the highlighter fails to emit text...
                            {
                                highlightedText = rawText.Replace("'", "''");
                            }
                            if (highlightedText.Length > maxExcerptLength)
                            {
                                highlightedText = highlightedText.Substring(0, maxExcerptLength);
                            }

                            Content content = new ContentService().GetItem(int.Parse(doc.Get("id")));
                            if (content == null)
                            {
                                // Presumably the index references an item that no longer exists; skip it. TODO confirm GetItem's contract.
                                continue;
                            }

                            content.HighlightText = highlightedText;
                            result.Add(content);
                        }
                    }
                    finally
                    {
                        indexSearcher.Close();
                    }
                }
                finally
                {
                    // Always release the index files, even when searching or highlighting throws.
                    reader.Close();
                }
            }

            return result;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Stems <paramref name="searchText"/>, runs it as a Lucene query (with a trailing wildcard when the
        /// stemmed text ends in a letter) and returns up to 100 distinct hits with highlighted excerpts.
        /// </summary>
        /// <param name="searchText">Raw search phrase entered by the user.</param>
        /// <returns>
        /// A <c>SearchModel</c> whose <c>List</c> holds the hits and whose <c>Message</c> is set when the
        /// input is empty, the search fails, or nothing is found.
        /// </returns>
        public SearchModel Search(string searchText)
        {
            const string emptyQueryMessage = "Įveskite paieškos užklausą.";
            const int maxDistinctResults = 100;
            const int maxHighlightLength = 3000;

            var result = new SearchModel();

            if (string.IsNullOrEmpty(searchText))
            {
                result.Message = emptyQueryMessage;
                return result;
            }

            var stemmedSearchText = new LithuanianStemmer().Stem(searchText.Trim());

            if (string.IsNullOrEmpty(stemmedSearchText))
            {
                result.Message = emptyQueryMessage;
                return result;
            }

            Lucene.Net.Search.Hits hits;
            try
            {
                // Turn a query ending in a letter into a prefix search.
                if (char.IsLetter(stemmedSearchText[stemmedSearchText.Length - 1]))
                {
                    stemmedSearchText += "*";
                }

                query = parser.Parse(stemmedSearchText);

                // The searcher is created lazily on first use and then reused.
                if (searcher == null)
                {
                    searcher = new Lucene.Net.Search.IndexSearcher(CustomAppSettings.SearchIndexFolder);
                }

                hits = searcher.Search(query);
            }
            catch (Exception e)
            {
                result.Message = "Paieška nepavyko. Pataisykite užklausą. Klaidos pranešimas: " + e.Message;
                return result;
            }

            Lucene.Net.Highlight.Formatter formatter = new Lucene.Net.Highlight.SimpleHTMLFormatter(
                "<span class=\"highlightResult\">",
                "</span>");

            var fragmenter = new Lucene.Net.Highlight.SimpleFragmenter(100);
            // The query is rewritten before scoring; the wildcard appended above is the likely reason.
            var scorer = new Lucene.Net.Highlight.QueryScorer(searcher.Rewrite(query));
            var highlighter = new Lucene.Net.Highlight.Highlighter(formatter, scorer);
            highlighter.SetTextFragmenter(fragmenter);

            var seenIds = new Dictionary<string, int>();
            var list = new List<SearchIndexModel>();

            // Collect one model per distinct document id, stopping once enough ids were gathered.
            int hitCount = hits.Length();
            for (int i = 0; i < hitCount && seenIds.Count < maxDistinctResults; i++)
            {
                Lucene.Net.Documents.Document doc = hits.Doc(i);
                string id = doc.Get("id");

                // Document.Get returns null for a field that is not stored; such a hit cannot be
                // de-duplicated, and previously aborted the whole search with an ArgumentNullException.
                if (id == null || seenIds.ContainsKey(id))
                {
                    continue;
                }
                seenIds[id] = 1;

                var model = new SearchIndexModel();
                model.Id = id;
                model.Score = hits.Score(i);
                model.Subject = doc.Get("subject");
                model.Type = (EntryTypes)Enum.Parse(typeof(EntryTypes), doc.Get("type"));

                // Encode before highlighting so only the highlighter's own <span> tags remain as markup.
                string rawText = HttpUtility.HtmlEncode(doc.Get("raw_text")) ?? string.Empty;

                Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream("text", new System.IO.StringReader(rawText));

                // NOTE(review): doubling apostrophes looks like SQL escaping carried over from elsewhere;
                // kept because consumers of HighlightedText may expect it - confirm.
                string highlightedText = highlighter.GetBestFragments(stream, rawText, 3, "...").Replace("'", "''");

                if (highlightedText == "") // sometimes the highlighter fails to emit text...
                {
                    highlightedText = rawText.Replace("'", "''");
                }
                if (highlightedText.Length > maxHighlightLength)
                {
                    highlightedText = highlightedText.Substring(0, maxHighlightLength);
                }

                model.HighlightedText = highlightedText;
                list.Add(model);
            }

            result.List = list;
            result.SearchPhrase = searchText;
            if (list.Count == 0)
            {
                result.Message = string.Format("Įrašų pagal užklausą '{0}' nerasta. Patikslinkite paieškos duomenis.", searchText);
            }

            return result;
        }