public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    // Break the title apart on the package-id separators, re-join the pieces
    // with spaces, and let the inner analyzer tokenize the result.
    string title = reader.ReadToEnd();
    string[] fragments = title.Split(PackageIndexEntity.IdSeparators, StringSplitOptions.RemoveEmptyEntries);
    string partiallyTokenized = String.Join(" ", fragments);
    return innerAnalyzer.TokenStream(fieldName, new StringReader(partiallyTokenized));
}
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
    // NOTE(review): a fresh analyzer (and therefore a fresh stream) is built on
    // every call, so despite the name nothing is actually reused here — confirm
    // whether reuse via the previous-token-stream mechanism was intended.
    StandardAnalyzer sa = new StandardAnalyzer(MyLucene.GetLuceneVersion(), StopFilter.MakeStopSet(stopWords));
    // Wrap the standard stream with the custom filter before handing it back.
    return new MyFilter(sa.TokenStream(fieldName, reader));
}
public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
{
    // Standard tokenization with the instance stop-word set, post-processed by MyFilter.
    StandardAnalyzer sa = new StandardAnalyzer(stopWords);
    return new MyFilter(sa.TokenStream(fieldName, reader));
}
private void button1_Click(object sender, EventArgs e)
{
    // Tokenize a Chinese sample string with the standard analyzer
    // (unigram segmentation for CJK text) and print each term.
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_20);
    // Dispose the stream so its underlying reader is released (the original leaked it).
    using (TokenStream tokenStream = analyzer.TokenStream("", new StringReader("北京,欢迎你们所有人")))
    {
        // The attribute instance is stable across IncrementToken calls,
        // so fetch it once instead of on every iteration.
        var termAttribute = tokenStream.GetAttribute <ITermAttribute>();
        while (tokenStream.IncrementToken())
        {
            Console.WriteLine(termAttribute.Term);
        }
    }
}
/// <summary>
/// Unigram segmentation demo (一元分词) using the legacy Token-based API.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event data.</param>
private void button1_Click(object sender, EventArgs e)
{
    Analyzer analyzer = new StandardAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader("北京,Hi欢迎你们大家"));
    // Older Lucene.Net API: Next() returns null once the stream is exhausted.
    for (Lucene.Net.Analysis.Token token = tokenStream.Next(); token != null; token = tokenStream.Next())
    {
        Console.WriteLine(token.TermText());
    }
}
public static string Summarize(Query query, bool shouldDocumentsBeClustered, string discoveryPath, Encoding encoding, Cache cache)
{
    // Extract plain text from the file at discoveryPath and return the single
    // best ~150-character highlighted fragment for the query, with a "..." tail.
    StandardAnalyzer standardAnalyzer = new StandardAnalyzer();
    Highlighter highlighter = new Highlighter(new QueryScorer(query));
    highlighter.SetTextFragmenter(new SimpleFragmenter(150));
    string rawContent = File.ReadAllText(discoveryPath, encoding);
    string text = UserDefinedFunctions.ExtractText(rawContent).Value;
    TokenStream tokenStream = standardAnalyzer.TokenStream("text", new StringReader(text));
    // Strip any leading spaces/commas the fragmenter may leave at the cut point.
    string fragment = highlighter.GetBestFragments(tokenStream, text, 1, "...") + " ...";
    return fragment.TrimStart(" ,".ToCharArray());
}
private void button1_Click(object sender, EventArgs e)
{
    // Unigram tokenization demo using the 3.0 attribute-based API.
    Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    using (TokenStream tokenStream = analyzer.TokenStream("", new StringReader("北京,Hi欢迎你们大家")))
    {
        while (tokenStream.IncrementToken())
        {
            // GetAttribute returns the same instance each time; read Term per token.
            ITermAttribute ita = tokenStream.GetAttribute <ITermAttribute>();
            Console.WriteLine(ita.Term);
        }
    }
}
public static void testStandard(String testString)
{
    // Print every term the StandardAnalyzer produces for testString.
    // Use an explicit version rather than LUCENE_CURRENT (here: 3.0).
    Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    StringReader r = new StringReader(testString);
    // Dispose the stream when done; the original leaked it.
    using (TokenStream ts = analyzer.TokenStream("", r))
    {
        Console.WriteLine("=====Standard analyzer=======");
        // The attribute instance is stable across IncrementToken calls.
        ITermAttribute termAtt = ts.GetAttribute <ITermAttribute>();
        while (ts.IncrementToken())
        {
            string iterm = termAtt.Term;
            Console.WriteLine("[" + iterm + "]");
        }
    }
}
protected void btnGetSegmentation_Click(object sender, EventArgs e)
{
    // Segment the submitted text and echo the tokens back, pipe-separated.
    string words = txtWords.Text;
    if (string.IsNullOrEmpty(words))
    {
        return;
    }
    // StandardAnalyzer performs unigram segmentation for CJK input.
    Analyzer analyzer = new StandardAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(words));
    // Next() yields tokens until the stream is exhausted (returns null).
    for (Token token = tokenStream.Next(); token != null; token = tokenStream.Next())
    {
        Response.Write(token.TermText() + " | ");
    }
}
public static string GetTag(string text)
{
    // Strip line breaks, commas and spaces, then render the tokenized text
    // through the term view. (Removed an unused StringBuilder local.)
    text = text.Replace("\n", string.Empty).Replace("\r", string.Empty).Replace(",", string.Empty).Replace(" ", string.Empty);
    StandardAnalyzer analyzer = new StandardAnalyzer();
    int termCounter = 0;
    AnalyzerView view = new TermAnalyzerView();
    StringReader stringReader = new StringReader(text);
    TokenStream tokenStream = analyzer.TokenStream("defaultFieldName", stringReader);
    // termCounter receives the number of terms the view processed; only the
    // rendered text is returned here.
    return view.GetView(tokenStream, out termCounter).Trim();
}
public static string GetKeyWordSplid(string keywords)
{
    // Tokenize the keyword string and return the terms space-separated
    // (the result keeps a trailing space, as before).
    StringBuilder sb = new StringBuilder();
    Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    // NOTE(review): the search text is also passed as the field name; the
    // StandardAnalyzer ignores the field name, so this appears harmless —
    // confirm before changing.
    TokenStream stream = analyzer.TokenStream(keywords, new StringReader(keywords));
    while (stream.IncrementToken())
    {
        ITermAttribute ita = stream.GetAttribute <ITermAttribute>();
        sb.Append(ita.Term + " ");
    }
    return sb.ToString();
}
public ActionResult Cut(string str)
{
    // Unigram (single-character) segmentation: return the tokens of str
    // joined with '|' as plain content.
    StringBuilder sb = new StringBuilder();
    StandardAnalyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(str));
    // The attribute instance is stable; read Term after each successful IncrementToken.
    ITermAttribute item = tokenStream.GetAttribute <ITermAttribute>();
    while (tokenStream.IncrementToken())
    {
        sb.Append(item.Term + "|");
    }
    // Removed a stray tokenStream.CloneAttributes() call whose result was discarded.
    analyzer.Close();
    return Content(sb.ToString());
}
/// <summary>
/// Does the search and stores the information about the results: clears old
/// result/paging/summary elements from the "content" container, runs the Lucene
/// query, and writes highlighted result elements plus summary and paging back
/// into the current page.
/// </summary>
public void HandleSearch(string searchQuery)
{
    DateTime start = DateTime.Now;
    _searchQuery = searchQuery;
    // create the searcher; index is placed in "index" subdirectory
    var searcher = new IndexSearcher(_indexDir);
    Analyzer analyzer = new StandardAnalyzer();
    // parse the query, "text" is the default field to search
    var query = QueryParser.Parse(_searchQuery, "text", analyzer);
    const string containerName = "content";
    Container container = _currentPage.Containers[containerName];
    const string resultElementName = "result";
    const string pagingElementName = "paging";
    const string summaryElementName = "summary";
    // Concatenated names used as a crude membership test via IndexOf below.
    const string allElementNames = resultElementName + pagingElementName + summaryElementName;
    int count = container.Elements.Count;
    // Remove previous search result.
    // NOTE(review): iterates from Count down to 1 — this appears to assume a
    // 1-based element collection; walking backwards keeps remaining indices
    // valid while removing. Confirm against the Container API.
    for (int i = count; i > 0; --i)
    {
        if (container.Elements[i] == null)
        {
            continue;
        }
        if (allElementNames.IndexOf(container.Elements[i].Type, StringComparison.Ordinal) > -1)
        {
            container.Elements.Remove(i);
        }
    }
    Element queryElement = container.Elements[0];
    // 'element' is reused: first the summary element, later each result element,
    // finally the paging element.
    Element element = container.Elements.Create(summaryElementName);
    queryElement["query"] = _searchQuery;
    // search
    Hits hits = searcher.Search(query);
    _total = hits.Length();
    // create highlighter
    var highlighter = new Highlighter(new QueryScorer(query));
    // initialize startAt
    _startFirstAt = InitStartAt();
    // how many items we should show - less than defined at the end of the results
    int resultsCount = SmallerOf(_total, MaxResults + _startFirstAt);
    for (int i = _startFirstAt; i < resultsCount; i++)
    {
        // get the document from index
        Document document = hits.Doc(i);
        string path = document.Get("url");
        if (path != null)
        {
            string plainText = document.Get("text");
            // Highlight up to two best fragments of the stored text for display.
            TokenStream tokenStream = analyzer.TokenStream("text", new StringReader(plainText));
            string text = highlighter.GetBestFragments(tokenStream, plainText, 2, "...");
            element = container.Elements.Create(resultElementName);
            element["title"] = document.Get("title");
            element["path"] = _searchPage + path.Replace("\\", "/") + "/";
            // Fall back to the full plain text if no highlighted fragment was produced.
            element["sample"] = string.IsNullOrEmpty(text) ? plainText : text;
        }
    }
    searcher.Close();
    _duration = DateTime.Now - start;
    _fromItem = _startFirstAt + 1;
    _toItem = SmallerOf(_startFirstAt + MaxResults, _total);
    // result information (written to whichever element was created last)
    element.Node.InnerText = Summary;
    // paging link
    element = container.Elements.Create(pagingElementName);
    element.Node.InnerText = SetPaging();
    _process.SearchContext = _currentPage;
    _currentPage.Save();
}
/// <summary>
/// Full-text search over the article ("Clanci") index. Returns the matches as a
/// DataTable, with the "Sazetak" (summary) column replaced by highlighted fragments.
/// </summary>
/// <param name="pretraga">The user's search phrase.</param>
public static DataTable searchClanci(string pretraga)
{
    DataTable ResultsClanci = new DataTable();
    // create the searcher; index is placed in the "Clanci" subdirectory
    // of the application base directory
    string indexDirectory = AppDomain.CurrentDomain.BaseDirectory + "Clanci";
    var analyzer = new StandardAnalyzer(Version.LUCENE_30);
    IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory));
    // parse the query across the title, summary, body and tag fields
    var parser = new MultiFieldQueryParser(Version.LUCENE_30, new[] { "Naslov", "Sazetak", "Sadrzaj", "Tagovi" }, analyzer);
    //var parser = new QueryParser(Version.LUCENE_30, "Sazetak" , analyzer);
    Query query = parser.Parse(pretraga);
    //// create the result DataTable schema
    ResultsClanci.Columns.Add("id", typeof(Int32));
    ResultsClanci.Columns.Add("Naslov", typeof(string));
    ResultsClanci.Columns.Add("Sadrzaj", typeof(string));
    ResultsClanci.Columns.Add("Tagovi", typeof(string));
    ResultsClanci.Columns.Add("DatumKreiranja", typeof(DateTime));
    ResultsClanci.Columns.Add("DatumZadnjeIzmjene", typeof(DateTime));
    ResultsClanci.Columns.Add("DatumZadnjeAktivnosti", typeof(DateTime));
    ResultsClanci.Columns.Add("DatumZatvaranjaPosta", typeof(DateTime));
    ResultsClanci.Columns.Add("PrihvaceniOdgovori", typeof(Int32));
    ResultsClanci.Columns.Add("BrojOdgovora", typeof(Int32));
    ResultsClanci.Columns.Add("BrojKomentara", typeof(Int32));
    ResultsClanci.Columns.Add("BrojOmiljenih", typeof(Int32));
    ResultsClanci.Columns.Add("BrojPregleda", typeof(Int32));
    ResultsClanci.Columns.Add("BrojPoena", typeof(Int32));
    ResultsClanci.Columns.Add("VlasnikID", typeof(Int32));
    ResultsClanci.Columns.Add("VlasnikNadimak", typeof(string));
    ResultsClanci.Columns.Add("PromijenioID", typeof(Int32));
    ResultsClanci.Columns.Add("RoditeljskiPostID", typeof(Int32));
    //Results.Columns.Add("PodKategorija", typeof(Int32));
    ResultsClanci.Columns.Add("PostVrsta", typeof(Int32));
    ResultsClanci.Columns.Add("SlikaURL", typeof(string));
    ResultsClanci.Columns.Add("temp", typeof(string));
    ResultsClanci.Columns.Add("Likes", typeof(Int32));
    ResultsClanci.Columns.Add("Unlikes", typeof(Int32));
    ResultsClanci.Columns.Add("Sazetak", typeof(string));
    ResultsClanci.Columns.Add("BrojRangiranja", typeof(Int32));
    ResultsClanci.Columns.Add("PrihvacenaIzmjena", typeof(Int32));
    ResultsClanci.Columns.Add("Podnaslov", typeof(string));
    ResultsClanci.Columns.Add("Broj.Razgovora", typeof(Int32));
    ResultsClanci.Columns.Add("sample", typeof(string));
    ResultsClanci.Columns.Add("sampleNaslov", typeof(string));
    // search — NOTE(review): only the top 6 hits are requested here
    TopDocs hits = searcher.Search(query, 6);
    //E this.total = hits.TotalHits;
    // create highlighter; the highlighter does its work here
    IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold; background-color: #e5ecf9; \">", "</span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(80);
    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.TextFragmenter = fragmenter;
    for (int i = 0; i < hits.ScoreDocs.Count(); i++)
    {
        // get the document from index
        Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
        // take the best text fragments from the summary field
        TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("Sazetak")));
        String sample = highlighter.GetBestFragments(stream, doc.Get("Sazetak"), 3, "...");
        //String path = doc.Get("path");
        // create a new row with the result data
        DataRow row = ResultsClanci.NewRow();
        row["id"] = doc.Get("id");
        row["Naslov"] = doc.Get("Naslov"); //doc.Get("Naslov");
        row["Sadrzaj"] = doc.Get("Sadrzaj");
        row["Tagovi"] = doc.Get("Tagovi");
        row["DatumKreiranja"] = doc.Get("DatumKreiranja");
        row["DatumZadnjeIzmjene"] = doc.Get("DatumZadnjeIzmjene");
        row["DatumZadnjeAktivnosti"] = doc.Get("DatumZadnjeAktivnosti");
        //row["DatumZatvaranjaPosta"] = doc.Get("DatumZatvaranjaPosta");
        row["PrihvaceniOdgovori"] = doc.Get("PrihvaceniOdgovori");
        row["BrojOdgovora"] = doc.Get("BrojOdgovora");
        row["BrojKomentara"] = doc.Get("BrojKomentara");
        row["BrojOmiljenih"] = doc.Get("BrojOmiljenih");
        row["BrojPregleda"] = doc.Get("BrojPregleda");
        row["BrojPoena"] = doc.Get("BrojPoena");
        //row["VlasnikID"] = doc.Get("VlasnikID");
        row["VlasnikNadimak"] = doc.Get("VlasnikNadimak");
        //row["PromijenioID"] = doc.Get("PromijenioID");
        //row["RoditeljskiPostID"] = doc.Get("RoditeljskiPostID");
        //row["PodKategorija"] = doc.Get("PodKategorija");
        row["PostVrsta"] = doc.Get("PostVrsta");
        row["SlikaURL"] = doc.Get("SlikaURL");
        //row["temp"] = doc.Get("temp");
        row["Likes"] = doc.Get("Likes");
        row["Unlikes"] = doc.Get("Unlikes");
        // highlighted fragments replace the stored summary
        row["Sazetak"] = sample; //doc.Get("Sazetak");
        row["BrojRangiranja"] = doc.Get("BrojRangiranja");
        row["PrihvacenaIzmjena"] = doc.Get("PrihvacenaIzmjena");
        row["Podnaslov"] = doc.Get("Podnaslov");
        //row["Broj.Razgovora"] = doc.Get("Broj.Razgovora");
        //row["sample"] = sample;
        //row["sampleNaslov"] = sampleNaslov;
        ResultsClanci.Rows.Add(row);
    }
    searcher.Dispose();
    // return the DataTable to be bound to the datasource
    return (ResultsClanci);
}
/// <summary>
/// Searches the lucene index with the search text.
/// </summary>
/// <param name="searchText">The text to search with.</param>
/// <remarks>Syntax reference: http://lucene.apache.org/java/2_3_2/queryparsersyntax.html#Wildcard</remarks>
/// <exception cref="SearchException">An error occured searching the lucene.net index.</exception>
public SearchResultsModel SearchIndex(string searchText)
{
    // This check is for the benefit of the CI builds
    if (!Directory.Exists(_indexPath))
    {
        CreateIndex();
    }
    var model = new SearchResultsModel();
    StandardAnalyzer analyzer = new StandardAnalyzer();
    try
    {
        // NOTE(review): searcher and analyzer are never disposed/closed here —
        // confirm whether that is intentional.
        IndexSearcher searcher = new IndexSearcher(_indexPath);
        // Build query over both the Text and Title fields
        var parser = new MultiFieldQueryParser(new string[] { "Text", "Title" }, analyzer);
        var searchQuery = parser.Parse(searchText);
        // Execute search
        var hits = searcher.Search(searchQuery);
        // Collect raw hits (document + score) before highlighting
        var results = new List <Result>();
        for (int i = 0; i < hits.Length(); i++)
        {
            results.Add(new Result() { doc = hits.Doc(i), Score = hits.Score(i) });
        }
        //Highlight the parts that are matched:
        var formatter = new SimpleHTMLFormatter("<span style='background:yellow;font-weight:bold;'>", "</span>");
        var fragmenter = new SimpleFragmenter(400);
        var scorer = new QueryScorer(searchQuery);
        var highlighter = new Highlighter(formatter, scorer);
        highlighter.SetTextFragmenter(fragmenter);
        var finalResults = new List <DisplayedResult>();
        var db = Legato.Current.DB;
        foreach (var result in results)
        {
            // Highlight the best fragment of the raw text.
            // NOTE(review): the Replace("'", "''") looks like SQL-style quote
            // escaping applied to display HTML — confirm it is still needed.
            var stream = analyzer.TokenStream("", new StringReader(result.doc.Get("RawText")));
            var highlighted = highlighter.GetBestFragments(stream, result.doc.Get("RawText"), 1, "...").Replace("'", "''");
            if (highlighted == "") // sometimes the highlighter fails to emit text...
            {
                highlighted = result.doc.Get("RawText").Replace("'", "''");
            }
            // Cap the displayed snippet at 1000 characters.
            if (highlighted.Length > 1000)
            {
                highlighted = highlighted.Substring(0, 1000);
            }
            int postID;
            if (!int.TryParse(result.doc.Get("GlobalPostID"), out postID)) // If GlobalPostID is null or not a number, this isn't a valid search entry, so we skip it.
            {
                continue;
            }
            var post = db.GlobalPostIDs.Where(p => p.GlobalPostID1 == postID).SingleOrDefault();
            if (post == null)
            {
                continue;
            }
            // TODO: privacy checks?
            post.FillProperties();
            finalResults.Add(new DisplayedResult() { ResultPost = post, Score = result.Score, HighlightedHTML = highlighted });
        }
        // Order by relevance, best first.
        model = new SearchResultsModel() { Results = finalResults.OrderByDescending(r => r.Score), Query = searchText.Trim() };
    }
    catch (Exception ex)
    {
        throw new SearchException(ex, "An error occured while searching the index");
    }
    return (model);

    /*MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] { "content", "title" }, analyzer);
     *
     * Query query = null;
     * try
     * {
     * query = parser.Parse(searchText);
     * }
     * catch (Lucene.Net.QueryParsers.ParseException)
     * {
     * // Catch syntax errors in the search and remove them.
     * searchText = QueryParser.Escape(searchText);
     * query = parser.Parse(searchText);
     * }
     *
     * if (query != null)
     * {
     * try
     * {
     * IndexSearcher searcher = new IndexSearcher(_indexPath);
     * Hits hits = searcher.Search(query);
     *
     * for (int i = 0; i < hits.Length(); i++)
     * {
     * Document document = hits.Doc(i);
     *
     * DateTime createdOn = DateTime.Now;
     * if (!DateTime.TryParse(document.GetField("createdon").StringValue(), out createdOn))
     * createdOn = DateTime.Now;
     *
     * SearchResult result = new SearchResult()
     * {
     * Id = int.Parse(document.GetField("id").StringValue()),
     * Title = document.GetField("title").StringValue(),
     * ContentSummary = document.GetField("contentsummary").StringValue(),
     * Tags = document.GetField("tags").StringValue(),
     * CreatedBy = document.GetField("createdby").StringValue(),
     * CreatedOn = createdOn,
     * ContentLength = int.Parse(document.GetField("contentlength").StringValue()),
     * Score = hits.Score(i)
     * };
     *
     * list.Add(result);
     * }
     * }
     * catch (Exception ex)
     * {
     * throw new SearchException(ex, "An error occured while searching the index");
     * }
     * }
     *
     * return list;
     */
}
/// <summary>
/// Runs the given Lucene query against the shared IndexSearcher, fills the
/// _Results table with one row per hit (with highlighted samples), and traces
/// timing information.
/// </summary>
public void Search(Query query)
{
    if (IndexSearcher == null)
    {
        throw new Exception("IndexSearcher not created");
    }
    trace("search {0}", query.ToString());
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    // do the query
    var start = DateTime.Now.TimeOfDay;
    TopDocs SearchResult = IndexSearcher.Search(query, _MAXRESULTS);
    // Clamp the reported total to the hard result cap.
    _totalItems = SearchResult.TotalHits;
    if (_totalItems > _MAXRESULTS)
    {
        _totalItems = _MAXRESULTS;
    }
    // create highlighter
    IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;\">", "</span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(80);
    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.TextFragmenter = fragmenter;
    // initialize startAt (paging offset)
    _startAt = InitStartAt();
    // how many items we should show - less than defined at the end of the results
    int resultsCount = Math.Min(_totalItems, _resultsPerPage + _startAt);
    if (resultsCount > _MAXRESULTS)
    {
        resultsCount = _MAXRESULTS;
    }
    for (int i = _startAt; i < resultsCount; i++)
    {
        // get the document from index
        Document doc = IndexSearcher.Doc(SearchResult.ScoreDocs[i].Doc);
        // highlight up to two fragments of the stored text
        TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
        String sample = highlighter.GetBestFragments(stream, doc.Get("text"), 2, "...");
        String path = doc.Get("path");
        // create a new row with the result data
        DataRow row = _Results.NewRow();
        row["title"] = doc.Get("title");
        row["path"] = path;
        row["url"] = _baseURL + path;
        row["sample"] = sample;
        // score scaled to an integer percentage
        row["score"] = Convert.ToInt16(SearchResult.ScoreDocs[i].Score * 100);
        row["id"] = doc.Get("id");
        row["type"] = doc.Get("type");
        _Results.Rows.Add(row);
    }
    IndexSearcher.Dispose();
    var end = DateTime.Now.TimeOfDay;
    trace("Search completed in {0}ms", end.TotalMilliseconds - start.TotalMilliseconds);
    // NOTE(review): IndexSearcher is traced after being disposed above —
    // confirm trace does not touch the disposed searcher.
    trace(SearchResult, IndexSearcher);
}
/// <summary>
/// Fuzzy full-text search: splits the query string on spaces, builds an OR'd
/// set of FuzzyQuery terms over the "text" field, and fills _Results with one
/// highlighted row per hit.
/// </summary>
public void Search(string Query)
{
    if (IndexSearcher == null)
    {
        throw new Exception("IndexSearcher not created");
    }
    _query = Query;
    DateTime start = DateTime.Now;
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    var fieldName = "text";
    // fuzzy matching parameters: similarity threshold and exact-prefix length
    var minimumSimilarity = 0.5f;
    var prefixLength = 3;
    var query = new BooleanQuery();
    var segments = _query.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);
    // Each whitespace-separated word becomes an optional (SHOULD) fuzzy clause.
    foreach (string segment in segments)
    {
        Term term = new Term(fieldName, segment);
        FuzzyQuery fuzzyQuery = new FuzzyQuery(term, minimumSimilarity, prefixLength);
        query.Add(fuzzyQuery, Occur.SHOULD);
    }
    // search (top 200 hits)
    TopDocs hits = IndexSearcher.Search(query, 200);
    // Clamp the reported total to the hard result cap.
    _totalItems = hits.TotalHits;
    if (_totalItems > _MAXRESULTS)
    {
        _totalItems = _MAXRESULTS;
    }
    // create highlighter
    IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;\">", "</span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(80);
    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.TextFragmenter = fragmenter;
    // initialize startAt (paging offset)
    _startAt = InitStartAt();
    // how many items we should show - less than defined at the end of the results
    int resultsCount = Math.Min(_totalItems, _resultsPerPage + _startAt);
    if (resultsCount > _MAXRESULTS)
    {
        resultsCount = _MAXRESULTS;
    }
    for (int i = _startAt; i < resultsCount; i++)
    {
        // get the document from index
        Document doc = IndexSearcher.Doc(hits.ScoreDocs[i].Doc);
        // highlight up to two fragments of the stored text
        TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
        String sample = highlighter.GetBestFragments(stream, doc.Get("text"), 2, "...");
        String path = doc.Get("path");
        // create a new row with the result data
        DataRow row = _Results.NewRow();
        row["title"] = doc.Get("title");
        row["path"] = path;
        row["url"] = _baseURL + path;
        row["sample"] = sample;
        // score scaled to an integer percentage
        row["score"] = Convert.ToInt16(hits.ScoreDocs[i].Score * 100);
        _Results.Rows.Add(row);
    }
    IndexSearcher.Dispose();
    // result information
    _duration = DateTime.Now - start;
    _fromItem = _startAt + 1;
    _toItem = Math.Min(_startAt + this.ResultsPerPage, _totalItems);
}
/// <summary>
/// MVC search action: parses the search term out of the raw query string,
/// queries the on-disk Lucene index, and hands a list of highlighted results
/// to the view via ViewBag. Redirects to the upload page when there is no index
/// or no search term.
/// </summary>
public ActionResult Search()
{
    var path = Server.MapPath("/Index-lucene");
    int numberOfFiles = System.IO.Directory.GetFiles(path).Length;
    // Take everything after the first '=' in the raw query string and turn
    // '+' back into spaces to recover the search phrase.
    var searchText = Request.QueryString.ToString();
    string output = searchText.Substring(searchText.IndexOf('=') + 1);
    string searchWord = output.Replace('+', ' ');
    ViewBag.YourSearch = searchWord;
    if (numberOfFiles != 0 && output.Length > 0)
    {
        Lucene.Net.Store.Directory dir = FSDirectory.Open(path);
        Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
        IndexReader indexReader = IndexReader.Open(dir, true);
        Searcher indexSearch = new IndexSearcher(indexReader);
        try
        {
            var startSearchTime = DateTime.Now.TimeOfDay;
            string totaltimeTakenToSearch = string.Empty;
            // Search across meta tags, preview content and file name.
            var queryParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, new string[] { "metaTag", "prevewContent", "fileNameWithoutExtension" }, analyzer);
            var query = queryParser.Parse(searchWord);
            //ViewBag.SearchQuery = "Searching for: \"" + searchWord + "\"";
            // First search only to obtain the total hit count for display.
            TopDocs resultDocs = indexSearch.Search(query, indexReader.NumDocs());
            ViewBag.SearchQuery = resultDocs.TotalHits + " result(s) found for \"" + searchWord + "\"";
            // Second search collects up to 20000 scored docs for rendering.
            TopScoreDocCollector collector = TopScoreDocCollector.Create(20000, true);
            indexSearch.Search(query, collector);
            ScoreDoc[] hits = collector.TopDocs().ScoreDocs;
            IFormatter formatter = new SimpleHTMLFormatter("<span style=\"color: black; font-weight: bold;\">", "</span>");
            SimpleFragmenter fragmenter = new SimpleFragmenter(160);
            QueryScorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.TextFragmenter = fragmenter;
            //highlighter.SetTextFragmenter(fragmenter);
            List <ListofResult> parts = new List <ListofResult>();
            for (int i = 0; i < hits.Length; i++)
            {
                int docId = hits[i].Doc;
                float score = hits[i].Score;
                Document doc = indexSearch.Doc(docId);
                string url = doc.Get("URL");
                string title = doc.Get("filename");
                // Highlight up to three fragments of the preview content.
                TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("prevewContent")));
                string content = highlighter.GetBestFragments(stream, doc.Get("prevewContent"), 3, "...");
                // Fall back to the first 480 chars of the preview when the
                // highlighter produced nothing.
                if (content == null || content == "")
                {
                    string contents = doc.Get("prevewContent");
                    if (contents != "")
                    {
                        if (contents.Length < 480)
                        {
                            content = contents.Substring(0, contents.Length);
                        }
                        else
                        {
                            content = contents.Substring(0, 480);
                        }
                    }
                }
                parts.Add(new ListofResult() { FileName = title, Content = content, URL = url });
                // NOTE(review): the elapsed time is recomputed on every loop
                // iteration; only the last value survives.
                var endSearchTime = DateTime.Now.TimeOfDay;
                var timeTaken = endSearchTime.TotalMilliseconds - startSearchTime.TotalMilliseconds;
                totaltimeTakenToSearch = timeTaken.ToString();
            }
            //Search completed, dispose IndexSearcher
            indexSearch.Dispose();
            //assigning list into ViewBag
            ViewBag.SearchResult = parts;
        }
        catch (Exception ex)
        {
            // NOTE(review): all exceptions (including query parse errors) are
            // silently swallowed here and the view renders with no results —
            // consider at least logging 'ex'.
        }
    }
    else
    {
        return (RedirectToAction("UploadFile", "Home"));
    }
    return (View());
}
/// <summary>
/// Produces a highlighted ~150-char summary fragment for the query. Tries three
/// passes: (1) highlight the extracted plain text; (2) highlight the raw text
/// HTML-encoded; (3) if the highlighter still emits nothing, manually locate the
/// first query term in the text and bold it with a regex.
/// </summary>
public static string Summarize(Query query, Query wildcardSafeQuery, bool shouldDocumentsBeClustered, string text)
{
    int fragmentLength = 150;
    StandardAnalyzer standardAnalyzer = new StandardAnalyzer();
    Highlighter highligher = new Highlighter(new QueryScorer(query));
    highligher.SetTextFragmenter(new SimpleFragmenter(fragmentLength));
    // Pass 1: highlight the extracted plain text.
    string text2 = UserDefinedFunctions.ExtractText(text).Value;
    TokenStream tokenStream = standardAnalyzer.TokenStream("text", new StringReader(text2));
    string bestFragments = (highligher.GetBestFragments(tokenStream, text2, 1, "...") + " ...").TrimStart(" ,".ToCharArray());
    // "..." alone means the highlighter found nothing to show.
    if (bestFragments == "...")
    {
        // Pass 2: retry against the HTML-encoded original text.
        text = HttpUtility.HtmlEncode(text);
        tokenStream = standardAnalyzer.TokenStream("text", new StringReader(text));
        bestFragments = (highligher.GetBestFragments(tokenStream, text, 1, "...") + " ...").TrimStart(" ,".ToCharArray());
        if (bestFragments == "...")
        {
            // Pass 3: extract the query's terms and bold the first "text"-field
            // term found in the document ourselves.
            Hashtable hashTable = new Hashtable();
            try
            {
                query.ExtractTerms(hashTable);
            }
            catch
            {
                // Some query types (e.g. unrewritten wildcards) cannot extract
                // terms; fall back to the wildcard-safe variant, best-effort.
                try
                {
                    wildcardSafeQuery.ExtractTerms(hashTable);
                }
                catch
                {
                }
            }
            if (hashTable.Count != 0)
            {
                string firstTerm = null;
                foreach (Term term in hashTable.Values)
                {
                    if (term.Field() == "text")
                    {
                        string termText = term.Text();
                        if (termText != null)
                        {
                            firstTerm = termText.Split(' ')[0];
                            break;
                        }
                    }
                }
                if (firstTerm != null)
                {
                    // Case-insensitive locate; clamp the window to the text length.
                    int index = text.ToLowerInvariant().IndexOf(firstTerm);
                    if (index != -1)
                    {
                        if (index + fragmentLength > text.Length)
                        {
                            fragmentLength = text.Length - index;
                        }
                        bestFragments = Regex.Replace(text.Substring(index, fragmentLength), firstTerm, "<b>" + firstTerm + "</b>", RegexOptions.IgnoreCase) + "...";
                    }
                }
            }
        }
    }
    return (bestFragments);
}
private static string getHighlight(Highlighter highlighter, StandardAnalyzer analyzer, string fieldContent)
{
    // Tokenize the field content and return the single best fragment,
    // using "." as the fragment separator.
    var stream = analyzer.TokenStream("", new StringReader(fieldContent));
    return highlighter.GetBestFragments(stream, fieldContent, 1, ".");
}
/// <summary>
/// Main searching method: builds an Examine criteria from the text/tag/node
/// parts of the LookQuery, then runs the compiled Lucene query directly with
/// optional distance filtering, sorting, and highlight support.
/// </summary>
/// <param name="lookQuery">Combined query description; null yields an empty result.</param>
/// <returns>an IEnumerableWithTotal of LookMatch (never null)</returns>
public static IEnumerableWithTotal <LookMatch> Query(LookQuery lookQuery)
{
    IEnumerableWithTotal <LookMatch> lookMatches = null; // prepare return value
    if (lookQuery == null)
    {
        LogHelper.Warn(typeof(LookService), "Supplied search query was null");
    }
    else
    {
        var searchProvider = LookService.Searcher;
        var searchCriteria = searchProvider.CreateSearchCriteria();
        // Seed the fluent query with an empty field clause so later And()/Not()
        // calls have something to chain onto.
        var query = searchCriteria.Field(string.Empty, string.Empty);
        // Text
        if (!string.IsNullOrWhiteSpace(lookQuery.TextQuery.SearchText))
        {
            if (lookQuery.TextQuery.Fuzzyness > 0)
            {
                query.And().Field(LookService.TextField, lookQuery.TextQuery.SearchText.Fuzzy(lookQuery.TextQuery.Fuzzyness));
            }
            else
            {
                query.And().Field(LookService.TextField, lookQuery.TextQuery.SearchText);
            }
        }
        // Tags
        if (lookQuery.TagQuery != null)
        {
            var allTags = new List <string>();
            var anyTags = new List <string>();
            if (lookQuery.TagQuery.AllTags != null)
            {
                allTags.AddRange(lookQuery.TagQuery.AllTags);
                allTags.RemoveAll(x => string.IsNullOrWhiteSpace(x));
            }
            if (lookQuery.TagQuery.AnyTags != null)
            {
                anyTags.AddRange(lookQuery.TagQuery.AnyTags);
                anyTags.RemoveAll(x => string.IsNullOrWhiteSpace(x));
            }
            if (allTags.Any())
            {
                query.And().GroupedAnd(allTags.Select(x => LookService.TagsField), allTags.ToArray());
            }
            if (anyTags.Any())
            {
                // NOTE(review): the field list is sized from allTags while the
                // values come from anyTags — looks like a copy/paste slip
                // (anyTags.Select(...) was probably intended); confirm.
                query.And().GroupedOr(allTags.Select(x => LookService.TagsField), anyTags.ToArray());
            }
        }
        // TODO: Date
        // TODO: Name
        // Nodes
        if (lookQuery.NodeQuery != null)
        {
            if (lookQuery.NodeQuery.TypeAliases != null)
            {
                var typeAliases = new List <string>();
                typeAliases.AddRange(lookQuery.NodeQuery.TypeAliases);
                typeAliases.RemoveAll(x => string.IsNullOrWhiteSpace(x));
                if (typeAliases.Any())
                {
                    query.And().GroupedOr(typeAliases.Select(x => UmbracoContentIndexer.NodeTypeAliasFieldName), typeAliases.ToArray());
                }
            }
            if (lookQuery.NodeQuery.ExcludeIds != null)
            {
                foreach (var excudeId in lookQuery.NodeQuery.ExcludeIds.Distinct())
                {
                    query.Not().Id(excudeId);
                }
            }
        }
        try
        {
            searchCriteria = query.Compile();
        }
        catch (Exception exception)
        {
            LogHelper.WarnWithException(typeof(LookService), "Could not compile the Examine query", exception);
        }
        if (searchCriteria != null && searchCriteria is LuceneSearchCriteria)
        {
            Sort sort = null;
            Filter filter = null;
            // Defaults: no distance available, no highlighting.
            Func <int, double?> getDistance = x => null;
            Func <string, IHtmlString> getHighlight = null;
            TopDocs topDocs = null;
            switch (lookQuery.SortOn)
            {
            case SortOn.Date:     // newest -> oldest
                sort = new Sort(new SortField(LuceneIndexer.SortedFieldNamePrefix + LookService.DateField, SortField.LONG, true));
                break;

            case SortOn.Name:     // a -> z
                sort = new Sort(new SortField(LuceneIndexer.SortedFieldNamePrefix + LookService.NameField, SortField.STRING));
                break;
            }
            if (lookQuery.LocationQuery != null && lookQuery.LocationQuery.Location != null)
            {
                // Cap the requested radius at the service-wide maximum.
                double maxDistance = LookService.MaxDistance;
                if (lookQuery.LocationQuery.MaxDistance != null)
                {
                    maxDistance = Math.Min(lookQuery.LocationQuery.MaxDistance.GetMiles(), maxDistance);
                }
                var distanceQueryBuilder = new DistanceQueryBuilder(
                    lookQuery.LocationQuery.Location.Latitude,
                    lookQuery.LocationQuery.Location.Longitude,
                    maxDistance,
                    LookService.LocationField + "_Latitude",
                    LookService.LocationField + "_Longitude",
                    CartesianTierPlotter.DefaltFieldPrefix,
                    true);
                // update filter
                filter = distanceQueryBuilder.Filter;
                if (lookQuery.SortOn == SortOn.Distance)
                {
                    // update sort
                    sort = new Sort(
                        new SortField(
                            LookService.DistanceField,
                            new DistanceFieldComparatorSource(distanceQueryBuilder.DistanceFilter)));
                }
                // raw data for the getDistance func
                var distances = distanceQueryBuilder.DistanceFilter.Distances;
                // update getDistance func
                getDistance = new Func <int, double?>(x =>
                {
                    if (distances.ContainsKey(x))
                    {
                        return(distances[x]);
                    }
                    return(null);
                });
            }
            var indexSearcher = new IndexSearcher(((LuceneIndexer)LookService.Indexer).GetLuceneDirectory(), false);
            var luceneSearchCriteria = (LuceneSearchCriteria)searchCriteria;
            // Do the Lucene search
            topDocs = indexSearcher.Search(
                luceneSearchCriteria.Query,                                  // the query build by Examine
                filter ?? new QueryWrapperFilter(luceneSearchCriteria.Query),
                LookService.MaxLuceneResults,
                sort ?? new Sort(SortField.FIELD_SCORE));
            if (topDocs.TotalHits > 0)
            {
                // setup the highlighing func if required
                if (lookQuery.TextQuery.HighlightFragments > 0 && !string.IsNullOrWhiteSpace(lookQuery.TextQuery.SearchText))
                {
                    var version = Lucene.Net.Util.Version.LUCENE_29;
                    Analyzer analyzer = new StandardAnalyzer(version);
                    var queryParser = new QueryParser(version, LookService.TextField, analyzer);
                    // Rewrite expands multi-term queries so the scorer can highlight them.
                    var queryScorer = new QueryScorer(queryParser
                                                      .Parse(lookQuery.TextQuery.SearchText)
                                                      .Rewrite(indexSearcher.GetIndexReader()));
                    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), queryScorer);
                    // update the func so it does real highlighting work
                    getHighlight = (x) =>
                    {
                        var tokenStream = analyzer.TokenStream(LookService.TextField, new StringReader(x));
                        var highlight = highlighter.GetBestFragments(
                            tokenStream,
                            x,
                            lookQuery.TextQuery.HighlightFragments,          // max number of fragments
                            lookQuery.TextQuery.HighlightSeparator);         // fragment separator
                        return(new HtmlString(highlight));
                    };
                }
                lookMatches = new EnumerableWithTotal <LookMatch>(
                    LookSearchService.GetLookMatches(
                        lookQuery,
                        indexSearcher,
                        topDocs,
                        getHighlight,
                        getDistance),
                    topDocs.TotalHits);
            }
        }
    }
    return(lookMatches ?? new EnumerableWithTotal <LookMatch>(Enumerable.Empty <LookMatch>(), 0));
}