/// <summary>
/// Annotates the given sequence of <see cref="Document"/> objects by adding a <b>_highlight</b> field;
/// the <b>_highlight</b> field will contain the best matching text fragment from the <see cref="Document"/>
/// object's full-text field.
/// </summary>
/// <remarks>
/// The hits are materialized into a list, indexed into a temporary in-memory index, and then passed
/// to the highlighting overload. Highlighting is best-effort: any exception raised while indexing or
/// highlighting is logged and swallowed, and the documents are returned in whatever state they reached.
/// </remarks>
/// <param name="hits">The sequence of <see cref="Document"/> objects.</param>
/// <param name="criteria">The search criteria that produced the hits.</param>
/// <returns>
/// A list containing the Document objects from <paramref name="hits"/>, in their original order.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="hits"/> or <paramref name="criteria"/> is <c>null</c>.
/// </exception>
/// <exception cref="ArgumentException">The criteria's query is null, empty or whitespace.</exception>
public static IEnumerable<Document> GenerateHighlights(this IEnumerable<Document> hits, SearchCriteria criteria)
{
    if (hits == null)
    {
        throw new ArgumentNullException(nameof(hits));
    }

    if (criteria == null)
    {
        throw new ArgumentNullException(nameof(criteria));
    }

    if (String.IsNullOrWhiteSpace(criteria.Query))
    {
        throw new ArgumentException("SearchCriteria.Query cannot be empty");
    }

    // Materialize once: the sequence is both indexed and annotated, and is what gets returned.
    var documents = hits.ToList();

    try
    {
        // The analyzer is disposable; release it once the writer that uses it has been closed.
        using (var analyzer = new FullTextAnalyzer())
        {
            var indexDirectory = new RAMDirectory();
            try
            {
                var config = new IndexWriterConfig(analyzer).SetRAMBufferSizeMB(_ramBufferSizeMB);
                var writer = new IndexWriter(indexDirectory, config);
                try
                {
                    BuidIndex(documents, writer);
                    GenerateHighlights(documents, writer, criteria);

                    writer.DeleteAll();
                    writer.Commit();
                }
                finally
                {
                    // Close the writer even when indexing/highlighting failed, so it is not leaked.
                    writer.Close();
                }
            }
            finally
            {
                // Always release the in-memory index, even if closing the writer threw.
                indexDirectory.Close();
            }
        }
    }
    catch (Exception ex)
    {
        // Deliberate best-effort: a highlighting failure must not break the search results.
        _log.Error(ex);
    }

    return documents;
}
/// <summary>
/// Breaks up the input text into individual tokens.
/// </summary>
/// <remarks>
/// Arguments are validated immediately when this method is called; the tokens themselves are
/// produced lazily as the returned sequence is enumerated.
/// </remarks>
/// <param name="text">The input text.</param>
/// <param name="enableStemming">if set to <c>true</c>, the FullTextIndex will stem
/// the tokens that make up the texts, using the Porter stemming algorithm.</param>
/// <param name="ignoreCase">if set to <c>true</c>, character casing is ignored.</param>
/// <param name="separatorChars">A string whose component characters will be used to split the texts into tokens.
/// A null, empty or whitespace-only value falls back to <c>DEFAULT_SEPARATOR_CHARS</c>.</param>
/// <returns>A lazily evaluated sequence of the tokens produced by the analyzer for <paramref name="text"/>.</returns>
/// <exception cref="ArgumentException"><paramref name="text"/> is null, empty or whitespace.</exception>
public static IEnumerable<string> Tokenize(string text, bool enableStemming = true, bool ignoreCase = true, string separatorChars = DEFAULT_SEPARATOR_CHARS)
{
    // Validate here rather than inside the iterator: an iterator body does not run until the
    // first MoveNext(), which would defer this exception to wherever the caller enumerates.
    if (String.IsNullOrWhiteSpace(text))
    {
        throw new ArgumentException("text cannot be null or blank");
    }

    if (String.IsNullOrWhiteSpace(separatorChars))
    {
        separatorChars = DEFAULT_SEPARATOR_CHARS;
    }

    return TokenizeCore(text, enableStemming, ignoreCase, separatorChars);
}

/// <summary>
/// Lazily yields the tokens of already-validated text; the iterator half of <c>Tokenize</c>.
/// </summary>
private static IEnumerable<string> TokenizeCore(string text, bool enableStemming, bool ignoreCase, string separatorChars)
{
    using (var analyzer = new FullTextAnalyzer(enableStemming, ignoreCase, separatorChars))
    using (var stream = analyzer.TokenStream("text", text))
    {
        // Direct cast instead of 'as': an unexpected attribute type fails here with a clear
        // InvalidCastException rather than a NullReferenceException inside the loop.
        var termAttribute = (CharTermAttribute)stream.AddAttribute(typeof(CharTermAttribute));

        stream.Reset();
        while (stream.IncrementToken())
        {
            yield return termAttribute.ToString();
        }

        stream.End();
    }
}