/// <summary>
/// Builds the list of snippet lines for a snippet request.
/// </summary>
/// <remarks>
/// The queryable is looked up from the source of the request's hit. If none is
/// found, a message is written to the console, a SnippetReader constructed with
/// placeholder arguments is used instead, and full-text mode is switched off.
/// In full-text mode every line read from the reader is passed through
/// StringFu.CleanupInvalidXmlCharacters, terminated with "\n", and the whole
/// text is returned as a single SnippetLine (Line = 1) holding one Fragment
/// whose QueryTermIndex is -1. Otherwise the lines produced by the reader's
/// GetSnippet() are returned as-is.
/// </remarks>
/// <param name="request">The request carrying the hit, query terms and length settings.</param>
/// <returns>
/// The collected snippet lines; an empty list when the queryable returns no reader.
/// </returns>
private static List<SnippetLine> GetSnippet(SnippetRequest request)
{
    Queryable queryable = QueryDriver.GetQueryable(request.Hit.Source);
    ISnippetReader snippet_reader;
    bool full_text = request.FullText;
    int ctx_length = request.ContextLength;
    int snp_length = request.SnippetLength;

    if (queryable == null)
    {
        Console.WriteLine("SnippetExecutor: No queryable object matches '{0}'", request.Hit.Source);
        snippet_reader = new SnippetReader(null, null, false, -1, -1);
        full_text = false;
    }
    else
    {
        snippet_reader = queryable.GetSnippet(request.QueryTerms, request.Hit, full_text, ctx_length, snp_length);
    }

    List<SnippetLine> snippetlines = new List<SnippetLine>();

    if (snippet_reader == null)
    {
        return snippetlines;
    }

    // The reader is closed in a finally block so that it is released even
    // when reading or cleaning up a line throws.
    try
    {
        if (!full_text)
        {
            foreach (SnippetLine snippet_line in snippet_reader.GetSnippet())
            {
                snippetlines.Add(snippet_line);
            }
        }
        else
        {
            SnippetLine snippet_line = new SnippetLine();
            snippet_line.Line = 1;

            Fragment fragment = new Fragment();
            fragment.QueryTermIndex = -1;

            // Read data from snippet_reader and accumulate it, one "\n"-terminated
            // line at a time.
            StringBuilder sb = new StringBuilder();
            string line;
            while ((line = snippet_reader.ReadLine()) != null)
            {
                sb.Append(StringFu.CleanupInvalidXmlCharacters(line));
                sb.Append("\n");
            }

            fragment.Text = sb.ToString();
            snippet_line.Fragments = new ArrayList();
            snippet_line.Fragments.Add(fragment);
            snippetlines.Add(snippet_line);
        }
    }
    finally
    {
        snippet_reader.Close();
    }

    return snippetlines;
}
// Scans text, beginning at character index pos, for tokens whose stem equals
// one of the entries of stemmed_terms. The result is a snippet line shaped like
// (words)*[(matched word)(words)*]+, or null when no token matched.
// pos is passed by reference and is moved forward as tokens are consumed.
private SnippetLine MarkTerms(ArrayList stemmed_terms, string text, ref int pos)
{
    SnippetLine result = null;

    // One past the end of the most recently matched token; until the first
    // match this is simply the position where scanning started.
    int last_match_end = pos;

    while (pos < text.Length)
    {
        // Step over separators until a token begins.
        if (IsTokenSeparator(text[pos]))
        {
            pos++;
            continue;
        }

        // Walk forward to the (exclusive) end of the token.
        int token_end = pos + 1;
        while (token_end < text.Length && !IsTokenSeparator(text[token_end]))
        {
            token_end++;
        }

        int token_length = token_end - pos;
        string token = text.Substring(pos, token_length);

        // Search for the first term equal to the stemmed token.
        int matched_term = -1;
        string token_stem = null;
        for (int term_index = 0; term_index < stemmed_terms.Count; term_index++)
        {
            string term = (string)stemmed_terms[term_index];

            // A term longer than the raw token is not considered.
            if (term.Length > token_length)
            {
                continue;
            }

            // The stem is computed lazily and reused for the remaining terms.
            if (token_stem == null)
            {
                token_stem = LuceneCommon.Stem(token.ToLower());
            }

            if (String.Compare(term, token_stem, true) == 0)
            {
                matched_term = term_index;
                break;
            }
        }

        if (matched_term >= 0)
        {
            if (result == null)
            {
                result = new SnippetLine();
            }

            // The leading context starts at the first position held by the
            // sliding window; -1 means nothing was added to the window since
            // the last reset, so the context starts where the previous match
            // (or the scan) ended.
            int context_start = sliding_window.StartValue;
            if (context_start == -1)
            {
                context_start = last_match_end;
            }
            sliding_window.Reset();

            string leading_text = text.Substring(context_start, pos - context_start);
            result.AddNonMatchFragment(leading_text);
            result.AddMatchFragment(matched_term, token);

            last_match_end = token_end;
        }
        else
        {
            // Record where this non-matching token starts.
            sliding_window.Add(pos);

            // Once a match has been emitted and the window holds context_length
            // entries, close the snippet line with the trailing text.
            bool have_match = result != null && result.Count > 0;
            if (have_match && sliding_window.Count == context_length)
            {
                sliding_window.Reset();
                string trailing_text = text.Substring(last_match_end, token_end - last_match_end);
                result.AddNonMatchFragment(trailing_text);

                // Note: pos still points at the start of the current token here;
                // it is not advanced to token_end before returning.
                return result;
            }
        }

        pos = token_end;
    }

    // The text ran out before the trailing-context limit was reached.
    bool has_fragments = result != null && result.Count > 0;
    sliding_window.Reset();

    if (!has_fragments)
    {
        return null;
    }

    // Append whatever followed the last match.
    string remainder = text.Substring(last_match_end, pos - last_match_end);
    result.AddNonMatchFragment(remainder);
    return result;
}