Пример #1
0
        /// <summary>
        /// Populates this object from XML: reads the "FullText" attribute of the
        /// wrapper element and deserializes every child <c>SnippetLine</c> element
        /// into <c>snippets</c>.
        /// </summary>
        /// <param name="reader">
        /// Reader expected to be positioned on this object's wrapper start tag.
        /// On return the whole wrapper element has been consumed.
        /// </param>
        public void ReadXml(XmlReader reader)
        {
            // Convert.ToBoolean(null) yields false, so a missing attribute means "not full text".
            FullText = Convert.ToBoolean(reader.GetAttribute("FullText"));
            reader.MoveToContent();

            if (reader.IsEmptyElement)             // no <snippetline>...</snippetline>
            {
                // ReadXml must consume the entire wrapper element, even when it is
                // empty; otherwise the caller's reader is left on this element.
                reader.Read();
                return;
            }

            reader.Read();                         // step inside the wrapper element
            reader.MoveToContent();                // Keep doing this, to skip over the whitespaces
            while (reader.NodeType == XmlNodeType.Element && reader.Name == "SnippetLine")
            {
                // ReadOuterXml also advances the reader past the element it returns.
                string snippet_line_string = reader.ReadOuterXml();                  // could be really huge for full text
                reader.MoveToContent();

                using (StringReader snippet_line_reader = new StringReader(snippet_line_string))
                {
                    SnippetLine snippet_line = (SnippetLine)snippet_line_ser.Deserialize(snippet_line_reader);
                    // The list is created lazily, so it stays null when there are no lines.
                    if (snippets == null)
                    {
                        snippets = new ArrayList();
                    }
                    snippets.Add(snippet_line);
                }
            }

            // Consume the wrapper's end tag.
            reader.ReadEndElement();
        }
Пример #2
0
	// Builds the snippet lines for a snippet request.
	//
	// The queryable registered for the hit's source supplies an ISnippetReader.
	// When no queryable matches, a message is written to the console, a
	// SnippetReader built from null arguments is used instead and full-text
	// mode is switched off.
	//
	// In normal mode the lines produced by the reader's GetSnippet () are returned.
	// In full-text mode everything the reader yields is concatenated into one
	// SnippetLine (Line = 1) holding a single Fragment with QueryTermIndex = -1.
	//
	// Returns an empty list when the queryable provides no reader. The reader is
	// always closed, even if reading from it throws.
	private static List<SnippetLine> GetSnippet (SnippetRequest request)
	{
		Queryable queryable = QueryDriver.GetQueryable (request.Hit.Source);
		ISnippetReader snippet_reader;
		bool full_text = request.FullText;
		int ctx_length = request.ContextLength;
		int snp_length = request.SnippetLength;

		if (queryable == null) {
			Console.WriteLine ("SnippetExecutor: No queryable object matches '{0}'", request.Hit.Source);
			snippet_reader = new SnippetReader (null, null, false, -1, -1);
			full_text = false;
		} else
			snippet_reader = queryable.GetSnippet (request.QueryTerms, request.Hit, full_text, ctx_length, snp_length);

		List<SnippetLine> snippetlines = new List<SnippetLine> ();
		if (snippet_reader == null)
			return snippetlines;

		try {
			if (! full_text) {
				foreach (SnippetLine snippet_line in snippet_reader.GetSnippet ())
					snippetlines.Add (snippet_line);
			} else {
				SnippetLine snippet_line = new SnippetLine ();
				snippet_line.Line = 1;

				Fragment fragment = new Fragment ();
				fragment.QueryTermIndex = -1;
				StringBuilder sb = new StringBuilder ();

				string line;
				// Read every line from snippet_reader, strip characters that are
				// not valid in XML and join the lines with '\n'.
				while ((line = snippet_reader.ReadLine ()) != null) {
					sb.Append (StringFu.CleanupInvalidXmlCharacters (line));
					sb.Append ("\n");
				}

				fragment.Text = sb.ToString ();
				snippet_line.Fragments = new ArrayList ();
				snippet_line.Fragments.Add (fragment);
				snippetlines.Add (snippet_line);
			}
		} finally {
			// Release the reader even when reading fails part-way through.
			snippet_reader.Close ();
		}

		return snippetlines;
	}
Пример #3
0
		// Starts scanning at character pos of string text for occurrence of any word
		// in stemmed_terms. Returns a list of (words)*[(matched word)(words)*]+
		//
		// stemmed_terms: list of strings; each token of text is lower-cased, stemmed
		//                with LuceneCommon.Stem and compared case-insensitively
		//                against these entries.
		// text:          the text to scan; tokens are delimited by IsTokenSeparator.
		// pos:           in: index at which scanning starts; out: index at which
		//                scanning stopped (see the notes at the return points).
		//
		// Returns a SnippetLine made of alternating non-match and match fragments,
		// or null when no token from pos to the end of text matched any term.
		// The sliding_window field is reset before every return.
		private SnippetLine MarkTerms (ArrayList stemmed_terms, string text, ref int pos)
		{
			SnippetLine snippet_line = null;
			int prev_match_end_pos = pos; // exclusive end index of the previous match; initially the starting pos

			// Outline of the loop below:
			// 1. skip separators and isolate the next token [pos, end_pos)
			// 2. compare the stemmed token against every stemmed term
			// 3. on a match:
			// 3a. add the text between the window start (or the previous match end)
			//     and the token as a non-match fragment
			// 3b. add the token as a match fragment
			// 3c. reset the sliding window and remember where the match ended
			// 4. otherwise record the token start in the sliding window; if a match
			//    was already found and the window count equals context_length,
			//    add the trailing text as a non-match fragment and return
			// 5. when the text is exhausted, add whatever follows the last match
			//    (if there was one) and return

			while (pos < text.Length) {
				// Find the beginning of the next token
				if (IsTokenSeparator (text [pos])) {
					++ pos;
					continue;
				}

				// Find the end of the next token
				int end_pos = pos + 1;
				while (end_pos < text.Length && ! IsTokenSeparator (text [end_pos]))
					    ++ end_pos;

				string token = text.Substring (pos, end_pos - pos);
				string stemmed_token = null;
				bool found_match = false;

				// Iterate through the stemmed terms and match the token
				for (int i = 0; i < stemmed_terms.Count; i++) {
					
					// If this term is longer than the token in question, skip this term.
					if (end_pos - pos < ((string)stemmed_terms [i]).Length)
						continue;
					
					// We cache the token, so as to avoid stemming it more than once
					// when considering multiple terms.
					if (stemmed_token == null) {
						stemmed_token = LuceneCommon.Stem (token.ToLower ());
					}

					// Case-insensitive comparison (third argument is ignoreCase)
					if (String.Compare ((string) stemmed_terms [i], stemmed_token, true) != 0)
						continue;
					
					// We have a match!
					found_match = true;
					//Console.WriteLine ("Found match");

					if (snippet_line == null)
						snippet_line = new SnippetLine ();

					// Find the fragment before the match
					int start_pos = sliding_window.StartValue;
					if (start_pos == -1) // If no non-match words seen after last match
						start_pos = prev_match_end_pos; // Use wherever previous word ended
					sliding_window.Reset ();

					string before_match = text.Substring (start_pos, pos - start_pos);
					snippet_line.AddNonMatchFragment (before_match);
					//Console.WriteLine ("Adding [{0}, {1}]:[{2}]", start_pos, pos - 1, before_match);

					// The index of the matching term is recorded along with the original (unstemmed) token
					snippet_line.AddMatchFragment (i, token);
					//Console.WriteLine ("Adding word [{0}, {1}]:[{2}]", pos, end_pos - 1, token);
					prev_match_end_pos = end_pos;

					// Only the first matching term is recorded for a token
					break;
				}

				if (! found_match) {
					// Add the start pos of the token to the window
					sliding_window.Add (pos);
					// If we found a match previously and saw enough following words, stop
					if (snippet_line != null && snippet_line.Count > 0 && sliding_window.Count == context_length) {
						sliding_window.Reset ();
						string after_match = text.Substring (prev_match_end_pos, end_pos - prev_match_end_pos);
						snippet_line.AddNonMatchFragment (after_match);
						//Console.WriteLine ("Adding [{0}, {1}]:[{2}]", prev_match_end_pos, end_pos - 1, after_match);
						// NOTE(review): pos is not advanced to end_pos on this path, so the
						// caller receives the start index of the last trailing token even
						// though after_match already includes it - confirm this is intended.
						return snippet_line;
					}
				}

				pos = end_pos;
			}

			// The text ran out before context_length non-matching words followed
			// the last match; add the rest here
			if (snippet_line != null && snippet_line.Count > 0) {
				sliding_window.Reset ();
				string after_match = text.Substring (prev_match_end_pos, pos - prev_match_end_pos);
				snippet_line.AddNonMatchFragment (after_match);
				//Console.WriteLine ("Adding [{0}, {1}]:[{2}]", prev_match_end_pos, pos - 1, after_match);

				//Console.WriteLine ("Sending snippet: {0}", snippet_line.ToString ());
				return snippet_line;
			}

			// No match anywhere in the remaining text
			sliding_window.Reset ();
			return null;
		}