Exemplo n.º 1
0
	/// <summary>
	/// Writes a report about <paramref name="indexable"/> to the console: its URI
	/// (and parent URI, if any), the filter selected for it, its MIME type, the
	/// indexables generated by the filter, its sorted properties and, unless the
	/// indexable has no content, its extracted text. Generated indexables that
	/// were kept (when show_generated is set) are reported recursively afterwards.
	/// </summary>
	/// <remarks>
	/// Reads first_indexable, show_generated, tokenize, analyze and stats_only,
	/// all of which are defined outside this method, and clears first_indexable.
	/// </remarks>
	/// <param name="indexable">The indexable to filter and report on.</param>
	static void Display (Indexable indexable)
	{
		// Every report except the very first one is preceded by a separator.
		if (!first_indexable) {
			Console.WriteLine ();
			Console.WriteLine ("-----------------------------------------");
			Console.WriteLine ();
		}
		first_indexable = false;

		Console.WriteLine ("Filename: " + indexable.Uri);

		if (indexable.ParentUri != null)
			Console.WriteLine ("Parent: " + indexable.ParentUri);

		Stopwatch watch = new Stopwatch ();

		Filter filter;

		// Time how long it takes to pick (and run) a filter for the indexable.
		watch.Start ();
		if (! FilterFactory.FilterIndexable (indexable, out filter)) {
			// Filtering failed: release the indexable's resources right away and
			// mark it as having no content so the text section below is skipped.
			indexable.Cleanup ();
			indexable.NoContent = true;
			filter = null;
		}
		watch.Stop ();

		Console.WriteLine ("Filter: {0} (determined in {1})", filter, watch);
		Console.WriteLine ("MimeType: {0}", indexable.MimeType);
		Console.WriteLine ();

		ArrayList generated_indexables = new ArrayList ();
		Indexable generated_indexable;

		// "first" is true until the heading for generated indexables is printed.
		bool first = true;
		if (filter != null && filter.HasGeneratedIndexable) {
			while (filter.GenerateNextIndexable (out generated_indexable)) {
				if (generated_indexable == null)
					continue;

				if (first) {
					Console.WriteLine ("Filter-generated indexables:");
					first = false;
				}

				Console.WriteLine ("  {0}", generated_indexable.Uri);

				// Keep the child for a recursive report later, or drop it now.
				if (show_generated)
					generated_indexables.Add (generated_indexable);
				else
					generated_indexable.Cleanup ();
			}
		}

		// Blank line only if the generated-indexables section was printed.
		if (! first)
			Console.WriteLine ();

		// Make sure that the properties are sorted.
		ArrayList prop_array = new ArrayList (indexable.Properties);
		prop_array.Sort ();

		Console.WriteLine ("Properties:");

		if (indexable.ValidTimestamp)
			Console.WriteLine ("  Timestamp = {0}", DateTimeUtil.ToString (indexable.Timestamp));

		foreach (Beagrep.Property prop in prop_array) {
			// Properties without a value are not worth showing.
			if (String.IsNullOrEmpty (prop.Value))
				continue;

			Console.WriteLine ("  {0} = {1}", prop.Key, prop.Value);
		}

		Console.WriteLine ();

		// NOTE(review): on this path any generated indexables kept above are
		// neither displayed nor cleaned up, and the final Cleanup () is skipped
		// (it was already called if filtering failed) -- confirm this is intended.
		if (indexable.NoContent)
			return;

		// Time the text extraction itself.
		watch.Reset ();
		watch.Start ();

		Analyzer indexing_analyzer = new BeagrepAnalyzer ();

		TextReader reader = indexable.GetTextReader ();
		char separater_char = (tokenize ? '\n' : ' ');
		if (reader != null) {
			// From here on "first" means "no content has been seen yet".
			first = true;

			try {
				if (analyze) {
					if (! stats_only)
						Console.WriteLine ("Content:");

					TokenStream token_stream = indexing_analyzer.TokenStream ("Text", reader);
					try {
						Lucene.Net.Analysis.Token token = token_stream.Next ();
						first = (token == null);

						// Always drain the stream so that the reported extraction
						// time covers the whole text even in stats-only mode;
						// only the printing is suppressed.
						for (; token != null; token = token_stream.Next ()) {
							if (! stats_only)
								Console.Write ("{0}{1}", token.TermText (), separater_char);
						}
					} finally {
						// Close the stream even if tokenizing or printing threw.
						token_stream.Close ();
					}
				} else {
					string line;
					while ((line = reader.ReadLine ()) != null) {
						if (first) {
							Console.WriteLine ("Content:");
							first = false;
						}
						if (! stats_only)
							DisplayContent (line);
					}
				}
			} finally {
				// Close the reader even if reading or displaying threw.
				reader.Close ();
			}

			if (first)
				Console.WriteLine ("(no content)");
			else
				Console.WriteLine ('\n');
		}

		// Hot text from indexable.GetHotTextReader () is not displayed here.

		watch.Stop ();

		Console.WriteLine ();
		Console.WriteLine ("Text extracted in {0}", watch);

		// Report the generated indexables that were kept (show_generated only).
		foreach (Indexable gi in generated_indexables)
			Display (gi);

		// NOTE(review): the binary stream is fetched only to be closed,
		// presumably to release whatever it holds before cleanup -- confirm.
		Stream stream = indexable.GetBinaryStream ();
		if (stream != null)
			stream.Close ();

		// Clean up any temporary files associated with filtering this indexable.
		indexable.Cleanup ();
	}