Ejemplo n.º 1
0
    /// <summary>
    /// Filters <paramref name="indexable"/> and prints a report about it to the console:
    /// its URI and parent URI, the filter that was chosen (and how long choosing took),
    /// its MIME type, any filter-generated indexables, its sorted properties and the
    /// extracted text.
    /// </summary>
    /// <remarks>
    /// The static options <c>show_generated</c>, <c>analyze</c>, <c>tokenize</c> and
    /// <c>stats_only</c> control what is printed. With <c>show_generated</c> set, every
    /// filter-generated indexable is displayed recursively after its parent; otherwise
    /// it is cleaned up as soon as its URI has been listed.
    /// </remarks>
    /// <param name="indexable">The indexable to filter and display.</param>
    static void Display(Indexable indexable)
    {
        // Print a separator between consecutive indexables, but not before the first one.
        if (!first_indexable) {
            Console.WriteLine ();
            Console.WriteLine ("-----------------------------------------");
            Console.WriteLine ();
        }
        first_indexable = false;

        Console.WriteLine ("Filename: " + indexable.Uri);

        if (indexable.ParentUri != null)
            Console.WriteLine ("Parent: " + indexable.ParentUri);

        Stopwatch watch = new Stopwatch ();

        Filter filter;

        // Time only the filter lookup.
        watch.Start ();
        if (! FilterFactory.FilterIndexable (indexable, out filter)) {
            // Filtering failed: clean up right away and flag the indexable so that
            // the text extraction further down is skipped.
            indexable.Cleanup ();
            indexable.NoContent = true;
            filter = null;
        }
        watch.Stop ();

        Console.WriteLine ("Filter: {0} (determined in {1})", filter, watch);
        Console.WriteLine ("MimeType: {0}", indexable.MimeType);
        Console.WriteLine ();

        ArrayList generated_indexables = new ArrayList ();
        Indexable generated_indexable;

        // True until the "Filter-generated indexables:" header has been printed.
        bool first = true;
        if (filter != null && filter.HasGeneratedIndexable) {
            while (filter.GenerateNextIndexable (out generated_indexable)) {
                if (generated_indexable == null)
                    continue;

                if (first) {
                    Console.WriteLine ("Filter-generated indexables:");
                    first = false;
                }

                Console.WriteLine ("  {0}", generated_indexable.Uri);

                // Keep the child for the recursive display below, or drop it now.
                if (show_generated)
                    generated_indexables.Add (generated_indexable);
                else
                    generated_indexable.Cleanup ();
            }
        }

        if (! first)
            Console.WriteLine ();

        // Make sure that the properties are sorted.
        ArrayList prop_array = new ArrayList (indexable.Properties);
        prop_array.Sort ();

        Console.WriteLine ("Properties:");

        if (indexable.ValidTimestamp)
            Console.WriteLine ("  Timestamp = {0}", DateTimeUtil.ToString (indexable.Timestamp));

        foreach (Beagle.Property prop in prop_array) {
            // Properties without a value are not shown.
            if (String.IsNullOrEmpty (prop.Value))
                continue;

            Console.WriteLine ("  {0} = {1}", prop.Key, prop.Value);
        }

        Console.WriteLine ();

        // NOTE(review): on this path any indexables collected in generated_indexables
        // are neither displayed nor cleaned up, and indexable.Cleanup () has only run
        // if the filter failed above -- confirm that this is intended.
        if (indexable.NoContent)
            return;

        try {
            // Time the text extraction.
            watch.Reset ();
            watch.Start ();

            Analyzer indexing_analyzer = new BeagleAnalyzer ();

            TextReader reader = indexable.GetTextReader ();
            char separator_char = (tokenize ? '\n' : ' ');
            if (reader != null) {
                // Stays true as long as the reader has produced no text.
                bool content_empty = true;

                try {
                    if (analyze) {
                        if (! stats_only)
                            Console.WriteLine ("Content:");

                        TokenStream token_stream = indexing_analyzer.TokenStream ("Text", reader);
                        try {
                            Lucene.Net.Analysis.Token token = token_stream.Next ();
                            content_empty = (token == null);

                            // With stats_only set only the first token is pulled;
                            // the rest of the stream is not consumed.
                            if (! stats_only)
                                for (; token != null; token = token_stream.Next ())
                                    Console.Write ("{0}{1}", token.TermText (), separator_char);
                        } finally {
                            // Close the stream even if tokenizing throws.
                            token_stream.Close ();
                        }
                    } else {
                        // NOTE(review): unlike the analyze branch, the "Content:" header
                        // is printed here even when stats_only is set -- confirm.
                        string line;
                        while ((line = reader.ReadLine ()) != null) {
                            if (content_empty) {
                                Console.WriteLine ("Content:");
                                content_empty = false;
                            }
                            if (! stats_only)
                                DisplayContent (line);
                        }
                    }
                } finally {
                    // Close the reader even if reading or tokenizing throws.
                    reader.Close ();
                }

                if (content_empty)
                    Console.WriteLine ("(no content)");
                else
                    Console.WriteLine ('\n'); // a newline character plus the line terminator
            }

            watch.Stop ();

            Console.WriteLine ();
            Console.WriteLine ("Text extracted in {0}", watch);

            #if ENABLE_RDF_ADAPTER
            IList<string> links = indexable.Links;
            if (links != null && links.Count != 0) {
                Console.WriteLine ("Links:");
                foreach (string link in links)
                    Console.WriteLine (link);
                Console.WriteLine ();
            }
            #endif

            // Only populated when show_generated is set.
            foreach (Indexable gi in generated_indexables)
                Display (gi);

            // NOTE(review): the binary stream is fetched only to be closed again,
            // presumably to release whatever backs it -- confirm.
            Stream stream = indexable.GetBinaryStream ();
            if (stream != null)
                stream.Close ();
        } finally {
            // Clean up any temporary files associated with filtering this indexable,
            // also when displaying it failed part-way through.
            indexable.Cleanup ();
        }
    }
Ejemplo n.º 2
0
	/// <summary>
	/// Filters <paramref name="indexable"/> and prints a report about it to the console:
	/// its URI and parent URI, the filter that was chosen (and how long choosing took),
	/// its MIME type, any filter-generated indexables, its sorted properties and the
	/// extracted text.
	/// </summary>
	/// <remarks>
	/// The static options <c>show_generated</c>, <c>analyze</c>, <c>tokenize</c> and
	/// <c>stats_only</c> control what is printed. With <c>show_generated</c> set, every
	/// filter-generated indexable is displayed recursively after its parent; otherwise
	/// it is cleaned up as soon as its URI has been listed.
	/// </remarks>
	/// <param name="indexable">The indexable to filter and display.</param>
	static void Display (Indexable indexable)
	{
		// Print a separator between consecutive indexables, but not before the first one.
		if (!first_indexable) {
			Console.WriteLine ();
			Console.WriteLine ("-----------------------------------------");
			Console.WriteLine ();
		}
		first_indexable = false;

		Console.WriteLine ("Filename: " + indexable.Uri);

		if (indexable.ParentUri != null)
			Console.WriteLine ("Parent: " + indexable.ParentUri);

		Stopwatch watch = new Stopwatch ();

		Filter filter;

		// Time only the filter lookup.
		watch.Start ();
		if (! FilterFactory.FilterIndexable (indexable, out filter)) {
			// Filtering failed: clean up right away and flag the indexable so that
			// the text extraction further down is skipped.
			indexable.Cleanup ();
			indexable.NoContent = true;
			filter = null;
		}
		watch.Stop ();

		Console.WriteLine ("Filter: {0} (determined in {1})", filter, watch);
		Console.WriteLine ("MimeType: {0}", indexable.MimeType);
		Console.WriteLine ();

		ArrayList generated_indexables = new ArrayList ();
		Indexable generated_indexable;

		// True until the "Filter-generated indexables:" header has been printed.
		bool first = true;
		if (filter != null && filter.HasGeneratedIndexable) {
			while (filter.GenerateNextIndexable (out generated_indexable)) {
				if (generated_indexable == null)
					continue;

				if (first) {
					Console.WriteLine ("Filter-generated indexables:");
					first = false;
				}

				Console.WriteLine ("  {0}", generated_indexable.Uri);

				// Keep the child for the recursive display below, or drop it now.
				if (show_generated)
					generated_indexables.Add (generated_indexable);
				else
					generated_indexable.Cleanup ();
			}
		}

		if (! first)
			Console.WriteLine ();

		// Make sure that the properties are sorted.
		ArrayList prop_array = new ArrayList (indexable.Properties);
		prop_array.Sort ();

		Console.WriteLine ("Properties:");

		if (indexable.ValidTimestamp)
			Console.WriteLine ("  Timestamp = {0}", DateTimeUtil.ToString (indexable.Timestamp));

		foreach (Beagle.Property prop in prop_array) {
			// Properties without a value are not shown.
			if (String.IsNullOrEmpty (prop.Value))
				continue;

			Console.WriteLine ("  {0} = {1}", prop.Key, prop.Value);
		}

		Console.WriteLine ();

		// NOTE(review): on this path any indexables collected in generated_indexables
		// are neither displayed nor cleaned up, and indexable.Cleanup () has only run
		// if the filter failed above -- confirm that this is intended.
		if (indexable.NoContent)
			return;

		try {
			// Time the text extraction.
			watch.Reset ();
			watch.Start ();

			Analyzer indexing_analyzer = new BeagleAnalyzer ();

			TextReader reader = indexable.GetTextReader ();
			char separator_char = (tokenize ? '\n' : ' ');
			if (reader != null) {
				// Stays true as long as the reader has produced no text.
				bool content_empty = true;

				try {
					if (analyze) {
						if (! stats_only)
							Console.WriteLine ("Content:");

						TokenStream token_stream = indexing_analyzer.TokenStream ("Text", reader);
						try {
							Lucene.Net.Analysis.Token token = token_stream.Next ();
							content_empty = (token == null);

							// With stats_only set only the first token is pulled;
							// the rest of the stream is not consumed.
							if (! stats_only)
								for (; token != null; token = token_stream.Next ())
									Console.Write ("{0}{1}", token.TermText (), separator_char);
						} finally {
							// Close the stream even if tokenizing throws.
							token_stream.Close ();
						}
					} else {
						// NOTE(review): unlike the analyze branch, the "Content:" header
						// is printed here even when stats_only is set -- confirm.
						string line;
						while ((line = reader.ReadLine ()) != null) {
							if (content_empty) {
								Console.WriteLine ("Content:");
								content_empty = false;
							}
							if (! stats_only)
								DisplayContent (line);
						}
					}
				} finally {
					// Close the reader even if reading or tokenizing throws.
					reader.Close ();
				}

				if (content_empty)
					Console.WriteLine ("(no content)");
				else
					Console.WriteLine ('\n'); // a newline character plus the line terminator
			}

			watch.Stop ();

			Console.WriteLine ();
			Console.WriteLine ("Text extracted in {0}", watch);

#if ENABLE_RDF_ADAPTER
			IList<string> links = indexable.Links;
			if (links != null && links.Count != 0) {
				Console.WriteLine ("Links:");
				foreach (string link in links)
					Console.WriteLine (link);
				Console.WriteLine ();
			}
#endif

			// Only populated when show_generated is set.
			foreach (Indexable gi in generated_indexables)
				Display (gi);

			// NOTE(review): the binary stream is fetched only to be closed again,
			// presumably to release whatever backs it -- confirm.
			Stream stream = indexable.GetBinaryStream ();
			if (stream != null)
				stream.Close ();
		} finally {
			// Clean up any temporary files associated with filtering this indexable,
			// also when displaying it failed part-way through.
			indexable.Cleanup ();
		}
	}