Exemple #1
0
	/// <summary>
	/// Writes a report about <paramref name="indexable"/> to the console: its URI
	/// and parent URI, the filter selected for it, its MIME type, any
	/// filter-generated indexables, its sorted properties and its text content.
	/// </summary>
	/// <remarks>
	/// Behaviour is steered by flags declared outside this method:
	/// <c>show_generated</c> (recurse into filter-generated indexables instead of
	/// cleaning them up right away), <c>analyze</c> (print analyzer tokens rather
	/// than raw lines), <c>tokenize</c> (separate tokens with a newline instead of
	/// a space) and <c>stats_only</c> (suppress the content itself).
	/// </remarks>
	/// <param name="indexable">The indexable to filter and display.</param>
	static void Display (Indexable indexable)
	{
		// Put a ruler between consecutive reports, but not before the first one.
		if (!first_indexable) {
			Console.WriteLine ();
			Console.WriteLine ("-----------------------------------------");
			Console.WriteLine ();
		}
		first_indexable = false;

		Console.WriteLine ("Filename: " + indexable.Uri);

		if (indexable.ParentUri != null)
			Console.WriteLine ("Parent: " + indexable.ParentUri);

		Stopwatch watch = new Stopwatch ();
		Filter filter;

		// Time only the filter selection.
		watch.Start ();
		if (! FilterFactory.FilterIndexable (indexable, out filter)) {
			// No filter could be applied: release whatever was set up and
			// mark the indexable so the content section below is skipped.
			indexable.Cleanup ();
			indexable.NoContent = true;
			filter = null;
		}
		watch.Stop ();

		Console.WriteLine ("Filter: {0} (determined in {1})", filter, watch);
		Console.WriteLine ("MimeType: {0}", indexable.MimeType);
		Console.WriteLine ();

		// Children produced by the filter; only kept when show_generated is set.
		ArrayList generated_indexables = new ArrayList ();
		bool no_generated_listed = true;

		if (filter != null && filter.HasGeneratedIndexable) {
			Indexable generated_indexable;

			while (filter.GenerateNextIndexable (out generated_indexable)) {
				if (generated_indexable == null)
					continue;

				// Print the heading lazily so it only appears if there is
				// at least one generated indexable.
				if (no_generated_listed) {
					Console.WriteLine ("Filter-generated indexables:");
					no_generated_listed = false;
				}

				Console.WriteLine ("  {0}", generated_indexable.Uri);

				if (show_generated)
					generated_indexables.Add (generated_indexable);
				else
					generated_indexable.Cleanup ();
			}
		}

		if (! no_generated_listed)
			Console.WriteLine ();

		// Make sure that the properties are sorted.
		ArrayList prop_array = new ArrayList (indexable.Properties);
		prop_array.Sort ();

		Console.WriteLine ("Properties:");

		if (indexable.ValidTimestamp)
			Console.WriteLine ("  Timestamp = {0}", DateTimeUtil.ToString (indexable.Timestamp));

		foreach (Beagrep.Property prop in prop_array) {
			// Properties without a value are not shown.
			if (String.IsNullOrEmpty (prop.Value))
				continue;

			Console.WriteLine ("  {0} = {1}", prop.Key, prop.Value);
		}

		Console.WriteLine ();

		// NOTE(review): this early exit also skips the recursion into
		// generated_indexables and the final indexable.Cleanup () below.
		// That is harmless when filtering failed (Cleanup already ran and the
		// list is empty), but confirm it is intended when a filter succeeded
		// and NoContent is set.
		if (indexable.NoContent)
			return;

		// From here on the stopwatch measures text extraction.
		watch.Reset ();
		watch.Start ();

		Analyzer indexing_analyzer = new BeagrepAnalyzer ();
		TextReader reader = indexable.GetTextReader ();
		char separater_char = (tokenize ? '\n' : ' ');

		if (reader != null) {
			bool no_content = true;

			// try/finally guarantees the reader (and the token stream wrapped
			// around it) are closed even if reading or tokenizing throws.
			try {
				if (analyze) {
					if (! stats_only)
						Console.WriteLine ("Content:");

					TokenStream token_stream = indexing_analyzer.TokenStream ("Text", reader);
					try {
						Lucene.Net.Analysis.Token token = token_stream.Next ();
						no_content = (token == null);

						if (! stats_only)
							for (; token != null; token = token_stream.Next ())
								Console.Write ("{0}{1}", token.TermText (), separater_char);
					} finally {
						token_stream.Close ();
					}
				} else {
					// NOTE(review): unlike the analyze branch, the heading is
					// printed here even when stats_only is set; confirm.
					string line;
					while ((line = reader.ReadLine ()) != null) {
						if (no_content) {
							Console.WriteLine ("Content:");
							no_content = false;
						}
						if (! stats_only)
							DisplayContent (line);
					}
				}
			} finally {
				reader.Close ();
			}

			if (no_content)
				Console.WriteLine ("(no content)");
			else
				Console.WriteLine ('\n');
		}

		// A second, commented-out pass that dumped indexable.GetHotTextReader ()
		// under a "HotContent:" heading used to live here; it was dead code and
		// has been dropped together with the scratch buffer only it used.

		watch.Stop ();

		Console.WriteLine ();
		Console.WriteLine ("Text extracted in {0}", watch);

		// Recurse into the children collected above (empty unless show_generated).
		foreach (Indexable gi in generated_indexables)
			Display (gi);

		// Fetch the binary stream only to close it.
		Stream stream = indexable.GetBinaryStream ();
		if (stream != null)
			stream.Close ();

		// Clean up any temporary files associated with filtering this indexable.
		indexable.Cleanup ();
	}
Exemple #2
0
    /// <summary>
    /// Prints a console report for <paramref name="indexable"/>: URI, parent URI,
    /// the filter chosen for it, MIME type, filter-generated indexables, sorted
    /// properties and the extracted text.
    /// </summary>
    /// <remarks>
    /// Output is controlled by flags declared outside this method:
    /// <c>show_generated</c> (display filter-generated indexables recursively
    /// instead of cleaning them up immediately), <c>analyze</c> (print analyzer
    /// tokens rather than raw lines), <c>tokenize</c> (newline instead of space
    /// between tokens) and <c>stats_only</c> (do not print the content itself).
    /// </remarks>
    /// <param name="indexable">The indexable to filter and display.</param>
    static void Display(Indexable indexable)
    {
        // A ruler separates consecutive reports; the first report gets none.
        if (!first_indexable)
        {
            Console.WriteLine();
            Console.WriteLine("-----------------------------------------");
            Console.WriteLine();
        }
        first_indexable = false;

        Console.WriteLine("Filename: " + indexable.Uri);

        if (indexable.ParentUri != null)
        {
            Console.WriteLine("Parent: " + indexable.ParentUri);
        }

        Stopwatch watch = new Stopwatch();
        Filter    filter;

        // Only the filter lookup is timed here.
        watch.Start();
        if (!FilterFactory.FilterIndexable(indexable, out filter))
        {
            // Filtering failed: clean up now and flag the indexable so the
            // content section further down is skipped.
            indexable.Cleanup();
            indexable.NoContent = true;
            filter = null;
        }
        watch.Stop();

        Console.WriteLine("Filter: {0} (determined in {1})", filter, watch);
        Console.WriteLine("MimeType: {0}", indexable.MimeType);
        Console.WriteLine();

        // Filter-generated children; retained only when show_generated is set.
        ArrayList generated_indexables = new ArrayList();
        bool      no_generated_listed  = true;

        if (filter != null && filter.HasGeneratedIndexable)
        {
            Indexable generated_indexable;

            while (filter.GenerateNextIndexable(out generated_indexable))
            {
                if (generated_indexable == null)
                {
                    continue;
                }

                // The heading is emitted lazily, i.e. only once a first
                // generated indexable actually shows up.
                if (no_generated_listed)
                {
                    Console.WriteLine("Filter-generated indexables:");
                    no_generated_listed = false;
                }

                Console.WriteLine("  {0}", generated_indexable.Uri);

                if (show_generated)
                {
                    generated_indexables.Add(generated_indexable);
                }
                else
                {
                    generated_indexable.Cleanup();
                }
            }
        }

        if (!no_generated_listed)
        {
            Console.WriteLine();
        }

        // Make sure that the properties are sorted.
        ArrayList prop_array = new ArrayList(indexable.Properties);

        prop_array.Sort();

        Console.WriteLine("Properties:");

        if (indexable.ValidTimestamp)
        {
            Console.WriteLine("  Timestamp = {0}", DateTimeUtil.ToString(indexable.Timestamp));
        }

        foreach (Beagrep.Property prop in prop_array)
        {
            // Skip properties that carry no value.
            if (String.IsNullOrEmpty(prop.Value))
            {
                continue;
            }

            Console.WriteLine("  {0} = {1}", prop.Key, prop.Value);
        }

        Console.WriteLine();

        // NOTE(review): returning here bypasses both the recursion into
        // generated_indexables and the final indexable.Cleanup() below. That is
        // harmless after a failed filter (Cleanup already ran, list is empty);
        // confirm it is intended when a filter succeeded and NoContent is set.
        if (indexable.NoContent)
        {
            return;
        }

        // The stopwatch is reused to time text extraction.
        watch.Reset();
        watch.Start();

        Analyzer   indexing_analyzer = new BeagrepAnalyzer();
        TextReader reader            = indexable.GetTextReader();
        char       separater_char    = (tokenize ? '\n' : ' ');

        if (reader != null)
        {
            bool no_content = true;

            // try/finally ensures the reader and the token stream built on it
            // are closed even when reading or tokenizing throws.
            try
            {
                if (analyze)
                {
                    if (!stats_only)
                    {
                        Console.WriteLine("Content:");
                    }

                    TokenStream token_stream = indexing_analyzer.TokenStream("Text", reader);
                    try
                    {
                        Lucene.Net.Analysis.Token token = token_stream.Next();
                        no_content = (token == null);

                        if (!stats_only)
                        {
                            for (; token != null; token = token_stream.Next())
                            {
                                Console.Write("{0}{1}", token.TermText(), separater_char);
                            }
                        }
                    }
                    finally
                    {
                        token_stream.Close();
                    }
                }
                else
                {
                    // NOTE(review): in contrast to the analyze branch, the
                    // heading is printed even when stats_only is set; confirm.
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        if (no_content)
                        {
                            Console.WriteLine("Content:");
                            no_content = false;
                        }
                        if (!stats_only)
                        {
                            DisplayContent(line);
                        }
                    }
                }
            }
            finally
            {
                reader.Close();
            }

            if (no_content)
            {
                Console.WriteLine("(no content)");
            }
            else
            {
                Console.WriteLine('\n');
            }
        }

        // A commented-out second pass over indexable.GetHotTextReader() (printed
        // under "HotContent:") was removed from this spot as dead code, along
        // with the scratch buffer that only it and a disabled #if branch used.

        watch.Stop();

        Console.WriteLine();
        Console.WriteLine("Text extracted in {0}", watch);

        // Display the retained children (the list is empty unless show_generated).
        foreach (Indexable gi in generated_indexables)
        {
            Display(gi);
        }

        // The binary stream is fetched only so that it can be closed.
        Stream stream = indexable.GetBinaryStream();
        if (stream != null)
        {
            stream.Close();
        }

        // Clean up any temporary files associated with filtering this indexable.
        indexable.Cleanup();
    }