Esempio n. 1
0
 /// <summary>
 /// Calls <c>Cleanup()</c> on every indexable currently held in
 /// <c>generated_indexables</c>, in enumeration order.
 /// </summary>
 public void CleanGeneratedIndexables()
 {
     foreach (Indexable generated in generated_indexables)
     {
         generated.Cleanup();
     }
 }
 /// <summary>
 /// Cleans up the filter (including the indexables it generated) and the
 /// indexable, drops both references, and empties <c>PersistentPropDocs</c>
 /// when it is present.
 /// </summary>
 /// <remarks>
 /// This method sets <c>Filter</c> and <c>Indexable</c> to null, so the
 /// references are null-checked before use. A second call is therefore a
 /// harmless no-op instead of a NullReferenceException.
 /// </remarks>
 public void Cleanup()
 {
     if (Filter != null)
     {
         Filter.Cleanup();
         Filter.CleanGeneratedIndexables();
     }

     if (Indexable != null)
     {
         Indexable.Cleanup();
     }

     // Drop the references only after both cleanups have run, as before.
     Filter    = null;
     Indexable = null;

     if (PersistentPropDocs != null)
     {
         PersistentPropDocs.Clear();
     }
 }
Esempio n. 3
0
 /// <summary>
 /// Cleanup hook override: forwards to <c>indexable.Cleanup()</c>.
 /// </summary>
 protected override void DoCleanup()
 {
     indexable.Cleanup();
 }
Esempio n. 4
0
    /// <summary>
    /// Writes a report for one indexable to the console: its URI (and parent URI, if any),
    /// the filter selected for it and how long selection took, its MIME type, the URIs of
    /// any filter-generated indexables, its sorted properties, and finally its text content
    /// (raw lines, or analyzer tokens when <c>analyze</c> is set).
    /// Generated indexables that were kept (<c>show_generated</c>) are displayed
    /// recursively afterwards.
    /// </summary>
    /// <param name="indexable">
    /// The indexable to filter and display. <c>Cleanup()</c> is called on it at the end of
    /// the method, and immediately when no filter could be found for it.
    /// </param>
    static void Display(Indexable indexable)
    {
        // Print a separator before every indexable except the first one.
        // first_indexable is state declared outside this method.
        if (!first_indexable) {
            Console.WriteLine ();
            Console.WriteLine ("-----------------------------------------");
            Console.WriteLine ();
        }
        first_indexable = false;

        Console.WriteLine ("Filename: " + indexable.Uri);

        if (indexable.ParentUri != null)
            Console.WriteLine ("Parent: " + indexable.ParentUri);

        Stopwatch watch = new Stopwatch ();

        Filter filter;

        // Time how long it takes to pick a filter for this indexable.
        watch.Start ();
        if (! FilterFactory.FilterIndexable (indexable, out filter)) {
            // No filter could be applied: clean up now and flag the indexable as
            // content-less so that the text section below is skipped.
            indexable.Cleanup ();
            indexable.NoContent = true;
            filter = null;
        }
        watch.Stop ();

        Console.WriteLine ("Filter: {0} (determined in {1})", filter, watch);
        Console.WriteLine ("MimeType: {0}", indexable.MimeType);
        Console.WriteLine ();

        // Generated indexables kept for recursive display at the end of the method.
        ArrayList generated_indexables = new ArrayList ();
        Indexable generated_indexable;

        // 'first' is reused several times below as a "nothing printed yet" flag.
        bool first = true;
        if (filter != null && filter.HasGeneratedIndexable) {
            while (filter.GenerateNextIndexable (out generated_indexable)) {
                if (generated_indexable == null)
                    continue;

                if (first) {
                    Console.WriteLine ("Filter-generated indexables:");
                    first = false;
                }

                Console.WriteLine ("  {0}", generated_indexable.Uri);

                // Either keep the generated indexable for later display, or
                // clean it up right away since it will not be used again here.
                if (show_generated)
                    generated_indexables.Add (generated_indexable);
                else
                    generated_indexable.Cleanup ();
            }
        }

        // Blank line after the list, only if at least one entry was printed.
        if (! first)
            Console.WriteLine ();

        // Make sure that the properties are sorted.
        ArrayList prop_array = new ArrayList (indexable.Properties);
        prop_array.Sort ();

        Console.WriteLine ("Properties:");

        if (indexable.ValidTimestamp)
            Console.WriteLine ("  Timestamp = {0}", DateTimeUtil.ToString (indexable.Timestamp));

        // Properties with a null or empty value are not shown.
        foreach (Beagle.Property prop in prop_array) {
            if (String.IsNullOrEmpty (prop.Value))
                continue;

            Console.WriteLine ("  {0} = {1}", prop.Key, prop.Value);
        }

        Console.WriteLine ();

        // NOTE(review): this early return skips the indexable.Cleanup() at the end of
        // the method. When NoContent was set by the filter failure above, Cleanup() has
        // already been called; confirm whether NoContent can be true on other paths.
        if (indexable.NoContent)
            return;

        // Restart the stopwatch to time text extraction.
        watch.Reset ();
        watch.Start ();

        TextReader reader;
        Analyzer indexing_analyzer = new BeagleAnalyzer ();

        // NOTE(review): 'buffer' is only referenced by the disabled '#if false' branch
        // and by the commented-out hot-content code below.
        char[] buffer = new char [2048];
        reader = indexable.GetTextReader ();
        // Tokens are written one per line when 'tokenize' is set, space-separated otherwise.
        char separater_char = (tokenize ? '\n' : ' ');
        if (reader != null) {
            // From here on 'first' means "no content was produced"; it selects the
            // "(no content)" message after the reader is closed.
            first = true;

            if (analyze) {
                if (! stats_only)
                    Console.WriteLine ("Content:");

                TokenStream token_stream = indexing_analyzer.TokenStream ("Text", reader);
                Lucene.Net.Analysis.Token token = token_stream.Next ();
                first = (token == null);

                // With stats_only the remaining tokens are not read at all.
                if (! stats_only)
                    for (; token != null; token = token_stream.Next ())
                        Console.Write ("{0}{1}", token.TermText (), separater_char);

                token_stream.Close ();
            } else {
        #if false
                while (true) {
                    int l = reader.Read (buffer, 0, 2048);
                    if (l <= 0)
                        break;
                    if (first)
                        first = false;
                    if (! stats_only)
                        DisplayContent (buffer, l);
                }
        #else
                // Active branch: read the text line by line. Every line is read even
                // with stats_only; only the call to DisplayContent is skipped.
                // NOTE(review): unlike the analyze branch, the "Content:" header is
                // printed here even when stats_only is set.
                string line;
                first = true;
                while ((line = reader.ReadLine ()) != null) {
                    if (first) {
                        Console.WriteLine ("Content:");
                        first = false;
                    }
                    if (! stats_only)
                        DisplayContent (line);
                }
        #endif
            }

            reader.Close ();

            if (first)
                Console.WriteLine ("(no content)");
            else
                // Writes a '\n' character followed by the line terminator.
                Console.WriteLine ('\n');
        }

        // Disabled: display of the text returned by GetHotTextReader().
        /*
        reader = indexable.GetHotTextReader ();
        first = true;
        if (reader != null) {
            Console.WriteLine ("HotContent:");

            if (analyze) {
                TokenStream token_stream = indexing_analyzer.TokenStream ("HotText", reader);
                Lucene.Net.Analysis.Token token = token_stream.Next ();
                first = (token == null);

                for (; token != null; token = token_stream.Next ())
                    Console.Write ("{0}{1}", token.TermText (), separater_char);

                token_stream.Close ();
            } else {
                while (true) {
                    int l = reader.Read (buffer, 0, 2048);
                    if (l <= 0)
                        break;
                    if (first)
                        first = false;
                    DisplayContent (buffer, l);
                }
            }

            reader.Close ();

            if (first)
                Console.WriteLine ("(no hot content)");
            else
                Console.WriteLine ('\n');
        }
        */

        watch.Stop ();

        Console.WriteLine ();
        Console.WriteLine ("Text extracted in {0}", watch);

        #if ENABLE_RDF_ADAPTER
        IList<string> links = indexable.Links;
        if (links != null && links.Count != 0) {
            Console.WriteLine ("Links:");
            foreach (string link in links)
                Console.WriteLine (link);
            Console.WriteLine ();
        }
        #endif

        // Recursively display the generated indexables kept above
        // (the list is only populated when show_generated is set).
        foreach (Indexable gi in generated_indexables)
            Display (gi);

        // The binary stream is obtained only to be closed; nothing is read from it.
        // NOTE(review): presumably this releases the underlying stream - confirm.
        Stream stream = indexable.GetBinaryStream ();
        if (stream != null)
            stream.Close ();

        // Clean up any temporary files associated with filtering this indexable.
        indexable.Cleanup ();
    }
        /// <summary>
        /// Filters one indexable and adds it to the index, unless its filter may generate
        /// further indexables, in which case it is queued in <c>deferred_indexables</c>.
        /// </summary>
        /// <param name="indexable">The indexable to filter and index.</param>
        /// <param name="primary_writer">Passed through to <c>AddDocumentToIndex</c>.</param>
        /// <param name="secondary_writer">Passed through, by reference, to <c>AddDocumentToIndex</c>.</param>
        /// <param name="prop_change_docs">
        /// Optional table, keyed by indexable URI, from which the persistent property
        /// document for this indexable is looked up; may be null.
        /// </param>
        /// <returns>
        /// A receipt for the added indexable (carrying the filter name and version when a
        /// filter was used), or null to signal that the indexable was deferred.
        /// </returns>
        private IndexerAddedReceipt AddIndexableToIndex(Indexable indexable,
                                                        IndexWriter primary_writer,
                                                        ref IndexWriter secondary_writer,
                                                        Hashtable prop_change_docs)
        {
            Filter filter = null;

            if (FileFilterNotifier != null)
            {
                FileFilterNotifier(indexable.DisplayUri, indexable.ContentUri, null);                  // We don't know what filter yet.
            }
            // If we have content, try to find a filter
            // that we can use to process the indexable
            bool filter_content = false;

            try
            {
                filter_content = FilterFactory.FilterIndexable(indexable, (disable_textcache ? null : text_cache), out filter);
            }
            catch (System.Exception e)
            {
                // Filtering stays best-effort: the indexable is still indexed, just without
                // content. Log the failure instead of discarding it silently.
                Log.Debug("Filtering {0} failed; indexing it without content: {1}", indexable.DisplayUri, e);
            }

            if (!filter_content)
            {
                indexable.NoContent = true;
                filter = null;
            }

            if (FileFilterNotifier != null)
            {
                FileFilterNotifier(indexable.DisplayUri, indexable.ContentUri, filter);                  // Update with our filter
            }
            IndexerAddedReceipt r = new IndexerAddedReceipt(indexable.Id);

            if (filter != null)
            {
                if (filter.HasGeneratedIndexable)
                {
                    Log.Debug("{0} might generate indexables from {1}; deferring until later",
                              indexable.DisplayUri,
                              filter.GetType().ToString());

                    // This indexable can potentially generate indexables,
                    // so defer its indexing

                    DeferredInfo di;
                    di = new DeferredInfo(indexable, filter, prop_change_docs);
                    deferred_indexables.Insert(0, di);

                    // Since we are deferred, continue. Do not cleanup indexable or remove text-cache yet.
                    // FIXME: Make sure all indexable.Cleanup is called for all indexables if
                    // shutdown is signalled.
                    if (FileFilterNotifier != null)
                    {
                        FileFilterNotifier(null, null, null);                          // reset
                    }
                    // Return null to signal the indexable was deferred
                    return null;
                }

                // Force the clean-up of temporary files, just in case.
                // FIXME: I am not sure if the cleanup should happen now.
                // What is the difference between filter.Cleanup and Indexable.Cleanup ?
                filter.Cleanup();

                r.FilterName    = filter.GetType().ToString();
                r.FilterVersion = filter.Version;
            }

            // If this indexable is not deferred, add it to the index.
            if (indexable.DisplayUri != indexable.ContentUri)
            {
                Log.Debug("+{0} ({1})", indexable.DisplayUri, indexable.ContentUri);
            }
            else
            {
                Log.Debug("+{0}", indexable.DisplayUri);
            }

            Document persistent_prop_doc = null;

            if (prop_change_docs != null)
            {
                persistent_prop_doc = (Document)prop_change_docs [indexable.Uri];
            }
            AddDocumentToIndex(indexable, persistent_prop_doc, primary_writer, ref secondary_writer);

            if (FileFilterNotifier != null)
            {
                FileFilterNotifier(null, null, null);                  // reset
            }
            // Clean up any temporary files associated with filtering this indexable.
            indexable.Cleanup();

            // Remove any existing text cache for this item
            if (disable_textcache && text_cache != null)
            {
                text_cache.Delete(indexable.Uri);
            }

            return r;
        }
Esempio n. 6
0
		/// <summary>
		/// Filters one indexable and adds it to the index, unless its filter may generate
		/// further indexables, in which case it is queued in <c>deferred_indexables</c>.
		/// </summary>
		/// <param name="indexable">The indexable to filter and index.</param>
		/// <param name="primary_writer">Passed through to <c>AddDocumentToIndex</c>.</param>
		/// <param name="secondary_writer">Passed through, by reference, to <c>AddDocumentToIndex</c>.</param>
		/// <param name="prop_change_docs">
		/// Optional table, keyed by indexable URI, from which the persistent property
		/// document for this indexable is looked up; may be null.
		/// </param>
		/// <returns>
		/// A receipt for the added indexable (carrying the filter name and version when a
		/// filter was used), or null to signal that the indexable was deferred.
		/// </returns>
		private IndexerAddedReceipt AddIndexableToIndex (Indexable indexable,
								 IndexWriter primary_writer,
								 ref IndexWriter secondary_writer,
								 Hashtable prop_change_docs)
		{
			Filter filter = null;
			if (FileFilterNotifier != null)
				FileFilterNotifier (indexable.DisplayUri, indexable.ContentUri, null); // We don't know what filter yet.

			// If we have content, try to find a filter
			// that we can use to process the indexable
			bool filter_content = false;
			try {
				filter_content = FilterFactory.FilterIndexable (indexable, (disable_textcache ? null : text_cache), out filter);
			} catch (System.Exception e) {
				// Filtering stays best-effort: the indexable is still indexed, just without
				// content. Log the failure instead of discarding it silently.
				Log.Debug ("Filtering {0} failed; indexing it without content: {1}", indexable.DisplayUri, e);
			}

			if (! filter_content) {
				indexable.NoContent = true;
				filter = null;
			}

			if (FileFilterNotifier != null)
				FileFilterNotifier (indexable.DisplayUri, indexable.ContentUri, filter); // Update with our filter

			IndexerAddedReceipt r = new IndexerAddedReceipt (indexable.Id);

			if (filter != null) {
				if (filter.HasGeneratedIndexable) {
					Log.Debug ("{0} might generate indexables from {1}; deferring until later",
						   indexable.DisplayUri,
						   filter.GetType ().ToString ());

					// This indexable can potentially generate indexables,
					// so defer its indexing

					DeferredInfo di;
					di = new DeferredInfo (indexable, filter, prop_change_docs);
					deferred_indexables.Insert (0, di);

					// Since we are deferred, continue. Do not cleanup indexable or remove text-cache yet.
					// FIXME: Make sure all indexable.Cleanup is called for all indexables if
					// shutdown is signalled.
					if (FileFilterNotifier != null)
						FileFilterNotifier (null, null, null); // reset

					// Return null to signal the indexable was deferred
					return null;
				}

				// Force the clean-up of temporary files, just in case.
				// FIXME: I am not sure if the cleanup should happen now.
				// What is the difference between filter.Cleanup and Indexable.Cleanup ?
				filter.Cleanup ();

				r.FilterName = filter.GetType ().ToString ();
				r.FilterVersion = filter.Version;
			}

			// If this indexable is not deferred, add it to the index.
			if (indexable.DisplayUri != indexable.ContentUri)
				Log.Debug ("+{0} ({1})", indexable.DisplayUri, indexable.ContentUri);
			else
				Log.Debug ("+{0}", indexable.DisplayUri);

			Document persistent_prop_doc = null;
			if (prop_change_docs != null)
				persistent_prop_doc = (Document) prop_change_docs [indexable.Uri];
			AddDocumentToIndex (indexable, persistent_prop_doc, primary_writer, ref secondary_writer);

			if (FileFilterNotifier != null)
				FileFilterNotifier (null, null, null); // reset

			// Clean up any temporary files associated with filtering this indexable.
			indexable.Cleanup ();

			// Remove any existing text cache for this item
			if (disable_textcache && text_cache != null)
				text_cache.Delete (indexable.Uri);

			return r;
		}