示例#1
0
        // Folds another indexable into this one.  No attempt is made to
        // verify that merging the two is meaningful.
        //
        //  - the later of the two timestamps is kept,
        //  - every property of `other` is handed to AddProperty,
        //  - every local_state entry of `other` replaces the entry stored
        //    under the same key here.
        public void Merge(Indexable other)
        {
            // Keep whichever timestamp is more recent.
            if (this.Timestamp < other.Timestamp)
            {
                this.Timestamp = other.Timestamp;
            }

            foreach (Property incoming in other.Properties)
            {
                this.AddProperty(incoming);
            }

            // Entries coming from `other` win over existing ones.
            foreach (DictionaryEntry pair in other.local_state)
            {
                this.local_state [pair.Key] = pair.Value;
            }
        }
示例#2
0
        // Copies every property of a top-level parent onto this indexable.
        //
        // Each property is cloned and only the clone is touched.  The clone's
        // key is prefixed with "parent:" unless
        //  - it is a private property, i.e. the key starts with
        //    Property.PrivateNamespace (these are not properties of the file
        //    content), or
        //  - the key already starts with "parent:".
        //
        // Prefixes are matched ordinally: property keys are identifiers, so the
        // result must not depend on the current culture.
        private void CopyPropertyParentToChild(Indexable parent)
        {
            foreach (Property prop in parent.Properties)
            {
                Property new_prop = (Property)prop.Clone();

                if (!new_prop.Key.StartsWith(Property.PrivateNamespace, StringComparison.Ordinal) &&
                    !new_prop.Key.StartsWith("parent:", StringComparison.Ordinal))
                {
                    new_prop.Key = "parent:" + new_prop.Key;
                }

                this.AddProperty(new_prop);
            }
        }
示例#3
0
        // FIXME: Copying the correct properties from parent to child:
        // (This is not perfect yet)
        // It does not make sense to have parent:parent:parent:...:parent:foo
        // for property names of a nested child
        // Moreover, if indexable a.mbox has child b.zip which has child c.zip,
        // then upon matching c.zip, we would like to get the information from
        // a.mbox (i.e. the toplevel indexable) only. Intermediate parent information
        // is not necessary for displaying results; in fact, storing them would cause
        // confusion during display.
        // E.g. storing parent:beagrep:filename for all parents
        // would cause parent:beagrep:filename=a.mbox and parent:beagrep:filename=b.zip,
        // whereas we are only interested in toplevel parent:beagrep:filename=a.mbox
        // For indexables which need to store the intermediate/immediate parent info
        // separately, explicitly store them.
        // Another problem is, toplevel indexable might want to store information
        // which should not be matched when searching for its child. Copying those
        // properties in all children will incorrectly match them.
        //

        // Copies the properties of `parent` -- which is itself a child -- onto
        // this indexable.
        //
        // Every property is cloned and added.  Clones whose key starts with
        // "parent:" or Property.PrivateNamespace are added unchanged; all other
        // clones are added with IsStored set to false.
        //
        // Prefixes are matched ordinally: property keys are identifiers, so the
        // result must not depend on the current culture.
        private void CopyPropertyChildToChild(Indexable parent)
        {
            foreach (Property prop in parent.Properties)
            {
                bool keep_as_is =
                    prop.Key.StartsWith("parent:", StringComparison.Ordinal) ||
                    prop.Key.StartsWith(Property.PrivateNamespace, StringComparison.Ordinal);

                Property new_prop = (Property)prop.Clone();
                if (!keep_as_is)
                {
                    // The intermediate parent's own properties are copied
                    // but flagged as not stored.
                    new_prop.IsStored = false;
                }

                this.AddProperty(new_prop);
            }
        }
示例#4
0
        //////////////////////////

        // Marks this indexable as a child of `parent` and inherits the
        // parent's data:
        //  - ParentUri: the parent's ParentUri when the parent is itself a
        //    child, otherwise the parent's Uri;
        //  - Timestamp: taken from the parent only when ours is not valid;
        //  - HitType:   taken from the parent only when ours is null or empty;
        //  - Source:    always taken from the parent;
        //  - properties: copied with the helper matching the parent's kind.
        public void SetChildOf(Indexable parent)
        {
            this.IsChild = true;
            this.ParentUri = parent.IsChild ? parent.ParentUri : parent.Uri;

            if (!this.ValidTimestamp)
            {
                this.Timestamp = parent.Timestamp;
            }

            if (string.IsNullOrEmpty(this.HitType))
            {
                this.HitType = parent.HitType;
            }

            this.Source = parent.Source;

            // FIXME: The parent's properties are set on the child so that
            // queries which would otherwise match only the parent also
            // match the child, at least until there is proper RDF support.
            if (!parent.IsChild)
            {
                CopyPropertyParentToChild(parent);
            }
            else
            {
                CopyPropertyChildToChild(parent);
            }
        }
示例#5
0
	// Entry point: removes one uri from the named index.
	//
	// Expects exactly two arguments, the index name and the uri to delete.
	// The uri is remapped through RemapUri, a Remove-type indexable is
	// flushed through a LuceneIndexingDriver, and the outcome is reported on
	// the console.
	static void Main (string[] args)
	{
		if (args.Length != 2) {
			Console.WriteLine ("Usage: beagrep-master-delete-button index-name uri-to-delete");
			return;
		}

		string index_name = args [0];

		LuceneQueryingDriver driver = new LuceneQueryingDriver (index_name, -1, true);

		Uri uri = new Uri (args [1], false);
		Uri uri_to_delete = RemapUri (driver, uri);

		LuceneIndexingDriver indexer = new LuceneIndexingDriver (index_name, false);

		// A single removal request for the remapped uri.
		Indexable removal = new Indexable (uri_to_delete);
		removal.Type = IndexableType.Remove;

		IndexerRequest request = new IndexerRequest ();
		request.Add (removal);

		IndexerReceipt [] receipts = indexer.Flush (request);

		// Only the first receipt is inspected, and only if it is a
		// removal receipt.
		IndexerRemovedReceipt removed = null;
		if (receipts != null && receipts.Length > 0)
			removed = receipts [0] as IndexerRemovedReceipt;

		if (removed == null || removed.NumRemoved == 0) {
			Console.WriteLine ("Uri {0} not found in {1}",
					   uri, index_name);
			return;
		}

		Console.WriteLine ("Uri {0} deleted", uri);
	}
示例#6
0
                /////////////////////////////////////////////////////////////////

                // Queues one indexable on pending_request and flushes the
                // indexer once the queue has reached BATCH_SIZE items.
                //
                // A null indexable is ignored.  After a flush that asks to be
                // rescheduled, flushing continues regardless of the queue size
                // until FlushIndexer returns false or a shutdown is requested.
                static void AddToRequest (Indexable indexable)
                {
                        if (indexable == null)
                                return;

                        // Filtering disabled: only file attributes get indexed.
                        if (arg_disable_filtering)
                                indexable.Filtering = IndexableFiltering.Never;

                        // Tagged items are easy to identify later (for say, removal).
                        if (arg_tag != null)
                                indexable.AddProperty (Property.NewUnsearched("Tag", arg_tag));

                        indexable.Source = arg_source;
                        pending_request.Add (indexable);

                        bool flush_again = false;

                        while (true) {
                                if (Shutdown.ShutdownRequested)
                                        return;

                                if (flush_again)
                                        Logger.Log.Debug ("Continuing indexing indexer generated indexables");
                                else if (pending_request.Count < BATCH_SIZE)
                                        return;
                                else
                                        Logger.Log.Debug ("Flushing driver, {0} items in queue", pending_request.Count);

                                flush_again = FlushIndexer (driver);

                                // Super Lame Hack: gtk-sharp up to 2.10 only disposes
                                // of managed wrappers around GObjects from a main
                                // loop.  There is none here, so the pending items are
                                // processed by hand.  This matters most for maildirs:
                                // without it GMime's streams are never completely
                                // unref'd, their file descriptors stay open, and we
                                // eventually run out and crash.
                                while (GLib.MainContext.Pending ())
                                        GLib.MainContext.Iteration ();

                                if (! flush_again)
                                        return;
                        }
                }
示例#7
0
                // Runs one complete indexing pass:
                //  1. drains pending_directories, queuing every directory and
                //     every non-ignored, non-special file for indexing
                //     (subdirectories are enqueued only when arg_recursive is set);
                //  2. for each directory reported as modified, queues removals
                //     for indexed entries that no longer exist on disk;
                //  3. flushes the indexer until the request is empty, flushes the
                //     attributes store and optimizes the index.
                // A requested shutdown aborts the pass; the attributes store is
                // still flushed before returning.
                static void DoIndexing ()
                {
                        int count_dirs = 0;
                        int count_files = 0;

                        pending_request = new IndexerRequest ();
                        Queue modified_directories = new Queue ();

                        while (pending_directories.Count > 0) {
                                DirectoryInfo dir = (DirectoryInfo) pending_directories.Dequeue ();

                                AddToRequest (DirectoryToIndexable (dir, modified_directories));

                                try {
                                        if (arg_recursive)
                                                foreach (DirectoryInfo subdir in DirectoryWalker.GetDirectoryInfos (dir))
                                                        if (!Ignore (subdir)
                                                            && !FileSystem.IsSpecialFile (subdir.FullName))
                                                                pending_directories.Enqueue (subdir);

                                        foreach (FileInfo file in DirectoryWalker.GetFileInfos (dir))
                                                if (!Ignore (file)
                                                    && !FileSystem.IsSpecialFile (file.FullName)) {
                                                        AddToRequest (FileToIndexable (file));
                                                        count_files ++;
                                                }

                                } catch (DirectoryNotFoundException) {
                                        // Best effort: the directory went away while it was
                                        // being scanned.  Keep going, but leave a trace
                                        // instead of swallowing the error silently.
                                        Logger.Log.Debug ("Directory {0} disappeared while scanning, skipping", dir.FullName);
                                }

                                if (Shutdown.ShutdownRequested)
                                        break;

                                count_dirs++;
                        }

                        Logger.Log.Debug ("Scanned {0} files and directories in {1} directories", count_dirs + count_files, count_dirs);

                        if (Shutdown.ShutdownRequested) {
                                backing_fa_store.Flush ();
                                return;
                        }

                        // Time to remove deleted directories from the index and attributes store
                        while (modified_directories.Count > 0) {
                                DirectoryInfo subdir = (DirectoryInfo) modified_directories.Dequeue ();
                                Logger.Log.Debug ("Checking {0} for deleted files and directories", subdir.FullName);

                                // Get a list of all documents from lucene index with ParentDirUriPropKey set as that of subdir
                                ICollection all_dirent = GetAllItemsInDirectory (subdir);
                                foreach (Dirent info in all_dirent) {
                                        // Entries that still exist on disk are left alone.
                                        if ((! info.IsDirectory && File.Exists (info.FullName)) ||
                                            (info.IsDirectory && Directory.Exists (info.FullName)))
                                                continue;

                                        if (info.IsDirectory)
                                                // Recursively remove deleted subdirectories
                                                modified_directories.Enqueue (new DirectoryInfo (info.FullName));

                                        // Queue the removal of the vanished entry.
                                        Uri uri = PathToUri (info.FullName);
                                        Indexable removal = new Indexable (IndexableType.Remove, uri);
                                        AddToRequest (removal);
                                }
                        }

                        bool reschedule = false;
                        // Call Flush until our request is empty.  We have to do this in a loop
                        // because Flush happens in a batch size and some indexables might generate more indexables
                        while (reschedule || pending_request.Count > 0) {
                                if (Shutdown.ShutdownRequested)
                                        break;

                                reschedule = FlushIndexer (driver);
                        }

                        backing_fa_store.Flush ();

                        if (Shutdown.ShutdownRequested)
                                return;

                        Logger.Log.Debug ("Optimizing index");
                        driver.OptimizeNow ();
                }
示例#8
0
        // Orders indexables by their Timestamp.
        //
        // Returns a negative value, zero or a positive value when this
        // indexable's Timestamp is earlier than, equal to or later than the
        // other one's.  Following the IComparable contract, any instance
        // sorts after null, so a null argument yields a positive value
        // rather than a NullReferenceException.  An argument that is not an
        // Indexable still raises InvalidCastException.
        public int CompareTo(object obj)
        {
            if (obj == null)
            {
                return 1;
            }

            Indexable other = (Indexable)obj;

            return DateTime.Compare(this.Timestamp, other.Timestamp);
        }
示例#9
0
	// Entry point of beagrep-extract-content.
	//
	// Arguments are handled in the order given: "--mimetype=" and
	// "--outfile=" affect the files that follow them, any other "--" option
	// is skipped inside the loop (the boolean switches are picked up
	// beforehand), and everything else is treated as a path to display.
	//
	// Returns 0 on success (also after --help / --version) and -1 as soon as
	// one file cannot be displayed.
	static int Main (string[] args)
	{
		SystemInformation.SetProcessName ("beagrep-extract-content");

		if (args.Length < 1 || Array.IndexOf (args, "--help") != -1) {
			PrintUsage ();
			return 0;
		}

		if (Array.IndexOf (args, "--debug") == -1)
			Log.Disable ();

		if (Array.IndexOf (args, "--version") != -1) {
			VersionFu.PrintVersion ();
			return 0;
		}

		if (Array.IndexOf (args, "--tokenize") != -1)
			tokenize = true;

		if (Array.IndexOf (args, "--analyze") != -1)
			analyze = true;

		if (Array.IndexOf (args, "--show-generated") != -1 || Array.IndexOf (args, "--show-children") != -1)
			show_generated = true;

		// Remembered so the console can be restored after every redirected
		// Display call and before an error is reported.
		TextWriter console_out = Console.Out;
		StreamWriter writer = null;

		try {
			foreach (string arg in args) {

				// mime-type option
				if (arg.StartsWith ("--mimetype=")) {
					mime_type = arg.Substring (11);
					continue;
				// output file option
				// we need this in case the output contains different encoding
				// printing to Console might not always display properly
				} else if (arg.StartsWith ("--outfile=")) {
					string outfile = arg.Substring (10);
					Console.WriteLine ("Redirecting output to " + outfile);

					// A later --outfile replaces an earlier one; close the
					// previous writer so its contents are not lost.
					if (writer != null)
						writer.Close ();

					FileStream f = new FileStream (outfile, FileMode.Create);
					writer = new StreamWriter (f, System.Text.Encoding.UTF8);
					continue;
				} else if (arg.StartsWith ("--")) // option, skip it
					continue;

				Uri uri = UriFu.PathToFileUri (arg);
				Indexable indexable = new Indexable (uri);
				if (mime_type != null)
					indexable.MimeType = mime_type;

				try {
					if (writer != null)
						Console.SetOut (writer);

					Display (indexable);
				} catch (Exception e) {
					// Report on the real console: while redirected, the
					// message would only land in the output file's buffer.
					Console.SetOut (console_out);
					Console.WriteLine ("Unable to filter {0}: {1}", uri, e.Message);
					return -1;
				} finally {
					if (writer != null) {
						writer.Flush ();
						Console.SetOut (console_out);
					}
				}

				// Super Lame Hack: gtk-sharp up to 2.10 requires a main loop
				// to dispose of any managed wrappers around GObjects.  Since
				// we don't have one, we'll process all the pending items in
				// a loop here.  This is particularly an issue with maildirs,
				// because we need the loop to clean up after GMime.  Without
				// it, GMime's streams are never completely unref'd, the
				// file descriptors aren't closed, and we run out and crash.
				while (GLib.MainContext.Pending ())
					GLib.MainContext.Iteration ();
			}
		} finally {
			// Runs on the error path as well, so the output file is always
			// completed and released.
			if (writer != null)
				writer.Close ();
		}

		return 0;
	}
示例#10
0
	// Filters one indexable and prints everything known about it to the
	// console: uri, parent uri, the filter chosen and how long choosing it
	// took, the mime type, the uris of filter-generated indexables, the
	// sorted properties and finally the extracted text.
	//
	// When show_generated is set, the filter-generated indexables are kept
	// and displayed recursively after the text; otherwise each one is
	// cleaned up as soon as its uri has been printed.
	//
	// NOTE(review): when indexable.NoContent is set the method returns right
	// after the properties, so collected generated indexables are not
	// displayed and the trailing GetBinaryStream/Cleanup calls are skipped.
	static void Display (Indexable indexable)
	{
		// Separator line between indexables; skipped before the very first one.
		if (!first_indexable) {
			Console.WriteLine ();
			Console.WriteLine ("-----------------------------------------");
			Console.WriteLine ();
		}
		first_indexable = false;

		Console.WriteLine ("Filename: " + indexable.Uri);

		if (indexable.ParentUri != null)
			Console.WriteLine ("Parent: " + indexable.ParentUri);

		Stopwatch watch = new Stopwatch ();

		Filter filter;

		// Time how long it takes to pick and set up a filter.
		watch.Start ();
		if (! FilterFactory.FilterIndexable (indexable, out filter)) {
			// FilterIndexable returned false: treat the indexable as having no content.
			indexable.Cleanup ();
			indexable.NoContent = true;
			filter = null;
		}
		watch.Stop ();

		Console.WriteLine ("Filter: {0} (determined in {1})", filter, watch);
		Console.WriteLine ("MimeType: {0}", indexable.MimeType);
		Console.WriteLine ();

		ArrayList generated_indexables = new ArrayList ();
		Indexable generated_indexable;

		// `first` tracks whether the "Filter-generated indexables:" header is still to be printed.
		bool first = true;
		if (filter != null && filter.HasGeneratedIndexable) {
			while (filter.GenerateNextIndexable (out generated_indexable)) {
				if (generated_indexable == null)
					continue;

				if (first) {
					Console.WriteLine ("Filter-generated indexables:");
					first = false;
				}

				Console.WriteLine ("  {0}", generated_indexable.Uri);

				// Keep it for recursive display later, or release it right away.
				if (show_generated)
					generated_indexables.Add (generated_indexable);
				else
					generated_indexable.Cleanup ();
			}
		}

		if (! first)
			Console.WriteLine ();

		// Make sure that the properties are sorted.
		ArrayList prop_array = new ArrayList (indexable.Properties);
		prop_array.Sort ();

		Console.WriteLine ("Properties:");

		if (indexable.ValidTimestamp)
			Console.WriteLine ("  Timestamp = {0}", DateTimeUtil.ToString (indexable.Timestamp));

		// Properties with a null or empty value are not printed.
		foreach (Beagrep.Property prop in prop_array) {
			if (String.IsNullOrEmpty (prop.Value))
				continue;

			Console.WriteLine ("  {0} = {1}", prop.Key, prop.Value);
		}

		Console.WriteLine ();

		if (indexable.NoContent)
			return;

		// From here on the stopwatch measures text extraction.
		watch.Reset ();
		watch.Start ();

		TextReader reader;
		Analyzer indexing_analyzer = new BeagrepAnalyzer ();

		// Only referenced by the disabled (#if false) and commented-out code below.
		char[] buffer = new char [2048];
		reader = indexable.GetTextReader ();
		// With tokenize set, each token goes on its own line; otherwise tokens are space separated.
		char separater_char = (tokenize ? '\n' : ' ');
		if (reader != null) {
			// From here `first` means "no content has been seen yet".
			first = true;

			if (analyze) {
				if (! stats_only)
					Console.WriteLine ("Content:");

				// Run the text through the analyzer and print the resulting tokens.
				TokenStream token_stream = indexing_analyzer.TokenStream ("Text", reader);
				Lucene.Net.Analysis.Token token = token_stream.Next ();
				first = (token == null);

				if (! stats_only)
					for (; token != null; token = token_stream.Next ())
						Console.Write ("{0}{1}", token.TermText (), separater_char);

				token_stream.Close ();
			} else {
#if false
				while (true) {
					int l = reader.Read (buffer, 0, 2048);
					if (l <= 0)
						break;
					if (first)
						first = false;
					if (! stats_only)
						DisplayContent (buffer, l);
				}
#else
				// Print the text line by line.  Note that the "Content:" header
				// is printed here even when stats_only is set.
				string line;
				first = true;
				while ((line = reader.ReadLine ()) != null) {
					if (first) {
						Console.WriteLine ("Content:");
						first = false;
					}
					if (! stats_only)
						DisplayContent (line);
				}
#endif
			}

			reader.Close ();

			if (first)
				Console.WriteLine ("(no content)");
			else
				// Writes a newline character followed by the line terminator.
				Console.WriteLine ('\n');
		}

		/*
		reader = indexable.GetHotTextReader ();
		first = true;
		if (reader != null) {
			Console.WriteLine ("HotContent:");

			if (analyze) {
				TokenStream token_stream = indexing_analyzer.TokenStream ("HotText", reader);
				Lucene.Net.Analysis.Token token = token_stream.Next ();
				first = (token == null);

				for (; token != null; token = token_stream.Next ())
					Console.Write ("{0}{1}", token.TermText (), separater_char);

				token_stream.Close ();
			} else {
				while (true) {
					int l = reader.Read (buffer, 0, 2048);
					if (l <= 0)
						break;
					if (first)
						first = false;
					DisplayContent (buffer, l);
				}
			}

			reader.Close ();

			if (first)
				Console.WriteLine ("(no hot content)");
			else
				Console.WriteLine ('\n');
		}
		*/

		watch.Stop ();

		Console.WriteLine ();
		Console.WriteLine ("Text extracted in {0}", watch);


		// Recurse into the generated indexables (the list is only populated when show_generated is set).
		foreach (Indexable gi in generated_indexables)
			Display (gi);

		// Fetch the binary stream only to close it.
		Stream stream = indexable.GetBinaryStream ();
		if (stream != null)
			stream.Close ();

		// Clean up any temporary files associated with filtering this indexable.
		indexable.Cleanup ();
	}
示例#11
0
		// Copies every property of a top-level parent onto this indexable.
		//
		// Each property is cloned and only the clone is touched.  The
		// clone's key is prefixed with "parent:" unless
		//  - it is a private property, i.e. the key starts with
		//    Property.PrivateNamespace (these are not properties of the
		//    file content), or
		//  - the key already starts with "parent:".
		//
		// Prefixes are matched ordinally: property keys are identifiers, so
		// the result must not depend on the current culture.
		private void CopyPropertyParentToChild (Indexable parent)
		{
			foreach (Property prop in parent.Properties) {

				Property new_prop = (Property) prop.Clone ();

				if (! new_prop.Key.StartsWith (Property.PrivateNamespace, StringComparison.Ordinal) &&
				    ! new_prop.Key.StartsWith ("parent:", StringComparison.Ordinal))
					new_prop.Key = "parent:" + new_prop.Key;

				this.AddProperty (new_prop);
			}
		}
示例#12
0
		// FIXME: Copying the correct properties from parent to child:
		// (This is not perfect yet)
		// It does not make sense to have parent:parent:parent:...:parent:foo
		// for property names of a nested child
		// Moreover, if indexable a.mbox has child b.zip which has child c.zip,
		// then upon matching c.zip, we would like to get the information from
		// a.mbox (i.e. the toplevel indexable) only. Intermediate parent information
		// is not necessary for displaying results; in fact, storing them would cause
		// confusion during display.
		// E.g. storing parent:beagrep:filename for all parents
		// would cause parent:beagrep:filename=a.mbox and parent:beagrep:filename=b.zip,
		// whereas we are only interested in toplevel parent:beagrep:filename=a.mbox
		// For indexables which need to store the intermediate/immediate parent info
		// separately, explicitly store them.
		// Another problem is, toplevel indexable might want to store information
		// which should not be matched when searching for its child. Copying those
		// properties in all children will incorrectly match them.
		//

		// Copies the properties of `parent` -- which is itself a child --
		// onto this indexable.
		//
		// Every property is cloned and added.  Clones whose key starts with
		// "parent:" or Property.PrivateNamespace are added unchanged; all
		// other clones are added with IsStored set to false.
		//
		// Prefixes are matched ordinally: property keys are identifiers, so
		// the result must not depend on the current culture.
		private void CopyPropertyChildToChild (Indexable parent)
		{
			foreach (Property prop in parent.Properties) {

				bool keep_as_is =
					prop.Key.StartsWith ("parent:", StringComparison.Ordinal) ||
					prop.Key.StartsWith (Property.PrivateNamespace, StringComparison.Ordinal);

				Property new_prop = (Property) prop.Clone ();

				// The intermediate parent's own properties are copied but
				// flagged as not stored.
				if (! keep_as_is)
					new_prop.IsStored = false;

				this.AddProperty (new_prop);
			}
		}
示例#13
0
		//////////////////////////

		// Marks this indexable as a child of `parent` and inherits the
		// parent's data:
		//  - ParentUri: the parent's ParentUri when the parent is itself a
		//    child, otherwise the parent's Uri;
		//  - Timestamp: taken from the parent only when ours is not valid;
		//  - HitType:   taken from the parent only when ours is null or empty;
		//  - Source:    always taken from the parent;
		//  - properties: copied with the helper matching the parent's kind.
		public void SetChildOf (Indexable parent)
		{
			this.IsChild = true;
			this.ParentUri = parent.IsChild ? parent.ParentUri : parent.Uri;

			if (! this.ValidTimestamp)
				this.Timestamp = parent.Timestamp;

			if (string.IsNullOrEmpty (this.HitType))
				this.HitType = parent.HitType;

			this.Source = parent.Source;

			// FIXME: The parent's properties are set on the child so
			// that queries which would otherwise match only the parent
			// also match the child, at least until there is proper RDF
			// support.
			if (! parent.IsChild)
				CopyPropertyParentToChild (parent);
			else
				CopyPropertyChildToChild (parent);
		}
示例#14
0
		// Folds another indexable into this one.  No attempt is made to
		// verify that merging the two is meaningful.
		//
		//  - the later of the two timestamps is kept,
		//  - every property of `other` is handed to AddProperty,
		//  - every local_state entry of `other` replaces the entry stored
		//    under the same key here.
		public void Merge (Indexable other)
		{
			// Keep whichever timestamp is more recent.
			if (this.Timestamp < other.Timestamp) {
				this.Timestamp = other.Timestamp;
			}

			foreach (Property incoming in other.Properties) {
				this.AddProperty (incoming);
			}

			// Entries coming from `other` win over existing ones.
			foreach (DictionaryEntry pair in other.local_state) {
				this.local_state [pair.Key] = pair.Value;
			}
		}
示例#15
0
                // Builds the indexable for a regular file.
                //
                // Returns null when the file does not exist or when fa_store
                // reports it as up to date and filtered for its current last
                // write time.  Otherwise the returned indexable carries the
                // file's timestamp, its size, the standard FileSystemQueryable
                // properties and the uri of its containing directory.
                static Indexable FileToIndexable (FileInfo file)
                {
                        bool needs_indexing = file.Exists
                                && ! fa_store.IsUpToDateAndFiltered (PathInIndex (file.FullName),
                                                                     FileSystem.GetLastWriteTimeUtc (file.FullName));
                        if (! needs_indexing)
                                return null;

                        Indexable result = new Indexable (PathToUri (file.FullName));
                        result.Timestamp = file.LastWriteTimeUtc;
                        result.FlushBufferCache = true;

                        // Size first, then the standard properties we use in
                        // the FileSystemQueryable.
                        result.AddProperty (Property.NewUnsearched ("fixme:filesize", file.Length));
                        FSQ.AddStandardPropertiesToIndexable (result, file.Name, Guid.Empty, false);

                        // Store directory name in the index
                        result.AddProperty (Property.NewUnsearched (Property.ParentDirUriPropKey, PathToUri (file.DirectoryName)));

                        return result;
                }
示例#16
0
                // Builds the indexable for a directory.
                //
                // Returns null when the directory does not exist, or when it is
                // already present in fa_store and either deletions are not being
                // tracked (arg_delete unset) or its stored attributes are up to
                // date.  A known directory whose attributes are stale is also
                // enqueued on modified_directories.
                //
                // The parent-directory property is only added when the directory
                // has a parent: DirectoryInfo.Parent is null for a root
                // directory, which used to raise a NullReferenceException here.
                static Indexable DirectoryToIndexable (DirectoryInfo dir, Queue modified_directories)
                {
                        if (!dir.Exists)
                                return null;

                        // Check if the directory information is stored in attributes store
                        // And if the mtime of the directory is same as that in the attributes store
                        FileAttributes attr = fa_store.Read (PathInIndex (dir.FullName));

                        // If the directory exists in the fa store, then it is already indexed.
                        if (attr != null) {
                                // If we don't care about deleted content then we are fine.
                                // If the attributes are up-to-date, then we are fine too.
                                if (! arg_delete || FileAttributesStore.IsUpToDate (attr, FileSystem.GetLastWriteTimeUtc (dir.FullName)))
                                        return null;

                                // But the last write time needs to be uptodate to support enable-deletion,
                                // so we actually index the directories, even if --disable-directories
                                // is set.
                                modified_directories.Enqueue (dir);
                        }

                        // Create the indexable and add the standard properties we
                        // use in the FileSystemQueryable.
                        Uri uri = PathToUri (dir.FullName);
                        Indexable indexable = new Indexable (uri);
                        indexable.MimeType = "inode/directory";
                        indexable.NoContent = true;
                        indexable.Timestamp = dir.LastWriteTimeUtc;

                        // Store the directory information in the index anyway, but if --disable-directories
                        // was passed, then do not store the names and other standard properties
                        // used during searching
                        if (! arg_disable_directories)
                                FSQ.AddStandardPropertiesToIndexable (indexable, dir.Name, Guid.Empty, false);

                        // Add directory name property; a root directory has no parent to record.
                        DirectoryInfo parent_dir = dir.Parent;
                        if (parent_dir != null)
                                indexable.AddProperty (Property.NewUnsearched (Property.ParentDirUriPropKey, PathToUri (parent_dir.FullName)));

                        indexable.AddProperty (Property.NewBool (Property.IsDirectoryPropKey, true));

                        return indexable;
                }
示例#17
0
 // Registers an indexable to be added: StoreStream() is called on it
 // first, then it is appended to the to_add collection.
 public void Add(Indexable indexable)
 {
     // Store the stream before the indexable is queued.
     indexable.StoreStream();

     to_add.Add(indexable);
 }