/// <summary>
/// Builds the Indexable that describes a directory, or returns null when the
/// directory does not have to be (re)indexed.
/// </summary>
/// <param name="dir">The directory to describe.</param>
/// <param name="modified_directories">
/// Queue that receives <paramref name="dir"/> when it is already present in the
/// attributes store but has to be indexed again.
/// </param>
/// <returns>
/// The new Indexable, or null if the directory does not exist or is already
/// indexed and does not need to be refreshed.
/// </returns>
static Indexable DirectoryToIndexable(DirectoryInfo dir, Queue modified_directories)
{
    if (!dir.Exists) {
        return(null);
    }

    // Check if the directory information is stored in attributes store
    // And if the mtime of the directory is same as that in the attributes store
    FileAttributes attr = fa_store.Read(PathInIndex(dir.FullName));

    // If the directory exists in the fa store, then it is already indexed.
    if (attr != null) {
        // If we don't care about deleted content then we are fine.
        // If the attributes are up-to-date, then we are fine too.
        if (!arg_delete || FileAttributesStore.IsUpToDate(attr, FileSystem.GetLastWriteTimeUtc(dir.FullName))) {
            return(null);
        }

        // But the last write time needs to be uptodate to support enable-deletion,
        // so we actually index the directories, even if --disable-directories
        // is set.
        modified_directories.Enqueue(dir);
    }

    // Create the indexable and add the standard properties we
    // use in the FileSystemQueryable.
    Uri uri = PathToUri(dir.FullName);
    Indexable indexable = new Indexable(uri);
    indexable.MimeType = "inode/directory";
    indexable.NoContent = true;
    indexable.Timestamp = dir.LastWriteTimeUtc;

    // Store the directory information in the index anyway, but if --disable-directories
    // was passed, then do not store the names and other standard properties
    // used during searching
    if (!arg_disable_directories) {
        FSQ.AddStandardPropertiesToIndexable(indexable, dir.Name, Guid.Empty, false);
    }

    // Add directory name property.
    // DirectoryInfo.Parent is null for a filesystem root (e.g. "/"), which is
    // a legal source path, so only record the parent when there is one.
    DirectoryInfo parent = dir.Parent;
    if (parent != null) {
        indexable.AddProperty(Property.NewUnsearched(Property.ParentDirUriPropKey, PathToUri(parent.FullName)));
    }

    indexable.AddProperty(Property.NewBool(Property.IsDirectoryPropKey, true));

    return(indexable);
}
// FIXME: Move these to LuceneCommon if and when we decide to
// support adding/removing arbitrary backends at runtime

/// <summary>
/// Tears this static queryable down: disposes the text cache (if any), closes
/// the cached readers of both stores, closes the stores themselves and finally
/// disposes the file attributes store.
/// </summary>
internal void Close()
{
    Log.Debug("Removing static queryable {0}", IndexName);

    // The text cache is optional.
    if (text_cache != null) {
        text_cache.Dispose();
    }

    // Release the cached IndexReaders of both stores first ...
    LuceneCommon.CloseReader(
        LuceneCommon.GetReader(Driver.PrimaryStore));
    LuceneCommon.CloseReader(
        LuceneCommon.GetReader(Driver.SecondaryStore));

    // ... then the stores themselves.
    Driver.PrimaryStore.Close();
    Driver.SecondaryStore.Close();

    FileAttributesStore.Dispose();
}
/// <summary>
/// Creates the file attributes store for the index called <paramref name="name"/>
/// and assigns it to <c>fa_store</c>.
/// </summary>
/// <remarks>
/// The store is keyed by a fingerprint. Its internal part is kept in a file
/// named "fingerprint" inside the index's storage directory and is generated
/// when the file cannot be read; the optional external part is appended after
/// a dash.
/// </remarks>
/// <param name="name">Name of the index directory below <c>PathFinder.IndexDir</c>.</param>
/// <param name="external_fingerprint">Optional suffix for the fingerprint; may be null.</param>
protected void InitFileAttributesStore(string name, string external_fingerprint)
{
    string storage_path = Path.Combine(PathFinder.IndexDir, name);
    string fingerprint_file = Path.Combine(storage_path, "fingerprint");
    string internal_fingerprint = null;

    if (!Directory.Exists(storage_path)) {
        Directory.CreateDirectory(storage_path);
    } else if (File.Exists(fingerprint_file)) {
        // "using" guarantees the reader is closed even if ReadLine throws.
        using (StreamReader reader = new StreamReader(fingerprint_file)) {
            internal_fingerprint = reader.ReadLine();
        }
    }

    // Fresh directory, or an existing directory whose fingerprint file is
    // missing or empty (e.g. an interrupted first run): generate a new
    // fingerprint instead of failing or continuing with a null one.
    if (string.IsNullOrEmpty(internal_fingerprint)) {
        internal_fingerprint = GuidFu.ToShortString(Guid.NewGuid());
        using (StreamWriter writer = new StreamWriter(fingerprint_file)) {
            writer.WriteLine(internal_fingerprint);
        }
    }

    string fingerprint;
    if (external_fingerprint != null) {
        fingerprint = internal_fingerprint + "-" + external_fingerprint;
    } else {
        fingerprint = internal_fingerprint;
    }

    // Use extended attributes when they are supported, sqlite otherwise.
    IFileAttributesStore ifa_store;
    if (ExtendedAttribute.Supported) {
        ifa_store = new FileAttributesStore_ExtendedAttribute(fingerprint);
    } else {
        ifa_store = new FileAttributesStore_Sqlite(storage_path, fingerprint);
    }

    fa_store = new FileAttributesStore(ifa_store);
}
// Remapping hack from DumpIndex

/// <summary>
/// Translates a file URI into the internal "uid:" URI of the file system
/// backend. URIs of any other backend, and URIs without stored file
/// attributes, are returned unchanged.
/// </summary>
/// <param name="driver">Driver of the index the URI belongs to.</param>
/// <param name="uri">The URI to remap.</param>
/// <returns>The "uid:" URI (fragment preserved), or <paramref name="uri"/> itself.</returns>
static Uri RemapUri (LuceneQueryingDriver driver, Uri uri)
{
    // We only need to remap URIs in the file system backend
    if (driver.IndexName != "FileSystemIndex")
        return uri;

    FileAttributesStore fa_store = new FileAttributesStore (new FileAttributesStore_Mixed (Path.Combine (PathFinder.IndexDir, "FileSystemIndex"), driver.Fingerprint));

    try {
        string path = uri.LocalPath;

        Beagrep.Daemon.FileAttributes attr = fa_store.Read (path);
        if (attr == null) {
            Console.WriteLine ("No file attribute info for {0}", uri);
            return uri;
        }

        return new Uri ("uid:" + GuidFu.ToShortString (attr.UniqueId) + uri.Fragment);
    } finally {
        // A new store is created on every call, so release it on every exit path.
        fa_store.Dispose ();
    }
}
/// <summary>
/// Called after an indexable has been added: records the file's mtime (and,
/// for unshared files, the filter that handled it) in the file attributes store.
/// </summary>
/// <param name="indexable">The indexable that was just added.</param>
/// <param name="receipt">Receipt carrying the filter name and version.</param>
/// <returns>Always <c>indexable.Uri</c>.</returns>
override protected Uri PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
    // Look up what we cached about the file; nothing to do if we have nothing.
    CachedFileInfo cached = file_info_cache [indexable.Uri] as CachedFileInfo;

    if (cached != null) {
        file_info_cache.Remove (cached.Uri);

        // Yeah, this is ghetto.  If it's a file that's shared across multiple
        // indexables, only tag it with when the last indexable has been indexed.
        // (presumably DecrementReferenceCount returns true while other
        // indexables of the file are still outstanding -- confirm)
        bool still_referenced = cached.Shared && DecrementReferenceCount (cached.Path);

        if (! still_referenced) {
            // The file has been successfully indexed, so bring its
            // stored attributes up to date.
            FileAttributes file_attr = FileAttributesStore.ReadOrCreate (cached.Path);
            file_attr.LastWriteTime = cached.Mtime;

            // Filter information is only meaningful when the file produced
            // a single indexable.
            if (! cached.Shared) {
                file_attr.FilterName = receipt.FilterName;
                file_attr.FilterVersion = receipt.FilterVersion;
            }

            bool written = FileAttributesStore.Write (file_attr);
            if (! written)
                Logger.Log.Warn ("Couldn't write attributes for {0}", cached.Path);
        }
    }

    return indexable.Uri;
}
/// <summary>
/// Replaces, in place, every file URI in <paramref name="uris"/> with the
/// internal "uid:" URI of the file system backend. Entries without stored
/// file attributes are left untouched; lists of other backends are returned as is.
/// </summary>
/// <param name="driver">Driver of the index the URIs belong to.</param>
/// <param name="uris">List of <see cref="Uri"/> objects; modified in place.</param>
/// <returns>The same list instance that was passed in.</returns>
static ArrayList RemapUris (LuceneQueryingDriver driver, ArrayList uris)
{
    // We only need to remap URIs in the file system backend
    if (driver.IndexName != "FileSystemIndex")
        return uris;

    FileAttributesStore fa_store = new FileAttributesStore (new FileAttributesStore_Mixed (Path.Combine (PathFinder.IndexDir, "FileSystemIndex"), driver.Fingerprint));

    try {
        for (int i = 0; i < uris.Count; i++) {
            Uri uri = (Uri) uris [i];
            string path = uri.LocalPath;

            Beagrep.Daemon.FileAttributes attr = fa_store.Read (path);
            if (attr == null) {
                Console.WriteLine ("No file attribute info for {0}", uri);
                continue;
            }

            Uri internal_uri = new Uri ("uid:" + GuidFu.ToShortString (attr.UniqueId) + uri.Fragment);
            uris [i] = internal_uri;
        }
    } finally {
        // A new store is created on every call, so release it on every exit path.
        fa_store.Dispose ();
    }

    return uris;
}
/// <summary>
/// Creates the file attributes store for the index called <paramref name="name"/>
/// and assigns it to <c>fa_store</c>.
/// </summary>
/// <remarks>
/// The store is keyed by a fingerprint. Its internal part is kept in a file
/// named "fingerprint" inside the index's storage directory and is generated
/// when the file cannot be read; the optional external part is appended after
/// a dash.
/// </remarks>
/// <param name="name">Name of the index directory below <c>PathFinder.IndexDir</c>.</param>
/// <param name="external_fingerprint">Optional suffix for the fingerprint; may be null.</param>
protected void InitFileAttributesStore (string name, string external_fingerprint)
{
    string storage_path = Path.Combine (PathFinder.IndexDir, name);
    string fingerprint_file = Path.Combine (storage_path, "fingerprint");
    string internal_fingerprint = null;

    if (! Directory.Exists (storage_path)) {
        Directory.CreateDirectory (storage_path);
    } else if (File.Exists (fingerprint_file)) {
        // "using" guarantees the reader is closed even if ReadLine throws.
        using (StreamReader reader = new StreamReader (fingerprint_file)) {
            internal_fingerprint = reader.ReadLine ();
        }
    }

    // Fresh directory, or an existing directory whose fingerprint file is
    // missing or empty (e.g. an interrupted first run): generate a new
    // fingerprint instead of failing or continuing with a null one.
    if (string.IsNullOrEmpty (internal_fingerprint)) {
        internal_fingerprint = GuidFu.ToShortString (Guid.NewGuid ());
        using (StreamWriter writer = new StreamWriter (fingerprint_file)) {
            writer.WriteLine (internal_fingerprint);
        }
    }

    string fingerprint;
    if (external_fingerprint != null)
        fingerprint = internal_fingerprint + "-" + external_fingerprint;
    else
        fingerprint = internal_fingerprint;

    // Use extended attributes when they are supported, sqlite otherwise.
    IFileAttributesStore ifa_store;
    if (ExtendedAttribute.Supported)
        ifa_store = new FileAttributesStore_ExtendedAttribute (fingerprint);
    else
        ifa_store = new FileAttributesStore_Sqlite (storage_path, fingerprint);

    fa_store = new FileAttributesStore (ifa_store);
}
/// <summary>
/// Entry point of beagrep-build-index: parses the command line, validates the
/// target directory, sets up the indexing driver and attribute store, runs the
/// index worker and finally writes StaticIndex.xml (for a new index) and
/// optionally restarts the process.
/// </summary>
/// <remarks>
/// Every fatal validation error is logged and terminates the process with
/// exit status 1.
/// </remarks>
/// <param name="args">Command line arguments (options and source paths).</param>
static void DoMain (string [] args)
{
    SystemInformation.SetProcessName ("beagrep-build-index");

    if (args.Length < 2)
        PrintUsage ();

    ArrayList allowed_patterns = new ArrayList ();
    ArrayList denied_patterns = new ArrayList ();
    ArrayList denied_dir_patterns = new ArrayList ();

    int i = 0;
    while (i < args.Length) {

        string arg = args [i];
        ++i;
        // Value of the option, if any; options that take a value skip it with ++i.
        string next_arg = i < args.Length ? args [i] : null;

        switch (arg) {
        case "-h":
        case "--help":
            PrintUsage ();
            break;

        case "--tag":
            if (next_arg != null)
                arg_tag = next_arg;
            ++i;
            break;

        case "-r":
        case "--recursive":
            arg_recursive = true;
            break;

        case "--enable-deletion":
            arg_delete = true;
            break;

        case "--disable-directories":
            arg_disable_directories = true;
            break;

        case "--enable-text-cache":
            arg_cache_text = true;
            break;

        case "--target":
            if (next_arg != null)
                arg_output = Path.IsPathRooted (next_arg) ? next_arg : Path.GetFullPath (next_arg);
            ++i;
            break;

        case "--disable-filtering":
            arg_disable_filtering = true;
            break;

        case "--allow-pattern":
            if (next_arg == null)
                break;

            // Split yields the whole string as its only element when
            // there is no comma, so no special case is needed.
            foreach (string pattern in next_arg.Split (','))
                allowed_patterns.Add (pattern);

            ++i;
            break;

        case "--deny-directory-pattern":
            if (next_arg == null)
                break;

            foreach (string pattern in next_arg.Split (','))
                denied_dir_patterns.Add (pattern);

            ++i;
            break;

        case "--deny-pattern":
            if (next_arg == null)
                break;

            foreach (string pattern in next_arg.Split (','))
                denied_patterns.Add (pattern);

            ++i;
            break;

        case "--disable-restart":
            arg_disable_restart = true;
            break;

        case "--source":
            if (next_arg == null)
                break;

            arg_source = next_arg;
            ++i;
            break;

        default:
            // Anything starting with a dash (this includes "--") is an unknown option.
            if (arg.StartsWith ("-"))
                PrintUsage ();

            // Everything else is a source path.
            string path = Path.IsPathRooted (arg) ? arg : Path.GetFullPath (arg);
            if (path != "/" && path.EndsWith ("/"))
                path = path.TrimEnd ('/');

            if (Directory.Exists (path))
                pending_directories.Enqueue (new DirectoryInfo (path));
            else if (File.Exists (path))
                pending_files.Enqueue (new FileInfo (path));
            break;
        }
    }

    /////////////////////////////////////////////////////////

    if (arg_output == null) {
        Logger.Log.Error ("--target must be specified");
        Environment.Exit (1);
    }

    // Set the storage dir, this should be used to store log messages
    // and filterver.dat
    PathFinder.StorageDir = arg_output;

    foreach (FileSystemInfo info in pending_directories) {
        if (Path.GetFullPath (arg_output) == info.FullName) {
            Logger.Log.Error ("Target directory cannot be one of the source paths.");
            Environment.Exit (1);
        }
    }

    foreach (FileSystemInfo info in pending_files) {
        if (Path.GetFullPath (arg_output) == info.FullName) {
            Logger.Log.Error ("Target directory cannot be one of the source paths.");
            Environment.Exit (1);
        }
    }

    if (!Directory.Exists (Path.GetDirectoryName (arg_output))) {
        Logger.Log.Error ("Index directory not available for construction: {0}", arg_output);
        Environment.Exit (1);
    }

    // Be *EXTRA PARANOID* about the contents of the target
    // directory, because creating an indexing driver will
    // nuke it.
    if (Directory.Exists (arg_output)) {

        foreach (FileInfo info in DirectoryWalker.GetFileInfos (arg_output)) {
            if (Array.IndexOf (allowed_files, info.Name) == -1) {
                Logger.Log.Error ("{0} doesn't look safe to delete: non-Beagrep file {1} was found", arg_output, info.FullName);
                Environment.Exit (1);
            }
        }

        foreach (DirectoryInfo info in DirectoryWalker.GetDirectoryInfos (arg_output)) {
            if (Array.IndexOf (allowed_dirs, info.Name) == -1) {
                Logger.Log.Error ("{0} doesn't look safe to delete: non-Beagrep directory {1} was found", arg_output, info.FullName);
                Environment.Exit (1);
            }
        }
    }

    string config_file_path = Path.Combine (arg_output, "StaticIndex.xml");
    string prev_source = null;
    if (File.Exists (config_file_path)) {
        Config static_index_config = Conf.LoadFrom (config_file_path);
        if (static_index_config == null) {
            Log.Error ("Invalid configuation file {0}", config_file_path);
            Environment.Exit (1);
        }

        prev_source = static_index_config.GetOption ("Source", null);
        if (arg_source != null && prev_source != arg_source) {
            Log.Error ("Source already set to {0} for existing static index. Cannot set source to {1}.", prev_source, arg_source);
            Environment.Exit (1);
        }

        // If arg_source is not given, and prev_source is present, use prev_source
        // as the arg_source. This is useful for re-running build-index without
        // giving --source for already existing static index
        arg_source = prev_source;
    }

    // Fall back to the name of the target directory.
    if (arg_source == null) {
        DirectoryInfo dir = new DirectoryInfo (StringFu.SanitizePath (arg_output));
        arg_source = dir.Name;
    }

    string global_files_config = Path.Combine (PathFinder.ConfigDataDir, "config-files");
    global_files_config = Path.Combine (global_files_config, Conf.Names.FilesQueryableConfig + ".xml");
    if (! File.Exists (global_files_config)) {
        Log.Error ("Global configuration file not found {0}", global_files_config);
        // This is a failure, so report it with a non-zero exit status
        // like every other fatal error above.
        Environment.Exit (1);
    }

    // Setup regexes for allowed/denied patterns
    if (allowed_patterns.Count > 0) {
        allowed_regex = StringFu.GetPatternRegex (allowed_patterns);
    } else {
        // Read the exclude values from config
        // For system-wide indexes, only the global config value will be used
        Config config = Conf.Get (Conf.Names.FilesQueryableConfig);
        List<string[]> values = config.GetListOptionValues (Conf.Names.ExcludePattern);
        if (values != null)
            foreach (string[] exclude in values)
                denied_patterns.Add (exclude [0]);

        if (denied_patterns.Count > 0)
            denied_regex = StringFu.GetPatternRegex (denied_patterns);
    }

    if (denied_dir_patterns.Count > 0) {
        denied_dir_regex = StringFu.GetPatternRegex (denied_dir_patterns);
        Log.Always("Will ignore directories matching regular expression: {0}", denied_dir_regex);
    }

    Log.Always ("Starting beagrep-build-index (pid {0}) at {1}", Process.GetCurrentProcess ().Id, DateTime.Now);

    driver = new LuceneIndexingDriver (arg_output, MINOR_VERSION, false);
    driver.TextCache = (arg_cache_text) ? new TextCache (arg_output) : null;
    if (driver.TextCache != null)
        driver.TextCache.WorldReadable = true;

    backing_fa_store = new FileAttributesStore_Sqlite (driver.TopDirectory, driver.Fingerprint);
    fa_store = new FileAttributesStore (backing_fa_store);

    // Set up signal handlers
#if MONO_1_9
    Shutdown.SetupSignalHandlers (delegate (int signal)
    {
        if (signal == (int) Mono.Unix.Native.Signum.SIGINT ||
            signal == (int) Mono.Unix.Native.Signum.SIGTERM)
            Shutdown.BeginShutdown ();
    });
#else
    SetupSignalHandlers ();
#endif

    Thread monitor_thread = null;

    Stopwatch watch = new Stopwatch ();
    watch.Start ();

    if (!arg_disable_restart) {
        // Start the thread that monitors memory usage.
        monitor_thread = ExceptionHandlingThread.Start (new ThreadStart (MemoryMonitorWorker));
    }

    // Start indexworker to do the crawling and indexing
    IndexWorker ();

    // Join any threads so that we know that we're the only thread still running
    if (monitor_thread != null)
        monitor_thread.Join ();

    watch.Stop ();
    Logger.Log.Debug ("Elapsed time {0}.", watch);

    // Write this after indexing is done. This is because, if creating a new index,
    // LuceneIndexingDriver.Create() is called which purges the entire directory.
    if (prev_source == null) {
        Config static_index_config = Conf.LoadNew ("StaticIndex.xml");

        // Write StaticIndex.xml containing:
        // The name of the source
        static_index_config.SetOption ("Source", arg_source);
        static_index_config ["Source"].Description = "Source of the static index";

        Conf.SaveTo (static_index_config, config_file_path);
    }

    if (restart) {
        Logger.Log.Debug ("Restarting beagrep-build-index");
        Process p = new Process ();
        p.StartInfo.UseShellExecute = false;
        // FIXME: Maybe this isn't the right way to do things?  It should be ok,
        // the PATH is inherited from the shell script which runs mono itself.
        // NOTE(review): arguments are joined with plain spaces, so an argument
        // that itself contains a space is not quoted for the new process.
        p.StartInfo.FileName = "mono";
        p.StartInfo.Arguments = String.Join (" ", Environment.GetCommandLineArgs ());
        p.Start ();
    }

    Log.Always ("Exiting beagrep-build-index (pid {0}) at {1}", Process.GetCurrentProcess ().Id, DateTime.Now);
}
/// <summary>
/// Convenience wrapper: asks the file attributes store whether
/// <paramref name="path"/> is up to date.
/// </summary>
/// <param name="path">Path of the file to check.</param>
/// <returns>Whatever <c>FileAttributesStore.IsUpToDate</c> reports for the path.</returns>
public bool IsUpToDate (string path)
{
    bool up_to_date = FileAttributesStore.IsUpToDate (path);
    return up_to_date;
}
///////////////////////////////////////////////////////////////////////////

// Convenience functions

/// <summary>
/// Convenience wrapper: asks the file attributes store whether
/// <paramref name="path"/> is up to date with respect to <paramref name="filter"/>.
/// </summary>
/// <param name="path">Path of the file to check.</param>
/// <param name="filter">Filter that is passed through to the store.</param>
/// <returns>Whatever <c>FileAttributesStore.IsUpToDate</c> reports for the pair.</returns>
public bool IsUpToDate (string path, Filter filter)
{
    bool up_to_date = FileAttributesStore.IsUpToDate (path, filter);
    return up_to_date;
}
/// <summary>
/// Entry point of beagrep-build-index: parses the command line, validates the
/// target directory, sets up the indexing driver and attribute store, runs the
/// index worker and finally writes StaticIndex.xml (for a new index) and
/// optionally restarts the process.
/// </summary>
/// <remarks>
/// Every fatal validation error is logged and terminates the process with
/// exit status 1.
/// </remarks>
/// <param name="args">Command line arguments (options and source paths).</param>
static void DoMain(string [] args)
{
    SystemInformation.SetProcessName("beagrep-build-index");

    if (args.Length < 2) {
        PrintUsage();
    }

    ArrayList allowed_patterns = new ArrayList();
    ArrayList denied_patterns = new ArrayList();
    ArrayList denied_dir_patterns = new ArrayList();

    int i = 0;
    while (i < args.Length) {
        string arg = args [i];
        ++i;
        // Value of the option, if any; options that take a value skip it with ++i.
        string next_arg = i < args.Length ? args [i] : null;

        switch (arg) {
        case "-h":
        case "--help":
            PrintUsage();
            break;

        case "--tag":
            if (next_arg != null) {
                arg_tag = next_arg;
            }
            ++i;
            break;

        case "-r":
        case "--recursive":
            arg_recursive = true;
            break;

        case "--enable-deletion":
            arg_delete = true;
            break;

        case "--disable-directories":
            arg_disable_directories = true;
            break;

        case "--enable-text-cache":
            arg_cache_text = true;
            break;

        case "--target":
            if (next_arg != null) {
                arg_output = Path.IsPathRooted(next_arg) ? next_arg : Path.GetFullPath(next_arg);
            }
            ++i;
            break;

        case "--disable-filtering":
            arg_disable_filtering = true;
            break;

        case "--allow-pattern":
            if (next_arg == null) {
                break;
            }

            // Split yields the whole string as its only element when
            // there is no comma, so no special case is needed.
            foreach (string pattern in next_arg.Split(',')) {
                allowed_patterns.Add(pattern);
            }

            ++i;
            break;

        case "--deny-directory-pattern":
            if (next_arg == null) {
                break;
            }

            foreach (string pattern in next_arg.Split(',')) {
                denied_dir_patterns.Add(pattern);
            }

            ++i;
            break;

        case "--deny-pattern":
            if (next_arg == null) {
                break;
            }

            foreach (string pattern in next_arg.Split(',')) {
                denied_patterns.Add(pattern);
            }

            ++i;
            break;

        case "--disable-restart":
            arg_disable_restart = true;
            break;

        case "--source":
            if (next_arg == null) {
                break;
            }

            arg_source = next_arg;
            ++i;
            break;

        default:
            // Anything starting with a dash (this includes "--") is an unknown option.
            if (arg.StartsWith("-")) {
                PrintUsage();
            }

            // Everything else is a source path.
            string path = Path.IsPathRooted(arg) ? arg : Path.GetFullPath(arg);
            if (path != "/" && path.EndsWith("/")) {
                path = path.TrimEnd('/');
            }

            if (Directory.Exists(path)) {
                pending_directories.Enqueue(new DirectoryInfo(path));
            } else if (File.Exists(path)) {
                pending_files.Enqueue(new FileInfo(path));
            }
            break;
        }
    }

    /////////////////////////////////////////////////////////

    if (arg_output == null) {
        Logger.Log.Error("--target must be specified");
        Environment.Exit(1);
    }

    // Set the storage dir, this should be used to store log messages
    // and filterver.dat
    PathFinder.StorageDir = arg_output;

    foreach (FileSystemInfo info in pending_directories) {
        if (Path.GetFullPath(arg_output) == info.FullName) {
            Logger.Log.Error("Target directory cannot be one of the source paths.");
            Environment.Exit(1);
        }
    }

    foreach (FileSystemInfo info in pending_files) {
        if (Path.GetFullPath(arg_output) == info.FullName) {
            Logger.Log.Error("Target directory cannot be one of the source paths.");
            Environment.Exit(1);
        }
    }

    if (!Directory.Exists(Path.GetDirectoryName(arg_output))) {
        Logger.Log.Error("Index directory not available for construction: {0}", arg_output);
        Environment.Exit(1);
    }

    // Be *EXTRA PARANOID* about the contents of the target
    // directory, because creating an indexing driver will
    // nuke it.
    if (Directory.Exists(arg_output)) {
        foreach (FileInfo info in DirectoryWalker.GetFileInfos(arg_output)) {
            if (Array.IndexOf(allowed_files, info.Name) == -1) {
                Logger.Log.Error("{0} doesn't look safe to delete: non-Beagrep file {1} was found", arg_output, info.FullName);
                Environment.Exit(1);
            }
        }

        foreach (DirectoryInfo info in DirectoryWalker.GetDirectoryInfos(arg_output)) {
            if (Array.IndexOf(allowed_dirs, info.Name) == -1) {
                Logger.Log.Error("{0} doesn't look safe to delete: non-Beagrep directory {1} was found", arg_output, info.FullName);
                Environment.Exit(1);
            }
        }
    }

    string config_file_path = Path.Combine(arg_output, "StaticIndex.xml");
    string prev_source = null;
    if (File.Exists(config_file_path)) {
        Config static_index_config = Conf.LoadFrom(config_file_path);
        if (static_index_config == null) {
            Log.Error("Invalid configuation file {0}", config_file_path);
            Environment.Exit(1);
        }

        prev_source = static_index_config.GetOption("Source", null);
        if (arg_source != null && prev_source != arg_source) {
            Log.Error("Source already set to {0} for existing static index. Cannot set source to {1}.", prev_source, arg_source);
            Environment.Exit(1);
        }

        // If arg_source is not given, and prev_source is present, use prev_source
        // as the arg_source. This is useful for re-running build-index without
        // giving --source for already existing static index
        arg_source = prev_source;
    }

    // Fall back to the name of the target directory.
    if (arg_source == null) {
        DirectoryInfo dir = new DirectoryInfo(StringFu.SanitizePath(arg_output));
        arg_source = dir.Name;
    }

    string global_files_config = Path.Combine(PathFinder.ConfigDataDir, "config-files");
    global_files_config = Path.Combine(global_files_config, Conf.Names.FilesQueryableConfig + ".xml");
    if (!File.Exists(global_files_config)) {
        Log.Error("Global configuration file not found {0}", global_files_config);
        // This is a failure, so report it with a non-zero exit status
        // like every other fatal error above.
        Environment.Exit(1);
    }

    // Setup regexes for allowed/denied patterns
    if (allowed_patterns.Count > 0) {
        allowed_regex = StringFu.GetPatternRegex(allowed_patterns);
    } else {
        // Read the exclude values from config
        // For system-wide indexes, only the global config value will be used
        Config config = Conf.Get(Conf.Names.FilesQueryableConfig);
        List <string[]> values = config.GetListOptionValues(Conf.Names.ExcludePattern);
        if (values != null) {
            foreach (string[] exclude in values) {
                denied_patterns.Add(exclude [0]);
            }
        }

        if (denied_patterns.Count > 0) {
            denied_regex = StringFu.GetPatternRegex(denied_patterns);
        }
    }

    if (denied_dir_patterns.Count > 0) {
        denied_dir_regex = StringFu.GetPatternRegex(denied_dir_patterns);
        Log.Always("Will ignore directories matching regular expression: {0}", denied_dir_regex);
    }

    Log.Always("Starting beagrep-build-index (pid {0}) at {1}", Process.GetCurrentProcess().Id, DateTime.Now);

    driver = new LuceneIndexingDriver(arg_output, MINOR_VERSION, false);
    driver.TextCache = (arg_cache_text) ? new TextCache(arg_output) : null;
    if (driver.TextCache != null) {
        driver.TextCache.WorldReadable = true;
    }

    backing_fa_store = new FileAttributesStore_Sqlite(driver.TopDirectory, driver.Fingerprint);
    fa_store = new FileAttributesStore(backing_fa_store);

    // Set up signal handlers
#if MONO_1_9
    Shutdown.SetupSignalHandlers(delegate(int signal)
    {
        if (signal == (int)Mono.Unix.Native.Signum.SIGINT ||
            signal == (int)Mono.Unix.Native.Signum.SIGTERM) {
            Shutdown.BeginShutdown();
        }
    });
#else
    SetupSignalHandlers();
#endif

    Thread monitor_thread = null;

    Stopwatch watch = new Stopwatch();
    watch.Start();

    if (!arg_disable_restart) {
        // Start the thread that monitors memory usage.
        monitor_thread = ExceptionHandlingThread.Start(new ThreadStart(MemoryMonitorWorker));
    }

    // Start indexworker to do the crawling and indexing
    IndexWorker();

    // Join any threads so that we know that we're the only thread still running
    if (monitor_thread != null) {
        monitor_thread.Join();
    }

    watch.Stop();
    Logger.Log.Debug("Elapsed time {0}.", watch);

    // Write this after indexing is done. This is because, if creating a new index,
    // LuceneIndexingDriver.Create() is called which purges the entire directory.
    if (prev_source == null) {
        Config static_index_config = Conf.LoadNew("StaticIndex.xml");

        // Write StaticIndex.xml containing:
        // The name of the source
        static_index_config.SetOption("Source", arg_source);
        static_index_config ["Source"].Description = "Source of the static index";

        Conf.SaveTo(static_index_config, config_file_path);
    }

    if (restart) {
        Logger.Log.Debug("Restarting beagrep-build-index");
        Process p = new Process();
        p.StartInfo.UseShellExecute = false;
        // FIXME: Maybe this isn't the right way to do things?  It should be ok,
        // the PATH is inherited from the shell script which runs mono itself.
        // NOTE(review): arguments are joined with plain spaces, so an argument
        // that itself contains a space is not quoted for the new process.
        p.StartInfo.FileName = "mono";
        p.StartInfo.Arguments = String.Join(" ", Environment.GetCommandLineArgs());
        p.Start();
    }

    Log.Always("Exiting beagrep-build-index (pid {0}) at {1}", Process.GetCurrentProcess().Id, DateTime.Now);
}
/// <summary>
/// Creates a UidManager on top of the given file attributes store, using the
/// querying driver (cast to LuceneNameResolver) as its name resolver.
/// </summary>
/// <param name="fa_store">File attributes store kept by the manager.</param>
/// <param name="driver">Querying driver that is stored as the name resolver.</param>
public UidManager (FileAttributesStore fa_store, LuceneQueryingDriver driver)
{
    // The driver is kept only through its LuceneNameResolver view.
    LuceneNameResolver resolver = (LuceneNameResolver) driver;

    this.name_resolver = resolver;
    this.fa_store = fa_store;
}