/// <summary>
/// Recursively copies the contents of <paramref name="source_directory"/> into
/// <paramref name="target_directory"/>, creating the target if needed and
/// overwriting existing files. Files whose name contains "socket" or ends with
/// "-journal" are skipped (sockets cannot be copied; sqlite journals are transient).
/// FIXME: Don't hard code filenames - Mono.Posix.StatMode.Regular
/// </summary>
static private void CopyDirectoryRecursively(DirectoryInfo source_directory, DirectoryInfo target_directory)
{
	if (!target_directory.Exists) {
		target_directory.Create();
	}

	foreach (FileInfo source_file in DirectoryWalker.GetFileInfos(source_directory)) {
		// Skip uncopyable/transient files *before* building the target FileInfo
		// (previously the FileInfo was constructed even for skipped files).
		// Ordinal comparison: these are literal file-name fragments, not text.
		if (source_file.Name.IndexOf("socket", StringComparison.Ordinal) != -1 ||
		    source_file.Name.EndsWith("-journal", StringComparison.Ordinal)) {
			continue;
		}

		FileInfo target_file = new FileInfo(Path.Combine(target_directory.FullName, source_file.Name));
		File.Copy(source_file.FullName, target_file.FullName, true);
	}

	foreach (DirectoryInfo source_child_directory in DirectoryWalker.GetDirectoryInfos(source_directory)) {
		DirectoryInfo target_child_directory = new DirectoryInfo(Path.Combine(target_directory.FullName, source_child_directory.Name));
		CopyDirectoryRecursively(source_child_directory, target_child_directory);
	}
}
/// <summary>
/// Advances to the next indexable Konqueror cache file.
/// Walks directories via directory_enumerator, lazily opening a file
/// enumerator per directory, and skips "*_freq" files (KDE4 non-cache
/// bookkeeping files) and files that are already up to date.
/// Returns false when every directory has been exhausted.
/// </summary>
public bool HasNextIndexable()
{
	do {
		// No file enumerator yet, or the current directory is exhausted:
		// move on to the next directory.
		while (file_enumerator == null || !file_enumerator.MoveNext()) {
			if (!directory_enumerator.MoveNext()) {
				Logger.Log.Debug("KonqQ: Crawling done");
				// Reset state so a stale enumerator/file is not reused.
				file_enumerator = null;
				current_file = null;
				return(false);
			}

			DirectoryInfo current_dir = (DirectoryInfo)directory_enumerator.Current;
			//Logger.Log.Debug ("Trying dir:" + current_dir.Name);

			// start watching for new files and get the list of current files
			// kind of race here - might get duplicate files
			if (Inotify.Enabled) {
				Inotify.Subscribe(current_dir.FullName, OnInotifyEvent, Inotify.EventType.Create | Inotify.EventType.MovedTo);
			}

			file_enumerator = DirectoryWalker.GetFileInfos(current_dir).GetEnumerator();
		}

		current_file = (FileInfo)file_enumerator.Current;
		//if (!IsUpToDate (current_file.FullName))
		//	Logger.Log.Debug (current_file.FullName + " is not upto date");
		// KDE4 cache contains _freq files which are non-cache files
	} while (current_file.FullName.EndsWith("_freq") || IsUpToDate(current_file.FullName));

	return(true);
}
/// <summary>
/// Rebuilds log_files by breadth-first scanning log_dir and collecting
/// every file that FileIsInteresting accepts.
/// </summary>
public void Crawl()
{
	log_files.Clear();

	// Breadth-first traversal of the log directory tree.
	Queue to_visit = new Queue();
	to_visit.Enqueue(log_dir);

	while (to_visit.Count > 0) {
		string current = (string)to_visit.Dequeue();

		foreach (string child_dir in DirectoryWalker.GetDirectories(current))
			to_visit.Enqueue(child_dir);

		foreach (FileInfo candidate in DirectoryWalker.GetFileInfos(current)) {
			if (FileIsInteresting(candidate))
				log_files.Add(candidate);
		}
	}
}
/// <summary>
/// Advances to the next maildir mail file that is not up to date.
/// Iterates directory-by-directory, updating crawl counters and the
/// indexer's progress fraction as each directory is opened.
/// Returns false when all directories are done or shutdown is requested.
/// </summary>
public bool HasNextIndexable()
{
	do {
		// Current directory's files exhausted (or not started): advance dirs.
		while (file_enumerator == null || !file_enumerator.MoveNext()) {
			if (!dir_enumerator.MoveNext()) {
				// All directories crawled; mark indexing as finished.
				dir_enumerator = null;
				indexer.Queryable.Indexing = false;
				return(false);
			}

			// Bail out early on shutdown; note state is left as-is.
			if (Shutdown.ShutdownRequested) {
				return(false);
			}

			current_dir = (DirectoryInfo)dir_enumerator.Current;
			num_dir_crawled++;
			num_file_in_dir = DirectoryWalker.GetNumItems(current_dir.FullName);
			num_file_in_dir_crawled = 0;
			// Progress is measured in whole directories crawled.
			indexer.Progress = (double)num_dir_crawled / num_dirs;
			Log.Info("Scanning {0} maildir mails in {1}", num_file_in_dir, current_dir.FullName);
			files_to_parse = DirectoryWalker.GetFileInfos(current_dir);
			file_enumerator = files_to_parse.GetEnumerator();
		}

		num_file_in_dir_crawled++;
		CrawlFile = (FileInfo)file_enumerator.Current;
		// Skip mails that are already indexed.
	} while (IsUpToDate(CrawlFile.FullName));

	return(true);
}
/// <summary>
/// Lazily opens the enumerator over the Tomboy note directory on first
/// call, then reports whether another note file is available.
/// </summary>
public bool HasNextIndexable()
{
	if (note_files == null)
		note_files = DirectoryWalker.GetFileInfos(tomboy_dir).GetEnumerator();

	return note_files.MoveNext();
}
/// <summary>
/// Lazily opens the enumerator over the map-file directory on first
/// call, then reports whether another map file is available.
/// </summary>
public bool HasNextIndexable()
{
	if (map_files == null)
		map_files = DirectoryWalker.GetFileInfos(lab_dir).GetEnumerator();

	return map_files.MoveNext();
}
/// <summary>
/// Creates a generator over the files of <paramref name="directory"/>.
/// A null directory marks the generator as already done; otherwise the
/// file enumerator is opened immediately.
/// </summary>
public DirectoryIndexableGenerator(FileSystemQueryable queryable, DirectoryModel directory)
{
	this.queryable = queryable;
	this.directory = directory;

	if (this.directory != null)
		files = DirectoryWalker.GetFileInfos(this.directory.FullName).GetEnumerator();
	else
		done = true;
}
/// <summary>
/// Starts indexing Akregator feeds. If the feed directory does not exist
/// yet, polls for it once a minute. Otherwise subscribes to change
/// notifications (inotify when available, FileSystemWatcher as fallback)
/// and synchronously indexes every existing ".xml" feed file.
/// </summary>
private void StartWorker()
{
	// Feed directory not created yet — check again every 60 s.
	if (!Directory.Exists(akregator_dir)) {
		GLib.Timeout.Add(60000, new GLib.TimeoutHandler(CheckForExistence));
		return;
	}

	if (Inotify.Enabled) {
		Inotify.Subscribe(akregator_dir, OnInotifyEvent,
				  Inotify.EventType.CloseWrite | Inotify.EventType.Delete);
	} else {
		FileSystemWatcher watcher = new FileSystemWatcher();
		watcher.Path = akregator_dir;
		watcher.Changed += new FileSystemEventHandler(OnChanged);
		watcher.Created += new FileSystemEventHandler(OnChanged);
		watcher.EnableRaisingEvents = true;
	}

	Log.Info("Scanning Akregator feeds...");

	Stopwatch timer = new Stopwatch();
	timer.Start();

	DirectoryInfo feed_dir = new DirectoryInfo(akregator_dir);
	int parsed_count = 0;

	foreach (FileInfo feed_file in DirectoryWalker.GetFileInfos(feed_dir)) {
		if (feed_file.Extension != ".xml")
			continue;

		IndexSingleFeed(feed_file.FullName, true);
		parsed_count++;
	}

	timer.Stop();
	Log.Info("{0} files will be parsed (scanned in {1})", parsed_count, timer);
}
/// <summary>
/// Yields an indexable for each non-hidden file under <paramref name="path"/>
/// that has a companion dot-file ("." + name) in the same directory.
/// </summary>
private IEnumerable GetIndexables(string path)
{
	foreach (FileInfo entry in DirectoryWalker.GetFileInfos(path)) {
		// Never index hidden files themselves.
		if (entry.Name [0] == '.')
			continue;

		// Only files that have a "." + name sidecar are indexed.
		string sidecar = Path.Combine(entry.DirectoryName, "." + entry.Name);
		if (File.Exists(sidecar))
			yield return FileToIndexable(entry);
	}
}
/// <summary>
/// Starts indexing Liferea feeds. If the feed directory is absent, polls
/// for it once a minute. Otherwise subscribes to change notifications
/// (inotify when available, FileSystemWatcher as fallback) and indexes
/// every existing feed file.
/// </summary>
private void StartWorker()
{
	if (!CheckForDirectory()) {
		Log.Debug("Watching for creation of Liferea directory");
		GLib.Timeout.Add(60000, new GLib.TimeoutHandler(CheckForExistence));
		return;
	}

	if (Inotify.Enabled) {
		Inotify.EventType mask = Inotify.EventType.CloseWrite | Inotify.EventType.Delete;
		Inotify.Subscribe(liferea_dir, OnInotifyEvent, mask);
	} else {
		FileSystemWatcher fsw = new FileSystemWatcher();
		fsw.Path = liferea_dir;
		fsw.Changed += new FileSystemEventHandler(OnChanged);
		fsw.Created += new FileSystemEventHandler(OnChanged);
		fsw.EnableRaisingEvents = true;
	}

	Log.Info("Scanning Liferea feeds...");
	Stopwatch stopwatch = new Stopwatch();
	stopwatch.Start();

	DirectoryInfo dir = new DirectoryInfo(liferea_dir);
	int count = 0;

	foreach (FileInfo file in DirectoryWalker.GetFileInfos(dir)) {
		IndexSingleFeed(file.FullName);
		// BUGFIX: count was never incremented, so the summary log below
		// always reported "0 files will be parsed".
		count++;
	}

	stopwatch.Stop();
	Log.Info("{0} files will be parsed (scanned in {1})", count, stopwatch);
}
/// <summary>
/// Subscribes to conversation-log changes in <paramref name="remote_dir"/>
/// and, when <paramref name="index"/> is true, queues every interesting
/// existing log file for delayed indexing.
/// </summary>
private void CrawlRemoteDirectory(string remote_dir, bool index)
{
	// Watch the directory regardless of whether we index its backlog.
	if (Inotify.Enabled)
		Inotify.Subscribe(remote_dir, OnInotifyNewConversation,
				  Inotify.EventType.CloseWrite | Inotify.EventType.Modify);

	if (!index)
		return;

	foreach (FileInfo log_file in DirectoryWalker.GetFileInfos(remote_dir)) {
		if (FileIsInteresting(log_file.Name))
			IndexLog(log_file.FullName, Scheduler.Priority.Delayed);
	}
	//queryable.IsIndexing = false;
}
/**
 * Recursively traverse the files and dirctories under mail_root
 * to find files that need to be indexed, directories that
 * need to be watched for changes
 */
public void Crawl()
{
	if (!Directory.Exists(mail_root)) {
		return;
	}

	// Reset state from any previous crawl before re-scanning.
	mail_directories.Clear();
	folder_directories.Clear();
	mbox_files.Clear();

	Queue pending = new Queue();
	pending.Enqueue(mail_root);
	folder_directories.Add(mail_root);

	// add inotify watch to root folder
	if (Inotify.Enabled) {
		Inotify.Subscribe(mail_root, OnInotifyEvent,
				  Inotify.EventType.Create |
				  Inotify.EventType.Delete |
				  Inotify.EventType.MovedFrom |
				  Inotify.EventType.MovedTo |
				  Inotify.EventType.Modify);
	}

	// Breadth-first walk of the KMail folder tree.
	while (pending.Count > 0) {
		string dir = (string)pending.Dequeue();
		Logger.Log.Debug("Searching for mbox and maildirs in " + dir);

		foreach (FileInfo fi in DirectoryWalker.GetFileInfos(dir)) {
			// Each mail folder "foo" is announced by a companion ".foo.index" file.
			if (!fi.Name.EndsWith(".index")) {
				continue;
			}

			string indexFile = fi.Name;
			// Strip the leading "." and trailing ".index" to recover the folder name.
			string mailFolderName = indexFile.Substring(1, indexFile.LastIndexOf(".index") - 1);
			string mailFolder = Path.Combine(dir, mailFolderName);
			if (IgnoreFolder(mailFolder)) {
				continue;
			}

			// A directory is a maildir folder; a plain file is an mbox.
			if (Directory.Exists(mailFolder)) {
				mail_directories.Add(mailFolder);
				if (Inotify.Enabled) {
					Watch(mailFolder);
				}
			} else if (File.Exists(mailFolder)) {
				mbox_files.Add(mailFolder);
			}

			// if there is a directory with name .<mailFolderName>.directory
			// then it contains sub-folders
			string subFolder = Path.Combine(dir, "." + mailFolderName + ".directory");
			if (Directory.Exists(subFolder)) {
				pending.Enqueue(subFolder);
				folder_directories.Add(subFolder);
				if (Inotify.Enabled) {
					Inotify.Subscribe(subFolder, OnInotifyEvent,
							  Inotify.EventType.Create |
							  Inotify.EventType.Delete |
							  Inotify.EventType.MovedFrom |
							  Inotify.EventType.MovedTo |
							  Inotify.EventType.Modify);
				}
			}
		}
	}

	// copy the contents as mail_directories, mbox_files might change due to async events
	ArrayList _mail_directories = new ArrayList(mail_directories);
	ArrayList _mbox_files = new ArrayList(mbox_files);

	if (queryable.ThisScheduler.ContainsByTag(mail_root)) {
		Logger.Log.Debug("Not adding task for already running task: {0}", mail_root);
		return;
	} else {
		KMaildirIndexableGenerator generator = new KMaildirIndexableGenerator(this, _mail_directories);
		AddIIndexableTask(generator, mail_root);
	}

	foreach (string mbox_file in _mbox_files) {
		IndexMbox(mbox_file, true);
	}
}
/// <summary>
/// Entry point for the index-management tool. args[0] is the index
/// directory, args[1] the command ("info", "merge", "optimize").
/// Refuses to touch a directory containing files/subdirectories not in
/// the allowed_files / allowed_dirs whitelists.
/// </summary>
static void Main(string [] args)
{
	if (args.Length < 2) {
		PrintUsage();
	}

	// Normalize the index path to an absolute path.
	string index_dir = (Path.IsPathRooted(args [0])) ? args [0] : Path.GetFullPath(args [0]);

	if (!Directory.Exists(index_dir)) {
		Console.WriteLine("Could not find index: {0}", index_dir);
		Environment.Exit(1);
	}

	// Be *EXTRA PARANOID* about the contents of the target
	// directory, because creating an indexing driver will
	// nuke it.
	if (args [1] != "info" && Directory.Exists(index_dir)) {
		foreach (FileInfo info in DirectoryWalker.GetFileInfos(index_dir)) {
			if (Array.IndexOf(allowed_files, info.Name) == -1) {
				Logger.Log.Error("{0} doesn't look safe to delete: non-Beagrep file {1} was found", index_dir, info.FullName);
				Environment.Exit(1);
			}
		}

		foreach (DirectoryInfo info in DirectoryWalker.GetDirectoryInfos(index_dir)) {
			if (Array.IndexOf(allowed_dirs, info.Name) == -1) {
				Logger.Log.Error("{0} doesn't look safe to delete: non-Beagrep directory {1} was found", index_dir, info.FullName);
				Environment.Exit(1);
			}
		}
	}

	switch (args [1]) {
#if false
	case "list":
		ExecuteList();
		break;

	case "remove":
		ExecuteRemove(args [2]);
		break;
#endif
	case "info":
		ExecuteInfo(index_dir);
		break;

	case "merge":
		// BUGFIX: "merge" requires a second index argument; previously a
		// missing argument threw IndexOutOfRangeException instead of
		// showing usage.
		if (args.Length < 3) {
			PrintUsage();
		} else {
			ExecuteMerge(index_dir, args [2]);
		}
		break;

	case "optimize":
		ExecuteOptimize(index_dir);
		break;

	default:
		Console.WriteLine("Unknown command: {0}", args [1]);
		PrintUsage();
		break;
	}
}
// Drains pending_directories into a fresh IndexerRequest, adding an
// indexable for every non-ignored file and directory (recursing when
// arg_recursive is set), then removes index entries for items that no
// longer exist on disk, and finally flushes and optimizes the index.
static void DoIndexing()
{
	int count_dirs = 0;
	int count_files = 0;
	Indexable indexable;
	pending_request = new IndexerRequest();
	// Directories whose contents may have changed; later scanned for deletions.
	Queue modified_directories = new Queue();

	while (pending_directories.Count > 0) {
		DirectoryInfo dir = (DirectoryInfo)pending_directories.Dequeue();
		AddToRequest(DirectoryToIndexable(dir, modified_directories));

		try {
			if (arg_recursive) {
				foreach (DirectoryInfo subdir in DirectoryWalker.GetDirectoryInfos(dir)) {
					if (!Ignore(subdir) && !FileSystem.IsSpecialFile(subdir.FullName)) {
						pending_directories.Enqueue(subdir);
					}
				}
			}

			foreach (FileInfo file in DirectoryWalker.GetFileInfos(dir)) {
				if (!Ignore(file)) {
					AddToRequest(FileToIndexable(file));
					count_files++;
				}
			}
			// Directory vanished between queueing and scanning; skip it silently.
		} catch (DirectoryNotFoundException) {}

		if (Shutdown.ShutdownRequested) {
			break;
		}

		count_dirs++;
	}

	Logger.Log.Debug("Scanned {0} files and directories in {1} directories", count_dirs + count_files, count_dirs);

	if (Shutdown.ShutdownRequested) {
		backing_fa_store.Flush();
		return;
	}

	// Time to remove deleted directories from the index and attributes store
	while (modified_directories.Count > 0) {
		DirectoryInfo subdir = (DirectoryInfo)modified_directories.Dequeue();
		Logger.Log.Debug("Checking {0} for deleted files and directories", subdir.FullName);

		// Get a list of all documents from lucene index with ParentDirUriPropKey set as that of subdir
		ICollection all_dirent = GetAllItemsInDirectory(subdir);

		foreach (Dirent info in all_dirent) {
			// check if the item exists
			if ((!info.IsDirectory && File.Exists(info.FullName)) ||
			    (info.IsDirectory && Directory.Exists(info.FullName))) {
				continue;
			}

			if (info.IsDirectory) {
				// Recursively remove deleted subdirectories
				modified_directories.Enqueue(new DirectoryInfo(info.FullName));
			}

			// remove
			Uri uri = PathToUri(info.FullName);
			indexable = new Indexable(IndexableType.Remove, uri);
			AddToRequest(indexable);
		}
	}

	bool reschedule = false;
	// Call Flush until our request is empty. We have to do this in a loop
	// because Flush happens in a batch size and some indexables might generate more indexables
	while (reschedule || pending_request.Count > 0) {
		if (Shutdown.ShutdownRequested) {
			break;
		}

		reschedule = FlushIndexer(driver);
	}

	backing_fa_store.Flush();

	if (Shutdown.ShutdownRequested) {
		return;
	}

	Logger.Log.Debug("Optimizing index");
	driver.OptimizeNow();
}
// Entry point for beagrep-build-index: parses command-line options,
// validates the target index directory, loads/creates the static-index
// configuration, sets up pattern regexes and signal handlers, runs the
// crawl/index worker, and optionally restarts the process when the
// memory monitor requests it.
static void DoMain(string [] args)
{
	SystemInformation.SetProcessName("beagrep-build-index");

	if (args.Length < 2) {
		PrintUsage();
	}

	ArrayList allowed_patterns = new ArrayList();
	ArrayList denied_patterns = new ArrayList();
	ArrayList denied_dir_patterns = new ArrayList();

	// Manual argument scan: i already points past `arg` when the option's
	// value (next_arg) is consumed, hence the extra ++i in those cases.
	int i = 0;
	while (i < args.Length) {
		string arg = args [i];
		++i;
		string next_arg = i < args.Length ? args [i] : null;

		switch (arg) {
		case "-h":
		case "--help":
			PrintUsage();
			break;

		case "--tag":
			if (next_arg != null) {
				arg_tag = next_arg;
			}
			++i;
			break;

		case "-r":
		case "--recursive":
			arg_recursive = true;
			break;

		case "--enable-deletion":
			arg_delete = true;
			break;

		case "--disable-directories":
			arg_disable_directories = true;
			break;

		case "--enable-text-cache":
			arg_cache_text = true;
			break;

		case "--target":
			if (next_arg != null) {
				arg_output = Path.IsPathRooted(next_arg) ? next_arg : Path.GetFullPath(next_arg);
			}
			++i;
			break;

		case "--disable-filtering":
			arg_disable_filtering = true;
			break;

		case "--allow-pattern":
			if (next_arg == null) {
				break;
			}

			// Comma-separated lists are split into individual patterns.
			if (next_arg.IndexOf(',') != -1) {
				foreach (string pattern in next_arg.Split(',')) {
					allowed_patterns.Add(pattern);
				}
			} else {
				allowed_patterns.Add(next_arg);
			}
			++i;
			break;

		case "--deny-directory-pattern":
			if (next_arg == null) {
				break;
			}

			if (next_arg.IndexOf(',') != -1) {
				foreach (string pattern in next_arg.Split(',')) {
					denied_dir_patterns.Add(pattern);
				}
			} else {
				denied_dir_patterns.Add(next_arg);
			}
			++i;
			break;

		case "--deny-pattern":
			if (next_arg == null) {
				break;
			}

			if (next_arg.IndexOf(',') != -1) {
				foreach (string pattern in next_arg.Split(',')) {
					denied_patterns.Add(pattern);
				}
			} else {
				denied_patterns.Add(next_arg);
			}
			++i;
			break;

		case "--disable-restart":
			arg_disable_restart = true;
			break;

		case "--source":
			if (next_arg == null) {
				break;
			}

			arg_source = next_arg;
			++i;
			break;

		default:
			// Unknown dashed option: show usage. Anything else is a source path.
			if (arg.StartsWith("-") || arg.StartsWith("--")) {
				PrintUsage();
			}

			string path = Path.IsPathRooted(arg) ? arg : Path.GetFullPath(arg);
			if (path != "/" && path.EndsWith("/")) {
				path = path.TrimEnd('/');
			}

			if (Directory.Exists(path)) {
				pending_directories.Enqueue(new DirectoryInfo(path));
			} else if (File.Exists(path)) {
				pending_files.Enqueue(new FileInfo(path));
			}
			break;
		}
	}

	/////////////////////////////////////////////////////////

	if (arg_output == null) {
		Logger.Log.Error("--target must be specified");
		Environment.Exit(1);
	}

	// Set the storage dir, this should be used to store log messages
	// and filterver.dat
	PathFinder.StorageDir = arg_output;

	// The target index must not be one of the crawled sources.
	foreach (FileSystemInfo info in pending_directories) {
		if (Path.GetFullPath(arg_output) == info.FullName) {
			Logger.Log.Error("Target directory cannot be one of the source paths.");
			Environment.Exit(1);
		}
	}

	foreach (FileSystemInfo info in pending_files) {
		if (Path.GetFullPath(arg_output) == info.FullName) {
			Logger.Log.Error("Target directory cannot be one of the source paths.");
			Environment.Exit(1);
		}
	}

	if (!Directory.Exists(Path.GetDirectoryName(arg_output))) {
		Logger.Log.Error("Index directory not available for construction: {0}", arg_output);
		Environment.Exit(1);
	}

	// Be *EXTRA PARANOID* about the contents of the target
	// directory, because creating an indexing driver will
	// nuke it.
	if (Directory.Exists(arg_output)) {
		foreach (FileInfo info in DirectoryWalker.GetFileInfos(arg_output)) {
			if (Array.IndexOf(allowed_files, info.Name) == -1) {
				Logger.Log.Error("{0} doesn't look safe to delete: non-Beagrep file {1} was found", arg_output, info.FullName);
				Environment.Exit(1);
			}
		}

		foreach (DirectoryInfo info in DirectoryWalker.GetDirectoryInfos(arg_output)) {
			if (Array.IndexOf(allowed_dirs, info.Name) == -1) {
				Logger.Log.Error("{0} doesn't look safe to delete: non-Beagrep directory {1} was found", arg_output, info.FullName);
				Environment.Exit(1);
			}
		}
	}

	string config_file_path = Path.Combine(arg_output, "StaticIndex.xml");
	string prev_source = null;

	if (File.Exists(config_file_path)) {
		Config static_index_config = Conf.LoadFrom(config_file_path);
		if (static_index_config == null) {
			Log.Error("Invalid configuation file {0}", config_file_path);
			Environment.Exit(1);
		}

		prev_source = static_index_config.GetOption("Source", null);
		// An existing index's source name cannot be changed.
		if (arg_source != null && prev_source != arg_source) {
			Log.Error("Source already set to {0} for existing static index. Cannot set source to {1}.", prev_source, arg_source);
			Environment.Exit(1);
		}

		// If arg_source is not given, and prev_source is present, use prev_source
		// as the arg_source. This is useful for re-running build-index without
		// giving --arg_source for already existing static index
		arg_source = prev_source;
	}

	// Default source name: the target directory's own name.
	if (arg_source == null) {
		DirectoryInfo dir = new DirectoryInfo(StringFu.SanitizePath(arg_output));
		arg_source = dir.Name;
	}

	string global_files_config = Path.Combine(PathFinder.ConfigDataDir, "config-files");
	global_files_config = Path.Combine(global_files_config, Conf.Names.FilesQueryableConfig + ".xml");
	if (!File.Exists(global_files_config)) {
		Log.Error("Global configuration file not found {0}", global_files_config);
		Environment.Exit(0);
	}

	// Setup regexes for allowed/denied patterns
	if (allowed_patterns.Count > 0) {
		allowed_regex = StringFu.GetPatternRegex(allowed_patterns);
	} else {
		// Read the exclude values from config
		// For system-wide indexes, only the global config value will be used
		Config config = Conf.Get(Conf.Names.FilesQueryableConfig);
		List <string[]> values = config.GetListOptionValues(Conf.Names.ExcludePattern);
		if (values != null) {
			foreach (string[] exclude in values) {
				denied_patterns.Add(exclude [0]);
			}
		}

		if (denied_patterns.Count > 0) {
			denied_regex = StringFu.GetPatternRegex(denied_patterns);
		}
	}

	if (denied_dir_patterns.Count > 0) {
		denied_dir_regex = StringFu.GetPatternRegex(denied_dir_patterns);
		Log.Always("Will ignore directories matching regular expression: {0}", denied_dir_regex);
	}

	Log.Always("Starting beagrep-build-index (pid {0}) at {1}", Process.GetCurrentProcess().Id, DateTime.Now);

	driver = new LuceneIndexingDriver(arg_output, MINOR_VERSION, false);
	driver.TextCache = (arg_cache_text) ? new TextCache(arg_output) : null;
	if (driver.TextCache != null) {
		driver.TextCache.WorldReadable = true;
	}

	backing_fa_store = new FileAttributesStore_Sqlite(driver.TopDirectory, driver.Fingerprint);
	fa_store = new FileAttributesStore(backing_fa_store);

	// Set up signal handlers
#if MONO_1_9
	Shutdown.SetupSignalHandlers(delegate(int signal) {
		if (signal == (int)Mono.Unix.Native.Signum.SIGINT || signal == (int)Mono.Unix.Native.Signum.SIGTERM) {
			Shutdown.BeginShutdown();
		}
	});
#else
	SetupSignalHandlers();
#endif

	Thread monitor_thread = null;

	Stopwatch watch = new Stopwatch();
	watch.Start();

	if (!arg_disable_restart) {
		// Start the thread that monitors memory usage.
		monitor_thread = ExceptionHandlingThread.Start(new ThreadStart(MemoryMonitorWorker));
	}

	// Start indexworker to do the crawling and indexing
	IndexWorker();

	// Join any threads so that we know that we're the only thread still running
	if (monitor_thread != null) {
		monitor_thread.Join();
	}

	watch.Stop();
	Logger.Log.Debug("Elapsed time {0}.", watch);

	// Write this after indexing is done. This is because, if creating a new index,
	// LuceneIndexingDriver.Create() is called which purges the entire directory.
	if (prev_source == null) {
		Config static_index_config = Conf.LoadNew("StaticIndex.xml");
		// Write StaticIndex.xml containing:
		// The name of the source
		static_index_config.SetOption("Source", arg_source);
		static_index_config ["Source"].Description = "Source of the static index";
		Conf.SaveTo(static_index_config, config_file_path);
	}

	if (restart) {
		Logger.Log.Debug("Restarting beagrep-build-index");
		Process p = new Process();
		p.StartInfo.UseShellExecute = false;
		// FIXME: Maybe this isn't the right way to do things? It should be ok,
		// the PATH is inherited from the shell script which runs mono itself.
		p.StartInfo.FileName = "mono";
		p.StartInfo.Arguments = String.Join(" ", Environment.GetCommandLineArgs());
		p.Start();
	}

	Log.Always("Exiting beagrep-build-index (pid {0}) at {1}", Process.GetCurrentProcess().Id, DateTime.Now);
}