/**
 * Copy the contents of source_directory into target_directory, descending
 * into every subdirectory. The target directory is created when it does not
 * exist yet, and files already present in the target are overwritten.
 * Files whose name contains "socket" or ends in "-journal" are not copied.
 */
static private void CopyDirectoryRecursively(DirectoryInfo source_directory,
                                                     DirectoryInfo target_directory)
        {
            // Make sure there is a destination to copy into.
            if (!target_directory.Exists)
            {
                target_directory.Create();
            }

            string target_root = target_directory.FullName;

            foreach (FileInfo file in DirectoryWalker.GetFileInfos(source_directory))
            {
                FileInfo copy = new FileInfo(Path.Combine(target_root, file.Name));

                // FIXME: Don't hard code filenames - Mono.Posix.StatMode.Regular
                bool skip = file.Name.IndexOf("socket") >= 0 ||
                            file.Name.EndsWith("-journal");

                if (!skip)
                {
                    // The final argument allows overwriting an existing file.
                    File.Copy(file.FullName, copy.FullName, true);
                }
            }

            // Repeat the copy for each child directory, keeping its name under the target.
            foreach (DirectoryInfo child in DirectoryWalker.GetDirectoryInfos(source_directory))
            {
                string child_target_path = Path.Combine(target_root, child.Name);

                CopyDirectoryRecursively(child, new DirectoryInfo(child_target_path));
            }
        }
        /**
         * Subscribe to inotify events (create, delete, moved-from, moved-to)
         * for the directory at path and for every directory below it.
         * Does nothing when path is not an existing directory.
         */
        public void Watch(string path)
        {
            DirectoryInfo root = new DirectoryInfo(path);

            if (!root.Exists)
            {
                return;
            }

            // The same set of events is requested for every watched directory.
            Inotify.EventType mask = Inotify.EventType.Create
                                     | Inotify.EventType.Delete
                                     | Inotify.EventType.MovedFrom
                                     | Inotify.EventType.MovedTo;

            // Work list of directories still to be subscribed; it replaces recursion.
            Queue pending = new Queue();
            pending.Enqueue(root);

            while (pending.Count > 0)
            {
                DirectoryInfo current = (DirectoryInfo)pending.Dequeue();

                // Skip directories that are no longer there when their turn comes.
                if (current.Exists)
                {
                    //log.Debug ("Adding inotify watch to " + current.FullName);
                    Inotify.Subscribe(current.FullName, OnInotifyEvent, mask);

                    foreach (DirectoryInfo child in DirectoryWalker.GetDirectoryInfos(current))
                    {
                        pending.Enqueue(child);
                    }
                }
            }
        }
Exemple #3
0
 /**
  * Begin a crawl: reset the directory enumerator to the subdirectories of
  * konq_cache_dir and hand a new task, tagged with crawler_tag, to the scheduler.
  */
 private void Crawl()
 {
     // The enumerator must be in place before the task is created and scheduled.
     directory_enumerator = DirectoryWalker.GetDirectoryInfos(konq_cache_dir).GetEnumerator();

     Scheduler.Task task = NewAddTask(this);
     task.Tag = crawler_tag;

     ThisScheduler.Add(task);
 }
Exemple #4
0
    /**
     * Command-line entry point of the index dump tool.
     *
     * Recognised options:
     *   --help                          print usage and exit
     *   --uris                          dump URIs (the default mode)
     *   --properties, --props           dump properties
     *   --term-frequencies, --term-freqs  dump term frequencies
     *   --fields                        dump index fields
     *   --hide-counts / --show-counts   turn the counts off / on
     *   --indexdir=<dir>                add an index by directory
     *   --index=<name>                  add an index by name under PathFinder.IndexDir
     * Any other argument is treated as a URI or, failing that, as a file path.
     */
    static void Main(string [] args)
    {
        const string indexdir_prefix = "--indexdir=";
        const string index_prefix    = "--index=";

        Mode      mode        = Mode.Uris;
        bool      show_counts = true;
        ArrayList index_dirs  = new ArrayList();
        ArrayList index_names = new ArrayList();
        ArrayList uris        = new ArrayList();

        foreach (string arg in args)
        {
            switch (arg)
            {
            case "--help":
                PrintUsage();
                Environment.Exit(0);
                break;

            case "--uris":
                mode = Mode.Uris;
                break;

            case "--properties":
            case "--props":
                mode = Mode.Properties;
                break;

            case "--term-frequencies":
            case "--term-freqs":
                mode = Mode.TermFrequencies;
                break;

            case "--hide-counts":
                show_counts = false;
                break;

            case "--show-counts":
                // Counterpart of --hide-counts: it has to switch the counts back on.
                show_counts = true;
                break;

            case "--fields":
                mode = Mode.Fields;
                break;

            default:
                if (arg.StartsWith(indexdir_prefix))
                {
                    index_dirs.Add(arg.Substring(indexdir_prefix.Length));
                }
                else if (arg.StartsWith(index_prefix))
                {
                    index_names.Add(arg.Substring(index_prefix.Length));
                }
                else
                {
                    Uri uri;

                    // Try the argument as a URI first; fall back to treating it as a path.
                    try {
                        uri = UriFu.UserUritoEscapedUri(arg);
                    } catch (UriFormatException) {
                        uri = UriFu.PathToFileUri(arg);
                    }

                    uris.Add(uri);
                }
                break;
            }
        }

        if (uris.Count > 0 && (mode == Mode.TermFrequencies || mode == Mode.Fields))
        {
            Console.WriteLine("ERROR: --term-frequencies and --fields do not make sense with files or URIs.");
            Environment.Exit(1);
        }

        ArrayList indexes = new ArrayList();

        // If no --index or --indexdir options, get all the default indexes.
        if (index_dirs.Count == 0 && index_names.Count == 0)
        {
            foreach (DirectoryInfo subdir in DirectoryWalker.GetDirectoryInfos(PathFinder.IndexDir))
            {
                indexes.Add(new IndexInfo(subdir.Name));
            }
        }
        else
        {
            // Named indexes must exist below the default index directory.
            foreach (string name in index_names)
            {
                DirectoryInfo info = new DirectoryInfo(Path.Combine(PathFinder.IndexDir, name));

                if (!info.Exists)
                {
                    Console.WriteLine("ERROR: No index named '{0}'", name);
                    Environment.Exit(1);
                }

                indexes.Add(new IndexInfo(info.Name));
            }

            // Directories given with --indexdir are used as-is, without an existence check.
            foreach (string dir in index_dirs)
            {
                indexes.Add(new IndexInfo(dir));
            }
        }

        indexes.Sort();

        if (mode == Mode.Uris || mode == Mode.Properties)
        {
            DumpIndexInformation(indexes, uris, mode == Mode.Properties, show_counts);
        }
        else if (mode == Mode.TermFrequencies)
        {
            DumpIndexTermFreqs(indexes);
        }
        else if (mode == Mode.Fields)
        {
            DumpIndexFields(indexes);
        }
    }
Exemple #5
0
        /**
         * Command-line entry point of the index management tool.
         *
         * Expected arguments: <index directory> <command> [command argument].
         * Supported commands are "info", "merge" (which takes one further
         * argument) and "optimize". For every command except "info" the index
         * directory is first checked to contain only known files and
         * directories; the process exits with status 1 otherwise.
         */
        static void Main(string [] args)
        {
            if (args.Length < 2)
            {
                PrintUsage();
                // Never fall through to args [0] / args [1] when they are missing.
                return;
            }

            string index_dir = (Path.IsPathRooted(args [0])) ? args [0] : Path.GetFullPath(args [0]);

            if (!Directory.Exists(index_dir))
            {
                Console.WriteLine("Could not find index: {0}", index_dir);
                Environment.Exit(1);
            }

            // Be *EXTRA PARANOID* about the contents of the target
            // directory, because creating an indexing driver will
            // nuke it.
            if (args [1] != "info" && Directory.Exists(index_dir))
            {
                foreach (FileInfo info in DirectoryWalker.GetFileInfos(index_dir))
                {
                    if (Array.IndexOf(allowed_files, info.Name) == -1)
                    {
                        Logger.Log.Error("{0} doesn't look safe to delete: non-Beagrep file {1} was found", index_dir, info.FullName);
                        Environment.Exit(1);
                    }
                }

                foreach (DirectoryInfo info in DirectoryWalker.GetDirectoryInfos(index_dir))
                {
                    if (Array.IndexOf(allowed_dirs, info.Name) == -1)
                    {
                        Logger.Log.Error("{0} doesn't look safe to delete: non-Beagrep directory {1} was found", index_dir, info.FullName);
                        Environment.Exit(1);
                    }
                }
            }

            switch (args [1])
            {
#if false
            case "list":
                ExecuteList();
                break;

            case "remove":
                ExecuteRemove(args [2]);
                break;
#endif
            case "info":
                ExecuteInfo(index_dir);
                break;

            case "merge":
                // "merge" needs a third argument, which is handed to ExecuteMerge.
                if (args.Length < 3)
                {
                    Console.WriteLine("Missing argument for command: {0}", args [1]);
                    PrintUsage();
                    Environment.Exit(1);
                }

                ExecuteMerge(index_dir, args [2]);
                break;

            case "optimize":
                ExecuteOptimize(index_dir);
                break;

            default:
                Console.WriteLine("Unknown command: {0}", args [1]);
                PrintUsage();
                break;
            }
        }
Exemple #6
0
        /**
         * Run one indexing pass over everything queued in pending_directories.
         *
         * Phases, in order:
         *  1. Drain pending_directories, adding an indexable for each directory
         *     and for each non-ignored file in it (descending into
         *     subdirectories when arg_recursive is set).
         *  2. For every directory collected in the modified-directories queue,
         *     add a Remove indexable for each indexed item that no longer
         *     exists on disk.
         *  3. Flush the request until it is empty, flush the attributes store
         *     and optimize the index.
         * A requested shutdown cuts the pass short; the attributes store is
         * still flushed in that case, but the index is not optimized.
         */
        static void DoIndexing()
        {
            int dirs_scanned  = 0;
            int files_scanned = 0;

            pending_request = new IndexerRequest();

            // Filled by DirectoryToIndexable; consumed by the deletion check below.
            Queue dirs_to_check = new Queue();

            while (pending_directories.Count > 0)
            {
                DirectoryInfo current = (DirectoryInfo)pending_directories.Dequeue();

                AddToRequest(DirectoryToIndexable(current, dirs_to_check));

                try {
                    if (arg_recursive)
                    {
                        foreach (DirectoryInfo child in DirectoryWalker.GetDirectoryInfos(current))
                        {
                            if (Ignore(child) ||
                                FileSystem.IsSpecialFile(child.FullName))
                            {
                                continue;
                            }

                            pending_directories.Enqueue(child);
                        }
                    }

                    foreach (FileInfo file in DirectoryWalker.GetFileInfos(current))
                    {
                        if (Ignore(file))
                        {
                            continue;
                        }

                        AddToRequest(FileToIndexable(file));
                        files_scanned++;
                    }
                } catch (DirectoryNotFoundException) {
                    // Deliberately ignored: move on to the next queued directory.
                }

                // Checked before counting, so an interrupted directory is not counted.
                if (Shutdown.ShutdownRequested)
                {
                    break;
                }

                dirs_scanned++;
            }

            Logger.Log.Debug("Scanned {0} files and directories in {1} directories", dirs_scanned + files_scanned, dirs_scanned);

            if (Shutdown.ShutdownRequested)
            {
                backing_fa_store.Flush();
                return;
            }

            // Time to remove deleted directories from the index and attributes store
            while (dirs_to_check.Count > 0)
            {
                DirectoryInfo checked_dir = (DirectoryInfo)dirs_to_check.Dequeue();
                Logger.Log.Debug("Checking {0} for deleted files and directories", checked_dir.FullName);

                // Get a list of all documents from lucene index with ParentDirUriPropKey set as that of checked_dir
                ICollection entries = GetAllItemsInDirectory(checked_dir);
                foreach (Dirent entry in entries)
                {
                    // Use the existence test that matches the kind of entry.
                    bool still_on_disk = entry.IsDirectory
                                         ? Directory.Exists(entry.FullName)
                                         : File.Exists(entry.FullName);

                    if (still_on_disk)
                    {
                        continue;
                    }

                    if (entry.IsDirectory)
                    {
                        // Recursively remove deleted subdirectories
                        dirs_to_check.Enqueue(new DirectoryInfo(entry.FullName));
                    }

                    // Schedule removal of the vanished item from the index.
                    Uri removed_uri = PathToUri(entry.FullName);
                    Indexable removal = new Indexable(IndexableType.Remove, removed_uri);
                    AddToRequest(removal);
                }
            }

            // Call Flush until our request is empty.  We have to do this in a loop
            // because Flush happens in a batch size and some indexables might generate more indexables
            bool flush_again = false;

            while (flush_again || pending_request.Count > 0)
            {
                if (Shutdown.ShutdownRequested)
                {
                    break;
                }

                flush_again = FlushIndexer(driver);
            }

            backing_fa_store.Flush();

            if (Shutdown.ShutdownRequested)
            {
                return;
            }

            Logger.Log.Debug("Optimizing index");
            driver.OptimizeNow();
        }
Exemple #7
0
        /**
         * Main routine of beagrep-build-index.
         *
         * Steps, in order:
         *  1. Parse the command line into the arg_* settings, the pattern
         *     lists and the pending_directories / pending_files queues.
         *  2. Validate the target: it must be given, must not be one of the
         *     source paths, its parent directory must exist, and an existing
         *     target may only contain known index files and directories.
         *  3. Reconcile --source with the "Source" option of an existing
         *     StaticIndex.xml in the target.
         *  4. Build the allow/deny regular expressions.
         *  5. Create the indexing driver and attribute stores, install signal
         *     handlers, optionally start the memory monitor, and run IndexWorker.
         *  6. Write StaticIndex.xml when no previous source was recorded and
         *     re-launch the process when a restart was requested.
         *
         * Every fatal validation error ends the process with exit status 1.
         */
        static void DoMain(string [] args)
        {
            SystemInformation.SetProcessName("beagrep-build-index");

            if (args.Length < 2)
            {
                PrintUsage();
            }

            ArrayList allowed_patterns    = new ArrayList();
            ArrayList denied_patterns     = new ArrayList();
            ArrayList denied_dir_patterns = new ArrayList();

            int i = 0;

            while (i < args.Length)
            {
                string arg = args [i];
                ++i;
                // Value for options that take one; null when arg is the last argument.
                string next_arg = i < args.Length ? args [i] : null;

                switch (arg)
                {
                case "-h":
                case "--help":
                    PrintUsage();
                    break;

                case "--tag":
                    if (next_arg != null)
                    {
                        arg_tag = next_arg;
                    }
                    ++i;
                    break;

                case "-r":
                case "--recursive":
                    arg_recursive = true;
                    break;

                case "--enable-deletion":
                    arg_delete = true;
                    break;

                case "--disable-directories":
                    arg_disable_directories = true;
                    break;

                case "--enable-text-cache":
                    arg_cache_text = true;
                    break;

                case "--target":
                    if (next_arg != null)
                    {
                        arg_output = Path.IsPathRooted(next_arg) ? next_arg : Path.GetFullPath(next_arg);
                    }
                    ++i;
                    break;

                case "--disable-filtering":
                    arg_disable_filtering = true;
                    break;

                case "--allow-pattern":
                    if (next_arg == null)
                    {
                        break;
                    }

                    AddCommaSeparatedPatterns(allowed_patterns, next_arg);
                    ++i;
                    break;

                case "--deny-directory-pattern":
                    if (next_arg == null)
                    {
                        break;
                    }

                    AddCommaSeparatedPatterns(denied_dir_patterns, next_arg);
                    ++i;
                    break;

                case "--deny-pattern":
                    if (next_arg == null)
                    {
                        break;
                    }

                    AddCommaSeparatedPatterns(denied_patterns, next_arg);
                    ++i;
                    break;

                case "--disable-restart":
                    arg_disable_restart = true;
                    break;

                case "--source":
                    if (next_arg == null)
                    {
                        break;
                    }

                    arg_source = next_arg;
                    ++i;
                    break;

                default:
                    // Anything that looks like an option but was not matched above is unknown.
                    if (arg.StartsWith("-"))
                    {
                        PrintUsage();
                    }

                    string path = Path.IsPathRooted(arg) ? arg : Path.GetFullPath(arg);
                    if (path != "/" && path.EndsWith("/"))
                    {
                        path = path.TrimEnd('/');
                    }

                    // Paths that are neither an existing directory nor an existing file are dropped.
                    if (Directory.Exists(path))
                    {
                        pending_directories.Enqueue(new DirectoryInfo(path));
                    }
                    else if (File.Exists(path))
                    {
                        pending_files.Enqueue(new FileInfo(path));
                    }
                    break;
                }
            }

            /////////////////////////////////////////////////////////

            if (arg_output == null)
            {
                Logger.Log.Error("--target must be specified");
                Environment.Exit(1);
            }

            // Set the storage dir, this should be used to store log messages
            // and filterver.dat
            PathFinder.StorageDir = arg_output;

            foreach (FileSystemInfo info in pending_directories)
            {
                if (Path.GetFullPath(arg_output) == info.FullName)
                {
                    Logger.Log.Error("Target directory cannot be one of the source paths.");
                    Environment.Exit(1);
                }
            }

            foreach (FileSystemInfo info in pending_files)
            {
                if (Path.GetFullPath(arg_output) == info.FullName)
                {
                    Logger.Log.Error("Target directory cannot be one of the source paths.");
                    Environment.Exit(1);
                }
            }

            if (!Directory.Exists(Path.GetDirectoryName(arg_output)))
            {
                Logger.Log.Error("Index directory not available for construction: {0}", arg_output);
                Environment.Exit(1);
            }

            // Be *EXTRA PARANOID* about the contents of the target
            // directory, because creating an indexing driver will
            // nuke it.
            if (Directory.Exists(arg_output))
            {
                foreach (FileInfo info in DirectoryWalker.GetFileInfos(arg_output))
                {
                    if (Array.IndexOf(allowed_files, info.Name) == -1)
                    {
                        Logger.Log.Error("{0} doesn't look safe to delete: non-Beagrep file {1} was found", arg_output, info.FullName);
                        Environment.Exit(1);
                    }
                }

                foreach (DirectoryInfo info in DirectoryWalker.GetDirectoryInfos(arg_output))
                {
                    if (Array.IndexOf(allowed_dirs, info.Name) == -1)
                    {
                        Logger.Log.Error("{0} doesn't look safe to delete: non-Beagrep directory {1} was found", arg_output, info.FullName);
                        Environment.Exit(1);
                    }
                }
            }

            string config_file_path = Path.Combine(arg_output, "StaticIndex.xml");
            string prev_source      = null;

            if (File.Exists(config_file_path))
            {
                Config static_index_config = Conf.LoadFrom(config_file_path);
                if (static_index_config == null)
                {
                    Log.Error("Invalid configuation file {0}", config_file_path);
                    Environment.Exit(1);
                }

                prev_source = static_index_config.GetOption("Source", null);
                if (arg_source != null && prev_source != arg_source)
                {
                    Log.Error("Source already set to {0} for existing static index. Cannot set source to {1}.", prev_source, arg_source);
                    Environment.Exit(1);
                }

                // If arg_source is not given, and prev_source is present, use prev_source
                // as the arg_source. This is useful for re-running build-index without
                // giving --arg_source for already existing static index
                arg_source = prev_source;
            }


            // Without any source name, fall back to the name of the target directory.
            if (arg_source == null)
            {
                DirectoryInfo dir = new DirectoryInfo(StringFu.SanitizePath(arg_output));
                arg_source = dir.Name;
            }

            string global_files_config = Path.Combine(PathFinder.ConfigDataDir, "config-files");

            global_files_config = Path.Combine(global_files_config, Conf.Names.FilesQueryableConfig + ".xml");
            if (!File.Exists(global_files_config))
            {
                Log.Error("Global configuration file not found {0}", global_files_config);
                // This is a fatal error, so report failure like the other error exits do.
                Environment.Exit(1);
            }

            // Setup regexes for allowed/denied patterns
            if (allowed_patterns.Count > 0)
            {
                allowed_regex = StringFu.GetPatternRegex(allowed_patterns);
            }
            else
            {
                // Read the exclude values from config
                // For system-wide indexes, only the global config value will be used
                Config          config = Conf.Get(Conf.Names.FilesQueryableConfig);
                List <string[]> values = config.GetListOptionValues(Conf.Names.ExcludePattern);
                if (values != null)
                {
                    foreach (string[] exclude in values)
                    {
                        denied_patterns.Add(exclude [0]);
                    }
                }

                if (denied_patterns.Count > 0)
                {
                    denied_regex = StringFu.GetPatternRegex(denied_patterns);
                }
            }

            if (denied_dir_patterns.Count > 0)
            {
                denied_dir_regex = StringFu.GetPatternRegex(denied_dir_patterns);
                Log.Always("Will ignore directories matching regular expression: {0}", denied_dir_regex);
            }

            Log.Always("Starting beagrep-build-index (pid {0}) at {1}", Process.GetCurrentProcess().Id, DateTime.Now);

            driver           = new LuceneIndexingDriver(arg_output, MINOR_VERSION, false);
            driver.TextCache = (arg_cache_text) ? new TextCache(arg_output) : null;
            if (driver.TextCache != null)
            {
                driver.TextCache.WorldReadable = true;
            }

            backing_fa_store = new FileAttributesStore_Sqlite(driver.TopDirectory, driver.Fingerprint);
            fa_store         = new FileAttributesStore(backing_fa_store);

            // Set up signal handlers
#if MONO_1_9
            Shutdown.SetupSignalHandlers(delegate(int signal)
            {
                if (signal == (int)Mono.Unix.Native.Signum.SIGINT ||
                    signal == (int)Mono.Unix.Native.Signum.SIGTERM)
                {
                    Shutdown.BeginShutdown();
                }
            });
#else
            SetupSignalHandlers();
#endif

            Thread monitor_thread = null;

            Stopwatch watch = new Stopwatch();
            watch.Start();

            if (!arg_disable_restart)
            {
                // Start the thread that monitors memory usage.
                monitor_thread = ExceptionHandlingThread.Start(new ThreadStart(MemoryMonitorWorker));
            }

            // Start indexworker to do the crawling and indexing
            IndexWorker();

            // Join any threads so that we know that we're the only thread still running
            if (monitor_thread != null)
            {
                monitor_thread.Join();
            }

            watch.Stop();
            Logger.Log.Debug("Elapsed time {0}.", watch);

            // Write this after indexing is done. This is because, if creating a new index,
            // LuceneIndexingDriver.Create() is called which purges the entire directory.

            if (prev_source == null)
            {
                Config static_index_config = Conf.LoadNew("StaticIndex.xml");

                // Write StaticIndex.xml containing:
                // The name of the source
                static_index_config.SetOption("Source", arg_source);
                static_index_config ["Source"].Description = "Source of the static index";


                Conf.SaveTo(static_index_config, config_file_path);
            }

            if (restart)
            {
                Logger.Log.Debug("Restarting beagrep-build-index");
                Process p = new Process();
                p.StartInfo.UseShellExecute = false;
                // FIXME: Maybe this isn't the right way to do things?  It should be ok,
                // the PATH is inherited from the shell script which runs mono itself.
                // NOTE(review): the arguments are joined with plain spaces and are not
                // re-quoted, so arguments containing spaces may not survive the restart.
                p.StartInfo.FileName  = "mono";
                p.StartInfo.Arguments = String.Join(" ", Environment.GetCommandLineArgs());
                p.Start();
            }

            Log.Always("Exiting beagrep-build-index (pid {0}) at {1}", Process.GetCurrentProcess().Id, DateTime.Now);
        }

        /**
         * Append the patterns in value to patterns. value may hold a single
         * pattern or several separated by commas; each piece is added as is.
         */
        static void AddCommaSeparatedPatterns(ArrayList patterns, string value)
        {
            // Split returns the whole string as its only element when there is no comma.
            foreach (string pattern in value.Split(','))
            {
                patterns.Add(pattern);
            }
        }