// Hook run after a directory crawl: reports the directory that was being
// crawled to the queryable, then clears the generator and directory fields.
private void PostCrawlHook ()
{
	Logger.Log.Debug ("Done crawling '{0}'", current_dir.FullName);

	// Let the queryable do its bookkeeping for the directory.
	queryable.DoneCrawlingOneDirectory (current_dir);

	// Nothing is in flight any more.
	current_generator = null;
	current_dir = null;
}
// Creates a generator for the given directory.
//   queryable - the owning FileSystemQueryable.
//   directory - the directory to enumerate; when null the generator is
//               immediately flagged as done and no enumerator is created.
public DirectoryIndexableGenerator (FileSystemQueryable queryable, DirectoryModel directory)
{
	this.queryable = queryable;
	this.directory = directory;

	// Without a directory there is nothing to walk.
	if (this.directory == null) {
		done = true;
		return;
	}

	files = DirectoryWalker.GetFileInfos (this.directory.FullName).GetEnumerator ();
}
// Queues a directory for crawling unless it is already pending.
// Returns 'true' only if the queue was empty before this item was added;
// returns 'false' if the queue was non-empty or the directory was already queued.
public bool Add (DirectoryModel dir)
{
	lock (big_lock) {
		bool queue_was_empty = (to_be_crawled.Count == 0);

		// An empty queue cannot contain dir, so only look it up
		// when there is something in the queue.
		if (! queue_was_empty) {
			if (to_be_crawled.Contains (dir))
				return false;
		}

		to_be_crawled.Enqueue (dir);
		Description = String.Format ("Pending directories: {0}", to_be_crawled.Count);

		return queue_was_empty;
	}
}
// MergeXmpData can return an entirely new indexable or just change the passed indexable.
//
// In crawl mode this returns null without touching the indexable: whenever an
// xmp file is encountered during a crawl, it either schedules the basefile or
// notices that the basefile is already scheduled, so there is no need to check
// for the existence of foo.xmp for each crawled file foo.
//
// Outside crawl mode the work is delegated to GetNewXmpIndexable and its
// result is returned as-is.
public Indexable MergeXmpData (ref Indexable indexable, string path, Guid id, DirectoryModel parent, bool crawling)
{
	if (! crawling)
		return GetNewXmpIndexable (ref indexable, path, id, parent);

	return null;
}
// Merges the properties of "<path>.xmp", if such a file exists, into the
// passed indexable and records the state PostAddHook needs.
//   indexable - the basefile indexable that receives the xmp properties.
//   path      - path of the basefile.
//   id        - uid of the basefile (only used for the debug message).
//   parent    - directory model containing the basefile.
// Always returns null; the passed indexable is modified in place.
private Indexable GetNewXmpIndexable (ref Indexable indexable, string path, Guid id, DirectoryModel parent)
{
	// In non-crawl mode, check if a corresponding xmp file is present and not already scheduled and index it.
	// If file.xmp and file are rapidly written/updated (in that order), this does the right thing.
	// If file and file.xmp are rapidly written/updated (in that order), either
	// - file.xmp is present during FileToIndexable(file): in which case xmp properties are
	//   added to file; and when file.xmp is indexed, it will replace the xmp properties
	// - file.xmp is not present during FileToIndexable(file): when the xmp file is later indexed
	//   it will add the xmp properties
	// since the uid file will still be in the uid-cache, correct uid will be used for xmp prop-change indexable
	string possible_xmp_file_path = string.Concat (path, ".xmp");
	if (! File.Exists (possible_xmp_file_path))
		return null;

	Guid xmp_id = queryable.RegisterFile (parent, (Path.GetFileName (possible_xmp_file_path)));
	if (xmp_id == Guid.Empty)
		return null;

	XmpFile xmp_file = null;
	try {
		xmp_file = new XmpFile (possible_xmp_file_path);
	} catch {
		// Best effort: an unreadable xmp file must not stop the basefile
		// from being indexed. Say so instead of failing silently, and
		// drop the id that was just registered for the xmp file.
		Log.Warn ("Cannot create xmpfile from {0}", possible_xmp_file_path);
		uid_manager.ForgetNewId (possible_xmp_file_path);
		return null;
	}

	// Close the xmp file even if copying a property throws.
	try {
		// FIXME: Should also delete previous xmp properties!
		foreach (Property p in xmp_file.Properties) {
			p.IsMutable = true;
			indexable.AddProperty (p);
		}
	} finally {
		xmp_file.Close ();
	}

	// Also need to save some local states for PostAddHook,
	// namely, path to the xmp file, path to basefile and generated uid
	indexable.LocalState ["XmpFilePath"] = possible_xmp_file_path;
	indexable.LocalState ["BaseFilePath"] = path;
	indexable.LocalState ["XmpGuid"] = GuidFu.ToShortString (xmp_id);

	if (Debug)
		Log.Debug ("Adding properties from {0}({2}) to {1}({3})",
			   possible_xmp_file_path,
			   path,
			   GuidFu.ToShortString (xmp_id),
			   GuidFu.ToShortString (id));

	return null;
}
// Remembers the uid of the entry called "name" inside "dir" in the
// path-keyed uid cache, replacing any id already stored for that path.
internal void RegisterNewId(string name, DirectoryModel dir, Guid id)
{
	//Log.Debug ("Registering {0}={1}", name, GuidFu.ToShortString (id));
	string full_path = Path.Combine(dir.FullName, name);
	cached_uid_by_path [full_path] = id;
}
// Cuts this node loose from the tree. As the name says, no locking is done
// here. The statement order is deliberate: the parent's child table is
// updated before the parent reference is dropped, and the cached values
// are expired last.
private void Detatch_Unlocked ()
{
	// A root forgets where it was rooted.
	if (IsRoot) {
		rooted_to = null;
	}

	// Unhook ourselves from the parent's child table.
	if (parent != null) {
		parent.children.Remove (name);
	}

	big_lock = null;
	parent = null;

	ExpireCached_Unlocked ();
}
// Considers the directory "name" under "parent" (parent == null means that
// "name" is itself the full path) and either schedules it for indexing or
// just registers a model for it.
public void AddDirectory (DirectoryModel parent, string name)
{
	// Ignore the stuff we want to ignore.
	if (filter.Ignore (parent, name, true))
		return;

	// FIXME: ! parent.HasChildWithName (name)
	if (parent != null && parent.HasChildWithName (name))
		return;

	string path;
	if (parent != null)
		path = Path.Combine (parent.FullName, name);
	else
		path = name;

	if (Debug)
		Logger.Log.Debug ("Adding directory '{0}'", path, name);

	if (! Directory.Exists (path)) {
		Logger.Log.Error ("Can't add directory: '{0}' does not exist", path);
		return;
	}

	FileAttributes attr = FileAttributesStore.Read (path);

	// Note that we don't look at the mtime of a directory when
	// deciding whether or not to index it.
	bool needs_indexing;

	if (attr == null) {
		// If it has no attributes, it definitely needs
		// indexing.
		needs_indexing = true;
	} else {
		// Make sure that it still has the same name as before.
		// If not, we need to re-index it.
		// We can do this since we preloaded all of the name
		// info in the directory via PreloadDirectoryNameInfo.
		string last_known_name = UniqueIdToDirectoryName (attr.UniqueId);

		needs_indexing = (last_known_name != path);
		if (needs_indexing)
			Logger.Log.Debug ("'{0}' now seems to be called '{1}'", last_known_name, path);
	}

	// If we can't descend into this directory, we want to
	// index it but not build a DirectoryModel for it.
	// FIXME: We should do the right thing when a
	// directory's permissions change.
	bool is_walkable = DirectoryWalker.IsWalkable (path);
	if (! is_walkable)
		Logger.Log.Debug ("Can't walk '{0}'", path);

	if (needs_indexing) {
		ScheduleDirectory (name, parent, attr, is_walkable);
		return;
	}

	if (is_walkable)
		RegisterDirectory (name, parent, attr);
}
// Drops a directory from the model and queues an immediate task that
// removes its entry from the index.
private void RemoveDirectory (DirectoryModel dir)
{
	Uri internal_uri = GuidFu.ToUri (dir.UniqueId);

	Indexable removal = new Indexable (IndexableType.Remove, internal_uri);
	removal.DisplayUri = UriFu.PathToFileUri (dir.FullName);

	// Remember a copy of our external Uri, so that we can
	// easily remap it in the PostRemoveHook.
	removal.LocalState ["RemovedUri"] = removal.DisplayUri;

	// Forget watches and internal references
	ForgetDirectoryRecursively (dir);

	// Calling Remove will expire the path names,
	// so name caches will be cleaned up accordingly.
	dir.Remove ();

	// We *add* the indexable to *remove* the index item
	Scheduler.Task removal_task = NewAddTask (removal);
	removal_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (removal_task);
}
///////////////////////////////////////////////////////////

// Builds a root node for the given path.
//   big_lock - lock object stored on the new root.
//   path     - path of the root; it is sanitized before use.
//   attr     - file attributes handed to the DirectoryModel constructor.
// Returns the newly created root model.
static public DirectoryModel NewRoot (object big_lock, string path, FileAttributes attr)
{
	string clean_path = StringFu.SanitizePath (path);

	DirectoryModel new_root = new DirectoryModel (attr);
	new_root.big_lock = big_lock;
	new_root.rooted_to = FileSystem.GetDirectoryNameRootOk (clean_path);
	new_root.name = Path.GetFileName (clean_path);

	return new_root;
}
// Builds an indexable for the directory and queues it as a delayed add task.
// When no attributes are known (attr == null) a fresh uid is generated and
// the last crawl time is taken to be DateTime.MinValue.
// Nothing is scheduled if DirectoryToIndexable returns null.
private void ScheduleDirectory (string name, DirectoryModel parent, FileAttributes attr, bool is_walkable)
{
	string path;
	if (parent == null)
		path = name;
	else
		path = Path.Combine (parent.FullName, name);

	Guid id;
	DateTime last_crawl;
	if (attr == null) {
		id = Guid.NewGuid ();
		last_crawl = DateTime.MinValue;
	} else {
		id = attr.UniqueId;
		last_crawl = attr.LastWriteTime;
	}

	Indexable indexable = DirectoryToIndexable (path, id, parent);
	if (indexable == null)
		return;

	// State stashed on the indexable for later processing.
	indexable.LocalState ["Name"] = name;
	indexable.LocalState ["LastCrawl"] = last_crawl;
	indexable.LocalState ["IsWalkable"] = is_walkable;

	Scheduler.Task add_task = NewAddTask (indexable);
	add_task.Priority = Scheduler.Priority.Delayed;
	ThisScheduler.Add (add_task);
}
// Stores "id" in the path-keyed uid cache under the full path of "name"
// inside "dir", overwriting any previous entry for that path.
internal void RegisterNewId (string name, DirectoryModel dir, Guid id)
{
	//Log.Debug ("Registering {0}={1}", name, GuidFu.ToShortString (id));
	string key = Path.Combine (dir.FullName, name);
	cached_uid_by_path [key] = id;
}
///////////////////////////////////////////////////////////

// Moving stuff around

// Detaches this node from its current parent and attaches it to new_parent,
// optionally renaming it on the way.
//   new_parent - the node to attach to; must not be null.
//   new_name   - the new name, or null to keep the current one.
// Throws ArgumentNullException if new_parent is null.
public void MoveTo (DirectoryModel new_parent, string new_name)
{
	// Check the destination before touching the tree: failing after the
	// detach below would leave this node orphaned.
	if (new_parent == null)
		throw new ArgumentNullException ("new_parent");

	lock (big_lock)
		Detatch_Unlocked ();

	// No need to lock anything here, since this node
	// is just floating out in space.
	if (new_name != null)
		this.name = new_name;

	lock (new_parent.big_lock)
		new_parent.Attach_Unlocked (this);
}
// Flags an attached directory as uncrawlable, releasing its watch first.
// Directories that are no longer attached are left untouched.
public void MarkDirectoryAsUncrawlable (DirectoryModel dir)
{
	if (dir.IsAttached) {
		// If we managed to get set up a watch on this directory,
		// drop it.
		if (dir.WatchHandle != null) {
			event_backend.ForgetWatch (dir.WatchHandle);
			dir.WatchHandle = null;
		}

		dir.MarkAsUncrawlable ();
	}
}
// Puts "dir" into the given state and adds it to the tree crawl task, then
// does the same for every directory below it (parent before children).
private void Recrawl_Recursive (DirectoryModel dir, DirectoryState state)
{
	dir.State = state;
	tree_crawl_task.Add (dir);

	foreach (DirectoryModel child in dir.Children) {
		Recrawl_Recursive (child, state);
	}
}
// This is called from the PostFlushHook of DirectoryIndexableGenerator i.e.
// after PostAddHook() has Registered the directory.
//
// Stamps the stored attributes and the model with the current time and
// marks the directory clean. Detached directories are skipped entirely.
public void DoneCrawlingOneDirectory (DirectoryModel dir)
{
	if (! dir.IsAttached)
		return;

	FileAttributes attr = FileAttributesStore.Read (dir.FullName);

	if (attr != null) {
		// We don't have to be super-careful about this since
		// we only use the FileAttributes mtime on a directory
		// to determine its initial state, not whether or not
		// its index record is up-to-date.
		attr.LastWriteTime = DateTime.UtcNow;

		// ...but we do use this to decide which order directories get
		// crawled in.
		dir.LastCrawlTime = DateTime.UtcNow;

		FileAttributesStore.Write (attr);
	} else {
		// We couldn't read our attribute back in for some
		// reason.  Complain loudly.
		Log.Error ("Unable to read attributes for recently crawled directory {0}", dir.FullName);
	}

	// Either way the directory counts as clean now.
	dir.MarkAsClean ();
}
//////////////////////////////////////////////////////////////////////////

//
// This code controls the directory crawl order
//

// Depth-first walk over "contender" and everything below it. A directory
// that needs crawling replaces the current best when there is no best yet
// or when prev_best.CompareTo (candidate) is negative.
// Returns the best directory found, or prev_best if nothing beat it.
private DirectoryModel StupidWalk (DirectoryModel prev_best, DirectoryModel contender)
{
	bool contender_wins =
		contender.NeedsCrawl
		&& (prev_best == null || prev_best.CompareTo (contender) < 0);

	DirectoryModel best = contender_wins ? contender : prev_best;

	foreach (DirectoryModel child in contender.Children) {
		best = StupidWalk (best, child);
	}

	return best;
}
// Moves and/or renames a directory model and schedules the matching
// property-change indexable.
//   dir        - the model being moved; when null the destination is simply
//                added as a new directory.
//   new_parent - the destination parent, or null if we are just renaming.
//   new_name   - the name the directory has after the move.
// Throws if dir is a root.
private void MoveDirectory (DirectoryModel dir,
			    DirectoryModel new_parent, // or null if we are just renaming
			    string new_name)
{
	if (dir == null) {
		// new_parent is allowed to be null here, so don't dereference it
		// blindly just to build the log message.
		Logger.Log.Warn ("Couldn't find DirectoryModel for directory moving to '{0}' in '{1}', so it was hopefully never there.",
				 new_name,
				 (new_parent != null) ? new_parent.FullName : null);
		AddDirectory (new_parent, new_name);
		return;
	}

	if (dir.IsRoot)
		throw new Exception ("Can't move root " + dir.FullName);

	// We'll need this later in order to generate the
	// right change notification.
	string old_path;
	old_path = dir.FullName;

	if (new_parent != null && new_parent != dir.Parent)
		dir.MoveTo (new_parent, new_name);
	else
		dir.Name = new_name;

	// Remember this by path
	lock (dir_models_by_path)
		dir_models_by_path [dir.FullName] = dir;

	CacheDirectoryNameChange (dir.UniqueId, dir.Parent.UniqueId, new_name);

	Indexable indexable;
	indexable = NewRenamingIndexable (new_name,
					  dir.UniqueId,
					  dir.Parent, // == new_parent
					  old_path);

	// NewRenamingIndexable returns null (after logging a warning) for
	// names it does not support, e.g. ones ending in ".xmp". There is
	// nothing to schedule in that case.
	if (indexable == null)
		return;

	indexable.LocalState ["OurDirectoryModel"] = dir;

	Scheduler.Task task;
	task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	// Danger Will Robinson!
	// We need to use BlockUntilNoCollision to get the correct notifications
	// in a mv a b; mv b c; mv c a situation.
	// FIXME: And now that type no longer exists!
	ThisScheduler.Add (task);
}
// Hooks "child" in underneath this node. No locking is done here.
// Throws if the child is a root, if it still has a parent, or if this node
// already has a child with the same name.
private void Attach_Unlocked (DirectoryModel child)
{
	if (child.IsRoot)
		throw new Exception (String.Format ("Can't attach root node '{0}' to '{1}'",
						    child.Name, this.FullName));

	if (child.parent != null)
		throw new Exception (String.Format ("Can't attach non-detatched node '{0}' to '{1}'",
						    child.Name, this.FullName));

	// The child table is created lazily; a brand new table cannot
	// contain a clash, so the name check is only needed otherwise.
	if (children == null)
		children = new Hashtable ();
	else if (children.Contains (child.Name))
		throw new Exception (String.Format ("'{0}' already contains a child named '{1}'",
						    this.FullName, child.Name));

	// The child shares our lock from now on.
	child.big_lock = this.big_lock;
	child.parent = this;
	this.children [child.name] = child;
}
// Returns the uid already known for "name" inside "dir"; when the lookup
// yields Guid.Empty, a new id is created for the combined path instead.
internal Guid ReadOrCreateNewId (DirectoryModel dir, string name)
{
	Guid known_id = NameAndParentToId (name, dir);

	if (known_id == Guid.Empty)
		return CreateNewId (Path.Combine (dir.FullName, name));

	return known_id;
}
// Creates a new model named "child_name" with the given attributes and
// attaches it below this node, all while holding big_lock.
// Returns the new child.
public DirectoryModel AddChild (string child_name, FileAttributes attr)
{
	lock (big_lock) {
		DirectoryModel new_child = new DirectoryModel (attr);
		new_child.name = child_name;

		Attach_Unlocked (new_child);

		return new_child;
	}
}
// This works for files.  (It probably works for directories
// too, but you should use one of the more efficient means
// above if you know it is a directory.)
// This is mostly used for getting uid for deleted files.
//
// The path-keyed uid cache is consulted first; only on a miss is the
// name resolver asked, using the name and the parent's uid.
internal Guid NameAndParentToId (string name, DirectoryModel dir)
{
	string full_path = Path.Combine (dir.FullName, name);

	if (cached_uid_by_path.Contains (full_path))
		return (Guid) cached_uid_by_path [full_path];

	return name_resolver.GetIdByNameAndParentId (name, dir.UniqueId);
}
// Convenience overload: moves this node under new_parent while keeping
// its current name (a null name is passed to the two-argument MoveTo).
public void MoveTo (DirectoryModel new_parent)
{
	string keep_current_name = null;
	MoveTo (new_parent, keep_current_name);
}
// Creates a DirectoryModel for an existing directory, starts watching it
// and queues it for crawling.
//   name   - directory name (the full path when parent is null).
//   parent - parent model, or null when registering a root.
//   attr   - stored attributes of the directory; dereferenced without a
//            null check, so callers must pass a non-null value.
// Returns false if the directory appears to have gone away, true otherwise.
private bool RegisterDirectory (string name, DirectoryModel parent, FileAttributes attr)
{
	bool is_new_root = (parent == null);

	string path = is_new_root ? name : Path.Combine (parent.FullName, name);

	if (Debug)
		Logger.Log.Debug ("Registered directory '{0}' ({1})", path, attr.UniqueId);

	DateTime mtime = Directory.GetLastWriteTimeUtc (path);

	if (! FileSystem.ExistsByDateTime (mtime)) {
		Log.Debug ("Directory '{0}' ({1}) appears to have gone away", path, attr.UniqueId);
		return false;
	}

	DirectoryModel dir = is_new_root
		? DirectoryModel.NewRoot (big_lock, path, attr)
		: parent.AddChild (name, attr);

	// Modified since the stored write time: needs another look.
	if (mtime > attr.LastWriteTime) {
		dir.State = DirectoryState.Dirty;
		if (Debug)
			Logger.Log.Debug ("'{0}' is dirty", path);
	}

	if (Debug) {
		if (dir.IsRoot)
			Logger.Log.Debug ("Created model '{0}'", dir.FullName);
		else
			Logger.Log.Debug ("Created model '{0}' with parent '{1}'",
					  dir.FullName, dir.Parent.FullName);
	}

	// Add any roots we create to the list of roots
	if (dir.IsRoot)
		roots.Add (dir);

	// Add the directory to our by-id hash, and remove any NameInfo
	// we might have cached about it.
	dir_models_by_id [dir.UniqueId] = dir;
	name_info_by_id.Remove (dir.UniqueId);

	// Start watching the directory.
	dir.WatchHandle = event_backend.CreateWatch (path);

	// Schedule this directory for crawling.
	bool queue_was_empty = tree_crawl_task.Add (dir);
	if (queue_was_empty)
		ThisScheduler.Add (tree_crawl_task);

	// Make sure that our file crawling task is active,
	// since presumably we now have something new to crawl.
	ActivateFileCrawling ();

	return true;
}
// Must be called from inside big_lock.
//
// Records whether the task is currently active and passes the directory
// being crawled on to the queryable.
// Note: the current_dir parameter hides the field of the same name.
private void SetIsActive (bool is_active, DirectoryModel current_dir)
{
	this.is_active = is_active;

	queryable.UpdateIsIndexing (current_dir);
}
// Builds the Remove indexable for the file "name" inside "dir" and forgets
// the cached uid and stored attributes of the file and of its ".xmp"
// companion.
// Returns null for ".xmp" names (not supported yet) and when no uid can
// be resolved for the file.
private Indexable FileRemoveIndexable (DirectoryModel dir, string name)
{
	// FIXME
	if (name.EndsWith (".xmp")) {
		Log.Warn ("Deleting of xmp files is not yet supported!");
		return null;
	}

	Guid unique_id = uid_manager.NameAndParentToId (name, dir);
	if (unique_id == Guid.Empty) {
		Log.Info ("Could not resolve unique id of '{0}' in '{1}' for removal -- it is probably already gone",
			  name, dir.FullName);
		return null;
	}

	string path = Path.Combine (dir.FullName, name);
	string xmp_path = string.Concat (path, ".xmp");

	Indexable removal = new Indexable (IndexableType.Remove, GuidFu.ToUri (unique_id));
	removal.DisplayUri = UriFu.PathToFileUri (path);
	removal.LocalState ["RemovedUri"] = removal.DisplayUri;

	// While adding, wait till the files are added to index for clearing cached_uid and writing attributes
	// For removal, do them first and then remove from index
	uid_manager.ForgetNewId (path);
	FileAttributesStore.Drop (path);

	// Do the same for the corresponding xmp file
	uid_manager.ForgetNewId (xmp_path);
	FileAttributesStore.Drop (xmp_path);

	return removal;
}
/////////////////////////////////////////////////////////////

// Try to match any of our current excludes to determine if
// we should ignore a file/directory or not.
//
//   parent       - directory containing the entry, or null if "name" is a
//                  full path.
//   name         - name of the entry (full path when parent is null).
//   is_directory - only used in the debug output here.
// Returns true if the entry, or any directory above it, is excluded.
public bool Ignore (DirectoryModel parent, string name, bool is_directory)
{
	if (Debug)
		Logger.Log.Debug ("*** Ignore Check (parent={0}, name={1}, is_directory={2})",
				  (parent != null) ? parent.FullName : null, name, is_directory);

	// If parent is null, we have a root.  But it might not be
	// active anymore so we need to check if it's still in the list.
	if (parent == null && queryable.Roots.Contains (name)) {
		if (Debug)
			Logger.Log.Debug ("*** Ignore Check Passed");
		return false;
	}

	string path = (parent != null) ? Path.Combine (parent.FullName, name) : name;

	// Exclude paths
	foreach (string excluded_prefix in exclude_paths) {
		if (path.StartsWith (excluded_prefix))
			return true;
	}

	// Hardcode this pattern without relying on config
	if (name.StartsWith ("."))
		return true;

	// Exclude patterns
	if (exclude_regex != null && exclude_regex.IsMatch (name))
		return true;

	if (parent == null) {
		if (Debug)
			Logger.Log.Debug ("*** Parent is null (name={0}, is_directory={1}", name, is_directory);
		return false;
	}

	// This is kind of a hack, but if parent.Parent is null, we need to pass
	// the full path of the directory as second argument to Ignore to allow
	// us to do the root check.
	string parent_name;
	if (parent.Parent == null)
		parent_name = parent.FullName;
	else
		parent_name = parent.Name;

	// Walk one level up and repeat the checks for the containing directory.
	return Ignore (parent.Parent, parent_name, true);
}
// Moves this node under new_parent without renaming it: delegates to the
// two-argument MoveTo with a null name.
public void MoveTo(DirectoryModel new_parent)
{
	string unchanged_name = null;
	MoveTo(new_parent, unchanged_name);
}
// Creates the property-change indexable that carries the properties of an
// xmp file over to its basefile.
//   path   - path of the xmp file.
//   id     - uid generated for the xmp file.
//   parent - directory model containing the xmp file and its basefile.
// Returns null if the path is too short, the basefile does not exist, the
// xmp file cannot be opened, or the basefile vanished in the meantime.
// As a side effect the basefile may get scheduled for indexing.
public Indexable GetXmpQueryable (string path, Guid id, DirectoryModel parent)
{
	Log.Debug ("Asked to create xmp indexable for ({0}) {1}", GuidFu.ToShortString (id), path);

	// Should be at least 6 characters /<...>.xmp
	if (path.Length < 6)
		return null;

	string basefile_path = Path.ChangeExtension (path, null);
	// Ignore xmp files by itself
	// FIXME: To support indexing independent xmp files will require even greater trouble
	if (! File.Exists (basefile_path))
		return null;

	XmpFile xmp_file = null;
	try {
		xmp_file = new XmpFile (path);
	} catch {
		Log.Warn ("Cannot create xmpfile from {0}", path);
		return null;
	}

	// From here on the xmp file is open; the finally block makes sure it
	// gets closed on every exit path, including exceptions thrown while
	// resolving or scheduling the basefile.
	try {
		// Try to get the correct uid for the basefile
		// First we need to see if basefile is already scheduled (yet to be dispatched)
		Uri basefile_uri = null;
		Indexable base_indexable;

		if (uid_manager.HasNewId (basefile_path)) {
			// Since uid_manager has a new id for this basefile, so basefile is already scheduled
			// Get basefile uid from there
			Guid basefile_id = uid_manager.GetNewId (basefile_path);
			basefile_uri = GuidFu.ToUri (basefile_id);
			Log.Debug ("{0} is already scheduled with uri {1}", basefile_path, basefile_uri);
		} else {
			// Basefile is not scheduled in the current batch
			string basefile_name = Path.GetFileName (basefile_path);
			// Try to schedule it for addition
			base_indexable = queryable.GetCrawlingFileIndexable (parent, basefile_name);

			if (base_indexable == null) {
				// GetCrawlingFileIndexable returns null if file does not need to be indexed
				// So basefile is up-to-date
				// Need to figure out id from uid manager
				Guid basefile_id = uid_manager.GetIdByNameAndParentId (basefile_name, parent.UniqueId);
				basefile_uri = GuidFu.ToUri (basefile_id);
				Log.Debug ("{0} is not scheduled and need not be, uri is {1}", basefile_path, basefile_uri);
			} else {
				Log.Debug ("Need to index {0}", basefile_path);
				// basefile needs to be indexed
				// FIXME: Move the task business out of handler and into FSQ.cs
				Scheduler.Task task;
				task = queryable.NewAddTask (base_indexable);
				// FIXME: What is the correct priority ?
				// If should have similar priority to the one that this xmp-indexable will be a part of
				task.Priority = Scheduler.Priority.Immediate;
				queryable.ThisScheduler.Add (task);

				// Get the basefile uri from the indexable
				basefile_uri = base_indexable.Uri;
			}
		}

		Log.Debug ("Adding xmp-indexable for {0} (basefile uri {1}) with uid {2}",
			   path,
			   basefile_uri,
			   GuidFu.ToShortString (id));

		Indexable indexable = new Indexable (IndexableType.PropertyChange, basefile_uri);
		// Set the timestamp of the indexable as the timestamp of the basefile
		// It could have also been skipped, the original Indexable.Add would anyway have it
		indexable.Timestamp = File.GetLastWriteTimeUtc (basefile_path);
		indexable.DisplayUri = UriFu.PathToFileUri (path);

		// If the file was somehow deleted before this point, bail out.
		if (! FileSystem.ExistsByDateTime (indexable.Timestamp))
			return null;

		// Save some local states for PostAddHook, namely, path to the xmp file, path to basefile and generated uid
		indexable.LocalState ["XmpFilePath"] = path;
		indexable.LocalState ["BaseFilePath"] = basefile_path;
		indexable.LocalState ["XmpGuid"] = GuidFu.ToShortString (id);

		// FIXME: Should also delete previous xmp properties!
		foreach (Property p in xmp_file.Properties) {
			p.IsMutable = true;
			indexable.AddProperty (p);
		}

		return indexable;
	} finally {
		xmp_file.Close ();
	}
}
// Releases the watch of "dir" and of every directory below it, and removes
// each of them from the by-id table. Children are handled before their
// parent.
private void ForgetDirectoryRecursively (DirectoryModel dir)
{
	foreach (DirectoryModel child in dir.Children) {
		ForgetDirectoryRecursively (child);
	}

	bool is_watched = (dir.WatchHandle != null);
	if (is_watched) {
		event_backend.ForgetWatch (dir.WatchHandle);
		dir.WatchHandle = null;
	}

	dir_models_by_id.Remove (dir.UniqueId);
	// We rely on the expire event to remove it from dir_models_by_path
}
// Picks the next directory that needs crawling and spawns a child task that
// runs a DirectoryIndexableGenerator over it. The task keeps rescheduling
// itself until the queryable has no directory left to crawl.
override protected void DoTaskReal ()
{
	// If our last generator is still doing stuff, just reschedule
	// and return.  This keeps us from generating more tasks until
	// the last one we started runs to completion.
	if ((current_generator != null && current_generator.HasNextIndexable ())
	    || current_dir != null) {
		Reschedule = true;
		return;
	}

	lock (big_lock) {
		Log.Debug ("Running file crawl task");
		current_dir = queryable.GetNextDirectoryToCrawl ();
		if (current_dir == null) {
			Log.Debug ("Done crawling files!!!");
			SetIsActive (false, current_dir);
			return;
		}

		SetIsActive (true, current_dir);
	}

	if (!current_dir.IsAttached) {
		// No generator is spawned for a detached directory, so no post
		// hook will run to clear current_dir. Clear it here; otherwise
		// the guard at the top of this method would keep rescheduling
		// without ever picking a new directory.
		current_dir = null;
		Reschedule = true;
		return;
	}

	if (FileSystemQueryable.Debug) {
		Logger.Log.Debug ("Starting crawl of '{0}'", current_dir.FullName);

		if (current_dir.State == DirectoryState.PossiblyClean)
			Log.Debug ("It looks as though we've crawled '{0}' before", current_dir.FullName);
	}

	// Schedule a DirectoryIndexableGenerator
	// for that directory, and then reschedule ourselves.
	try {
		current_generator = new DirectoryIndexableGenerator (queryable, current_dir);
	} catch (DirectoryNotFoundException) {
		Logger.Log.Debug ("Couldn't crawl '{0}'", current_dir.FullName);

		// FIXME: If our attempt to crawl the directory fails, just
		// mark it as uncrawlable and move on.  This isn't optimal behavior,
		// but works around bugs involving weird permissions for now.
		current_dir.MarkAsUncrawlable ();
		current_dir = null;

		// The assignment above never happened; make sure a generator
		// left over from an earlier run is not spawned again below.
		current_generator = null;
	}

	if (current_generator != null) {
		Scheduler.TaskGroup group;
		group = Scheduler.NewTaskGroup ("Crawl task group", null, our_post_hook);

		Scheduler.Task task;
		task = queryable.NewAddTask (current_generator);
		task.AddTaskGroup (group);
		SpawnChild (task);
	}

	Reschedule = true;
}
// Builds the property-change indexable that describes a rename.
//   name            - the new name.
//   id              - uid of the renamed item.
//   parent          - the (new) parent directory model.
//   last_known_path - the path the item had before the rename.
// Returns null for names ending in ".xmp", which are not supported yet.
private Indexable NewRenamingIndexable (string name, Guid id, DirectoryModel parent, string last_known_path)
{
	// FIXME
	if (name.EndsWith (".xmp")) {
		Log.Warn ("Renaming of xmp files is not yet supported!");
		return null;
	}

	Uri item_uri = GuidFu.ToUri (id);

	Indexable renaming = new Indexable (IndexableType.PropertyChange, item_uri);
	renaming.DisplayUri = UriFu.PathToFileUri (name);

	AddStandardPropertiesToIndexable (renaming, name, parent, true);

	// State stashed on the indexable for later processing.
	renaming.LocalState ["Id"] = id;
	renaming.LocalState ["LastKnownPath"] = last_known_path;

	MergeExternalPendingIndexable (renaming);

	return renaming;
}