// This doesn't check if it makes sense to actually
// merge the two indexables: it just does it.
public void Merge (Indexable other)
{
	if (other.Timestamp > this.Timestamp)
		this.Timestamp = other.Timestamp;

	foreach (Property prop in other.Properties)
		this.AddProperty (prop);

	foreach (DictionaryEntry entry in other.local_state)
		this.local_state [entry.Key] = entry.Value;
}
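// A minimal usage sketch of Merge, assuming only the Indexable and Property
// APIs shown in this file (URIs, dates, and values below are made up):
//
//	Indexable older = new Indexable (new Uri ("file:///home/joe/test.txt"));
//	older.Timestamp = new DateTime (2007, 1, 1);
//
//	Indexable newer = new Indexable (new Uri ("file:///home/joe/test.txt"));
//	newer.Timestamp = new DateTime (2007, 6, 1);
//	newer.AddProperty (Property.New ("dc:title", "Test"));
//
//	older.Merge (newer);
//	// older.Timestamp is now 2007-06-01, and older carries dc:title=Test;
//	// newer's local_state entries overwrite older's on key collisions.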
private void CopyPropertyParentToChild (Indexable parent)
{
	// Parent is a top level indexable
	// Copy all properties
	foreach (Property prop in parent.Properties) {
		Property new_prop = (Property) prop.Clone ();

		// Add parent: to property names ONLY IF
		// - not private property (these are not properties of the file content)
		// - property name does not already start with parent:
		if (! new_prop.Key.StartsWith (Property.PrivateNamespace) &&
		    ! new_prop.Key.StartsWith ("parent:"))
			new_prop.Key = "parent:" + new_prop.Key;

		this.AddProperty (new_prop);
	}
}
// FIXME: Copying the correct properties from parent to child:
// (This is not perfect yet)
// It does not make sense to have parent:parent:parent:...:parent:foo
// for property names of a nested child.
// Moreover, if indexable a.mbox has child b.zip, which has child c.zip,
// then upon matching c.zip we would like to get the information from
// a.mbox (i.e. the toplevel indexable) only. Intermediate parent information
// is not necessary for displaying results; in fact, storing it would cause
// confusion during display.
// E.g. storing parent:beagle:filename for all parents would yield both
// parent:beagle:filename=a.mbox and parent:beagle:filename=b.zip,
// whereas we are only interested in the toplevel parent:beagle:filename=a.mbox.
// Indexables which need to store the intermediate/immediate parent info
// separately should store it explicitly.
// Another problem: the toplevel indexable might want to store information
// that should not be matched when searching for its children. Copying those
// properties into all children would incorrectly match them.
//
private void CopyPropertyChildToChild (Indexable parent)
{
	// If parent itself is a child,
	// then only copy the parent's parent:xxx and _private:xxx properties
	foreach (Property prop in parent.Properties) {
		if (prop.Key.StartsWith ("parent:") ||
		    prop.Key.StartsWith (Property.PrivateNamespace)) {
			Property new_prop = (Property) prop.Clone ();
			this.AddProperty (new_prop);
		} else {
			Property new_prop = (Property) prop.Clone ();
			new_prop.IsStored = false;
			this.AddProperty (new_prop);
		}
	}
}
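// Worked example of the scheme described in the FIXME above (file names
// are made up):
//
//	a.mbox (toplevel)     beagle:filename = a.mbox
//	b.zip  (child)        parent:beagle:filename = a.mbox
//	                      (CopyPropertyParentToChild adds the parent: prefix)
//	c.zip  (grandchild)   parent:beagle:filename = a.mbox
//	                      (CopyPropertyChildToChild copies only parent:* and
//	                      _private:* properties verbatim, so the prefix is
//	                      never stacked into parent:parent:...)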
public static void SendUpdate (IBrowsableItem item)
{
	Indexable indexable = new Indexable (item.DefaultVersionUri);
	indexable.Type = IndexableType.PropertyChange;

	Beagle.Property prop;

	// Clear the existing tags
	prop = Beagle.Property.NewKeyword ("fspot:Tag", "");
	prop.IsMutable = true;
	prop.IsPersistent = true;
	indexable.AddProperty (prop);

	prop = Beagle.Property.NewKeyword ("image:Tag", "");
	prop.IsMutable = true;
	prop.IsPersistent = true;
	indexable.AddProperty (prop);

	foreach (Tag t in item.Tags) {
		prop = Beagle.Property.NewKeyword ("fspot:Tag", t.Name);
		prop.IsMutable = true;
		prop.IsPersistent = true;
		indexable.AddProperty (prop);

		prop = Beagle.Property.NewKeyword ("image:Tag", t.Name);
		prop.IsMutable = true;
		prop.IsPersistent = true;
		indexable.AddProperty (prop);
	}

	prop = Beagle.Property.New ("fspot:Description", item.Description);
	prop.IsMutable = true;
	prop.IsPersistent = true;
	indexable.AddProperty (prop);

	// Create a message to send to the daemon with this information.
	// The Source tells the daemon in which index the existing document lives.
	IndexingServiceRequest req = new IndexingServiceRequest ();
	req.Keepalive = false;
	req.Source = "Files";
	req.Add (indexable);
	req.SendAsync ();
}
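// The four-line "create property, mark mutable and persistent, add" pattern
// above repeats for every tag; a small helper would tighten it. This is
// only a sketch: AddMutableKeyword is hypothetical, not part of the Beagle
// API, and the flag semantics in the comments are as suggested by the
// PropertyChange usage above.
static void AddMutableKeyword (Indexable indexable, string key, string value)
{
	Beagle.Property prop = Beagle.Property.NewKeyword (key, value);
	prop.IsMutable = true;      // can be updated later via a PropertyChange indexable
	prop.IsPersistent = true;   // kept when the underlying content is re-indexed
	indexable.AddProperty (prop);
}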
//////////////////////////

public void SetChildOf (Indexable parent)
{
	this.IsChild = true;

	if (parent.IsChild)
		this.ParentUri = parent.ParentUri;
	else
		this.ParentUri = parent.Uri;

	if (! this.ValidTimestamp)
		this.Timestamp = parent.Timestamp;

	if (string.IsNullOrEmpty (this.HitType))
		this.HitType = parent.HitType;

	this.Source = parent.Source;

	// FIXME: Set all of the parent's properties on the
	// child so that we get matches against the child
	// that otherwise would match only the parent, at
	// least until we have proper RDF support.
	if (parent.IsChild)
		CopyPropertyChildToChild (parent);
	else
		CopyPropertyParentToChild (parent);
}
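// Hedged usage sketch: because SetChildOf resolves through parent.ParentUri,
// ParentUri always points at the toplevel indexable, no matter how deep the
// nesting goes. (The URIs below are made up; UriFu.AddFragment is used the
// same way as in GenerateNextIndexable further down.)
//
//	Indexable mbox  = new Indexable (new Uri ("file:///mail/a.mbox"));
//	Indexable zip   = new Indexable (UriFu.AddFragment (mbox.Uri, "b.zip", false));
//	zip.SetChildOf (mbox);     // zip.ParentUri == file:///mail/a.mbox
//	Indexable inner = new Indexable (UriFu.AddFragment (zip.Uri, "c.zip", false));
//	inner.SetChildOf (zip);    // inner.ParentUri is still file:///mail/a.mbox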
static void Main (string[] args)
{
	if (args.Length != 2) {
		Console.WriteLine ("Usage: beagle-master-delete-button index-name uri-to-delete");
		return;
	}

	string index_name = args [0];

	LuceneQueryingDriver driver = new LuceneQueryingDriver (index_name, -1, true);

	Uri uri = new Uri (args [1], false);
	Uri uri_to_delete = RemapUri (driver, uri);

	LuceneIndexingDriver indexer = new LuceneIndexingDriver (index_name, false);

	Indexable indexable = new Indexable (uri_to_delete);
	indexable.Type = IndexableType.Remove;

	IndexerRequest request = new IndexerRequest ();
	request.Add (indexable);

	IndexerReceipt [] receipts = indexer.Flush (request);

	if (receipts == null || receipts.Length == 0) {
		Console.WriteLine ("Uri {0} not found in {1}", uri, index_name);
		return;
	}

	IndexerRemovedReceipt r = receipts [0] as IndexerRemovedReceipt;
	if (r == null || r.NumRemoved == 0) {
		Console.WriteLine ("Uri {0} not found in {1}", uri, index_name);
		return;
	}

	Console.WriteLine ("Uri {0} deleted", uri);
}
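// Example invocation, matching the usage string above (the index name and
// URI here are illustrative):
//
//	beagle-master-delete-button FileSystemIndex file:///home/joe/stale.txt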
/////////////////////////////////////////////////////////////////

static void AddToRequest (Indexable indexable)
{
	if (indexable == null)
		return;

	// Disable filtering and only index file attributes
	if (arg_disable_filtering)
		indexable.Filtering = IndexableFiltering.Never;

	// Tag the item for easy identification (for say, removal)
	if (arg_tag != null)
		indexable.AddProperty (Property.NewUnsearched ("Tag", arg_tag));

	indexable.Source = arg_source;

	pending_request.Add (indexable);

	bool reschedule = false;

	do {
		if (Shutdown.ShutdownRequested)
			break;

		if (! reschedule && pending_request.Count < BATCH_SIZE)
			break;

		if (reschedule)
			Logger.Log.Debug ("Continuing indexing indexer generated indexables");
		else
			Logger.Log.Debug ("Flushing driver, {0} items in queue", pending_request.Count);

		reschedule = FlushIndexer (driver);

		// Super Lame Hack: gtk-sharp up to 2.10 requires a main loop
		// to dispose of any managed wrappers around GObjects.  Since
		// we don't have one, we'll process all the pending items in
		// a loop here.  This is particularly an issue with maildirs,
		// because we need the loop to clean up after GMime.  Without
		// it, GMime's streams are never completely unref'd, the
		// file descriptors aren't closed, and we run out and crash.
		while (GLib.MainContext.Pending ())
			GLib.MainContext.Iteration ();
	} while (reschedule);
}
static void DoIndexing ()
{
	int count_dirs = 0;
	int count_files = 0;
	Indexable indexable;

	pending_request = new IndexerRequest ();
	Queue modified_directories = new Queue ();

	while (pending_directories.Count > 0) {
		DirectoryInfo dir = (DirectoryInfo) pending_directories.Dequeue ();

		AddToRequest (DirectoryToIndexable (dir, modified_directories));

		try {
			if (arg_recursive)
				foreach (DirectoryInfo subdir in DirectoryWalker.GetDirectoryInfos (dir))
					if (! Ignore (subdir) &&
					    ! FileSystem.IsSpecialFile (subdir.FullName))
						pending_directories.Enqueue (subdir);

			foreach (FileInfo file in DirectoryWalker.GetFileInfos (dir))
				if (! Ignore (file) &&
				    ! FileSystem.IsSpecialFile (file.FullName)) {
					AddToRequest (FileToIndexable (file));
					count_files ++;
				}
		} catch (DirectoryNotFoundException) { }

		if (Shutdown.ShutdownRequested)
			break;

		count_dirs ++;
	}

	Logger.Log.Debug ("Scanned {0} files and directories in {1} directories",
			  count_dirs + count_files, count_dirs);

	if (Shutdown.ShutdownRequested) {
		backing_fa_store.Flush ();
		return;
	}

	// Time to remove deleted directories from the index and attributes store
	while (modified_directories.Count > 0) {
		DirectoryInfo subdir = (DirectoryInfo) modified_directories.Dequeue ();
		Logger.Log.Debug ("Checking {0} for deleted files and directories", subdir.FullName);

		// Get a list of all documents from the lucene index with
		// ParentDirUriPropKey set to that of subdir
		ICollection all_dirent = GetAllItemsInDirectory (subdir);

		foreach (Dirent info in all_dirent) {
			// Check if the item still exists
			if ((! info.IsDirectory && File.Exists (info.FullName)) ||
			    (info.IsDirectory && Directory.Exists (info.FullName)))
				continue;

			if (info.IsDirectory)
				// Recursively remove deleted subdirectories
				modified_directories.Enqueue (new DirectoryInfo (info.FullName));

			// Remove it from the index
			Uri uri = PathToUri (info.FullName);
			indexable = new Indexable (IndexableType.Remove, uri);
			AddToRequest (indexable);
		}
	}

	bool reschedule = false;
	// Call Flush until our request is empty.  We have to do this in a loop
	// because Flush works in batches and some indexables might generate
	// more indexables.
	while (reschedule || pending_request.Count > 0) {
		if (Shutdown.ShutdownRequested)
			break;

		reschedule = FlushIndexer (driver);
	}

	backing_fa_store.Flush ();

	if (Shutdown.ShutdownRequested)
		return;

	Logger.Log.Debug ("Optimizing index");
	driver.OptimizeNow ();
}
private Indexable OperaRowToIndexable (OperaHistory.Row row)
{
	// It's unsafe to index secure content since it may contain sensitive data
	if (row.Address.Scheme == Uri.UriSchemeHttps)
		return null;

	Indexable indexable = new Indexable (row.Address);

	indexable.HitType = "WebHistory";
	indexable.MimeType = row.MimeType;
	indexable.Timestamp = row.LastVisited;
	indexable.AddProperty (Beagle.Property.New ("fixme:host", row.Address.Host));
	indexable.AddProperty (Beagle.Property.NewUnsearched ("fixme:size", row.Length));

	// Hint for the filter about the charset
	indexable.AddProperty (Property.NewUnsearched (StringFu.UnindexedNamespace + "charset",
						       row.Encoding.ToString ()));

	if (row.Compression == "gzip")
		indexable.SetBinaryStream (new GZipInputStream (File.OpenRead (Path.Combine (cache_dir, row.LocalFileName))));
	else
		indexable.ContentUri = new Uri (Path.Combine (cache_dir, row.LocalFileName));

	indexer.AttributeStore.AttachLastWriteTime (Path.Combine (cache_dir, row.LocalFileName), DateTime.UtcNow);

	return indexable;
}
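// Note: GZipInputStream above comes from SharpZipLib
// (ICSharpCode.SharpZipLib.GZip), so a gzip-compressed cache entry is
// decompressed on the fly as the indexer reads the binary stream, while an
// uncompressed entry is handed over via ContentUri without any copying.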
public void Add (Indexable indexable)
{
	indexable.StoreStream ();
	to_add.Add (indexable);
}
public int CompareTo (object obj)
{
	Indexable other = (Indexable) obj;
	return DateTime.Compare (this.Timestamp, other.Timestamp);
}
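// Since Indexable compares by Timestamp, a batch of indexables can be
// sorted oldest-first with a plain ArrayList.Sort ().  A sketch, where
// indexable_a and indexable_b are any two indexables:
//
//	ArrayList batch = new ArrayList ();
//	batch.Add (indexable_a);
//	batch.Add (indexable_b);
//	batch.Sort ();    // ascending by Timestamp via CompareTo above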
static void Main (string[] args)
{
	string uriStr = null;
	string title = null;
	string sourcefile = null;
	bool deletesourcefile = false;

	if (args.Length == 0 || Array.IndexOf (args, "--help") > -1) {
		PrintUsage ();
		Environment.Exit (1);
	}

	for (int i = 0; i < args.Length; i++) {
		switch (args [i]) {
		case "--url":
		case "--title":
		case "--sourcefile":
			if (i + 1 >= args.Length || args [i + 1].StartsWith ("--")) {
				PrintUsage ();
				Environment.Exit (1);
			}
			break;
		}

		switch (args [i]) {
		case "--url":
			uriStr = args [++i];
			break;

		case "--title":
			title = args [++i];
			break;

		case "--sourcefile":
			sourcefile = args [++i];
			break;

		case "--deletesourcefile":
			deletesourcefile = true;
			break;

		case "--help":
			PrintUsage ();
			return;

		case "--version":
			VersionFu.PrintVersion ();
			return;
		}
	}

	if (uriStr == null) {
		Logger.Log.Error ("URI not specified!\n");
		PrintUsage ();
		Environment.Exit (1);
	}

	Uri uri = new Uri (uriStr, true);

	if (uri.Scheme == Uri.UriSchemeHttps) {
		// For security/privacy reasons, we don't index any
		// SSL-encrypted pages.
		Logger.Log.Error ("Indexing secure https:// URIs is not secure!");
		Environment.Exit (1);
	}

	// We don't index file: URIs.  Silently exit.
	if (uri.IsFile)
		return;

	// We *definitely* don't index mailto: URIs.  Silently exit.
	if (uri.Scheme == Uri.UriSchemeMailto)
		return;

	Indexable indexable;

	indexable = new Indexable (uri);
	indexable.HitType = "WebHistory";
	indexable.MimeType = "text/html";
	indexable.Timestamp = DateTime.Now;

	if (title != null)
		indexable.AddProperty (Property.New ("dc:title", title));

	if (sourcefile != null) {
		if (! File.Exists (sourcefile)) {
			Logger.Log.Error ("sourcefile '{0}' does not exist!", sourcefile);
			Environment.Exit (1);
		}

		indexable.ContentUri = UriFu.PathToFileUri (sourcefile);
		indexable.DeleteContent = deletesourcefile;
	} else {
		Stream stdin = Console.OpenStandardInput ();
		if (stdin == null) {
			Logger.Log.Error ("No sourcefile specified, and no standard input!\n");
			PrintUsage ();
			Environment.Exit (1);
		}

		indexable.SetTextReader (new StreamReader (stdin));
	}

	IndexingServiceRequest req = new IndexingServiceRequest ();
	req.Add (indexable);

	try {
		Logger.Log.Info ("Indexing");
		Logger.Log.Debug ("SendAsync");
		req.SendAsync ();
		Logger.Log.Debug ("Close");
		req.Close ();
		Logger.Log.Debug ("Done");
	} catch (Exception e) {
		Logger.Log.Error ("Indexing failed: {0}", e);

		// Still clean up after ourselves, even if we couldn't
		// index the content.
		if (deletesourcefile)
			File.Delete (sourcefile);

		Environment.Exit (1);
	}
}
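// Example invocation (the tool name and paths are illustrative; the flag
// names come straight from the parser above):
//
//	beagle-index-url --url http://example.com/ --title "Example" \
//	                 --sourcefile /tmp/page.html --deletesourcefile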
public void OnEachPart (GMime.Object mime_part)
{
	GMime.Object part = null;
	bool part_needs_dispose = false;

	//for (int i = 0; i < this.depth; i++)
	//	Console.Write ("  ");
	//Console.WriteLine ("Content-Type: {0}", mime_part.ContentType);

	++depth;

	if (mime_part is GMime.MessagePart) {
		GMime.MessagePart msg_part = (GMime.MessagePart) mime_part;

		using (GMime.Message message = msg_part.Message) {
			using (GMime.Object subpart = message.MimePart)
				this.OnEachPart (subpart);
		}
	} else if (mime_part is GMime.Multipart) {
		GMime.Multipart multipart = (GMime.Multipart) mime_part;
		int num_parts = multipart.Count;

		// If the mimetype is multipart/alternative, we only want to index
		// one part -- the richest one we can filter.
		if (mime_part.ContentType.MediaSubtype.ToLower () == "alternative") {
			// The richest formats are at the end, so work from there
			// backward.
			for (int i = num_parts - 1; i >= 0; i--) {
				GMime.Object subpart = multipart [i];

				if (IsMimeTypeHandled (subpart.ContentType.ToString ())) {
					part = subpart;
					part_needs_dispose = true;
					break;
				} else {
					subpart.Dispose ();
				}
			}
		}

		// If it's not alternative, or we don't know how to filter any of
		// the parts, treat them like a bunch of attachments.
		if (part == null) {
			for (int i = 0; i < num_parts; i++) {
				using (GMime.Object subpart = multipart [i])
					this.OnEachPart (subpart);
			}
		}
	} else if (mime_part is GMime.Part) {
		part = mime_part;
	} else {
		// Report mime_part here: "part" is still null on this path.
		throw new Exception (String.Format ("Unknown part type: {0}", mime_part.GetType ()));
	}

	if (part != null) {
		System.IO.Stream stream = null;

		using (GMime.DataWrapper content_obj = ((GMime.Part) part).ContentObject)
			stream = content_obj.Stream;

		// If this is the only part and it's plain text, we
		// want to just attach it to our filter instead of
		// creating a child indexable for it.
		bool no_child_needed = false;

		string mime_type = part.ContentType.ToString ().ToLower ();

		if (this.depth == 1 && this.count == 0) {
			if (mime_type == "text/plain") {
				no_child_needed = true;
				this.reader = new StreamReader (stream);
			} else if (mime_type == "text/html") {
				no_child_needed = true;
				html_part = true;
				string enc = part.ContentType.GetParameter ("charset");

				// DataWrapper.Stream is a very limited stream
				// and does not allow Seek or Tell.
				// HtmlFilter requires Stream.Position=0.
				// Play safe and create a memorystream
				// for HTML parsing.
				GMime.StreamMem mem_stream;
				mem_stream = new GMime.StreamMem ();

				GMime.Stream data_stream;
				data_stream = ((StreamWrapper) stream).GMimeStream;
				data_stream.WriteToStream (mem_stream);
				data_stream.Flush ();

				// The StreamWrapper and hence the memory_stream
				// will be closed when the reader is closed
				// after Pull()-ing is done.
				System.IO.Stream html_stream;
				html_stream = new StreamWrapper (mem_stream);
				html_stream.Seek (0, SeekOrigin.Begin);

				stream.Close ();

				try {
					this.reader = FilterHtml.GetHtmlReader (html_stream, enc, link_handler);
				} catch (Exception e) {
					Log.Debug (e, "Exception while filtering HTML email {0}", this.indexable.Uri);
					this.reader = null;
					html_stream.Close ();
					html_part = false;
				}
			}
		}

		if (! no_child_needed) {
			// Check the mime type against the blacklist and don't index any
			// parts that are contained within.  That way the user doesn't
			// get flooded with pointless signatures and vcard and ical
			// attachments along with (real) attachments.
			if (Array.IndexOf (blacklisted_mime_types, mime_type) == -1) {
				string sub_uri = "#" + this.count;
				Indexable child;
				child = new Indexable (UriFu.AddFragment (this.indexable.Uri, sub_uri, true));
				child.DisplayUri = new Uri (this.indexable.DisplayUri.ToString () + "#" + this.count);

				// This is a special case.
				// Even for mails found on disk, the MailMessage hittype is set.
				child.HitType = "MailMessage";
				child.MimeType = mime_type;

				// If this is the richest part we found for multipart emails,
				// add its content to the textcache.
				if (snippet_attachment ||
				    (this.depth == 1 && this.count == 0))
					child.CacheContent = true;
				else
					child.CacheContent = false;

				string filename = ((GMime.Part) part).Filename;

				if (! String.IsNullOrEmpty (filename)) {
					child.AddProperty (Property.NewKeyword ("fixme:attachment_title", filename));

					foreach (Property prop in Property.StandardFileProperties (filename, false))
						child.AddProperty (prop);
				}

				// Store the length of the attachment
				long length = stream.Length;
				if (length != -1)
					child.AddProperty (Property.NewUnsearched ("fixme:filesize", length));

				if (part.ContentType.MediaType.ToLower () == "text")
					child.SetTextReader (new StreamReader (stream));
				else
					child.SetBinaryStream (stream);

				child.SetChildOf (this.indexable);
				child.StoreStream ();
				child.CloseStreams ();
				this.child_indexables.Add (child);
			} else {
				Log.Debug ("Skipping attachment {0}#{1} with blacklisted mime type {2}",
					   this.indexable.Uri, this.count, mime_type);
			}
		}

		this.count++;
	}

	if (part_needs_dispose)
		part.Dispose ();

	--depth;
}
public override bool GenerateNextIndexable (out Indexable child)
{
	ArchiveEntry a_entry;
	child = null;

	if (! setup_done)
		SetupArchiveStream ();

	if (count >= MAX_CHILDREN) {
		Log.Debug ("Archive {0} contains more than {1} files. Only {1} files indexed.",
			   Indexable.DisplayUri.ToString (), count);
		Close ();
		return false;
	}

	if (total_size > MAX_ALL_FILES) {
		Log.Debug ("Archive {0} crossed our max uncompressed size threshold. Only {1} files extracted",
			   Indexable.DisplayUri.ToString (), count);
		Close ();
		return false;
	}

	a_entry = DoGetNextEntry ();
	if (a_entry == null) {
		Close ();
		return false;
	}

	// Store file names in the archive
	AppendText (Path.GetFileName (a_entry.Name));
	AppendWhiteSpace ();

	// If this is an invalid or oversized entry, skip it.
	if (a_entry.TempFile == null)
		return true;

	++count;
	total_size += a_entry.Size;

	// Add "#<escaped-path-to-entry>" to the end of the Indexable Uri.
	// So, file b#c in archive foo.zip becomes file:///foo.zip#b%23c
	// and file c in archive b in archive foo.zip becomes file:///foo.zip#b#c
	child = new Indexable (UriFu.AddFragment (Indexable.Uri, a_entry.Name, false));

	child.CacheContent = true;
	child.MimeType = a_entry.MimeType;

	child.DisplayUri = new Uri (Indexable.DisplayUri.ToString () + "#" + a_entry.Name);
	child.ContentUri = UriFu.PathToFileUri (a_entry.TempFile);
	child.DeleteContent = true;

	// FIXME: Remove fixme:inside_archive during Property Hack Week.
	// Replace most flag properties by value properties.
	child.AddProperty (Property.NewBool ("fixme:inside_archive", true));
	// Use this instead of fixme:inside_archive
	child.AddProperty (Property.NewKeyword ("archive:type", archive_type));

	child.AddProperty (Property.NewKeyword ("fixme:relativeuri", a_entry.Name));
	child.AddProperty (Property.New ("fixme:comment", a_entry.Comment));
	child.AddProperty (Property.NewUnsearched ("fixme:filesize", a_entry.Size));

	foreach (Property prop in Property.StandardFileProperties (Path.GetFileName (a_entry.Name), false))
		child.AddProperty (prop);

	child.SetChildOf (Indexable);

	return true;
}
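// Note the generator contract above: returning true with child == null means
// "nothing to index for this entry, but keep pulling".  A consumer therefore
// drains the generator the way Display () does further down:
//
//	Indexable child;
//	while (filter.GenerateNextIndexable (out child)) {
//		if (child == null)	// skipped (invalid or oversized) entry
//			continue;
//		// ... queue child for indexing ...
//	}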
public PartHandler (Indexable parent_indexable, FilterHtml.AddLinkCallback link_handler)
{
	this.indexable = parent_indexable;
	this.link_handler = link_handler;
}
static Indexable FileToIndexable (FileInfo file)
{
	if (! file.Exists)
		return null;

	if (fa_store.IsUpToDateAndFiltered (PathInIndex (file.FullName),
					    FileSystem.GetLastWriteTimeUtc (file.FullName)))
		return null;

	// Create the indexable and add the standard properties we
	// use in the FileSystemQueryable.
	Uri uri = PathToUri (file.FullName);
	Indexable indexable = new Indexable (uri);
	indexable.Timestamp = file.LastWriteTimeUtc;
	indexable.FlushBufferCache = true;
	indexable.AddProperty (Property.NewUnsearched ("fixme:filesize", file.Length));
	FSQ.AddStandardPropertiesToIndexable (indexable, file.Name, Guid.Empty, false);

	// Store the directory name in the index
	string dirname = file.DirectoryName;
	indexable.AddProperty (Property.NewUnsearched (Property.ParentDirUriPropKey, PathToUri (dirname)));

	if (arg_removable) {
		indexable.AddProperty (Property.NewKeyword ("beagle:RemovableVolume", volume_label));
		indexable.ContentUri = UriFu.PathToFileUri (file.FullName);
	}

	return indexable;
}
static int Main (string[] args)
{
	SystemInformation.SetProcessName ("beagle-extract-content");

	if (args.Length < 1 || Array.IndexOf (args, "--help") != -1) {
		PrintUsage ();
		return 0;
	}

	if (Array.IndexOf (args, "--debug") == -1)
		Log.Disable ();

	if (Array.IndexOf (args, "--version") != -1) {
		VersionFu.PrintVersion ();
		return 0;
	}

	if (Array.IndexOf (args, "--tokenize") != -1)
		tokenize = true;

	if (Array.IndexOf (args, "--analyze") != -1)
		analyze = true;

	if (Array.IndexOf (args, "--show-generated") != -1 || Array.IndexOf (args, "--show-children") != -1)
		show_generated = true;

	StreamWriter writer = null;
	string outfile = null;

	foreach (string arg in args) {
		// mime-type option
		if (arg.StartsWith ("--mimetype=")) {
			mime_type = arg.Substring (11);
			continue;
		// output file option:
		// we need this in case the output contains a different encoding;
		// printing to the Console might not always display properly
		} else if (arg.StartsWith ("--outfile=")) {
			outfile = arg.Substring (10);
			Console.WriteLine ("Redirecting output to " + outfile);
			FileStream f = new FileStream (outfile, FileMode.Create);
			writer = new StreamWriter (f, System.Text.Encoding.UTF8);
			continue;
		} else if (arg.StartsWith ("--")) {
			// other option, skip it
			continue;
		}

		Uri uri = UriFu.PathToFileUri (arg);
		Indexable indexable = new Indexable (uri);
		if (mime_type != null)
			indexable.MimeType = mime_type;

		try {
			if (writer != null)
				Console.SetOut (writer);

			Display (indexable);

			if (writer != null)
				writer.Flush ();

			// Restore the console output if it was redirected
			if (outfile != null) {
				StreamWriter standardOutput = new StreamWriter (Console.OpenStandardOutput ());
				standardOutput.AutoFlush = true;
				Console.SetOut (standardOutput);
			}
		} catch (Exception e) {
			Console.WriteLine ("Unable to filter {0}: {1}", uri, e.Message);
			return -1;
		}

		// Super Lame Hack: gtk-sharp up to 2.10 requires a main loop
		// to dispose of any managed wrappers around GObjects.  Since
		// we don't have one, we'll process all the pending items in
		// a loop here.  This is particularly an issue with maildirs,
		// because we need the loop to clean up after GMime.  Without
		// it, GMime's streams are never completely unref'd, the
		// file descriptors aren't closed, and we run out and crash.
		while (GLib.MainContext.Pending ())
			GLib.MainContext.Iteration ();
	}

	if (writer != null)
		writer.Close ();

	return 0;
}
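// Example invocations (flag names are taken from the parsing above; the
// file paths are made up):
//
//	beagle-extract-content /tmp/report.pdf
//	beagle-extract-content --tokenize --mimetype=text/html page.html
//	beagle-extract-content --outfile=/tmp/out.txt /tmp/mail.mbox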
static Indexable DirectoryToIndexable (DirectoryInfo dir, Queue modified_directories)
{
	if (! dir.Exists)
		return null;

	// Check if the directory information is stored in the attributes store
	// and if the mtime of the directory matches the one stored there.
	FileAttributes attr = fa_store.Read (PathInIndex (dir.FullName));

	// If the directory exists in the fa store, then it is already indexed.
	if (attr != null) {
		// If we don't care about deleted content then we are fine.
		// If the attributes are up-to-date, then we are fine too.
		if (! arg_delete || FileAttributesStore.IsUpToDate (attr, FileSystem.GetLastWriteTimeUtc (dir.FullName)))
			return null;

		// But the last write time needs to be up-to-date to support
		// enable-deletion, so we actually index the directories, even
		// if --disable-directories is set.
		modified_directories.Enqueue (dir);
	}

	// Create the indexable and add the standard properties we
	// use in the FileSystemQueryable.
	Uri uri = PathToUri (dir.FullName);
	Indexable indexable = new Indexable (uri);
	indexable.MimeType = "inode/directory";
	indexable.NoContent = true;
	indexable.Timestamp = dir.LastWriteTimeUtc;

	// Store the directory information in the index anyway, but if
	// --disable-directories was passed, do not store the names and other
	// standard properties used during searching.
	if (! arg_disable_directories)
		FSQ.AddStandardPropertiesToIndexable (indexable, dir.Name, Guid.Empty, false);

	// Add the parent directory property
	string dirname = dir.Parent.FullName;
	indexable.AddProperty (Property.NewUnsearched (Property.ParentDirUriPropKey, PathToUri (dirname)));
	indexable.AddProperty (Property.NewBool (Property.IsDirectoryPropKey, true));

	if (arg_removable)
		indexable.AddProperty (Property.NewKeyword ("beagle:removable", volume_label));

	return indexable;
}
static void Display (Indexable indexable)
{
	if (! first_indexable) {
		Console.WriteLine ();
		Console.WriteLine ("-----------------------------------------");
		Console.WriteLine ();
	}
	first_indexable = false;

	Console.WriteLine ("Filename: " + indexable.Uri);

	if (indexable.ParentUri != null)
		Console.WriteLine ("Parent: " + indexable.ParentUri);

	Stopwatch watch = new Stopwatch ();

	Filter filter;

	watch.Start ();
	if (! FilterFactory.FilterIndexable (indexable, out filter)) {
		indexable.Cleanup ();
		indexable.NoContent = true;
		filter = null;
	}
	watch.Stop ();

	Console.WriteLine ("Filter: {0} (determined in {1})", filter, watch);
	Console.WriteLine ("MimeType: {0}", indexable.MimeType);
	Console.WriteLine ();

	ArrayList generated_indexables = new ArrayList ();
	Indexable generated_indexable;

	bool first = true;
	if (filter != null && filter.HasGeneratedIndexable) {
		while (filter.GenerateNextIndexable (out generated_indexable)) {
			if (generated_indexable == null)
				continue;

			if (first) {
				Console.WriteLine ("Filter-generated indexables:");
				first = false;
			}

			Console.WriteLine ("  {0}", generated_indexable.Uri);

			if (show_generated)
				generated_indexables.Add (generated_indexable);
			else
				generated_indexable.Cleanup ();
		}
	}

	if (! first)
		Console.WriteLine ();

	// Make sure that the properties are sorted.
	ArrayList prop_array = new ArrayList (indexable.Properties);
	prop_array.Sort ();

	Console.WriteLine ("Properties:");

	if (indexable.ValidTimestamp)
		Console.WriteLine ("  Timestamp = {0}", DateTimeUtil.ToString (indexable.Timestamp));

	foreach (Beagle.Property prop in prop_array) {
		if (String.IsNullOrEmpty (prop.Value))
			continue;

		Console.WriteLine ("  {0} = {1}", prop.Key, prop.Value);
	}

	Console.WriteLine ();

	if (indexable.NoContent)
		return;

	watch.Reset ();
	watch.Start ();

	TextReader reader;
	Analyzer indexing_analyzer = new BeagleAnalyzer ();
	char [] buffer = new char [2048];

	reader = indexable.GetTextReader ();

	char separator_char = (tokenize ? '\n' : ' ');

	if (reader != null) {
		first = true;

		if (analyze) {
			if (! stats_only)
				Console.WriteLine ("Content:");

			TokenStream token_stream = indexing_analyzer.TokenStream ("Text", reader);
			Lucene.Net.Analysis.Token token = token_stream.Next ();
			first = (token == null);

			if (! stats_only)
				for (; token != null; token = token_stream.Next ())
					Console.Write ("{0}{1}", token.TermText (), separator_char);

			token_stream.Close ();
		} else {
#if false
			while (true) {
				int l = reader.Read (buffer, 0, 2048);
				if (l <= 0)
					break;
				if (first)
					first = false;
				if (! stats_only)
					DisplayContent (buffer, l);
			}
#else
			string line;
			first = true;
			while ((line = reader.ReadLine ()) != null) {
				if (first) {
					Console.WriteLine ("Content:");
					first = false;
				}

				if (! stats_only)
					DisplayContent (line);
			}
#endif
		}

		reader.Close ();

		if (first)
			Console.WriteLine ("(no content)");
		else
			Console.WriteLine ('\n');
	}

	/*
	reader = indexable.GetHotTextReader ();
	first = true;

	if (reader != null) {
		Console.WriteLine ("HotContent:");

		if (analyze) {
			TokenStream token_stream = indexing_analyzer.TokenStream ("HotText", reader);
			Lucene.Net.Analysis.Token token = token_stream.Next ();
			first = (token == null);

			for (; token != null; token = token_stream.Next ())
				Console.Write ("{0}{1}", token.TermText (), separator_char);

			token_stream.Close ();
		} else {
			while (true) {
				int l = reader.Read (buffer, 0, 2048);
				if (l <= 0)
					break;
				if (first)
					first = false;
				DisplayContent (buffer, l);
			}
		}

		reader.Close ();

		if (first)
			Console.WriteLine ("(no hot content)");
		else
			Console.WriteLine ('\n');
	}
	*/

	watch.Stop ();

	Console.WriteLine ();
	Console.WriteLine ("Text extracted in {0}", watch);

#if ENABLE_RDF_ADAPTER
	IList<string> links = indexable.Links;
	if (links != null && links.Count != 0) {
		Console.WriteLine ("Links:");
		foreach (string link in links)
			Console.WriteLine (link);
		Console.WriteLine ();
	}
#endif

	foreach (Indexable gi in generated_indexables)
		Display (gi);

	Stream stream = indexable.GetBinaryStream ();
	if (stream != null)
		stream.Close ();

	// Clean up any temporary files associated with filtering this indexable.
	indexable.Cleanup ();
}