/// <summary>
/// Command-line entry point: builds a "WebHistory" indexable for a single
/// URL and submits it through an IndexingServiceRequest.
/// </summary>
/// <remarks>
/// Options handled: --url, --title and --sourcefile (each takes a value),
/// --deletesourcefile, --help and --version. When no --sourcefile is given
/// the page content is read from standard input. The process exits with
/// status 1 on usage errors, https URIs, a missing source file or a failed
/// indexing request; file: and mailto: URIs are skipped silently.
/// </remarks>
/// <param name="args">The raw command-line arguments.</param>
static void Main (String[] args)
{
	string uriStr = null;
	string title = null;
	string sourcefile = null;
	bool deletesourcefile = false;

	if (args.Length == 0 || Array.IndexOf (args, "--help") > -1) {
		PrintUsage ();
		Environment.Exit (1);
	}

	for (int i = 0; i < args.Length; i++) {
		// Options that take a value must be followed by an argument
		// that is not itself another option.
		switch (args [i]) {
		case "--url":
		case "--title":
		case "--sourcefile":
			if (i + 1 >= args.Length || args [i + 1].StartsWith ("--")) {
				PrintUsage ();
				Environment.Exit (1);
			}
			break;
		}

		switch (args [i]) {
		case "--url":
			uriStr = args [++i];
			break;
		case "--title":
			title = args [++i];
			break;
		case "--sourcefile":
			sourcefile = args [++i];
			break;
		case "--deletesourcefile":
			deletesourcefile = true;
			break;
		case "--help":
			// Normally caught by the check above; kept as a safety net.
			PrintUsage ();
			return;
		case "--version":
			VersionFu.PrintVersion ();
			return;
		}
	}

	if (uriStr == null) {
		Logger.Log.Error ("URI not specified!\n");
		PrintUsage ();
		Environment.Exit (1);
	}

	Uri uri;
	try {
		uri = new Uri (uriStr, true);
	} catch (UriFormatException e) {
		// Report a malformed URL cleanly instead of dying with an
		// unhandled exception.
		Logger.Log.Error ("Invalid URI '{0}': {1}", uriStr, e.Message);
		Environment.Exit (1);
		return; // Not reached; satisfies definite assignment of 'uri'.
	}

	if (uri.Scheme == Uri.UriSchemeHttps) {
		// For security/privacy reasons, we don't index any
		// SSL-encrypted pages.
		Logger.Log.Error ("Indexing secure https:// URIs is not secure!");
		Environment.Exit (1);
	}

	// We don't index file: Uris.  Silently exit.
	if (uri.IsFile)
		return;

	// We *definitely* don't index mailto: Uris.  Silently exit.
	if (uri.Scheme == Uri.UriSchemeMailto)
		return;

	Indexable indexable;

	indexable = new Indexable (uri);
	indexable.HitType = "WebHistory";
	indexable.MimeType = "text/html";
	indexable.Timestamp = DateTime.Now;

	if (title != null)
		indexable.AddProperty (Property.New ("dc:title", title));

	if (sourcefile != null) {
		if (!File.Exists (sourcefile)) {
			Logger.Log.Error ("sourcefile '{0}' does not exist!", sourcefile);
			Environment.Exit (1);
		}

		indexable.ContentUri = UriFu.PathToFileUri (sourcefile);
		indexable.DeleteContent = deletesourcefile;
	} else {
		Stream stdin = Console.OpenStandardInput ();
		if (stdin == null) {
			Logger.Log.Error ("No sourcefile specified, and no standard input!\n");
			PrintUsage ();
			Environment.Exit (1);
		}

		indexable.SetTextReader (new StreamReader (stdin));
	}

	IndexingServiceRequest req = new IndexingServiceRequest ();
	req.Add (indexable);

	try {
		Logger.Log.Info ("Indexing");
		Logger.Log.Debug ("SendAsync");
		req.SendAsync ();
		Logger.Log.Debug ("Close");
		req.Close ();
		Logger.Log.Debug ("Done");
	} catch (Exception e) {
		Logger.Log.Error ("Indexing failed: {0}", e);

		// Still clean up after ourselves, even if we couldn't
		// index the content.  --deletesourcefile may have been given
		// without --sourcefile, in which case there is nothing to
		// delete (and File.Delete (null) would throw).
		if (deletesourcefile && sourcefile != null)
			File.Delete (sourcefile);

		Environment.Exit (1);
	}
}
/// <summary>
/// Visits one MIME part of the message being filtered.
/// </summary>
/// <remarks>
/// Embedded messages and multiparts are walked recursively. For a leaf part
/// (or the chosen alternative of a multipart/alternative), the content is
/// either attached directly to this filter's reader -- when it is the first
/// part at depth 1 and is text/plain or text/html -- or wrapped in a child
/// Indexable appended to child_indexables, unless its mime type is listed in
/// blacklisted_mime_types.
/// </remarks>
/// <param name="mime_part">The MIME part to process.</param>
/// <exception cref="System.Exception">
/// Thrown when <paramref name="mime_part"/> is not a MessagePart, Multipart
/// or Part.
/// </exception>
public void OnEachPart (GMime.Object mime_part)
{
	GMime.Object part = null;
	bool part_needs_dispose = false;

	++depth;

	if (mime_part is GMime.MessagePart) {
		GMime.MessagePart msg_part = (GMime.MessagePart) mime_part;

		using (GMime.Message message = msg_part.Message) {
			using (GMime.Object subpart = message.MimePart)
				this.OnEachPart (subpart);
		}
	} else if (mime_part is GMime.Multipart) {
		GMime.Multipart multipart = (GMime.Multipart) mime_part;
		int num_parts = multipart.Count;

		// If the mimetype is multipart/alternative, we only want to index
		// one part -- the richest one we can filter.
		// Invariant lowercasing keeps the comparison locale-independent.
		if (mime_part.ContentType.MediaSubtype.ToLowerInvariant () == "alternative") {
			// The richest formats are at the end, so work from there
			// backward.
			for (int i = num_parts - 1; i >= 0; i--) {
				GMime.Object subpart = multipart[i];

				if (IsMimeTypeHandled (subpart.ContentType.ToString ())) {
					part = subpart;
					part_needs_dispose = true;
					break;
				} else {
					subpart.Dispose ();
				}
			}
		}

		// If it's not alternative, or we don't know how to filter any of
		// the parts, treat them like a bunch of attachments.
		if (part == null) {
			for (int i = 0; i < num_parts; i++) {
				using (GMime.Object subpart = multipart[i])
					this.OnEachPart (subpart);
			}
		}
	} else if (mime_part is GMime.Part)
		part = mime_part;
	else {
		// 'part' is still null on this path, so the diagnostic has to be
		// built from the incoming mime_part.
		string type_name = (mime_part == null) ? "(null)" : mime_part.GetType ().ToString ();
		throw new Exception (String.Format ("Unknown part type: {0}", type_name));
	}

	if (part != null) {
		System.IO.Stream stream = null;

		using (GMime.DataWrapper content_obj = ((GMime.Part) part).ContentObject)
			stream = content_obj.Stream;

		// If this is the only part and it's plain text, we
		// want to just attach it to our filter instead of
		// creating a child indexable for it.
		bool no_child_needed = false;

		// Invariant lowercasing: a culture-sensitive ToLower () can mangle
		// ASCII mime types (e.g. 'I' under a Turkish locale).
		string mime_type = part.ContentType.ToString ().ToLowerInvariant ();

		if (this.depth == 1 && this.count == 0) {
			if (mime_type == "text/plain") {
				no_child_needed = true;

				this.reader = new StreamReader (stream);
			} else if (mime_type == "text/html") {
				no_child_needed = true;
				html_part = true;
				string enc = part.ContentType.GetParameter ("charset");

				// DataWrapper.Stream is a very limited stream
				// and does not allow Seek or Tell
				// HtmlFilter requires Stream.Position=0.
				// Play safe and create a memorystream
				// for HTML parsing.
				GMime.StreamMem mem_stream;
				mem_stream = new GMime.StreamMem ();

				GMime.Stream data_stream;
				data_stream = ((StreamWrapper) stream).GMimeStream;
				data_stream.WriteToStream (mem_stream);
				data_stream.Flush ();

				// The StreamWrapper and hence the memory_stream
				// will be closed when the reader is closed
				// after Pull()-ing is done.
				System.IO.Stream html_stream;
				html_stream = new StreamWrapper (mem_stream);
				html_stream.Seek (0, SeekOrigin.Begin);

				stream.Close ();

				try {
					this.reader = FilterHtml.GetHtmlReader (html_stream, enc, link_handler);
				} catch (Exception e) {
					Log.Debug (e, "Exception while filtering HTML email {0}", this.indexable.Uri);
					this.reader = null;
					html_stream.Close ();
					html_part = false;
				}
			}
		}

		if (!no_child_needed) {
			// Check the mime type against the blacklist and don't index any
			// parts that are contained within.  That way the user doesn't
			// get flooded with pointless signatures and vcard and ical
			// attachments along with (real) attachments.
			if (Array.IndexOf (blacklisted_mime_types, mime_type) == -1) {
				string sub_uri = "#" + this.count;
				Indexable child;
				child = new Indexable (UriFu.AddFragment (this.indexable.Uri, sub_uri, true));

				child.DisplayUri = new Uri (this.indexable.DisplayUri.ToString () + "#" + this.count);

				// This is a special case.
				// Even for mails found on disk, the MailMessage hit type is set.
				child.HitType = "MailMessage";
				child.MimeType = mime_type;

				// If this is the richest part we found for multipart emails,
				// add its content to textcache
				if (snippet_attachment || (this.depth == 1 && this.count == 0))
					child.CacheContent = true;
				else
					child.CacheContent = false;

				string filename = ((GMime.Part) part).Filename;

				if (! String.IsNullOrEmpty (filename)) {
					child.AddProperty (Property.NewKeyword ("fixme:attachment_title", filename));

					foreach (Property prop in Property.StandardFileProperties (filename, false))
						child.AddProperty (prop);
				}

				// Store length of attachment
				long length = stream.Length;
				if (length != -1)
					child.AddProperty (Property.NewUnsearched ("fixme:filesize", length));

				if (part.ContentType.MediaType.ToLowerInvariant () == "text")
					child.SetTextReader (new StreamReader (stream));
				else
					child.SetBinaryStream (stream);

				child.SetChildOf (this.indexable);
				child.StoreStream ();
				child.CloseStreams ();
				this.child_indexables.Add (child);
			} else {
				Log.Debug ("Skipping attachment {0}#{1} with blacklisted mime type {2}",
					   this.indexable.Uri, this.count, mime_type);
			}
		}

		this.count++;
	}

	// Only the alternative chosen from a multipart was fetched by us and
	// needs explicit disposal; a plain Part is the caller's mime_part.
	if (part_needs_dispose)
		part.Dispose ();

	--depth;
}