private Indexable OperaRowToIndexable (OperaHistory.Row row)
		{
			// It's unsafe to index secure content since it may contain sensitive data
			if (row.Address.Scheme == Uri.UriSchemeHttps)
				return null;
			
			Indexable indexable = new Indexable (row.Address);
			
			indexable.HitType = "WebHistory";
			indexable.MimeType = row.MimeType;
			indexable.Timestamp = row.LastVisited;
			indexable.AddProperty(Beagle.Property.New ("fixme:host",row.Address.Host));
			indexable.AddProperty (Beagle.Property.NewUnsearched ("fixme:size", row.Length));
			// hint for the filter about the charset
			indexable.AddProperty (Property.NewUnsearched (StringFu.UnindexedNamespace + "charset", row.Encoding.ToString ()));

			if(row.Compression == "gzip")	
				indexable.SetBinaryStream (new GZipInputStream (File.OpenRead (Path.Combine (cache_dir, row.LocalFileName))));
			else
				indexable.ContentUri = new Uri (Path.Combine (cache_dir, row.LocalFileName));
			
			indexer.AttributeStore.AttachLastWriteTime (Path.Combine (cache_dir, row.LocalFileName), DateTime.UtcNow);
			
			return indexable;
		}
Exemple #2
0
			public void OnEachPart (GMime.Object mime_part)
			{
				GMime.Object part = null;
				bool part_needs_dispose = false;

				//for (int i = 0; i < this.depth; i++)
				//  Console.Write ("  ");
				//Console.WriteLine ("Content-Type: {0}", mime_part.ContentType);
			
				++depth;

				if (mime_part is GMime.MessagePart) {
					GMime.MessagePart msg_part = (GMime.MessagePart) mime_part;

					using (GMime.Message message = msg_part.Message) {
						using (GMime.Object subpart = message.MimePart)
							this.OnEachPart (subpart);
					}
				} else if (mime_part is GMime.Multipart) {
					GMime.Multipart multipart = (GMime.Multipart) mime_part;
					int num_parts = multipart.Count;

					// If the mimetype is multipart/alternative, we only want to index
					// one part -- the richest one we can filter.
					if (mime_part.ContentType.MediaSubtype.ToLower () == "alternative") {
						// The richest formats are at the end, so work from there
						// backward.
						for (int i = num_parts - 1; i >= 0; i--) {
							GMime.Object subpart = multipart[i];

							if (IsMimeTypeHandled (subpart.ContentType.ToString ())) {
								part = subpart;
								part_needs_dispose = true;
								break;
							} else {
								subpart.Dispose ();
							}
						}
					}

					// If it's not alternative, or we don't know how to filter any of
					// the parts, treat them like a bunch of attachments.
					if (part == null) {
						for (int i = 0; i < num_parts; i++) {
							using (GMime.Object subpart = multipart[i])
								this.OnEachPart (subpart);
						}
					}
				} else if (mime_part is GMime.Part)
					part = mime_part;
				else
					throw new Exception (String.Format ("Unknown part type: {0}", part.GetType ()));

				if (part != null) {
					System.IO.Stream stream = null;
					
					using (GMime.DataWrapper content_obj = ((GMime.Part) part).ContentObject)
						stream = content_obj.Stream;

					// If this is the only part and it's plain text, we
					// want to just attach it to our filter instead of
					// creating a child indexable for it.
					bool no_child_needed = false;

					string mime_type = part.ContentType.ToString ().ToLower ();

					if (this.depth == 1 && this.count == 0) {
						if (mime_type == "text/plain") {
							no_child_needed = true;

							this.reader = new StreamReader (stream);
						} else if (mime_type == "text/html") {
							no_child_needed = true;
							html_part = true;
							string enc = part.ContentType.GetParameter ("charset"); 
							// DataWrapper.Stream is a very limited stream
							// and does not allow Seek or Tell
							// HtmlFilter requires Stream.Position=0.
							// Play safe and create a memorystream
							// for HTML parsing.

							GMime.StreamMem mem_stream;
							mem_stream = new GMime.StreamMem ();

							GMime.Stream data_stream;
							data_stream = ((StreamWrapper) stream).GMimeStream;
							data_stream.WriteToStream (mem_stream);
							data_stream.Flush ();

							// The StreamWrapper and hence the memory_stream
							// will be closed when the reader is closed
							// after Pull()-ing is done.
							System.IO.Stream html_stream; 
							html_stream = new StreamWrapper (mem_stream);
							html_stream.Seek (0, SeekOrigin.Begin);

							stream.Close ();

							try {
								this.reader = FilterHtml.GetHtmlReader (html_stream, enc, link_handler);
							} catch (Exception e) {
								Log.Debug (e, "Exception while filtering HTML email {0}", this.indexable.Uri);
								this.reader = null;
								html_stream.Close ();
								html_part = false;
							}
						}
					}

					if (!no_child_needed) {
						// Check the mime type against the blacklist and don't index any
						// parts that are contained within.  That way the user doesn't
						// get flooded with pointless signatures and vcard and ical
						// attachments along with (real) attachments.

						if (Array.IndexOf (blacklisted_mime_types, mime_type) == -1) {
							string sub_uri = "#" + this.count;
							Indexable child;
							child = new Indexable (UriFu.AddFragment (this.indexable.Uri, sub_uri, true));

							child.DisplayUri = new Uri (this.indexable.DisplayUri.ToString () + "#" + this.count);

							// This is a special case.
							// Even for mails found on disk, MailMessage hitype is set
							child.HitType = "MailMessage";
							child.MimeType = mime_type;

							// If this is the richest part we found for multipart emails, add its content to textcache
							if (snippet_attachment ||
							    (this.depth == 1 && this.count == 0))
								child.CacheContent = true;
							else
								child.CacheContent = false;

							string filename = ((GMime.Part) part).Filename;

							if (! String.IsNullOrEmpty (filename)) {
								child.AddProperty (Property.NewKeyword ("fixme:attachment_title", filename));

								foreach (Property prop in Property.StandardFileProperties (filename, false))
									child.AddProperty (prop);
							}

							// Store length of attachment
							long length = stream.Length;
							if (length != -1)
								child.AddProperty (Property.NewUnsearched ("fixme:filesize", length));

							if (part.ContentType.MediaType.ToLower () == "text")
								child.SetTextReader (new StreamReader (stream));
							else
								child.SetBinaryStream (stream);

							child.SetChildOf (this.indexable);
							child.StoreStream ();
							child.CloseStreams ();
							this.child_indexables.Add (child);
						} else {
							Log.Debug ("Skipping attachment {0}#{1} with blacklisted mime type {2}",
								   this.indexable.Uri, this.count, mime_type);
						}
					}

					this.count++;
				}

				if (part_needs_dispose)
					part.Dispose ();

				--depth;
			}