/////////////////////////////////////////////////

// Builds a "Note" indexable for the given note.
//   file - file holding the note; its full path becomes the content URI.
//   note - supplies the URI, timestamp, subject, tags and text.
// Returns the populated indexable. As a side effect the note text is
// stored in note_text_cache under the note's URI.
private Indexable NoteToIndexable(FileInfo file, Note note)
{
    Indexable result = new Indexable(note.Uri);

    result.ContentUri = UriFu.PathToFileUri(file.FullName);
    result.Timestamp = note.timestamp;
    result.HitType = "Note";
    result.Filtering = IndexableFiltering.AlreadyFiltered;

    result.AddProperty(Property.New("dc:title", note.subject));
    result.AddProperty(Property.NewUnsearched("fixme:application", "tomboy"));

    // FIXME: tagging is disabled in Tomboy-0.8.x and is planned for 0.10.0
    foreach (string tag in note.tags)
    {
        result.AddProperty(Property.New("note:tag", tag));
    }

    // Keep the note's text around so that it can be stuffed into
    // the TextCache later.
    string body = note.text;
    note_text_cache [note.Uri] = body;

    result.SetTextReader(new StringReader(body));
    return result;
}
// Produces the "Contact" indexable for the entry identified by current_uid,
// using the text accumulated in string_builder as its content.
// Returns null while initial_scan is set.
public Indexable GetNextIndexable()
{
    if (initial_scan)
    {
        return null;
    }

    Uri contact_uri = new Uri(String.Format("kabc:///{0}", current_uid));

    Indexable contact = new Indexable(contact_uri);
    contact.ParentUri = UriFu.PathToFileUri(kabc_file);
    contact.MimeType = ICalParser.KabcMimeType;
    contact.HitType = "Contact";
    contact.Timestamp = current_dt;

    contact.AddProperty(Property.NewKeyword("fixme:client", "KAddressBook"));
    // Add uid as a keyword field for convenience
    contact.AddProperty(Property.NewUnsearched("fixme:uid", current_uid));

    // FIXME: Comment this Debug statement after the backend stabilizes
    //Log.Debug ("Creating {0} from:[{1}]", uri, string_builder.ToString ());
    contact.SetTextReader(new StringReader(string_builder.ToString()));

    return contact;
}
// Converts the feed item held in current_item into a "FeedItem" indexable
// whose parent is the feed file. Returns null when there is no current item.
// DateTime.ParseExact throws if PubDate does not match the expected pattern.
private Indexable current_itemToIndexable()
{
    // sanity check
    if (current_item == null)
    {
        return null;
    }

    //Log.Debug ("Indexing " + channel_link + ":" + current_item.Link);
    Uri item_uri = new Uri(String.Format("feed:{0};item={1}", channel_link, current_item.Link));

    Indexable feed_item = new Indexable(item_uri);
    feed_item.ParentUri = UriFu.PathToFileUri(feed_file);
    feed_item.MimeType = "text/html";
    feed_item.HitType = "FeedItem";

    // Publication dates are expected in this RFC 822 style layout.
    string pub_date_format = "ddd, dd MMM yyyy HH:mm:ss zzz";
    feed_item.Timestamp = DateTime.ParseExact(current_item.PubDate,
                                              pub_date_format,
                                              DateTimeFormatInfo.InvariantInfo,
                                              DateTimeStyles.AdjustToUniversal);

    // replace property names with Dublin Core names
    feed_item.AddProperty(Property.New("dc:title", current_item.Title));
    feed_item.AddProperty(Property.NewKeyword("dc:identifier", current_item.Link));
    feed_item.AddProperty(Property.NewKeyword("dc:source", channel_link));
    feed_item.AddProperty(Property.New("dc:publisher", channel_title));

    feed_item.SetTextReader(new StringReader(current_item.Description));
    return feed_item;
}
// Builds a "Note" indexable for a calendar memo component.
//   cc - component supplying the URI (via GetComponentUri), start date,
//        summary and descriptions.
// Returns the populated indexable.
private Indexable MemoToIndexable(CalComponent cc)
{
    Indexable memo = new Indexable(GetComponentUri(cc));

    memo.Timestamp = cc.Dtstart;
    memo.HitType = "Note";
    memo.Filtering = IndexableFiltering.AlreadyFiltered;

    memo.AddProperty(Property.NewUnsearched("fixme:application", "evolution"));
    memo.AddProperty(Property.New("dc:title", cc.Summary));

    // We remember the memo's text so that we can stuff it in
    // the TextCache later.
    // This is here for compatibility with Tomboy notes.
    // Every description is written to the same cache key and handed to
    // SetTextReader in turn.
    foreach (string text in cc.Descriptions)
    {
        queryable.IndexableTextCache [memo.Uri] = text;
        memo.SetTextReader(new StringReader(text));
    }

    return memo;
}
// Collects the log lines of the session currently being read and wraps them
// in an "IMLog" indexable. Returns null when no text was collected.
// In both cases session_begin_time is advanced to the timestamp reported for
// the last line handed to AppendLogText.
public Indexable GetNextIndexable()
{
    DateTime line_time = DateTime.MinValue;

    // Keep consuming lines for as long as AppendLogText reports that the
    // line still belongs to the session being collected.
    //Log.Debug ("Checking line from {0}:[{1}]", log_file, log_line_as_sb);
    while (log_line_as_sb != null && AppendLogText(log_line_as_sb, out line_time))
    {
        prev_line_offset = reader.Position;
        log_line_as_sb = reader.ReadLineAsStringBuilder();
    }

    // Check if there is new data to index
    if (data_sb.Length == 0)
    {
        session_begin_time = line_time;
        return null;
    }

    Uri session_uri = new Uri(String.Format("konversation://{0}@dumb/{1}", session_begin_offset, log_file));
    Log.Debug("Creating indexable {0}", session_uri);

    Indexable session = new Indexable(session_uri);
    session.ParentUri = UriFu.PathToFileUri(log_file);
    session.Timestamp = session_begin_time;
    session.HitType = "IMLog";
    session.CacheContent = false;
    session.Filtering = IndexableFiltering.AlreadyFiltered;

    session.AddProperty(Beagle.Property.NewUnsearched("fixme:session_begin_offset", session_begin_offset));
    session.AddProperty(Beagle.Property.NewUnsearched("fixme:session_end_offset", prev_line_offset));
    session.AddProperty(Beagle.Property.NewDate("fixme:starttime", session_begin_time));
    session.AddProperty(Beagle.Property.NewUnsearched("fixme:client", "Konversation"));
    session.AddProperty(Beagle.Property.NewUnsearched("fixme:protocol", "IRC"));

    AddChannelInformation(session);

    foreach (string nick in speakers.Keys)
    {
        session.AddProperty(Beagle.Property.NewUnstored("fixme:speaker", nick));
    }

    session.SetTextReader(new StringReader(data_sb.ToString()));

    // update session begin time to the date of the current line which is not in this session
    session_begin_time = line_time;

    return session;
}
// Builds a "FeedItem" indexable from the fields of the given document.
// When the document reports offline content, the body is fetched through
// GetRssBody and attached as the text reader; a failure to fetch it is
// logged and the item is indexed with its subject as title instead.
private Indexable ToAddRssIndexable(XmlDocument document)
{
    string charset = null;
    StringReader body = null;

    if (ToBool(GetText(document, "HasOffline")))
    {
        try
        {
            // RSS does not use OfflineSize but MessageSize instead (for some reason...)
            int offset = Convert.ToInt32(GetText(document, "MessageOffset"));
            int size = Convert.ToInt32(GetText(document, "MessageSize"));
            body = GetRssBody(GetText(document, "FolderFile"), offset, size, out charset);
        }
        catch (Exception e)
        {
            Logger.Log.Debug(e, "Failed to parse RSS body");
        }
    }

    Indexable item = new Indexable(GenerateUniqueUri(document));
    item.HitType = "FeedItem";
    item.MimeType = "text/html";
    item.Timestamp = DateTimeUtil.UnixToDateTimeUtc(Convert.ToInt64(GetText(document, "Date")));
    item.CacheContent = true;
    item.FlushBufferCache = true;

    item.AddProperty(Property.NewKeyword("fixme:client", "thunderbird"));
    item.AddProperty(Property.NewKeyword("fixme:folder", GetText(document, "Folder")));
    item.AddProperty(Property.NewUnsearched("ParentUri", GetText(document, "FolderFile")));
    item.AddProperty(Property.NewUnsearched("fixme:uri", GetText(document, "Uri")));

    item.AddProperty(Property.NewKeyword("dc:identifier", ExtractUrl(GetText(document, "MessageId"))));
    item.AddProperty(Property.NewKeyword("dc:source", GetText(document, "FeedURL")));
    item.AddProperty(Property.New("dc:publisher", Mime.HeaderDecodePhrase(GetText(document, "Author"))));

    if (body == null)
    {
        // No body to filter, so the title has to be supplied here.
        item.AddProperty(Property.New("dc:title", Mime.HeaderDecodePhrase(GetText(document, "Subject"))));
    }
    else
    {
        // The title will be added by the filter. In case we add it twice we will just get
        // an empty title in the search tool (a bug maybe?).

        // If we got an encoding, make sure we use that
        if (!String.IsNullOrEmpty(charset))
        {
            string encoding_key = String.Format("{0}encoding", StringFu.UnindexedNamespace);
            item.AddProperty(Property.New(encoding_key, charset));
        }

        item.SetTextReader(body);
    }

    return item;
}
// Converts the item held in current_item into a "FeedItem" indexable whose
// parent is the feed file. If the natural "<feed_source>;item=<source>" URI
// is rejected with a UriFormatException, a "liferea://dummy?" URI is used.
private Indexable current_itemToIndexable()
{
    Indexable feed_item;

    try
    {
        string natural = String.Format("{0};item={1}", feed_source, current_item.Source);
        feed_item = new Indexable(new Uri(natural));
    }
    catch (System.UriFormatException)
    {
        string fallback = String.Format("liferea://dummy?{0};item={1}", feed_source, current_item.Source);
        feed_item = new Indexable(new Uri(fallback));
    }

    feed_item.ParentUri = UriFu.PathToFileUri(feed_file);
    feed_item.MimeType = "text/html";
    feed_item.HitType = "FeedItem";

    // The item timestamp is a number of seconds added to the value
    // returned for Unix time 0.
    feed_item.Timestamp = DateTimeUtil.UnixToDateTimeUtc(0).AddSeconds(current_item.Timestamp);

    // cleaning up the property names as far as possible
    // this way querying for specific field is possible
    // following DC element names wherever applicable
    feed_item.AddProperty(Property.New("dc:title", current_item.Title));

    Attribute[] attribs = current_item.Attribs.AttribArray;
    if (attribs != null)
    {
        foreach (Attribute attrib in attribs)
        {
            // Only "author" attributes are indexed.
            if (attrib.Name == "author")
            {
                feed_item.AddProperty(Property.New("dc:creator", attrib.Value));
            }
        }
    }

    feed_item.AddProperty(Property.NewKeyword("dc:identifier", current_item.Source));
    feed_item.AddProperty(Property.NewKeyword("dc:source", feed_source));
    feed_item.AddProperty(Property.New("dc:publisher", publisher));

    if (File.Exists(icon_file))
    {
        feed_item.AddProperty(Property.NewUnsearched("fixme:cachedimg", icon_file));
    }

    feed_item.SetTextReader(new StringReader(current_item.Description));
    return feed_item;
}
// Builds a "MonodocEntry" indexable for one member node.
//   node       - member node; its MemberType, MemberName, Parameters and
//                Docs children/attributes are read.
//   base_uri   - URI to which the member's full name is added as fragment.
//   parentName - name placed between the type prefix and the member name.
// The full name has the form "<kind>:<parent>[.<member>][(<type>,...)]".
static private Indexable MemberNodeToIndexable(XmlNode node, Uri base_uri, string parentName)
{
    string member_type_text = node.SelectSingleNode("MemberType").InnerText;
    char kind = MemberTypeToChar(member_type_text);

    StringBuilder signature = new StringBuilder();
    signature.Append(kind + ":" + parentName);

    // Kind 'C' gets no member name appended.
    if (kind != 'C')
    {
        signature.Append("." + node.Attributes["MemberName"].Value);
    }

    bool has_parameter_list = (kind == 'C' || kind == 'M' || kind == 'E');
    if (has_parameter_list)
    {
        signature.Append("(");

        // Comma-separate the parameter types; the separator is empty
        // before the first one.
        string separator = "";
        foreach (XmlNode parameter in node.SelectNodes("Parameters/Parameter"))
        {
            signature.Append(separator);
            signature.Append(parameter.Attributes["Type"].Value);
            separator = ",";
        }

        signature.Append(")");
    }

    string full_name = signature.ToString();

    Indexable entry = new Indexable(UriFu.AddFragment(base_uri, full_name, false));
    entry.MimeType = "text/html";
    entry.HitType = "MonodocEntry";

    entry.AddProperty(Property.New("dc:title", full_name));
    entry.AddProperty(Property.New("fixme:name", full_name));
    entry.AddProperty(Property.NewUnsearched("fixme:type", member_type_text.ToLower()));

    entry.SetTextReader(new StringReader(node.SelectSingleNode("Docs").InnerXml));
    return entry;
}
// Builds a "MonodocEntry" indexable for a type node.
//   node     - type node; its FullName attribute and Docs child are read.
//   base_uri - URI to which "T:<FullName>" is added as fragment.
static private Indexable TypeNodeToIndexable(XmlNode node, Uri base_uri)
{
    // "T:<FullName>" serves as URI fragment, title and name alike.
    string type_name = "T:" + node.Attributes ["FullName"].Value;

    Indexable entry = new Indexable(UriFu.AddFragment(base_uri, type_name, false));
    entry.MimeType = "text/html";
    entry.HitType = "MonodocEntry";

    entry.AddProperty(Property.New("dc:title", type_name));
    entry.AddProperty(Property.NewUnsearched("fixme:name", type_name));
    entry.AddProperty(Property.NewUnsearched("fixme:type", "type"));

    entry.SetTextReader(new StringReader(node.SelectSingleNode("Docs").InnerXml));
    return entry;
}
// Builds a "Monodoc" indexable for a type node.
//   node - type node; its FullName attribute and Docs child are read.
//   file - file the node came from; it is embedded in the monodoc:/// URI.
Indexable TypeNodeToIndexable(XmlNode node, FileInfo file)
{
    string full_name = node.Attributes["FullName"].Value;
    string type_name = "T:" + full_name;

    Uri entry_uri = new Uri("monodoc:///" + file + ";item=" + type_name);

    Indexable entry = new Indexable(entry_uri);
    entry.MimeType = "application/monodoc";
    entry.Type = "Monodoc";

    entry.AddProperty(Property.NewUnsearched("fixme:type", "type"));
    entry.AddProperty(Property.NewUnsearched("fixme:name", type_name));

    // The pieces returned by FuzzySplit are stored space-separated.
    string[] name_parts = StringFu.FuzzySplit(full_name);
    entry.AddProperty(Property.NewKeyword("fixme:splitname", String.Join(" ", name_parts)));

    // Should we add other stuff here? Implemented interfaces etc?

    entry.SetTextReader(new StringReader(node.SelectSingleNode("Docs").InnerXml));
    return entry;
}
// Wraps a message in a "FeedItem" indexable addressed as
// "feed:///<path>;id=<id>", with the message body as its text.
protected override Indexable MessageToIndexable(Message message)
{
    string address = String.Format("feed:///{0};id={1}", message.Path, message.Id);

    Indexable feed_item = new Indexable(new Uri(address));
    feed_item.MimeType = "text/html";
    feed_item.Type = "FeedItem";

    feed_item.AddProperty(Property.New("fixme:client", "mozilla"));
    feed_item.AddProperty(Property.New("dc:title", message.Subject));
    feed_item.AddProperty(Property.New("fixme:author", message.From));
    //indexable.AddProperty(Property.NewDate ("fixme:published", item.PubDate));
    feed_item.AddProperty(Property.NewUnsearched("fixme:itemuri", message.Headers ["Content-Base"]));
    feed_item.AddProperty(Property.New("fixme:offset", message.Offset));

    feed_item.SetTextReader(new StringReader(message.Body));
    return feed_item;
}
// Wraps a message in a "MailMessage" indexable addressed as
// "email:///<path>;id=<id>", with the message body as its text.
// The subject is stored both as dc:title and as fixme:subject.
protected override Indexable MessageToIndexable(Message message)
{
    string address = String.Format("email:///{0};id={1}", message.Path, message.Id);

    Indexable mail = new Indexable(new Uri(address));
    mail.Type = "MailMessage";

    mail.AddProperty(Property.New("fixme:client", "mozilla"));
    mail.AddProperty(Property.New("dc:title", message.Subject));
    mail.AddProperty(Property.New("fixme:subject", message.Subject));
    mail.AddProperty(Property.New("fixme:to", message.To));
    mail.AddProperty(Property.New("fixme:from", message.From));
    mail.AddProperty(Property.New("fixme:offset", message.Offset));

    mail.SetTextReader(new StringReader(message.Body));
    return mail;
}
// Walks the XML exposed by `reader` node by node.
//  - "sect*" / "chapter*" elements that carry an id open a DocbookEntry,
//    which is pushed on entries_stack.
//  - Text is appended to the innermost open entry, or to the main
//    indexable when no entry is open.
//  - When the end element at an entry's depth is reached, the entry is
//    popped and emitted as a child "DocbookEntry" indexable.
// Afterwards the title and language are added to the top-level item, and
// the filter finishes, or reports an error when neither a child indexable
// nor a title was found.
override protected void DoPullProperties()
{
    Stopwatch watch = new Stopwatch();
    watch.Start();

    while (reader.Read())
    {
        XmlNodeType node_type = reader.NodeType;

        if (node_type == XmlNodeType.Element)
        {
            string element = reader.Name;

            if (element.StartsWith("sect") || element.StartsWith("chapter"))
            {
                // Sections without an id are not tracked as entries.
                string id = reader.GetAttribute("id");
                if (!String.IsNullOrEmpty(id))
                {
                    DocbookEntry section = new DocbookEntry();
                    section.Id = id;
                    section.Depth = reader.Depth;

                    string section_language = reader.GetAttribute("lang");
                    if (!String.IsNullOrEmpty(section_language))
                    {
                        section.Language = section_language;
                    }

                    entries_stack.Push(section);
                }
            }
            else if (element == "article" || element == "book")
            {
                string document_language = reader.GetAttribute("lang");
                if (!String.IsNullOrEmpty(document_language))
                {
                    base_language = document_language;
                }
            }
            else if (element == "title")
            {
                reader.Read(); // Go to the text node

                if (entries_stack.Count > 0)
                {
                    // Only the first title seen inside an entry is kept.
                    DocbookEntry open_entry = (DocbookEntry)entries_stack.Peek();
                    if (open_entry.Title == null)
                    {
                        open_entry.Title = reader.Value;
                    }
                }
                else if (base_title == null)
                {
                    // This is probably the book title
                    base_title = reader.Value;
                }
            }
            else if (element == "keyword")
            {
                reader.Read(); // read the text node
                AddProperty(Property.NewKeyword("dc:subject", reader.Value));
            }
        }
        else if (node_type == XmlNodeType.Text)
        {
            if (entries_stack.Count > 0)
            {
                // Append text to the child indexable
                DocbookEntry open_entry = (DocbookEntry)entries_stack.Peek();
                open_entry.Content.Append(reader.Value);
            }
            else
            {
                // Append text to the main indexable
                AppendWord(reader.Value);
            }
        }
        else if (node_type == XmlNodeType.EndElement)
        {
            if (entries_stack.Count > 0 && ((DocbookEntry)entries_stack.Peek()).Depth == reader.Depth)
            {
                DocbookEntry closed = (DocbookEntry)entries_stack.Pop();

                DocbookEntry enclosing = null;
                if (entries_stack.Count > 0)
                {
                    enclosing = (DocbookEntry)entries_stack.Peek();
                }

                Indexable child = new Indexable(UriFu.AddFragment(Indexable.Uri, closed.Id, false));
                child.HitType = "DocbookEntry";
                child.MimeType = "text/x-docbook-entry";
                child.AddProperty(Property.NewKeyword("beagle:FileType", "documentation"));
                child.Filtering = IndexableFiltering.AlreadyFiltered;
                child.AddProperty(Property.NewUnsearched("fixme:id", closed.Id));
                child.AddProperty(Property.New("dc:title", closed.Title));

                // Add the docbook book title
                child.AddProperty(Property.NewUnsearched("fixme:base_title", base_title));

                // Add the child language (or docbook language if none is specified)
                string effective_language = (closed.Language != null) ? closed.Language : base_language;
                if (effective_language != null)
                {
                    child.AddProperty(Property.NewUnsearched("fixme:language", effective_language));
                }

                // Add any parent (as in docbook parent entry, not beagle) data if we have it
                if (enclosing != null)
                {
                    child.AddProperty(Property.NewUnsearched("fixme:parent_id", enclosing.Id));
                    child.AddProperty(Property.NewUnsearched("fixme:parent_title", enclosing.Title));
                }

                child.SetTextReader(new StringReader(closed.Content.ToString()));
                child.SetChildOf(this.Indexable);

                AddIndexable(child);
            }
        }
    }

    // Add the common properties to the top-level
    // file item such as Title, Language etc.
    AddProperty(Property.New("dc:title", base_title));
    AddProperty(Property.NewUnsearched("fixme:language", base_language));

    watch.Stop();

    // If we've successfully crawled the file but haven't
    // found any indexables, we shouldn't consider it
    // successful at all (unless we have a title, which
    // means that it's actually a docbook file, just without
    // sections).
    bool looks_like_docbook = HasGeneratedIndexable || base_title != null;
    if (!looks_like_docbook)
    {
        Log.Error("Probably not a docbook. Ignoring {0}!", base_path);
        Error();
        return;
    }

    Logger.Log.Debug("Parsed docbook file in {0}", watch);

    Finished();
}
// Collects the log lines of the session currently being read and wraps them
// in an "IMLog" indexable. Returns null when no text was collected.
// In both cases session_begin_time is advanced to the timestamp reported for
// the last line handed to AppendLogText.
public Indexable GetNextIndexable ()
{
	DateTime line_time = DateTime.MinValue;

	// Keep consuming lines for as long as AppendLogText reports that the
	// line still belongs to the session being collected.
	//Log.Debug ("Checking line from {0}:[{1}]", log_file, log_line_as_sb);
	while (log_line_as_sb != null && AppendLogText (log_line_as_sb, out line_time)) {
		prev_line_offset = reader.Position;
		log_line_as_sb = reader.ReadLineAsStringBuilder ();
	}

	// Check if there is new data to index
	if (data_sb.Length == 0) {
		session_begin_time = line_time;
		return null;
	}

	Uri session_uri = new Uri (String.Format ("konversation://{0}@dumb/{1}", session_begin_offset, log_file));
	Log.Debug ("Creating indexable {0}", session_uri);

	Indexable session = new Indexable (session_uri);
	session.ParentUri = UriFu.PathToFileUri (log_file);
	session.Timestamp = session_begin_time;
	session.HitType = "IMLog";
	session.CacheContent = false;
	session.Filtering = IndexableFiltering.AlreadyFiltered;

	session.AddProperty (Beagle.Property.NewUnsearched ("fixme:session_begin_offset", session_begin_offset));
	session.AddProperty (Beagle.Property.NewUnsearched ("fixme:session_end_offset", prev_line_offset));
	session.AddProperty (Beagle.Property.NewDate ("fixme:starttime", session_begin_time));
	session.AddProperty (Beagle.Property.NewUnsearched ("fixme:client", "Konversation"));
	session.AddProperty (Beagle.Property.NewUnsearched ("fixme:protocol", "IRC"));

	AddChannelInformation (session);

	foreach (string nick in speakers.Keys) {
		session.AddProperty (Beagle.Property.NewUnstored ("fixme:speaker", nick));
	}

	session.SetTextReader (new StringReader (data_sb.ToString ()));

	// update session begin time to the date of the current line which is not in this session
	session_begin_time = line_time;

	return session;
}
// Visits one MIME object of the message being filtered.
//  - A MessagePart recurses into the MIME part of the embedded message.
//  - A Multipart either selects one handled sub-part (multipart/alternative,
//    searched from the last part backwards) or recurses into every sub-part.
//  - A Part is processed directly.
// A selected part becomes the filter's own reader when it is the first
// part at depth 1 and is text/plain or text/html; otherwise it becomes a
// child indexable, unless its mime type is blacklisted.
// Throws Exception when mime_part is none of the three known kinds.
public void OnEachPart(GMime.Object mime_part)
{
    GMime.Object part = null;
    bool part_needs_dispose = false;

    //for (int i = 0; i < this.depth; i++)
    //  Console.Write ("  ");
    //Console.WriteLine ("Content-Type: {0}", mime_part.ContentType);

    ++depth;

    if (mime_part is GMime.MessagePart)
    {
        GMime.MessagePart msg_part = (GMime.MessagePart)mime_part;

        using (GMime.Message message = msg_part.Message)
        {
            using (GMime.Object subpart = message.MimePart)
                this.OnEachPart(subpart);
        }
    }
    else if (mime_part is GMime.Multipart)
    {
        GMime.Multipart multipart = (GMime.Multipart)mime_part;
        int num_parts = multipart.Count;

        // If the mimetype is multipart/alternative, we only want to index
        // one part -- the richest one we can filter.
        if (mime_part.ContentType.MediaSubtype.ToLower() == "alternative")
        {
            // The richest formats are at the end, so work from there
            // backward.
            for (int i = num_parts - 1; i >= 0; i--)
            {
                GMime.Object subpart = multipart[i];

                if (IsMimeTypeHandled(subpart.ContentType.ToString()))
                {
                    // Keep this one; it is disposed at the end of the method.
                    part = subpart;
                    part_needs_dispose = true;
                    break;
                }
                else
                {
                    subpart.Dispose();
                }
            }
        }

        // If it's not alternative, or we don't know how to filter any of
        // the parts, treat them like a bunch of attachments.
        if (part == null)
        {
            for (int i = 0; i < num_parts; i++)
            {
                using (GMime.Object subpart = multipart[i])
                    this.OnEachPart(subpart);
            }
        }
    }
    else if (mime_part is GMime.Part)
    {
        part = mime_part;
    }
    else
    {
        // `part` has not been assigned on this path, so the offending type
        // must be read from mime_part; dereferencing `part` here would
        // raise a NullReferenceException instead of this message.
        throw new Exception(String.Format("Unknown part type: {0}", mime_part.GetType()));
    }

    if (part != null)
    {
        System.IO.Stream stream = null;

        using (GMime.DataWrapper content_obj = ((GMime.Part)part).ContentObject)
            stream = content_obj.Stream;

        // If this is the only part and it's plain text, we
        // want to just attach it to our filter instead of
        // creating a child indexable for it.
        bool no_child_needed = false;

        string mime_type = part.ContentType.ToString().ToLower();

        if (this.depth == 1 && this.count == 0)
        {
            if (mime_type == "text/plain")
            {
                no_child_needed = true;
                this.reader = new StreamReader(stream);
            }
            else if (mime_type == "text/html")
            {
                no_child_needed = true;
                html_part = true;
                string enc = part.ContentType.GetParameter("charset");

                // DataWrapper.Stream is a very limited stream
                // and does not allow Seek or Tell
                // HtmlFilter requires Stream.Position=0.
                // Play safe and create a memorystream
                // for HTML parsing.
                GMime.StreamMem mem_stream;
                mem_stream = new GMime.StreamMem();

                GMime.Stream data_stream;
                data_stream = ((StreamWrapper)stream).GMimeStream;
                data_stream.WriteToStream(mem_stream);
                data_stream.Flush();

                // The StreamWrapper and hence the memory_stream
                // will be closed when the reader is closed
                // after Pull()-ing is done.
                System.IO.Stream html_stream;
                html_stream = new StreamWrapper(mem_stream);
                html_stream.Seek(0, SeekOrigin.Begin);

                stream.Close();

                try
                {
                    this.reader = FilterHtml.GetHtmlReader(html_stream, enc, link_handler);
                }
                catch (Exception e)
                {
                    Log.Debug(e, "Exception while filtering HTML email {0}", this.indexable.Uri);
                    this.reader = null;
                    html_stream.Close();
                    html_part = false;
                }
            }
        }

        if (!no_child_needed)
        {
            // Check the mime type against the blacklist and don't index any
            // parts that are contained within.  That way the user doesn't
            // get flooded with pointless signatures and vcard and ical
            // attachments along with (real) attachments.
            if (Array.IndexOf(blacklisted_mime_types, mime_type) == -1)
            {
                string sub_uri = "#" + this.count;
                Indexable child;
                child = new Indexable(UriFu.AddFragment(this.indexable.Uri, sub_uri, true));

                child.DisplayUri = new Uri(this.indexable.DisplayUri.ToString() + "#" + this.count);

                // This is a special case.
                // Even for mails found on disk, MailMessage hittype is set
                child.HitType = "MailMessage";
                child.MimeType = mime_type;

                // If this is the richest part we found for multipart emails, add its content to textcache
                if (snippet_attachment || (this.depth == 1 && this.count == 0))
                {
                    child.CacheContent = true;
                }
                else
                {
                    child.CacheContent = false;
                }

                string filename = ((GMime.Part)part).Filename;

                if (!String.IsNullOrEmpty(filename))
                {
                    child.AddProperty(Property.NewKeyword("fixme:attachment_title", filename));

                    foreach (Property prop in Property.StandardFileProperties(filename, false))
                    {
                        child.AddProperty(prop);
                    }
                }

                // Store length of attachment
                long length = stream.Length;
                if (length != -1)
                {
                    child.AddProperty(Property.NewUnsearched("fixme:filesize", length));
                }

                if (part.ContentType.MediaType.ToLower() == "text")
                {
                    child.SetTextReader(new StreamReader(stream));
                }
                else
                {
                    child.SetBinaryStream(stream);
                }

                child.SetChildOf(this.indexable);
                child.StoreStream();
                child.CloseStreams();
                this.child_indexables.Add(child);
            }
            else
            {
                Log.Debug("Skipping attachment {0}#{1} with blacklisted mime type {2}",
                          this.indexable.Uri, this.count, mime_type);
            }
        }

        this.count++;
    }

    if (part_needs_dispose)
    {
        part.Dispose();
    }

    --depth;
}
// Builds a "MonodocEntry" indexable for a type node.
//   node     - type node; its FullName attribute and Docs child are read.
//   base_uri - URI to which "T:<FullName>" is added as fragment.
static private Indexable TypeNodeToIndexable (XmlNode node, Uri base_uri)
{
	// "T:<FullName>" serves as URI fragment, title and name alike.
	string type_name = "T:" + node.Attributes ["FullName"].Value;

	Indexable entry = new Indexable (UriFu.AddFragment (base_uri, type_name, false));
	entry.MimeType = "text/html";
	entry.HitType = "MonodocEntry";

	entry.AddProperty (Property.New ("dc:title", type_name));
	entry.AddProperty (Property.NewUnsearched ("fixme:name", type_name));
	entry.AddProperty (Property.NewUnsearched ("fixme:type", "type"));

	entry.SetTextReader (new StringReader (node.SelectSingleNode ("Docs").InnerXml));
	return entry;
}
// Command-line entry point: builds a "WebHistory" indexable for the URI
// given with --url and sends it through an IndexingServiceRequest.
//   --url <uri>          URI to index (required)
//   --title <text>       stored as dc:title
//   --sourcefile <path>  file holding the content; without it the content
//                        is read from standard input
//   --deletesourcefile   sets DeleteContent on the indexable, and deletes
//                        the file here if sending fails
//   --help / --version   print usage / version
// Exits with status 1 on usage errors, https URIs, a missing source file,
// or an indexing failure. file: and mailto: URIs are silently ignored.
static void Main(String[] args)
{
    string uriStr = null;
    string title = null;
    string sourcefile = null;
    bool deletesourcefile = false;

    if (args.Length == 0 || Array.IndexOf(args, "--help") > -1)
    {
        PrintUsage();
        Environment.Exit(1);
    }

    for (int i = 0; i < args.Length; i++)
    {
        // Options that take a value must be followed by an argument that
        // does not itself look like an option.
        switch (args [i])
        {
        case "--url":
        case "--title":
        case "--sourcefile":
            if (i + 1 >= args.Length || args [i + 1].StartsWith("--"))
            {
                PrintUsage();
                Environment.Exit(1);
            }
            break;
        }

        switch (args [i])
        {
        case "--url":
            uriStr = args [++i];
            break;

        case "--title":
            title = args [++i];
            break;

        case "--sourcefile":
            sourcefile = args [++i];
            break;

        case "--deletesourcefile":
            deletesourcefile = true;
            break;

        case "--help":
            PrintUsage();
            return;

        case "--version":
            VersionFu.PrintVersion();
            return;
        }
    }

    if (uriStr == null)
    {
        Logger.Log.Error("URI not specified!\n");
        PrintUsage();
        Environment.Exit(1);
    }

    Uri uri = new Uri(uriStr, true);
    if (uri.Scheme == Uri.UriSchemeHttps)
    {
        // For security/privacy reasons, we don't index any
        // SSL-encrypted pages.
        Logger.Log.Error("Indexing secure https:// URIs is not secure!");
        Environment.Exit(1);
    }

    // We don't index file: Uris.  Silently exit.
    if (uri.IsFile)
    {
        return;
    }

    // We *definitely* don't index mailto: Uris.  Silently exit.
    if (uri.Scheme == Uri.UriSchemeMailto)
    {
        return;
    }

    Indexable indexable;

    indexable = new Indexable(uri);
    indexable.HitType = "WebHistory";
    indexable.MimeType = "text/html";
    indexable.Timestamp = DateTime.Now;

    if (title != null)
    {
        indexable.AddProperty(Property.New("dc:title", title));
    }

    if (sourcefile != null)
    {
        if (!File.Exists(sourcefile))
        {
            Logger.Log.Error("sourcefile '{0}' does not exist!", sourcefile);
            Environment.Exit(1);
        }

        indexable.ContentUri = UriFu.PathToFileUri(sourcefile);
        indexable.DeleteContent = deletesourcefile;
    }
    else
    {
        Stream stdin = Console.OpenStandardInput();
        if (stdin == null)
        {
            Logger.Log.Error("No sourcefile specified, and no standard input!\n");
            PrintUsage();
            Environment.Exit(1);
        }

        indexable.SetTextReader(new StreamReader(stdin));
    }

    IndexingServiceRequest req = new IndexingServiceRequest();
    req.Add(indexable);

    try
    {
        Logger.Log.Info("Indexing");
        Logger.Log.Debug("SendAsync");
        req.SendAsync();
        Logger.Log.Debug("Close");
        req.Close();
        Logger.Log.Debug("Done");
    }
    catch (Exception e)
    {
        Logger.Log.Error("Indexing failed: {0}", e);

        // Still clean up after ourselves, even if we couldn't
        // index the content.
        // --deletesourcefile can be given without --sourcefile (content
        // read from stdin); File.Delete rejects a null path, so only
        // delete when there actually is a file.
        if (deletesourcefile && sourcefile != null)
        {
            File.Delete(sourcefile);
        }

        Environment.Exit(1);
    }
}
// Builds a "Note" indexable for a calendar memo component.
//   cc - component supplying the URI (via GetComponentUri), start date,
//        summary and descriptions.
// Returns the populated indexable.
private Indexable MemoToIndexable (CalComponent cc)
{
	Indexable memo = new Indexable (GetComponentUri (cc));

	memo.Timestamp = cc.Dtstart;
	memo.HitType = "Note";
	memo.Filtering = IndexableFiltering.AlreadyFiltered;

	memo.AddProperty (Property.NewUnsearched ("fixme:application","evolution"));
	memo.AddProperty (Property.New ("dc:title", cc.Summary));

	// We remember the memo's text so that we can stuff it in
	// the TextCache later.
	// This is here for compatibility with Tomboy notes.
	// Every description is written to the same cache key and handed to
	// SetTextReader in turn.
	foreach (string text in cc.Descriptions) {
		queryable.IndexableTextCache [memo.Uri] = text;
		memo.SetTextReader (new StringReader (text));
	}

	return memo;
}
// Builds a "MonodocEntry" indexable for one member node.
//   node       - member node; its MemberType, MemberName, Parameters and
//                Docs children/attributes are read.
//   base_uri   - URI to which the member's full name is added as fragment.
//   parentName - name placed between the type prefix and the member name.
// The full name has the form "<kind>:<parent>[.<member>][(<type>,...)]".
static private Indexable MemberNodeToIndexable(XmlNode node, Uri base_uri, string parentName)
{
	string member_type_text = node.SelectSingleNode ("MemberType").InnerText;
	char kind = MemberTypeToChar (member_type_text);

	StringBuilder signature = new StringBuilder ();
	signature.Append (kind + ":"+ parentName);

	// Kind 'C' gets no member name appended.
	if (kind != 'C') {
		signature.Append ("." + node.Attributes["MemberName"].Value);
	}

	bool has_parameter_list = (kind == 'C' || kind == 'M' || kind == 'E');
	if (has_parameter_list) {
		signature.Append ("(");

		// Comma-separate the parameter types; the separator is empty
		// before the first one.
		string separator = "";
		foreach (XmlNode parameter in node.SelectNodes ("Parameters/Parameter")) {
			signature.Append (separator);
			signature.Append (parameter.Attributes["Type"].Value);
			separator = ",";
		}

		signature.Append (")");
	}

	string full_name = signature.ToString ();

	Indexable entry = new Indexable (UriFu.AddFragment (base_uri, full_name, false));
	entry.MimeType = "text/html";
	entry.HitType = "MonodocEntry";

	entry.AddProperty (Property.New ("dc:title", full_name));
	entry.AddProperty (Property.New ("fixme:name", full_name));
	entry.AddProperty (Property.NewUnsearched ("fixme:type", member_type_text.ToLower ()));

	entry.SetTextReader (new StringReader (node.SelectSingleNode ("Docs").InnerXml));
	return entry;
}
// Produces the "Contact" indexable for the entry identified by current_uid,
// using the text accumulated in string_builder as its content.
// Returns null while initial_scan is set.
public Indexable GetNextIndexable ()
{
	if (initial_scan) {
		return null;
	}

	Uri contact_uri = new Uri (String.Format ("kabc:///{0}", current_uid));

	Indexable contact = new Indexable (contact_uri);
	contact.ParentUri = UriFu.PathToFileUri (kabc_file);
	contact.MimeType = ICalParser.KabcMimeType;
	contact.HitType = "Contact";
	contact.Timestamp = current_dt;

	contact.AddProperty (Property.NewKeyword ("fixme:client", "KAddressBook"));
	// Add uid as a keyword field for convenience
	contact.AddProperty (Property.NewUnsearched ("fixme:uid", current_uid));

	// FIXME: Comment this Debug statement after the backend stabilizes
	//Log.Debug ("Creating {0} from:[{1}]", uri, string_builder.ToString ());
	contact.SetTextReader (new StringReader (string_builder.ToString()));

	return contact;
}
// Builds a "Monodoc" indexable for one member node.
//   node       - member node; its MemberType, MemberName, Parameters and
//                Docs children/attributes are read.
//   file       - file the node came from; it is embedded in the monodoc:/// URI.
//   parentName - name placed between the type prefix and the member name.
// The full name has the form "<kind>:<parent>[.<member>][(<type>,...)]".
Indexable MemberNodeToIndexable(XmlNode node, FileInfo file, string parentName)
{
    string member_type_text = node.SelectSingleNode("MemberType").InnerText;
    char kind = MemberTypeToChar(member_type_text);

    StringBuilder signature = new StringBuilder();
    signature.Append(kind + ":" + parentName);

    // Kind 'C' gets no member name appended.
    if (kind != 'C')
    {
        signature.Append("." + node.Attributes["MemberName"].Value);
    }

    bool has_parameter_list = (kind == 'C' || kind == 'M' || kind == 'E');
    if (has_parameter_list)
    {
        signature.Append("(");

        // Comma-separate the parameter types; the separator is empty
        // before the first one.
        string separator = "";
        foreach (XmlNode parameter in node.SelectNodes("Parameters/Parameter"))
        {
            signature.Append(separator);
            signature.Append(parameter.Attributes["Type"].Value);
            separator = ",";
        }

        signature.Append(")");
    }

    string full_name = signature.ToString();

    Indexable entry = new Indexable(new Uri("monodoc:///" + file + ";item=" + full_name));
    entry.MimeType = "application/monodoc";
    entry.Type = "Monodoc";

    entry.AddProperty(Property.NewUnsearched("fixme:type", member_type_text.ToLower()));
    // The builder itself (not its string) is passed here, as before.
    entry.AddProperty(Property.New("fixme:name", signature));

    // The split name covers what follows the two-character "<kind>:"
    // prefix, up to (not including) the parameter list if there is one.
    int paren_at = full_name.IndexOf("(");
    string bare_name = (paren_at == -1)
        ? full_name.Substring(2)
        : full_name.Substring(2, paren_at - 2);
    string splitname = String.Join(" ", StringFu.FuzzySplit(bare_name));

    entry.AddProperty(Property.NewKeyword("fixme:splitname", splitname));

    entry.SetTextReader(new StringReader(node.SelectSingleNode("Docs").InnerXml));
    return entry;
}
// Walks the XML exposed by `reader` node by node.
//  - "sect*" / "chapter*" elements that carry an id open a DocbookEntry,
//    which is pushed on entries_stack.
//  - Text is appended to the innermost open entry, or to the main
//    indexable when no entry is open.
//  - When the end element at an entry's depth is reached, the entry is
//    popped and emitted as a child "DocbookEntry" indexable.
// Afterwards the title and language are added to the top-level item, and
// the filter finishes, or reports an error when neither a child indexable
// nor a title was found.
override protected void DoPullProperties ()
{
	Stopwatch watch = new Stopwatch ();
	watch.Start ();

	while (reader.Read ()) {
		XmlNodeType node_type = reader.NodeType;

		if (node_type == XmlNodeType.Element) {
			string element = reader.Name;

			if (element.StartsWith ("sect") || element.StartsWith ("chapter")) {
				// Sections without an id are not tracked as entries.
				string id = reader.GetAttribute ("id");
				if (! String.IsNullOrEmpty (id)) {
					DocbookEntry section = new DocbookEntry ();
					section.Id = id;
					section.Depth = reader.Depth;

					string section_language = reader.GetAttribute ("lang");
					if (! String.IsNullOrEmpty (section_language)) {
						section.Language = section_language;
					}

					entries_stack.Push (section);
				}
			} else if (element == "article" || element == "book") {
				string document_language = reader.GetAttribute ("lang");
				if (! String.IsNullOrEmpty (document_language)) {
					base_language = document_language;
				}
			} else if (element == "title") {
				reader.Read (); // Go to the text node

				if (entries_stack.Count > 0) {
					// Only the first title seen inside an entry is kept.
					DocbookEntry open_entry = (DocbookEntry) entries_stack.Peek ();
					if (open_entry.Title == null) {
						open_entry.Title = reader.Value;
					}
				} else if (base_title == null) {
					// This is probably the book title
					base_title = reader.Value;
				}
			} else if (element == "keyword") {
				reader.Read (); // read the text node
				AddProperty (Property.NewKeyword ("dc:subject", reader.Value));
			}
		} else if (node_type == XmlNodeType.Text) {
			if (entries_stack.Count > 0) {
				// Append text to the child indexable
				DocbookEntry open_entry = (DocbookEntry) entries_stack.Peek ();
				open_entry.Content.Append (reader.Value);
			} else {
				// Append text to the main indexable
				AppendWord (reader.Value);
			}
		} else if (node_type == XmlNodeType.EndElement) {
			if (entries_stack.Count > 0 && ((DocbookEntry) entries_stack.Peek ()).Depth == reader.Depth) {
				DocbookEntry closed = (DocbookEntry) entries_stack.Pop ();

				DocbookEntry enclosing = null;
				if (entries_stack.Count > 0) {
					enclosing = (DocbookEntry) entries_stack.Peek ();
				}

				Indexable child = new Indexable (UriFu.AddFragment (Indexable.Uri, closed.Id, false));
				child.HitType = "DocbookEntry";
				child.MimeType = "text/x-docbook-entry";
				child.AddProperty (Property.NewKeyword ("beagle:FileType", "documentation"));
				child.Filtering = IndexableFiltering.AlreadyFiltered;
				child.AddProperty (Property.NewUnsearched ("fixme:id", closed.Id));
				child.AddProperty (Property.New ("dc:title", closed.Title));

				// Add the docbook book title
				child.AddProperty (Property.NewUnsearched ("fixme:base_title", base_title));

				// Add the child language (or docbook language if none is specified)
				string effective_language = (closed.Language != null) ? closed.Language : base_language;
				if (effective_language != null) {
					child.AddProperty (Property.NewUnsearched ("fixme:language", effective_language));
				}

				// Add any parent (as in docbook parent entry, not beagle) data if we have it
				if (enclosing != null) {
					child.AddProperty (Property.NewUnsearched ("fixme:parent_id", enclosing.Id));
					child.AddProperty (Property.NewUnsearched ("fixme:parent_title", enclosing.Title));
				}

				child.SetTextReader (new StringReader (closed.Content.ToString ()));
				child.SetChildOf (this.Indexable);

				AddIndexable (child);
			}
		}
	}

	// Add the common properties to the top-level
	// file item such as Title, Language etc.
	AddProperty (Property.New ("dc:title", base_title));
	AddProperty (Property.NewUnsearched ("fixme:language", base_language));

	watch.Stop ();

	// If we've successfully crawled the file but haven't
	// found any indexables, we shouldn't consider it
	// successful at all (unless we have a title, which
	// means that it's actually a docbook file, just without
	// sections).
	bool looks_like_docbook = HasGeneratedIndexable || base_title != null;
	if (! looks_like_docbook) {
		Log.Error ("Probably not a docbook. Ignoring {0}!", base_path);
		Error ();
		return;
	}

	Logger.Log.Debug ("Parsed docbook file in {0}", watch);

	Finished ();
}
// Wraps a mail message in an Indexable of type "MailMessage".
// The uri is "email:///<path>;id=<id>", the subject is exposed both as
// dc:title and fixme:subject, and the message body becomes the text content.
protected override Indexable MessageToIndexable (Message message)
{
	string location = String.Format ("email:///{0};id={1}", message.Path, message.Id);

	Indexable mail = new Indexable (new Uri (location));
	mail.Type = "MailMessage";

	// Client marker first, then the header-derived properties.
	mail.AddProperty (Property.New ("fixme:client", "mozilla"));
	mail.AddProperty (Property.New ("dc:title", message.Subject));
	mail.AddProperty (Property.New ("fixme:subject", message.Subject));
	mail.AddProperty (Property.New ("fixme:to", message.To));
	mail.AddProperty (Property.New ("fixme:from", message.From));
	mail.AddProperty (Property.New ("fixme:offset", message.Offset));

	mail.SetTextReader (new StringReader (message.Body));

	return mail;
}
// Wraps a feed message in an Indexable of type "FeedItem" (mime type text/html).
// The uri is "feed:///<path>;id=<id>"; the "Content-Base" header is stored as
// fixme:itemuri and the message body becomes the text content.
protected override Indexable MessageToIndexable (Message message)
{
	string location = String.Format ("feed:///{0};id={1}", message.Path, message.Id);

	Indexable feed_item = new Indexable (new Uri (location));
	feed_item.MimeType = "text/html";
	feed_item.Type = "FeedItem";

	feed_item.AddProperty (Property.New ("fixme:client", "mozilla"));
	feed_item.AddProperty (Property.New ("dc:title", message.Subject));
	feed_item.AddProperty (Property.New ("fixme:author", message.From));
	// No publication date property is emitted here (a "fixme:published"
	// property was left disabled in the original code).
	feed_item.AddProperty (Property.NewUnsearched ("fixme:itemuri", message.Headers ["Content-Base"]));
	feed_item.AddProperty (Property.New ("fixme:offset", message.Offset));

	feed_item.SetTextReader (new StringReader (message.Body));

	return feed_item;
}
// Builds a "Monodoc" indexable for one member node.
//
// The member's full name has the shape "<kind>:<parentName>[.<MemberName>][(<types>)]":
// the member name is omitted for kind 'C', and a comma-separated parameter type
// list is added for kinds 'C', 'M' and 'E'. The kind character comes from
// MemberTypeToChar applied to the node's MemberType text.
Indexable MemberNodeToIndexable(XmlNode node, FileInfo file, string parentName)
{
	string member_type_text = node.SelectSingleNode ("MemberType").InnerText;
	char kind = MemberTypeToChar (member_type_text);

	StringBuilder signature = new StringBuilder ();
	signature.Append (kind).Append (':').Append (parentName);

	if (kind != 'C')
		signature.Append ('.').Append (node.Attributes["MemberName"].Value);

	switch (kind) {
	case 'C':
	case 'M':
	case 'E':
		// Empty before the first parameter, "," before every later one.
		string separator = "";

		signature.Append ('(');
		foreach (XmlNode parameter in node.SelectNodes ("Parameters/Parameter")) {
			signature.Append (separator).Append (parameter.Attributes["Type"].Value);
			separator = ",";
		}
		signature.Append (')');
		break;
	}

	Uri uri = new Uri ("monodoc:///" + file + ";item=" + signature);

	Indexable indexable = new Indexable (uri);
	indexable.MimeType = "application/monodoc";
	indexable.Type = "Monodoc";

	indexable.AddProperty (Property.NewUnsearched ("fixme:type", member_type_text.ToLower ()));
	indexable.AddProperty (Property.New ("fixme:name", signature));

	// The split name is derived from the full name without its two-character
	// "<kind>:" prefix and without the parameter list, if there is one.
	string full_name = signature.ToString ();
	int paren_at = full_name.IndexOf ("(");
	string bare_name = (paren_at == -1)
		? full_name.Substring (2)
		: full_name.Substring (2, paren_at - 2);

	string splitname = String.Join (" ", StringFu.FuzzySplit (bare_name));
	indexable.AddProperty (Property.NewKeyword ("fixme:splitname", splitname));

	// The raw inner XML of the Docs node is the indexed text.
	indexable.SetTextReader (new StringReader (node.SelectSingleNode ("Docs").InnerXml));

	return indexable;
}
// Builds the "FeedItem" indexable for an RSS entry described by `document`.
//
// When the entry is flagged "HasOffline", the body is fetched with GetRssBody and
// used as the text content; any exception while doing so is logged and the item is
// indexed without a body. Without a body the decoded "Subject" is added as dc:title.
private Indexable ToAddRssIndexable (XmlDocument document)
{
	StringReader body = null;
	string body_encoding = null;

	bool has_offline_copy = ToBool (GetText (document, "HasOffline"));

	if (has_offline_copy) {
		try {
			// RSS does not use OfflineSize but MessageSize instead (for some reason...)
			int offset = Convert.ToInt32 (GetText (document, "MessageOffset"));
			int size = Convert.ToInt32 (GetText (document, "MessageSize"));
			string folder_file = GetText (document, "FolderFile");

			body = GetRssBody (folder_file, offset, size, out body_encoding);
		} catch (Exception e) {
			Logger.Log.Debug (e, "Failed to parse RSS body");
		}
	}

	Indexable indexable = new Indexable (GenerateUniqueUri (document));
	indexable.HitType = "FeedItem";
	indexable.MimeType = "text/html";

	long unix_date = Convert.ToInt64 (GetText (document, "Date"));
	indexable.Timestamp = DateTimeUtil.UnixToDateTimeUtc (unix_date);

	indexable.CacheContent = true;
	indexable.FlushBufferCache = true;

	indexable.AddProperty (Property.NewKeyword ("fixme:client", "thunderbird"));
	indexable.AddProperty (Property.NewKeyword ("fixme:folder", GetText (document, "Folder")));
	indexable.AddProperty (Property.NewUnsearched ("ParentUri", GetText (document, "FolderFile")));
	indexable.AddProperty (Property.NewUnsearched ("fixme:uri", GetText (document, "Uri")));
	indexable.AddProperty (Property.NewKeyword ("dc:identifier", ExtractUrl (GetText (document, "MessageId"))));
	indexable.AddProperty (Property.NewKeyword ("dc:source", GetText (document, "FeedURL")));
	indexable.AddProperty (Property.New ("dc:publisher", Mime.HeaderDecodePhrase (GetText (document, "Author"))));

	// No body: add the title ourselves.
	if (body == null) {
		indexable.AddProperty (Property.New ("dc:title",
			Mime.HeaderDecodePhrase (GetText (document, "Subject"))));
		return indexable;
	}

	// With a body, the title will be added by the filter. In case we add it twice we
	// will just get an empty title in the search tool (a bug maybe?).

	// If we got an encoding, make sure we use that
	if (!String.IsNullOrEmpty (body_encoding)) {
		string encoding_key = String.Format ("{0}encoding", StringFu.UnindexedNamespace);
		indexable.AddProperty (Property.New (encoding_key, body_encoding));
	}

	indexable.SetTextReader (body);

	return indexable;
}
/* Returns false if content can't/needn't be indexed.
 * If AlreadyFiltered, then we don't return a filter but return true.
 *
 * Otherwise candidate filters are created from the indexable's mime type (or,
 * for file content, from the indexable itself) and tried in order. The first
 * one that opens the content is returned through `filter`, and the indexable's
 * text readers are replaced by that filter's readers. `filter` is null whenever
 * no filter was selected.
 *
 * `text_cache` may be null, in which case no snippet writer is hooked up.
 */
static public bool FilterIndexable (Indexable indexable, TextCache text_cache, out Filter filter)
{
	filter = null;
	ICollection filters = null;

	if (indexable.Filtering == IndexableFiltering.AlreadyFiltered)
		return true;

	if (! ShouldWeFilterThis (indexable))
		return false;

	string path = null;

	// First, figure out which filter we should use to deal with
	// the indexable.

	// If a specific mime type is specified, try to index as that type.
	if (indexable.MimeType != null)
		filters = CreateFiltersFromMimeType (indexable.MimeType);

	if (indexable.ContentUri.IsFile) {
		path = indexable.ContentUri.LocalPath;

		// Otherwise, set the mime type for a directory,
		// or sniff it from the file.
		if (indexable.MimeType == null) {
			if (Directory.Exists (path)) {
				indexable.MimeType = "inode/directory";
				indexable.NoContent = true;
			} else if (File.Exists (path)) {
				indexable.MimeType = XdgMime.GetMimeType (path);
			} else {
				Log.Warn ("Unable to filter {0}. {1} not found.", indexable.DisplayUri, path);
				return false;
			}
		}

		// Set the timestamp to the last write time, if it isn't
		// set by the backend.
		if (! indexable.ValidTimestamp && indexable.IsNonTransient)
			indexable.Timestamp = FileSystem.GetLastWriteTimeUtc (path);

		// Check the timestamp to make sure the file hasn't
		// disappeared from underneath us.
		if (! FileSystem.ExistsByDateTime (indexable.Timestamp)) {
			Log.Warn ("Unable to filter {0}. {1} appears to have disappeared from underneath us", indexable.DisplayUri, path);
			return false;
		}

		if (filters == null || filters.Count == 0)
			filters = CreateFiltersFromIndexable (indexable);
	}

	// We don't know how to filter this, so there is nothing else to do.
	// `filters` is still null here when no mime type was given and the content
	// is not a local file, so it has to be null-checked before Count is read.
	if (filters == null || filters.Count == 0) {
		if (! indexable.NoContent)
			Logger.Log.Debug ("No filter for {0} ({1}) [{2}]", indexable.DisplayUri, path, indexable.MimeType);

		return false;
	}

	foreach (Filter candidate_filter in filters) {
		if (Debug)
			Logger.Log.Debug ("Testing filter: {0}", candidate_filter);

		// Hook up the snippet writer.
		if (candidate_filter.SnippetMode && text_cache != null) {
			if (candidate_filter.OriginalIsText && indexable.IsNonTransient) {
				text_cache.MarkAsSelfCached (indexable.Uri);
			} else if (indexable.CacheContent) {
				TextWriter writer = text_cache.GetWriter (indexable.Uri);
				candidate_filter.AttachSnippetWriter (writer);
			}
		}

		// Set the indexable on the filter.
		candidate_filter.Indexable = indexable;

		// Open the filter, copy the file's properties to the indexable,
		// and hook up the TextReaders.

		bool successful_open = false;
		TextReader text_reader;
		Stream binary_stream;

		// Source preference: local path, then text reader, then binary stream.
		if (path != null)
			successful_open = candidate_filter.Open (path);
		else if ((text_reader = indexable.GetTextReader ()) != null)
			successful_open = candidate_filter.Open (text_reader);
		else if ((binary_stream = indexable.GetBinaryStream ()) != null)
			successful_open = candidate_filter.Open (binary_stream);

		if (successful_open) {
			// Set FileType
			indexable.AddProperty (Property.NewKeyword ("beagrep:FileType", candidate_filter.FileType));

			indexable.SetTextReader (candidate_filter.GetTextReader ());
			indexable.SetHotTextReader (candidate_filter.GetHotTextReader ());

			if (Debug)
				Logger.Log.Debug ("Successfully filtered {0} with {1}", path, candidate_filter);

			filter = candidate_filter;
			return true;
		} else {
			Log.Warn ("Error in filtering {0} with {1}, falling back", path, candidate_filter);
			candidate_filter.Cleanup ();
		}
	}

	if (Debug)
		Logger.Log.Debug ("None of the matching filters could process the file: {0}", path);

	return false;
}
// Reads the next journal entry from `reader` (everything up to "END:VJOURNAL")
// and turns it into a "Note" indexable.
//
// Returns null when the input is exhausted (the reader is closed in that case),
// when the entry lacks content, a UID or a LAST-MODIFIED stamp, when the stored
// last-modified time for the UID is unchanged, or while `initial_scan` is set.
public Indexable GetNextIndexable ()
{
	string_builder.Length = 0;

	string uid = null;
	DateTime last_modified = DateTime.MinValue;
	string line;

	// Consume lines until the end marker or end of input. Sample lines:
	//   UID:libkcal-1467827482.768
	//   LAST-MODIFIED:20061015T085606Z
	for (;;) {
		line = reader.ReadLine ();

		if (line == null || line == "END:VJOURNAL")
			break;

		if (line.StartsWith ("UID:")) {
			uid = line.Substring (4);
			continue;
		}

		if (line.StartsWith ("LAST-MODIFIED:")) {
			string stamp = line.Substring (14).Replace ("Z", "+00:00");
			last_modified = DateTime.ParseExact (
				stamp,
				fmts,
				DateTimeFormatInfo.InvariantInfo,
				DateTimeStyles.AdjustToUniversal);
			continue;
		}

		// Anything else is part of the note's text.
		string_builder.Append (line);
		string_builder.Append ('\n');
	}

	// Ran out of input before seeing the end marker.
	if (line == null) {
		reader.Close ();
		return null;
	}

	// Bad note
	bool incomplete = string_builder.Length == 0
		|| uid == null
		|| last_modified == DateTime.MinValue;
	if (incomplete)
		return null;

	// Mark note with uid as seen ('undeleted')
	deleted_notes [uid] = false;

	lock (last_modified_table) {
		bool unchanged = last_modified_table.Contains (uid)
			&& last_modified == (DateTime) last_modified_table [uid];

		// FIXME: Returning null for more than 179 times will cause trouble
		if (unchanged)
			return null;

		// New or updated entry: remember its modification time.
		last_modified_table [uid] = last_modified;
	}

	if (initial_scan)
		return null;

	// Open knotes notes as
	//   dcop knotes KNotesIface text <UID>
	// where the uri is given as knotes://uid
	Indexable note = new Indexable (new Uri (String.Format ("knotes:///{0}", uid)));
	note.ParentUri = UriFu.PathToFileUri (knotes_file);
	note.MimeType = ICalParser.KnotesMimeType;
	note.HitType = "Note";
	note.Timestamp = last_modified;

	// Add uid as a keyword field for convenience
	note.AddProperty (Property.NewUnsearched ("fixme:uid", uid));

	note.SetTextReader (new StringReader (string_builder.ToString ()));

	return note;
}
// Builds a "Monodoc" indexable for a type node. The type is identified by its
// "FullName" attribute, prefixed with "T:" in both the uri and fixme:name; the
// inner XML of the Docs node is the indexed text.
Indexable TypeNodeToIndexable(XmlNode node,FileInfo file)
{
	string full_name = node.Attributes["FullName"].Value;
	string prefixed_name = "T:" + full_name;

	Uri uri = new Uri ("monodoc:///" + file + ";item=" + prefixed_name);

	Indexable type_indexable = new Indexable (uri);
	type_indexable.MimeType = "application/monodoc";
	type_indexable.Type = "Monodoc";

	type_indexable.AddProperty (Property.NewUnsearched ("fixme:type", "type"));
	type_indexable.AddProperty (Property.NewUnsearched ("fixme:name", prefixed_name));

	string[] name_parts = StringFu.FuzzySplit (full_name);
	type_indexable.AddProperty (Property.NewKeyword ("fixme:splitname", String.Join (" ", name_parts)));

	// Should we add other stuff here? Implemented interfaces etc?

	XmlNode docs = node.SelectSingleNode ("Docs");
	type_indexable.SetTextReader (new StringReader (docs.InnerXml));

	return type_indexable;
}
// Builds a "FeedItem" indexable from the attributes of the element `reader` is
// currently positioned on (Id, Title, Author, PubDate, Link, Text), then advances
// the reader by one node.
//
// The item text is the indexed content. If it contains an <img src="..."> tag, the
// path of the matching file under <blam_dir>/Cache (named after the hash code of
// the image url) is stored as fixme:cachedimg.
public Indexable GetNextIndexable()
{
	string id = reader.GetAttribute("Id");
	string title = reader.GetAttribute("Title");
	string author = reader.GetAttribute("Author");

	// FIXME stupid mono bug; DateTime.ParseExact ("0001-01-01T00:00:00.0000000+00:00", ...)
	// http://bugzilla.ximian.com/show_bug.cgi?id=76082
	// Still present in 1.1.9.2
	DateTime pub_date;

	try
	{
		// The format is machine-written, so parse it with the invariant culture.
		// With a null provider the ':' in the format means the current culture's
		// time separator, and valid dates fail to parse under cultures that use
		// a different one.
		pub_date = DateTime.ParseExact(
			reader.GetAttribute("PubDate"),
			"yyyy-MM-ddTHH:mm:ss.fffffffzzz",
			System.Globalization.CultureInfo.InvariantCulture);
	}
	catch (Exception)
	{
		// Deliberate best-effort: a missing or unparsable date must not stop
		// the item from being indexed.
		pub_date = DateTime.MinValue;
	}

	string link = reader.GetAttribute("Link");
	string text = reader.GetAttribute("Text");

	// GetAttribute returns null for a missing attribute; index such items with
	// an empty body instead of failing on the null string below.
	if (text == null)
	{
		text = String.Empty;
	}

	reader.Read();

	Uri uri = new Uri(String.Format("feed:{0};item={1}", channel_url, id));

	Logger.Log.Debug("BlamQ: Indexing [" + channel_name + "] " + title);

	Indexable indexable = new Indexable(uri);
	indexable.ParentUri = UriFu.PathToFileUri(feed_file);
	indexable.MimeType = "text/html";
	indexable.HitType = "FeedItem";
	indexable.Timestamp = pub_date.ToUniversalTime();

	// change property names to DC names, as far as allowed
	indexable.AddProperty(Property.New("dc:title", title));
	indexable.AddProperty(Property.New("dc:creator", author));
	indexable.AddProperty(Property.NewKeyword("dc:identifier", link));
	indexable.AddProperty(Property.NewKeyword("dc:source", channel_url));
	indexable.AddProperty(Property.New("dc:publisher", channel_name));

	// Extract the url of the first <img src="..."> tag, if any. This is markup
	// matching, so compare ordinally rather than by culture.
	string img = null;
	int i = text.IndexOf("<img src=\"", StringComparison.Ordinal);

	if (i != -1)
	{
		i += "<img src=\"".Length;
		int j = text.IndexOf('"', i);
		if (j != -1)
		{
			img = text.Substring(i, j - i);
		}
	}

	if (img != null)
	{
		string path = Path.Combine(Path.Combine(blam_dir, "Cache"),
					   img.GetHashCode().ToString());
		indexable.AddProperty(Property.NewUnsearched("fixme:cachedimg", path));
	}

	StringReader string_reader = new StringReader(text);
	indexable.SetTextReader(string_reader);

	return(indexable);
}
/* Returns false if content can't/needn't be indexed.
 * If AlreadyFiltered, then we don't return a filter but return true.
 *
 * Otherwise candidate filters are created from the indexable's mime type (or,
 * for file content, from the indexable itself) and tried in order. The first
 * one that opens the content is returned through `filter`, and the indexable's
 * text readers are replaced by that filter's readers. `filter` is null whenever
 * no filter was selected.
 *
 * `text_cache` may be null, in which case no snippet writer is hooked up.
 */
static public bool FilterIndexable(Indexable indexable, TextCache text_cache, out Filter filter)
{
	filter = null;
	ICollection filters = null;

	if (indexable.Filtering == IndexableFiltering.AlreadyFiltered)
	{
		return(true);
	}

	if (!ShouldWeFilterThis(indexable))
	{
		return(false);
	}

	string path = null;

	// First, figure out which filter we should use to deal with
	// the indexable.

	// If a specific mime type is specified, try to index as that type.
	if (indexable.MimeType != null)
	{
		filters = CreateFiltersFromMimeType(indexable.MimeType);
	}

	if (indexable.ContentUri.IsFile)
	{
		path = indexable.ContentUri.LocalPath;

		// Otherwise, set the mime type for a directory,
		// or sniff it from the file.
		if (indexable.MimeType == null)
		{
			if (Directory.Exists(path))
			{
				indexable.MimeType = "inode/directory";
				indexable.NoContent = true;
			}
			else if (File.Exists(path))
			{
				indexable.MimeType = XdgMime.GetMimeType(path);
			}
			else
			{
				Log.Warn("Unable to filter {0}. {1} not found.", indexable.DisplayUri, path);
				return(false);
			}
		}

		// Set the timestamp to the last write time, if it isn't
		// set by the backend.
		if (!indexable.ValidTimestamp && indexable.IsNonTransient)
		{
			indexable.Timestamp = FileSystem.GetLastWriteTimeUtc(path);
		}

		// Check the timestamp to make sure the file hasn't
		// disappeared from underneath us.
		if (!FileSystem.ExistsByDateTime(indexable.Timestamp))
		{
			Log.Warn("Unable to filter {0}. {1} appears to have disappeared from underneath us", indexable.DisplayUri, path);
			return(false);
		}

		if (filters == null || filters.Count == 0)
		{
			filters = CreateFiltersFromIndexable(indexable);
		}
	}

	// We don't know how to filter this, so there is nothing else to do.
	// `filters` is still null here when no mime type was given and the content
	// is not a local file, so it has to be null-checked before Count is read.
	if (filters == null || filters.Count == 0)
	{
		if (!indexable.NoContent)
		{
			Logger.Log.Debug("No filter for {0} ({1}) [{2}]", indexable.DisplayUri, path, indexable.MimeType);
		}

		return(false);
	}

	foreach (Filter candidate_filter in filters)
	{
		if (Debug)
		{
			Logger.Log.Debug("Testing filter: {0}", candidate_filter);
		}

		// Hook up the snippet writer.
		if (candidate_filter.SnippetMode && text_cache != null)
		{
			if (candidate_filter.OriginalIsText && indexable.IsNonTransient)
			{
				text_cache.MarkAsSelfCached(indexable.Uri);
			}
			else if (indexable.CacheContent)
			{
				TextWriter writer = text_cache.GetWriter(indexable.Uri);
				candidate_filter.AttachSnippetWriter(writer);
			}
		}

		// Set the indexable on the filter.
		candidate_filter.Indexable = indexable;

		// Open the filter, copy the file's properties to the indexable,
		// and hook up the TextReaders.

		bool successful_open = false;
		TextReader text_reader;
		Stream binary_stream;

		// Source preference: local path, then text reader, then binary stream.
		if (path != null)
		{
			successful_open = candidate_filter.Open(path);
		}
		else if ((text_reader = indexable.GetTextReader()) != null)
		{
			successful_open = candidate_filter.Open(text_reader);
		}
		else if ((binary_stream = indexable.GetBinaryStream()) != null)
		{
			successful_open = candidate_filter.Open(binary_stream);
		}

		if (successful_open)
		{
			// Set FileType
			indexable.AddProperty(Property.NewKeyword("beagrep:FileType", candidate_filter.FileType));

			indexable.SetTextReader(candidate_filter.GetTextReader());
			indexable.SetHotTextReader(candidate_filter.GetHotTextReader());

			if (Debug)
			{
				Logger.Log.Debug("Successfully filtered {0} with {1}", path, candidate_filter);
			}

			filter = candidate_filter;
			return(true);
		}
		else
		{
			Log.Warn("Error in filtering {0} with {1}, falling back", path, candidate_filter);
			candidate_filter.Cleanup();
		}
	}

	if (Debug)
	{
		Logger.Log.Debug("None of the matching filters could process the file: {0}", path);
	}

	return(false);
}
// Converts `current_item` into a "FeedItem" indexable (mime type text/html).
//
// The uri is "<feed_source>;item=<item source>"; if that is not a well-formed
// uri, the same text is placed behind a "liferea://dummy?" prefix instead.
// The timestamp is the item's Timestamp interpreted as seconds since the Unix epoch.
private Indexable current_itemToIndexable ()
{
	Indexable feed_item;

	try {
		string plain_uri = String.Format ("{0};item={1}", feed_source, current_item.Source);
		feed_item = new Indexable (new Uri (plain_uri));
	} catch (System.UriFormatException) {
		string fallback_uri = String.Format ("liferea://dummy?{0};item={1}", feed_source, current_item.Source);
		feed_item = new Indexable (new Uri (fallback_uri));
	}

	feed_item.ParentUri = UriFu.PathToFileUri (feed_file);
	feed_item.MimeType = "text/html";
	feed_item.HitType = "FeedItem";

	DateTime epoch = DateTimeUtil.UnixToDateTimeUtc (0);
	feed_item.Timestamp = epoch.AddSeconds (current_item.Timestamp);

	// cleaning up the property names as far as possible
	// this way querying for specific field is possible
	// following DC element names wherever applicable

	feed_item.AddProperty (Property.New ("dc:title", current_item.Title));

	// Every attribute named "author" contributes one dc:creator property.
	Attribute[] attribs = current_item.Attribs.AttribArray;
	if (attribs != null) {
		foreach (Attribute attrib in attribs) {
			if (attrib.Name == "author")
				feed_item.AddProperty (Property.New ("dc:creator", attrib.Value));
		}
	}

	feed_item.AddProperty (Property.NewKeyword ("dc:identifier", current_item.Source));
	feed_item.AddProperty (Property.NewKeyword ("dc:source", feed_source));
	feed_item.AddProperty (Property.New ("dc:publisher", publisher));

	// Only reference the icon when the file is actually present.
	if (File.Exists (icon_file))
		feed_item.AddProperty (Property.NewUnsearched ("fixme:cachedimg", icon_file));

	feed_item.SetTextReader (new StringReader (current_item.Description));

	return feed_item;
}
// Reads the next calendar entry from `reader` (everything up to "END:VEVENT" or
// "END:VTODO") and turns it into an indexable: events get hit type "Calendar",
// todos get hit type "Task".
//
// Returns null when the input is exhausted (the reader is closed in that case),
// when the entry lacks content, a UID or a LAST-MODIFIED stamp, when the stored
// last-modified time for the UID is unchanged, or while `initial_scan` is set.
public Indexable GetNextIndexable()
{
	string_builder.Length = 0;

	string uid = null;
	DateTime last_modified = DateTime.MinValue;
	string line;

	// Keep reading till "END:VEVENT" or "END:VTODO". Sample lines:
	//   UID:libkcal-1467827482.768
	//   LAST-MODIFIED:20061015T085606Z
	for (;;)
	{
		line = reader.ReadLine();

		if (line == null || line == "END:VEVENT" || line == "END:VTODO")
		{
			break;
		}

		if (line.StartsWith("UID:"))
		{
			uid = line.Substring(4);
			continue;
		}

		if (line.StartsWith("LAST-MODIFIED:"))
		{
			string stamp = line.Substring(14).Replace("Z", "+00:00");
			last_modified = DateTime.ParseExact(
				stamp,
				fmts,
				DateTimeFormatInfo.InvariantInfo,
				DateTimeStyles.AdjustToUniversal);
			continue;
		}

		// Anything else is part of the entry's text.
		string_builder.Append(line);
		string_builder.Append('\n');
	}

	// Ran out of input before seeing an end marker.
	if (line == null)
	{
		reader.Close();
		return(null);
	}

	// Bad entry
	bool incomplete = string_builder.Length == 0
			  || uid == null
			  || last_modified == DateTime.MinValue;
	if (incomplete)
	{
		return(null);
	}

	// Mark entry with uid as seen ('undeleted')
	deleted_entries [uid] = false;

	lock (last_modified_table)
	{
		bool unchanged = last_modified_table.Contains(uid)
				 && last_modified == (DateTime)last_modified_table [uid];

		// FIXME: Returning null for more than 179 times will cause trouble
		if (unchanged)
		{
			return(null);
		}

		// New or updated entry: remember its modification time.
		last_modified_table [uid] = last_modified;
	}

	if (initial_scan)
	{
		return(null);
	}

	// The end marker that terminated the loop tells events and todos apart.
	bool is_event = (line == "END:VEVENT");

	Indexable entry = new Indexable(new Uri(String.Format("korganizer:///{0}", uid)));
	entry.ParentUri = UriFu.PathToFileUri(korganizer_file);

	if (is_event)
	{
		entry.MimeType = ICalParser.KOrganizerEventMimeType;
		entry.HitType = "Calendar";
	}
	else
	{
		entry.MimeType = ICalParser.KOrganizerTodoMimeType;
		entry.HitType = "Task";
	}

	entry.Timestamp = last_modified;

	// Add uid as a keyword field for convenience
	entry.AddProperty(Property.NewUnsearched("fixme:uid", uid));

	entry.SetTextReader(new StringReader(string_builder.ToString()));

	return(entry);
}
/////////////////////////////////////////////////

// Converts a note stored in `file` into a "Note" indexable that is marked as
// already filtered. The note text is both remembered in `note_text_cache`
// (keyed by the note's uri) and used as the indexable's text content.
private Indexable NoteToIndexable (FileInfo file, Note note)
{
	Indexable result = new Indexable (note.Uri);

	result.ContentUri = UriFu.PathToFileUri (file.FullName);
	result.Timestamp = note.timestamp;
	result.HitType = "Note";
	result.Filtering = IndexableFiltering.AlreadyFiltered;

	result.AddProperty (Property.New ("dc:title", note.subject));
	result.AddProperty (Property.NewUnsearched ("fixme:application","tomboy"));

	// FIXME: tagging is disabled in Tomboy-0.8.x and is planned for 0.10.0
	foreach (string tag in note.tags) {
		result.AddProperty (Property.New ("note:tag", tag));
	}

	// We remember the note's text so that we can stuff it in
	// the TextCache later.
	string body = note.text;
	note_text_cache [note.Uri] = body;

	result.SetTextReader (new StringReader (body));

	return result;
}
// Converts `current_item` into a "FeedItem" indexable (mime type text/html), or
// returns null when there is no current item.
//
// The uri is "feed:<channel_link>;item=<item link>". The publication date is
// parsed with the exact pattern below and adjusted to universal time.
private Indexable current_itemToIndexable ()
{
	// sanity check
	if (current_item == null)
		return null;

	string item_uri = String.Format ("feed:{0};item={1}", channel_link, current_item.Link);

	Indexable feed_item = new Indexable (new Uri (item_uri));
	feed_item.ParentUri = UriFu.PathToFileUri (feed_file);
	feed_item.MimeType = "text/html";
	feed_item.HitType = "FeedItem";

	string rfc822_pattern = "ddd, dd MMM yyyy HH:mm:ss zzz";
	feed_item.Timestamp = DateTime.ParseExact (
		current_item.PubDate,
		rfc822_pattern,
		DateTimeFormatInfo.InvariantInfo,
		DateTimeStyles.AdjustToUniversal);

	// replace property names with Dublin Core names
	feed_item.AddProperty (Property.New ("dc:title", current_item.Title));
	feed_item.AddProperty (Property.NewKeyword ("dc:identifier", current_item.Link));
	feed_item.AddProperty (Property.NewKeyword ("dc:source", channel_link));
	feed_item.AddProperty (Property.New ("dc:publisher", channel_title));

	// The item description is the indexed text.
	feed_item.SetTextReader (new StringReader (current_item.Description));

	return feed_item;
}
// Builds a "FeedItem" indexable from the attributes of the element `reader` is
// currently positioned on (Id, Title, Author, PubDate, Link, Text), then advances
// the reader by one node.
//
// The item text is the indexed content. If it contains an <img src="..."> tag, the
// path of the matching file under <blam_dir>/Cache (named after the hash code of
// the image url) is stored as fixme:cachedimg.
public Indexable GetNextIndexable ()
{
	string id = reader.GetAttribute ("Id");
	string title = reader.GetAttribute ("Title");
	string author = reader.GetAttribute ("Author");

	// FIXME stupid mono bug; DateTime.ParseExact ("0001-01-01T00:00:00.0000000+00:00", ...)
	// http://bugzilla.ximian.com/show_bug.cgi?id=76082
	// Still present in 1.1.9.2
	DateTime pub_date;

	try {
		// The format is machine-written, so parse it with the invariant culture.
		// With a null provider the ':' in the format means the current culture's
		// time separator, and valid dates fail to parse under cultures that use
		// a different one.
		pub_date = DateTime.ParseExact (
			reader.GetAttribute ("PubDate"),
			"yyyy-MM-ddTHH:mm:ss.fffffffzzz",
			System.Globalization.CultureInfo.InvariantCulture);
	} catch (Exception) {
		// Deliberate best-effort: a missing or unparsable date must not stop
		// the item from being indexed.
		pub_date = DateTime.MinValue;
	}

	string link = reader.GetAttribute ("Link");
	string text = reader.GetAttribute ("Text");

	// GetAttribute returns null for a missing attribute; index such items with
	// an empty body instead of failing on the null string below.
	if (text == null)
		text = String.Empty;

	reader.Read ();

	Uri uri = new Uri (String.Format ("feed:{0};item={1}", channel_url, id));

	Logger.Log.Debug ("BlamQ: Indexing [" + channel_name + "] " + title);

	Indexable indexable = new Indexable (uri);
	indexable.ParentUri = UriFu.PathToFileUri (feed_file);
	indexable.MimeType = "text/html";
	indexable.HitType = "FeedItem";
	indexable.Timestamp = pub_date.ToUniversalTime ();

	// change property names to DC names, as far as allowed
	indexable.AddProperty (Property.New ("dc:title", title));
	indexable.AddProperty (Property.New ("dc:creator", author));
	indexable.AddProperty (Property.NewKeyword ("dc:identifier", link));
	indexable.AddProperty (Property.NewKeyword ("dc:source", channel_url));
	indexable.AddProperty (Property.New ("dc:publisher", channel_name));

	// Extract the url of the first <img src="..."> tag, if any. This is markup
	// matching, so compare ordinally rather than by culture.
	string img = null;
	int i = text.IndexOf ("<img src=\"", StringComparison.Ordinal);

	if (i != -1) {
		i += "<img src=\"".Length;
		int j = text.IndexOf ('"', i);
		if (j != -1)
			img = text.Substring (i, j-i);
	}

	if (img != null) {
		string path = Path.Combine (Path.Combine (blam_dir, "Cache"),
					    img.GetHashCode ().ToString ());
		indexable.AddProperty (Property.NewUnsearched ("fixme:cachedimg", path));
	}

	StringReader string_reader = new StringReader (text);
	indexable.SetTextReader (string_reader);

	return indexable;
}