/////////////////////////////////////////////////

        // Converts a parsed Tomboy note into an Indexable.
        //
        // file: the note file on disk; its full path becomes the ContentUri.
        // note: the parsed note supplying uri, timestamp, subject, tags and text.
        //
        // Side effect: the note's text is stored in note_text_cache under
        // the note's uri.
        private Indexable NoteToIndexable(FileInfo file, Note note)
        {
            Indexable note_indexable = new Indexable(note.Uri);

            note_indexable.ContentUri = UriFu.PathToFileUri(file.FullName);
            note_indexable.Timestamp = note.timestamp;
            note_indexable.HitType = "Note";
            note_indexable.Filtering = IndexableFiltering.AlreadyFiltered;

            note_indexable.AddProperty(Property.New("dc:title", note.subject));
            note_indexable.AddProperty(Property.NewUnsearched("fixme:application", "tomboy"));

            // FIXME: tagging is disabled in Tomboy-0.8.x and is planned for 0.10.0
            foreach (string tag in note.tags)
            {
                note_indexable.AddProperty(Property.New("note:tag", tag));
            }

            // Keep a copy of the text around; it gets stuffed into the
            // TextCache later on.
            note_text_cache[note.Uri] = note.text;

            note_indexable.SetTextReader(new StringReader(note.text));

            return note_indexable;
        }
        // Builds the Indexable for the contact currently described by
        // current_uid, current_dt and string_builder.
        //
        // Returns null while initial_scan is set.
        public Indexable GetNextIndexable()
        {
            if (initial_scan)
            {
                return null;
            }

            Uri contact_uri = new Uri(String.Format("kabc:///{0}", current_uid));
            Indexable contact = new Indexable(contact_uri);

            contact.ParentUri = UriFu.PathToFileUri(kabc_file);
            contact.MimeType = ICalParser.KabcMimeType;
            contact.HitType = "Contact";
            contact.Timestamp = current_dt;

            contact.AddProperty(Property.NewKeyword("fixme:client", "KAddressBook"));
            // The uid is also stored as a property of its own, for convenience.
            contact.AddProperty(Property.NewUnsearched("fixme:uid", current_uid));

            // FIXME: Comment this Debug statement after the backend stabilizes
            //Log.Debug ("Creating {0} from:[{1}]", contact_uri, string_builder.ToString ());
            contact.SetTextReader(new StringReader(string_builder.ToString()));

            return contact;
        }
Beispiel #3
0
        // Builds the Indexable for current_item of the feed stored in feed_file.
        //
        // Returns null when there is no current item.  The item's PubDate has
        // to match the RFC822-style pattern used below exactly;
        // DateTime.ParseExact throws a FormatException otherwise.
        private Indexable current_itemToIndexable()
        {
            // Nothing to convert.
            if (current_item == null)
            {
                return null;
            }

            //Log.Debug ("Indexing " + channel_link + ":" + current_item.Link);
            Uri item_uri = new Uri(String.Format("feed:{0};item={1}", channel_link, current_item.Link));
            Indexable item = new Indexable(item_uri);

            item.ParentUri = UriFu.PathToFileUri(feed_file);
            item.MimeType = "text/html";
            item.HitType = "FeedItem";

            const string pub_date_format = "ddd, dd MMM yyyy HH:mm:ss zzz";
            item.Timestamp = DateTime.ParseExact(current_item.PubDate,
                                                 pub_date_format,
                                                 DateTimeFormatInfo.InvariantInfo,
                                                 DateTimeStyles.AdjustToUniversal);

            // Properties are published under Dublin Core names.
            item.AddProperty(Property.New("dc:title", current_item.Title));
            item.AddProperty(Property.NewKeyword("dc:identifier", current_item.Link));
            item.AddProperty(Property.NewKeyword("dc:source", channel_link));
            item.AddProperty(Property.New("dc:publisher", channel_title));

            item.SetTextReader(new StringReader(current_item.Description));

            return item;
        }
Beispiel #4
0
        // Builds the Indexable for an Evolution memo.
        //
        // cc: the calendar component holding the memo; its Dtstart becomes the
        //     timestamp, its Summary the title and its Descriptions the text.
        //
        // All non-null descriptions are joined (one per line) into a single
        // text.  Setting the reader and the cache entry inside the loop would
        // overwrite them on every iteration and keep only the last description.
        // When there is no description at all, neither a text reader nor a
        // text cache entry is set.
        private Indexable MemoToIndexable(CalComponent cc)
        {
            Indexable indexable = new Indexable(GetComponentUri(cc));

            indexable.Timestamp = cc.Dtstart;
            indexable.HitType   = "Note";
            indexable.Filtering = IndexableFiltering.AlreadyFiltered;

            indexable.AddProperty(Property.NewUnsearched("fixme:application", "evolution"));

            indexable.AddProperty(Property.New("dc:title", cc.Summary));

            // Fully qualified so that no additional using directive is needed.
            System.Text.StringBuilder memo_text = null;

            foreach (string description in cc.Descriptions)
            {
                if (description == null)
                {
                    continue;
                }

                if (memo_text == null)
                {
                    memo_text = new System.Text.StringBuilder(description);
                }
                else
                {
                    memo_text.Append('\n').Append(description);
                }
            }

            if (memo_text != null)
            {
                string text = memo_text.ToString();

                // We remember the memo's text so that we can stuff it in
                // the TextCache later.
                // This is here for compatibility with Tomboy notes.
                queryable.IndexableTextCache[indexable.Uri] = text;

                indexable.SetTextReader(new StringReader(text));
            }

            return indexable;
        }
            // Builds the Indexable for the next chat session in the log file.
            //
            // Lines are fed to AppendLogText until it returns false or the log
            // is exhausted.  Returns null when no text was collected in data_sb.
            // In both cases session_begin_time is moved on to the time reported
            // for the last line that was examined.
            public Indexable GetNextIndexable()
            {
                DateTime next_line_time = DateTime.MinValue;

                // Consume lines until AppendLogText returns false, remembering
                // the reader offset after each consumed line.
                //Log.Debug ("Checking line from {0}:[{1}]", log_file, log_line_as_sb);
                while (log_line_as_sb != null && AppendLogText(log_line_as_sb, out next_line_time))
                {
                    prev_line_offset = reader.Position;
                    log_line_as_sb = reader.ReadLineAsStringBuilder();
                }

                // Nothing new to index.
                if (data_sb.Length == 0)
                {
                    session_begin_time = next_line_time;
                    return null;
                }

                Uri session_uri = new Uri(String.Format("konversation://{0}@dumb/{1}", session_begin_offset, log_file));

                Log.Debug("Creating indexable {0}", session_uri);
                Indexable session = new Indexable(session_uri);

                session.ParentUri = UriFu.PathToFileUri(log_file);
                session.Timestamp = session_begin_time;
                session.HitType = "IMLog";
                session.CacheContent = false;
                session.Filtering = IndexableFiltering.AlreadyFiltered;

                session.AddProperty(Beagle.Property.NewUnsearched("fixme:session_begin_offset", session_begin_offset));
                session.AddProperty(Beagle.Property.NewUnsearched("fixme:session_end_offset", prev_line_offset));
                session.AddProperty(Beagle.Property.NewDate("fixme:starttime", session_begin_time));
                session.AddProperty(Beagle.Property.NewUnsearched("fixme:client", "Konversation"));
                session.AddProperty(Beagle.Property.NewUnsearched("fixme:protocol", "IRC"));

                AddChannelInformation(session);

                foreach (string speaker in speakers.Keys)
                {
                    session.AddProperty(Beagle.Property.NewUnstored("fixme:speaker", speaker));
                }

                session.SetTextReader(new StringReader(data_sb.ToString()));

                // The line that ended this session carries the start time of
                // the next one.
                session_begin_time = next_line_time;

                return session;
            }
        // Builds the Indexable for an RSS item described by `document`.
        //
        // When the document reports offline content, the body is fetched with
        // GetRssBody; a failure there is only logged and the item is indexed
        // without a body.  Without a body the subject is added as dc:title.
        private Indexable ToAddRssIndexable(XmlDocument document)
        {
            StringReader body = null;
            string body_encoding = null;

            bool has_offline = ToBool(GetText(document, "HasOffline"));

            if (has_offline)
            {
                try {
                    // RSS does not use OfflineSize but MessageSize instead (for some reason...)
                    int offset = Convert.ToInt32(GetText(document, "MessageOffset"));
                    int size = Convert.ToInt32(GetText(document, "MessageSize"));

                    body = GetRssBody(GetText(document, "FolderFile"), offset, size, out body_encoding);
                } catch (Exception e) {
                    Logger.Log.Debug(e, "Failed to parse RSS body");
                }
            }

            Indexable item = new Indexable(GenerateUniqueUri(document));

            item.HitType = "FeedItem";
            item.MimeType = "text/html";
            item.Timestamp = DateTimeUtil.UnixToDateTimeUtc(Convert.ToInt64(GetText(document, "Date")));
            item.CacheContent = true;
            item.FlushBufferCache = true;

            item.AddProperty(Property.NewKeyword("fixme:client", "thunderbird"));
            item.AddProperty(Property.NewKeyword("fixme:folder", GetText(document, "Folder")));
            item.AddProperty(Property.NewUnsearched("ParentUri", GetText(document, "FolderFile")));
            item.AddProperty(Property.NewUnsearched("fixme:uri", GetText(document, "Uri")));

            item.AddProperty(Property.NewKeyword("dc:identifier", ExtractUrl(GetText(document, "MessageId"))));
            item.AddProperty(Property.NewKeyword("dc:source", GetText(document, "FeedURL")));
            item.AddProperty(Property.New("dc:publisher", Mime.HeaderDecodePhrase(GetText(document, "Author"))));

            if (body == null)
            {
                // No body for a filter to work on, so the title is added here.
                item.AddProperty(Property.New("dc:title", Mime.HeaderDecodePhrase(GetText(document, "Subject"))));
            }
            else
            {
                // The title will be added by the filter. In case we add it twice we will just get
                // an empty title in the search tool (a bug maybe?).

                // If we got an encoding, make sure we use that
                if (!String.IsNullOrEmpty(body_encoding))
                {
                    string encoding_key = String.Format("{0}encoding", StringFu.UnindexedNamespace);
                    item.AddProperty(Property.New(encoding_key, body_encoding));
                }

                item.SetTextReader(body);
            }

            return item;
        }
Beispiel #7
0
        // Builds the Indexable for current_item of the feed stored in feed_file.
        //
        // The uri is formed from feed_source and the item's Source; if that
        // is not a valid uri, a "liferea://dummy?" uri carrying the same
        // information is used instead.
        private Indexable current_itemToIndexable()
        {
            Indexable item;

            try {
                item = new Indexable(new Uri(String.Format("{0};item={1}", feed_source, current_item.Source)));
            } catch (System.UriFormatException) {
                item = new Indexable(new Uri(String.Format("liferea://dummy?{0};item={1}", feed_source, current_item.Source)));
            }

            item.ParentUri = UriFu.PathToFileUri(feed_file);
            item.MimeType = "text/html";
            item.HitType = "FeedItem";

            // The item's Timestamp is added, as seconds, to unix time 0.
            item.Timestamp = DateTimeUtil.UnixToDateTimeUtc(0).AddSeconds(current_item.Timestamp);

            // Property names follow the Dublin Core element names wherever
            // applicable, so that specific fields can be queried.
            item.AddProperty(Property.New("dc:title", current_item.Title));

            Attribute[] item_attribs = current_item.Attribs.AttribArray;
            if (item_attribs != null)
            {
                foreach (Attribute candidate in item_attribs)
                {
                    if (candidate.Name == "author")
                    {
                        item.AddProperty(Property.New("dc:creator", candidate.Value));
                    }
                }
            }

            item.AddProperty(Property.NewKeyword("dc:identifier", current_item.Source));
            item.AddProperty(Property.NewKeyword("dc:source", feed_source));
            item.AddProperty(Property.New("dc:publisher", publisher));

            if (File.Exists(icon_file))
            {
                item.AddProperty(Property.NewUnsearched("fixme:cachedimg", icon_file));
            }

            item.SetTextReader(new StringReader(current_item.Description));

            return item;
        }
        // Builds the Indexable for one member node of a monodoc type document.
        //
        // node:       the member element; its MemberType and Docs children, its
        //             MemberName attribute and its Parameters/Parameter children
        //             are read.
        // base_uri:   uri of the containing document; the member's full name is
        //             added to it as a fragment.
        // parentName: name placed between the type-character prefix and the
        //             member name.
        //
        // The full name has the form "<type char>:<parentName>", followed by
        // ".<MemberName>" unless the type character is 'C', followed by a
        // parenthesised, comma-separated list of parameter types when the type
        // character is 'C', 'M' or 'E'.
        private static Indexable MemberNodeToIndexable(XmlNode node, Uri base_uri, string parentName)
        {
            // Looked up once; used both for the prefix and for "fixme:type".
            string member_type_name = node.SelectSingleNode("MemberType").InnerText;
            char   memberType       = MemberTypeToChar(member_type_name);

            StringBuilder name_builder = new StringBuilder();

            name_builder.Append(memberType).Append(':').Append(parentName);

            if (memberType != 'C')
            {
                name_builder.Append('.').Append(node.Attributes["MemberName"].Value);
            }

            if (memberType == 'C' || memberType == 'M' || memberType == 'E')
            {
                name_builder.Append('(');
                bool first = true;

                foreach (XmlNode parameter in node.SelectNodes("Parameters/Parameter"))
                {
                    if (!first)
                    {
                        name_builder.Append(',');
                    }
                    first = false;

                    name_builder.Append(parameter.Attributes["Type"].Value);
                }

                name_builder.Append(')');
            }

            string full_name = name_builder.ToString();

            Indexable indexable = new Indexable(UriFu.AddFragment(base_uri, full_name, false));

            indexable.MimeType = "text/html";
            indexable.HitType  = "MonodocEntry";

            indexable.AddProperty(Property.New("dc:title", full_name));
            indexable.AddProperty(Property.New("fixme:name", full_name));

            // The type name is a machine identifier, so it is lower-cased
            // independently of the current culture.
            indexable.AddProperty(Property.NewUnsearched("fixme:type", member_type_name.ToLowerInvariant()));

            indexable.SetTextReader(new StringReader(node.SelectSingleNode("Docs").InnerXml));

            return indexable;
        }
        // Builds the Indexable for a monodoc type node.
        //
        // node:     the type element; its FullName attribute and its Docs
        //           child are read.
        // base_uri: uri of the containing document; "T:<FullName>" is added
        //           to it as a fragment.
        private static Indexable TypeNodeToIndexable(XmlNode node, Uri base_uri)
        {
            // "T:<FullName>" serves as uri fragment, title and name alike.
            string type_name = "T:" + node.Attributes["FullName"].Value;

            Indexable type_indexable = new Indexable(UriFu.AddFragment(base_uri, type_name, false));

            type_indexable.MimeType = "text/html";
            type_indexable.HitType = "MonodocEntry";

            type_indexable.AddProperty(Property.New("dc:title", type_name));
            type_indexable.AddProperty(Property.NewUnsearched("fixme:name", type_name));
            type_indexable.AddProperty(Property.NewUnsearched("fixme:type", "type"));

            type_indexable.SetTextReader(new StringReader(node.SelectSingleNode("Docs").InnerXml));

            return type_indexable;
        }
        // Builds the Indexable for a monodoc type node found in `file`.
        //
        // node: the type element; its FullName attribute and its Docs child
        //       are read.
        // file: the monodoc file; its string form becomes part of the
        //       "monodoc:///" uri.
        Indexable TypeNodeToIndexable(XmlNode node, FileInfo file)
        {
            string full_name = node.Attributes["FullName"].Value;

            Uri type_uri = new Uri("monodoc:///" + file + ";item=T:" + full_name);
            Indexable type_indexable = new Indexable(type_uri);

            type_indexable.MimeType = "application/monodoc";
            type_indexable.Type = "Monodoc";

            type_indexable.AddProperty(Property.NewUnsearched("fixme:type", "type"));
            type_indexable.AddProperty(Property.NewUnsearched("fixme:name", "T:" + full_name));

            // The pieces returned by StringFu.FuzzySplit are stored as one
            // space-separated keyword property.
            string[] name_parts = StringFu.FuzzySplit(full_name);
            type_indexable.AddProperty(Property.NewKeyword("fixme:splitname", String.Join(" ", name_parts)));

            // Should we add other stuff here? Implemented interfaces etc?

            type_indexable.SetTextReader(new StringReader(node.SelectSingleNode("Docs").InnerXml));

            return type_indexable;
        }
        // Builds the "FeedItem" Indexable for a message.
        //
        // The uri is "feed:///<Path>;id=<Id>"; subject, sender, the
        // Content-Base header and the message offset become properties and
        // the message body is supplied as the text.
        protected override Indexable MessageToIndexable(Message message)
        {
            Uri feed_item_uri = new Uri(String.Format("feed:///{0};id={1}", message.Path, message.Id));
            Indexable feed_item = new Indexable(feed_item_uri);

            feed_item.MimeType = "text/html";
            feed_item.Type = "FeedItem";

            feed_item.AddProperty(Property.New("fixme:client", "mozilla"));
            feed_item.AddProperty(Property.New("dc:title", message.Subject));
            feed_item.AddProperty(Property.New("fixme:author", message.From));
            // The publication date is not recorded:
            //feed_item.AddProperty(Property.NewDate ("fixme:published", item.PubDate));
            feed_item.AddProperty(Property.NewUnsearched("fixme:itemuri", message.Headers["Content-Base"]));
            feed_item.AddProperty(Property.New("fixme:offset", message.Offset));

            feed_item.SetTextReader(new StringReader(message.Body));

            return feed_item;
        }
        // Builds the "MailMessage" Indexable for a message.
        //
        // The uri is "email:///<Path>;id=<Id>"; subject, recipient, sender
        // and the message offset become properties and the message body is
        // supplied as the text.
        protected override Indexable MessageToIndexable(Message message)
        {
            Uri mail_uri = new Uri(String.Format("email:///{0};id={1}", message.Path, message.Id));
            Indexable mail = new Indexable(mail_uri);

            mail.Type = "MailMessage";

            mail.AddProperty(Property.New("fixme:client", "mozilla"));

            // The subject is stored twice: as the title and as fixme:subject.
            mail.AddProperty(Property.New("dc:title", message.Subject));
            mail.AddProperty(Property.New("fixme:subject", message.Subject));

            mail.AddProperty(Property.New("fixme:to", message.To));
            mail.AddProperty(Property.New("fixme:from", message.From));
            mail.AddProperty(Property.New("fixme:offset", message.Offset));

            mail.SetTextReader(new StringReader(message.Body));

            return mail;
        }
Beispiel #13
0
        // Walks the docbook document node by node with `reader`.
        //
        // - Every element whose name starts with "sect" or "chapter" and that
        //   carries a non-empty "id" attribute opens a DocbookEntry, which is
        //   pushed on entries_stack.
        // - Text nodes are appended to the innermost open entry or, when no
        //   entry is open, to the main indexable via AppendWord.
        // - When an end element is reached at the depth of the innermost open
        //   entry, that entry is popped and emitted as a child Indexable.
        //
        // Afterwards the title and language are added to the top-level item.
        // Error() is called when no indexable was generated and no title was
        // found; otherwise Finished() is called.
        override protected void DoPullProperties()
        {
            // Only used for the debug message at the end.
            Stopwatch watch = new Stopwatch();

            watch.Start();

            while (reader.Read())
            {
                switch (reader.NodeType)
                {
                case XmlNodeType.Element:
                    if (reader.Name.StartsWith("sect") || reader.Name.StartsWith("chapter"))
                    {
                        string id = reader.GetAttribute("id");

                        // Sections without an id do not get an entry of their own.
                        if (id != null && id != String.Empty)
                        {
                            DocbookEntry entry = new DocbookEntry();
                            entry.Id    = id;
                            // The depth is what the matching end element is
                            // recognised by later on.
                            entry.Depth = reader.Depth;

                            string language = reader.GetAttribute("lang");

                            if (language != null && language != String.Empty)
                            {
                                entry.Language = language;
                            }

                            entries_stack.Push(entry);
                        }
                    }
                    else if (reader.Name == "article" || reader.Name == "book")
                    {
                        // Language of the document as a whole.
                        string language = reader.GetAttribute("lang");

                        if (language != null && language != String.Empty)
                        {
                            base_language = language;
                        }
                    }
                    else if (reader.Name == "title")
                    {
                        // NOTE(review): assumes the node following <title> is
                        // its text node -- confirm behaviour for empty titles.
                        reader.Read();                          // Go to the text node

                        if (entries_stack.Count == 0 && base_title == null)
                        {
                            // This is probably the book title
                            base_title = reader.Value;
                        }
                        else if (entries_stack.Count > 0)
                        {
                            DocbookEntry entry = (DocbookEntry)entries_stack.Peek();

                            // Only the first title seen inside an entry is kept.
                            if (entry.Title == null)
                            {
                                entry.Title = reader.Value;
                            }
                        }
                    }
                    else if (reader.Name == "keyword")
                    {
                        // NOTE(review): same assumption as for <title> above.
                        reader.Read();                          // read the text node
                        AddProperty(Property.NewKeyword("dc:subject", reader.Value));
                    }
                    break;

                case XmlNodeType.Text:
                    // Append text to the child indexable
                    if (entries_stack.Count > 0)
                    {
                        ((DocbookEntry)entries_stack.Peek()).Content.Append(reader.Value);
                    }

                    // Append text to the main indexable
                    else
                    {
                        AppendWord(reader.Value);
                    }
                    break;

                case XmlNodeType.EndElement:
                    // An end element at the depth recorded for the innermost
                    // open entry closes that entry.
                    if (entries_stack.Count > 0 &&
                        ((DocbookEntry)entries_stack.Peek()).Depth == reader.Depth)
                    {
                        DocbookEntry entry, parent_entry = null;

                        entry = (DocbookEntry)entries_stack.Pop();

                        // Whatever is left on top of the stack is the
                        // enclosing entry.
                        if (entries_stack.Count > 0)
                        {
                            parent_entry = (DocbookEntry)entries_stack.Peek();
                        }

                        // The child's uri is the file's uri with the entry id
                        // added as a fragment.
                        Indexable indexable;
                        indexable          = new Indexable(UriFu.AddFragment(Indexable.Uri, entry.Id, false));
                        indexable.HitType  = "DocbookEntry";
                        indexable.MimeType = "text/x-docbook-entry";
                        indexable.AddProperty(Property.NewKeyword("beagle:FileType", "documentation"));
                        indexable.Filtering = IndexableFiltering.AlreadyFiltered;

                        indexable.AddProperty(Property.NewUnsearched("fixme:id", entry.Id));
                        indexable.AddProperty(Property.New("dc:title", entry.Title));

                        // Add the docbook book title
                        indexable.AddProperty(Property.NewUnsearched("fixme:base_title", base_title));

                        // Add the child language (or docbook language if none is specified)
                        if (entry.Language != null)
                        {
                            indexable.AddProperty(Property.NewUnsearched("fixme:language", entry.Language));
                        }
                        else if (base_language != null)
                        {
                            indexable.AddProperty(Property.NewUnsearched("fixme:language", base_language));
                        }

                        // Add any parent (as in docbook parent entry, not beagle) data if we have it
                        if (parent_entry != null)
                        {
                            indexable.AddProperty(Property.NewUnsearched("fixme:parent_id", parent_entry.Id));
                            indexable.AddProperty(Property.NewUnsearched("fixme:parent_title", parent_entry.Title));
                        }


                        // The text collected for this entry becomes the
                        // child's content.
                        StringReader content_reader = new StringReader(entry.Content.ToString());
                        indexable.SetTextReader(content_reader);
                        indexable.SetChildOf(this.Indexable);

                        AddIndexable(indexable);
                    }
                    break;
                }
            }

            // Add the common properties to the top-level
            // file item such as Title, Language etc.
            // NOTE(review): base_title and base_language may still be null
            // here; presumably Property.New / NewUnsearched accept null
            // values -- confirm.

            AddProperty(Property.New("dc:title", base_title));
            AddProperty(Property.NewUnsearched("fixme:language", base_language));

            watch.Stop();

            // If we've successfully crawled the file but haven't
            // found any indexables, we shouldn't consider it
            // successful at all (unless we have a title, which
            // means that it's actually a docbook file, just without
            // sections).
            if (!HasGeneratedIndexable && base_title == null)
            {
                Log.Error("Probably not a docbook. Ignoring {0}!", base_path);
                Error();
                return;
            }

            Logger.Log.Debug("Parsed docbook file in {0}", watch);

            Finished();
        }
			// Builds the Indexable for the next chat session in the log file.
			//
			// Lines are fed to AppendLogText until it returns false or the log
			// is exhausted.  Returns null when no text was collected in data_sb.
			// In both cases session_begin_time is moved on to the time reported
			// for the last line that was examined.
			public Indexable GetNextIndexable ()
			{
				DateTime next_line_time = DateTime.MinValue;

				// Consume lines until AppendLogText returns false, remembering
				// the reader offset after each consumed line.
				//Log.Debug ("Checking line from {0}:[{1}]", log_file, log_line_as_sb);
				while (log_line_as_sb != null && AppendLogText (log_line_as_sb, out next_line_time)) {
					prev_line_offset = reader.Position;
					log_line_as_sb = reader.ReadLineAsStringBuilder ();
				}

				// Nothing new to index.
				if (data_sb.Length == 0) {
					session_begin_time = next_line_time;
					return null;
				}

				Uri session_uri = new Uri (String.Format ("konversation://{0}@dumb/{1}", session_begin_offset, log_file));
				Log.Debug ("Creating indexable {0}", session_uri);

				Indexable session = new Indexable (session_uri);
				session.ParentUri = UriFu.PathToFileUri (log_file);
				session.Timestamp = session_begin_time;
				session.HitType = "IMLog";
				session.CacheContent = false;
				session.Filtering = IndexableFiltering.AlreadyFiltered;

				session.AddProperty (Beagle.Property.NewUnsearched ("fixme:session_begin_offset", session_begin_offset));
				session.AddProperty (Beagle.Property.NewUnsearched ("fixme:session_end_offset", prev_line_offset));
				session.AddProperty (Beagle.Property.NewDate ("fixme:starttime", session_begin_time));
				session.AddProperty (Beagle.Property.NewUnsearched ("fixme:client", "Konversation"));
				session.AddProperty (Beagle.Property.NewUnsearched ("fixme:protocol", "IRC"));

				AddChannelInformation (session);

				foreach (string speaker in speakers.Keys) {
					session.AddProperty (Beagle.Property.NewUnstored ("fixme:speaker", speaker));
				}

				session.SetTextReader (new StringReader (data_sb.ToString ()));

				// The line that ended this session carries the start time of
				// the next one.
				session_begin_time = next_line_time;

				return session;
			}
Beispiel #15
0
            public void OnEachPart(GMime.Object mime_part)
            {
                GMime.Object part = null;
                bool         part_needs_dispose = false;

                //for (int i = 0; i < this.depth; i++)
                //  Console.Write ("  ");
                //Console.WriteLine ("Content-Type: {0}", mime_part.ContentType);

                ++depth;

                if (mime_part is GMime.MessagePart)
                {
                    GMime.MessagePart msg_part = (GMime.MessagePart)mime_part;

                    using (GMime.Message message = msg_part.Message) {
                        using (GMime.Object subpart = message.MimePart)
                            this.OnEachPart(subpart);
                    }
                }
                else if (mime_part is GMime.Multipart)
                {
                    GMime.Multipart multipart = (GMime.Multipart)mime_part;
                    int             num_parts = multipart.Count;

                    // If the mimetype is multipart/alternative, we only want to index
                    // one part -- the richest one we can filter.
                    if (mime_part.ContentType.MediaSubtype.ToLower() == "alternative")
                    {
                        // The richest formats are at the end, so work from there
                        // backward.
                        for (int i = num_parts - 1; i >= 0; i--)
                        {
                            GMime.Object subpart = multipart[i];

                            if (IsMimeTypeHandled(subpart.ContentType.ToString()))
                            {
                                part = subpart;
                                part_needs_dispose = true;
                                break;
                            }
                            else
                            {
                                subpart.Dispose();
                            }
                        }
                    }

                    // If it's not alternative, or we don't know how to filter any of
                    // the parts, treat them like a bunch of attachments.
                    if (part == null)
                    {
                        for (int i = 0; i < num_parts; i++)
                        {
                            using (GMime.Object subpart = multipart[i])
                                this.OnEachPart(subpart);
                        }
                    }
                }
                else if (mime_part is GMime.Part)
                {
                    part = mime_part;
                }
                else
                {
                    throw new Exception(String.Format("Unknown part type: {0}", part.GetType()));
                }

                if (part != null)
                {
                    System.IO.Stream stream = null;

                    using (GMime.DataWrapper content_obj = ((GMime.Part)part).ContentObject)
                        stream = content_obj.Stream;

                    // If this is the only part and it's plain text, we
                    // want to just attach it to our filter instead of
                    // creating a child indexable for it.
                    bool no_child_needed = false;

                    string mime_type = part.ContentType.ToString().ToLower();

                    if (this.depth == 1 && this.count == 0)
                    {
                        if (mime_type == "text/plain")
                        {
                            no_child_needed = true;

                            this.reader = new StreamReader(stream);
                        }
                        else if (mime_type == "text/html")
                        {
                            no_child_needed = true;
                            html_part       = true;
                            string enc = part.ContentType.GetParameter("charset");
                            // DataWrapper.Stream is a very limited stream
                            // and does not allow Seek or Tell
                            // HtmlFilter requires Stream.Position=0.
                            // Play safe and create a memorystream
                            // for HTML parsing.

                            GMime.StreamMem mem_stream;
                            mem_stream = new GMime.StreamMem();

                            GMime.Stream data_stream;
                            data_stream = ((StreamWrapper)stream).GMimeStream;
                            data_stream.WriteToStream(mem_stream);
                            data_stream.Flush();

                            // The StreamWrapper and hence the memory_stream
                            // will be closed when the reader is closed
                            // after Pull()-ing is done.
                            System.IO.Stream html_stream;
                            html_stream = new StreamWrapper(mem_stream);
                            html_stream.Seek(0, SeekOrigin.Begin);

                            stream.Close();

                            try {
                                this.reader = FilterHtml.GetHtmlReader(html_stream, enc, link_handler);
                            } catch (Exception e) {
                                Log.Debug(e, "Exception while filtering HTML email {0}", this.indexable.Uri);
                                this.reader = null;
                                html_stream.Close();
                                html_part = false;
                            }
                        }
                    }

                    if (!no_child_needed)
                    {
                        // Check the mime type against the blacklist and don't index any
                        // parts that are contained within.  That way the user doesn't
                        // get flooded with pointless signatures and vcard and ical
                        // attachments along with (real) attachments.

                        if (Array.IndexOf(blacklisted_mime_types, mime_type) == -1)
                        {
                            string    sub_uri = "#" + this.count;
                            Indexable child;
                            child = new Indexable(UriFu.AddFragment(this.indexable.Uri, sub_uri, true));

                            child.DisplayUri = new Uri(this.indexable.DisplayUri.ToString() + "#" + this.count);

                            // This is a special case.
                            // Even for mails found on disk, MailMessage hitype is set
                            child.HitType  = "MailMessage";
                            child.MimeType = mime_type;

                            // If this is the richest part we found for multipart emails, add its content to textcache
                            if (snippet_attachment ||
                                (this.depth == 1 && this.count == 0))
                            {
                                child.CacheContent = true;
                            }
                            else
                            {
                                child.CacheContent = false;
                            }

                            string filename = ((GMime.Part)part).Filename;

                            if (!String.IsNullOrEmpty(filename))
                            {
                                child.AddProperty(Property.NewKeyword("fixme:attachment_title", filename));

                                foreach (Property prop in Property.StandardFileProperties(filename, false))
                                {
                                    child.AddProperty(prop);
                                }
                            }

                            // Store length of attachment
                            long length = stream.Length;
                            if (length != -1)
                            {
                                child.AddProperty(Property.NewUnsearched("fixme:filesize", length));
                            }

                            if (part.ContentType.MediaType.ToLower() == "text")
                            {
                                child.SetTextReader(new StreamReader(stream));
                            }
                            else
                            {
                                child.SetBinaryStream(stream);
                            }

                            child.SetChildOf(this.indexable);
                            child.StoreStream();
                            child.CloseStreams();
                            this.child_indexables.Add(child);
                        }
                        else
                        {
                            Log.Debug("Skipping attachment {0}#{1} with blacklisted mime type {2}",
                                      this.indexable.Uri, this.count, mime_type);
                        }
                    }

                    this.count++;
                }

                if (part_needs_dispose)
                {
                    part.Dispose();
                }

                --depth;
            }
Beispiel #16
0
		// Builds a "MonodocEntry" indexable for a type node.  The node's
		// FullName attribute, prefixed with "T:", is used as the URI
		// fragment, the title and the unsearched name property.
		static private Indexable TypeNodeToIndexable (XmlNode node, Uri base_uri)
		{
			string type_id = "T:" + node.Attributes ["FullName"].Value;

			Indexable entry = new Indexable (UriFu.AddFragment (base_uri, type_id, false));
			entry.MimeType = "text/html";
			entry.HitType = "MonodocEntry";

			entry.AddProperty (Property.New ("dc:title", type_id));
			entry.AddProperty (Property.NewUnsearched ("fixme:name", type_id));
			entry.AddProperty (Property.NewUnsearched ("fixme:type", "type"));

			// The inner XML of the <Docs> child is handed over as the text to index.
			entry.SetTextReader (new StringReader (node.SelectSingleNode ("Docs").InnerXml));

			return entry;
		}
    // Command-line entry point: builds a "WebHistory" indexable for a URL
    // and sends it to the indexing service.
    //
    // Options handled below:
    //   --url <uri>          URI of the page (required)
    //   --title <title>      stored as the dc:title property
    //   --sourcefile <path>  file holding the content; standard input is
    //                        read when this is not given
    //   --deletesourcefile   sets DeleteContent on the indexable, and makes
    //                        this tool delete the source file itself when
    //                        sending the request fails
    //   --help, --version
    //
    // Exits with status 1 on usage errors, an unparsable or https URI,
    // a missing source file, or a failed request.  file: and mailto: URIs
    // are ignored silently.
    static void Main(String[] args)
    {
        string uriStr           = null;
        string title            = null;
        string sourcefile       = null;
        bool   deletesourcefile = false;

        if (args.Length == 0 || Array.IndexOf(args, "--help") > -1)
        {
            PrintUsage();
            Environment.Exit(1);
        }

        for (int i = 0; i < args.Length; i++)
        {
            // Options that take a value must be followed by an argument
            // that does not itself look like an option.
            switch (args [i])
            {
            case "--url":
            case "--title":
            case "--sourcefile":
                if (i + 1 >= args.Length ||
                    args [i + 1].StartsWith("--"))
                {
                    PrintUsage();
                    Environment.Exit(1);
                }
                break;
            }

            switch (args [i])
            {
            case "--url":
                uriStr = args [++i];
                break;

            case "--title":
                title = args [++i];
                break;

            case "--sourcefile":
                sourcefile = args [++i];
                break;

            case "--deletesourcefile":
                deletesourcefile = true;
                break;

            case "--help":
                PrintUsage();
                return;

            case "--version":
                VersionFu.PrintVersion();
                return;
            }
        }

        if (uriStr == null)
        {
            Logger.Log.Error("URI not specified!\n");
            PrintUsage();
            Environment.Exit(1);
        }

        // A malformed URI is a user error: report it and exit instead of
        // dying with an unhandled exception.
        Uri uri;
        try {
            uri = new Uri(uriStr, true);
        } catch (UriFormatException) {
            Logger.Log.Error("Invalid URI '{0}'", uriStr);
            Environment.Exit(1);
            return;
        }

        if (uri.Scheme == Uri.UriSchemeHttps)
        {
            // For security/privacy reasons, we don't index any
            // SSL-encrypted pages.
            Logger.Log.Error("Indexing secure https:// URIs is not secure!");
            Environment.Exit(1);
        }

        // We don't index file: Uris.  Silently exit.
        if (uri.IsFile)
        {
            return;
        }

        // We *definitely* don't index mailto: Uris.  Silently exit.
        if (uri.Scheme == Uri.UriSchemeMailto)
        {
            return;
        }

        Indexable indexable;

        indexable           = new Indexable(uri);
        indexable.HitType   = "WebHistory";
        indexable.MimeType  = "text/html";
        indexable.Timestamp = DateTime.Now;

        if (title != null)
        {
            indexable.AddProperty(Property.New("dc:title", title));
        }

        if (sourcefile != null)
        {
            if (!File.Exists(sourcefile))
            {
                Logger.Log.Error("sourcefile '{0}' does not exist!", sourcefile);
                Environment.Exit(1);
            }

            indexable.ContentUri    = UriFu.PathToFileUri(sourcefile);
            indexable.DeleteContent = deletesourcefile;
        }
        else
        {
            Stream stdin = Console.OpenStandardInput();
            if (stdin == null)
            {
                Logger.Log.Error("No sourcefile specified, and no standard input!\n");
                PrintUsage();
                Environment.Exit(1);
            }

            indexable.SetTextReader(new StreamReader(stdin));
        }

        IndexingServiceRequest req = new IndexingServiceRequest();

        req.Add(indexable);

        try {
            Logger.Log.Info("Indexing");
            Logger.Log.Debug("SendAsync");
            req.SendAsync();
            Logger.Log.Debug("Close");
            req.Close();
            Logger.Log.Debug("Done");
        } catch (Exception e) {
            Logger.Log.Error("Indexing failed: {0}", e);

            // Still clean up after ourselves, even if we couldn't
            // index the content.  --deletesourcefile may have been given
            // without --sourcefile, in which case there is nothing to
            // delete (and File.Delete would throw on a null path).
            if (deletesourcefile && sourcefile != null)
            {
                File.Delete(sourcefile);
            }

            Environment.Exit(1);
        }
    }
Beispiel #18
0
		// Turns a calendar memo component into a "Note" indexable that is
		// marked as already filtered and tagged with the "evolution"
		// application property.
		private Indexable MemoToIndexable (CalComponent cc)
		{
			Indexable memo = new Indexable (GetComponentUri (cc));

			memo.Timestamp = cc.Dtstart;
			memo.HitType = "Note";
			memo.Filtering = IndexableFiltering.AlreadyFiltered;

			memo.AddProperty (Property.NewUnsearched ("fixme:application","evolution"));
			memo.AddProperty (Property.New ("dc:title", cc.Summary));

			// The memo text is stored in the queryable's text cache (kept
			// for compatibility with Tomboy notes) and also handed to the
			// indexable as its text reader.  The cache entry for this URI
			// is overwritten on every iteration.
			foreach (string body in cc.Descriptions) {
				queryable.IndexableTextCache [memo.Uri] = body;
				memo.SetTextReader (new StringReader (body));
			}

			return memo;
		}
Beispiel #19
0
		// Builds a "MonodocEntry" indexable for a member node.  The full
		// member name has the form "<kind>:<parent>[.<member>][(<types>)]";
		// it is used as the URI fragment, the title and the name property.
		static private Indexable MemberNodeToIndexable(XmlNode node, Uri base_uri, string parentName)
		{
			string member_type_text = node.SelectSingleNode ("MemberType").InnerText;
			char memberType = MemberTypeToChar (member_type_text);

			StringBuilder name_builder = new StringBuilder ();
			name_builder.Append (memberType + ":"+ parentName);

			// 'C' members carry no member name of their own.
			if (memberType != 'C') {
				name_builder.Append ("." + node.Attributes["MemberName"].Value);
			}

			// 'C', 'M' and 'E' members get a parenthesised, comma-separated
			// list of their parameter types.
			bool has_parameter_list = (memberType == 'C' || memberType == 'M' || memberType == 'E');
			if (has_parameter_list) {
				string separator = "";

				name_builder.Append ("(");
				foreach (XmlNode parameter in node.SelectNodes ("Parameters/Parameter")) {
					name_builder.Append (separator);
					name_builder.Append (parameter.Attributes["Type"].Value);
					separator = ",";
				}
				name_builder.Append (")");
			}

			string full_name = name_builder.ToString ();

			Indexable entry = new Indexable (UriFu.AddFragment (base_uri, full_name, false));
			entry.MimeType = "text/html";
			entry.HitType = "MonodocEntry";

			entry.AddProperty (Property.New ("dc:title", full_name));
			entry.AddProperty (Property.New ("fixme:name", full_name));
			entry.AddProperty (Property.NewUnsearched ("fixme:type", member_type_text.ToLower ()));

			// The inner XML of the <Docs> child is handed over as the text to index.
			entry.SetTextReader (new StringReader (node.SelectSingleNode ("Docs").InnerXml));

			return entry;
		}
Beispiel #20
0
		// Produces the "Contact" indexable for the current entry, built
		// from current_uid, current_dt and the text accumulated in
		// string_builder.  Returns null while the initial scan is running.
		public Indexable GetNextIndexable ()
		{
			if (initial_scan) {
				return null;
			}

			Uri contact_uri = new Uri (String.Format ("kabc:///{0}", current_uid));

			Indexable contact = new Indexable (contact_uri);
			contact.ParentUri = UriFu.PathToFileUri (kabc_file);
			contact.MimeType = ICalParser.KabcMimeType;
			contact.HitType = "Contact";
			contact.Timestamp = current_dt;

			contact.AddProperty (Property.NewKeyword ("fixme:client", "KAddressBook"));
			// The uid is also stored as a property of its own for convenience.
			contact.AddProperty (Property.NewUnsearched ("fixme:uid", current_uid));

			// FIXME: Comment this Debug statement after the backend stabilizes
			//Log.Debug ("Creating {0} from:[{1}]", contact_uri, string_builder.ToString ());
			contact.SetTextReader (new StringReader (string_builder.ToString()));

			return contact;
		}
        // Builds a "Monodoc" indexable for a member node found in the given
        // file.  The full member name has the form
        // "<kind>:<parent>[.<member>][(<types>)]"; it becomes part of the
        // monodoc: URI and the fixme:name property, and a fuzzily split
        // variant (without the kind prefix and parameter list) is stored
        // as fixme:splitname.
        Indexable MemberNodeToIndexable(XmlNode node, FileInfo file, string parentName)
        {
            string memberTypeText = node.SelectSingleNode("MemberType").InnerText;
            char   memberType     = MemberTypeToChar(memberTypeText);

            StringBuilder memberFullName = new StringBuilder();
            memberFullName.Append(memberType + ":" + parentName);

            // 'C' members carry no member name of their own.
            if (memberType != 'C')
            {
                memberFullName.Append("." + node.Attributes["MemberName"].Value);
            }

            // 'C', 'M' and 'E' members get a parenthesised, comma-separated
            // list of their parameter types.
            bool hasParameterList = (memberType == 'C' || memberType == 'M' || memberType == 'E');
            if (hasParameterList)
            {
                string separator = "";

                memberFullName.Append("(");
                foreach (XmlNode parameter in node.SelectNodes("Parameters/Parameter"))
                {
                    memberFullName.Append(separator);
                    memberFullName.Append(parameter.Attributes["Type"].Value);
                    separator = ",";
                }
                memberFullName.Append(")");
            }

            Uri itemUri = new Uri("monodoc:///" + file + ";item=" + memberFullName);

            Indexable entry = new Indexable(itemUri);
            entry.MimeType = "application/monodoc";
            entry.Type     = "Monodoc";

            entry.AddProperty(Property.NewUnsearched("fixme:type", memberTypeText.ToLower()));
            entry.AddProperty(Property.New("fixme:name", memberFullName));

            // Drop the two-character "<kind>:" prefix and, when present,
            // everything from the opening parenthesis onwards before
            // splitting the name.
            string fullName   = memberFullName.ToString();
            int    parenIndex = fullName.IndexOf("(");
            string bareName   = (parenIndex == -1)
                ? fullName.Substring(2)
                : fullName.Substring(2, parenIndex - 2);
            string splitname  = String.Join(" ", StringFu.FuzzySplit(bareName));

            entry.AddProperty(Property.NewKeyword("fixme:splitname", splitname));

            // The inner XML of the <Docs> child is handed over as the text to index.
            entry.SetTextReader(new StringReader(node.SelectSingleNode("Docs").InnerXml));

            return entry;
        }
Beispiel #22
0
		// Walks the whole docbook document with the XML reader.  Every
		// "sect*"/"chapter*" element that has an id becomes a child
		// indexable ("DocbookEntry") once its end tag is reached; text
		// outside such entries is appended to the main indexable.  The
		// document title and language are added to the top-level item.
		// Calls Error () when no child indexable was generated and no
		// title was found, and Finished () otherwise.
		override protected void DoPullProperties ()
		{
			Stopwatch timer = new Stopwatch ();
			timer.Start ();

			while (reader.Read ()) {
				switch (reader.NodeType) {
				case XmlNodeType.Element: {
					string element = reader.Name;

					if (element.StartsWith ("sect") || element.StartsWith ("chapter")) {
						string id = reader.GetAttribute ("id");

						// Only sections carrying an id are tracked as entries.
						if (! String.IsNullOrEmpty (id)) {
							DocbookEntry section = new DocbookEntry ();
							section.Id = id;
							section.Depth = reader.Depth;

							string section_language = reader.GetAttribute ("lang");
							if (! String.IsNullOrEmpty (section_language)) {
								section.Language = section_language;
							}

							entries_stack.Push (section);
						}
					} else if (element == "article" || element == "book") {
						string document_language = reader.GetAttribute ("lang");

						if (! String.IsNullOrEmpty (document_language)) {
							base_language = document_language;
						}
					} else if (element == "title") {
						reader.Read (); // Go to the text node

						if (entries_stack.Count > 0) {
							// First title inside the innermost open entry.
							DocbookEntry current = (DocbookEntry) entries_stack.Peek ();

							if (current.Title == null) {
								current.Title = reader.Value;
							}
						} else if (base_title == null) {
							// This is probably the book title
							base_title = reader.Value;
						}
					} else if (element == "keyword") {
						reader.Read (); // read the text node
						AddProperty (Property.NewKeyword ("dc:subject", reader.Value));
					}
					break;
				}

				case XmlNodeType.Text: {
					if (entries_stack.Count > 0) {
						// Append text to the child indexable
						((DocbookEntry) entries_stack.Peek ()).Content.Append (reader.Value);
					} else {
						// Append text to the main indexable
						AppendWord (reader.Value);
					}
					break;
				}

				case XmlNodeType.EndElement: {
					// Only an end tag at the depth of the innermost open
					// entry closes that entry.
					if (entries_stack.Count == 0 ||
					    ((DocbookEntry) entries_stack.Peek ()).Depth != reader.Depth) {
						break;
					}

					DocbookEntry closed = (DocbookEntry) entries_stack.Pop ();

					DocbookEntry enclosing = null;
					if (entries_stack.Count > 0) {
						enclosing = (DocbookEntry) entries_stack.Peek ();
					}

					Indexable child = new Indexable (UriFu.AddFragment (Indexable.Uri, closed.Id, false));
					child.HitType = "DocbookEntry";
					child.MimeType = "text/x-docbook-entry";
					child.AddProperty (Property.NewKeyword ("beagle:FileType", "documentation"));
					child.Filtering = IndexableFiltering.AlreadyFiltered;

					child.AddProperty (Property.NewUnsearched ("fixme:id", closed.Id));
					child.AddProperty (Property.New ("dc:title", closed.Title));

					// Add the docbook book title
					child.AddProperty (Property.NewUnsearched ("fixme:base_title", base_title));

					// Add the child language (or docbook language if none is specified)
					string entry_language = closed.Language;
					if (entry_language == null) {
						entry_language = base_language;
					}
					if (entry_language != null) {
						child.AddProperty (Property.NewUnsearched ("fixme:language", entry_language));
					}

					// Add any parent (as in docbook parent entry, not beagle) data if we have it
					if (enclosing != null) {
						child.AddProperty (Property.NewUnsearched ("fixme:parent_id", enclosing.Id));
						child.AddProperty (Property.NewUnsearched ("fixme:parent_title", enclosing.Title));
					}

					child.SetTextReader (new StringReader (closed.Content.ToString ()));
					child.SetChildOf (this.Indexable);

					AddIndexable (child);
					break;
				}
				}
			}

			// Add the common properties to the top-level
			// file item such as Title, Language etc.
			AddProperty (Property.New ("dc:title", base_title));
			AddProperty (Property.NewUnsearched ("fixme:language", base_language));

			timer.Stop ();

			// A file that yielded neither child indexables nor a title
			// is not treated as a docbook file.  A title alone is
			// enough: that is a docbook file without sections.
			if (! HasGeneratedIndexable && base_title == null) {
				Log.Error ("Probably not a docbook. Ignoring {0}!", base_path);
				Error ();
				return;
			}

			Logger.Log.Debug ("Parsed docbook file in {0}", timer);

			Finished ();
		}
		// Converts a message into a "MailMessage" indexable addressed by
		// an email: URI made of the message path and id.
		protected override Indexable MessageToIndexable (Message message)
		{
			string location = String.Format ("email:///{0};id={1}", message.Path, message.Id);

			Indexable mail = new Indexable (new Uri (location));
			mail.Type = "MailMessage";

			mail.AddProperty (Property.New ("fixme:client", "mozilla"));

			// The subject is stored both as the generic title and as the
			// mail-specific subject property.
			mail.AddProperty (Property.New ("dc:title", message.Subject));
			mail.AddProperty (Property.New ("fixme:subject", message.Subject));
			mail.AddProperty (Property.New ("fixme:to", message.To));
			mail.AddProperty (Property.New ("fixme:from", message.From));

			mail.AddProperty (Property.New ("fixme:offset", message.Offset));

			// The message body is the text that gets indexed.
			mail.SetTextReader (new StringReader (message.Body));

			return mail;
		}
		// Converts a message into a "FeedItem" indexable (text/html)
		// addressed by a feed: URI made of the message path and id.
		protected override Indexable MessageToIndexable (Message message)
		{
			string location = String.Format ("feed:///{0};id={1}", message.Path, message.Id);

			Indexable item = new Indexable (new Uri (location));
			item.MimeType = "text/html";
			item.Type = "FeedItem";

			item.AddProperty (Property.New ("fixme:client", "mozilla"));

			item.AddProperty (Property.New ("dc:title", message.Subject));
			item.AddProperty (Property.New ("fixme:author", message.From));
			//item.AddProperty (Property.NewDate ("fixme:published", item.PubDate));
			// The item's URI is taken from the "Content-Base" header.
			item.AddProperty (Property.NewUnsearched ("fixme:itemuri", message.Headers ["Content-Base"]));

			item.AddProperty (Property.New ("fixme:offset", message.Offset));

			// The message body is the text that gets indexed.
			item.SetTextReader (new StringReader (message.Body));

			return item;
		}
		// Builds a "Monodoc" indexable for a member node found in the
		// given file.  The full member name has the form
		// "<kind>:<parent>[.<member>][(<types>)]"; it becomes part of the
		// monodoc: URI and the fixme:name property, and a fuzzily split
		// variant (without the kind prefix and parameter list) is stored
		// as fixme:splitname.
		Indexable MemberNodeToIndexable(XmlNode node, FileInfo file, string parentName)
		{
			string member_type_text = node.SelectSingleNode ("MemberType").InnerText;
			char memberType = MemberTypeToChar (member_type_text);

			StringBuilder memberFullName = new StringBuilder ();
			memberFullName.Append (memberType + ":"+ parentName);

			// 'C' members carry no member name of their own.
			if (memberType != 'C') {
				memberFullName.Append ("." + node.Attributes["MemberName"].Value);
			}

			// 'C', 'M' and 'E' members get a parenthesised,
			// comma-separated list of their parameter types.
			if (memberType == 'C' || memberType == 'M' || memberType == 'E') {
				bool first = true;

				memberFullName.Append ("(");
				foreach (XmlNode parameter in node.SelectNodes ("Parameters/Parameter")) {
					if (first) {
						first = false;
					} else {
						memberFullName.Append (",");
					}
					memberFullName.Append (parameter.Attributes["Type"].Value);
				}
				memberFullName.Append (")");
			}

			Uri item_uri = new Uri ("monodoc:///" + file + ";item=" + memberFullName);

			Indexable entry = new Indexable (item_uri);
			entry.MimeType = "application/monodoc";
			entry.Type = "Monodoc";

			entry.AddProperty (Property.NewUnsearched ("fixme:type", member_type_text.ToLower ()));
			entry.AddProperty (Property.New ("fixme:name",memberFullName));

			// Drop the two-character "<kind>:" prefix and, when present,
			// everything from the opening parenthesis onwards before
			// splitting the name.
			string full_name = memberFullName.ToString ();
			int paren_index = full_name.IndexOf ("(");

			string bare_name;
			if (paren_index == -1) {
				bare_name = full_name.Substring (2);
			} else {
				bare_name = full_name.Substring (2, paren_index - 2);
			}

			string splitname = String.Join (" ", StringFu.FuzzySplit (bare_name));
			entry.AddProperty (Property.NewKeyword ("fixme:splitname",splitname));

			// The inner XML of the <Docs> child is handed over as the text to index.
			entry.SetTextReader (new StringReader (node.SelectSingleNode ("Docs").InnerXml));

			return entry;
		}
		// Builds the "FeedItem" indexable for an RSS entry described by
		// the given XML document.  When the entry is available offline
		// its body is read and attached as the text reader; otherwise
		// (or when reading the body fails) only the decoded subject is
		// stored as the title.
		private Indexable ToAddRssIndexable (XmlDocument document)
		{
			string body_encoding = null;
			StringReader body = null;

			if (ToBool (GetText (document, "HasOffline"))) {
				try {
					// RSS does not use OfflineSize but MessageSize instead (for some reason...)
					int offset = Convert.ToInt32 (GetText (document, "MessageOffset"));
					int size = Convert.ToInt32 (GetText (document, "MessageSize"));
					string folder_file = GetText (document, "FolderFile");

					body = GetRssBody (folder_file, offset, size, out body_encoding);
				} catch (Exception e) {
					// A body that cannot be read is only logged.
					Logger.Log.Debug (e, "Failed to parse RSS body");
				}
			}

			Indexable item = new Indexable (GenerateUniqueUri (document));
			item.HitType = "FeedItem";
			item.MimeType = "text/html";
			item.Timestamp = DateTimeUtil.UnixToDateTimeUtc (Convert.ToInt64 (GetText (document, "Date")));
			item.CacheContent = true;
			item.FlushBufferCache = true;

			item.AddProperty (Property.NewKeyword ("fixme:client", "thunderbird"));
			item.AddProperty (Property.NewKeyword ("fixme:folder", GetText (document, "Folder")));
			item.AddProperty (Property.NewUnsearched ("ParentUri", GetText (document, "FolderFile")));
			item.AddProperty (Property.NewUnsearched ("fixme:uri", GetText (document, "Uri")));

			item.AddProperty (Property.NewKeyword ("dc:identifier", ExtractUrl (GetText (document, "MessageId"))));
			item.AddProperty (Property.NewKeyword ("dc:source", GetText (document, "FeedURL")));
			item.AddProperty (Property.New ("dc:publisher", Mime.HeaderDecodePhrase (GetText (document, "Author"))));

			if (body == null) {
				// No body to filter, so the title has to be set here.
				item.AddProperty (Property.New ("dc:title", Mime.HeaderDecodePhrase (GetText (document, "Subject"))));
			} else {
				// With a body the title is added by the filter; adding it
				// here as well would just give an empty title in the
				// search tool (a bug maybe?).

				// If we got an encoding, make sure we use that
				if (!String.IsNullOrEmpty (body_encoding)) {
					string encoding_key = String.Format ("{0}encoding", StringFu.UnindexedNamespace);
					item.AddProperty (Property.New (encoding_key, body_encoding));
				}

				item.SetTextReader (body);
			}

			return item;
		}
Beispiel #27
0
		/* Picks a filter for the indexable and hooks the filter's text
		 * readers up to it.
		 *
		 * indexable  - the item to filter; its MimeType, NoContent and
		 *              Timestamp may be filled in here, and on success it
		 *              gets a FileType property and new text readers.
		 * text_cache - optional cache used for snippets; may be null.
		 * filter     - receives the filter that opened the content, or
		 *              null.
		 *
		 * Returns false if content can't/needn't be indexed.
		 * If AlreadyFiltered, then we don't return a filter but return true.
		 */
		static public bool FilterIndexable (Indexable indexable, TextCache text_cache, out Filter filter)
		{
			filter = null;
			ICollection filters = null;

			if (indexable.Filtering == IndexableFiltering.AlreadyFiltered)
				return true;

			if (! ShouldWeFilterThis (indexable))
				return false;

			string path = null;

			// First, figure out which filter we should use to deal with
			// the indexable.

			// If a specific mime type is specified, try to index as that type.
			if (indexable.MimeType != null)
				filters = CreateFiltersFromMimeType (indexable.MimeType);

			if (indexable.ContentUri.IsFile) {
				path = indexable.ContentUri.LocalPath;

				// Otherwise, set the mime type for a directory,
				// or sniff it from the file.
				if (indexable.MimeType == null) {
					if (Directory.Exists (path)) {
						indexable.MimeType = "inode/directory";
						indexable.NoContent = true;
					} else if (File.Exists (path)) {
						indexable.MimeType = XdgMime.GetMimeType (path);
					} else {
						Log.Warn ("Unable to filter {0}.  {1} not found.", indexable.DisplayUri, path);
						return false;
					}
				}

				// Set the timestamp to the last write time, if it isn't
				// set by the backend.
				if (! indexable.ValidTimestamp && indexable.IsNonTransient)
					indexable.Timestamp = FileSystem.GetLastWriteTimeUtc (path);

				// Check the timestamp to make sure the file hasn't
				// disappeared from underneath us.
				if (! FileSystem.ExistsByDateTime (indexable.Timestamp)) {
					Log.Warn ("Unable to filter {0}.  {1} appears to have disappeared from underneath us", indexable.DisplayUri, path);
					return false;
				}

				if (filters == null || filters.Count == 0) {
					filters = CreateFiltersFromIndexable (indexable);
				}
			}

			// We don't know how to filter this, so there is nothing else to do.
			// filters is still null when no mime type was given and the
			// content is not a file, so check for that as well.
			if (filters == null || filters.Count == 0) {
				if (! indexable.NoContent)
					Logger.Log.Debug ("No filter for {0} ({1}) [{2}]", indexable.DisplayUri, path, indexable.MimeType);

				return false;
			}

			// Try the candidates in order; the first one that opens the
			// content successfully wins.
			foreach (Filter candidate_filter in filters) {
				if (Debug)
					Logger.Log.Debug ("Testing filter: {0}", candidate_filter);
				
				// Hook up the snippet writer.
				if (candidate_filter.SnippetMode && text_cache != null) {
					if (candidate_filter.OriginalIsText && indexable.IsNonTransient) {
						text_cache.MarkAsSelfCached (indexable.Uri);
					} else if (indexable.CacheContent) {
						TextWriter writer = text_cache.GetWriter (indexable.Uri);
						candidate_filter.AttachSnippetWriter (writer);
					}
				}

				// Set the indexable on the filter.
				candidate_filter.Indexable = indexable;

				// Open the filter, copy the file's properties to the indexable,
				// and hook up the TextReaders.

				bool successful_open = false;
				TextReader text_reader;
				Stream binary_stream;

				// Source preference: file path, then the indexable's text
				// reader, then its binary stream.
				if (path != null)
					successful_open = candidate_filter.Open (path);
				else if ((text_reader = indexable.GetTextReader ()) != null)
					successful_open = candidate_filter.Open (text_reader);
				else if ((binary_stream = indexable.GetBinaryStream ()) != null)
					successful_open = candidate_filter.Open (binary_stream);
					
				if (successful_open) {
					// Set FileType
					indexable.AddProperty (Property.NewKeyword ("beagrep:FileType", candidate_filter.FileType));

					indexable.SetTextReader (candidate_filter.GetTextReader ());
					indexable.SetHotTextReader (candidate_filter.GetHotTextReader ());

					if (Debug)
						Logger.Log.Debug ("Successfully filtered {0} with {1}", path, candidate_filter);

					filter = candidate_filter;
					return true;
				} else {
					Log.Warn ("Error in filtering {0} with {1}, falling back", path, candidate_filter);
					candidate_filter.Cleanup ();
				}
			}

			if (Debug)
				Logger.Log.Debug ("None of the matching filters could process the file: {0}", path);

			return false;
		}
		/// <summary>
		/// Consumes one journal entry from <c>reader</c> (everything up to the next
		/// "END:VJOURNAL" line) and turns it into an indexable for that note.
		/// </summary>
		/// <returns>
		/// The indexable for the note, or null when the input ran out (the reader is
		/// closed in that case), the entry is incomplete, the note's modification
		/// time matches the recorded one, or <c>initial_scan</c> is set.
		/// </returns>
		public Indexable GetNextIndexable ()
		{
			string_builder.Length = 0;

			string note_uid = null;
			DateTime modified = DateTime.MinValue;
			string current;

			// Sample of the two lines that are picked out of the entry:
			//   UID:libkcal-1467827482.768
			//   LAST-MODIFIED:20061015T085606Z
			// Every other line is accumulated as the note's text.
			for (current = reader.ReadLine (); current != null; current = reader.ReadLine ()) {
				if (current == "END:VJOURNAL")
					break;

				if (current.StartsWith ("UID:")) {
					note_uid = current.Substring (4);
					continue;
				}

				if (current.StartsWith ("LAST-MODIFIED:")) {
					// Rewrite the trailing "Z" as an explicit offset before parsing.
					string stamp = current.Substring (14).Replace ("Z", "+00:00");
					modified = DateTime.ParseExact (
						stamp,
						fmts,
						DateTimeFormatInfo.InvariantInfo,
						DateTimeStyles.AdjustToUniversal);
					continue;
				}

				string_builder.Append (current);
				string_builder.Append ('\n');
			}

			// No terminator was found before the end of the input.
			if (current == null) {
				reader.Close ();
				return null;
			}

			// Bad note: no body, no uid or no modification time.
			bool incomplete = string_builder.Length == 0
				|| note_uid == null
				|| modified == DateTime.MinValue;
			if (incomplete)
				return null;

			// Mark note with uid as seen ('undeleted')
			deleted_notes [note_uid] = false;

			lock (last_modified_table) {
				bool unchanged = last_modified_table.Contains (note_uid)
					&& modified == (DateTime) last_modified_table [note_uid];

				// FIXME: Returning null for more than 179 times will cause trouble
				if (unchanged)
					return null;

				// New note, or a known note with a different modification time.
				last_modified_table [note_uid] = modified;
			}

			if (initial_scan)
				return null;

			// Open knotes notes as
			//   dcop knotes KNotesIface text <UID>
			// where the uri is given as knotes://uid
			Indexable indexable = new Indexable (new Uri (String.Format ("knotes:///{0}", note_uid)));
			indexable.ParentUri = UriFu.PathToFileUri (knotes_file);
			indexable.MimeType = ICalParser.KnotesMimeType;
			indexable.HitType = "Note";
			indexable.Timestamp = modified;
			// Add uid as a keyword field for convenience
			indexable.AddProperty (Property.NewUnsearched ("fixme:uid", note_uid));

			// FIXME: Comment this Debug statement after the backend stabilizes
			//Log.Debug ("Creating {0} from:[{1}]", uri, string_builder.ToString ());
			indexable.SetTextReader (new StringReader (string_builder.ToString ()));

			return indexable;
		}
		/// <summary>
		/// Builds the indexable for one type node of a monodoc file.
		/// </summary>
		/// <param name="node">Type node; must carry a "FullName" attribute and a "Docs" child.</param>
		/// <param name="file">File the node came from; its string form is embedded in the uri.</param>
		/// <returns>An indexable whose text is the inner XML of the node's "Docs" child.</returns>
		Indexable TypeNodeToIndexable(XmlNode node,FileInfo file)
		{
			// The full type name is used for the uri, the name property and the split name.
			string full_name = node.Attributes["FullName"].Value;
			string type_key = "T:" + full_name;

			Uri uri = new Uri ("monodoc:///" + file + ";item=" + type_key);
			Indexable indexable = new Indexable (uri);

			indexable.MimeType = "application/monodoc";
			indexable.Type = "Monodoc";

			indexable.AddProperty (Property.NewUnsearched ("fixme:type", "type"));
			indexable.AddProperty (Property.NewUnsearched ("fixme:name", type_key));

			// Store the pieces produced by FuzzySplit as one space-separated keyword.
			string[] pieces = StringFu.FuzzySplit (full_name);
			indexable.AddProperty (Property.NewKeyword ("fixme:splitname", String.Join (" ", pieces)));

			// Should we add other stuff here? Implemented interfaces etc?

			XmlNode docs = node.SelectSingleNode ("Docs");
			indexable.SetTextReader (new StringReader (docs.InnerXml));

			return indexable;
		}
Beispiel #30
0
        /// <summary>
        /// Builds a "FeedItem" indexable from the attributes of the element that
        /// <c>reader</c> currently points at, then advances the reader by one node.
        /// </summary>
        /// <returns>
        /// The indexable for the item.  Its text is the "Text" attribute, or empty
        /// when that attribute is absent.
        /// </returns>
        public Indexable GetNextIndexable()
        {
            string id     = reader.GetAttribute("Id");
            string title  = reader.GetAttribute("Title");
            string author = reader.GetAttribute("Author");
            // FIXME stupid mono bug; DateTime.ParseExact ("0001-01-01T00:00:00.0000000+00:00", ...)
            // http://bugzilla.ximian.com/show_bug.cgi?id=76082
            // Still present in 1.1.9.2
            DateTime pub_date;

            try {
                pub_date = DateTime.ParseExact(
                    reader.GetAttribute("PubDate"),
                    "yyyy-MM-ddTHH:mm:ss.fffffffzzz",
                    null);
            } catch (Exception) {
                // Any parse failure (including a missing attribute) falls back to
                // the minimum date instead of aborting the item.
                pub_date = DateTime.MinValue;
            }
            string link = reader.GetAttribute("Link");
            string text = reader.GetAttribute("Text");

            // GetAttribute returns null for an absent attribute; without this the
            // IndexOf and StringReader calls below would throw.
            if (text == null)
            {
                text = String.Empty;
            }

            reader.Read();

            Uri uri = new Uri(String.Format("feed:{0};item={1}", channel_url, id));

            Logger.Log.Debug("BlamQ: Indexing [" + channel_name + "] " + title);

            Indexable indexable = new Indexable(uri);

            indexable.ParentUri = UriFu.PathToFileUri(feed_file);
            indexable.MimeType  = "text/html";
            indexable.HitType   = "FeedItem";
            indexable.Timestamp = pub_date.ToUniversalTime();

            // change property names to DC names, as far as allowed
            indexable.AddProperty(Property.New("dc:title", title));
            indexable.AddProperty(Property.New("dc:creator", author));
            indexable.AddProperty(Property.NewKeyword("dc:identifier", link));
            indexable.AddProperty(Property.NewKeyword("dc:source", channel_url));
            indexable.AddProperty(Property.New("dc:publisher", channel_name));

            // Pull the src of the first <img> tag out of the item text, if any.
            string img = null;
            int    i   = text.IndexOf("<img src=\"");

            if (i != -1)
            {
                i += "<img src=\"".Length;
                int j = text.IndexOf("\"", i);
                if (j != -1)
                {
                    img = text.Substring(i, j - i);
                }
            }

            if (img != null)
            {
                // NOTE(review): the cache file name is the hash code of the image
                // url; presumably this mirrors how Blam names its cache files.
                // TODO confirm.
                string path = Path.Combine(Path.Combine(blam_dir, "Cache"),
                                           img.GetHashCode().ToString());
                indexable.AddProperty(Property.NewUnsearched("fixme:cachedimg", path));
            }

            StringReader string_reader = new StringReader(text);

            indexable.SetTextReader(string_reader);

            return(indexable);
        }
Beispiel #31
0
        /// <summary>
        /// Picks a filter for the indexable, opens it on the indexable's content and
        /// replaces the indexable's text readers with the filter's readers.
        /// </summary>
        /// <param name="indexable">
        /// The item to filter.  Its MimeType, NoContent and Timestamp may be filled
        /// in here when the content uri is a file.
        /// </param>
        /// <param name="text_cache">
        /// Cache used for snippets when the chosen filter is in snippet mode; may be null.
        /// </param>
        /// <param name="filter">
        /// Receives the filter that opened successfully.  Null when false is returned
        /// and also when the indexable is AlreadyFiltered.
        /// </param>
        /// <returns>
        /// False if content can't/needn't be indexed.
        /// If AlreadyFiltered, then we don't return a filter but return true.
        /// </returns>
        public static bool FilterIndexable(Indexable indexable, TextCache text_cache, out Filter filter)
        {
            filter = null;
            ICollection filters = null;

            if (indexable.Filtering == IndexableFiltering.AlreadyFiltered)
            {
                return(true);
            }

            if (!ShouldWeFilterThis(indexable))
            {
                return(false);
            }

            string path = null;

            // First, figure out which filter we should use to deal with
            // the indexable.

            // If a specific mime type is specified, try to index as that type.
            if (indexable.MimeType != null)
            {
                filters = CreateFiltersFromMimeType(indexable.MimeType);
            }

            if (indexable.ContentUri.IsFile)
            {
                path = indexable.ContentUri.LocalPath;

                // Otherwise, set the mime type for a directory,
                // or sniff it from the file.
                if (indexable.MimeType == null)
                {
                    if (Directory.Exists(path))
                    {
                        indexable.MimeType  = "inode/directory";
                        indexable.NoContent = true;
                    }
                    else if (File.Exists(path))
                    {
                        indexable.MimeType = XdgMime.GetMimeType(path);
                    }
                    else
                    {
                        Log.Warn("Unable to filter {0}.  {1} not found.", indexable.DisplayUri, path);
                        return(false);
                    }
                }

                // Set the timestamp to the last write time, if it isn't
                // set by the backend.
                if (!indexable.ValidTimestamp && indexable.IsNonTransient)
                {
                    indexable.Timestamp = FileSystem.GetLastWriteTimeUtc(path);
                }

                // Check the timestamp to make sure the file hasn't
                // disappeared from underneath us.
                if (!FileSystem.ExistsByDateTime(indexable.Timestamp))
                {
                    Log.Warn("Unable to filter {0}.  {1} appears to have disappeared from underneath us", indexable.DisplayUri, path);
                    return(false);
                }

                if (filters == null || filters.Count == 0)
                {
                    filters = CreateFiltersFromIndexable(indexable);
                }
            }

            // We don't know how to filter this, so there is nothing else to do.
            // filters is still null when no mime type was given and the content
            // is not a file, so it has to be checked before Count is read.
            if (filters == null || filters.Count == 0)
            {
                if (!indexable.NoContent)
                {
                    Logger.Log.Debug("No filter for {0} ({1}) [{2}]", indexable.DisplayUri, path, indexable.MimeType);
                }

                return(false);
            }

            // Try the candidates in order; the first one that opens wins.
            foreach (Filter candidate_filter in filters)
            {
                if (Debug)
                {
                    Logger.Log.Debug("Testing filter: {0}", candidate_filter);
                }

                // Hook up the snippet writer.
                if (candidate_filter.SnippetMode && text_cache != null)
                {
                    if (candidate_filter.OriginalIsText && indexable.IsNonTransient)
                    {
                        text_cache.MarkAsSelfCached(indexable.Uri);
                    }
                    else if (indexable.CacheContent)
                    {
                        TextWriter writer = text_cache.GetWriter(indexable.Uri);
                        candidate_filter.AttachSnippetWriter(writer);
                    }
                }

                // Set the indexable on the filter.
                candidate_filter.Indexable = indexable;

                // Open the filter, copy the file's properties to the indexable,
                // and hook up the TextReaders.

                bool       successful_open = false;
                TextReader text_reader;
                Stream     binary_stream;

                // Source preference: file path, then the indexable's text
                // reader, then its binary stream.
                if (path != null)
                {
                    successful_open = candidate_filter.Open(path);
                }
                else if ((text_reader = indexable.GetTextReader()) != null)
                {
                    successful_open = candidate_filter.Open(text_reader);
                }
                else if ((binary_stream = indexable.GetBinaryStream()) != null)
                {
                    successful_open = candidate_filter.Open(binary_stream);
                }

                if (successful_open)
                {
                    // Set FileType
                    indexable.AddProperty(Property.NewKeyword("beagrep:FileType", candidate_filter.FileType));

                    indexable.SetTextReader(candidate_filter.GetTextReader());
                    indexable.SetHotTextReader(candidate_filter.GetHotTextReader());

                    if (Debug)
                    {
                        Logger.Log.Debug("Successfully filtered {0} with {1}", path, candidate_filter);
                    }

                    filter = candidate_filter;
                    return(true);
                }
                else
                {
                    Log.Warn("Error in filtering {0} with {1}, falling back", path, candidate_filter);
                    candidate_filter.Cleanup();
                }
            }

            if (Debug)
            {
                Logger.Log.Debug("None of the matching filters could process the file: {0}", path);
            }

            return(false);
        }
		/// <summary>
		/// Converts <c>current_item</c> into a "FeedItem" indexable.
		/// </summary>
		/// <returns>The indexable; its text is the item's description.</returns>
		private Indexable current_itemToIndexable ()
		{
			Indexable indexable;

			// Feed source and item source normally combine into a valid uri; when
			// they do not, wrap them in a dummy liferea uri instead.
			try {
				Uri plain_uri = new Uri (String.Format ("{0};item={1}", feed_source, current_item.Source));
				indexable = new Indexable (plain_uri);
			} catch (System.UriFormatException) {
				Uri dummy_uri = new Uri (String.Format ("liferea://dummy?{0};item={1}", feed_source, current_item.Source));
				indexable = new Indexable (dummy_uri);
			}

			indexable.ParentUri = UriFu.PathToFileUri (feed_file);
			indexable.MimeType = "text/html";
			indexable.HitType = "FeedItem";

			// The item timestamp is a number of seconds added to unix time zero.
			indexable.Timestamp = DateTimeUtil.UnixToDateTimeUtc (0).AddSeconds (current_item.Timestamp);

			// Property names follow the Dublin Core element names wherever
			// applicable, so that querying for a specific field is possible.
			indexable.AddProperty (Property.New ("dc:title", current_item.Title));

			Attribute[] item_attribs = current_item.Attribs.AttribArray;
			if (item_attribs != null) {
				// Only "author" attributes are indexed, each as a dc:creator.
				foreach (Attribute candidate in item_attribs) {
					if (candidate.Name == "author")
						indexable.AddProperty (Property.New ("dc:creator", candidate.Value));
				}
			}

			indexable.AddProperty (Property.NewKeyword ("dc:identifier", current_item.Source));
			indexable.AddProperty (Property.NewKeyword ("dc:source", feed_source));
			indexable.AddProperty (Property.New ("dc:publisher", publisher));

			if (File.Exists (icon_file))
				indexable.AddProperty (Property.NewUnsearched ("fixme:cachedimg", icon_file));

			indexable.SetTextReader (new StringReader (current_item.Description));

			return indexable;
		}
        /// <summary>
        /// Consumes one calendar entry from <c>reader</c> (everything up to the next
        /// "END:VEVENT" or "END:VTODO" line) and turns it into an indexable.
        /// </summary>
        /// <returns>
        /// A "Calendar" indexable for an event or a "Task" indexable for a todo.
        /// Null when the input ran out (the reader is closed in that case), the
        /// entry is incomplete, the entry's modification time matches the recorded
        /// one, or <c>initial_scan</c> is set.
        /// </returns>
        public Indexable GetNextIndexable()
        {
            string_builder.Length = 0;

            string   entry_uid = null;
            DateTime modified  = DateTime.MinValue;
            string   current;

            // Sample of the two lines that are picked out of the entry:
            //   UID:libkcal-1467827482.768
            //   LAST-MODIFIED:20061015T085606Z
            // Every other line is accumulated as the entry's text.
            for (current = reader.ReadLine(); current != null; current = reader.ReadLine())
            {
                if (current == "END:VEVENT" || current == "END:VTODO")
                {
                    break;
                }

                if (current.StartsWith("UID:"))
                {
                    entry_uid = current.Substring(4);
                    continue;
                }

                if (current.StartsWith("LAST-MODIFIED:"))
                {
                    // Rewrite the trailing "Z" as an explicit offset before parsing.
                    string stamp = current.Substring(14).Replace("Z", "+00:00");
                    modified = DateTime.ParseExact(
                        stamp,
                        fmts,
                        DateTimeFormatInfo.InvariantInfo,
                        DateTimeStyles.AdjustToUniversal);
                    continue;
                }

                string_builder.Append(current);
                string_builder.Append('\n');
            }

            // No terminator was found before the end of the input.
            if (current == null)
            {
                reader.Close();
                return(null);
            }

            // Bad entry: no body, no uid or no modification time.
            bool incomplete = string_builder.Length == 0
                              || entry_uid == null
                              || modified == DateTime.MinValue;
            if (incomplete)
            {
                return(null);
            }

            // Mark entry with uid as seen ('undeleted')
            deleted_entries [entry_uid] = false;

            lock (last_modified_table) {
                bool unchanged = last_modified_table.Contains(entry_uid)
                                 && modified == (DateTime)last_modified_table [entry_uid];

                // FIXME: Returning null for more than 179 times will cause trouble
                if (unchanged)
                {
                    return(null);
                }

                // New entry, or a known entry with a different modification time.
                last_modified_table [entry_uid] = modified;
            }

            if (initial_scan)
            {
                return(null);
            }

            Indexable indexable = new Indexable(new Uri(String.Format("korganizer:///{0}", entry_uid)));

            indexable.ParentUri = UriFu.PathToFileUri(korganizer_file);

            // The terminator that ended the loop tells events and todos apart.
            bool is_event = (current == "END:VEVENT");

            indexable.MimeType = is_event ? ICalParser.KOrganizerEventMimeType : ICalParser.KOrganizerTodoMimeType;
            indexable.HitType  = is_event ? "Calendar" : "Task";

            indexable.Timestamp = modified;
            // Add uid as a keyword field for convenience
            indexable.AddProperty(Property.NewUnsearched("fixme:uid", entry_uid));

            // FIXME: Comment this Debug statement after the backend stabilizes
            //Log.Debug ("Creating {0} from:[{1}]", uri, string_builder.ToString ());
            indexable.SetTextReader(new StringReader(string_builder.ToString()));

            return(indexable);
        }
		/////////////////////////////////////////////////
		
		/// <summary>
		/// Wraps a parsed note in an indexable that is marked as already filtered.
		/// </summary>
		/// <param name="file">File holding the note; becomes the content uri.</param>
		/// <param name="note">Note supplying the uri, timestamp, subject, tags and text.</param>
		/// <returns>The indexable, with a text reader over the note's text.</returns>
		private Indexable NoteToIndexable (FileInfo file, Note note)
		{
			Indexable result = new Indexable (note.Uri);

			result.ContentUri = UriFu.PathToFileUri (file.FullName);
			result.Timestamp = note.timestamp;
			result.HitType = "Note";
			// The note text is handed over directly, so no filter has to run on it.
			result.Filtering = IndexableFiltering.AlreadyFiltered;

			result.AddProperty (Property.New ("dc:title", note.subject));
			result.AddProperty (Property.NewUnsearched ("fixme:application","tomboy"));

			// FIXME: tagging is disabled in Tomboy-0.8.x and is planned for 0.10.0
			foreach (string tag in note.tags) {
				result.AddProperty (Property.New ("note:tag", tag));
			}

			// Keep the text around, keyed by uri, so that it can be put
			// into the TextCache later.
			note_text_cache [note.Uri] = note.text;

			result.SetTextReader (new StringReader (note.text));

			return result;
		}
		/// <summary>
		/// Converts <c>current_item</c> into a "FeedItem" indexable.
		/// </summary>
		/// <returns>
		/// The indexable, with the item's description as its text, or null when
		/// there is no current item.
		/// </returns>
		private Indexable current_itemToIndexable ()
		{
			// Nothing to convert.
			if (current_item == null)
				return null;

			//Log.Debug ("Indexing " + channel_link + ":" + current_item.Link);
			Uri item_uri = new Uri (String.Format ("feed:{0};item={1}", channel_link, current_item.Link));

			Indexable indexable = new Indexable (item_uri);
			indexable.ParentUri = UriFu.PathToFileUri (feed_file);
			indexable.MimeType = "text/html";
			indexable.HitType = "FeedItem";

			// The publication date is parsed with an RFC 822 style pattern and
			// adjusted to universal time.
			string rfc822_pattern = "ddd, dd MMM yyyy HH:mm:ss zzz";
			indexable.Timestamp = DateTime.ParseExact (
				current_item.PubDate,
				rfc822_pattern,
				DateTimeFormatInfo.InvariantInfo,
				DateTimeStyles.AdjustToUniversal);

			// Properties use Dublin Core names.
			indexable.AddProperty (Property.New ("dc:title", current_item.Title));
			indexable.AddProperty (Property.NewKeyword ("dc:identifier", current_item.Link));
			indexable.AddProperty (Property.NewKeyword ("dc:source", channel_link));
			indexable.AddProperty (Property.New ("dc:publisher", channel_title));

			indexable.SetTextReader (new StringReader (current_item.Description));

			return indexable;
		}
Beispiel #36
0
		/// <summary>
		/// Builds a "FeedItem" indexable from the attributes of the element that
		/// <c>reader</c> currently points at, then advances the reader by one node.
		/// </summary>
		/// <returns>
		/// The indexable for the item.  Its text is the "Text" attribute, or empty
		/// when that attribute is absent.
		/// </returns>
		public Indexable GetNextIndexable ()
		{
			string id = reader.GetAttribute ("Id");
			string title = reader.GetAttribute ("Title");
			string author = reader.GetAttribute ("Author");
			// FIXME stupid mono bug; DateTime.ParseExact ("0001-01-01T00:00:00.0000000+00:00", ...)
			// http://bugzilla.ximian.com/show_bug.cgi?id=76082
			// Still present in 1.1.9.2
			DateTime pub_date;
			try {
				pub_date = DateTime.ParseExact (
						reader.GetAttribute ("PubDate"),
						"yyyy-MM-ddTHH:mm:ss.fffffffzzz",
						null);
			} catch (Exception) {
				// Any parse failure (including a missing attribute) falls back to
				// the minimum date instead of aborting the item.
				pub_date = DateTime.MinValue;
			}
			string link = reader.GetAttribute ("Link");
			string text = reader.GetAttribute ("Text");

			// GetAttribute returns null for an absent attribute; without this the
			// IndexOf and StringReader calls below would throw.
			if (text == null)
				text = String.Empty;

			reader.Read ();

			Uri uri = new Uri (String.Format ("feed:{0};item={1}", channel_url, id));
			Logger.Log.Debug ("BlamQ: Indexing [" + channel_name + "] " + title);
			
			Indexable indexable = new Indexable (uri);
			indexable.ParentUri = UriFu.PathToFileUri (feed_file);
			indexable.MimeType = "text/html";
			indexable.HitType = "FeedItem";
			indexable.Timestamp = pub_date.ToUniversalTime ();
					
			// change property names to DC names, as far as allowed
			indexable.AddProperty (Property.New ("dc:title", title));
			indexable.AddProperty (Property.New ("dc:creator", author));
			indexable.AddProperty (Property.NewKeyword ("dc:identifier", link));
			indexable.AddProperty (Property.NewKeyword ("dc:source", channel_url));
			indexable.AddProperty (Property.New ("dc:publisher", channel_name));

			// Pull the src of the first <img> tag out of the item text, if any.
			string img = null;
			int i = text.IndexOf ("<img src=\"");
			if (i != -1) {
				i += "<img src=\"".Length;
				int j = text.IndexOf ("\"", i);
				if (j != -1)
					img = text.Substring (i, j-i);
			}

			if (img != null) {
				// NOTE(review): the cache file name is the hash code of the image
				// url; presumably this mirrors how Blam names its cache files.
				// TODO confirm.
				string path = Path.Combine (Path.Combine (blam_dir, "Cache"),
							    img.GetHashCode ().ToString ());
				indexable.AddProperty (Property.NewUnsearched ("fixme:cachedimg", path));
			}

			StringReader string_reader = new StringReader (text);
			indexable.SetTextReader (string_reader);

			return indexable;
		}