static private Indexable MemberNodeToIndexable(XmlNode node, Uri base_uri, string parentName)
        {
            // Builds an Indexable for a single documented member.  The member is
            // identified by a string of the form
            //     <type-char>:<parentName>[.<MemberName>][(<ParamType>,<ParamType>,...)]
            // which is used both as the URI fragment and as the title/name properties.
            //
            // node       - member element; must have a "MemberType" child, a "Docs"
            //              child and (for non-'C' members) a "MemberName" attribute.
            // base_uri   - URI the member fragment is appended to.
            // parentName - name of the enclosing type, placed before the member name.
            string kind_text = node.SelectSingleNode("MemberType").InnerText;
            char   kind      = MemberTypeToChar(kind_text);

            StringBuilder name = new StringBuilder();
            name.Append(kind).Append(':').Append(parentName);

            // Members of kind 'C' are identified by the parent name alone.
            if (kind != 'C')
            {
                name.Append('.').Append(node.Attributes["MemberName"].Value);
            }

            // Kinds 'C', 'M' and 'E' carry a parenthesised, comma-separated
            // list of parameter types.
            bool has_parameter_list = (kind == 'C' || kind == 'M' || kind == 'E');
            if (has_parameter_list)
            {
                name.Append('(');

                string separator = "";
                foreach (XmlNode parameter in node.SelectNodes("Parameters/Parameter"))
                {
                    name.Append(separator).Append(parameter.Attributes["Type"].Value);
                    separator = ",";
                }

                name.Append(')');
            }

            string full_name = name.ToString();

            Indexable result = new Indexable(UriFu.AddFragment(base_uri, full_name, false));
            result.MimeType = "text/html";
            result.HitType  = "MonodocEntry";

            result.AddProperty(Property.New("dc:title", full_name));
            result.AddProperty(Property.New("fixme:name", full_name));
            result.AddProperty(Property.NewUnsearched("fixme:type", kind_text.ToLower()));

            // The raw inner XML of the Docs element is the indexable's text.
            result.SetTextReader(new StringReader(node.SelectSingleNode("Docs").InnerXml));

            return result;
        }
        static private Indexable TypeNodeToIndexable(XmlNode node, Uri base_uri)
        {
            // Builds an Indexable for a documented type.  The identifier
            // "T:<FullName>" serves as URI fragment, title and name.
            //
            // node     - type element; must have a "FullName" attribute and a
            //            "Docs" child element.
            // base_uri - URI the type fragment is appended to.
            string type_id = "T:" + node.Attributes["FullName"].Value;

            Indexable result = new Indexable(UriFu.AddFragment(base_uri, type_id, false));
            result.MimeType = "text/html";
            result.HitType  = "MonodocEntry";

            result.AddProperty(Property.New("dc:title", type_id));
            result.AddProperty(Property.NewUnsearched("fixme:name", type_id));
            result.AddProperty(Property.NewUnsearched("fixme:type", "type"));

            // The raw inner XML of the Docs element is the indexable's text.
            result.SetTextReader(new StringReader(node.SelectSingleNode("Docs").InnerXml));

            return result;
        }
Exemple #3
0
    static int DumpOneIndex_Metadata(string index_name, ArrayList uris, bool show_properties)
    {
        // Prints the hits stored in one index to the console, sorted by URI,
        // and returns how many hits were printed.
        //
        // index_name      - name of the index to open.
        // uris            - URIs to restrict the dump to; an empty list dumps
        //                   every hit in the index.
        // show_properties - when false only "<index>: <uri>" is printed per hit;
        //                   when true the full metadata and property list is shown.
        LuceneQueryingDriver driver = new LuceneQueryingDriver(index_name, -1, true);

        bool dump_everything = (uris.Count == 0);
        bool is_file_system  = (index_name == "FileSystemIndex");

        Hashtable hits_by_uri = null;
        ArrayList every_hit   = null;

        // The complete hit table is needed either to dump everything or to
        // remap FileSystemIndex URIs below.
        if (dump_everything || is_file_system)
        {
            hits_by_uri = driver.GetAllHitsByUri();
            every_hit   = new ArrayList(hits_by_uri.Values);
        }

        // A hard-wired hack: FileSystemIndex hits are rewritten in place so
        // that they carry file URIs built from the remapped path.
        if (is_file_system)
        {
            foreach (Hit hit in every_hit)
            {
                string internal_uri;
                bool   is_child = (hit [Property.IsChildPropKey] == "true");

                if (!is_child)
                {
                    // Remember the original URI before it is overwritten.
                    internal_uri = UriFu.UriToEscapedString(hit.Uri);

                    hit.Uri = UriFu.PathToFileUri(RemapUriToPath(hits_by_uri, hit));
                    hit.AddProperty(Property.NewUnsearched("beagrep:InternalUri", internal_uri));
                    continue;
                }

                string path = RemapUriToPath(hits_by_uri, hit);

                // Computed as in the non-child case, but not stored on the hit.
                internal_uri = UriFu.UriToEscapedString(hit.ParentUri);

                hit.ParentUri = UriFu.PathToFileUri(path);
                hit.Uri       = UriFu.AddFragment(UriFu.PathToFileUri(path), hit.Uri.Fragment, true);
            }
        }

        ArrayList selected;

        if (dump_everything)
        {
            selected = every_hit;
        }
        else
        {
            selected = new ArrayList(driver.GetHitsForUris(RemapUris(driver, uris)));

            if (is_file_system)
            {
                // Swap each freshly fetched hit for its rewritten counterpart
                // from the full table (keyed by the hit's URI).
                for (int index = 0; index < selected.Count; index++)
                {
                    Hit fetched = (Hit)selected [index];

                    selected [index] = (Hit)hits_by_uri [fetched.Uri];
                }
            }
        }

        selected.Sort(new HitByUriComparer());

        foreach (Hit hit in selected)
        {
            if (show_properties)
            {
                Console.WriteLine(" Index: {0}", index_name);
                Console.WriteLine("   Uri: {0}", hit.Uri);
                if (hit.ParentUri != null)
                {
                    Console.WriteLine("Parent: {0}", hit.ParentUri);
                }
                Console.WriteLine(" MimeT: {0}", hit.MimeType);
                Console.WriteLine("  Type: {0}", hit.Type);
                Console.WriteLine("Source: {0}", hit.Source);

                ArrayList sorted_props = new ArrayList(hit.Properties);
                sorted_props.Sort();

                foreach (Property prop in sorted_props)
                {
                    // One flag character per attribute: m(utable), s(earched),
                    // p(ersistent), t(ext); a blank means the flag is not set.
                    char [] flags = new char [] {
                        prop.IsMutable ? 'm' : ' ',
                        prop.IsSearched ? 's' : ' ',
                        prop.IsPersistent ? 'p' : ' ',
                        prop.Type == PropertyType.Text ? 't' : ' '
                    };

                    Console.WriteLine("  Prop: [{0}] {1} = '{2}'", new String(flags), prop.Key, prop.Value);
                }

                Console.WriteLine();
            }
            else
            {
                Console.WriteLine("{0}: {1}", index_name, hit.Uri);
            }
        }

        return selected.Count;
    }
Exemple #4
0
        // Walks the whole document through `reader`, node by node, and:
        //   * records the document language (from <article>/<book>) and the first
        //     title seen outside any section as the base title,
        //   * turns every <sect*>/<chapter*> element that has a non-empty "id"
        //     attribute into a child Indexable (emitted when the element closes),
        //   * adds <keyword> text as "dc:subject" properties,
        //   * routes text either into the innermost open section or, when no
        //     section is open, into the main indexable.
        // Finishes with Error() when no child indexable was generated and no
        // title was found, otherwise with Finished().
        // NOTE(review): `reader` is presumably an XmlReader-like field set up
        // elsewhere in this class -- confirm.
        override protected void DoPullProperties()
        {
            // Only used to report the parse time in the debug log below.
            Stopwatch watch = new Stopwatch();

            watch.Start();

            while (reader.Read())
            {
                switch (reader.NodeType)
                {
                case XmlNodeType.Element:
                    // Section-like elements: any name starting with "sect" or "chapter".
                    if (reader.Name.StartsWith("sect") || reader.Name.StartsWith("chapter"))
                    {
                        string id = reader.GetAttribute("id");

                        // Sections without an id are not tracked; their text is
                        // attributed to whichever entry is currently on top of
                        // the stack (or to the main indexable).
                        if (id != null && id != String.Empty)
                        {
                            DocbookEntry entry = new DocbookEntry();
                            entry.Id    = id;
                            // Depth is what later matches this entry to its end tag.
                            entry.Depth = reader.Depth;

                            string language = reader.GetAttribute("lang");

                            if (language != null && language != String.Empty)
                            {
                                entry.Language = language;
                            }

                            entries_stack.Push(entry);
                        }
                    }
                    // Root elements carry the document-wide language.
                    else if (reader.Name == "article" || reader.Name == "book")
                    {
                        string language = reader.GetAttribute("lang");

                        if (language != null && language != String.Empty)
                        {
                            base_language = language;
                        }
                    }
                    else if (reader.Name == "title")
                    {
                        // Advances the shared reader; the node read here is
                        // consumed and will not be seen by the outer loop.
                        reader.Read();                          // Go to the text node

                        if (entries_stack.Count == 0 && base_title == null)
                        {
                            // This is probably the book title
                            base_title = reader.Value;
                        }
                        else if (entries_stack.Count > 0)
                        {
                            DocbookEntry entry = (DocbookEntry)entries_stack.Peek();

                            // Only the first title inside a section is kept.
                            if (entry.Title == null)
                            {
                                entry.Title = reader.Value;
                            }
                        }
                    }
                    else if (reader.Name == "keyword")
                    {
                        // As with <title>, this consumes the following node.
                        reader.Read();                          // read the text node
                        AddProperty(Property.NewKeyword("dc:subject", reader.Value));
                    }
                    break;

                case XmlNodeType.Text:
                    // Append text to the child indexable
                    if (entries_stack.Count > 0)
                    {
                        ((DocbookEntry)entries_stack.Peek()).Content.Append(reader.Value);
                    }

                    // Append text to the main indexable
                    else
                    {
                        AppendWord(reader.Value);
                    }
                    break;

                case XmlNodeType.EndElement:
                    // An end tag at the same depth as the innermost open entry
                    // is treated as the end of that entry.
                    if (entries_stack.Count > 0 &&
                        ((DocbookEntry)entries_stack.Peek()).Depth == reader.Depth)
                    {
                        DocbookEntry entry, parent_entry = null;

                        entry = (DocbookEntry)entries_stack.Pop();

                        // After the pop, the new top of the stack (if any) is
                        // the enclosing section.
                        if (entries_stack.Count > 0)
                        {
                            parent_entry = (DocbookEntry)entries_stack.Peek();
                        }

                        // The child's URI is the file URI plus the section id as fragment.
                        Indexable indexable;
                        indexable          = new Indexable(UriFu.AddFragment(Indexable.Uri, entry.Id, false));
                        indexable.HitType  = "DocbookEntry";
                        indexable.MimeType = "text/x-docbook-entry";
                        indexable.AddProperty(Property.NewKeyword("beagle:FileType", "documentation"));
                        indexable.Filtering = IndexableFiltering.AlreadyFiltered;

                        indexable.AddProperty(Property.NewUnsearched("fixme:id", entry.Id));
                        indexable.AddProperty(Property.New("dc:title", entry.Title));

                        // Add the docbook book title
                        indexable.AddProperty(Property.NewUnsearched("fixme:base_title", base_title));

                        // Add the child language (or docbook language if none is specified)
                        if (entry.Language != null)
                        {
                            indexable.AddProperty(Property.NewUnsearched("fixme:language", entry.Language));
                        }
                        else if (base_language != null)
                        {
                            indexable.AddProperty(Property.NewUnsearched("fixme:language", base_language));
                        }

                        // Add any parent (as in docbook parent entry, not beagle) data if we have it
                        if (parent_entry != null)
                        {
                            indexable.AddProperty(Property.NewUnsearched("fixme:parent_id", parent_entry.Id));
                            indexable.AddProperty(Property.NewUnsearched("fixme:parent_title", parent_entry.Title));
                        }


                        // The text accumulated for this section becomes the child's content.
                        StringReader content_reader = new StringReader(entry.Content.ToString());
                        indexable.SetTextReader(content_reader);
                        indexable.SetChildOf(this.Indexable);

                        AddIndexable(indexable);
                    }
                    break;
                }
            }

            // Add the common properties to the top-level
            // file item such as Title, Language etc.
            // NOTE(review): base_title / base_language may still be null here;
            // presumably Property.New* / AddProperty tolerate that -- confirm.

            AddProperty(Property.New("dc:title", base_title));
            AddProperty(Property.NewUnsearched("fixme:language", base_language));

            watch.Stop();

            // If we've successfully crawled the file but haven't
            // found any indexables, we shouldn't consider it
            // successful at all (unless we have a title, which
            // means that it's actually a docbook file, just without
            // sections).
            if (!HasGeneratedIndexable && base_title == null)
            {
                Log.Error("Probably not a docbook. Ignoring {0}!", base_path);
                Error();
                return;
            }

            Logger.Log.Debug("Parsed docbook file in {0}", watch);

            Finished();
        }
Exemple #5
0
        // Reads the next entry from `reader` and returns it as a child
        // Indexable.
        //
        // Expected input: a header line "@<type> <name>", followed by
        // "key=value" lines, terminated by an empty line (or end of input).
        //
        // child   - set to the new child indexable on success, null otherwise.
        // returns - true when an entry was produced; false when bibparse could
        //           not be initialised or the input is exhausted.
        public override bool GenerateNextIndexable(out Indexable child)
        {
            child = null;

            if (bib_process == null && !InitBibparse())
            {
                return(false);
            }

            string line = null;
            string type = null, name = null;

            // Skip forward to the next well-formed "@<type> <name>" header.
            while ((line = reader.ReadLine()) != null)
            {
                if (line == String.Empty || line [0] != '@')
                {
                    continue;
                }

                int i = line.IndexOf(' ');
                // Ignore headers with no space or nothing after the space.
                if (i == -1 || line.Length == i + 1)
                {
                    continue;
                }
                type = line.Substring(1, i - 1).ToLower();
                name = line.Substring(i + 1);
                break;
            }

            // The loop only leaves `line` null when the input ran out
            // before a header was found.
            if (line == null)
            {
                return(false);
            }

            child = new Indexable(UriFu.AddFragment(Indexable.Uri, name, false));
            child.CacheContent = false;
            child.MimeType     = "text/x-bibtex";
            child.DisplayUri   = child.Uri;
            child.NoContent    = true;
            child.AddProperty(Property.NewKeyword("bibtex:type", type));

            string key, value;

            // Now fill in properties from the key=value lines
            while ((line = reader.ReadLine()) != null)
            {
                // Entries are separated by empty lines
                if (line == String.Empty)
                {
                    break;
                }

                int i = line.IndexOf('=');
                // Ensure a non-empty key (i >= 1) and a non-empty value
                // (something follows the '=').  The second test used to read
                // "i + i", which wrongly dropped lines whose '=' sat exactly
                // in the middle (e.g. "ab=c") and let "key=" through.
                if (i < 1 || line.Length == i + 1)
                {
                    continue;
                }
                key   = line.Substring(0, i).ToLower();
                value = line.Substring(i + 1);
                foreach (Property prop in EntryLineToProperty(key, value))
                {
                    child.AddProperty(prop);
                }
            }

            child.SetChildOf(Indexable);
            return(true);
        }
        // Produces a child Indexable for the next entry of the archive.
        //
        // child   - the new child indexable, or null when the entry was skipped
        //           or no further entries will be produced.
        // returns - false once the archive is exhausted or one of the limits
        //           (MAX_CHILDREN entries, MAX_ALL_FILES total size) is hit, in
        //           which case the archive is closed; true otherwise, including
        //           for entries that were skipped (child stays null).
        public override bool GenerateNextIndexable(out Indexable child)
        {
            child = null;

            // Lazily open the archive on first use.
            if (!setup_done)
            {
                SetupArchiveStream();
            }

            if (count >= MAX_CHILDREN)
            {
                Log.Debug("Archive {0} contains more than {1} files.  Only {1} files indexed.", Indexable.DisplayUri.ToString(), count);
                Close();
                return false;
            }

            if (total_size > MAX_ALL_FILES)
            {
                Log.Debug("Archive {0} crossed our max uncompressed size threshold.  Only {1} files extracted", Indexable.DisplayUri.ToString(), count);
                Close();
                return false;
            }

            ArchiveEntry entry = DoGetNextEntry();
            if (entry == null)
            {
                Close();
                return false;
            }

            // Store file names in the archive
            string file_name = Path.GetFileName(entry.Name);
            AppendText(file_name);
            AppendWhiteSpace();

            // An entry without a temp file is invalid or oversized: its name
            // has been recorded above, but no child is created for it.
            if (entry.TempFile == null)
            {
                return true;
            }

            count++;
            total_size += entry.Size;

            // Add "#<escaped-path-to-entry>" to the end of the Indexable Uri
            // So, file b#c in archive foo.zip becomes file:///foo.zip#b%23c
            // And file c in archive b in archive foo.zip becomes file:///foo.zip#b#c
            Indexable result = new Indexable(UriFu.AddFragment(Indexable.Uri, entry.Name, false));

            result.CacheContent = true;
            result.MimeType     = entry.MimeType;

            result.DisplayUri    = new Uri(String.Concat(Indexable.DisplayUri.ToString(), "#", entry.Name));
            result.ContentUri    = UriFu.PathToFileUri(entry.TempFile);
            result.DeleteContent = true;

            // FIXME Remove fixme:inside_archive during Property Hack Week
            // Replace most flag properties by value properties
            result.AddProperty(Property.NewBool("fixme:inside_archive", true));
            // Use this instead of fixme:inside_archive
            result.AddProperty(Property.NewKeyword("archive:type", archive_type));

            result.AddProperty(Property.NewKeyword("fixme:relativeuri", entry.Name));
            result.AddProperty(Property.New("fixme:comment", entry.Comment));
            result.AddProperty(Property.NewUnsearched("fixme:filesize", entry.Size));

            foreach (Property file_prop in Property.StandardFileProperties(file_name, false))
            {
                result.AddProperty(file_prop);
            }

            result.SetChildOf(Indexable);

            child = result;
            return true;
        }
Exemple #7
0
            // Visits one MIME object and, recursively, everything nested in it.
            //
            //   * MessagePart: recurses into the embedded message's MIME part.
            //   * Multipart:   for "alternative", picks the last sub-part whose
            //                  content type IsMimeTypeHandled() accepts; if none
            //                  is picked (or it is not "alternative") recurses
            //                  into every sub-part.
            //   * Part:        processed directly.
            //
            // A processed part either becomes this filter's own reader (first
            // part at depth 1 that is text/plain or text/html) or a child
            // indexable appended to child_indexables, unless its mime type is
            // blacklisted.
            //
            // mime_part - the MIME object to visit.
            // Throws Exception when mime_part is none of the three known kinds.
            // NOTE(review): `depth` is incremented on entry and decremented only
            // on normal exit; it is not restored if an exception escapes.
            public void OnEachPart(GMime.Object mime_part)
            {
                GMime.Object part = null;
                bool         part_needs_dispose = false;

                //for (int i = 0; i < this.depth; i++)
                //  Console.Write ("  ");
                //Console.WriteLine ("Content-Type: {0}", mime_part.ContentType);

                ++depth;

                if (mime_part is GMime.MessagePart)
                {
                    GMime.MessagePart msg_part = (GMime.MessagePart)mime_part;

                    using (GMime.Message message = msg_part.Message) {
                        using (GMime.Object subpart = message.MimePart)
                            this.OnEachPart(subpart);
                    }
                }
                else if (mime_part is GMime.Multipart)
                {
                    GMime.Multipart multipart = (GMime.Multipart)mime_part;
                    int             num_parts = multipart.Count;

                    // If the mimetype is multipart/alternative, we only want to index
                    // one part -- the richest one we can filter.
                    if (mime_part.ContentType.MediaSubtype.ToLower() == "alternative")
                    {
                        // The richest formats are at the end, so work from there
                        // backward.
                        for (int i = num_parts - 1; i >= 0; i--)
                        {
                            GMime.Object subpart = multipart[i];

                            if (IsMimeTypeHandled(subpart.ContentType.ToString()))
                            {
                                // Kept alive past this loop; disposed at the
                                // end of the method via part_needs_dispose.
                                part = subpart;
                                part_needs_dispose = true;
                                break;
                            }
                            else
                            {
                                subpart.Dispose();
                            }
                        }
                    }

                    // If it's not alternative, or we don't know how to filter any of
                    // the parts, treat them like a bunch of attachments.
                    if (part == null)
                    {
                        for (int i = 0; i < num_parts; i++)
                        {
                            using (GMime.Object subpart = multipart[i])
                                this.OnEachPart(subpart);
                        }
                    }
                }
                else if (mime_part is GMime.Part)
                {
                    part = mime_part;
                }
                else
                {
                    // `part` is still null on this path, so the type must be
                    // taken from mime_part; using `part` here raised a
                    // NullReferenceException instead of this message.
                    throw new Exception(String.Format("Unknown part type: {0}", mime_part.GetType()));
                }

                if (part != null)
                {
                    System.IO.Stream stream = null;

                    using (GMime.DataWrapper content_obj = ((GMime.Part)part).ContentObject)
                        stream = content_obj.Stream;

                    // If this is the only part and it's plain text, we
                    // want to just attach it to our filter instead of
                    // creating a child indexable for it.
                    bool no_child_needed = false;

                    string mime_type = part.ContentType.ToString().ToLower();

                    // depth == 1 and count == 0: the first part processed at
                    // the top level of the message.
                    if (this.depth == 1 && this.count == 0)
                    {
                        if (mime_type == "text/plain")
                        {
                            no_child_needed = true;

                            this.reader = new StreamReader(stream);
                        }
                        else if (mime_type == "text/html")
                        {
                            no_child_needed = true;
                            html_part       = true;
                            string enc = part.ContentType.GetParameter("charset");
                            // DataWrapper.Stream is a very limited stream
                            // and does not allow Seek or Tell
                            // HtmlFilter requires Stream.Position=0.
                            // Play safe and create a memorystream
                            // for HTML parsing.

                            GMime.StreamMem mem_stream;
                            mem_stream = new GMime.StreamMem();

                            GMime.Stream data_stream;
                            data_stream = ((StreamWrapper)stream).GMimeStream;
                            data_stream.WriteToStream(mem_stream);
                            data_stream.Flush();

                            // The StreamWrapper and hence the memory_stream
                            // will be closed when the reader is closed
                            // after Pull()-ing is done.
                            System.IO.Stream html_stream;
                            html_stream = new StreamWrapper(mem_stream);
                            html_stream.Seek(0, SeekOrigin.Begin);

                            stream.Close();

                            try {
                                this.reader = FilterHtml.GetHtmlReader(html_stream, enc, link_handler);
                            } catch (Exception e) {
                                // Best effort: on failure the HTML body is
                                // simply not indexed.
                                Log.Debug(e, "Exception while filtering HTML email {0}", this.indexable.Uri);
                                this.reader = null;
                                html_stream.Close();
                                html_part = false;
                            }
                        }
                    }

                    if (!no_child_needed)
                    {
                        // Check the mime type against the blacklist and don't index any
                        // parts that are contained within.  That way the user doesn't
                        // get flooded with pointless signatures and vcard and ical
                        // attachments along with (real) attachments.

                        if (Array.IndexOf(blacklisted_mime_types, mime_type) == -1)
                        {
                            // Children are addressed by their running part number.
                            string    sub_uri = "#" + this.count;
                            Indexable child;
                            child = new Indexable(UriFu.AddFragment(this.indexable.Uri, sub_uri, true));

                            child.DisplayUri = new Uri(this.indexable.DisplayUri.ToString() + "#" + this.count);

                            // This is a special case.
                            // Even for mails found on disk, MailMessage hitype is set
                            child.HitType  = "MailMessage";
                            child.MimeType = mime_type;

                            // If this is the richest part we found for multipart emails, add its content to textcache
                            if (snippet_attachment ||
                                (this.depth == 1 && this.count == 0))
                            {
                                child.CacheContent = true;
                            }
                            else
                            {
                                child.CacheContent = false;
                            }

                            string filename = ((GMime.Part)part).Filename;

                            if (!String.IsNullOrEmpty(filename))
                            {
                                child.AddProperty(Property.NewKeyword("fixme:attachment_title", filename));

                                foreach (Property prop in Property.StandardFileProperties(filename, false))
                                {
                                    child.AddProperty(prop);
                                }
                            }

                            // Store length of attachment
                            long length = stream.Length;
                            if (length != -1)
                            {
                                child.AddProperty(Property.NewUnsearched("fixme:filesize", length));
                            }

                            if (part.ContentType.MediaType.ToLower() == "text")
                            {
                                child.SetTextReader(new StreamReader(stream));
                            }
                            else
                            {
                                child.SetBinaryStream(stream);
                            }

                            child.SetChildOf(this.indexable);
                            child.StoreStream();
                            child.CloseStreams();
                            this.child_indexables.Add(child);
                        }
                        else
                        {
                            Log.Debug("Skipping attachment {0}#{1} with blacklisted mime type {2}",
                                      this.indexable.Uri, this.count, mime_type);
                        }
                    }

                    // Counted for every processed part, including blacklisted
                    // ones and the one attached directly to this filter.
                    this.count++;
                }

                if (part_needs_dispose)
                {
                    part.Dispose();
                }

                --depth;
            }