Exemplo n.º 1
0
        /// <summary>
        /// Smoke test: tokenizes Data/Zoo.xml and prints every generated tag.
        /// Nothing is asserted; the test only fails if parsing or printing throws.
        /// </summary>
        public void TestParseNormalTags()
        {
            char[] source = File.ReadAllText("Data/Zoo.xml").ToCharArray();
            XTag[] parsed = XTag.Generate(source).ToArray();

            foreach (XTag tag in parsed)
            {
                Console.WriteLine(tag);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Smoke test: tokenizes Data/XML.xml and prints every generated tag.
        /// Nothing is asserted; the test only fails if parsing or printing throws.
        /// </summary>
        public void TestXmlSpec()
        {
            char[] source = File.ReadAllText("Data/XML.xml").ToCharArray();
            XTag[] parsed = XTag.Generate(source).ToArray();

            foreach (XTag tag in parsed)
            {
                Console.WriteLine(tag);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Creates a manifest item from an element, copying the href, id, media-type
        /// and properties attributes of the element's tag.
        /// </summary>
        /// <param name="e">Element whose tag supplies the attribute values.</param>
        /// <param name="belongTo">EPUB file stored as the owner of this item.</param>
        public ManifestItem(XELement e, EpubFile belongTo)
        {
            this.belongTo = belongTo;

            XTag source = e.tag;

            // Per the original note, the OPF path is added to href later, in ReadSpine().
            href = source.GetAttribute("href");
            id = source.GetAttribute("id");
            mediaType = source.GetAttribute("media-type");
            properties = source.GetAttribute("properties");
        }
Exemplo n.º 4
0
        /// <summary>
        /// Parses one self-closing tag whose name, attribute names and attribute values contain
        /// character entities, then checks both its default string form and its form printed
        /// with XDocument.ShowAllPrintConfig.
        /// </summary>
        public void TestPrint()
        {
            string markup = "<sample&amp; key&lt;='Tom&apos; book' noValue authors&gt;=\"Eric &amp; Frank\" />";
            XTag parsed = XTag.Generate(markup.ToCharArray()).First();

            // Default string form.
            string expectedDefault = "<sample& sample&=\"Eric & Frank\" sample&=\"Tom' book\" sample& />";
            Console.WriteLine(parsed.ToString());
            Assert.AreEqual(expectedDefault, parsed.ToString());

            // Form printed with the "show all" configuration.
            string expectedPrinted = "<sample&amp; key&lt;=\"Tom&#39; book\" noValue authors&gt;=\"Eric &amp; Frank\" />";
            Console.WriteLine(parsed.Print(XDocument.ShowAllPrintConfig));
            Assert.AreEqual(expectedPrinted, parsed.Print(XDocument.ShowAllPrintConfig));
        }
Exemplo n.º 5
0
    /// <summary>
    /// Converts the body of one XHTML item into plain text and writes it to
    /// "epub2note_output/{name}.txt". Ruby annotations (rt) are wrapped in parentheses,
    /// each paragraph is prefixed with "//" and ended with a line break, and each closing
    /// div adds a line break. Nothing is written when the trimmed text content is empty.
    /// </summary>
    /// <param name="i">Item whose data holds the XHTML and whose fullName names the output file.</param>
    static void ProcXHTML(TextItem i)
    {
        Log.log("[Info]" + i.fullName);
        string name = Path.GetFileNameWithoutExtension(i.fullName);

        // Line breaks are removed first so the body pattern can span the whole document.
        string flattened = i.data.Replace("\r", "").Replace("\n", "");
        Match bodyMatch = Regex.Match(flattened, "<body(.*)</body>");
        if (!bodyMatch.Success)
        {
            Log.log("[Error]body?");
            return;
        }

        XFragment fragment = new XFragment(bodyMatch.Groups[0].Value, 0);
        string txt = "";
        string counter = ""; // text-only accumulator, used to detect bodies without text

        foreach (var part in fragment.parts)
        {
            if (part.GetType() == typeof(XText))
            {
                string trimmed = Util.Trim(part.originalText);
                txt += trimmed;
                counter += trimmed;
            }
            else if (part.GetType() == typeof(XTag))
            {
                XTag tag = (XTag)part;
                bool opens = part.type == PartType.tag_start;
                bool closes = part.type == PartType.tag_end;

                switch (tag.tagname)
                {
                case "rt":
                    if (opens)
                    {
                        txt += "(";
                    }
                    if (closes)
                    {
                        txt += ")";
                    }
                    break;

                case "p":
                    if (opens)
                    {
                        txt += "//";
                    }
                    if (closes)
                    {
                        txt += "\r\n";
                    }
                    break;

                case "div":
                    if (closes)
                    {
                        txt += "\r\n";
                    }
                    break;
                }
            }
        }

        if (Util.Trim(counter).Length > 0)
        {
            File.WriteAllText("epub2note_output/" + name + ".txt", txt);
        }
    }
Exemplo n.º 6
0
        /// <summary>
        /// Smoke test for non-element markup: parses a processing instruction and a multi-line
        /// comment, printing the first with ToString() and the second with
        /// XDocument.ShowAllPrintConfig. Nothing is asserted.
        /// </summary>
        public void TestDescriptiveTags()
        {
            // Processing instruction.
            string instruction = "<?xml-stylesheet type=\"text/xsl\" href=\"REC-xml.xsl\"?>";
            XTag parsedInstruction = XTag.Generate(instruction.ToCharArray()).First();
            Console.WriteLine(parsedInstruction.ToString());

            // Comment spanning several lines (verbatim literal, line breaks are significant).
            string comment = @"<!--
Notes on preparation of the Fifth Edition:
	
- Worked from http://www.w3.org/XML/xml-V10-4e-errata -->";
            XTag parsedComment = XTag.Generate(comment.ToCharArray()).First();
            Console.WriteLine(parsedComment.Print(XDocument.ShowAllPrintConfig));
        }
Exemplo n.º 7
0
    /// <summary>
    /// Finds the first tag named <paramref name="tagName"/> in <paramref name="text"/>,
    /// searching from <paramref name="pos"/>. A candidate must start with '&lt;' + tagName
    /// followed by a non-letter character and be closed by '&gt;' on the same line.
    /// </summary>
    /// <param name="tagName">Tag name to look for; it is matched literally.</param>
    /// <param name="text">Text to search.</param>
    /// <param name="pos">
    /// On entry, the index to start searching from. When a tag is found it is set to the
    /// index where that tag starts; it is left unchanged when nothing is found.
    /// </param>
    /// <returns>An XTag built from the matched tag text, or null when no tag is found.</returns>
    public static XTag FindTag(string tagName, string text, ref int pos)
    {
        // Regex.Escape keeps metacharacters in the name from being read as pattern syntax.
        // The lookahead preserves the rule that the name must be followed by a non-letter
        // (so "p" does not match "<pre>"). Matching the closing '>' in the same pattern
        // guarantees the returned tag starts exactly where the name matched; the previous
        // two-step search could land on a later, different tag or on a failed match.
        Regex tagPattern = new Regex("<" + Regex.Escape(tagName) + "(?=[^a-zA-Z]).*?>");
        Match found = tagPattern.Match(text, pos);

        if (!found.Success)
        {
            return(null);
        }

        pos = found.Index;
        return(new XTag(found.Value));
    }
Exemplo n.º 8
0
        /// <summary>
        /// Inspects the document head: adds the resolved URL of every link tag of type
        /// text/css to css, then removes the first script element whose src contains
        /// "notereplace.js". Logs a warning and does nothing when no head is found.
        /// </summary>
        void CheckHead()
        {
            Match head = new Regex("<head>[\\w\\W]*?</head>").Match(text);
            if (!head.Success)
            {
                Log.log("[Warn]No head tag in xhtml");
                return;
            }

            // Stylesheet links are looked up inside the head only.
            Regex reg_link = new Regex("<link .*?>");
            foreach (Match link in reg_link.Matches(head.Value))
            {
                XTag linkTag = new XTag(link.Value);
                if (linkTag.GetAttribute("type").ToLower() != "text/css")
                {
                    continue;
                }
                string url = linkTag.GetAttribute("href");
                css.Add(Util.ReferPath(xhtml.fullName, url));
            }

            // Script tags are searched from the start of the head through the rest of the text.
            Regex reg_script = new Regex("<script .*?>");
            string scpt_end = "</script>";
            for (Match scpt = reg_script.Match(text, head.Index);
                 scpt.Success;
                 scpt = reg_script.Match(text, scpt.Index + scpt.Length))
            {
                XTag scriptTag = new XTag(scpt.Value);
                if (!scriptTag.GetAttribute("src").Contains("notereplace.js"))
                {
                    continue;
                }

                int sei = text.IndexOf(scpt_end, scpt.Index);
                if (sei < 0)
                {
                    Log.log("[Error]Unclosed script tag.");
                    break;
                }

                // Remove everything from the opening tag through the closing tag.
                text = text.Remove(scpt.Index, sei - scpt.Index + scpt_end.Length);
                Log.log("[Info ]Removed reference to notereplace.js");
                break;
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Normalizes one footnote reference link and rewrites it in text: ensures the
        /// duokan-footnote class and epub:type="noteref", derives the note id from the part of
        /// href after '#', sets the link id to "{note id}_ref", replaces the matched tag text,
        /// and finally calls ProcNoteContent for the note itself.
        /// </summary>
        /// <param name="m">Match locating the link tag inside text.</param>
        /// <param name="tag">Parsed form of the matched link tag; it is modified in place.</param>
        void ProcNote(Match m, XTag tag)
        {
            // Class attribute: append duokan-footnote when missing, space-separated if other classes exist.
            var classNames = tag.GetClassNames();
            if (!Util.Contains(classNames, "duokan-footnote"))
            {
                string separator = classNames.Length != 0 ? " " : "";
                tag.SetAttribute("class", tag.GetAttribute("class") + separator + "duokan-footnote");
            }

            if (tag.GetAttribute("epub:type") != "noteref")
            {
                tag.SetAttribute("epub:type", "noteref");
            }

            // The note id is whatever follows '#' in href; a link without '#' is rejected.
            string href = tag.GetAttribute("href");
            int hashAt = href.IndexOf('#');
            if (hashAt < 0)
            {
                Log.log("[Error]Not a valid link :" + href);
                return;
            }
            if (hashAt != 0)
            {
                Log.log("[Warn ]href=\":" + href + "\"");
            }

            string note_id = href.Substring(hashAt + 1);
            string ref_id = note_id + "_ref";
            tag.SetAttribute("id", ref_id);

            // Swap the original tag text for the updated tag.
            text = text.Remove(m.Index, m.Length);
            text = text.Insert(m.Index, tag.ToString());

            // Note content
            ProcNoteContent(note_id, ref_id);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Walks the "item" tags of the EPUB's OPF data and removes the first one whose href
        /// file name is "notereplace.js".
        /// </summary>
        /// <param name="epub">EPUB whose OPF item is edited in place.</param>
        public ProcOPF(Epub epub)
        {
            TextItem opf = epub.OPF;
            int pos = 0;

            // pos is set by FindTag to the start of each found tag; advancing it by one
            // makes the next search continue past that tag.
            for (XTag item = XTag.FindTag("item", opf.data, ref pos);
                 item != null;
                 pos++, item = XTag.FindTag("item", opf.data, ref pos))
            {
                if (Path.GetFileName(item.GetAttribute("href")) != "notereplace.js")
                {
                    continue;
                }

                opf.data = opf.data.Remove(pos, item.originalText.Length);
                Log.log("[Info ]Removed item reference to notereplace.js");
                break;
            }
        }
Exemplo n.º 11
0
        /// <summary>
        /// Scans text for anchor tags and passes every footnote reference (class
        /// duokan-footnote, or epub:type="noteref") to ProcNote.
        /// </summary>
        void CheckFootnotes()
        {
            Regex reg_link = new Regex("<a .*?>");

            // The search resumes one character past the start of the previous link, reading
            // text again on each step because ProcNote may have rewritten it.
            // Assumes every note body is located after the link that refers to it.
            for (Match link = reg_link.Match(text);
                 link.Success;
                 link = reg_link.Match(text, link.Index + 1))
            {
                XTag tag = new XTag(link.Value);
                bool isNoteRef = Util.Contains(tag.GetClassNames(), "duokan-footnote") ||
                                 tag.GetAttribute("epub:type") == "noteref";
                if (isNoteRef)
                {
                    ProcNote(link, tag);
                }
            }
        }
Exemplo n.º 12
0
        /// <summary>
        /// Reads the package tag attributes of the OPF, resets all metadata lists, and then
        /// delegates to ReadMeta3 for version "3.0" or to ReadMeta2 for any other version.
        /// </summary>
        public void ReadMeta()
        {
            var packageTag = XTag.FindTag("package", OPF.text);

            idref    = packageTag.GetAttribute("unique-identifier");
            _version = packageTag.GetAttribute("version");
            xml_lang = packageTag.GetAttribute("xml:lang"); // original note: bookwalker

            // Start from empty metadata collections on every read.
            dc_titles     = new List<MetaRecord>();
            dc_creators   = new List<MetaRecord>();
            dc_language   = new List<MetaRecord>();
            dc_identifier = new List<MetaRecord>();
            others        = new List<MetaRecord>();
            meta          = new List<MetaRecord>();

            if (_version == "3.0")
            {
                ReadMeta3();
            }
            else
            {
                ReadMeta2();
            }
        }
Exemplo n.º 13
0
    /// <summary>
    /// Parses one balanced fragment of markup that begins at <paramref name="start"/>.
    /// Tags and the text between them are appended to parts until the number of closing
    /// tags equals the number of opening tags; originalLength is the length of the consumed
    /// span and root is built from the collected parts.
    /// </summary>
    /// <param name="text">Source text containing the fragment.</param>
    /// <param name="start">Index in text of the fragment's first character, which must be '&lt;'.</param>
    /// <exception cref="XMLException">
    /// Thrown when the character at start is not '&lt;', or when no further tag can be found
    /// before the fragment's tags are balanced.
    /// </exception>
    public XFragment(string text, int start)
    {
        if (text[start] != '<')
        {
            throw new XMLException("XFragment Error:Unexpect Start.");
        }
        indexInSource = start;

        // Matches '<' ... '>' spans that contain no '!' character.
        Regex reg_tag = new Regex("<[^\\!]*?>");
        int   count = 0, pos = start;
        Match m;

        do
        {
            m = reg_tag.Match(text, pos);
            if (!m.Success)
            {
                // The exception used to be constructed without being thrown, so parsing
                // carried on with a failed match (index 0, empty value) and could restart
                // from the beginning of the text indefinitely.
                throw new XMLException("XFragment Error:Unexpect end.");
            }

            // count tracks the nesting depth: +1 per opening tag, -1 per closing tag.
            XTag tag = new XTag(m.Value);
            if (tag.type == PartType.tag_start)
            {
                count++;
            }
            if (tag.type == PartType.tag_end)
            {
                count--;
            }

            // Text between the previous position and this tag becomes its own part.
            if (m.Index > pos)
            {
                parts.Add(new XText(text.Substring(pos, m.Index - pos)));
            }
            parts.Add(tag);
            pos = m.Index + m.Value.Length;
        }while (count > 0);

        originalLength = m.Index - start + m.Value.Length;
        root           = new XELement(this, 0);
    }
Exemplo n.º 14
0
    /// <summary>
    /// Converts the body of one XHTML item into a commentable text layout and writes it to
    /// "epub2comment_output/{name}.txt". Each paragraph starts with "##" and is followed by a
    /// line holding an empty bracket pair (when the latest text part started with an opening
    /// corner bracket) and a "##" separator line; img tags are copied verbatim and rt content
    /// is wrapped in parentheses. Nothing is written when the trimmed text content is empty.
    /// </summary>
    /// <param name="i">Item whose data holds the XHTML and whose fullName names the output file.</param>
    static void ProcXHTML(TextItem i)
    {
        Log.log("[Info]" + i.fullName);
        string name = Path.GetFileNameWithoutExtension(i.fullName);

        // Line breaks are removed first so the body pattern can span the whole document.
        string flattened = i.data.Replace("\r", "").Replace("\n", "");
        Match bodyMatch = Regex.Match(flattened, "<body(.*)</body>");
        if (!bodyMatch.Success)
        {
            Log.log("[Error]body?");
            return;
        }

        XFragment fragment = new XFragment(bodyMatch.Groups[0].Value, 0);
        string txt = "";
        string counter = ""; // text-only accumulator, used to detect bodies without text
        string temp = "";    // bracket pair chosen from the first character of the latest text part

        foreach (var part in fragment.parts)
        {
            if (part.GetType() == typeof(XText))
            {
                string trimed = Util.Trim(part.originalText);
                txt     += trimed;
                counter += trimed;

                char first = trimed.Length > 0 ? trimed[0] : '\0';
                if (first == '「')
                {
                    temp = "「」";
                }
                else if (first == '『')
                {
                    temp = "『』";
                }
                else
                {
                    temp = "";
                }
            }
            else if (part.GetType() == typeof(XTag))
            {
                XTag tag = (XTag)part;
                bool opens = part.type == PartType.tag_start;
                bool closes = part.type == PartType.tag_end;

                switch (tag.tagname)
                {
                case "img":
                    // Images are kept as markup regardless of the tag type.
                    txt += tag.originalText;
                    break;

                case "rt":
                    if (opens)
                    {
                        txt += "(";
                    }
                    if (closes)
                    {
                        txt += ")";
                    }
                    break;

                case "p":
                    if (opens)
                    {
                        txt += "##";
                    }
                    if (closes)
                    {
                        txt += "\r\n" + temp + "\r\n##——————\r\n";
                    }
                    break;

                case "div":
                    if (closes)
                    {
                        txt += "\r\n\r\n##——————\r\n";
                    }
                    break;
                }
            }
        }

        if (Util.Trim(counter).Length > 0)
        {
            File.WriteAllText("epub2comment_output/" + name + ".txt", txt);
        }
    }
Exemplo n.º 15
0
        /// <summary>
        /// Stores a new wiki entry: adds a Category or Tag element for every category or tag
        /// of <paramref name="entry"/> whose text is not present yet, appends a WikiEntry
        /// element describing the entry, and saves the document to file.
        /// </summary>
        /// <param name="entry">Entry supplying categories, tags, title and content.</param>
        public static void CreateEntry(WikiEntry entry)
        {
            // Unknown categories get the id of the last existing category plus one.
            foreach (var item in entry.Categories)
            {
                bool known = XCategories.Any(c => c.Attribute("Text").Value == item.Text);
                if (known)
                {
                    continue;
                }

                int lastCategoryId = Convert.ToInt32(XCategories.Last().Attribute("Id").Value);
                string categoryId = (lastCategoryId + 1).ToString();

                XElement category = new XElement("Category",
                                                 new XAttribute("Id", categoryId),
                                                 new XAttribute("Text", item.Text));
                XCategory.Add(category);
            }

            // Unknown tags are handled the same way.
            foreach (var item in entry.Tags)
            {
                bool known = XTags.Any(c => c.Attribute("Text").Value == item.Text);
                if (known)
                {
                    continue;
                }

                int lastTagId = Convert.ToInt32(XTags.Last().Attribute("Id").Value);
                string tagId = (lastTagId + 1).ToString();

                XElement tagElement = new XElement("Tag",
                                                   new XAttribute("Id", tagId),
                                                   new XAttribute("Text", item.Text));
                XTag.Add(tagElement);
            }

            // The entry references its categories and tags by the ids carried on the entry itself.
            XElement wikiEntry = new XElement(
                "WikiEntry",
                new XAttribute("Id", Guid.NewGuid()),
                new XAttribute("CreatedBy", "*****@*****.**"),
                new XAttribute("CreatedAt", DateTimeOffset.UtcNow.ToString()),
                new XAttribute("UpdatedBy", "*****@*****.**"),
                new XAttribute("UpdatedAt", DateTimeOffset.UtcNow.ToString()),
                new XAttribute("CategoryIds", string.Join(",", entry.Categories.Select(x => x.Id))),
                new XAttribute("TagIds", string.Join(",", entry.Tags.Select(x => x.Id))),
                new XElement("Title", entry.Title),
                new XElement("Content", new XCData(entry.Content)));

            XWikiEntries.Add(wikiEntry);
            xDoc.Save(file);
        }
Exemplo n.º 16
0
    /// <summary>
    /// Converts the body of one XHTML item into lightly marked-up text and writes it to
    /// output_dir + "{name}.txt". Headings h1..h6 become "[hN]...[/hN]" lines, body tags and
    /// opening p tags are dropped, closing p tags become line breaks, closing div tags become
    /// "&lt;/div&gt;" plus a line break, and every other tag is copied verbatim.
    /// Nothing is written when the trimmed text content is empty.
    /// </summary>
    /// <param name="i">Item whose data holds the XHTML and whose fullName names the output file.</param>
    static void ProcXHTML(TextItem i)
    {
        Log.log("[Info]" + i.fullName);
        string name = Path.GetFileNameWithoutExtension(i.fullName);

        // Line breaks are removed first so the body pattern can span the whole document.
        string flattened = i.data.Replace("\r", "").Replace("\n", "");
        Match bodyMatch = Regex.Match(flattened, "<body(.*)</body>");
        if (!bodyMatch.Success)
        {
            Log.log("[Error]body?");
            return;
        }

        XFragment fragment = new XFragment(bodyMatch.Groups[0].Value, 0);
        string txt = "";
        string counter = ""; // text-only accumulator, used to detect bodies without text

        foreach (var part in fragment.parts)
        {
            if (part.GetType() == typeof(XText))
            {
                string trimed = Util.Trim(part.originalText);
                txt     += trimed;
                counter += trimed;
            }
            else if (part.GetType() == typeof(XTag))
            {
                XTag tag = (XTag)part;
                string tagName = tag.tagname;
                bool opens = part.type == PartType.tag_start;
                bool closes = part.type == PartType.tag_end;
                bool heading = tagName == "h1" || tagName == "h2" || tagName == "h3" ||
                               tagName == "h4" || tagName == "h5" || tagName == "h6";

                if (heading && opens)
                {
                    txt += "[" + tagName + "]";
                }
                else if (heading && closes)
                {
                    txt += "[/" + tagName + "]\r\n";
                }
                else if (tagName == "body" && (opens || closes))
                {
                    // The body wrapper itself produces no output.
                }
                else if (tagName == "div" && closes)
                {
                    txt += "</div>\r\n";
                }
                else if (tagName == "p" && opens)
                {
                    // Opening paragraph tags produce no output.
                }
                else if (tagName == "p" && closes)
                {
                    txt += "\r\n";
                }
                else
                {
                    // Any other tag is kept as markup.
                    txt += tag.originalText;
                }
            }
        }

        if (Util.Trim(counter).Length > 0)
        {
            File.WriteAllText(output_dir + name + ".txt", txt);
        }
    }
Exemplo n.º 17
0
        /// <summary>
        /// Processes one XHTML item in two steps. First, the item's first link tag is checked:
        /// when its type is text/css, the referenced stylesheet item is added to css unless an
        /// item with the same full name is already there. Second, the class ae_draw_out is
        /// added to every p tag whose immediately following character is one of a fixed set of
        /// opening characters, and the number of changed tags is logged.
        /// </summary>
        /// <param name="item">Item whose data is inspected and rewritten in place.</param>
        void ProcXHTML(TextItem item)
        {
            XTag tag = XTag.FindTag("link", item.data);

            if (tag != null)
            {
                if (tag.GetAttribute("type").ToLower() == "text/css")
                {
                    string url = tag.GetAttribute("href");
                    url = Util.ReferPath(item.fullName, url);
                    bool already = false;
                    foreach (var c in css)
                    {
                        if (c.fullName == url)
                        {
                            already = true; break;
                        }
                    }
                    if (!already)
                    {
                        TextItem i = epub.GetItem <TextItem>(url);
                        if (i != null)
                        {
                            css.Add(i);
                        }
                        else
                        {
                            Log.log("[Warn ]Cannot find CSS:" + url);
                        }
                    }
                }
                else
                {
                    Log.log("[Warn ]Cannot find CSS reference.");
                }
            }
            else
            {
                Log.log("[Warn ]Cannot find CSS reference.");
            }

            int pos = 0;

            tag = XTag.FindTag("p", item.data, ref pos);
            int count = 0;

            while (tag != null)
            {
                // Index of the first character after the opening tag. A tag that ends the
                // data has no following character, so indexing there would throw.
                int next = pos + tag.originalText.Length;
                if (next < item.data.Length)
                {
                    switch (item.data[next])
                    {
                    case '「':
                    case '(':
                    case '『':
                    case '<':
                    case '《':
                        // Replace the original tag text with the tag carrying the extra class.
                        tag.AddClassName("ae_draw_out");
                        item.data = item.data.Remove(pos, tag.originalText.Length);
                        item.data = item.data.Insert(pos, tag.ToString());
                        count++;
                        break;
                    }
                }

                // Step past the start of this tag so the next search finds the following one.
                pos++;
                tag = XTag.FindTag("p", item.data, ref pos);
            }
            Log.log("[Info ]Added class for " + count + " elements in " + item.fullName);
        }
Exemplo n.º 18
0
        /// <summary>
        /// Finds the body of the footnote identified by <paramref name="note_id"/>, cleans it
        /// up and replaces it in text with template formatted from note_id, ref_id and the
        /// cleaned content. The note is looked up first as an aside element with that id and,
        /// failing that, inside an ol element with class duokan-footnote-content. Sets
        /// contain_footnote on success; logs an error and returns when the note cannot be
        /// found or parsed.
        /// </summary>
        /// <param name="note_id">Id of the note element to locate.</param>
        /// <param name="ref_id">Id of the referring link, passed through to the template.</param>
        void ProcNoteContent(string note_id, string ref_id)
        {
            string log = "";
            Regex  reg_duokan = new Regex("<ol .*?>");
            Regex  reg_aside = new Regex("<aside .*?>");
            int    index = -1, length = 0;
            string note_content = null;

            Match m = reg_aside.Match(text);

            while (m.Success)
            {
                XTag tag = new XTag(m.Value);
                if (tag.GetAttribute("id") == note_id)
                {
                    index = m.Index;
                    log  += "aside; ";
                    XFragment frag = new XFragment(text, index);
                    if (frag.root != null)
                    {
                        var dk = frag.root.GetElementById(note_id);
                        if (dk != null)
                        {
                            // Already made compatible: a Duokan li nested inside the aside.
                            note_content = dk.innerXHTML;
                            log         += "duplicate id at <" + dk.tag.tagname + ">" + dk.tag.GetAttribute("class") + "; ";
                        }
                        else
                        {
                            note_content = frag.root.innerXHTML;
                        }
                        length = frag.originalLength;
                    }
                    else
                    {
                        Log.log("[Error]Found note but failure on parsing. id=" + note_id); return;
                    }
                    break;
                }
                m = m.NextMatch();
            }

            if (index < 0)// Only adapted for Duokan: there is no aside element.
            {
                m = reg_duokan.Match(text);
                while (m.Success)
                {
                    XFragment frg = new XFragment(text, m.Index);
                    if (frg.root != null)
                    {
                        if (Util.Contains(frg.root.tag.GetClassNames(), "duokan-footnote-content"))
                        {
                            var a = frg.root.GetElementById(note_id);
                            if (a != null)
                            {
                                index        = m.Index;
                                note_content = a.innerXHTML;
                                length       = frg.originalLength;
                                log         += "duokan-footnote; ";
                                break;
                            }
                        }
                    }
                    m = m.NextMatch();
                }
            }

            if (note_content == null)
            {
                Log.log("[Error]cannot find note"); return;
            }

            // Drop empty anchor elements from the note content.
            {
                Match ma = Regex.Match(note_content, "<a .*?></a>");
                if (ma.Success)
                {
                    note_content = Regex.Replace(note_content, "<a .*?></a>", "");
                    log         += "empty <a> tag;";
                }
            }
            note_content = Util.Trim(note_content);

            // Unwrap a leading div so only its inner markup is kept.
            if (note_content.StartsWith("<div"))
            {
                log += "<div>;";
                XFragment f = new XFragment(note_content, 0);
                // A freshly constructed fragment is never null; what can be missing is its
                // root, so that is what has to be checked before dereferencing it.
                if (f.root != null)
                {
                    note_content = f.root.innerXHTML;
                }
            }
            string note_full = string.Format(template, note_id, ref_id, note_content);

            // Replace the original note span with the formatted note.
            text = text.Remove(index, length);
            text = text.Insert(index, note_full);
            Log.log("[Info ]Detected id=" + note_id + ":" + log);
            Log.log("[Info ]Formated id=" + note_id + ":" + note_content);
            contain_footnote = true;
        }
        /// <summary>
        /// Processes one XHTML item in two steps. First, the item's first link tag is checked:
        /// when its type is text/css, the referenced stylesheet item is added to css unless an
        /// item with the same full name is already there. Second, every p element whose text
        /// (markup and spaces removed) is one of a fixed set of separator strings gets the
        /// class ae_center; other p elements with fewer than four characters of inner markup
        /// are only logged. The number of changed elements is logged at the end.
        /// </summary>
        /// <param name="item">Item whose data is inspected and rewritten in place.</param>
        void ProcXHTML(TextItem item)
        {
            XTag tag = XTag.FindTag("link", item.data);

            if (tag != null)
            {
                if (tag.GetAttribute("type").ToLower() == "text/css")
                {
                    string url = tag.GetAttribute("href");
                    url = Util.ReferPath(item.fullName, url);
                    bool already = false;
                    foreach (var c in css)
                    {
                        if (c.fullName == url)
                        {
                            already = true; break;
                        }
                    }
                    if (!already)
                    {
                        TextItem i = epub.GetItem <TextItem>(url);
                        if (i != null)
                        {
                            css.Add(i);
                        }
                        else
                        {
                            Log.log("[Warn ]Cannot find CSS:" + url);
                        }
                    }
                }
                else
                {
                    Log.log("[Warn ]Cannot find CSS reference.");
                }
            }
            else
            {
                Log.log("[Warn ]Cannot find CSS reference.");
            }

            int pos = 0;

            tag = XTag.FindTag("p", item.data, ref pos);
            int count = 0;

            while (tag != null)
            {
                XFragment p = new XFragment(item.data, pos);

                // The fragment root is null-checked elsewhere in this file before use, so an
                // element that could not be built is skipped here instead of dereferenced.
                if (p.root == null)
                {
                    Log.log("[Warn ]Cannot parse <p> element in " + item.fullName);
                }
                else
                {
                    string onlytext = Regex.Replace(p.root.innerXHTML, "<.*?>", "");

                    // NOTE(review): the source as seen removed the ASCII space twice and listed
                    // case "*" twice (a duplicate case label does not compile). Presumably the
                    // second of each pair was the full-width form (U+3000 ideographic space,
                    // U+FF0A full-width asterisk) - confirm against the original file.
                    string nospace = onlytext.Replace(" ", "").Replace("\u3000", "");
                    switch (nospace)
                    {
                    case "*":
                    case "\uFF0A":
                    case "***":
                    case "※":
                    case "※※※":
                    case "◆":
                    case "◇":
                    case "●":
                    case "☆":
                    case "⭐":
                    case "×××":

                        // Replace the original tag text with the tag carrying the extra class.
                        tag.AddClassName("ae_center");
                        item.data = item.data.Remove(pos, tag.originalText.Length);
                        item.data = item.data.Insert(pos, tag.ToString());
                        Log.log("[Info ]Detect Separator:" + p.root.innerXHTML);
                        count++;
                        break;

                    default:
                        if (p.root.innerXHTML.Length < 4)
                        {
                            Log.log("[ Info]Short <p> element:" + p.root.innerXHTML);
                        }
                        break;
                    }
                }

                // Step past the start of this tag so the next search finds the following one.
                pos++;
                tag = XTag.FindTag("p", item.data, ref pos);
            }
            Log.log("[Info ]Added class for " + count + " elements in " + item.fullName);
        }