/// <summary>
/// Reads the stylesheet referenced by the HTML, rewrites it and stores the
/// result back into the archive, then saves the OPF fix.
/// Shows a message box and stops early when the stylesheet is missing or empty.
/// </summary>
internal override void UpdateFile()
{
    // Get the body tag and the CSS file from an HTML file
    // (CSSFile is passed by ref, so the lookup may replace it).
    MyHtmlDocument htmlDocument = new MyHtmlDocument();
    BodyTag = htmlDocument.FindBodyStyleClass(ref CSSFile);

    fileExtractStream = base.GetStream(CSSFile);
    if (fileExtractStream == null)
    {
        System.Windows.Forms.MessageBox.Show("No Stylesheet present", Variables.BookName);
        return;
    }

    this.ReadCSS();
    if (cssOutput.Count == 0)
    {
        System.Windows.Forms.MessageBox.Show("Stylesheet is empty", Variables.BookName);
        return;
    }

    this.WriteCSS();
    base.UpdateZip(fileOutStream);
    SaveOpfFixToFile();

    // Only announce completion after the last file of a single-job run.
    bool isLastFile =
        Variables.Filenames.IndexOf(Variables.Filename) == (Variables.Filenames.Count - 1);
    if (isLastFile && Factory.NumberOfJobs == 1)
    {
        System.Windows.Forms.MessageBox.Show("Your Files are now fixed");
    }
}
/// <summary>
/// Downloads the Atom feed of a Blogspot blog, extracts the title and text of
/// every entry and writes them to a text file.
/// </summary>
/// <param name="mDriver">Chrome driver instance handed to GetLabelForXmlFeed.</param>
/// <param name="fileName">Path of the text file to write; an existing file is overwritten.</param>
/// <param name="Url">URL of the blog to crawl.</param>
/// <returns>The number of entries written.</returns>
internal int Run(ChromeDriver mDriver, string fileName, string Url)
{
    Console.Clear();
    Console.WriteLine("[Blogspot 크롤러 작동중] URL : {0}", Url);

    var label = GetLabelForXmlFeed(mDriver, Url);

    Console.Write("Blogspot RSS Feed 가져오는 중 ...");
    var doc = GetXmlDocument(Url, label);
    Console.WriteLine("성공!");

    var titles = new List<string>();
    var texts = new List<string>();

    // The feed uses the Atom namespace, so every XPath step needs the prefix.
    XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
    nsmgr.AddNamespace("ns", "http://www.w3.org/2005/Atom");
    var entryNodes = doc.SelectNodes("ns:feed/ns:entry", nsmgr);

    Console.Write("XML 파일 파싱 중 ...");
    foreach (XmlNode node in entryNodes)
    {
        // An entry without <title> or <content> must not abort the whole crawl.
        var title = node.SelectSingleNode("ns:title", nsmgr)?.InnerText ?? string.Empty;
        titles.Add(title);

        var html = node.SelectSingleNode("ns:content", nsmgr)?.InnerText ?? string.Empty;
        MyHtmlDocument.LoadHtml(html);

        // SelectNodes yields null (not an empty list) when nothing matches,
        // e.g. when the post body is plain text without any element.
        // In that case fall back to the text of the whole fragment.
        var elements = MyHtmlDocument.DocumentNode.SelectNodes("child::*");
        var text = elements == null
            ? WebUtility.HtmlDecode(MyHtmlDocument.DocumentNode.InnerText)
            : string.Join('\n', elements.Select(o => WebUtility.HtmlDecode(o.InnerText)));
        texts.Add(text);
    }

    // Written in the reverse of feed order
    // (presumably so the oldest post comes first - confirm against the feed).
    titles.Reverse();
    texts.Reverse();
    Console.WriteLine("성공!");

    Console.Write("텍스트 파일 작성 중 ...");
    using (StreamWriter w = new StreamWriter(fileName, false, System.Text.Encoding.Default))
    {
        // titles and texts always have the same length (one Add each per entry).
        // Disposing the writer flushes it, so no per-line Flush is needed.
        for (int i = 0; i < titles.Count; i++)
        {
            w.WriteLine(titles[i] + "\n\n" + texts[i]);
        }
    }
    Console.WriteLine("성공!");
    Console.WriteLine("모든 작업 완료!");

    return texts.Count;
}
/// <summary>
/// Builds a navMap element containing one nav entry for every HTML file
/// reported by the OPF document.
/// </summary>
/// <returns>The newly created navMap element.</returns>
private XElement LoadFiles()
{
    // Get the list of HTML files
    OpfDocument opf = new OpfDocument();
    List<string> htmlFiles = opf.GetFilesList("html");

    // Load the header text detected in each chapter file
    MyHtmlDocument html = new MyHtmlDocument();
    Dictionary<string, DetectedHeaders> headersByFile = html.FindHeaderTextInFile(htmlFiles);

    // Create one NavDetails entry per file
    List<NavDetails> entries = new List<NavDetails>();
    foreach (KeyValuePair<string, DetectedHeaders> pair in headersByFile)
    {
        // The file name is the label unless chapter text is requested
        // and at least one header was detected.
        string label = pair.Key;
        if (Variables.TextInChapters == true && pair.Value.Result.Count != 0)
        {
            label = pair.Value.Result[0];
        }

        entries.Add(new NavDetails(Utils.GetId(pair.Key), pair.Key, label));
    }

    // Convert the entries to XML, skipping those that yield no element
    List<XElement> navPoints = new List<XElement>();
    foreach (NavDetails entry in entries)
    {
        XElement navPoint = ConvertToXElement(entry);
        if (navPoint == null)
        {
            continue;
        }

        navPoints.Add(navPoint);
    }

    return new XElement(ns + "navMap", navPoints);
}
/// <summary>
/// Creates the navMap element: one nav entry per HTML file listed in the OPF,
/// labelled with either the file name or the first header detected in it.
/// </summary>
/// <returns>The navMap element holding all converted entries.</returns>
private XElement LoadFiles()
{
    // Files to include
    var opfDocument = new OpfDocument();
    var htmlFileNames = opfDocument.GetFilesList("html");

    // Header text found in each file
    var chapterReader = new MyHtmlDocument();
    var detectedByFile = chapterReader.FindHeaderTextInFile(htmlFileNames);

    // Phase 1: describe every file as a NavDetails entry
    var navDetails = new List<NavDetails>();
    foreach (var detected in detectedByFile)
    {
        string fileName = detected.Key;
        string caption = Variables.TextInChapters == true
            ? (detected.Value.Result.Count == 0 ? fileName : detected.Value.Result[0])
            : fileName;

        navDetails.Add(new NavDetails(Utils.GetId(fileName), fileName, caption));
    }

    // Phase 2: turn the entries into XML; entries that convert to null are dropped
    var navPoints = new List<XElement>();
    foreach (var details in navDetails)
    {
        XElement converted = ConvertToXElement(details);
        if (converted != null)
        {
            navPoints.Add(converted);
        }
    }

    XElement navMap = new XElement(ns + "navMap", navPoints);
    return navMap;
}
/// <summary>
/// Fills the TOC document with a "Table of Contents" heading followed by one
/// node per element of the nav map, and returns the tidied HTML.
/// </summary>
/// <returns>The tidied outer HTML of the TOC document.</returns>
private string CreateHtmlTOC()
{
    // Heading
    HtmlNode heading = Tocdocument.CreateElement("h1");
    heading.AppendChild(HtmlTextNode.CreateNode("Table of Contents"));
    Tocdocument.DocumentNode.AppendChild(heading);

    // One entry per top-level nav element; the indent counter restarts for each.
    foreach (XElement navElement in NewNavMap.Elements())
    {
        indent = 0;

        HtmlNode entryNode = CreateHtmlTOC(navElement);
        if (entryNode == null)
        {
            continue;
        }

        Tocdocument.DocumentNode.AppendChild(entryNode);
    }

    MyHtmlDocument tidier = new MyHtmlDocument();
    return tidier.TidyHtml(Tocdocument.DocumentNode.OuterHtml);
}
/// <summary>
/// Looks for the cover picture inside the cover file: the first img element,
/// or failing that the first image element. Records its URL (original and
/// cleaned) and loads the picture into BookImage.
/// </summary>
private void GetImage()
{
    CoverFile = GetCoverFile();
    MyHtmlDoc = new MyHtmlDocument();
    HtmlDocument coverHtml = MyHtmlDoc.GetHtml(CoverFile);
    ImageNode = null;

    // Tag name -> attribute holding the picture URL. An array keeps the
    // priority (img before image) explicit; Dictionary enumeration order
    // is not guaranteed.
    KeyValuePair<string, string>[] tagsToCheck =
    {
        new KeyValuePair<string, string>("img", "src"),
        new KeyValuePair<string, string>("image", "xlink:href"),
    };

    if (coverHtml != null)
    {
        foreach (KeyValuePair<string, string> tag in tagsToCheck)
        {
            ImageNode = coverHtml.DocumentNode.SelectSingleNode("//" + tag.Key);
            if (ImageNode == null)
            {
                continue;
            }

            // A tag without its URL attribute cannot be the cover; treat it
            // as "not found" and try the next tag instead of dereferencing null.
            var urlAttribute = ImageNode.Attributes[tag.Value];
            if (urlAttribute == null)
            {
                ImageNode = null;
                continue;
            }

            ImageIsSVG = ImageNode.ParentNode.Name == "svg" && tag.Key == "image";
            ImageURL = urlAttribute.Value;

            // Clean URL
            OriginalImageURL = ImageURL;
            ImageURL = ImageURL.Replace("../", "").Trim();
            ImageURL = Utils.VerifyFilenameEncoding(ImageURL);
            break;
        }
    }

    // NOTE(review): ImageURL is not reset when no picture is found, so a value
    // left over from an earlier call would be used here - confirm that is intended.
    fileExtractStream = GetStream(ImageURL);
    if (fileExtractStream != null)
    {
        BookImage = string.IsNullOrEmpty(ImageURL) ? null : Image.FromStream(fileExtractStream);
    }
}
/// <summary>
/// Extracts the part of <paramref name="filename"/> that starts at the anchor
/// <paramref name="prevId"/> and ends just before the anchor <paramref name="id"/>,
/// and stores it in the archive under <paramref name="splitFilename"/>.
/// When the extracted part is too small (or starts the file right after a
/// previous anchor) nothing is written and splitNumber is decremented instead.
/// </summary>
/// <param name="filename">File to split.</param>
/// <param name="id">Id of the anchor that ends this part; empty for the last part.</param>
/// <param name="prevId">Id of the anchor that starts this part; empty to start at the top of the body.</param>
/// <param name="splitFilename">Name that replaces <paramref name="filename"/> in the archive entry.</param>
private void CutFiles(string filename, string id, string prevId, string splitFilename)
{
    string html = GetHtml(filename);
    if (String.IsNullOrEmpty(html))
    {
        return;
    }

    List<string> bodyLines = GetHtmlBody(html);
    string head = GetHead(html);

    // Text searched for in the body; an empty marker means "no boundary".
    // The original built prevIdAtt with an inverted condition (prefix added
    // only when prevId was empty), which searched for the bare id text and
    // made the prevIdAtt == "" check below unreachable.
    string idAtt = String.IsNullOrEmpty(id) ? "" : "id=\"" + id;
    string prevIdAtt = String.IsNullOrEmpty(prevId) ? "" : "id=\"" + prevId;

    // Contains("") is always true, so an empty prevIdAtt skips nothing.
    IEnumerable<string> fromStart = bodyLines.SkipWhile(x => !x.Contains(prevIdAtt));
    List<string> extractedBody = idAtt.Length == 0
        ? fromStart.ToList()                                      // just for the last file
        : fromStart.TakeWhile(x => !x.Contains(idAtt)).ToList();

    if (extractedBody.Count <= 2 || (prevIdAtt == "" && PreviousContainedAnchor))
    {
        // Nothing worth writing: give the split number back.
        splitNumber--;
        return;
    }

    StringBuilder sb = new StringBuilder();
    sb.Append(head);
    extractedBody.ForEach(x => sb.AppendLine(x));

    MyHtmlDocument htmlDoc = new MyHtmlDocument();
    string tidiedHtml = htmlDoc.TidyHtml(sb.ToString());
    htmlDoc.fileOutStream = tidiedHtml.ToStream();

    // NOTE(review): as in the original, a missing archive entry makes
    // FirstOrDefault return null and the Replace call throw.
    string sourceEntry = Variables.OPFpath + filename;
    string targetEntry = ZipFileNames.Where(x => x == sourceEntry)
                                     .FirstOrDefault()
                                     .Replace(filename, splitFilename);
    htmlDoc.fileOutName = targetEntry;
    htmlDoc.UpdateZip();
}
//string BodyHeading = "";

/// <summary>
/// Returns the tidied XHTML of a file, running Tidy on it the first time
/// and serving the result from HtmlCache afterwards.
/// </summary>
/// <param name="filename">Name of the file to load; also the cache key.</param>
/// <returns>The cleaned XHTML text.</returns>
private string GetHtml(string filename)
{
    // Serve repeated requests from the cache.
    if (HtmlCache.ContainsKey(filename))
    {
        return HtmlCache[filename];
    }

    string tidied = "";
    MyHtmlDocument source = new MyHtmlDocument();
    var input = source.GetStreamOPF(filename);

    using (Tidy.Document tidyDoc = Tidy.Document.FromStream(input))
    {
        // Silent run, UTF-8 in and out, XHTML output, no line wrapping.
        tidyDoc.ShowWarnings = false;
        tidyDoc.Quiet = true;
        tidyDoc.OutputXhtml = true;
        tidyDoc.InputCharacterEncoding = Tidy.EncodingType.Utf8;
        tidyDoc.OutputCharacterEncoding = Tidy.EncodingType.Utf8;
        tidyDoc.IndentAttributes = false;
        tidyDoc.IndentBlockElements = Tidy.AutoBool.Auto;
        // Declare svg and image as block-level tags for Tidy.
        tidyDoc.NewBlockLevelTags = "svg,image";
        tidyDoc.WrapAt = 0;

        tidyDoc.CleanAndRepair();
        tidied = tidyDoc.Save();
        HtmlCache.Add(filename, tidied);
    }

    return tidied;
}
/// <summary>
/// In TOC-edit mode, removes <paramref name="file"/> from PresentFileList when
/// the file contains at least one anchor that is not in PresentAnchors.
/// Does nothing in any other mode.
/// </summary>
/// <param name="file">File whose anchors are checked.</param>
private void SetFilesBasedOnAnchors(string file)
{
    if (AddType != AddWindowType.TOCEdit)
    {
        return;
    }

    MyHtmlDocument anchorReader = new MyHtmlDocument();
    List<string> anchorsInFile = anchorReader.FindAnchorsInFile(file);

    foreach (string anchor in anchorsInFile)
    {
        // Anchors are keyed as "file#anchor".
        string anchorKey = file + '#' + anchor;
        if (PresentAnchors.ContainsKey(anchorKey))
        {
            continue;
        }

        if (PresentFileList.Contains(file))
        {
            PresentFileList.Remove(file);
        }
    }
}
/// <summary>
/// Rebuilds the tree model from the HTML files of the OPF document: one node
/// per file (optionally only files not yet present), each tagged with its
/// NavDetails and, in TOC-edit mode with anchors shown, its anchors.
/// Failures are traced and otherwise ignored so the form stays usable.
/// </summary>
private void LoadFiles()
{
    try
    {
        using (new HourGlass())
        {
            treeView1.BeginUpdate();
            try
            {
                if (Model != null && Model.Nodes.Count > 0)
                {
                    Model.Nodes.Clear();
                }

                OpfDocument OpfDoc = new OpfDocument();
                List<string> htmlFileList = OpfDoc.GetFilesList("html");

                // Unless "show all" is ticked, hide files that are already present.
                List<string> filesToShow = htmlFileList;
                if (!cbShowAll.Checked)
                {
                    filesToShow = htmlFileList.Where(i => !PresentFileList.Contains(i)).ToList();
                }

                MyHtmlDocument htmlDoc = new MyHtmlDocument();
                Dictionary<string, DetectedHeaders> DetectText = htmlDoc.FindHeaderTextInFile(filesToShow);

                foreach (string item in filesToShow)
                {
                    // det stays null when no headers were detected for the file.
                    DetectedHeaders det;
                    DetectText.TryGetValue(item, out det);

                    List<string> text = det != null ? det.Result : null;
                    MyNode n = new MyNode(item, text);
                    n.OriginalCount = det != null ? det.OriginalCount : 0;
                    Model.Nodes.Add(n);
                }

                SortList();

                Dictionary<string, string> SrcTag = OpfDoc.GetFilesList();
                foreach (MyNode item in Model.Nodes)
                {
                    NavDetails nav = new NavDetails(Utils.GetId(item.Text, SrcTag), item.Text, item.DetectedCombo);
                    item.Tag = nav;

                    if (AddType == AddWindowType.TOCEdit && cbShowAnchors.Checked)
                    {
                        List<string> Anchors = htmlDoc.FindAnchorsInFile(item.Text);
                        if (!cbShowAll.Checked)
                        {
                            // Hide anchors that are already present ("file#anchor" keys).
                            Anchors = Anchors.Where(i => !PresentAnchors.ContainsKey(nav.File + "#" + i)).ToList();
                        }

                        Dictionary<string, DetectedHeaders> DetectAnchorText = htmlDoc.FindAchorTextInFile(item.Text, Anchors);
                        item.AddAnchors(Anchors, DetectAnchorText);
                    }
                }

                RemoveEmptyNodes();
            }
            finally
            {
                // Always pair BeginUpdate with EndUpdate, on success and on failure.
                treeView1.EndUpdate();
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: a failed reload must not take the form down,
        // but leave a trace instead of discarding the error silently.
        System.Diagnostics.Trace.WriteLine("LoadFiles failed: " + ex);
    }
}