/// <summary>
/// For every entry of <paramref name="linkList"/> loads the corresponding page (from the
/// "linkTypes\&lt;key&gt;.txt" file when it exists, otherwise through HtmlToString.Read)
/// and collects the href of the anchor found in the first cell of each row of the
/// table with class "tbl tablecol-2".
/// </summary>
/// <param name="linkList">
/// Keys name the cache file and the resulting dictionary entry; values are appended to
/// the site address to form the page address.
/// </param>
/// <returns>
/// A dictionary with one list of href values per key. A key whose page contains no matching
/// table rows keeps an empty list (an error box is shown for it).
/// </returns>
private Dictionary<string, List<string>> GetAllLinkList(Dictionary<string, string> linkList)
{
    Dictionary<string, List<string>> allLinkDict = new Dictionary<string, List<string>>();
    HtmlDocument allHTML = new HtmlDocument();

    // NOTE(review): presumably initializes a progress indicator with the number of pages - confirm.
    OnMaxValueAll(linkList.Count);

    foreach (KeyValuePair<string, string> item in linkList)
    {
        fileTempAddress = "linkTypes\\" + item.Key + ".txt";

        // The indexer adds the key or replaces an existing entry in a single lookup.
        List<string> templinkType = new List<string>();
        allLinkDict[item.Key] = templinkType;

        if (File.Exists(fileTempAddress))
        {
            allHTML.LoadHtml(HtmlToString.ReadCacheFile(fileTempAddress, encoding));
        }
        else
        {
            // NOTE(review): the third argument looks like the cache file to write - confirm in HtmlToString.
            allHTML.LoadHtml(HtmlToString.Read(siteAddress + item.Value, encoding, fileTempAddress));
        }

        // The rows may or may not be wrapped in <tbody>, so try both layouts.
        HtmlNodeCollection TableNodes =
            allHTML.DocumentNode.SelectNodes("//table[@class=\"tbl tablecol-2\"]/tbody/tr");
        if (TableNodes == null)
        {
            TableNodes = allHTML.DocumentNode.SelectNodes("//table[@class=\"tbl tablecol-2\"]/tr");
        }

        if (TableNodes == null)
        {
            MessageBox.Show("Ошибка - " + item.Key);
            continue;
        }

        foreach (HtmlNode lineNode in TableNodes)
        {
            // SelectNodes yields null when the row has no <td> cells (e.g. a header row).
            HtmlNodeCollection tdNodes = lineNode.SelectNodes("td");
            if (tdNodes == null)
            {
                continue;
            }

            // Skip rows whose first cell has no link instead of failing on a null reference.
            HtmlNode anchor = tdNodes[0].SelectSingleNode("a");
            if (anchor == null)
            {
                continue;
            }

            HtmlAttribute href = anchor.Attributes["href"];
            if (href == null)
            {
                continue;
            }

            templinkType.Add(href.Value);
        }

        OnChangeValueAll();
    }

    return allLinkDict;
}
/// <summary>
/// Loads the start page (from "links.txt" when that file exists, otherwise through
/// HtmlToString.Read) and reads the links listed in the element &lt;ul class="nav-list"&gt;.
/// </summary>
/// <returns>
/// A dictionary mapping the text of each list link to its href value. It is empty when the
/// list is not found on the page (an error box is shown in that case).
/// </returns>
private Dictionary<string, string> GetTypeLinkList()
{
    Dictionary<string, string> ExtList = new Dictionary<string, string>();

    // XPath query: <ul class="nav-list">
    string xpq_allWorks = "//ul[@class=\"nav-list\"]";

    HtmlDocument allHTML = new HtmlDocument();

    fileTempAddress = "links.txt";
    if (File.Exists(fileTempAddress))
    {
        allHTML.LoadHtml(HtmlToString.ReadCacheFile(fileTempAddress, encoding));
    }
    else
    {
        // NOTE(review): the third argument looks like the cache file to write - confirm in HtmlToString.
        allHTML.LoadHtml(HtmlToString.Read(siteAddress, encoding, fileTempAddress));
    }

    // Both lookups yield null when nothing matches, so they must not be chained blindly.
    HtmlNode navList = allHTML.DocumentNode.SelectSingleNode(xpq_allWorks);
    HtmlNodeCollection TableNodes = navList == null ? null : navList.SelectNodes("li");
    if (TableNodes == null)
    {
        // Reported the same way GetAllLinkList reports a page without the expected table.
        MessageBox.Show("Ошибка - " + fileTempAddress);
        return ExtList;
    }

    OnMaxValueGeneral(TableNodes.Count);

    foreach (HtmlNode lineNode in TableNodes)
    {
        HtmlNode anchor = lineNode.SelectSingleNode("a");
        HtmlAttribute href = anchor == null ? null : anchor.Attributes["href"];

        // Items without a usable link are skipped instead of failing on a null reference.
        if (href != null)
        {
            // The indexer keeps the scan alive when two items share the same text
            // (the later link wins), where Add would throw ArgumentException.
            ExtList[anchor.InnerText] = href.Value;
        }

        // Advance once per list item so the count passed to OnMaxValueGeneral is reached.
        OnChangeValueGen();
    }

    return ExtList;
}
/// <summary>
/// Loads the description page of a single extension (from the "types\...txt" file when it
/// exists, otherwise through HtmlToString.Read) and fills an ExtInfo from the cells of the
/// table with class "desc". A cell holding a known label is followed by the cell holding
/// its value.
/// </summary>
/// <param name="link">
/// Page address; used as is when it contains the site address, otherwise appended to it.
/// </param>
/// <returns>
/// The filled ExtInfo; an ExtInfo with only Link set when the loaded page text is empty;
/// null when the page has no "desc" table cells.
/// </returns>
private ExtInfo GetDescriptionExtension(string link)
{
    ExtInfo ext = new ExtInfo();
    HtmlDocument allHTML = new HtmlDocument();

    string pageAddress = link.Contains(this.siteAddress) ? link : this.siteAddress + link;
    ext.Link = pageAddress;

    fileTempAddress = "types\\" + pageAddress.Replace("http://open-file.ru/types/", "") + ".txt";

    if (File.Exists(fileTempAddress))
    {
        allHTML.LoadHtml(HtmlToString.ReadCacheFile(fileTempAddress, encoding));
    }
    else
    {
        // NOTE(review): the third argument looks like the cache file to write - confirm in HtmlToString.
        string pageText = HtmlToString.Read(pageAddress, encoding, fileTempAddress);
        if (string.IsNullOrEmpty(pageText))
        {
            return ext;
        }

        allHTML.LoadHtml(pageText);
    }

    // XPath queries: <table class="desc">; cells are looked up at every nesting depth seen on the site.
    string xpq_allWorks = "//table[@class=\"desc\"]/*/*/*/td|//table[@class=\"desc\"]/*/*/td|//table[@class=\"desc\"]/*/td|//table[@class=\"desc\"]/td|//table[@class=\"desc\"]/th";
    string xpq_HeaderExt = "//table[@class=\"desc\"]/td|//table[@class=\"desc\"]/th";

    HtmlNodeCollection TableNodes = allHTML.DocumentNode.SelectNodes(xpq_allWorks);
    HtmlNodeCollection TableHaderNodes = allHTML.DocumentNode.SelectNodes(xpq_HeaderExt);

    if (TableNodes == null)
    {
        return null;
    }

    // The header query can come back empty even when cells were found, so guard it separately.
    if (TableHaderNodes != null)
    {
        string header = TableHaderNodes[0].InnerText;
        if (header.Contains("Формат"))
        {
            // The extension is the ".xxx" token in the header; store it without the dot.
            Match m = Regex.Match(header, @"\.[a-zа-я0-9]*");
            if (m.Value != string.Empty)
            {
                ext.Name = m.Value.Remove(0, 1);
            }
        }
        else
        {
            MessageBox.Show("Заголовок таблицы: " + header);
        }
    }

    for (int index = 0; index < TableNodes.Count; index++)
    {
        string key = DecodeCellEntities(TableNodes[index].InnerText);

        // The value of a labelled row is the next cell; a label in the very last cell has none.
        HtmlNode valueNode = index + 1 < TableNodes.Count ? TableNodes[index + 1] : null;
        string valueText = valueNode == null ? null : valueNode.InnerText;

        if (key.Contains("Тип файла"))
        {
            ext.TypeFile = AppendDescriptionPart(ext.TypeFile, valueText, ",\n");
        }
        else if (key.Contains("на русском"))
        {
            ext.RusDescription = AppendDescriptionPart(ext.RusDescription, valueText, ",\n");
        }
        else if (key.Contains("на английском"))
        {
            ext.EngDescription = AppendDescriptionPart(ext.EngDescription, valueText, ",\n");
        }
        else if (key.Contains("Подробное описание"))
        {
            string description = valueText == null
                ? null
                : DecodeCellEntities(valueText).Replace("&gt;", ">");
            ext.DetailedDescription = AppendDescriptionPart(ext.DetailedDescription, description, ",\n ");
        }
        else if (key.Contains("ASCII:") || key.Contains("HEX:"))
        {
            ext.InfoHeaderFile.Add(key);
        }
        else if (valueNode != null && (key == " Windows" || key == "Windows"))
        {
            ext.WhatOpen = valueNode.InnerText;
            foreach (string program in CollectListItemTexts(valueNode, "*/li|*/*/li"))
            {
                ext.WhatOpenWindows.Add(program);
            }
        }
        else if (valueNode != null && (key == " Linux" || key == "Linux"))
        {
            ext.WhatOpen += valueNode.InnerText;
            foreach (string program in CollectListItemTexts(valueNode, "*/li"))
            {
                ext.WhatOpenLinux.Add(program);
            }
        }
        else if (valueNode != null && (key == " MacOS" || key == "MacOS"))
        {
            // The variant without the leading space is accepted as for Windows and Linux.
            ext.WhatOpen += valueNode.InnerText;
            foreach (string program in CollectListItemTexts(valueNode, "*/li"))
            {
                ext.WhatOpenMac.Add(program);
            }
        }
    }

    // NOTE(review): only null is checked here; if ExtInfo initializes its strings to
    // string.Empty this warning can never fire - confirm against ExtInfo.
    if (ext.EngDescription == null ||
        ext.RusDescription == null ||
        ext.Name == null ||
        ext.TypeFile == null ||
        ext.Link == null)
    {
        MessageBox.Show("Не найдены данные");
    }

    return ext;
}

/// <summary>
/// Replaces the "&amp;lt;" and "&amp;nbsp;" entities (and a literal no-break space) that
/// remain in the cell text with the characters they stand for.
/// </summary>
private static string DecodeCellEntities(string text)
{
    return text.Replace("&lt;", "<").Replace("&nbsp;", " ").Replace('\u00A0', ' ');
}

/// <summary>
/// Returns <paramref name="addition"/> when nothing is stored yet, otherwise the stored text
/// followed by the separator and the addition; a null addition leaves the text unchanged.
/// </summary>
private static string AppendDescriptionPart(string current, string addition, string separator)
{
    if (addition == null)
    {
        return current;
    }

    // A null field is treated like an empty one so the result never starts with the separator.
    return string.IsNullOrEmpty(current) ? addition : current + separator + addition;
}

/// <summary>
/// Returns the inner text of every node selected by <paramref name="xpath"/> under
/// <paramref name="cell"/>; the list is empty when nothing matches.
/// </summary>
private static List<string> CollectListItemTexts(HtmlNode cell, string xpath)
{
    List<string> texts = new List<string>();

    HtmlNodeCollection items = cell.SelectNodes(xpath);
    if (items != null)
    {
        foreach (HtmlNode item in items)
        {
            texts.Add(item.InnerText);
        }
    }

    return texts;
}