예제 #1
0
        /// <summary>
        /// For every entry of <paramref name="linkList"/>, loads that entry's page and collects the
        /// href of the first-cell anchor from each row of the table with class "tbl tablecol-2".
        /// </summary>
        /// <param name="linkList">
        /// Entry key (used as the result key and as the cache file name under "linkTypes\") mapped to
        /// the link that is appended to siteAddress when the page has to be downloaded.
        /// </param>
        /// <returns>
        /// A dictionary with one list of hrefs per key of <paramref name="linkList"/>; the list stays
        /// empty when the table was not found on the page.
        /// </returns>
        private Dictionary <string, List <string> > GetAllLinkList(Dictionary <string, string> linkList)
        {
            Dictionary <string, List <string> > allLinkDict =
                new Dictionary <string, List <string> >();

            HtmlDocument allHTML = new HtmlDocument();

            // Presumably sets the upper bound of a progress indicator - confirm against the handler.
            OnMaxValueAll(linkList.Count);
            foreach (KeyValuePair <string, string> item in linkList)
            {
                fileTempAddress = "linkTypes\\" + item.Key + ".txt";

                // The indexer adds or replaces in a single lookup, so the entry exists (possibly
                // empty) even when the page turns out to have no table.
                List <string> templinkType = new List <string>();
                allLinkDict[item.Key] = templinkType;

                // Prefer the cached copy of the page; otherwise download it (the cache path is
                // handed to HtmlToString.Read as well).
                if (File.Exists(fileTempAddress))
                {
                    allHTML.LoadHtml(HtmlToString.ReadCacheFile(fileTempAddress, encoding));
                }
                else
                {
                    allHTML.LoadHtml(HtmlToString.Read(siteAddress + item.Value, encoding, fileTempAddress));
                }

                // Rows may or may not be wrapped in <tbody>, so try both layouts.
                HtmlNodeCollection TableNodes =
                    allHTML.DocumentNode.SelectNodes("//table[@class=\"tbl tablecol-2\"]/tbody/tr");
                if (TableNodes == null)
                {
                    TableNodes = allHTML.DocumentNode.SelectNodes("//table[@class=\"tbl tablecol-2\"]/tr");
                }
                if (TableNodes == null)
                {
                    MessageBox.Show("Ошибка - " + item.Key);
                    continue;
                }

                foreach (HtmlNode lineNode in TableNodes)
                {
                    // SelectNodes returns null (not an empty collection) for rows without <td>.
                    HtmlNodeCollection tdNodes = lineNode.SelectNodes("td");
                    if (tdNodes == null)
                    {
                        continue;
                    }

                    // A first cell without a link, or a link without href, is skipped instead of
                    // failing the whole run with a NullReferenceException.
                    HtmlNode elA = tdNodes[0].SelectSingleNode("a");
                    if (elA == null)
                    {
                        continue;
                    }
                    HtmlAttribute href = elA.Attributes["href"];
                    if (href == null)
                    {
                        continue;
                    }
                    templinkType.Add(href.Value);
                }
                OnChangeValueAll();
            }

            return(allLinkDict);
        }
예제 #2
0
        /// <summary>
        /// Loads the site's start page (from the "links.txt" cache file when it exists, otherwise via
        /// HtmlToString.Read) and maps the text of every link in the "nav-list" list to its href.
        /// </summary>
        /// <returns>
        /// Link text mapped to href. Empty when the page has no ul with class "nav-list" or that list
        /// has no li children. When two links share the same text, the later one wins.
        /// </returns>
        private Dictionary <string, string> GetTypeLinkList()
        {
            Dictionary <string, string> ExtList = new Dictionary <string, string>();

            //xPathQuery
            string xpq_allWorks = "//ul[@class=\"nav-list\"]"; //ul class="nav-list"

            HtmlDocument allHTML = new HtmlDocument();

            fileTempAddress = "links.txt";
            if (File.Exists(fileTempAddress))
            {
                allHTML.LoadHtml(HtmlToString.ReadCacheFile(fileTempAddress, encoding));
            }
            else
            {
                allHTML.LoadHtml(HtmlToString.Read(siteAddress, encoding, fileTempAddress));
            }

            // Both lookups return null when nothing matches; treat that as "no links" rather than
            // dereferencing null.
            HtmlNode navList = allHTML.DocumentNode.SelectSingleNode(xpq_allWorks);
            if (navList == null)
            {
                return(ExtList);
            }
            var TableNodes = navList.SelectNodes("li");
            if (TableNodes == null)
            {
                return(ExtList);
            }

            // Presumably sets the upper bound of a progress indicator - confirm against the handler.
            OnMaxValueGeneral(TableNodes.Count);
            foreach (var lineNode in TableNodes)
            {
                HtmlNode      anchor = lineNode.SelectSingleNode("a");
                HtmlAttribute href   = (anchor != null) ? anchor.Attributes["href"] : null;

                // Items without a usable link are skipped. The indexer is used instead of Add so a
                // repeated link text replaces the earlier entry instead of throwing.
                if (href != null)
                {
                    ExtList[anchor.InnerText] = href.Value;
                }

                // Reported for every list item, including skipped ones, so the number of
                // notifications matches the maximum announced above.
                OnChangeValueGen();
            }
            return(ExtList);
        }
예제 #3
0
        /// <summary>
        /// Loads the description page of one file extension (from its cache file under "types\" when
        /// that exists, otherwise via HtmlToString.Read) and fills an ExtInfo from the cells of the
        /// table with class "desc". Cells are scanned in document order; a cell whose text contains a
        /// known label takes its value from the cell that follows it.
        /// </summary>
        /// <param name="link">
        /// Either an address that already contains this.siteAddress, or a link that is appended to it.
        /// </param>
        /// <returns>
        /// The populated ExtInfo; an ExtInfo with only Link set when the downloaded page is empty;
        /// null when the page has no "desc" table cells.
        /// </returns>
        private ExtInfo GetDescriptionExtension(string link)
        {
            ExtInfo      ext     = new ExtInfo();
            HtmlDocument allHTML = new HtmlDocument();

            string pageAddress = link.Contains(this.siteAddress) ? link : this.siteAddress + link;
            ext.Link = pageAddress;

            // Cache file name: the page address without the site prefix, under "types\".
            fileTempAddress = "types\\" + pageAddress.Replace("http://open-file.ru/types/", "") + ".txt";
            if (File.Exists(fileTempAddress))
            {
                allHTML.LoadHtml(HtmlToString.ReadCacheFile(fileTempAddress, encoding));
            }
            else
            {
                string s = HtmlToString.Read(pageAddress, encoding, fileTempAddress);
                // Nothing downloaded: return what is known so far (a null result is treated the
                // same as an empty one so LoadHtml is never given null).
                if (string.IsNullOrEmpty(s))
                {
                    return(ext);
                }
                allHTML.LoadHtml(s);
            }

            //xPathQuery
            //table class="desc"
            string xpq_allWorks  = "//table[@class=\"desc\"]/*/*/*/td|//table[@class=\"desc\"]/*/*/td|//table[@class=\"desc\"]/*/td|//table[@class=\"desc\"]/td|//table[@class=\"desc\"]/th";
            string xpq_HeaderExt = "//table[@class=\"desc\"]/td|//table[@class=\"desc\"]/th";

            HtmlNodeCollection TableNodes      = allHTML.DocumentNode.SelectNodes(xpq_allWorks);
            HtmlNodeCollection TableHaderNodes = allHTML.DocumentNode.SelectNodes(xpq_HeaderExt);
            if (TableNodes == null)
            {
                return(null);
            }

            // The header query is narrower than the cell query, so it can come back null even
            // when cells were found; in that case the name is simply left unset.
            if (TableHaderNodes != null && TableHaderNodes.Count > 0)
            {
                string header = TableHaderNodes[0].InnerText;
                if (header.Contains("Формат"))
                {
                    Match m = Regex.Match(header, @"\.[a-zа-я0-9]*");
                    if (m.Value != string.Empty)
                    {
                        // Drop the leading dot of the matched extension.
                        ext.Name = m.Value.Remove(0, 1);
                    }
                }
                else
                {
                    MessageBox.Show("Заголовок таблицы: " + header);
                }
            }

            // Indexed loop: the value of a label is the next cell, so the position is needed anyway
            // and a per-cell IndexOf search is avoided.
            for (int index = 0; index < TableNodes.Count; index++)
            {
                string key = DecodeBasicEntities(TableNodes[index].InnerText);

                // Null when the label is the very last cell and therefore has no value cell.
                HtmlNode valueNode = (index + 1 < TableNodes.Count) ? TableNodes[index + 1] : null;

                if (key.Contains("Тип файла"))
                {
                    if (valueNode != null)
                    {
                        ext.TypeFile = AppendWithSeparator(ext.TypeFile, valueNode.InnerText, ",\n");
                    }
                }
                else if (key.Contains("на русском"))
                {
                    if (valueNode != null)
                    {
                        ext.RusDescription = AppendWithSeparator(ext.RusDescription, valueNode.InnerText, ",\n");
                    }
                }
                else if (key.Contains("на английском"))
                {
                    if (valueNode != null)
                    {
                        ext.EngDescription = AppendWithSeparator(ext.EngDescription, valueNode.InnerText, ",\n");
                    }
                }
                else if (key.Contains("Подробное описание"))
                {
                    if (valueNode != null)
                    {
                        ext.DetailedDescription = AppendWithSeparator(
                            ext.DetailedDescription, DecodeBasicEntities(valueNode.InnerText), ",\n ");
                    }
                }
                else if (key.Contains("ASCII:") || key.Contains("HEX:"))
                {
                    // Signature lines carry their data in the label cell itself.
                    ext.InfoHeaderFile.Add(key);
                }
                else if (key == " Windows" || key == "Windows")
                {
                    if (valueNode != null)
                    {
                        // Windows starts the combined text; the other platforms append to it.
                        ext.WhatOpen = valueNode.InnerText;
                        foreach (HtmlNode li in SelectNodesOrEmpty(valueNode, "*/li|*/*/li"))
                        {
                            ext.WhatOpenWindows.Add(li.InnerText);
                        }
                    }
                }
                else if (key == " Linux" || key == "Linux")
                {
                    if (valueNode != null)
                    {
                        ext.WhatOpen += valueNode.InnerText;
                        foreach (HtmlNode li in SelectNodesOrEmpty(valueNode, "*/li"))
                        {
                            ext.WhatOpenLinux.Add(li.InnerText);
                        }
                    }
                }
                else if (key == " MacOS" || key == "MacOS")
                {
                    if (valueNode != null)
                    {
                        ext.WhatOpen += valueNode.InnerText;
                        foreach (HtmlNode li in SelectNodesOrEmpty(valueNode, "*/li"))
                        {
                            ext.WhatOpenMac.Add(li.InnerText);
                        }
                    }
                }
            }

            // NOTE(review): this only fires if ExtInfo leaves these properties null by default;
            // ExtInfo's defaults are not visible here - confirm.
            if (ext.EngDescription == null ||
                ext.RusDescription == null ||
                ext.Name == null ||
                ext.TypeFile == null ||
                ext.Link == null
                )
            {
                MessageBox.Show("Не найдены данные");
            }

            return(ext);
        }

        /// <summary>Replaces the HTML entities &amp;lt;, &amp;gt; and &amp;nbsp; with the characters they stand for.</summary>
        private static string DecodeBasicEntities(string text)
        {
            return(text.Replace("&lt;", "<").Replace("&gt;", ">").Replace("&nbsp;", " "));
        }

        /// <summary>Returns <paramref name="value"/> alone when nothing was collected yet, otherwise appends it after <paramref name="separator"/>.</summary>
        private static string AppendWithSeparator(string current, string value, string separator)
        {
            return(string.IsNullOrEmpty(current) ? value : current + separator + value);
        }

        /// <summary>Runs the XPath query on <paramref name="node"/> and returns an empty sequence instead of null when nothing matches.</summary>
        private static IEnumerable <HtmlNode> SelectNodesOrEmpty(HtmlNode node, string xpath)
        {
            HtmlNodeCollection found = node.SelectNodes(xpath);
            if (found == null)
            {
                return(new HtmlNode[0]);
            }
            return(found);
        }