Пример #1
0
        /// <summary>
        /// Replaces every <c>&lt;datasource name="..."&gt;</c> element found in <paramref name="input"/>
        /// with the markup returned by <c>LoadContent</c> for that name.
        /// </summary>
        /// <param name="input">HTML to process.</param>
        /// <returns>
        /// The document's outer HTML after substitution. Datasource elements without a
        /// <c>name</c> attribute, or whose replacement fails, are left in place.
        /// </returns>
        public string ParseAndPopulate(string input)
        {
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(input);

            // SelectNodes returns null (not an empty collection) when nothing matches,
            // so query once and guard the result.
            HtmlAgilityPack.HtmlNodeCollection dataSources = doc.DocumentNode.SelectNodes("//datasource");
            if (dataSources != null)
            {
                foreach (HtmlAgilityPack.HtmlNode ds in dataSources)
                {
                    HtmlAgilityPack.HtmlAttribute att = ds.Attributes["name"];
                    if (att == null)
                    {
                        continue;
                    }

                    try
                    {
                        // Parse the replacement markup inside a scratch element, then move its
                        // children next to the placeholder, preserving their order.
                        var temp = doc.CreateElement("temp");
                        temp.InnerHtml = LoadContent(att.Value);
                        var current = ds;
                        foreach (var child in temp.ChildNodes)
                        {
                            ds.ParentNode.InsertAfter(child, current);
                            current = child;
                        }
                        ds.Remove();
                    }
                    catch (System.Exception ex)
                    {
                        // Best effort: one broken datasource must not prevent the rest of the
                        // page from rendering, but the failure should at least be visible.
                        System.Diagnostics.Trace.TraceWarning("ParseAndPopulate: datasource '{0}' could not be populated: {1}", att.Value, ex.Message);
                    }
                }
            }

            return(doc.DocumentNode.OuterHtml);
        }
Пример #2
0
        /*By luulong: 11/2017*/
        //https://stackoverflow.com/questions/13771083/html-agility-pack-get-all-elements-by-class
        /// <summary>
        /// Tells whether <paramref name="element"/> carries <paramref name="className"/> in its
        /// <c>class</c> attribute, as decided by <c>CheapClassListContains</c> with ordinal comparison.
        /// </summary>
        /// <param name="element">Node to inspect; nodes that are not elements always yield <c>false</c>.</param>
        /// <param name="className">Class name to look for.</param>
        /// <returns><c>true</c> when the class list contains the name; otherwise <c>false</c>.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="element"/> is null, or <paramref name="className"/> is null, empty or whitespace.
        /// </exception>
        public static bool HasClass(this HtmlAgilityPack.HtmlNode element, String className)
        {
            if (element == null)
            {
                throw new ArgumentNullException(nameof(element));
            }
            if (String.IsNullOrWhiteSpace(className))
            {
                throw new ArgumentNullException(nameof(className));
            }

            // Only element nodes are asked for their attributes.
            bool isElement = element.NodeType == HtmlAgilityPack.HtmlNodeType.Element;
            HtmlAgilityPack.HtmlAttribute classAttribute = isElement ? element.Attributes["class"] : null;

            return classAttribute != null
                   && CheapClassListContains(classAttribute.Value, className, StringComparison.Ordinal);
        }
Пример #3
0
        /// <summary>
        /// Creates a property backed by an HTML node and its itemscope attribute.
        /// The node type is always set to <see cref="XmlNodeType.Element"/>.
        /// </summary>
        /// <param name="node">The HTML node stored in <c>node</c>.</param>
        /// <param name="itemscopeAttr">The attribute stored in <c>itemscopeAttr</c>.</param>
        public Property(HtmlAgilityPack.HtmlNode node, HtmlAgilityPack.HtmlAttribute itemscopeAttr)
        {
            nodeType = XmlNodeType.Element;

            this.itemscopeAttr = itemscopeAttr;
            this.node          = node;
        }
Пример #4
0
        /// <summary>
        /// Creates a <see cref="CSF.Zpt.DocumentProviders.ZptHtmlAttribute"/> that wraps an
        /// HtmlAgilityPack attribute.
        /// </summary>
        /// <param name="original">The attribute to wrap; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="original"/> is null.</exception>
        public ZptHtmlAttribute(HtmlAgilityPack.HtmlAttribute original)
        {
            if (original == null)
            {
                throw new ArgumentNullException(nameof(original));
            }

            _original = original;
        }
Пример #5
0
        /// <summary>
        /// Parses <paramref name="html"/> on a thread-pool thread and collects one
        /// <c>TagImg</c> per <c>&lt;img&gt;</c> element that qualifies.
        /// </summary>
        /// <param name="html">Markup to scan.</param>
        /// <param name="isHotspot">
        /// When <c>true</c>, every image is returned even if <c>RegexUtil.ExtractBingImage</c>
        /// reports no match for its container.
        /// </param>
        /// <returns>The collected images; an empty list when the markup contains no <c>&lt;img&gt;</c>.</returns>
        public async static Task <List <TagImg> > GetImgFromHtml(string html, bool isHotspot = false)
        {
            Task <List <TagImg> > task = Task.Run(() => {
                List <TagImg> list = new List <TagImg>();

                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(html);

                // SelectNodes returns null (not an empty collection) when nothing matches.
                var imgList = doc.DocumentNode.SelectNodes("//img");
                if (imgList == null)
                {
                    return(list);
                }

                foreach (var item in imgList)
                {
                    // Sizes are per image: reset them so an image without size attributes
                    // does not inherit the values parsed for the previous one.
                    int w = 0;
                    int h = 0;

                    TagImg tagImg = new TagImg();
                    HtmlAgilityPack.HtmlAttribute tempAttribute = item.Attributes["alt"];
                    tagImg.Alt    = tempAttribute == null ? "" : tempAttribute.Value;
                    tempAttribute = item.Attributes["src"];
                    tagImg.Src    = tempAttribute == null ? "" : tempAttribute.Value;

                    // NOTE(review): the sample markup below carries "height"/"width" on <img>,
                    // while this code reads "h"/"w" - confirm which attribute names are intended.
                    tempAttribute = item.Attributes["h"];
                    if (tempAttribute != null)
                    {
                        int.TryParse(tempAttribute.Value, out h);
                    }
                    tempAttribute = item.Attributes["w"];
                    if (tempAttribute != null)
                    {
                        int.TryParse(tempAttribute.Value, out w);
                    }

                    //Search Detail

                    /*
                     * Expected result markup (abridged): the detail data lives on the <a> two
                     * levels above the <img>.
                     *
                     * <a class="iusc" m="{&quot;purl&quot;:&quot;...&quot;,&quot;murl&quot;:&quot;...&quot;,...}" href="/images/search?view=detailV2&amp;...">
                     *   <div class="img_cont hoff">
                     *       <img class="mimg" height="208" width="299" src="https://tse3-mm.cn.bing.net/th?id=..." alt="..." />
                     *   </div>
                     * </a>
                     */

                    // An <img> sitting directly under the document root has no grandparent;
                    // fall back to the image's own markup instead of dereferencing null.
                    HtmlAgilityPack.HtmlNode container = item.ParentNode?.ParentNode;
                    string containerHtml = container != null ? container.OuterHtml : item.OuterHtml;

                    Tuple <bool, string> extractResult = RegexUtil.ExtractBingImage(containerHtml);

                    if (extractResult.Item1 || isHotspot)
                    {
                        tagImg.DetailUrl = extractResult.Item2;
                        tagImg.Width     = w;
                        tagImg.Height    = h;
                        list.Add(tagImg);
                    }
                }
                return(list);
            });

            return(await task);
        }
        /// <summary>
        /// Wraps an HtmlAgilityPack attribute in the matching wrapper type:
        /// <c>AttributeHRef</c> for <c>href</c> (compared case-insensitively),
        /// <c>HtmlGenericAttribute</c> for everything else.
        /// </summary>
        /// <param name="attribute">The attribute to wrap.</param>
        /// <returns>The wrapper created for <paramref name="attribute"/>.</returns>
        public static IHtmlAttribute CreateAttribute(HtmlAgilityPack.HtmlAttribute attribute)
        {
            bool isHref = String.Equals(attribute.Name, "href", StringComparison.OrdinalIgnoreCase);

            if (!isHref)
            {
                return(new HtmlGenericAttribute(attribute));
            }

            return(new AttributeHRef(attribute));
        }
Пример #7
0
        /// <summary>
        /// Locates header cells of <paramref name="table"/> whose text contains one of
        /// <paramref name="vals"/>.
        /// </summary>
        /// <param name="table">
        /// Table node. The header is the first direct <c>tr</c> child, or, when there is none,
        /// the first <c>tr</c> of the first <c>thead</c> child.
        /// </param>
        /// <param name="vals">Substrings to look for; for each cell the first matching entry wins.</param>
        /// <returns>
        /// Pairs of (absolute column index, index into <paramref name="vals"/>). The absolute
        /// column index accounts for the <c>colspan</c> of preceding header cells. The list is
        /// empty when no header row or header cells can be found.
        /// </returns>
        public List <KeyValuePair <int, int> > GetTableHeaderIndexes(HtmlAgilityPack.HtmlNode table, params String[] vals)
        {
            List <KeyValuePair <int, int> > ret = new List <KeyValuePair <int, int> >();

            if (vals == null || vals.Length == 0)
            {
                return(ret);
            }

            // SelectNodes returns null (not an empty collection) when nothing matches.
            HtmlAgilityPack.HtmlNodeCollection rows = table.SelectNodes("tr");
            if (rows == null)
            {
                HtmlAgilityPack.HtmlNodeCollection thead = table.SelectNodes("thead");
                if (thead != null)
                {
                    rows = thead[0].SelectNodes("tr");
                }
            }

            HtmlAgilityPack.HtmlNodeCollection cells = null;
            if (rows != null && rows.Count > 0)
            {
                cells = rows[0].SelectNodes("th|td");
            }
            if (cells == null)
            {
                // No header row, or a header row without th/td cells: nothing to report.
                return(ret);
            }

            int colAbsIdx = 0;

            for (int i = 0; i < cells.Count; ++i)
            {
                HtmlAgilityPack.HtmlNode cell = cells[i];
                String txt = cell.InnerText.Trim();
                for (int j = 0; j < vals.Length; ++j)
                {
                    String match = vals[j];
                    if (txt.IndexOf(match) > -1)
                    {
                        ret.Add(new KeyValuePair <int, int>(colAbsIdx, j));
                        break;
                    }
                }

                // A cell spanning several columns advances the absolute index by its span;
                // a missing or unparsable colspan counts as one column.
                HtmlAgilityPack.HtmlAttribute colspan_attr = cell.Attributes["colspan"];
                int colSpan = 1;
                if (colspan_attr != null && !int.TryParse(colspan_attr.Value, out colSpan))
                {
                    colSpan = 1;
                }
                colAbsIdx += colSpan;
            }
            return(ret);
        }
Пример #8
0
        /// <summary>
        /// Sets the attribute called <paramref name="name"/> to <paramref name="value"/>.
        /// If the wrapper collection has no such attribute, one is created on the underlying
        /// HtmlAgilityPack node and registered in <c>Attributes</c> as well.
        /// </summary>
        /// <param name="name">Attribute name.</param>
        /// <param name="value">New attribute value.</param>
        public void SetAttributeValue(string name, string value)
        {
            var existing = Attributes[name];

            if (existing != null)
            {
                existing.Value = value;
                return;
            }

            // Not present yet: create it on the wrapped node first, then mirror it in the wrapper list.
            HtmlAgilityPack.HtmlAttribute created = _htmlNode.OwnerDocument.CreateAttribute(name, value);
            _htmlNode.Attributes.Add(created);
            Attributes.Add(new HtmlAttribute(created));
        }
Пример #9
0
        /// <summary>
        /// Reads the page count from the local file "HTML/FlatIconMain.txt", then loads every
        /// pack listing page from flaticon.com and calls <c>DownloadPack</c> for each pack link,
        /// pausing five seconds between downloads.
        /// </summary>
        public static void Test()
        {
            string path = MapProjectPath("HTML/FlatIconMain.txt");

            System.Console.WriteLine(path);


            HtmlAgilityPack.HtmlWeb page = new HtmlAgilityPack.HtmlWeb();
            // HtmlAgilityPack.HtmlDocument doc = page.Load("http://www.flaticon.com/packs");
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.Load(path);


            int iMaxPage = GetMaxPage(doc);


            // http://www.flaticon.com/packs/1
            // http://www.flaticon.com/packs/2
            // ...
            // http://www.flaticon.com/packs/65
            for (int i = 1; i <= iMaxPage; ++i)
            {
                doc = page.Load("http://www.flaticon.com/packs/" + i.ToString());
                // System.Console.WriteLine(doc.DocumentNode.OuterHtml);

                // SelectNodes returns null (not an empty collection) when nothing matches,
                // e.g. when the page layout changed or an error page was served.
                HtmlAgilityPack.HtmlNodeCollection links = doc.DocumentNode.SelectNodes("//article[@class=\"box\"]/a[@href]");
                if (links == null)
                {
                    System.Console.WriteLine("No pack links found on page " + i.ToString());
                    continue;
                }

                foreach (HtmlAgilityPack.HtmlNode link in links)
                {
                    // System.Console.WriteLine(link);
                    HtmlAgilityPack.HtmlAttribute att = link.Attributes["href"];
                    System.Console.WriteLine(att.Value);
                    try
                    {
                        DownloadPack(att.Value);
                        System.Threading.Thread.Sleep(5000);
                    }
                    catch (System.Exception ex)
                    {
                        // One failed pack must not stop the crawl; report it and move on.
                        System.Console.WriteLine("Error on page " + i.ToString());
                        System.Console.WriteLine(ex.Message);
                        System.Console.WriteLine("URL: " + att.Value);
                    }
                } // Next link
            }
        }         // End Sub Test
        /// <summary>
        /// Click handler: downloads the page named in <c>txtUrl</c>, extracts the tab content,
        /// title and artist from it and shows the result in a new <c>TabView</c>.
        /// Any failure is written to the console and reported to the user in a message box.
        /// </summary>
        private void btnLoad_Click(object sender, RoutedEventArgs e)
        {
            try
            {
                Tab    tab     = new Tab();
                string webpage = GetHTML(txtUrl.Text);

                HtmlAgilityPack.HtmlDocument htdoc = new HtmlAgilityPack.HtmlDocument();
                htdoc.LoadHtml(webpage);

                //get tab content
                // SelectNodes returns null when nothing matches; fail with a message that
                // names the problem instead of a NullReferenceException.
                HtmlAgilityPack.HtmlNodeCollection preNodes = htdoc.DocumentNode.SelectNodes("//pre");
                if (preNodes == null)
                {
                    throw new InvalidOperationException("The page contains no <pre> element.");
                }
                foreach (HtmlAgilityPack.HtmlNode n in preNodes)
                {
                    HtmlAgilityPack.HtmlAttribute f = n.Attributes.FirstOrDefault(i => i.Name.Equals("class") && i.Value.Contains("js-tab-content"));
                    if (f != null)
                    {
                        tab.Content = n.InnerText;
                    }
                }

                //get tab title
                //< div class="t_title"><div><h1 itemProp = "name" > Title </ h1 ></ div ></ div >
                HtmlAgilityPack.HtmlNode titleNode = htdoc.DocumentNode.Descendants("h1")
                                                     .FirstOrDefault(i => i.Attributes.Contains("itemProp") && i.Attributes["itemProp"].Value.Contains("name"));
                if (titleNode == null)
                {
                    throw new InvalidOperationException("The page contains no <h1 itemProp=\"name\"> title element.");
                }
                tab.Title = titleNode.InnerText;

                //get tab artist (current implementation is kinda bad)
                // NOTE(review): the original comment described class="t_author", but the code
                // matches "t_autor" - confirm which class name the page really uses.
                HtmlAgilityPack.HtmlNode artistDiv = htdoc.DocumentNode.Descendants("div")
                                                     .FirstOrDefault(i => i.Attributes.Contains("class") && i.Attributes["class"].Value.Contains("t_autor"));
                if (artistDiv == null)
                {
                    throw new InvalidOperationException("The page contains no artist <div> element.");
                }
                HtmlAgilityPack.HtmlNode artistLink = artistDiv.Descendants("a").FirstOrDefault();
                if (artistLink == null)
                {
                    throw new InvalidOperationException("The artist <div> contains no <a> element.");
                }
                tab.Artist = artistLink.InnerText;

                tab.URL = txtUrl.Text;

                //update view
                MainWindow window = this.DataContext as MainWindow;
                if (window == null)
                {
                    throw new InvalidOperationException("DataContext is not a MainWindow.");
                }
                window.SelectedViewModel = new TabView(tab);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
                MessageBox.Show("An error occured while loading that URL");
            }
        }
Пример #11
0
        /// <summary>
        /// Replaces every page-partial control in <paramref name="contentHtml"/> with the HTML
        /// stored for it in the portal database.
        /// </summary>
        /// <param name="contentHtml">
        /// Page markup; updated in place. A control whose partial does not exist is replaced
        /// with an HTML comment naming the missing partial.
        /// </param>
        /// <param name="controlElements">
        /// Candidate control elements. Only elements shaped like
        /// <c>&lt;cc control="pagePartial" name="header"&gt;&lt;/cc&gt;</c> are processed.
        /// </param>
        private static void RenderPagePartials(ref string contentHtml, List <HtmlAgilityPack.HtmlNode> controlElements)
        {
            // Maps the control's markup (the text to replace) to the partial name it asks for.
            Dictionary <string, string> dicPagePartials = new Dictionary <string, string>();

            foreach (HtmlAgilityPack.HtmlNode controlElement in controlElements)
            {
                /*
                 *  Expected element structure: '<cc control="pagePartial" name="header"></cc>'
                 */
                HtmlAgilityPack.HtmlAttribute attrControl = controlElement.Attributes.FirstOrDefault(a => string.Equals(a.Name, "control", System.StringComparison.OrdinalIgnoreCase));
                HtmlAgilityPack.HtmlAttribute attrName    = controlElement.Attributes.FirstOrDefault(a => string.Equals(a.Name, "name", System.StringComparison.OrdinalIgnoreCase));

                if (attrControl == null || attrName == null)
                {
                    continue;
                }
                if (!string.Equals(attrControl.Value, "pagepartial", System.StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // The same partial may legitimately appear more than once with identical markup;
                // Add() would throw on the duplicate key, and one Replace covers all occurrences.
                dicPagePartials[controlElement.OuterHtml] = attrName.Value;
            }

            if (dicPagePartials.Count == 0)
            {
                // Nothing to render: skip opening the database context.
                return;
            }

            // NOTE(review): if PortalEntities is a disposable context it should be disposed - confirm.
            PortalEntities portal = new PortalEntities(Account.Context.GetConnectionStringEntity("Portal"));

            foreach (var dic in dicPagePartials)
            {
                string partialName = dic.Value;

                IEnumerable <Portal.porPagePartial> queryPagePartial = from pp in portal.porPagePartials
                                                                       where pp.Name.Equals(partialName)
                                                                       select pp;

                // Run the query once; calling FirstOrDefault() twice executed it twice.
                var pagePartial = queryPagePartial.FirstOrDefault();

                if (pagePartial != null)
                {
                    contentHtml = contentHtml.Replace(dic.Key, pagePartial.Html);
                }
                else
                {
                    contentHtml = contentHtml.Replace(dic.Key, string.Format("<!-- Erro de Configuração: Página parcial \"{0}\" não existe -->", partialName));
                }
            }
        }
Пример #12
0
        }         // End Sub Test

        /// <summary>
        /// Loads the pack page at <paramref name="url"/> and downloads every file linked from
        /// its <c>section.list-top</c> into the project's "harvest" directory.
        /// </summary>
        /// <param name="url">Address of the pack page.</param>
        public static void DownloadPack(string url)
        {
            string harvestPath = MapProjectPath("harvest");

            // CreateDirectory does nothing when the directory already exists.
            System.IO.Directory.CreateDirectory(harvestPath);

            // string path = MapProjectPath("HTML/flatIcon_lvl_1.txt");
            // string url = @"http://www.flaticon.com/packs/web-navigation-line-craft";
            HtmlAgilityPack.HtmlWeb      page = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc  = page.Load(url);
            // HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            // doc.Load(path);

            // <section class="list-top">
            // <a href="http://file005.flaticon.com/packs/112154-web-navigation-line-craft.zip" class="btn pull-right track_download_pack" title="Download Pack" data-pack="112154">Download Pack <i class="flaticon-download"></i></a>
            // SelectNodes returns null (not an empty collection) when nothing matches.
            HtmlAgilityPack.HtmlNodeCollection links = doc.DocumentNode.SelectNodes("//section[@class=\"list-top\"]/a[@href]");
            if (links == null)
            {
                System.Console.WriteLine("No download link found on " + url + ".");
                return;
            }

            foreach (HtmlAgilityPack.HtmlNode link in links)
            {
                //System.Console.WriteLine(link);

                HtmlAgilityPack.HtmlAttribute att = link.Attributes["href"];
                string downloadLink = att.Value;

                // The local file name is the last path segment of the download URL.
                System.Uri uri = new System.Uri(downloadLink, System.UriKind.Absolute);
                string     fn  = System.IO.Path.GetFileName(uri.AbsolutePath);
                fn = System.IO.Path.Combine(harvestPath, fn);

                System.Console.WriteLine("Downloading " + downloadLink + ".");
                DownloadFile(downloadLink, fn);
                System.Console.WriteLine("Finished downloading " + downloadLink + ".");
            }
        }
Пример #13
0
        } // End Class RedirectValues


        /// <summary>
        /// Extracts the method, action and named input values of the first <c>&lt;form&gt;</c>
        /// found in <paramref name="html"/>.
        /// </summary>
        /// <param name="html">Markup containing the form.</param>
        /// <returns>
        /// The collected values. When the markup has no form, or the form has no inputs, the
        /// result is returned with whatever could be read (possibly nothing).
        /// </returns>
        public static RedirectValues GetFormValues(string html)
        {
            RedirectValues rv = new RedirectValues();

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            HtmlAgilityPack.HtmlNode form = doc.DocumentNode.SelectSingleNode("//form");
            if (form == null)
            {
                return(rv);
            }

            rv.Method = form.Attributes["method"]?.Value;
            rv.Action = form.Attributes["action"]?.Value;

            // SelectNodes returns null (not an empty collection) when nothing matches.
            // NOTE(review): depending on HtmlNode.ElementsFlags, HtmlAgilityPack may parse the
            // inputs as siblings of <form> rather than children - confirm the parser setup.
            HtmlAgilityPack.HtmlNodeCollection inputs = form.SelectNodes(".//input");
            if (inputs == null)
            {
                return(rv);
            }

            foreach (HtmlAgilityPack.HtmlNode node in inputs)
            {
                HtmlAgilityPack.HtmlAttribute valueAttribute = node.Attributes["value"];
                HtmlAgilityPack.HtmlAttribute nameAttribute  = node.Attributes["name"];

                // Inputs lacking a name or a value are not collected.
                if (nameAttribute != null && valueAttribute != null)
                {
                    rv.PostValues.Add(nameAttribute.Value, valueAttribute.Value);
                } // End if (nameAttribute != null && valueAttribute != null)
            }     // Next node

            return(rv);
        } // End Function GetFormValues
        /// <summary>
        /// Renders the post stored under <paramref name="url"/>. Only published posts are
        /// matched unless <paramref name="preview"/> is <c>true</c>.
        /// </summary>
        /// <remarks>
        /// When the post names a template, the template is loaded and each
        /// <c>&lt;datasource name="article..."&gt;</c> placeholder in it is replaced by the
        /// matching post field; the result becomes the article body. The body is then passed
        /// through <c>DataSourceManager.ParseAndPopulate</c>.
        /// </remarks>
        /// <param name="url">Post URL, compared case-insensitively.</param>
        /// <param name="preview">When <c>true</c>, posts of any status are matched.</param>
        /// <returns>The view for the matched post, or for an empty <c>Post</c> when none matches.</returns>
        public ActionResult Index(string url, bool?preview)
        {
            Post PPM = db.Posts.FirstOrDefault(t => t.URL.ToLower() == url.ToLower().Trim() && (t.Status == Models.PostStatus.Publish || (preview.HasValue && preview.Value)));

            if (PPM != null)
            {
                #region Replace Custom Data Source
                DataSourceManager dsm = new DataSourceManager(db);

                if (PPM.TemplateName != string.Empty)
                {
                    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                    string templateHTML = dsm.LoadContent(PPM.TemplateName);
                    doc.LoadHtml(templateHTML);

                    // SelectNodes returns null (not an empty collection) when nothing matches,
                    // so query once and guard the result.
                    HtmlAgilityPack.HtmlNodeCollection dataSources = doc.DocumentNode.SelectNodes("//datasource");
                    if (dataSources != null)
                    {
                        foreach (HtmlAgilityPack.HtmlNode ds in dataSources)
                        {
                            try
                            {
                                HtmlAgilityPack.HtmlAttribute att = ds.Attributes["name"];
                                string replacement;

                                // Placeholders with other names stay in the document; they are
                                // left for ParseAndPopulate below.
                                if (att != null && TryGetArticleFieldHtml(PPM, att.Value, out replacement))
                                {
                                    ReplaceNodeWithHtml(doc, ds, replacement);
                                }
                            }
                            catch (System.Exception ex)
                            {
                                // Best effort: one broken placeholder must not prevent the page
                                // from rendering, but the failure should at least be visible.
                                System.Diagnostics.Trace.TraceWarning("Index: datasource placeholder '{0}' could not be populated: {1}", ds.OuterHtml, ex.Message);
                            }
                        }
                    }

                    PPM.Article = doc.DocumentNode.OuterHtml;
                }
                PPM.Article = dsm.ParseAndPopulate(PPM.Article);
                #endregion
            }
            else
            {
                PPM = new Post();
            }
            return(View(PPM));
        }

        /// <summary>
        /// Maps an article placeholder name to the markup taken from the corresponding post field.
        /// </summary>
        /// <returns><c>false</c> when <paramref name="dataSourceName"/> is not an article placeholder.</returns>
        private static bool TryGetArticleFieldHtml(Post post, string dataSourceName, out string html)
        {
            switch (dataSourceName)
            {
                case "articletext":
                    html = post.Article;
                    return(true);

                case "articleimg":
                    // An empty image URL renders as nothing rather than as a broken image.
                    html = post.OGImage != string.Empty ? string.Format("<img src='{0}' alt='' />", post.OGImage) : "";
                    return(true);

                case "articletitle":
                    html = post.Title;
                    return(true);

                case "articledate":
                    html = post.DateCreated.ToShortDateString();
                    return(true);

                case "articleviewcount":
                    html = post.Viewed.ToString();
                    return(true);

                case "articletag":
                    html = post.Tag;
                    return(true);

                case "articlewritername":
                    html = post.WriterName;
                    return(true);

                case "articlewriteremail":
                    html = post.WriterEmail;
                    return(true);

                case "articlecategory":
                    html = post.Category.Name;
                    return(true);

                case "articledescription":
                    html = post.OGDescription;
                    return(true);

                default:
                    html = null;
                    return(false);
            }
        }

        /// <summary>
        /// Replaces <paramref name="placeholder"/> with the nodes parsed from <paramref name="html"/>,
        /// keeping their order.
        /// </summary>
        private static void ReplaceNodeWithHtml(HtmlAgilityPack.HtmlDocument doc, HtmlAgilityPack.HtmlNode placeholder, string html)
        {
            // Parse the markup inside a scratch element, then move its children next to the placeholder.
            var temp = doc.CreateElement("temp");
            temp.InnerHtml = html;

            var current = placeholder;
            foreach (var child in temp.ChildNodes)
            {
                placeholder.ParentNode.InsertAfter(child, current);
                current = child;
            }
            placeholder.Remove();
        }
Пример #15
0
 /// <summary>
 /// Creates a wrapper around an HtmlAgilityPack attribute.
 /// </summary>
 /// <param name="htmlAttribute">The attribute to wrap; stored as given, without validation.</param>
 public HtmlAttribute(HtmlAgilityPack.HtmlAttribute htmlAttribute)
 {
     this._htmlAttribute = htmlAttribute;
 }
Пример #16
0
 /// <summary>
 /// Creates an element attribute that holds the given HtmlAgilityPack attribute.
 /// </summary>
 /// <param name="Attr">The attribute stored in <c>Attr</c>; stored as given, without validation.</param>
 public ElementAttribute(HtmlAgilityPack.HtmlAttribute Attr)
 {
     this.Attr = Attr;
 }
Пример #17
0
 /// <summary>
 /// Extension shorthand that returns the attribute's <c>Value</c>.
 /// </summary>
 /// <param name="attribute">The attribute to read; must not be null.</param>
 /// <returns>The attribute value.</returns>
 public static string value(this HtmlAgilityPack.HtmlAttribute attribute) => attribute.Value;
Пример #18
0
 /// <summary>
 ///     Creates the wrapper for an <c>href</c> attribute and records its value as the URL.
 /// </summary>
 /// <param name="attribute">The attribute to wrap; its value is copied into <c>url</c>.</param>
 public AttributeHRef(HtmlAgilityPack.HtmlAttribute attribute)
     : base(attribute)
 {
     url = attribute.Value;
 }
Пример #19
0
        /// <summary>
        /// Flattens a table into rows of cell texts, repeating the text of a cell with
        /// <c>rowspan</c> greater than one in every row that the cell covers.
        /// </summary>
        /// <param name="table">Table node whose direct <c>tr</c> children are read.</param>
        /// <returns>
        /// One string array per row that has cells. Cell text is entity-decoded and trimmed.
        /// Columns for which a row supplies no cell hold an empty string. On an unexpected
        /// error the rows collected so far are returned and the error is written to the console.
        /// </returns>
        private List <String[]> fetchTableWithoutRowSpan(HtmlAgilityPack.HtmlNode table)
        {
            List <String[]> ret = new List <String[]>();

            try
            {
                // SelectNodes returns null (not an empty collection) when nothing matches.
                HtmlAgilityPack.HtmlNodeCollection rows = table.SelectNodes("tr");
                if (rows == null)
                {
                    return(ret);
                }

                // One slot per column; a non-null slot is a rowspan carried into later rows.
                RowSpanRecord[] rowSpanRecord = null;
                for (int i = 0; i < rows.Count; ++i) // foreach row in table.rows
                {
                    HtmlAgilityPack.HtmlNodeCollection cells = rows[i].SelectNodes("td|th");
                    if (cells == null || cells.Count == 0)
                    {
                        continue;
                    }

                    int columnLen = cells.Count;
                    if (rowSpanRecord == null)
                    {
                        rowSpanRecord = new RowSpanRecord[columnLen];
                    }
                    else if (columnLen > rowSpanRecord.Length)
                    {
                        // Grow while keeping the existing records. The previous hand-written copy
                        // looped up to the NEW length and ran past the end of the old array.
                        Array.Resize(ref rowSpanRecord, columnLen);
                    }

                    String[] cellBucket = new String[rowSpanRecord.Length];
                    int dataIndex = 0; // next unconsumed cell of this row
                    for (int j = 0; j < rowSpanRecord.Length; ++j)
                    {
                        RowSpanRecord carried = rowSpanRecord[j];
                        if (carried != null)
                        {
                            // Column is covered by a rowspan from an earlier row: reuse its text
                            // and consume no cell of this row.
                            cellBucket[j] = carried.data;
                            --carried.rowSpan;
                            if (carried.rowSpan <= 0)
                            {
                                rowSpanRecord[j] = null;
                            }
                            continue;
                        }

                        if (dataIndex >= cells.Count)
                        {
                            // The row has fewer cells than the table has columns.
                            cellBucket[j] = String.Empty;
                            continue;
                        }

                        // Take the next cell of the row. Indexing by column (cells[j]) picks the
                        // wrong cell as soon as an earlier column is covered by a carried rowspan.
                        HtmlAgilityPack.HtmlNode cell = cells[dataIndex];
                        ++dataIndex;

                        String text = HtmlAgilityPack.HtmlEntity.DeEntitize(cell.InnerText).Trim();
                        cellBucket[j] = text;

                        int rowspan = 1;
                        if (cell.HasAttributes)
                        {
                            HtmlAgilityPack.HtmlAttribute rowspan_attr = cell.Attributes["rowspan"];
                            if (rowspan_attr != null && !int.TryParse(rowspan_attr.Value, out rowspan))
                            {
                                rowspan = 1;
                            }
                        }
                        if (rowspan > 1)
                        {
                            // This row already used one of the spanned rows; carry the remainder.
                            RowSpanRecord rowspanRec = new RowSpanRecord();
                            rowspanRec.rowSpan = rowspan - 1;
                            rowspanRec.rowIdx  = i;
                            rowspanRec.data    = text;
                            rowSpanRecord[j]   = rowspanRec;
                        }
                    }
                    //Console.WriteLine("Add cellBucket({0}) to list",ret.Count);
                    ret.Add(cellBucket);
                }
            }
            catch (Exception ee)
            {
                Console.WriteLine(ee.ToString());
            }
            return(ret);
        }
Пример #20
0
        /// <summary>
        ///     Downloads one or more listing pages, extracts every magnet link found in them
        ///     and shows the results in <c>GridResultados</c>.
        /// </summary>
        /// <remarks>
        ///     Progress and errors are reported through <c>txtBuscando</c> and <c>animBuscando</c>.
        ///     Every exception is caught inside the method and shown to the user, so nothing
        ///     propagates out of this <c>async void</c> method.
        /// </remarks>
        public async void Buscar()
        {
            try
            {
                // Nothing to do until the user has picked a search mode.
                if (_modo == (short)Herramientas.Enumeradores.eModosBusqueda.SIN_DEFINIR)
                {
                    animBuscando.Visibility = System.Windows.Visibility.Collapsed;
                    txtBuscando.Text        = "SELECCIONE EL MODO DE BUSQUEDA";
                    return;
                }

                // Clear the grid and show the "searching" state.
                GridResultados.ItemsSource = new System.Collections.Generic.List <Herramientas.ListaTorrents>();
                animBuscando.Visibility    = System.Windows.Visibility.Visible;
                txtBuscando.Text           = "BUSCANDO";
                var _categoriaElegida = (Herramientas.Categorias)ComboCategoria.SelectedItem;
                Herramientas.VariablesGlobales variablesGlobales = new Herramientas.VariablesGlobales()
                {
                    CantidadPaginas     = string.IsNullOrEmpty(txtCantidadPaginas.Text) ? System.Convert.ToInt32(Herramientas.Enumeradores.eDatosPorDefault.PAGINAS_POR_DEFECTO) : System.Convert.ToInt32(txtCantidadPaginas.Text),
                    Url                 = "https://thepiratebay.org/browse/",
                    CategoriaPorDefecto = _categoriaElegida != null?System.Convert.ToInt32(_categoriaElegida.Id) : System.Convert.ToInt32(Herramientas.Enumeradores.eDatosPorDefault.CATEGORIA_POR_DEFECTO)
                };

                _lista = new System.Collections.Generic.List <Herramientas.ListaTorrents>();
                // Escape the keywords as a URL component (spaces become %20, reserved characters are encoded too).
                string          _claves = !string.IsNullOrEmpty(txtPalabrasClave.Text) ? System.Uri.EscapeDataString(txtPalabrasClave.Text) : string.Empty;
                System.DateTime _fecha  = System.DateTime.Now;
                string          _url    = string.Empty;

                // One client for the whole search, disposed when the loop is done.
                using (var _cliente = new System.Net.Http.HttpClient())
                {
                    variablesGlobales.Cliente = _cliente;
                    for (int i = 0; i < variablesGlobales.CantidadPaginas; i++)
                    {
                        if (_modo == (short)Herramientas.Enumeradores.eModosBusqueda.POR_CATEGORIA)
                        {
                            // CategoriaPorDefecto already holds the selected category id, or the
                            // default one when nothing is selected, so a null selection is safe here.
                            _url = variablesGlobales.Url + variablesGlobales.CategoriaPorDefecto + "/" + i + "/3";
                        }
                        if (_modo == (short)Herramientas.Enumeradores.eModosBusqueda.POR_PALABRA_CLAVE)
                        {
                            // NOTE(review): this URL does not contain the page index `i`, so every
                            // iteration requests the same address - confirm whether paging is intended.
                            _url = "https://thepiratebay.org/search/" + _claves + "/0/99/0";
                        }

                        if (string.IsNullOrEmpty(_url))
                        {
                            _url = variablesGlobales.Url;
                        }

                        var _respuesta = await _cliente.GetByteArrayAsync(_url);

                        // Decode the complete payload (the last byte must not be dropped).
                        System.String source = System.Text.Encoding.UTF8.GetString(_respuesta);
                        source = System.Net.WebUtility.HtmlDecode(source);
                        variablesGlobales.DocumentoHTML = new HtmlAgilityPack.HtmlDocument();
                        variablesGlobales.DocumentoHTML.LoadHtml(source);
                        txtBuscando.Text = "DECODIFICANDO";

                        // SelectNodes yields null (not an empty collection) when nothing matches.
                        var _enlaces = variablesGlobales.DocumentoHTML.DocumentNode.SelectNodes("//a[@href]");
                        if (_enlaces == null)
                        {
                            continue;
                        }

                        // Most recent "/torrent/" link seen; used as the origin of the magnet links that follow it.
                        string _urlBase = string.Empty;
                        foreach (HtmlAgilityPack.HtmlNode link in _enlaces)
                        {
                            HtmlAgilityPack.HtmlAttribute att = link.Attributes["href"];
                            if (string.IsNullOrEmpty(att.Value))
                            {
                                continue;
                            }

                            if (att.Value.StartsWith("/torrent/", System.StringComparison.Ordinal))
                            {
                                _urlBase = "https://thepiratebay.org" + att.Value.Trim();
                            }

                            if (!att.Value.StartsWith("magnet:?", System.StringComparison.Ordinal))
                            {
                                continue;
                            }

                            var    _elementos = att.Value.Split('&');
                            string _dn        = _elementos.FirstOrDefault(x => x.StartsWith("dn=", System.StringComparison.Ordinal));
                            if (_dn == null)
                            {
                                // A magnet link without a display name cannot be listed; skip it
                                // instead of aborting the whole search.
                                continue;
                            }

                            // Build the friendly name: '+' becomes a space, the "dn=" prefix is removed and
                            // every character outside [a-zA-Z0-9_] is replaced by a space.
                            string encodedString = System.Web.HttpUtility.HtmlEncode(_dn).Replace('+', ' ').Remove(0, 3);
                            string _nombreL      = System.Text.RegularExpressions.Regex.Replace(encodedString, @"([^a-zA-Z0-9_]|^\s)", " ");
                            _lista.Add(new Herramientas.ListaTorrents()
                            {
                                Categoria      = System.Convert.ToInt32(Herramientas.Enumeradores.eDatosPorDefault.CATEGORIA_POR_DEFECTO),
                                Direccion      = att.Value,
                                Fecha          = _fecha,
                                NombreAmigable = _nombreL,
                                Seleccionado   = false,
                                Origen         = _urlBase
                            });
                        }
                    }
                }

                animBuscando.Visibility    = System.Windows.Visibility.Collapsed;
                txtBuscando.Text           = "TERMINADO, SE HAN HALLADO " + _lista.Count + " ELEMENTOS";
                // Show only entries with a non-empty name, sorted by that name.
                GridResultados.ItemsSource = _lista.Where(x => x.NombreAmigable != "").OrderBy(y => y.NombreAmigable).ToList();
            }
            catch (System.Exception exc)
            {
                // Surface any failure (network, parsing, bad page count) in the status text.
                animBuscando.Visibility = System.Windows.Visibility.Collapsed;
                txtBuscando.Text        = "SURGIO UNA EXCEPCION: " + exc.Message;
            }
        }
 /// <summary>
 ///     Creates a wrapper object around an HTML element attribute.
 /// </summary>
 /// <param name="attribute">
 ///     The HtmlAgilityPack attribute to wrap; it is handed unchanged to the base-class constructor.
 /// </param>
 public HtmlGenericAttribute(HtmlAgilityPack.HtmlAttribute attribute) : base(attribute)
 {
     // No additional initialization: the base constructor receives the attribute.
 }