Inheritance: IXPathNavigable
Ejemplo n.º 1
1
        /// <summary>
        /// Sanitizer callback invoked per visited node. Rewrites <c>cid:</c> image
        /// sources to the local file of the matching inline attachment, and replaces
        /// link targets with a JavaScript navigation hook.
        /// </summary>
        /// <param name="nodeName">Name of the visited element; only "img" and "a" are handled.</param>
        /// <param name="node">The visited node whose attributes are rewritten in place.</param>
        /// <param name="writer">Writer that receives the additional <c>onclick</c> attribute for links.</param>
        void SanitizerNodeVisited(string nodeName, HtmlNode node, XmlWriter writer)
        {
            const string ContentIdScheme = "cid:";

            if (nodeName == "img" && node.Attributes["src"] != null && node.Attributes["src"].Value.StartsWith(ContentIdScheme, StringComparison.Ordinal))
            {
                var srcAttribute = node.Attributes["src"];

                // Everything after the first ':' is the content id
                var contentId = srcAttribute.Value.Substring(ContentIdScheme.Length);

                // Find inline attachment with given contentid
                var document = source.Documents.FirstOrDefault(d => d.ContentType == ContentType.Inline && d.ContentId == contentId);

                if (document != null)
                {
                    // Replace content-id url with filename
                    var filename = ClientState.Current.Storage.ResolvePhysicalFilename(".", document.StreamName);

                    srcAttribute.Value = String.Format("file://{0}", filename);
                }
            }
            else if (nodeName == "a" && node.Attributes["href"] != null)
            {
                var hrefAttribute = node.Attributes["href"];
                var url = hrefAttribute.Value ?? String.Empty;

                // Clean href and inject javascript hook
                hrefAttribute.Value = String.Empty;

                // The url comes from untrusted markup and ends up inside a single-quoted
                // JavaScript string literal, so escape anything that could terminate it.
                var escapedUrl = url
                    .Replace("\\", "\\\\")
                    .Replace("'", "\\'")
                    .Replace("\r", "\\r")
                    .Replace("\n", "\\n");

                writer.WriteAttributeString("onclick", String.Format("javascript:window.external.JsNavigate('{0}')", escapedUrl));
            }
        }
Ejemplo n.º 2
1
 /// <summary>
 /// Converts every direct child of <paramref name="node"/> by passing it to
 /// <c>ConvertTo</c> together with the output writer.
 /// </summary>
 /// <param name="node">Node whose child nodes are converted, in order.</param>
 /// <param name="outText">Writer handed to <c>ConvertTo</c> for each child.</param>
 private static void ConvertContentTo(HtmlNode node, TextWriter outText)
 {
     var children = node.ChildNodes;

     foreach (var child in children)
     {
         ConvertTo(child, outText);
     }
 }
Ejemplo n.º 3
1
 /// <summary>
 /// Downloads the page at <paramref name="url"/> and parses it, keeping both the
 /// parsed document and its root node for later queries.
 /// </summary>
 /// <param name="url">Address of the page to download; its <c>OriginalString</c> is requested.</param>
 public ItemCrawler(Uri url)
 {
     _htmlDocument = new HtmlDocument();

     string html;
     // WebClient is IDisposable; release it as soon as the download finishes.
     using (var client = new WebClient())
     {
         html = client.DownloadString(url.OriginalString);
     }

     _htmlDocument.LoadHtml(html);
     _document = _htmlDocument.DocumentNode;
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds one <c>VideoInfo</c> per <c>article</c> element found below
        /// <paramref name="node"/>, reading url, title, thumbnail and air date.
        /// </summary>
        /// <param name="node">Node whose descendant <c>article</c> elements are scanned.</param>
        /// <returns>
        /// The videos that have a resolvable link. Articles without a link, or with a
        /// link that cannot be turned into an absolute URL, are skipped.
        /// </returns>
        private List <VideoInfo> GetOppetArkivVideoList(HtmlAgilityPack.HtmlNode node)
        {
            List <VideoInfo> videoList = new List <VideoInfo>();
            Uri baseUri = new Uri("http://www.oppetarkiv.se/");

            foreach (var article in node.Descendants("article"))
            {
                string href = article.Descendants("a").Select(a => a.GetAttributeValue("href", "")).FirstOrDefault();
                if (string.IsNullOrEmpty(href))
                {
                    // No usable link: nothing to play, so skip the article.
                    continue;
                }

                // Accept absolute links as-is, otherwise resolve against the site root.
                Uri resolved;
                if (!Uri.TryCreate(href, UriKind.Absolute, out resolved) &&
                    !Uri.TryCreate(baseUri, href, out resolved))
                {
                    // Previously this fell through to a NullReferenceException.
                    continue;
                }

                VideoInfo video = new VideoInfo();
                video.VideoUrl = resolved.ToString();
                video.Title = HttpUtility.HtmlDecode((article.Descendants("a").Select(a => a.GetAttributeValue("title", "")).FirstOrDefault() ?? "").Trim().Replace('\n', ' '));

                var thumbNode = article.SelectSingleNode(".//noscript/img");
                video.Thumb = (thumbNode != null) ? thumbNode.GetAttributeValue("src", "") : "";
                if (video.Thumb.StartsWith("//", StringComparison.Ordinal))
                {
                    // Protocol-relative url: pin it to http
                    video.Thumb = "http:" + video.Thumb;
                }

                video.Airdate = article.Descendants("time").Select(t => t.GetAttributeValue("datetime", "")).FirstOrDefault();
                DateTime airdate;
                if (!string.IsNullOrEmpty(video.Airdate) && DateTime.TryParse(video.Airdate, out airdate))
                {
                    // Reformat as a short date in the configured locale; an unparsable
                    // value is left untouched instead of aborting the whole listing.
                    video.Airdate = airdate.ToString("d", OnlineVideoSettings.Instance.Locale);
                }

                videoList.Add(video);
            }
            return(videoList);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds a <c>Schedule_W</c> from <paramref name="node"/> for the given date.
        /// </summary>
        /// <param name="node">Node whose "time", "play", "relay", "ballpark" and "etc" parts are read via <c>GetInnerHtml</c>.</param>
        /// <param name="year">Year stored on the schedule.</param>
        /// <param name="month">Month stored on the schedule.</param>
        /// <param name="day">Day stored on the schedule.</param>
        /// <returns>
        /// The populated schedule, or <c>null</c> when the node has a "none" part or
        /// lacks a "relay" part.
        /// </returns>
        private Schedule_W CreateSchedule_WFromNode(HtmlNode node, Int32 year, Int32 month, Int32 day)
        {
            // The former try/catch only did "throw exception;", which discards the
            // original stack trace; letting exceptions propagate keeps it intact.
            if (GetInnerHtml(node, "none") != null || GetInnerHtml(node, "relay") == null)
            {
                return null;
            }

            return new Schedule_W
            {
                Year = year,
                Month = month,
                Day = day,
                Time = GetInnerHtml(node, "time"),
                Play = GetInnerHtml(node, "play"),
                Relay = GetInnerHtml(node, "relay"),
                BallPark = GetInnerHtml(node, "ballpark"),
                Etc = GetInnerHtml(node, "etc"),
            };
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Replaces <paramref name="el"/> in its parent with a node created from
        /// <paramref name="s"/>.
        /// </summary>
        /// <param name="el">Node to be replaced; its parent performs the replacement.</param>
        /// <param name="s">Markup handed to <c>HtmlNode.CreateNode</c> to build the replacement.</param>
        /// <returns>The newly created node.</returns>
        public static HAP.HtmlNode ReplaceWithText(this HAP.HtmlNode el, string s)
        {
            HAP.HtmlNode replacement = HAP.HtmlNode.CreateNode(s);
            HAP.HtmlNode parent = el.ParentNode;

            parent.ReplaceChild(replacement, el);

            return replacement;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Adds a new package to <paramref name="app"/> and fills its title and prices
        /// from <paramref name="packageNode"/>.
        /// </summary>
        /// <param name="app">App that receives the new package.</param>
        /// <param name="packageNode">Markup of a single package.</param>
        private static void AddPackage(SteamApp app, HtmlNode packageNode)
        {
            var package = app.AddNewPackage();

            // All queries start with ".//" so they search below packageNode only.
            // A leading "//" is evaluated from the document root and would return the
            // first match in the whole page for every package.
            var packageTitleNode = packageNode.SelectSingleNode($".//{PackageTitle}");

            package.Title = packageTitleNode.InnerHtml.Replace("Buy ", "").Trim();

            var priceNodes = packageNode.SelectNodes($".//div[@class='{PackagePriceXPath}']");

            if (priceNodes != null)
            {
                // Regular price block: current and original price are the same.
                var priceNode = priceNodes[0];

                package.CurrentPrice = ParseNodeWithCurrencyToDecimal(priceNode);

                package.OriginalPrice = package.CurrentPrice;
            }
            else
            {
                // No regular price block: read original and discounted price separately.
                var originalPriceNode = packageNode.SelectSingleNode($".//div[@class='{PackageOriginalPriceXPath}']");

                package.OriginalPrice = ParseNodeWithCurrencyToDecimal(originalPriceNode);

                var discountPriceNode = packageNode.SelectSingleNode($".//div[@class='{PackageDiscountPriceXPath}']");

                package.CurrentPrice = ParseNodeWithCurrencyToDecimal(discountPriceNode);
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Parses a mail body, stores the subject and three located nodes on the
        /// instance, and returns the extracted values as a 13-element row.
        /// </summary>
        /// <param name="subject">Mail subject, stored on the instance.</param>
        /// <param name="body">HTML body to parse.</param>
        /// <returns>
        /// A 13-element array: 0, then the results of the Get* helpers in a fixed
        /// order, followed by two empty strings.
        /// </returns>
        public object[] Parse(string subject, string body)
        {
            this.subject = subject;

            var parsed = new HtmlDocument();
            parsed.LoadHtml(body);

            var root = parsed.DocumentNode;
            this.senderInfo = root.SelectSingleNode(@"//body/table/tr[2]/td/div/table[2]");
            this.productInfo = root.SelectSingleNode(@"//body/table/tr[2]/td/h3");
            this.cantactInfo = root.SelectSingleNode(@"//body/table/tr[2]/td/div/table/tr/td[2]/h4");

            // Element order matters: the helpers are invoked top to bottom.
            return new object[]
            {
                0,
                GetMsgIp(),
                GetOrigin(),
                GetProduct(),
                GetName(),
                GetMail(),
                GetCountry(),
                GetTelephone(),
                GetCompany(),
                GetAddress(),
                GetFax(),
                string.Empty,
                string.Empty
            };
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Collects the words of all text found at or below <paramref name="node"/>.
        /// </summary>
        /// <param name="node">Node to scan; childless text nodes contribute words, other childless nodes contribute nothing.</param>
        /// <param name="top">Passed unchanged to every created <c>HtmlWord</c> and every recursive call.</param>
        /// <returns>A read-only list of words, split on single spaces with empty entries removed.</returns>
        public static IList<HtmlWord> GetWords(this HtmlNode node, HtmlNode top)
        {
            var collected = new List<HtmlWord>();

            if (!node.HasChildNodes)
            {
                // Leaf: only non-empty text nodes yield words.
                var leaf = node as HtmlTextNode;
                if (leaf == null || string.IsNullOrEmpty(leaf.Text))
                {
                    return collected.AsReadOnly();
                }

                string[] tokens = leaf.Text.Split(new string[] {" "}, StringSplitOptions.RemoveEmptyEntries);
                foreach (string token in tokens)
                {
                    collected.Add(new HtmlWord(token, node.ParentNode, top));
                }

                return collected.AsReadOnly();
            }

            // Inner node: gather the words of every child in order.
            foreach (var child in node.ChildNodes)
            {
                collected.AddRange(child.GetWords(top));
            }

            return collected.AsReadOnly();
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Returns the result of the XPath query <c>a</c> evaluated on <paramref name="node"/>.
        /// </summary>
        /// <param name="node">Node to query.</param>
        /// <returns>The node returned by <c>SelectSingleNode("a")</c>.</returns>
        public HtmlNode ParseLink(HtmlNode node)
        {
            return node.SelectSingleNode("a");
        }
       /// <summary>
       /// Loads a page and, when it parses without errors and has a body, selects its
       /// first <c>div</c>.
       /// </summary>
       /// <param name="url">
       /// Address to load. When null or blank, the previously hard-coded default
       /// address is used instead.
       /// </param>
       public static void LoadFromUrl(string url)
       {
           const string DefaultUrl = "http://brico-direct.tn/63-quincaillerie-en-ligne?id_category=63&n=275";

           HtmlWeb web = new HtmlWeb();
           // The parameter used to be ignored in favour of the hard-coded address.
           HtmlDocument doc = web.Load(string.IsNullOrWhiteSpace(url) ? DefaultUrl : url);

           // ParseErrors contains any errors from the Load statement
           if (doc.ParseErrors != null && doc.ParseErrors.Any())
           {
               // Handle any parse errors as required
               return;
           }

           if (doc.DocumentNode == null)
           {
               return;
           }

           HtmlAgilityPack.HtmlNode bodyNode = doc.DocumentNode.SelectSingleNode("//body");
           if (bodyNode == null)
           {
               return;
           }

           // Root node of the document
           HtmlNode rootNode = doc.DocumentNode;

           // Select the first div in the document (the original line contained
           // HTML-escaped quotes and did not compile)
           HtmlNode firstDivNode = rootNode.SelectSingleNode("//div");
       }
Ejemplo n.º 12
0
        /// <summary>
        /// Parses <paramref name="container"/> as HTML and returns the inner HTML of
        /// the first node matching <c>"//" + selector</c>.
        /// </summary>
        /// <param name="container">HTML text to parse.</param>
        /// <param name="selector">XPath fragment appended to "//".</param>
        /// <returns>
        /// The matched node's inner HTML, or an empty string when parsing reported
        /// errors or nothing matched.
        /// </returns>
        public string GetPropertyBySeletor(string container, string selector)
        {
            var document = new HtmlAgilityPack.HtmlDocument();
            document.LoadHtml(container);

            // Any parse error is logged and ends the lookup
            if (document.ParseErrors != null && document.ParseErrors.Any())
            {
                LogApplication.Agent.LogError(document.ParseErrors.First().Reason);
                return string.Empty;
            }

            if (document.DocumentNode == null)
            {
                return string.Empty;
            }

            HtmlAgilityPack.HtmlNode match = document.DocumentNode.SelectSingleNode("//" + selector);
            if (match == null)
            {
                return string.Empty;
            }

            string innerHtml = match.InnerHtml;
            Console.WriteLine("-->>>>>>>>>>>>>>>>>>>>>>>" + innerHtml + "<<<<<<<<<<<<<<<<<<<<<<<<<<---");
            LogApplication.Agent.LogWarn(match.OuterHtml);
            return innerHtml;
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Reads four text values starting at <paramref name="at"/> and walking its
        /// following siblings.
        /// </summary>
        /// <param name="at">Starting node; its inner text becomes <paramref name="a"/>.</param>
        /// <param name="a">Inner text of <paramref name="at"/>.</param>
        /// <param name="n">Inner text of the next sibling of <paramref name="at"/>.</param>
        /// <param name="c">Inner text of the sibling after that when it is not a <c>div</c>; otherwise empty.</param>
        /// <param name="t">Result of <c>ParsePsalm</c> on the returned node.</param>
        /// <returns>The node handed to <c>ParsePsalm</c>.</returns>
        private static HtmlAgilityPack.HtmlNode GetPsalmSext(HtmlAgilityPack.HtmlNode at, out string a, out string n, out string c, out string t)
        {
            a = at.InnerText;

            var second = at.NextSibling;
            n = second.InnerText;

            var third = second.NextSibling;
            var psalmNode = third;

            c = string.Empty;
            if (third.Name != "div")
            {
                // The third node is not the div itself: keep its text and look the div up
                c         = third.InnerText;
                psalmNode = FindNode(third, "div");
            }

            // NOTE(review): blocks on console input when no div was found, then still
            // calls ParsePsalm with null — looks like a debugging leftover; confirm.
            if (psalmNode == null)
            {
                Console.ReadKey();
            }

            t = ParsePsalm(psalmNode);
            return psalmNode;
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Reads four text values starting at <paramref name="at"/> and walking its
        /// following siblings, skipping up to two <c>p</c> elements.
        /// </summary>
        /// <param name="at">Starting node; its inner text becomes <paramref name="a"/>.</param>
        /// <param name="a">Inner text of <paramref name="at"/>.</param>
        /// <param name="n">Inner text of the next sibling of <paramref name="at"/>.</param>
        /// <param name="c">Text of the one or two <c>p</c> elements that follow (joined with " | "), or empty when there is none.</param>
        /// <param name="t">Result of <c>ParsePsalm</c> on the returned node.</param>
        /// <returns>The node handed to <c>ParsePsalm</c>.</returns>
        private static HtmlAgilityPack.HtmlNode GetPsalm(HtmlAgilityPack.HtmlNode at, out string a, out string n, out string c, out string t)
        {
            a = at.InnerText;

            var second = at.NextSibling;
            n = second.InnerText;

            var third = second.NextSibling;
            var psalmNode = third;

            c = string.Empty;
            if (third.Name == "p")
            {
                // First paragraph: take its text and move on
                c         = third.InnerText;
                psalmNode = third.NextSibling;

                if (psalmNode.Name == "p")
                {
                    // Second paragraph: append its text and move on once more
                    c         = $"{c} | {psalmNode.InnerText}";
                    psalmNode = psalmNode.NextSibling;
                }
            }

            t = ParsePsalm(psalmNode);

            return psalmNode;
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Extracts date, home team, away team and score from a match row.
        /// </summary>
        /// <param name="MatchListItem">
        /// Row whose <c>td</c> cells carry the classes "date", "team-a", "team-b" and
        /// "score-time". A missing cell results in a NullReferenceException.
        /// </param>
        /// <returns>
        /// (date, homeTeam, awayTeam, score); the date is rearranged from
        /// day/month/two-digit-year to month/day/20yy.
        /// </returns>
        public Tuple <string, string, string, string> GetMatchData(HtmlAgilityPack.HtmlNode MatchListItem)
        {
            // Trimmed text of the first cell whose class attribute contains the given name.
            Func <string, string> cellText = cssClass =>
                                             MatchListItem.Descendants("td")
                                             .FirstOrDefault(node => node.GetAttributeValue("class", "").Contains(cssClass))
                                             .InnerText.Trim();

            string date = cellText("date");

            int firstSlash = date.IndexOf('/');
            int lastSlash  = date.LastIndexOf('/');

            string day   = date.Substring(0, firstSlash);
            string month = date.Substring(firstSlash + 1, 2);
            string year  = date.Substring(lastSlash + 1, 2);

            date = month + "/" + day + "/" + "20" + year;

            string homeTeam = cellText("team-a");
            string awayTeam = cellText("team-b");
            string score    = cellText("score-time");

            return(new Tuple <string, string, string, string>(date, homeTeam, awayTeam, score));
        }
        /// <summary>
        /// Posts a search for <paramref name="id"/> and returns the absolute address of
        /// the first PDF link in the result page.
        /// </summary>
        /// <param name="id">Identifier inserted into the post-data template.</param>
        /// <returns>The PDF address prefixed with the site host, or <c>null</c> when the page has no PDF link.</returns>
        public string GetPDFUrl(string id)
        {
            const string searchUrl = "http://www.hkexnews.hk/listedco/listconews/advancedsearch/search_active_main.aspx";

            // Today's date as yyyyMMdd plus its individual parts
            string stamp     = DateTime.Now.ToString("yyyyMMdd");
            string yearPart  = stamp.Substring(0, 4);
            string monthPart = stamp.Substring(4, 2);
            string dayPart   = stamp.Substring(6, 2);

            // Fill the post-data template read from disk
            string template = File.ReadAllText(@"Config\HK\HKFMAndBulkFileGenerator.txt", Encoding.Default);
            string postData = string.Format(template, stamp, id, dayPart, monthPart, yearPart);

            // Pause before issuing the request
            Thread.Sleep(2000);
            HtmlDocument resultPage = GetDocFromHK(searchUrl, postData);

            HtmlAgilityPack.HtmlNode pdfLink = resultPage.DocumentNode.SelectSingleNode("//a[contains(@href, '.pdf')]");
            if (pdfLink == null)
            {
                Logger.Log("There's no PDF file for ric " + id);
                return null;
            }

            return "http://www.hkexnews.hk" + pdfLink.Attributes["href"].Value;
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Collects the <c>value</c> attribute of every enabled e-mail header element.
        /// </summary>
        /// <param name="source">Document in which the elements are looked up by id.</param>
        /// <param name="salesForce">Provides <c>emailHeaderIdentities</c>: element id mapped to an enabled flag.</param>
        /// <returns>
        /// One record per enabled id whose element exists and has a <c>value</c>
        /// attribute; ids without a matching element or attribute are skipped.
        /// </returns>
        private static Dictionary <string, EmailRecord> getNameValueByElementType(
            HtmlAgilityPack.HtmlDocument source,
            SalesForce salesForce
            )
        {
            Dictionary <string, EmailRecord> output
                = new Dictionary <string, EmailRecord>();

            foreach (KeyValuePair <string, bool> emailItem
                     in salesForce.emailHeaderIdentities)
            {
                if (!emailItem.Value)
                {
                    continue;
                }

                // GetElementbyId returns null for an unknown id; skip rather than crash.
                HtmlAgilityPack.HtmlNode editNode = source.GetElementbyId(emailItem.Key);
                if (editNode == null)
                {
                    continue;
                }

                // The indexer returns null when the attribute is absent.
                HtmlAgilityPack.HtmlAttribute attribute = editNode.Attributes["value"];
                if (attribute == null)
                {
                    continue;
                }

                EmailRecord record = new EmailRecord();
                record.emailAddress = attribute.Value;

                output.Add(emailItem.Key, record);
            }

            return(output);
        }
 /// <summary>
 /// Passes every direct child of <paramref name="node"/> to <c>ProcessNode</c>.
 /// </summary>
 /// <param name="node">Node whose child nodes are processed, in order.</param>
 /// <param name="outText">Writer handed to <c>ProcessNode</c> for each child.</param>
 private static void ProcessContent(HtmlNode node, TextWriter outText)
 {
     var children = node.ChildNodes;

     foreach (HtmlNode current in children)
     {
         ProcessNode(current, outText);
     }
 }
Ejemplo n.º 19
0
 /// <summary>
 /// Lazily yields the inner text of every link below <paramref name="container"/>
 /// whose <c>href</c> starts with the feedbooks item prefix.
 /// </summary>
 /// <param name="container">Node whose descendant <c>a</c> elements are scanned.</param>
 /// <returns>The matching link texts, in document order.</returns>
 private static IEnumerable<string> ExtractTitles(HtmlNode container)
 {
     // Example of a matching link:
     // <a href="http://fr.feedbooks.com/item/316137/les-les-de-l-espace" itemprop="url">Les Îles de l'espace</a>
     return container.Descendants("a")
         .Where(link => link.GetAttributeValue("href", "").StartsWith("http://fr.feedbooks.com/item/"))
         .Select(link => link.InnerText);
 }
Ejemplo n.º 20
0
        /// <summary>
        /// Appends a bordered, top-aligned row with two or three right-aligned cells
        /// to <paramref name="table"/>.
        /// </summary>
        /// <param name="table">Table node that receives the row; its owner document creates all new nodes.</param>
        /// <param name="s1">Inner HTML of the first cell.</param>
        /// <param name="s2">Inner HTML of the second cell.</param>
        /// <param name="s3">Inner HTML of the optional third cell; no cell is added when null or empty.</param>
        public static void AddColumnRowToTable(HtmlAgilityPack.HtmlNode table, string s1, string s2, string s3)
        {
            var document = table.OwnerDocument;

            // NOTE(review): the same two attribute instances are attached to several
            // elements, exactly as before — confirm that sharing is intended.
            HtmlAttribute borderStyle = document.CreateAttribute("style", "border:1px solid black;border-collapse:collapse;");
            HtmlAttribute alignRight  = document.CreateAttribute("align", "right");

            HtmlNode row = document.CreateElement("tr");
            row.Attributes.Add(document.CreateAttribute("valign", "top"));
            row.Attributes.Add(borderStyle);

            // The third cell only exists when s3 has content
            string[] cellContents = string.IsNullOrEmpty(s3)
                ? new[] { s1, s2 }
                : new[] { s1, s2, s3 };

            foreach (string content in cellContents)
            {
                HtmlNode cell = document.CreateElement("td");
                cell.Attributes.Add(borderStyle);
                cell.Attributes.Add(alignRight);
                cell.InnerHtml = content;
                row.ChildNodes.Add(cell);
            }

            table.ChildNodes.Add(row);
        }
Ejemplo n.º 21
0
 /// <summary>
 /// Wraps <paramref name="linkNode"/> in a new <c>div</c> with the class
 /// "regionMain fullWidth".
 /// </summary>
 /// <param name="linkNode">Node appended as child of the new div.</param>
 /// <returns>The new div.</returns>
 private static HtmlNode CreateContentHeaderNode(HtmlNode linkNode)
 {
     HtmlNode wrapper = HtmlNode.CreateNode("<div />");

     wrapper.SetAttributeValue("class", "regionMain fullWidth");
     wrapper.AppendChild(linkNode);

     return wrapper;
 }
        /// <summary>
        /// Extracts every configured child and stores it in <paramref name="container"/>
        /// under the child's name, leaving out empty objects and empty arrays.
        /// </summary>
        /// <param name="config">Section whose <c>Children</c> are extracted.</param>
        /// <param name="parentNode">Node passed to <c>Extract</c> as the parent.</param>
        /// <param name="container">JSON object receiving the extracted values.</param>
        /// <param name="logicalParents">Passed unchanged to <c>Extract</c>.</param>
        private void ExtractChildren(ConfigSection config, HtmlAgilityPack.HtmlNode parentNode, JObject container, List <HtmlAgilityPack.HtmlNode> logicalParents)
        {
            foreach (var entry in config.Children)
            {
                var name = entry.Key;

                var extracted = this.Extract(name: name, config: entry.Value, parentNode: parentNode, logicalParents: logicalParents);

                var asObject = extracted as JObject;
                var asArray  = extracted as JArray;

                bool isEmptyObject = asObject != null && asObject.Count <= 0;
                bool isEmptyArray  = asArray != null && asArray.Count <= 0;

                // Empty containers are not written at all
                if (isEmptyObject || isEmptyArray)
                {
                    continue;
                }

                container[name] = (JToken)extracted;
            }
        }
Ejemplo n.º 23
0
 /// <summary>
 /// Initializes <c>actual</c> and <c>max</c> from the text of <paramref name="node"/>.
 /// </summary>
 /// <param name="node">
 /// Node whose inner text, after <c>Utils.RemoveAllNotNumberCharacters</c>, is
 /// split on ',' into the two integers.
 /// </param>
 public Field(HtmlNode node)
 {
     String[] parts = Utils.RemoveAllNotNumberCharacters(node.InnerText).Split(',');

     this.actual = int.Parse(parts[0]);
     this.max = int.Parse(parts[1]);
 }
Ejemplo n.º 24
0
        /// <summary>
        /// Builds the list of date headings of a schedule page: a combined first
        /// entry followed by the text of every date separator.
        /// </summary>
        /// <param name="pageresult">Parsed schedule page.</param>
        /// <returns>
        /// Entry 0 is the first three whitespace-separated parts of the page heading
        /// plus the last part of the final separator; the remaining entries are the
        /// separator texts in document order.
        /// </returns>
        private static List <string> GetdateSeparatorList(HtmlAgilityPack.HtmlDocument pageresult)
        {
            string dateSeparatorXPath = "//*[contains(concat( \" \", @class, \" \" ), concat( \" \", \"w2g\", \" \" ))]";
            string daterangeXPath     = "//*[@id=\"ctl00_C_p\"]/div[@class=\"tvhead\"]/div[@class=\"tvheadtitle\"]/h2[@class=\"tvh2\"]";

            // Date separators, e.g. 2019. március 3. vasárnap. Typically there are 7 on a page.
            HtmlAgilityPack.HtmlNodeCollection dateSeparators = pageresult.DocumentNode.SelectNodes(dateSeparatorXPath);

            // Heading used to get the first day/date of the current week schedule
            HtmlAgilityPack.HtmlNode startdate = pageresult.DocumentNode.SelectSingleNode(daterangeXPath);

            List <string> dateSeparatorList = new List <string>();
            dateSeparatorList.Add(startdate.InnerText);

            // Fix up the first entry with the proper day of the week, taken from the
            // last part of the final separator
            string[] headingParts = startdate.InnerText.Split();
            string[] lastParts    = dateSeparators[dateSeparators.Count - 1].InnerText.Split();

            dateSeparatorList[0] = string.Join(" ", headingParts.Take(3).ToArray()) + " " + lastParts[lastParts.Length - 1];

            foreach (HtmlAgilityPack.HtmlNode dateSeparator in dateSeparators)
            {
                dateSeparatorList.Add(dateSeparator.InnerText);
            }

            return(dateSeparatorList);
        }
 /// <summary>
 /// Walks from <paramref name="root"/> to a fixed position in the tree and returns
 /// that node's children after passing them through <c>removeTextNodes</c>.
 /// </summary>
 /// <param name="root">Node whose second-to-last child is the starting point.</param>
 /// <returns>The result of <c>removeTextNodes</c> for the children of the located node.</returns>
 private HtmlNodeCollection NavigateToSubstitutionTableAndRemoveTextNodex (HtmlNode root)
 {
     // Sibling just before the last child of the root
     HtmlNode secondToLast = root.LastChild.PreviousSibling;

     // Its child at index 5 holds the nodes of interest
     HtmlNode target = secondToLast.ChildNodes[5];

     return removeTextNodes(target.ChildNodes);
 }
Ejemplo n.º 26
0
        /// <summary>
        /// Like <c>SelectNodes</c>, but returns a new collection constructed from
        /// <paramref name="node"/> instead of <c>null</c> when the query yields nothing.
        /// </summary>
        /// <param name="node">Node to query; must not be null.</param>
        /// <param name="xpath">XPath expression to evaluate.</param>
        /// <returns>The selected nodes, or a newly constructed collection.</returns>
        public static H.HtmlNodeCollection SelectNodesOrEmpty(this H.HtmlNode node, string xpath)
        {
            Requires.NonNull(node, nameof(node));

            H.HtmlNodeCollection matches = node.SelectNodes(xpath);
            if (matches != null)
            {
                return matches;
            }

            return new H.HtmlNodeCollection(node);
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Determines whether the given <c>li</c> node represents a vote.
        /// </summary>
        /// <param name="LiNode">Node to inspect.</param>
        /// <returns>
        /// <c>true</c> when the node has exactly three children and the class of the
        /// first one is one of the known vote flags; otherwise <c>false</c>.
        /// </returns>
        public bool isLINodeaVote(HtmlAgilityPack.HtmlNode LiNode)
        {
            if (!LiNode.HasChildNodes || LiNode.ChildNodes.Count != 3)
            {
                return false;
            }

            string flagClass = LiNode.ChildNodes.First().GetAttributeValue("class", "not found");

            // All the possible types of voting
            return flagClass == "flag yes"
                   || flagClass == "flag no"
                   || flagClass == "flag not-logged-in"
                   || flagClass == "flag refrained"
                   || flagClass == "flag excused";
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Replaces <paramref name="el"/> in its parent with a node created from its
        /// own inner text.
        /// </summary>
        /// <param name="el">Node to be replaced; its parent performs the replacement.</param>
        /// <returns>The newly created node.</returns>
        public static HAP.HtmlNode ReplaceWithInnerText(this HAP.HtmlNode el)
        {
            HAP.HtmlNode replacement = HAP.HtmlNode.CreateNode(el.InnerText);
            HAP.HtmlNode parent = el.ParentNode;

            parent.ReplaceChild(replacement, el);

            return replacement;
        }
        /// <summary>
        /// Builds a <c>Result</c> from one result row and the current course header.
        /// </summary>
        /// <param name="resultSet">Row whose first four <c>td</c> cells hold position, horse, jockey and starting price, each inside a <c>b</c> element.</param>
        /// <returns>
        /// The populated result. When the starting price is rejected by the
        /// <c>Price</c> constructor with an ArgumentException, all price fields are zero.
        /// </returns>
        private Result ParseResult(HtmlNode resultSet)
        {
            // _currentCourse holds "course<br>event"
            string[] courseSplit = _currentCourse.Split(new[] { "<br>" }, StringSplitOptions.RemoveEmptyEntries);
            string courseName = courseSplit[0];
            string eventName = courseSplit[1];

            var doc = new HtmlDocument();
            doc.LoadHtml(resultSet.OuterHtml);

            // Materialize once: the selector result was previously re-enumerated by
            // each of the four ElementAt calls.
            var cells = doc.DocumentNode.QuerySelectorAll("td").ToList();
            string position = cells[0].QuerySelector("b").InnerText;
            string name = cells[1].QuerySelector("b").InnerText;
            string jockey = cells[2].QuerySelector("b").InnerText;
            string startingPrice = cells[3].QuerySelector("b").InnerText.Trim();

            var price = new Price { DecimalPrice = 0, Denominator = 0, Numerator = 0 };
            try
            {
                price = new Price(startingPrice);
            }
            catch (ArgumentException)
            {
                // Deliberate best effort: an unparsable price keeps the zero default.
            }

            return new Result
            {
                CourseName = courseName,
                EventName = eventName,
                Position = position,
                HorseName = name,
                JockeyName = jockey,
                StartingPriceDecimal = price.DecimalPrice,
                StartingPriceDenominator = price.Denominator,
                StartingPriceNumerator = price.Numerator
            };
        }
Ejemplo n.º 30
0
 /// <summary>
 /// Click handler: converts the editor document, looks up the active element and
 /// shows it in the properties form.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void moduleProperties_Click(object sender, EventArgs e)
 {
     HtmlAgilityPack.HtmlDocument doc  = HTMLDocumentConverter.mshtmlDocToAgilityPackDoc(htmlEditor1.HtmlDocument2);
     HtmlAgilityPack.HtmlNode     elem = doc.GetElementbyId(this.activeElement.id);

     // Unsubscribe before subscribing so that repeated clicks do not stack up
     // handlers and fire propertiesForm_moduleChanged several times per change.
     CFormController.Instance.mainForm.propertiesForm.moduleChanged -= new ModuleChanged(propertiesForm_moduleChanged);
     CFormController.Instance.mainForm.propertiesForm.moduleChanged += new ModuleChanged(propertiesForm_moduleChanged);
     CFormController.Instance.mainForm.showProperties(elem);
 }
Ejemplo n.º 31
0
        /// <summary>
        /// Builds a market rate from a link node pointing at one of the two known
        /// index pages.
        /// </summary>
        /// <param name="monthNode">Link node; the rate text is read from the node two siblings after it.</param>
        /// <returns>
        /// The populated model, or <c>null</c> when the link's <c>href</c> is neither
        /// of the two known addresses. A missing <c>href</c> falls back to the first
        /// address.
        /// </returns>
        private MarketRateModel BuildMarketRate(HtmlNode monthNode)
        {
            const string ShanghaiUrl = "http://finance.ifeng.com/app/hq/stock/sh000001/";
            const string ShenzhenUrl = "http://finance.ifeng.com/app/hq/stock/sz399001/";

            var model = new MarketRateModel();

            var href = monthNode.GetAttributeValue("href", ShanghaiUrl);
            bool isShanghai = ShanghaiUrl == href;
            bool isShenzhen = !isShanghai && ShenzhenUrl == href;

            if (!isShanghai && !isShenzhen)
            {
                return null;
            }

            // The rate is the first space-separated token of the text
            string rateText = monthNode.NextSibling.NextSibling.InnerText.Trim().Split(' ')[0];

            model.Type = isShanghai ? RateType.StockShangzheng : RateType.StockShenzhen;
            model.Rate = decimal.Parse(rateText);

            model.CreateTime = DateTime.Now;
            model.RateDay = DateTime.Now.Date;
            model.Source = SourceType.eIfeng;

            return model;
        }
Ejemplo n.º 32
0
        /// <summary>
        /// Page load handler: posts a search request, parses the response and writes
        /// one "name—count—price" entry per result to the response.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            string heads = @"Accept: application/json, text/javascript, */* q=0.01 " +
                           @"Accept-Encoding: gzip, deflate " +
                           @"Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 " +
                           @"Connection: keep-alive " +
                           @"Cookie: s_ViewType=10; _lxsdk_cuid=1729cd29d3dc8-04d80d1c3b31398-4c302c7d-144000-1729cd29d3ec8; _lxsdk=1729cd29d3dc8-04d80d1c3b31398-4c302c7d-144000-1729cd29d3ec8; _hc.v=6c48a318-c117-5df7-478a-f0f694f1570e.1591768948; Hm_lvt_602b80cf8079ae6591966cc70a3940e7=1591768950,1591788446; _lxsdk_s=1729dfc18eb-4f6-3ef-94c%7C%7C19; Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1591788446 " +
                           @"Host: catdot.dianping.com " +
                           @"Referer: http:/www.dianping.com/search…/0_%E8%8B%B1%AF%AD%E5%9F%B9%AE " +
                           @"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0";
            string url = @"http://www.dianping.com/search/keyword/1/0_%E8%8B%B1%AF%AD%E5%9F%B9%AE";
            ClassHttpRequestClient s   = new ClassHttpRequestClient(true);
            HtmlDocument           doc = new HtmlDocument();
            string content             = "";
            string response            = s.httpPost(url, heads, content, Encoding.UTF8);

            // The response was previously never parsed, so the query below always ran
            // against an empty document and the loop crashed on a null collection.
            if (!string.IsNullOrEmpty(response))
            {
                doc.LoadHtml(response);
            }

            HtmlAgilityPack.HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class=\"txt\"]");
            StringBuilder sb = new StringBuilder();

            // SelectNodes returns null when nothing matches
            if (collection != null)
            {
                foreach (HtmlAgilityPack.HtmlNode item in collection)
                {
                    // NOTE(review): this looks for a div.txt directly inside a div.txt —
                    // confirm the intended class for the title container.
                    HtmlAgilityPack.HtmlNode divtit     = item.SelectSingleNode("div[@class=\"txt\"]");
                    HtmlAgilityPack.HtmlNode divcomment = item.SelectSingleNode("div[@class=\"comment\"]");
                    if (divtit == null || divcomment == null)
                    {
                        continue;
                    }

                    HtmlAgilityPack.HtmlNode aname  = divtit.SelectSingleNode("a[1]");
                    HtmlAgilityPack.HtmlNode anum   = divcomment.SelectSingleNode("a[1]");
                    HtmlAgilityPack.HtmlNode aprice = divcomment.SelectSingleNode("a[2]");
                    if (aname == null || anum == null || aprice == null)
                    {
                        // Incomplete entry: skip it instead of failing the whole page
                        continue;
                    }

                    sb.Append(string.Format("{0}—{1}—{2}", aname.InnerText, anum.InnerText, aprice.InnerText));
                }
            }
            Response.Write(sb);
        }
Ejemplo n.º 33
0
 /// <summary>
 /// Extracts the address stored between <c>un="</c> and the next quote in the
 /// element's inner HTML and returns it as an absolute URI.
 /// </summary>
 /// <param name="url">Ignored on input; the address is taken from <paramref name="elem"/>.</param>
 /// <param name="elem">Element whose inner HTML contains the address.</param>
 /// <returns>The absolute URI; "http://" is prepended when the extracted text does not start with "http".</returns>
 protected override string OnScrape(string url, HtmlNode elem)
 {
     string extracted = SubstringBetween(elem.InnerHtml, "un=\"", "\"");
     string absolute  = extracted.StartsWith("http") ? extracted : "http://" + extracted;

     return new Uri(absolute).AbsoluteUri;
 }
Ejemplo n.º 34
0
        /// <summary>
        /// Template showing how to parse HTML and reach the <c>body</c> node; the
        /// handling branches are placeholders.
        /// </summary>
        /// <param name="filePath">
        /// Text passed to <c>LoadHtml</c>.
        /// NOTE(review): the name suggests a file path, but <c>LoadHtml</c> parses its
        /// argument as markup (<c>Load</c> would read a file) — confirm which is intended.
        /// </param>
        public void HTMLAgilityPack(string filePath)
        {
            // There are various options, set as needed
            var htmlDoc = new HtmlAgilityPack.HtmlDocument { OptionFixNestedTags = true };

            htmlDoc.LoadHtml(filePath);

            // ParseErrors contains any errors from the load
            bool hasParseErrors = htmlDoc.ParseErrors != null && htmlDoc.ParseErrors.Any();
            if (hasParseErrors)
            {
                // Handle any parse errors as required
                return;
            }

            if (htmlDoc.DocumentNode == null)
            {
                return;
            }

            HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
            if (bodyNode != null)
            {
                // Do something with bodyNode
            }
        }
Ejemplo n.º 35
0
 /// <summary>
 /// Creates an opening-tag token for <paramref name="node"/>.
 /// </summary>
 /// <param name="id">Token id, forwarded to the base constructor.</param>
 /// <param name="node">Node whose name becomes the token name.</param>
 /// <param name="properties">Stored as the token's text properties.</param>
 /// <param name="parentID">Stored as the token's parent id.</param>
 public TagOpenToken(int id, HtmlNode node, TextVisualProperties properties, int parentID)
     : base(id)
 {
     this.Name = node.Name;
     this.TextProperties = properties;
     this.ParentID = parentID;
 }
        /// <summary>
        /// Downloads the published version list and raises
        /// <c>NewVersionUpdateExist</c> with a message saying whether
        /// <paramref name="currentVersion"/> is the latest.
        /// </summary>
        /// <param name="currentVersion">Version compared against the first published entry.</param>
        public void UpdateChecker(string currentVersion)
        {
            string github = "https://github.com/hanel2527/dcinisde-crawler.ver.2/blob/master/versions.txt";
            string text;

            // WebClient is IDisposable; release it as soon as the download finishes.
            using (var client = new WebClient())
            {
                client.Encoding = System.Text.Encoding.UTF8;
                text = client.DownloadString(github);
            }

            hap.HtmlDocument doc = new hap.HtmlDocument();
            doc.LoadHtml(text);

            // NOTE(review): a page without this table leaves myVersions null and the
            // next line throws — confirm whether that should be handled.
            hap.HtmlNode myVersions = doc.DocumentNode.
                                      SelectSingleNode("//table[@class='highlight tab-size js-file-line-container']");
            text = myVersions.InnerText.Trim();

            // The first whitespace-separated token is compared to the running version
            string[] versions = text.Split(new[] { ' ', '\r', '\n', '\t' }, StringSplitOptions.RemoveEmptyEntries);
            if (NewVersionUpdateExist != null)
            {
                if (versions[0].Equals(currentVersion))
                {
                    NewVersionUpdateExist("최신 버전입니다: " + versions[0], null);
                }
                else
                {
                    NewVersionUpdateExist("새로운 업데이트가 있습니다(클릭): " + versions[0], null);
                }
            }
        }
Ejemplo n.º 37
0
        /// <summary>
        /// Posts a search for <paramref name="id"/> to the HKEx news advanced-search page
        /// and builds the absolute URL of the first link whose href contains ".pdf".
        /// </summary>
        /// <param name="id">Identifier passed to <c>getPostData</c> to build the POST body.</param>
        /// <returns>
        /// The absolute PDF URL; <c>null</c> when the result page contains no PDF link;
        /// an empty string when fetching or parsing the page throws.
        /// </returns>
        public string GetPDFUrl(string id)
        {
            string pdfUrl    = string.Empty;
            string postData  = getPostData(id);
            // Named searchUrl (not "Uri") so the local does not shadow System.Uri.
            string searchUrl = "http://www.hkexnews.hk/listedco/listconews/advancedsearch/search_active_main.aspx";

            try
            {
                string pageSource = WebClientUtil.GetPageSource(searchUrl, 24000, postData);
                HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
                htmlDoc.LoadHtml(pageSource);
                HtmlAgilityPack.HtmlNode pdfLinkNode = htmlDoc.DocumentNode.SelectSingleNode("//a[contains(@href, '.pdf')]");
                if (pdfLinkNode == null)
                {
                    Logger.Log("There's no PDF file for ric " + id);
                    return(null);
                }

                // The XPath requires an href attribute, so it is present here.
                pdfUrl = "http://www.hkexnews.hk" + pdfLinkNode.Attributes["href"].Value;
            }
            catch (Exception ex)
            {
                // Previously the exception text was stored in an unused local and lost;
                // record it so failures can be diagnosed. The empty-string result is kept.
                Logger.Log("Failed to get PDF url for ric " + id + ": " + ex.ToString());
            }
            return(pdfUrl);
        }
Ejemplo n.º 38
0
 /// <summary>
 /// Loads the Google search page for <paramref name="phrase"/> and extracts the first
 /// number found inside the element with id <c>resultStats</c>.
 /// </summary>
 /// <param name="phrase">
 /// Search phrase appended verbatim to the query string.
 /// NOTE(review): the phrase is not URL-encoded here; confirm whether callers encode it.
 /// </param>
 /// <returns>
 /// The parsed whole number; 0 when no whole number could be parsed from the element;
 /// -1 when the page has parse errors, has no document node, or has no
 /// <c>resultStats</c> element.
 /// </returns>
 long NumOfHits(string phrase)
 {
     HtmlAgilityPack.HtmlWeb      web     = new HtmlAgilityPack.HtmlWeb();
     HtmlAgilityPack.HtmlDocument htmlDoc = web.Load("https://www.google.com/search?q=" + phrase);
     if (htmlDoc.ParseErrors != null && htmlDoc.ParseErrors.Any())
     {
         // Handle any parse errors as required
         System.Console.WriteLine("error");
         debug.Print("error\n");
         return(-1);
     }

     if (htmlDoc.DocumentNode == null)
     {
         return(-1);
     }

     HtmlAgilityPack.HtmlNode node = htmlDoc.DocumentNode.SelectSingleNode("//div[@id='resultStats']");
     if (node == null)
     {
         // SelectSingleNode returns null when the element is absent; avoid the NullReferenceException.
         return(-1);
     }

     Regex  re     = new Regex(@"[1-9](?:\d{0,2})(?:,\d{3})*(?:\.\d*[1-9])?|0?\.\d*[1-9]|0");
     String result = re.Match(node.InnerHtml).Value;

     // Strip thousands separators and parse. The previous code only parsed when the
     // text contained a comma, so counts below 1,000 were reported as 0.
     long hits;
     if (!long.TryParse(result.Replace(",", ""),
                        System.Globalization.NumberStyles.None,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out hits))
     {
         hits = 0;
     }
     return(hits);
 }
        /// <summary>
        /// Checks whether <paramref name="node"/> and every node beneath it is acceptable:
        /// text nodes always are, element nodes are when their name is in <c>allowedTags</c>
        /// and all of their children are acceptable, and any other node type is not.
        /// </summary>
        /// <param name="node">Root of the subtree to inspect.</param>
        /// <returns><c>true</c> when the whole subtree is acceptable; otherwise <c>false</c>.</returns>
        private bool IsAllowedTypeRecursive(HtmlAgilityPack.HtmlNode node)
        {
            switch (node.NodeType)
            {
                case HtmlAgilityPack.HtmlNodeType.Text:
                    return true;

                case HtmlAgilityPack.HtmlNodeType.Element:
                    break;

                default:
                    return false;
            }

            if (!allowedTags.Contains(node.Name))
            {
                return false;
            }

            var childNodes = node.ChildNodes;
            if (childNodes == null)
            {
                return true;
            }

            // One rejected descendant rejects the whole subtree.
            foreach (var childNode in childNodes)
            {
                if (!this.IsAllowedTypeRecursive(childNode))
                {
                    return false;
                }
            }

            return true;
        }
Ejemplo n.º 40
0
        /// <summary>
        /// Performs an HTTP GET on <paramref name="url"/>, parses the UTF-8 response as HTML
        /// and returns the inner text of the element with id <c>js-exportData</c>.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>
        /// The element's inner text, or an empty string when the element is missing or
        /// when any exception occurs (the exception message is shown in a message box).
        /// </returns>
        private string GetStringDataFormUrl(string url)
        {
            try
            {
                HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(url);
                myRequest.Method = "GET";

                string result;
                // using-blocks guarantee the response and reader are released even when
                // reading throws; the previous explicit Close() calls were skipped on error.
                using (WebResponse myResponse = myRequest.GetResponse())
                using (StreamReader sr = new StreamReader(myResponse.GetResponseStream(), System.Text.Encoding.UTF8))
                {
                    result = sr.ReadToEnd();
                }

                HtmlAgilityPack.HtmlDocument htmldoc = new HtmlAgilityPack.HtmlDocument();
                htmldoc.LoadHtml(result);
                HtmlAgilityPack.HtmlNode datanode = htmldoc.GetElementbyId("js-exportData");

                if (datanode != null)
                {
                    return(datanode.InnerText);
                }
            }
            catch (Exception e)
            {
                MessageBox.Show(e.Message + "\n" + url);
            }
            return("");
        }
Ejemplo n.º 41
0
        /// <summary>
        /// Builds an <c>Activity</c> from the markup under <paramref name="node"/>.
        /// The name comes from the first descendant div with class "action_prompt",
        /// the note from the first descendant li with class "stream_note", and the sets
        /// from every descendant li whose class is not "stream_note".
        /// </summary>
        /// <param name="node">Node containing the activity markup.</param>
        /// <param name="index">Value stored in <c>Sequence</c>.</param>
        /// <returns>The populated activity.</returns>
        /// <exception cref="InvalidDataException">No "action_prompt" div was found.</exception>
        private static Activity ExtractActivity(HtmlNode node, int index)
        {
            var activityName =
                (from div in node.Descendants("div")
                 where div.GetAttributeValue("class", null) == "action_prompt"
                 select HtmlEntity.DeEntitize(div.InnerText).Trim().Replace("  ", " "))
                .FirstOrDefault();

            if (activityName == null)
            {
                throw new InvalidDataException("Unable to find activity name");
            }

            var activityNote =
                (from li in node.Descendants("li")
                 where li.GetAttributeValue("class", null) == "stream_note"
                 select HtmlEntity.DeEntitize(li.InnerText).Trim())
                .FirstOrDefault();

            var activitySets = node.Descendants("li")
                                   .Where(li => li.GetAttributeValue("class", null) != "stream_note")
                                   .Select(ExtractSet)
                                   .ToList();

            var activity = new Activity();
            activity.Sequence = index;
            activity.Name = activityName;
            activity.Note = activityNote;
            activity.Sets = activitySets;
            return activity;
        }
Ejemplo n.º 42
0
        /// <summary>
        /// Tests whether <paramref name="node"/> satisfies a CSS attribute selector.
        /// </summary>
        /// <param name="node">Node whose attribute is inspected.</param>
        /// <param name="attribute">
        /// Selector part: <c>Operand</c> is the attribute name, <c>Operator</c> the match
        /// kind and <c>Value</c> the text to compare (surrounding spaces and quotes are trimmed).
        /// </param>
        /// <returns>
        /// <c>false</c> when the attribute is absent or the operator is not handled;
        /// otherwise the result of the comparison. All comparisons are ordinal.
        /// </returns>
        internal static bool IsMatch(this HtmlAgilityPack.HtmlNode node, ExCSS.Model.Attribute attribute)
        {
            var attr = node.Attributes[attribute.Operand];

            if (attr == null)
            {
                return(false);
            }

            var value = attr.Value;
            var test  = (attribute.Value ?? "").Trim(' ', '\'', '"');
            switch (attribute.Operator)
            {
            case AttributeOperator.BeginsWith:
                // Ordinal: selector matching must not depend on the current culture.
                return(value.StartsWith(test, System.StringComparison.Ordinal));

            case AttributeOperator.Contains:
                return(value.Contains(test));

            case AttributeOperator.EndsWith:
                return(value.EndsWith(test, System.StringComparison.Ordinal));

            case AttributeOperator.Equals:
                return(value == test);

            case AttributeOperator.Hyphenated:
                // [att|=val]: the value is exactly "val" or begins with "val-".
                // Matching any hyphen-separated part (the old behaviour) accepted
                // e.g. "foo-en" for "en", which the selector must not match.
                return(value == test || value.StartsWith(test + "-", System.StringComparison.Ordinal));

            case AttributeOperator.InList:
                // [att~=val]: the value is a whitespace-separated list of words; split on
                // every whitespace kind and ignore empty entries so an empty val never matches.
                return(value.Split(new[] { ' ', '\t', '\r', '\n', '\f' }, System.StringSplitOptions.RemoveEmptyEntries)
                            .Contains(test));

            case AttributeOperator.None:
                return(true);
            }

            return(false);
        }
Ejemplo n.º 43
0
 /// <summary>
 /// Fills <c>Status</c>, <c>Icon</c>, <c>Title</c>, <c>Sender</c> and <c>Date</c> from the
 /// td cells of <paramref name="rowNode"/>, located by their class attribute
 /// ("status", "icon", "title", "sender", "date").
 /// </summary>
 /// <param name="rowNode">Row node whose descendant cells are read.</param>
 /// <remarks>
 /// A missing cell or image yields an empty string instead of the
 /// NullReferenceException the unchecked <c>FirstOrDefault()</c> chain used to throw.
 /// </remarks>
 public void Parse(HtmlNode rowNode)
 {
     Status = GetFirstImageSource(FindCellByClass(rowNode, "status"));
     Icon   = GetFirstImageSource(FindCellByClass(rowNode, "icon"));
     Title  = GetCellText(FindCellByClass(rowNode, "title"));
     Sender = GetCellText(FindCellByClass(rowNode, "sender"));
     Date   = GetCellText(FindCellByClass(rowNode, "date"));
 }

 // Returns the first descendant td whose class attribute equals cssClass, or null.
 private static HtmlNode FindCellByClass(HtmlNode rowNode, string cssClass)
 {
     return rowNode.Descendants("td")
         .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals(cssClass));
 }

 // Returns the src of the first img inside cell, or an empty string when either is missing.
 private static string GetFirstImageSource(HtmlNode cell)
 {
     if (cell == null)
     {
         return string.Empty;
     }

     HtmlNode image = cell.Descendants("img").FirstOrDefault();
     return image == null ? string.Empty : image.GetAttributeValue("src", string.Empty);
 }

 // Returns the inner text of cell, or an empty string when the cell is missing.
 private static string GetCellText(HtmlNode cell)
 {
     return cell == null ? string.Empty : cell.InnerText;
 }
Ejemplo n.º 44
0
 /// <summary>
 /// Parses an HTML string (for example response.ResponseHtml) and converts it to an HtmlNode.
 /// </summary>
 /// <param name="chtml">HTML markup to load, e.g. response.ResponseHtml.</param>
 /// <returns>The <c>DocumentNode</c> of the document loaded from <paramref name="chtml"/>.</returns>
 public static HtmlNode ToHtmlNode(string chtml)
 {
     var document = new HtmlDocument();
     document.LoadHtml(chtml);
     return document.DocumentNode;
 }
Ejemplo n.º 45
0
        /// <summary>
        /// Maps the td cells of one table row onto a new <c>Nhl_Players_Bio_Goalie</c>.
        /// Cells 0-20 are read by position; <c>Position</c> is always "G".
        /// </summary>
        /// <param name="row">Row node whose direct td children hold the values.</param>
        /// <param name="nhlSeasonType">Value stored in <c>NhlSeasonType</c>.</param>
        /// <param name="year">Value stored in <c>Year</c>.</param>
        /// <returns>The populated model.</returns>
        /// <remarks>
        /// The row must contain at least 21 td cells; no check is made here.
        /// NOTE(review): the birth date is still parsed with the current culture - confirm
        /// the expected source format before switching it to a fixed culture.
        /// </remarks>
        private static Nhl_Players_Bio_Goalie MapHtmlRowToModel(HtmlNode row, NhlSeasonType nhlSeasonType, int year)
        {
            HtmlNodeCollection tdNodes = row.SelectNodes(@"./td");

            // Decimal statistics in the page use '.' as separator; parse them with the
            // invariant culture so the result does not depend on the machine's locale.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            Nhl_Players_Bio_Goalie model = new Nhl_Players_Bio_Goalie();

            model.NhlSeasonType = nhlSeasonType;
            model.Year = year;

            model.Number = ConvertStringToInt(tdNodes[0].InnerText);
            model.Name = tdNodes[1].InnerText;
            model.Team = tdNodes[2].InnerText;
            model.Position = "G";
            model.DateOfBirth = Convert.ToDateTime(tdNodes[3].InnerText.Replace("'", "/"));
            model.BirthCity = tdNodes[4].InnerText;
            model.StateOrProvince = tdNodes[5].InnerText;
            model.BirthCountry = tdNodes[6].InnerText;
            model.HeightInches = ConvertStringToInt(tdNodes[7].InnerText);
            model.WeightLbs = ConvertStringToInt(tdNodes[8].InnerText);
            model.Catches = tdNodes[9].InnerText;
            model.Rookie = tdNodes[10].InnerText;
            model.DraftYear = ConvertStringToInt(tdNodes[11].InnerText);
            model.DraftRound = ConvertStringToInt(tdNodes[12].InnerText);
            model.DraftOverall = ConvertStringToInt(tdNodes[13].InnerText);

            model.GamesPlayed = ConvertStringToInt(tdNodes[14].InnerText);
            model.Wins = ConvertStringToInt(tdNodes[15].InnerText);
            model.Losses = ConvertStringToInt(tdNodes[16].InnerText);
            model.OTSOLosses = ConvertStringToInt(tdNodes[17].InnerText);
            model.GAA = Convert.ToDouble(tdNodes[18].InnerText, invariant);
            model.SavePercentage = Convert.ToDouble(tdNodes[19].InnerText, invariant);
            model.Shutouts = ConvertStringToInt(tdNodes[20].InnerText);

            return model;
        }
Ejemplo n.º 46
0
    /// <summary>
    /// Adds menu entries to <paramref name="menu"/> from the first td under
    /// <paramref name="node"/>. When that cell holds more than two &lt;br&gt; elements,
    /// every "&lt;br&gt;" in its markup is rewritten to "&lt;/div&gt;&lt;div&gt;" and the text of each
    /// resulting div is added; otherwise the text of the first divs is added, one per &lt;br&gt;.
    /// </summary>
    /// <param name="menu">Target whose <c>menu</c> list receives the entries.</param>
    /// <param name="node">Node containing the td cells.</param>
    /// <remarks>
    /// Nothing is added when there is no td or the first td contains no div.
    /// SelectNodes returns null (not an empty collection) when nothing matches, so
    /// every result is checked before use.
    /// </remarks>
    public static void Classification(Menu menu, agi.HtmlNode node)
    {
        agi.HtmlNodeCollection divide_td = node.SelectNodes(".//td");
        if (divide_td == null)
        {
            return;
        }

        agi.HtmlNode first_td = divide_td[0];

        agi.HtmlNodeCollection check_div = first_td.SelectNodes(".//div");
        if (check_div == null)
        {
            return;
        }

        // A cell without <br> previously crashed on check_br.Count.
        agi.HtmlNodeCollection check_br = first_td.SelectNodes(".//br");
        int count = check_br == null ? 0 : check_br.Count;

        if (count > 2)
        {
            String text = first_td.InnerHtml;
            text = text.Replace("<br>", "</div><div>");
            first_td.InnerHtml = text;

            agi.HtmlNodeCollection tmp = first_td.SelectNodes(".//div");
            if (tmp == null)
            {
                return;
            }

            for (int i = 0; i < tmp.Count; i++)
            {
                menu.menu.Add(tmp[i].InnerText);
            }
        }
        else
        {
            // The loop is driven by the <br> count; cap it by the number of divs
            // so a cell with fewer divs than <br> elements cannot index out of range.
            int limit = System.Math.Min(count, check_div.Count);
            for (int i = 0; i < limit; i++)
            {
                menu.menu.Add(check_div[i].InnerText);
            }
        }
    }
Ejemplo n.º 47
0
        /// <summary>
        /// Creates the report table under <paramref name="node"/> and adds four rows:
        /// the total patient count, then the prevalence, incidence and "seen in cancer
        /// genetics" counts, each with its percentage of <c>highrisk.denominator</c>.
        /// </summary>
        /// <param name="node">Node passed to <c>UIUtils.CreateReportTableNode</c>.</param>
        /// <returns>The table node returned by <c>UIUtils.CreateReportTableNode</c>.</returns>
        /// <remarks>
        /// NOTE(review): previously the "All" and "New" rows both displayed
        /// <c>high_risk_incidence</c> with the percentage of <c>high_risk_prevelance</c>,
        /// i.e. two identical rows. The rows now pair each label with its own figure
        /// (All = prevalence, New = incidence) - confirm this matches the intended report.
        /// </remarks>
        private HtmlNode GetTable1Node(HtmlNode node)
        {
            HtmlNode table = UIUtils.CreateReportTableNode(node);
            double denominator = (double)highrisk.denominator;

            UIUtils.AddColumnRowToTable(table,
                                           "Total Number of Patients",
                                            highrisk.denominator.ToString("#,###,###"),"");

            // All high-risk patients: prevalence count and prevalence percentage.
            int percent = (int)Math.Round(100 * (double)high_risk_prevelance / denominator, 0);

            UIUtils.AddColumnRowToTable(table,
                                           "All High Risk BRCA Patients",
                                            high_risk_prevelance.ToString("#,###,###"),
                                            percent.ToString() + "%");

            // Newly identified high-risk patients: incidence count and incidence percentage.
            percent = (int)Math.Round(100 * (double)high_risk_incidence / denominator, 0);

            UIUtils.AddColumnRowToTable(table,
                                           "New High Risk BRCA",
                                            high_risk_incidence.ToString("#,###,###"),
                                            percent.ToString() + "%");

            percent = (int)Math.Round(100 * (double)high_risk_seenInRC / denominator, 0);

            UIUtils.AddColumnRowToTable(table,
                                           "All High Risk BRCA Seen In Cancer Genetics",
                                            high_risk_seenInRC.ToString("#,###,###"),
                                            percent.ToString() + "%");
            return table;
        }
Ejemplo n.º 48
0
        /// <summary>
        /// Reads two numbers that are spelled out one character at a time in the attribute
        /// values of <paramref name="item"/>'s child nodes, and computes their ratio.
        /// </summary>
        /// <param name="item">Node whose children's attributes carry the characters.</param>
        /// <returns>
        /// A three-element array: [0] the characters collected before a "slash" attribute,
        /// [1] the characters collected after it, [2] [0] divided by [1] formatted with "P".
        /// </returns>
        /// <remarks>
        /// <c>int.Parse</c> throws when either collected string is not a valid integer.
        /// </remarks>
        private string[] GetValueFromClears(HtmlAgilityPack.HtmlNode item)
        {
            var parts = new[] { "", "", "" };
            var slot  = 0;

            foreach (var child in item.ChildNodes)
            {
                foreach (var attr in child.Attributes)
                {
                    // Drop the fixed class prefix; what remains is a single character or a keyword.
                    var token = attr.Value.Replace("typography typography-", "");

                    if (token.Length > 1)
                    {
                        // "slash" separates the first number from the second; other keywords are ignored.
                        if (token == "slash")
                        {
                            slot = 1;
                        }

                        continue;
                    }

                    parts[slot] += token;
                }
            }

            int numerator   = int.Parse(parts[0]);
            int denominator = int.Parse(parts[1]);
            parts[2] = ((float)numerator / (float)denominator).ToString("P");

            return parts;
        }
Ejemplo n.º 49
0
 /// <summary>
 /// Creates an <c>ActionUrlNode</c> from <paramref name="htmlNode"/>: the base node is built
 /// with the same node type and owner document (index -1), then the name is copied,
 /// <c>CopyFrom(htmlNode, false)</c> is applied and <c>RouteValues</c> starts out empty.
 /// </summary>
 /// <param name="htmlNode">Node to copy from.</param>
 public ActionUrlNode(HtmlNode htmlNode)
     : base(htmlNode.NodeType, htmlNode.OwnerDocument, -1)
 {
     this.Name = htmlNode.Name;
     this.CopyFrom(htmlNode, false);
     this.RouteValues = new Dictionary<string, RouteValueDictionary>();
 }
Ejemplo n.º 50
0
        /// <summary>
        /// Returns the outer HTML of <paramref name="HTMLContent"/> with every tab,
        /// line-feed and carriage-return character removed.
        /// </summary>
        /// <param name="HTMLContent">Node whose <c>OuterHtml</c> is flattened.</param>
        /// <returns>The markup on a single line, without tabs.</returns>
        public static string ProcessPageContentToString(HtmlAgilityPack.HtmlNode HTMLContent)
        {
            string markup = HTMLContent.OuterHtml;
            return Regex.Replace(markup, @"\t|\n|\r", "");
        }
        /// <summary>
        /// Builds a <c>Book</c> from one table row and adds it to <c>items</c> unless its
        /// web-site name contains one of the two filter strings or an item with the same
        /// web-site name is already present.
        /// </summary>
        /// <param name="tr">Row whose td cells hold the chapter link, the web-site link and the index link.</param>
        private void ParseChapterRow(HtmlNode tr)
        {
            var book = new Book();

            // Cell layout: first td = chapter link, td #1 = web-site link, td #2 = index-page link.
            var chapterCell = HtmlParseHelper.GetSingleDirectChildByType(tr, "td");
            var chapterLink = HtmlParseHelper.GetSingleDirectChildByType(chapterCell, "a");
            var siteCell = HtmlParseHelper.GetSingleDirectChildByTypeAndIndex(tr, "td", 1);
            var siteLink = HtmlParseHelper.GetSingleDirectChildByType(siteCell, "a");
            var indexCell = HtmlParseHelper.GetSingleDirectChildByTypeAndIndex(tr, "td", 2);
            var indexLink = HtmlParseHelper.GetSingleDirectChildByType(indexCell, "a");

            book.LastUpdateTime = DateTime.Now;
            book.Name = metaData.Name.Trim();
            book.IndexPage = new Uri("http://www.xiaoelang.com" + indexLink.Attributes["href"].Value, UriKind.Absolute);
            book.LastestUpdateChapterName = chapterLink.InnerText.Trim();
            book.WebSite = new WebSite();
            book.WebSite.WebSiteName = siteLink.InnerText.Trim();

            var siteName = book.WebSite.WebSiteName;
            if (siteName.Contains(websiteFilter1) || siteName.Contains(websiteFilter2))
            {
                return;
            }

            // Keep only the first book seen for each web site.
            var alreadyListed = items.Any(existing => existing.WebSite.WebSiteName == book.WebSite.WebSiteName);
            if (!alreadyListed)
            {
                items.Add(book);
            }
        }
Ejemplo n.º 52
0
        /// <summary>
        /// Walks the previous siblings of <paramref name="node"/>, nearest first, and
        /// concatenates their de-entitized, trimmed inner text separated by single spaces.
        /// </summary>
        /// <param name="node">Starting node; when null an empty string is returned.</param>
        /// <param name="parentNode">Node whose text is skipped when it is met among the siblings.</param>
        /// <param name="foundParent">
        /// Set to <c>true</c> when a visited sibling reference (including the final null)
        /// equals <paramref name="parentNode"/>; never reset to <c>false</c> here.
        /// </param>
        /// <returns>The collected text, trimmed.</returns>
        public string GetTextFromSiblings(HtmlAgilityPack.HtmlNode node, HtmlAgilityPack.HtmlNode parentNode, ref bool foundParent)
        {
            if (node == null)
            {
                return string.Empty;
            }

            var collected = new StringBuilder();
            var current = node.PreviousSibling;

            while (true)
            {
                if (current == parentNode)
                {
                    foundParent = true;
                }
                else if (current != null)
                {
                    var rawText = current.InnerText;

                    if (!string.IsNullOrWhiteSpace(rawText))
                    {
                        collected.Append(HtmlEntity.DeEntitize(rawText).Trim());
                        collected.Append(" ");
                    }
                }

                // The walk ends only once the sibling chain is exhausted.
                if (current == null)
                {
                    break;
                }

                current = current.PreviousSibling;
            }

            return collected.ToString().Trim();
        }
        /// <summary>
        /// Populates <c>url</c>, <c>title</c> and <c>imageUrl</c> from the first matching
        /// link, title span and image found under <paramref name="node"/>.
        /// </summary>
        /// <param name="node">Node containing the video entry markup.</param>
        public YoutubeVideoEntry(HtmlNode node)
        {
            // First link with an href: keep the part before the first '&' as the video url.
            var anchor = node.SelectSingleNode(".//a[@href]");
            if (anchor != null)
            {
                var href = anchor.Attributes["href"].Value;
                var ampersandAt = href.IndexOf("&");

                if (ampersandAt > 0)
                {
                    url = "http://www.youtube.com" + href.Substring(0, ampersandAt);
                }
            }

            var titleSpan = node.SelectSingleNode(".//span[contains(@class, 'video-title')]");
            if (titleSpan != null)
            {
                title = titleSpan.InnerText;
            }

            if (!String.IsNullOrEmpty(title))
            {
                title = title.Trim();
            }

            // The src value gets "http:" prepended.
            var image = node.SelectSingleNode(".//img[@src]");
            if (image != null)
            {
                imageUrl = "http:" + image.Attributes["src"].Value;
            }
        }
Ejemplo n.º 54
0
        /// <summary>
        /// Returns the text of the row's "name" cell, with "?" appended when the row's
        /// description cell contains a span of class "optional".
        /// </summary>
        /// <param name="row">Table row to read.</param>
        /// <returns>The variable name, suffixed with "?" for optional variables.</returns>
        private string VariableNameResolver(HtmlNode row)
        {
            var optionalMarker = row.SelectSingleNode(@".//td[contains(@class,'description')]/span[@class='optional']");
            var nameCell = row.SelectSingleNode(@".//td[@class=""name"" ]");

            var suffix = optionalMarker == null ? "" : "?";
            return nameCell.InnerText + suffix;
        }
Ejemplo n.º 55
0
		/// <summary>
		/// Builds a <c>Gear</c> from <paramref name="gearNode"/> using the node getters
		/// supplied by <paramref name="config"/>: the image uris, the main power svg uri
		/// and the svg uris of sub powers 1 to 3.
		/// </summary>
		/// <param name="gearNode">Node describing a single gear.</param>
		/// <param name="config">Getters used to locate the relevant child nodes.</param>
		/// <returns>The populated gear.</returns>
		private static Gear ParseGear(HtmlNode gearNode, ProfileParseConfig config)
		{
			var result = new Gear();
			var gearScope = new[] { gearNode };

			var imageUris = ParseImage(config.GearImageGetter(gearScope).Single(), config);
			result.ImageUri = imageUris.Item1;
			result.RetinaImageUri = imageUris.Item2;

			result.GearPowerMainSvgUri =
				ParseImageUriFromStyle(config.GearPowerMainSvgGetter(gearScope).Single(), config);

			var subScope = new[] { config.GearPowerSubGetter(gearScope).Single() };

			result.GearPowerSub1SvgUri =
				ParseImageUriFromStyle(config.GearPowerSub1Getter(subScope).Single(), config);

			try
			{
				result.GearPowerSub2SvgUri =
					ParseImageUriFromStyle(config.GearPowerSub2Getter(subScope).Single(), config);

				result.GearPowerSub3SvgUri =
					ParseImageUriFromStyle(config.GearPowerSub3Getter(subScope).Single(), config);
			}
			catch (ArgumentOutOfRangeException)
			{
				// Deliberately ignored: sub powers 2 and 3 are left unset when this is thrown.
				// NOTE(review): presumably a gear may have fewer sub slots - confirm.
			}

			return result;
		}
Ejemplo n.º 56
0
        /// <summary>
        /// Parses an HTML message body, caches the message, product and contact nodes in
        /// fields, and returns the extracted values as a 13-element row.
        /// </summary>
        /// <param name="subject">Message subject, stored in the <c>subject</c> field.</param>
        /// <param name="body">HTML body of the message.</param>
        /// <returns>
        /// [0] 0, [1] ip, [2] origin, [3] product, [4] name, [5] mail, [6] country,
        /// [7] telephone, [8] company, [9] address, [10] fax, [11] and [12] empty strings.
        /// </returns>
        public object[] Parse(string subject, string body)
        {
            this.subject = subject;

            var document = new HtmlDocument();
            document.LoadHtml(body);
            var root = document.DocumentNode;

            this.messageInfo = root.SelectSingleNode(@"//body/table/tr[2]/td/div/div[2]");
            if (messageInfo != null)
            {
                messageText = messageInfo.InnerText.Replace("\t", "").Replace("\r\n", " ");
            }

            this.productInfo = root.SelectSingleNode(@"//body/table/tr[2]/td/table/tr[2]/td[2]/a/strong");
            this.cantactInfo = root.SelectSingleNode(@"//body/table/tr[2]/td/table[3]/tr/td");

            // Elements are evaluated left to right, i.e. in the same order as the columns.
            return new object[]
            {
                0,
                GetMsgIp(),
                GetOrigin(),
                GetProduct(),
                GetName(),
                GetMail(),
                GetCountry(),
                GetTelephone(),
                GetCompany(),
                GetAddress(),
                GetFax(),
                string.Empty,
                string.Empty
            };
        }
Ejemplo n.º 57
0
        /// <summary>
        /// Depth-first search for the stream position at which the accumulated text
        /// length reaches <paramref name="maxCount"/>.
        /// </summary>
        /// <param name="currentNode">Node to examine, together with its descendants.</param>
        /// <param name="currentCount">
        /// Running total of text characters seen so far; increased by the length of every
        /// text node visited.
        /// </param>
        /// <param name="maxCount">Number of text characters allowed.</param>
        /// <returns>
        /// The text node's <c>StreamPosition</c> plus the characters still allowed inside
        /// it, or -1 when the limit is not reached in this subtree.
        /// </returns>
        protected virtual int FindLimitIndex(HtmlNode currentNode, ref int currentCount, int maxCount)
        {
            if (currentNode.NodeType == HtmlNodeType.Text)
            {
                var countBefore = currentCount;
                currentCount = countBefore + currentNode.InnerText.Length;

                if (currentCount >= maxCount)
                {
                    return currentNode.StreamPosition + (maxCount - countBefore);
                }
            }

            if (!currentNode.HasChildNodes)
            {
                return -1;
            }

            foreach (var child in currentNode.ChildNodes)
            {
                var found = FindLimitIndex(child, ref currentCount, maxCount);
                if (found != -1)
                {
                    return found;
                }
            }

            return -1;
        }
Ejemplo n.º 58
0
 /// <summary>
 /// Picks the first direct child of <paramref name="node"/> whose attribute named
 /// <paramref name="tag"/> equals <paramref name="att"/> and returns
 /// <c>GetListNode</c> for that child.
 /// </summary>
 /// <param name="node">Parent node whose direct children are searched.</param>
 /// <param name="tag">Name of the attribute to compare (despite the parameter name).</param>
 /// <param name="att">Attribute value a child must have to be selected.</param>
 /// <param name="remove_text">Forwarded unchanged to <c>GetListNode</c>.</param>
 /// <returns>The list produced by <c>GetListNode</c> for the selected child.</returns>
 /// <exception cref="ArgumentOutOfRangeException">No child has the requested attribute value.</exception>
 public static List<HtmlNode> GetListNodeToTag(HtmlNode node, string tag,string att, bool remove_text = false)
 {
     // Step into the matching child node.
     var matchingChildren = node.ChildNodes
         .Where(child => child.GetAttributeValue(tag, "") == att)
         .ToList();

     return GetListNode(matchingChildren[0], remove_text);
 }
        /// <summary>
        /// Builds an <c>ArticleInfo</c> from a headline div: url and title come from the
        /// "h3/a" link, the publication date from the "headline-date" div and the comment
        /// count from the optional "commentCount" link.
        /// </summary>
        /// <param name="articleDiv">Div containing one headline.</param>
        /// <returns>The populated article info.</returns>
        /// <exception cref="CommonParsingException">
        /// The url contains "/video/", or the url's "id" query parameter converts to 0.
        /// </exception>
        private static ArticleInfo ParseArticleInfoDiv(HtmlNode articleDiv)
        {
            var titleLink = articleDiv.SelectSingleNode("h3/a");
            var publishedNode = articleDiv.SelectSingleNode("div[@class='headline-date']");
            var commentLink = articleDiv.SelectSingleNode("h3/a[@class='commentCount']");

            var info = new ArticleInfo();
            info.Url = titleLink.Attributes["href"].Value;

            if (info.Url.Contains(@"/video/"))
            {
                throw new CommonParsingException("Delfi TV article");
            }

            info.Id.ExternalId = info.Url.GetQueryParameterValueFromUrl("id");
            info.Title = titleLink.InnerText;
            info.DatePublished = DelfiWordyDateParser.Parse(publishedNode.InnerText);
            info.DateScraped = DateTime.UtcNow.AddHours(2);
            info.Id.Portal = Portal.Delfi;

            // The count is rendered as "(N)"; a missing link means no comments.
            if (commentLink == null)
            {
                info.CommentCount = 0;
            }
            else
            {
                info.CommentCount = Convert.ToInt32(commentLink.InnerText.TrimStart('(').TrimEnd(')'));
            }

            if (Convert.ToInt32(info.Url.GetQueryParameterValueFromUrl("id")) == 0)
            {
                throw new CommonParsingException("Article id not found");
            }

            return info;
        }
Ejemplo n.º 60
0
        /// <summary>
        /// Downloads the page at <paramref name="address"/> and fills
        /// <paramref name="hastcIdx"/> from the elements with ids "IDX" (index value) and
        /// "QTY" (total quantity), parsed with the "en-US" culture. <c>TotalAmt</c> is set to 0.
        /// </summary>
        /// <param name="address">URL of the page to download.</param>
        /// <param name="hastcIdx">Market data object that receives the parsed values.</param>
        /// <returns><c>true</c> when both elements were found; otherwise <c>false</c>.</returns>
        static bool Get_IDX_HASTC(string address, ref MarketData hastcIdx)
        {
            bool        retVal      = true;
            CultureInfo dataCulture = common.language.GetCulture("en-US");
            string      htmlContent;

            // Direct casts instead of "as": a non-HTTP address now fails with a clear
            // InvalidCastException rather than a later NullReferenceException.
            HttpWebRequest wRequest = (HttpWebRequest)HttpWebRequest.Create(new Uri(address));

            // Dispose the response and reader deterministically; they were never closed before.
            using (HttpWebResponse wResponse = (HttpWebResponse)wRequest.GetResponse())
            using (StreamReader reader = new StreamReader(wResponse.GetResponseStream()))
            {
                htmlContent = reader.ReadToEnd();
            }

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(htmlContent);
            HtmlAgilityPack.HtmlNode nodeHNXIndex = doc.GetElementbyId("IDX");
            HtmlAgilityPack.HtmlNode nodeTongKL   = doc.GetElementbyId("QTY");
            if (nodeHNXIndex != null)
            {
                hastcIdx.Value = decimal.Parse(nodeHNXIndex.InnerHtml, dataCulture);
            }
            else
            {
                retVal = false;
            }
            if (nodeTongKL != null)
            {
                hastcIdx.TotalQty = decimal.Parse(nodeTongKL.InnerHtml, dataCulture);
            }
            else
            {
                retVal = false;
            }
            hastcIdx.TotalAmt = 0;
            return(retVal);
        }