SelectSingleNode() public method

Selects the first HtmlNode that matches the XPath expression. Returns null when no node matches.
public SelectSingleNode ( string xpath ) : HtmlNode
xpath string The XPath expression. May not be null.
return HtmlNode
Exemplo n.º 1
0
        /// <summary>
        /// Builds a <see cref="Segment"/> from the markup of a single schedule entry.
        /// </summary>
        /// <param name="segmentDiv">Element whose direct children are the image-area and text-area divs.</param>
        /// <param name="day">Day description stored on the segment and then parsed into its date.</param>
        /// <returns>The populated segment, including the parsed duration in minutes and date.</returns>
        public Segment GetSegment(HtmlNode segmentDiv, string day)
        {
            var imageArea = segmentDiv.SelectSingleNode("./div[@class='programListContentImgArea']");
            var textArea = segmentDiv.SelectSingleNode("./div[@class='programListContentTextArea scheduleColumnTextArea']");

            var titleLink = textArea.GetChildById("hypProgramTxt");
            var authorLabel = textArea.GetChildById("lblAuthor");
            var thumbnail = imageArea.SelectSingleNode(".//img");

            //TODO: Move url building (below) into UrlBuilder class

            var segment = new Segment
            {
                DayDescription = day,
                Title = titleLink.DecodeHtml(),
                SegmentDetailUrl = configurationManager.BookTvDomain + titleLink.GetAttributeValue("href", ""),
                Time = textArea.GetChildById("lblAirTime").DecodeHtml(),
                Duration = textArea.GetChildById("lblLength").DecodeHtml(),
                // The author label is the only element treated as optional here.
                Author = (authorLabel != null) ? authorLabel.DecodeHtml() : "",
                ImageUrl = configurationManager.BookTvDomain + thumbnail.Attributes["src"].Value,
            };

            // Derived values are computed from the raw strings captured above.
            segment.DurationInMinutes = durationParser.GetDurationInMinutes(segment.Duration);
            segment.Date = dayParser.Parse(segment.DayDescription);

            return segment;
        }
        /// <summary>
        /// Reads the basic product fields (ids, quantities, image, price, description)
        /// from a product element's attributes and nested markup.
        /// </summary>
        /// <param name="productNode">Product element carrying the DdPiD/pbcatid/qty/iq/inb attributes.</param>
        /// <returns>A populated <see cref="ProductBasicData"/>.</returns>
        private ProductBasicData BuildProductBasicData(HtmlNode productNode)
        {
            log.DebugFormat("[BuildProductBasicData] OuterHtml= {0}.", productNode.OuterHtml);

            ProductBasicData product = new ProductBasicData();
            var attributes = productNode.Attributes;

            // The barcode should eventually be mapped properly; for now both fields
            // carry the "DdPiD" attribute (the barcode's last digits are the product id).
            var productId = attributes.First(a => a.OriginalName == "DdPiD").Value;
            product.Barcode = productId;
            product.ProductId = productId;

            product.pbcatid = attributes.First(a => a.OriginalName == "pbcatid").Value;
            product.qty = attributes.First(a => a.OriginalName == "qty").Value;
            product.iq = attributes.First(a => a.OriginalName == "iq").Value;
            product.inb = attributes.First(a => a.OriginalName == "inb").Value;

            // Structural child-axis paths are used for the image, price and link lookups.
            var image = productNode.SelectNodes("child::*/child::div/child::img").First();
            product.ImageSource = image.Attributes.First(a => a.Name == "src").Value;

            var priceSpan = productNode.SelectSingleNode("child::*/child::div/child::div/child::div/child::span");
            product.EffectivePrice = priceSpan.InnerText;

            // Description and ProductName are both taken from the same anchor text.
            var linkText = HttpUtility.HtmlDecode(productNode.SelectSingleNode("child::*/child::div/child::div/child::a").InnerText);
            product.Description = linkText;
            product.ProductName = linkText;

            log.DebugFormat("[BuildProductBasicData] fetched product={0}.", product.ToString());
            return product;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Adds a new package to <paramref name="app"/> and fills its title and prices
        /// from the markup of <paramref name="packageNode"/>.
        /// </summary>
        /// <param name="app">Application that receives the new package.</param>
        /// <param name="packageNode">Element describing one purchasable package.</param>
        private static void AddPackage(SteamApp app, HtmlNode packageNode)
        {
            var package = app.AddNewPackage();

            // All queries start with ".//" so they are evaluated against packageNode's own
            // descendants. A leading "//" is resolved from the document root and would return
            // the first match on the whole page regardless of which package node is passed in.
            var packageTitleNode = packageNode.SelectSingleNode($".//{PackageTitle}");

            package.Title = packageTitleNode.InnerHtml.Replace("Buy ", "").Trim();

            var priceNodes = packageNode.SelectNodes($".//div[@class='{PackagePriceXPath}']");

            if (priceNodes != null)
            {
                // A plain price element: current and original price are the same value.
                var priceNode = priceNodes[0];

                package.CurrentPrice = ParseNodeWithCurrencyToDecimal(priceNode);

                package.OriginalPrice = package.CurrentPrice;
            }
            else
            {
                // No plain price element: read the original and discounted prices separately.
                var originalPriceNode = packageNode.SelectSingleNode($".//div[@class='{PackageOriginalPriceXPath}']");

                package.OriginalPrice = ParseNodeWithCurrencyToDecimal(originalPriceNode);

                var discountPriceNode = packageNode.SelectSingleNode($".//div[@class='{PackageDiscountPriceXPath}']");

                package.CurrentPrice = ParseNodeWithCurrencyToDecimal(discountPriceNode);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds a trade from its markup: link (used as id), trader name, description,
        /// and the offered and requested item lists.
        /// </summary>
        /// <param name="code">Element containing the trade; the two "four-column" divs are expected as direct children.</param>
        public Trade(HtmlNode code)
        {
            htmlCode = code;

            // NOTE(review): the "//" queries below are resolved from the document root, not from
            // htmlCode. That is only correct if each trade is parsed as its own document - confirm.

            //  fetch the trade ID (well, actually its the url)
            id = htmlCode.SelectSingleNode("//a[contains(@href,'/trade/')]").GetAttributeValue("href", "");

            //  fetch the name, donator type is used to find it
            HtmlNode node = htmlCode.SelectSingleNode("//span[@class='regular' or starts-with(@class, 'donator')]");
            name = (node != null) ? node.InnerText : null;

            //  the trade's description
            HtmlNode descriptionNode = htmlCode.SelectSingleNode("//div[@class='notes expandable show']");
            description = (descriptionNode != null) ? descriptionNode.InnerText : "";

            // First "four-column" div holds the offered items, the second the requested ones.
            HtmlNode itemOfferBundleHtml = htmlCode.SelectSingleNode("(div[@class='four-column'])[1]");
            HtmlNode itemRequestBundleHtml = htmlCode.SelectSingleNode("(div[@class='four-column'])[2]");

            // SelectNodes returns null (not an empty collection) when nothing matches, so a
            // missing bundle or a bundle without items must leave the list empty instead of
            // throwing a NullReferenceException.
            HtmlNodeCollection itemOfferHtml = (itemOfferBundleHtml == null)
                ? null
                : itemOfferBundleHtml.SelectNodes("div[starts-with(@class, 'item')]");
            HtmlNodeCollection itemRequestHtml = (itemRequestBundleHtml == null)
                ? null
                : itemRequestBundleHtml.SelectNodes("div[starts-with(@class, 'item')]");

            offer = new List<Item>();
            request = new List<Item>();

            if (itemOfferHtml != null)
            {
                foreach (HtmlNode itemNode in itemOfferHtml)
                {
                    offer.Add(new Item(itemNode));
                }
            }

            if (itemRequestHtml != null)
            {
                foreach (HtmlNode itemNode in itemRequestHtml)
                {
                    request.Add(new Item(itemNode));
                }
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Maps one entry of a match list to a <see cref="Match"/>.
        /// </summary>
        /// <param name="root">Node the configured XPath expressions are evaluated against.</param>
        /// <param name="matchPath">XPath templates and attribute settings for each match field.</param>
        /// <param name="currentCount">List position combined into every XPath template.</param>
        /// <returns>The populated match; hero and skill bracket are set only when their nodes exist.</returns>
        internal Match MapHtmlNodeToMatch(HtmlNode root, MatchPath matchPath, int currentCount)
        {
            Match match = new Match();

            // Id: attribute value, entity-decoded, with the configured fragment stripped.
            HtmlNode idNode = root.SelectSingleNode(mainController.CombinePathWithListCount(matchPath.Id, currentCount));
            string decodedId = HtmlEntity.DeEntitize(idNode.Attributes[matchPath.IdAttribute].Value);
            match.Id = decodedId.Replace(matchPath.IdReplace, "");

            // Hero is optional.
            HtmlNode heroNode = root.SelectSingleNode(mainController.CombinePathWithListCount(matchPath.Hero, currentCount));
            if (heroNode != null)
            {
                string heroAttribute = heroNode.Attributes[HtmlAttributes.Hero.Attribute.Value].Value;
                string heroReference = heroAttribute.Replace(HtmlAttributes.Hero.Replace.Value, "");
                match.Hero = mainController.HeroController.GetHero(heroReference);
            }

            HtmlNode resultNode = root.SelectSingleNode(mainController.CombinePathWithListCount(matchPath.Result, currentCount));
            match.Result = mainController.MapStringToEnum<Results>(resultNode.InnerText);

            HtmlNode timeNode = root.SelectSingleNode(mainController.CombinePathWithListCount(matchPath.TimeAgo, currentCount));
            match.TimeAgo = DateTime.Parse(timeNode.Attributes[MainController.HTML_ATTRIBUTE_DATETIME].Value);

            HtmlNode typeNode = root.SelectSingleNode(mainController.CombinePathWithListCount(matchPath.Type, currentCount));
            match.Type = mainController.MapStringToEnum<Types>(typeNode.InnerText);

            HtmlNode modeNode = root.SelectSingleNode(mainController.CombinePathWithListCount(matchPath.Mode, currentCount));
            match.Mode = mainController.MapStringToEnum<Modes>(modeNode.InnerText);

            // Skill bracket is optional.
            HtmlNode skillBracketNode = root.SelectSingleNode(mainController.CombinePathWithListCount(matchPath.Skillbracket, currentCount));
            if (skillBracketNode != null)
            {
                string bracketText = HtmlEntity.DeEntitize(skillBracketNode.InnerText);
                match.Skillbracket = mainController.MapStringToEnum<Skillbrackets>(bracketText);
            }

            HtmlNode durationNode = root.SelectSingleNode(mainController.CombinePathWithListCount(matchPath.Duration, currentCount));
            match.Duration = mainController.ConvertStringToTimeSpan(durationNode.InnerText);

            HtmlNode kdaNode = root.SelectSingleNode(mainController.CombinePathWithListCount(matchPath.Kda, currentCount));
            match.Kda = mainController.ConvertStringToKda(kdaNode.InnerText);

            return match;
        }
        /// <summary>
        /// Extracts URL, ids, title, publication date and comment count from one headline div.
        /// </summary>
        /// <param name="articleDiv">Headline element containing the "h3/a" link and the date div.</param>
        /// <returns>The populated <see cref="ArticleInfo"/>.</returns>
        /// <exception cref="CommonParsingException">
        /// The link points to a "/video/" URL, or the "id" query parameter converts to 0.
        /// </exception>
        private static ArticleInfo ParseArticleInfoDiv(HtmlNode articleDiv)
        {
            var titleLink = articleDiv.SelectSingleNode("h3/a");
            var dateNode = articleDiv.SelectSingleNode("div[@class='headline-date']");
            var commentLink = articleDiv.SelectSingleNode("h3/a[@class='commentCount']");

            var info = new ArticleInfo();
            info.Url = titleLink.Attributes["href"].Value;

            // Video pages are rejected before anything else is read.
            if (info.Url.Contains(@"/video/"))
            {
                throw new CommonParsingException("Delfi TV article");
            }

            info.Id.ExternalId = info.Url.GetQueryParameterValueFromUrl("id");
            info.Title = titleLink.InnerText;
            info.DatePublished = DelfiWordyDateParser.Parse(dateNode.InnerText);
            info.DateScraped = DateTime.UtcNow.AddHours(2);
            info.Id.Portal = Portal.Delfi;

            // The comment count is rendered as "(N)"; a missing link means zero comments.
            if (commentLink == null)
            {
                info.CommentCount = 0;
            }
            else
            {
                info.CommentCount = Convert.ToInt32(commentLink.InnerText.TrimStart('(').TrimEnd(')'));
            }

            // An id of 0 is treated as "not found".
            if (Convert.ToInt32(info.Url.GetQueryParameterValueFromUrl("id")) == 0)
            {
                throw new CommonParsingException("Article id not found");
            }

            return info;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Reads the parameter name from a documentation table row and appends "?"
        /// when the description cell contains an "optional" marker span.
        /// </summary>
        /// <param name="row">Table row with a "name" cell and a description cell.</param>
        /// <returns>The parameter name, suffixed with "?" when optional.</returns>
        private string VariableNameResolver(HtmlNode row)
        {
            var optionalMarker = row.SelectSingleNode(@".//td[contains(@class,'description')]/span[@class='optional']");
            var name = row.SelectSingleNode(@".//td[@class=""name"" ]").InnerText;

            if (optionalMarker != null)
            {
                return name + "?";
            }

            return name + "";
        }
Exemplo n.º 8
0
 /// <summary>
 /// Concatenates the gold, silver and bronze amounts found under <paramref name="node"/>
 /// into a single digit string, zero-padding single-digit silver (when gold is present)
 /// and single-digit bronze.
 /// </summary>
 /// <param name="node">Element containing the "mgold", "msilver" and "mbronze" spans.</param>
 /// <returns>
 /// The combined amount, or an empty string when <paramref name="node"/> or any of the
 /// three spans is missing.
 /// </returns>
 private static string getMoney(HtmlNode node)
 {
     // Missing markup is checked explicitly instead of catching NullReferenceException,
     // so genuine programming errors are no longer silently swallowed.
     if (node == null)
     {
         return "";
     }

     string gold = ReadCoinText(node, "mgold");
     string silver = ReadCoinText(node, "msilver");
     string bronze = ReadCoinText(node, "mbronze");
     if (gold == null || silver == null || bronze == null)
     {
         return "";
     }

     string money = "";
     if (gold != "")
     {
         money += gold;
     }

     if (silver != "")
     {
         // Single-digit silver is padded only when it follows a gold amount.
         if (silver.Length > 1 || gold == "")
         {
             money += silver;
         }
         else
         {
             money += '0' + silver;
         }
     }

     // Bronze is always emitted; anything shorter than two characters is padded.
     if (bronze.Length > 1)
     {
         money += bronze;
     }
     else
     {
         money += '0' + bronze;
     }

     return money;
 }

 /// <summary>
 /// Returns the entity-decoded, left-trimmed text of the span with the given class,
 /// or null when the span does not exist.
 /// </summary>
 private static string ReadCoinText(HtmlNode node, string cssClass)
 {
     HtmlNode span = node.SelectSingleNode("descendant::span[@class='" + cssClass + "']");
     if (span == null)
     {
         return null;
     }

     string text = HtmlEntity.DeEntitize(span.InnerText);
     return text == null ? null : text.TrimStart();
 }
Exemplo n.º 9
0
        /// <summary>
        /// Reads matches played, win rate and KDA ratio for one "most played heroes" entry.
        /// </summary>
        /// <param name="root">Node the formatted XPath expressions are evaluated against.</param>
        /// <param name="currentCount">Value substituted into each XPath format string.</param>
        /// <returns>A <see cref="Stat"/> built from the three texts.</returns>
        internal Stat MapHtmlNode(HtmlNode root, int currentCount)
        {
            HtmlNode playedNode = root.SelectSingleNode(string.Format(PlayerPath.MostPlayedHeroes.MatchesPlayed.Value, currentCount));
            string matchesPlayed = playedNode.InnerText;

            HtmlNode winRateNode = root.SelectSingleNode(string.Format(PlayerPath.MostPlayedHeroes.Winrate.Value, currentCount));
            string winRate = winRateNode.InnerText;

            HtmlNode kdaNode = root.SelectSingleNode(string.Format(PlayerPath.MostPlayedHeroes.Kda.Value, currentCount));
            string kdaRatio = kdaNode.InnerText;

            // Commas are removed from the match count; the KDA's "." is replaced by ",".
            string normalizedPlayed = matchesPlayed.Replace(",", "");
            string normalizedKda = kdaRatio.Replace(".", ",");

            return new Stat(normalizedPlayed, winRate, normalizedKda);
        }
Exemplo n.º 10
0
 /// <summary>
 /// Parses a remark cell into a <see cref="Remark"/>: discovery details, award item,
 /// related quests and the list of cities.
 /// </summary>
 /// <param name="remarkNode">Element containing the remark markup.</param>
 /// <returns>The populated remark; sections whose markup is absent are left untouched.</returns>
 public static Remark ParseRemark(HtmlNode remarkNode)
 {
     var remark = new Remark();

     // Discovery: the level and type nodes sit two siblings before the link / level node.
     var discoveryLink = remarkNode.SelectSingleNode("a[@title!='']");
     if (discoveryLink != null)
     {
         var levelNode = discoveryLink.PreviousSibling.PreviousSibling;
         var typeNode = levelNode.PreviousSibling.PreviousSibling;

         var typeName = typeRegex.Match(typeNode.Attributes["src"].Value).Groups["type"].Value;
         remark.DiscoveryType = Enum.Parse(typeof(DisType), typeName).ToString();
         remark.DiscoveryLevel = Int32.Parse(levelNode.InnerText.Substring(0, 1));
         remark.DiscoveryExp = Int32.Parse(discoveryLink.Attributes["title"].Value.Remove(0,5));
         remark.Discovery = discoveryLink.InnerText;
     }

     // Award item
     var awardSpan = remarkNode.SelectSingleNode("span[@style='color:#804000;']");
     if (awardSpan != null)
     {
         remark.AwardItem = awardSpan.InnerText;
     }

     // Related quests
     var relatedLinks = remarkNode.SelectNodes("descendant::a[@style='color:#C000C0;' or @style='color:DarkBlue;']");
     if (relatedLinks != null)
     {
         foreach (HtmlNode relatedLink in relatedLinks)
         {
             // Links whose text starts with "前:" go to the "pre" lists, all others to the follow-up list.
             IList<string> foundNames = null;
             IList<int> questIds = null;
             if (!relatedLink.InnerText.StartsWith("前:"))
             {
                 questIds = remark.FollowQuestID;
             }
             else
             {
                 foundNames = remark.PreFoundName;
                 questIds = remark.PreQuestID;
             }

             var hrefMatch = questRegex.Match(relatedLink.Attributes["href"].Value);

             // "前:港口-" entries record a name instead of a quest id.
             if (relatedLink.InnerText.StartsWith("前:港口-"))
             {
                 foundNames.Add(relatedLink.InnerText.Replace("前:港口-",""));
             }
             else
             {
                 questIds.Add(Int32.Parse(hrefMatch.Groups["id"].Value));
             }
         }
     }

     // Accepting cities (last br, next a)
     var cityLinks = remarkNode.SelectNodes("descendant::a[@class='MisCity']");
     if (cityLinks != null)
     {
         foreach (var cityLink in cityLinks)
         {
             // These four labels are skipped rather than recorded as cities.
             var isExcluded = cityLink.InnerText == "南美开拓港" || cityLink.InnerText == "东南亚开拓港" ||
                              cityLink.InnerText == "掠夺地图" || cityLink.InnerText == "沉船资讯";
             if (isExcluded)
             {
                 continue;
             }

             remark.FromCityList.Add(cityLink.InnerText);
         }
     }

     return remark;
 }
Exemplo n.º 11
0
        /// <summary>
        /// Resolves the type expression for a documented parameter row.
        /// </summary>
        /// <param name="row">Table row with a "type" cell and a description cell.</param>
        /// <returns>
        /// "any" when the type cell has no param-type spans; "Cesium.{name}Options" when the only
        /// type is "Object" and the description yields properties; otherwise the distinct type
        /// names joined with "|".
        /// </returns>
        private string VariableTypeResolver(HtmlNode row)
        {
            var typeCell = row.SelectSingleNode(@".//td[@class=""type"" ]");

            // EntityCollection contains(entity) has no type information.
            var typeSpans = typeCell.SelectNodes(@".//span[@class='param-type']");
            if (typeSpans == null)
            {
                return "any";
            }

            var typeNames = typeSpans.Select(Program.TypeReader).ToArray().Distinct();

            // Only a lone "Object" type is expanded into a generated options interface.
            var isLoneObject = !typeNames.Skip(1).Any() && typeNames.First() == "Object";
            if (!isLoneObject)
            {
                return string.Join("|", typeNames);
            }

            var props = GetSignatureTypes(row.SelectSingleNode(@".//td[contains(@class,'description')]"));
            if (!props.Keys.Any())
            {
                return string.Join("|", typeNames);
            }

            var optionsType = this._name + "Options";
            var dependencies = new List<string>();

            // A null writer skips emitting the interface; the type name is returned either way.
            var writer = Program.GetWriter(optionsType,_source);
            if (writer != null)
            {
                props = props.ToDictionary(k => k.Key, v => Program.extractDependencies(dependencies, v.Value));

                Program.WriteDependencies(optionsType, dependencies, writer, null, null, _source);

                writer.WriteLine($"interface {optionsType}");
                writer.WriteLine("{");
                foreach (var prop in props)
                {
                    writer.WriteLine($"\t{prop.Key}: {prop.Value};");
                }
                writer.WriteLine("}");
                writer.WriteLine($"export = {optionsType}");
            }

            return "Cesium." + optionsType;
        }
Exemplo n.º 12
0
        /// <summary>
        /// Returns the text before the first '-' in the author span, trimmed,
        /// or an empty string when the span is not present.
        /// </summary>
        /// <param name="node">Result element the relative path is evaluated against.</param>
        protected override string ExtractAuthor(HtmlNode node)
        {
            var authorSpan = node.SelectSingleNode("div/div[2]/div/div/span");
            if (authorSpan == null)
            {
                return string.Empty;
            }

            var parts = authorSpan.InnerText.Split('-');
            return parts[0].Trim();
        }
Exemplo n.º 13
0
 /// <summary>
 /// Returns the description text from the hover-box paragraph, falling back to the
 /// "list-item-main1" div; empty when neither exists.
 /// </summary>
 /// <param name="node">Element searched for the description markup.</param>
 private string getDescription(HtmlNode node)
 {
     var descriptionNode = node.SelectSingleNode(".//div[@class='hoverbox-details']/p")
                           ?? node.SelectSingleNode(".//div[@class='list-item-main1']"); // "klipit" layout

     return descriptionNode == null ? String.Empty : descriptionNode.InnerText;
 }
 /// <summary>
 /// Returns the trimmed author name from the itemprop="author" element, falling back to
 /// the strong element inside the "rtl-info" div; null when neither exists.
 /// </summary>
 /// <param name="docNode">Node searched for the author markup.</param>
 private string GetAuthorName(HtmlNode docNode)
 {
     var authorNode = docNode.SelectSingleNode(".//strong[@itemprop='author']")
                      ?? docNode.SelectSingleNode(".//div[@class='rtl-info']/strong");

     if (authorNode == null)
     {
         return null;
     }

     return authorNode.InnerText.Trim();
 }
Exemplo n.º 15
0
        /// <summary>
        /// Returns the text of the second div inside the result's first-level div,
        /// or an empty string when it is not present.
        /// </summary>
        /// <param name="node">Result element the relative path is evaluated against.</param>
        protected override string ExtractSnippet(HtmlNode node)
        {
            var snippetNode = node.SelectSingleNode("div/div[2]");
            return snippetNode != null ? snippetNode.InnerText : string.Empty;
        }
Exemplo n.º 16
0
 /// <summary>
 /// Returns the description text from the expanding-card paragraph, falling back to the
 /// "list-item-main1" div; empty when neither exists.
 /// </summary>
 /// <param name="node">Element searched for the description markup.</param>
 private string getDescription(HtmlNode node)
 {
     var descriptionNode = node.SelectSingleNode(".//p[@class='expanding-card__description']") // "kausi" layout
                           ?? node.SelectSingleNode(".//div[@class='list-item-main1']");       // "klipit" layout

     return descriptionNode == null ? String.Empty : descriptionNode.InnerText;
 }
Exemplo n.º 17
0
        /// <summary>
        /// Returns the text of the title link, or an empty string when the link is not present.
        /// </summary>
        /// <param name="node">Result element the relative path is evaluated against.</param>
        protected override string ExtractTitle(HtmlNode node)
        {
            var titleLink = node.SelectSingleNode("div/div[1]/h3/a");
            return titleLink != null ? titleLink.InnerText : string.Empty;
        }
Exemplo n.º 18
0
        /// <summary>
        /// Returns the inner HTML of the article body after scripts and "yog-ycb" divs are removed.
        /// </summary>
        /// <param name="node">Node the article-body queries are evaluated against.</param>
        /// <returns>
        /// The cleaned body markup, or null when neither the "mediaarticlebody" div nor the
        /// itemprop="articleBody" div exists.
        /// </returns>
        public override string ReadContent(HtmlNode node)
        {
            var content = node.SelectSingleNode("//div[@id='mediaarticlebody']");
            if (content == null)
                content = node.SelectSingleNode("//div[@itemprop='articleBody']");

            // Without a body container there is nothing to clean; bail out instead of
            // dereferencing null further down.
            if (content == null)
                return null;

            RemoveScripts(content);
            RemoveTags(GetNodeByXpathAndClass(node, "//div", "yog-ycb"));

            return content.InnerHtml;
        }
 /// <summary>
 /// Fills the user, stamp and sub-item fields from one log entry element.
 /// </summary>
 /// <param name="node">Log entry element containing the "log-content" div.</param>
 public virtual void Parser(HtmlNode node)
 {
     // The last link in the log body supplies the user's URL and name.
     var userLink = node.SelectSingleNode("div[@class='log-content']/div[@class='log-body']/a[last()]");
     userURL = userLink.Attributes["href"].Value;
     userName = userLink.InnerText;

     // The stamp's info link supplies its name and URL.
     var stampLink = node.SelectSingleNode("div[@class='log-content']/div[@class='log-details log-target log-target-stamp']/div[@class='log-target-info']/a");
     stampName = stampLink.InnerText;
     stampURL = stampLink.Attributes["href"].Value;

     // The thumbnail image URL is read from the "data-src" attribute.
     var stampImage = node.SelectSingleNode("div[@class='log-content']/div[@class='log-details log-target log-target-stamp']/div[@class='log-target-thumbnail']/a/img");
     stampImageURL = stampImage.Attributes["data-src"].Value;

     longago = new NicorepoItemSubLongago(node);
     nicoru = new NicorepoItemSubNicoru(node);
 }
Exemplo n.º 20
0
        /// <summary>
        /// Returns the cleaned HTML of the article body after scripts and "poll_module" divs are removed.
        /// </summary>
        /// <param name="node">Node the article-body queries are evaluated against.</param>
        /// <returns>
        /// The cleaned body markup, or null when neither the "article-body" div nor the
        /// "slide-description" div exists.
        /// </returns>
        public override string ReadContent(HtmlNode node)
        {
            var content = node.SelectSingleNode("//div[@class='article-body']");
            if (content == null)
                content = node.SelectSingleNode("//div[@class='slide-description']");

            // Without a body container there is nothing to clean; bail out instead of
            // dereferencing null further down.
            if (content == null)
                return null;

            RemoveScripts(content);
            RemoveTags(GetNodeByXpathAndClass(node,"//div","poll_module"));
            return CleanHtml(content.InnerHtml);
        }
Exemplo n.º 21
0
 /// <summary>
 /// Returns the article author's name.
 /// </summary>
 /// <param name="node">Node the author queries are evaluated against.</param>
 /// <returns>
 /// The text of the rel="author" link when present; otherwise the "content" attribute of
 /// the itemprop="author" meta element; otherwise an empty string.
 /// </returns>
 public override string ReadAuthor(HtmlNode node)
 {
     var authorLink = node.SelectSingleNode("//a[@rel='author']");
     if (authorLink != null)
         return authorLink.InnerText;

     // The meta fallback used to be selected and then discarded. A meta element has no
     // text content, so its value is read from the "content" attribute.
     var authorMeta = node.SelectSingleNode("//meta[@itemprop='author']");
     if (authorMeta != null)
         return authorMeta.GetAttributeValue("content", string.Empty);

     return string.Empty;
 }
Exemplo n.º 22
0
 /// <summary>
 /// Creates a <see cref="Usage"/> from an element whose em text gives the word type
 /// and whose strong text gives the usage text.
 /// </summary>
 /// <param name="node">Element describing one usage.</param>
 /// <returns>The usage with its text, type and synonyms.</returns>
 private static IUsage CreateUsageNode(HtmlNode node)
 {
     var wordTypeText = node.SelectSingleNode(".//em/text()");
     var usageText = node.SelectSingleNode(".//strong/text()");

     IUsage usage = new Usage
     {
         Text = usageText.InnerText,
         Type = GetWordType(wordTypeText),
         Synonyms = ExtractSynonyms(node)
     };
     return usage;
 }
 /// <summary>
 /// Loads <paramref name="html"/>, scans it with GetHtmlAllTableXpath, and stores the "id"
 /// attribute of the nodes addressed by maxKey and secondKey.
 /// </summary>
 /// <param name="html">HTML text to parse.</param>
 /// <param name="skip">Value stored in the skip field before the scan runs.</param>
 public HtmlAgilityPackTableXpath(string html, int skip)
 {
     this.skip = skip;
     htmlTableAttDic = new Dictionary<string, int>();

     var document = new HtmlAgilityPack.HtmlDocument();
     document.LoadHtml(html);
     rootDomNode = document.DocumentNode;

     // NOTE(review): maxKey and secondKey are presumably set by this scan - confirm.
     GetHtmlAllTableXpath(rootDomNode);

     var maxNode = rootDomNode.SelectSingleNode(maxKey);
     maxValue = maxNode.GetAttributeValue("id", "");

     var secondNode = rootDomNode.SelectSingleNode(secondKey);
     secondValue = secondNode.GetAttributeValue("id", "");
 }
Exemplo n.º 24
0
        /// <summary>
        /// Reads the timetabled departure time and, when present, the platform from a departure element.
        /// </summary>
        /// <param name="departure">Element describing one departure.</param>
        public Departure(HtmlNode departure)
        {
            // The time text is the first child of the timetabled-departure node.
            var timeNode = departure.SelectSingleNode(TimetabledDepartureXPath);
            TimetabledDeparture = new DepartureTime(timeNode.ChildNodes[0].InnerText);

            // The platform is optional; its text is the third child of the platform node.
            var platformNode = departure.SelectSingleNode(PlatformXPath);
            if (platformNode == null)
            {
                return;
            }

            Platform = platformNode.ChildNodes[2].InnerText;
        }
Exemplo n.º 25
0
        /// <summary>
        /// Returns the cleaned HTML of the article body after scripts and "//div" matches are removed.
        /// </summary>
        /// <param name="node">Node the article-body queries are evaluated against.</param>
        /// <returns>
        /// The cleaned body markup, or null when neither the "bg-opaque pad-16 article" div
        /// nor an article element exists.
        /// </returns>
        public override string ReadContent(HtmlNode node)
        {
            var content = node.SelectSingleNode("//div[@class='bg-opaque pad-16 article']");
            if (content == null)
                content = node.SelectSingleNode("//article");

            // Without a body container there is nothing to clean; bail out instead of
            // dereferencing null further down.
            if (content == null)
                return null;

            RemoveScripts(content);
            RemoveTags(content, "//div");

            return CleanHtml(content.InnerHtml);
        }
Exemplo n.º 26
0
 /// <summary>
 /// Builds an entry from one result row: title, friendly name, size and download URL.
 /// </summary>
 /// <param name="node">Row element of the listing.</param>
 /// <param name="client">Browser client stored for later use.</param>
 public TorrentLeechEntry(HtmlNode node, BrowserClient client)
 {
     _client = client;

     var titleLink = node.SelectSingleNode(".//span[@class='title']/a");
     Title = titleLink.InnerText;
     Friendly = Title.TorrentName();

     // The size comes from the fifth cell; values containing "GB" are multiplied by 1024.
     var sizeText = node.SelectSingleNode(".//td[5]").InnerText;
     double amount = double.Parse(sizeText.RegexMatch(@"\d+").Value);
     Size = sizeText.Contains("GB") ? amount * 1024 : amount;

     var downloadLink = node.SelectSingleNode(".//td[@class='quickdownload']/a");
     Torrent = "http://torrentleech.org" + downloadLink.Attributes["href"].Value;
 }
Exemplo n.º 27
0
        /// <summary>
        /// Returns the author name from the "delfi-author-name" div, falling back to the
        /// "delfi-source-name" div.
        /// </summary>
        /// <param name="docNode">Node the author queries are evaluated against.</param>
        /// <returns>The inner text of the matched div, or null when neither div exists.</returns>
        private static string GetAuthorName(HtmlNode docNode)
        {
            var node = docNode.SelectSingleNode("//div[@class='delfi-author-name']") ??
                       docNode.SelectSingleNode("//div[@class='delfi-source-name']");

            // The leftover debugging block (an unused local assigned when node was null) is gone.
            return node == null ? null : node.InnerText;
        }
Exemplo n.º 28
0
        /// <summary>
        /// Returns the cleaned HTML of the article body after scripts and two wrapper divs are removed.
        /// </summary>
        /// <param name="node">Node the article-body queries are evaluated against.</param>
        /// <returns>
        /// The cleaned body markup, or null when neither the "fs-article" div nor the
        /// "fsn_v3_main" div exists.
        /// </returns>
        public override string ReadContent(HtmlNode node)
        {
            var content = node.SelectSingleNode("//div[@class='fs-article']")
                          ?? node.SelectSingleNode("//div[@id='fsn_v3_main']");
            if (content == null)
            {
                return null;
            }

            RemoveScripts(content);
            RemoveTags(content, "//div[@id='story-top-container']");
            RemoveTags(content, "//div[@class='parentWrapper']");

            var html = content.InnerHtml;
            return CleanHtml(html);
        }
Exemplo n.º 29
0
        /// <summary>
        /// Converts one forum message view element into a BSON document with id, body,
        /// answer flag, creation date, author and (when present) title.
        /// </summary>
        /// <param name="messageViewNode">Element wrapping a single message.</param>
        /// <returns>The message document.</returns>
        /// <remarks>
        /// Required nodes (id, content, author, subject, user name) are validated through
        /// ThrowExceptionIfNull; body, title and solution marker are optional.
        /// </remarks>
        private BsonDocument ParseMessageViewNode(HtmlNode messageViewNode)
        {
            var mainContentNode = messageViewNode.SelectSingleNode("div//div[contains(@class,'lia-quilt-column-main-right')]");

            var messageAuthorNode = messageViewNode.SelectSingleNode("div//div[contains(@class, 'lia-message-author')]");

            var idNode = messageViewNode.SelectSingleNode("div//div[@data-message-id]");

            ThrowExceptionIfNull(idNode, "can not find the id node");

            ThrowExceptionIfNull(mainContentNode, "can not find the content node");

            // The author container is dereferenced below, so it gets the same validation as
            // the other required nodes instead of failing with a NullReferenceException.
            ThrowExceptionIfNull(messageAuthorNode, "can not find the author node");

            var subjectNode = mainContentNode.SelectSingleNode("div//div[contains(@class, 'lia-message-subject')]");

            ThrowExceptionIfNull(subjectNode, "can not find the subject node");

            var titleNode = subjectNode.SelectSingleNode("h1");

            var solutionNode = subjectNode.SelectSingleNode("span[contains(@class, 'solution')]");

            var messagePostDateNode = mainContentNode.SelectSingleNode("div//span[contains(@class,'lia-message-posted-on')]");

            var bodyNode = mainContentNode.SelectSingleNode("div//div[contains(@class, 'lia-message-body-content')]");

            var userNameNode = messageAuthorNode.SelectSingleNode("div//a[contains(@class, 'lia-user-name-link')]/span");

            ThrowExceptionIfNull(userNameNode, "can not find the user name node");

            var userRegisterDateNode = messageAuthorNode.SelectSingleNode("div//span[contains(@class, 'DateTime')]");

            var author = new BsonDocument
            {
                { "name", userNameNode.InnerText},
                { "registeredOn", ParseDateTimeNode(userRegisterDateNode)}
            };

            var message = new BsonDocument
                {
                    { "id", idNode.Attributes["data-message-id"].Value },
                    { "body", bodyNode == null ? string.Empty : bodyNode.InnerHtml},
                    // A message counts as an answer when the subject carries a "solution" span.
                    { "isAnswer", solutionNode != null },
                    { "createdOn", ParseDateTimeNode(messagePostDateNode)},
                    { "author", author }
                };

            // The title is added only when the subject contains an h1.
            if(titleNode != null)
            {
                message.Add("title", titleNode.InnerText);
            }

            return message;
        }
Exemplo n.º 30
0
 /// <summary>
 /// Builds an entry from one result row: title, numeric id, friendly name, size and
 /// the RSS download URL.
 /// </summary>
 /// <param name="node">Row element of the listing.</param>
 /// <param name="client">Browser client stored for later use.</param>
 public TorrentLeechEntry(HtmlNode node, BrowserClient client)
 {
     _client = client;

     var titleLink = node.SelectSingleNode(".//span[@class='title']/a");
     Title = titleLink.InnerText;

     // The id is the first run of digits in the title link's href.
     var href = titleLink.Attributes["href"].Value;
     Id = int.Parse(href.RegexMatch(@"\d+").Value);
     Friendly = Title.TorrentName();

     // The size comes from the fifth cell; values containing "GB" are multiplied by 1024.
     var sizeText = node.SelectSingleNode(".//td[5]").InnerText;
     double amount = double.Parse(sizeText.RegexMatch(@"\d+").Value);
     Size = sizeText.Contains("GB") ? amount * 1024 : amount;

     // NOTE(review): the URL embeds a fixed hex segment that looks like an account-specific
     // key - confirm whether it should come from configuration.
     var dottedTitle = Title.RegexReplace(@"\s", ".");
     Torrent = "http://www.torrentleech.org/rss/download/{0}/ed2597d8977cde9da218/{1}".Template(Id,dottedTitle);
 }
Exemplo n.º 31
0
        /// <summary>
        /// Adds one sub-category per article element under <paramref name="node"/> to
        /// <paramref name="parentCat"/>, plus a next-page category when a "More shows" link exists.
        /// </summary>
        /// <param name="node">Element searched for article elements and the "More shows" link.</param>
        /// <param name="parentCat">Category that receives the discovered sub-categories.</param>
        /// <returns>The number of sub-categories on <paramref name="parentCat"/> afterwards.</returns>
        private int AddSubcats(HtmlNode node, RssLink parentCat)
        {
            var subs = node.SelectNodes(".//article");

            // SelectNodes returns null (not an empty collection) when nothing matches, so a
            // page without articles must be skipped rather than enumerated.
            if (subs != null)
            {
                foreach (var sub in subs)
                {
                    RssLink subcat = new RssLink()
                    {
                        ParentCategory = parentCat
                    };
                    subcat.Name  = HttpUtility.HtmlDecode(sub.SelectSingleNode(".//a[@title]").Attributes["title"].Value.Trim());
                    subcat.Url   = FormatDecodeAbsolutifyUrl(parentCat.Url, sub.SelectSingleNode(".//a[@href]").Attributes["href"].Value, null, UrlDecoding.None);
                    subcat.Thumb = getThumb(sub.SelectSingleNode(".//picture/img"));

                    parentCat.SubCategories.Add(subcat);
                }
            }

            var np = node.SelectSingleNode(".//a[@href and text()='More shows']");

            nextPageAvailable = false;
            if (np != null)
            {
                // Further pages are exposed as an extra NextPageCategory entry.
                string url   = CreateUrl(parentCat.Url, np.Attributes["href"].Value);
                var    npCat = new NextPageCategory()
                {
                    Url = url, ParentCategory = parentCat
                };
                parentCat.SubCategories.Add(npCat);
            }

            parentCat.SubCategoriesDiscovered = true;
            return(parentCat.SubCategories.Count);
        }
Exemplo n.º 32
0
            /// <summary>
            /// Parses one calendar event div and appends the resulting <see cref="NewsEvent"/> to
            /// <paramref name="list1"/>.
            /// </summary>
            /// <param name="weekDays">Dates of the calendar columns; indexed by <paramref name="columnIndex"/>.</param>
            /// <param name="columnIndex">Zero-based column the div belongs to.</param>
            /// <param name="list1">List that receives the parsed event.</param>
            /// <param name="div">Event element with an "a" child and text nodes carrying the time.</param>
            /// <exception cref="NewsParserException">The div has no text nodes at all.</exception>
            private static void ParseEventDiv(List <DateTimeOffset> weekDays, int columnIndex, List <NewsEvent> list1, HAP.HtmlNode div)
            {
                var name = div.SelectNode("a").InnerText.Trim();

                // The link text is "COUNTRY:Event name". Split at the first colon. A name with no
                // colon used to make Substring(country.Length + 1) throw
                // ArgumentOutOfRangeException; it now falls back to "ALL" with the name unchanged,
                // like a blank country prefix does.
                var separatorIndex = name.IndexOf(':');
                var country = separatorIndex < 0 ? null : name.Substring(0, separatorIndex);

                if (string.IsNullOrWhiteSpace(country))
                {
                    country = "ALL";
                }
                else
                {
                    name = name.Substring(separatorIndex + 1);
                }

                var childNodes = div.ChildNodes;
                var dates      = (from node in childNodes
                                  where node.NodeType == HAP.HtmlNodeType.Text
                                  select node.InnerText.Decode()).ToArray();

                if (!dates.Any())
                {
                    throw new NewsParserException("No text nodes found in column " + (columnIndex + 1) + " for " + name);
                }

                // An event whose text nodes are all blank is skipped silently.
                var date = dates.FirstOrDefault(s => !string.IsNullOrWhiteSpace(s));

                if (date == null)
                {
                    return;
                }

                // "Star" marker -> H, "djStar" marker -> M, neither -> L.
                NewsEventLevel level;
                if (div.SelectSingleNode(starXPath("Star")) != null)
                {
                    level = NewsEventLevel.H;
                }
                else if (div.SelectSingleNode(starXPath("djStar")) != null)
                {
                    level = NewsEventLevel.M;
                }
                else
                {
                    level = NewsEventLevel.L;
                }

                var n = new NewsEvent()
                {
                    Country = country,
                    Name    = name,
                    // Column date plus the parsed time of day.
                    Time    = weekDays[columnIndex].Add(ParseEventDate(date).TimeOfDay),
                    Type    = name.Contains("Speaks") ? NewsEventType.Speech : NewsEventType.Report,
                    Level   = level
                };

                list1.Add(n);
            }
Exemplo n.º 33
0
        /// <summary>
        /// Lets the user pick an HTML file, parses it, writes parse error codes to the console,
        /// and prints the text of every cell of the first table.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void buttonParseHtml_Click(object sender, EventArgs e)
        {
            string fileName;

            // The dialog is disposable; release it as soon as the file name is known.
            using (OpenFileDialog openFileDialog = new OpenFileDialog())
            {
                openFileDialog.Filter      = "HTML File (*.html;)|*.html";
                openFileDialog.Multiselect = false;

                if (openFileDialog.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                if (String.IsNullOrEmpty(openFileDialog.FileName))
                {
                    return;
                }

                fileName = openFileDialog.FileName;
            }

            string strHtml;

            // The using block closes the reader; no explicit Close() is needed.
            using (StreamReader reader = new StreamReader(fileName, Encoding.UTF8))
            {
                strHtml = reader.ReadToEnd();
            }

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(strHtml);                 // load the html
            foreach (var err in doc.ParseErrors)
            {
                Console.WriteLine(err.Code);
            }

            HtmlAgilityPack.HtmlNode rootNode = doc.DocumentNode;                // root node of the document
#if true
            string xpath = @"//table";
            HtmlAgilityPack.HtmlNode node = rootNode.SelectSingleNode(xpath);                // first table
            if (node == null)
            {
                // The document contains no table; nothing to print.
                return;
            }

            // Strip script and style elements so only table content remains.
            foreach (var script in node.Descendants("script").ToArray())
            {
                script.Remove();
            }
            foreach (var style in node.Descendants("style").ToArray())
            {
                style.Remove();
            }

            // SelectNodes returns null when nothing matches, so both levels are guarded.
            var trNodes = node.SelectNodes("tr");
            if (trNodes == null)
            {
                return;
            }

            foreach (var trnod in trNodes)                 // iterate rows
            {
                var tdNodes = trnod.SelectNodes("td");
                if (tdNodes == null)
                {
                    // Row without td cells (for example a header row made of th cells).
                    continue;
                }

                for (int i = 0; i < tdNodes.Count; i++)                     // iterate columns
                {
                    Console.WriteLine(tdNodes[i].InnerText);
                }
            }
#endif
        }
Exemplo n.º 34
0
        /// <summary>
        /// Selects the first node under <paramref name="parent"/> that matches
        /// <paramref name="xPath"/>, throwing instead of returning null when nothing matches.
        /// </summary>
        /// <param name="parent">Node the XPath expression is evaluated against.</param>
        /// <param name="xPath">XPath expression to evaluate.</param>
        /// <returns>The first matching node; never null.</returns>
        /// <exception cref="NewsParserException">No node matches the expression.</exception>
        public static HAP.HtmlNode SelectNode(this HAP.HtmlNode parent, string xPath)
        {
            var match = parent.SelectSingleNode(xPath);

            if (match != null)
            {
                return match;
            }

            // The anonymous object hands the failing expression to the exception.
            throw new NewsParserException(new { xPath });
        }
Exemplo n.º 35
0
        /// <summary>
        /// Checks whether the server reported an error.
        /// </summary>
        /// <param name="doucmentNode">Node the absolute XPath lookup is run against.</param>
        /// <returns>
        /// true when the first span of the first strong element in the body exists and its
        /// text contains the site-busy marker; otherwise false.
        /// </returns>
        public static bool TestServiceError(Html.HtmlNode doucmentNode)
        {
            var busyMarker = doucmentNode.SelectSingleNode("/html[1]/body[1]/strong[1]/span[1]");

            if (busyMarker == null)
            {
                // No such element: nothing indicates an error.
                return false;
            }

            return busyMarker.InnerText.IndexOf("网站忙") >= 0;
        }
Exemplo n.º 36
0
        /// <summary>
        /// Builds an article from the title, post and date nodes located by the given XPath expressions.
        /// </summary>
        /// <param name="rootNode">Node the three XPath expressions are evaluated against.</param>
        /// <param name="expressions">Supplies the title, post and date XPath expressions.</param>
        /// <returns>
        /// The populated article, or null (after writing a message to the console) when any
        /// of the three nodes cannot be found.
        /// </returns>
        public DAL.Entities.Article ParseArticle(HtmlAgilityPack.HtmlNode rootNode, ScrapingExpressions expressions)
        {
            var titleNode = rootNode.SelectSingleNode(expressions.TitleXPath);
            var postNode  = rootNode.SelectSingleNode(expressions.PostXPath);
            var dateNode  = rootNode.SelectSingleNode(expressions.DateXPath);

            bool allFound = titleNode != null && postNode != null && dateNode != null;

            if (!allFound)
            {
                Console.WriteLine("Failed to parse document");
                return null;
            }

            var article = new DAL.Entities.Article();
            article.Contents     = postNode.InnerText;
            article.Title        = titleNode.InnerText;
            article.CreationDate = Program.ParseDateTime(dateNode.InnerText);
            return article;
        }
        /// <summary>
        /// Collects link, title, date, content and image information from one list entry
        /// into a BSON document and posts the full image URL to <paramref name="imageTargetBlock"/>.
        /// </summary>
        /// <param name="node">Root node of the list entry.</param>
        /// <param name="checker">Called with the entry's link; a true result makes this method return null.</param>
        /// <param name="imageTargetBlock">Receives the main-page-prefixed image URL when an image is present.</param>
        /// <returns>The document that was built, or null when <paramref name="checker"/> returned true.</returns>
        public static BsonDocument AnalizeGeneralListInformation(HtmlAgilityPack.HtmlNode node, Checker checker, BufferBlock <string> imageTargetBlock)
        {
            var document = new BsonDocument();

            // Link and title come from the anchor inside the 'h16' div.
            var anchor = node.SelectSingleNode(".//div[@class='h16']/a");
            if (anchor != null)
            {
                var href = anchor.Attributes["href"].Value;
                if (checker(href))
                {
                    return null;
                }
                document.Add("link", href);
                document.Add("title", anchor.InnerText);
            }

            var dateDiv = node.SelectSingleNode(".//div[@class='date']");
            if (dateDiv != null)
            {
                document.Add("date", dateDiv.InnerText);
            }

            var contentDiv = node.SelectSingleNode(".//div[@class='p']");
            if (contentDiv != null)
            {
                document.Add("content", contentDiv.InnerText);
            }

            var image = node.SelectSingleNode(".//img");
            if (image != null)
            {
                var source = image.Attributes["src"].Value;
                document.Add("img", GetSubUrl(source));
                document.Add("compressImg", WebImageSaver.Instance.GetComressImageName(GetSubUrl(source)));
                imageTargetBlock.Post(GetIhChina.MainPage + source);
            }

            Console.WriteLine(document.ToString());
            return document;
        }
Exemplo n.º 38
0
        /// <summary>
        /// Reads the rating from the "content" attribute of the node selected by <c>ratingXPath</c>.
        /// </summary>
        /// <param name="node">Node the rating XPath is evaluated against.</param>
        /// <returns>
        /// null when no rating node is found; otherwise the parsed value, which is 0 when the
        /// attribute text is not a valid number.
        /// </returns>
        protected override float? getRating(HtmlAgilityPack.HtmlNode node)
        {
            var ratingNode = node.SelectSingleNode(ratingXPath);

            if (ratingNode == null)
            {
                return null;
            }

            // TryParse writes 0 into 'parsed' on failure, so the result is deliberately ignored.
            float parsed;
            float.TryParse(ratingNode.Attributes["content"].Value, out parsed);
            return parsed;
        }
Exemplo n.º 39
0
        /// <summary>
        /// Builds a <see cref="VideoInfo"/> for every <c>article</c> element below
        /// <paramref name="node"/> and refreshes the next-page state from the "More " link.
        /// </summary>
        /// <param name="node">Root node of the listing.</param>
        /// <param name="parentUrl">URL of the listing page, used to resolve relative links.</param>
        /// <returns>The videos found; empty when the listing contains no <c>article</c> elements.</returns>
        private List <VideoInfo> GetVids(HtmlNode node, string parentUrl)
        {
            List<VideoInfo> videos = new List<VideoInfo>();

            // SelectNodes returns null (not an empty collection) when nothing matches,
            // so an empty listing must not be enumerated.
            var vids = node.SelectNodes(".//article");
            if (vids != null)
            {
                foreach (var vid in vids)
                {
                    VideoInfo video = new VideoInfo();
                    var headingNode = vid.SelectSingleNode(".//h2[contains(@class,'h3')]");

                    if (headingNode == null)
                    {
                        // No heading: title and link both come from the anchors.
                        video.Title    = HttpUtility.HtmlDecode(vid.SelectSingleNode(".//a[@title]").Attributes["title"].Value.Trim());
                        video.VideoUrl = FormatDecodeAbsolutifyUrl(parentUrl, vid.SelectSingleNode(".//a[@href]").Attributes["href"].Value, null, UrlDecoding.None);
                    }
                    else
                    {
                        video.Title    = headingNode.InnerText.Trim();
                        video.VideoUrl = FormatDecodeAbsolutifyUrl(parentUrl, vid.SelectSingleNode(".//a[@class='teaser__link' and @href]").Attributes["href"].Value, null, UrlDecoding.None);

                        // Prefer the teaser description, fall back to the subtitle; leave the
                        // description unset when the teaser has neither.
                        var descriptionNode = vid.SelectSingleNode(".//p[contains(@class,'teaser__description')]")
                                              ?? vid.SelectSingleNode(".//h3[contains(@class,'teaser__subtitle')]");
                        if (descriptionNode != null)
                        {
                            video.Description = descriptionNode.InnerText.Trim();
                        }
                    }

                    var moNode = vid.SelectSingleNode(".//span[@data-month]");
                    var daNode = vid.SelectSingleNode(".//span[@data-date]");
                    if (moNode != null && daNode != null)
                    {
                        video.Airdate = moNode.InnerText.Trim() + ' ' + daNode.InnerText.Trim();
                    }

                    video.Thumb = getThumb(vid.SelectSingleNode(".//picture/img"));
                    videos.Add(video);
                }
            }

            var np = node.SelectSingleNode(".//a[@href and contains(text(),'More ')]");

            nextPageAvailable = np != null;
            if (nextPageAvailable)
            {
                nextPageUrl = CreateUrl(parentUrl, np.Attributes["href"].Value);
            }
            return videos;
        }
Exemplo n.º 40
0
        /// <summary>
        /// Searches Amazon's Kindle store for a book and returns the first result when its
        /// text contains the requested title.
        /// </summary>
        /// <param name="title">Book title; anything from the first " (" onwards is ignored.</param>
        /// <param name="author">Author name; the space in front of each single-letter initial is removed for the query.</param>
        /// <returns>
        /// A <see cref="BookInfo"/> for the first search result, or null when there is no
        /// result, the result does not mention the title, or the title link is missing.
        /// </returns>
        public static BookInfo AmazonSearchBook(string title, string author)
        {
            BookInfo result = null;

            // Remove the space in front of EVERY " X." initial. The previous loop restarted
            // from the unmodified author on each match, so only the last initial was handled.
            string authorTrim = Regex.Replace(author, @" ([A-Z]\.)", "$1");

            // Drop a trailing parenthesised part such as a series or edition note.
            int parenthesisIndex = title.IndexOf(" (", StringComparison.Ordinal);
            if (parenthesisIndex >= 0)
            {
                title = title.Substring(0, parenthesisIndex);
            }

            string searchUrl = @"http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Ddigital-text&field-keywords=" +
                               Uri.EscapeDataString(title + " " + authorTrim + " kindle edition");

            HAP.HtmlDocument searchDoc = new HAP.HtmlDocument();
            searchDoc.LoadHtml(HttpDownloader.GetPageHtml(searchUrl));
            HAP.HtmlNode node = searchDoc.DocumentNode.SelectSingleNode("//li[@id='result_0']");

            // At least attempt to verify it might be the same book: the first result must
            // mention the title, ignoring case.
            if (node != null && node.InnerText.IndexOf(title, StringComparison.OrdinalIgnoreCase) >= 0)
            {
                string foundASIN = node.GetAttributeValue("data-asin", "");
                node = node.SelectSingleNode(".//div/div/div/div[@class='a-fixed-left-grid-col a-col-right']/div/a");
                if (node != null)
                {
                    result           = new BookInfo(node.InnerText, author, foundASIN);
                    result.amazonUrl = node.GetAttributeValue("href", ""); // Grab the true link for good measure
                }
            }

            return result;
        }
Exemplo n.º 41
0
        /// <summary>
        /// Extracts the number of the last review page from the pagination list.
        /// </summary>
        /// <param name="html">Node the pagination XPath is evaluated against.</param>
        /// <returns>
        /// The page number parsed from the link of the 'page-button' item that sits second
        /// to last in its list, or 0 when that link is missing or yields no number.
        /// </returns>
        internal static int GetReviewLastPageNumber(HtmlAgilityPack.HtmlNode html)
        {
            /*
             * Sample of the markup being parsed:
             *
             * <ul class="a-pagination">
             *  <li class="a-disabled">
             *      <span class="a-declarative" data-action="reviews:page-action" data-reviews:page-action="{&quot;allowLinkDefault&quot;:&quot;1&quot;}">
             *          &larr;<span class="a-letter-space"></span><span class="a-letter-space"></span>Previous
             *      </span>
             *  </li>
             *  <li class="a-selected page-button" data-reftag="cm_cr_pr_btm_link">
             *      <a href="/We-Yevgeny-Zamyatin/product-reviews/0140185852/ref=cm_cr_pr_btm_link_1?ie=UTF8&pageSize=50&sortBy=recent">
             *          1
             *      </a>
             *  </li>
             * ...
             *  <li class="page-button" data-reftag="cm_cr_pr_btm_link">
             *      <a href="/We-Yevgeny-Zamyatin/product-reviews/0140185852/ref=cm_cr_pr_btm_link_5?ie=UTF8&pageNumber=5&pageSize=50&sortBy=recent">
             *          5
             *      </a>
             *  </li>
             *  <li class="a-last">
             *      <a href="/We-Yevgeny-Zamyatin/product-reviews/0140185852/ref=cm_cr_pr_paging_btm_2?ie=UTF8&pageNumber=2&pageSize=50&sortBy=recent">
             *          Next<span class="a-letter-space"></span><span class="a-letter-space"></span>&rarr;
             *      </a>
             *  </li>
             * </ul>
             */
            var lastPageLink = html.SelectSingleNode("//ul/li[@class='page-button' and position() = (last()-1)]/a");

            if (lastPageLink == null)
            {
                return 0;
            }

            Match urlMatch = pageNumberFromUrl.Match(lastPageLink.GetAttributeValue("href", ""));

            if (urlMatch.Groups.Count <= 1)
            {
                return 0;
            }

            // TryParse leaves 0 in 'pageNum' when the captured text is not a number.
            int pageNum;
            int.TryParse(urlMatch.Groups[1].Value, out pageNum);
            return pageNum;
        }
Exemplo n.º 42
0
 /// <summary>
 /// Returns the href of the anchor that is a direct child of a news item.
 /// </summary>
 /// <param name="item">News item node.</param>
 /// <returns>
 /// The anchor's href ("" when the attribute is absent), or null when a
 /// NullReferenceException occurs, e.g. because the item has no anchor child.
 /// </returns>
 public string ParseNewsItem(HtmlAgilityPack.HtmlNode item)
 {
     // News items are expected to look like this:
     //   <p>сегодня, 12:30</p>      ("today, 12:30")
     //   <a href="/relative-uri">
     //     <span>title</span>
     //   </a>
     try
     {
         var anchor = item.SelectSingleNode("a");
         return anchor.GetAttributeValue("href", "");
     }
     catch (NullReferenceException ex)
     {
         // Unexpected markup: report it and fall back to null.
         Console.WriteLine("Error parsing item {0}, malformed markup: {1}, {2}", item.XPath, ex.Message, ex.StackTrace);
         return null;
     }
 }
Exemplo n.º 43
0
        /// <summary>
        /// Parses the page, stores the page count it finds in <c>totalPages</c> and reports
        /// whether that count is positive.
        /// </summary>
        /// <param name="documentText">HTML of the page to inspect.</param>
        /// <returns>
        /// true when the parsed page count is greater than zero; false otherwise, including
        /// when anything goes wrong while parsing (in which case <c>totalPages</c> is reset to 0).
        /// </returns>
        Boolean PageHasData(string documentText)
        {
            try
            {
                var htmlDocument = new HtmlAgilityPack.HtmlDocument();
                htmlDocument.LoadHtml(documentText);

                HtmlAgilityPack.HtmlNode mainContainer = htmlDocument.DocumentNode.SelectSingleNode("//*[@id='regmain']");
                HtmlAgilityPack.HtmlNode pagerNode     = mainContainer.SelectSingleNode("//*[@id='regPage']/div/div/div");

                // The page count is read from the fifth descendant counted from the end.
                var pagerDescendants = pagerNode.Descendants().ToArray();
                int pageCountIndex   = pagerDescendants.Length - 5;

                totalPages = Int32.Parse(pagerDescendants[pageCountIndex].InnerText);

                return totalPages > 0;
            }
            catch (Exception)
            {
                totalPages = 0;
                return false;
            }
        }
Exemplo n.º 44
0
        //static int nLevels = 64;
        //static Size testPadding = new Size(32, 32);
        //static double hitThreshold = 0;
        //static int groupThreshold = 2;
        //static double scaleStep = 1.05;
        //static bool useMeanShiftGrouping = false;


        /// <summary>
        /// Loads one sample per .bmp/.jpg image in <paramref name="folder"/>, using the
        /// shape data from the image's companion "&lt;name&gt;_data.xml" file.
        /// </summary>
        /// <remarks>
        /// Images without a companion XML file are skipped and their (lower-cased) paths are
        /// written to "missing_xml.txt" in the current directory.
        /// </remarks>
        /// <param name="folder">Directory whose files are scanned (not recursive).</param>
        /// <returns>The samples that had a companion XML file.</returns>
        public static List <PSM4TxSample> LoadSamples(string folder)
        {
            List<PSM4TxSample> samples             = new List<PSM4TxSample>();
            List<string>       missing_xml_samples = new List<string>();

            string[] files = Directory.EnumerateFiles(folder)
                             .Where(file => file.EndsWith(".bmp", StringComparison.OrdinalIgnoreCase) ||
                                            file.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase))
                             .ToArray();

            for (int k = 0; k < files.Length; k++)
            {
                string img_file = files[k].ToLower();

                // Swap only the trailing 4-character extension. A plain Replace would also
                // rewrite ".bmp"/".jpg" occurring earlier in the path (e.g. in a folder name).
                string xml_file = img_file.Substring(0, img_file.Length - 4) + "_data.xml";

                if (!File.Exists(xml_file))
                {
                    missing_xml_samples.Add(img_file);
                    continue;
                }

                PSM4TxSample sample = new PSM4TxSample(img_file);

                // XPath reminder:
                //   .// selects descendants (children of children, and so forth),
                //   ./  selects direct children,
                //   a leading / is relative to the document root; start with ./ to stay
                //   relative to the current node.
                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.Load(xml_file);

                // SelectNodes returns null when the file contains no shape elements.
                HtmlAgilityPack.HtmlNodeCollection shape_nodes = doc.DocumentNode.SelectNodes("//shape");
                if (shape_nodes != null)
                {
                    foreach (HtmlAgilityPack.HtmlNode shape_node in shape_nodes)
                    {
                        HtmlAgilityPack.HtmlNode blocktext_node = shape_node.SelectSingleNode(".//blocktext");
                        HtmlAgilityPack.HtmlNode text_node      = blocktext_node.SelectSingleNode("./text");
                        HtmlAgilityPack.HtmlNode data_node      = shape_node.SelectSingleNode(".//data");
                        HtmlAgilityPack.HtmlNode extent_node    = data_node.SelectSingleNode("./extent");

                        string s_x = extent_node.GetAttributeValue("X", "");
                        string s_y = extent_node.GetAttributeValue("Y", "");
                        string s_w = extent_node.GetAttributeValue("Width", "");
                        string s_h = extent_node.GetAttributeValue("Height", "");

                        // The shape's text decides which part of the sample the extent describes;
                        // shapes with any other text are ignored.
                        switch (text_node.InnerText.ToLower())
                        {
                            case "isolator":
                                sample.SetIsolator(s_x, s_y, s_w, s_h);
                                break;
                            case "arrayblock":
                                sample.SetArrayBlock(s_x, s_y, s_w, s_h);
                                break;
                            case "aperture":
                                sample.SetAperture(s_x, s_y, s_w, s_h);
                                break;
                        }
                    }
                }

                samples.Add(sample);
            }

            using (StreamWriter sw = new StreamWriter("missing_xml.txt"))
            {
                foreach (string missing in missing_xml_samples)
                {
                    sw.WriteLine(missing);
                }
            }

            return samples;
        }
Exemplo n.º 45
0
        /// <summary>
        /// Walks the user table of a listing page, downloads each user's detail page and
        /// stores the extracted fields through <c>StoreUserData</c>.
        /// </summary>
        /// <remarks>
        /// Extraction is best effort: the first failure (missing node, unexpected markup,
        /// download or parse error) stops processing. The failure is written to the trace
        /// listeners instead of being discarded silently; it is never rethrown.
        /// </remarks>
        /// <param name="documentText">HTML of the listing page.</param>
        void ExtractUsers(string documentText)
        {
            try
            {
                HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
                htmlDocument.LoadHtml(documentText);

                HtmlAgilityPack.HtmlNode userTable = htmlDocument.DocumentNode.SelectSingleNode("//*[@id='regPage']/div/table");
                if (userTable == null)
                {
                    return;
                }

                // SelectNodes returns null when the table has no rows.
                HtmlAgilityPack.HtmlNodeCollection userRows = userTable.SelectNodes("./tr");
                if (userRows == null)
                {
                    return;
                }

                // XPath positions are 1-based and the loop starts at row 2
                // (row 1 is presumably the header row - TODO confirm).
                for (int k = 2; k <= userRows.Count; k++)
                {
                    HtmlAgilityPack.HtmlNode moreBtn = userTable.SelectSingleNode("./tr[" + k + "]/td[6]/a");

                    // The detail link is rebuilt from the part of the onclick text that
                    // follows its first 'b' (up to the next 'b').
                    string   linkBtn  = moreBtn.GetAttributeValue("onclick", null);
                    string[] mainLink = linkBtn.Split('b');
                    string   moreLink = mainLink[1];

                    using (WebClient client = new WebClient())
                    {
                        string detailHtml = client.DownloadString("http://bloodhelpers.com/b" + moreLink);
                        HtmlAgilityPack.HtmlDocument userDocument = new HtmlAgilityPack.HtmlDocument();
                        userDocument.LoadHtml(detailHtml);

                        HtmlAgilityPack.HtmlNode selectTable = userDocument.DocumentNode.SelectSingleNode("//*[@id='search']/form/table");

                        // Each detail row carries its value in the second cell.
                        string name             = GetDetailCell(selectTable, 1).InnerText;
                        string email            = ExtractMarkupFragment(GetDetailCell(selectTable, 2));
                        string bloodGroup       = GetDetailCell(selectTable, 3).InnerText;
                        string gender           = GetDetailCell(selectTable, 4).InnerText;
                        int    age              = Int32.Parse(GetDetailCell(selectTable, 5).InnerText.Replace("Years", ""));
                        string city             = GetDetailCell(selectTable, 6).InnerText;
                        string mobileNumber     = ExtractMarkupFragment(GetDetailCell(selectTable, 7));
                        string landLineNum      = ExtractMarkupFragment(GetDetailCell(selectTable, 8));
                        string lastDonationDate = GetDetailCell(selectTable, 9).InnerText;

                        // Store data in the database.
                        StoreUserData(name, email, bloodGroup, gender, age, city, mobileNumber, landLineNum, lastDonationDate);
                    }
                }
            }
            catch (Exception ex)
            {
                // Keep the best-effort contract (never throw to the caller) but leave a trace.
                System.Diagnostics.Trace.TraceError("ExtractUsers stopped: {0}", ex);
            }
        }

        // Returns the second cell of the given 1-based row of a user detail table.
        private static HtmlAgilityPack.HtmlNode GetDetailCell(HtmlAgilityPack.HtmlNode table, int row)
        {
            return table.SelectSingleNode("./tr[" + row + "]/td[2]");
        }

        // Rebuilds the second and third '='-separated pieces of the cell's inner HTML and
        // strips the word "border" (the e-mail and phone cells hold image markup, not text).
        private static string ExtractMarkupFragment(HtmlAgilityPack.HtmlNode cell)
        {
            string[] parts = cell.InnerHtml.Split('=');
            return (parts[1] + "=" + parts[2]).Replace("border", "");
        }
Exemplo n.º 46
0
        public void start_conversion()
        {
            WebClient wc = new WebClient();

            wc.Encoding = Encoding.UTF8;
            string html = wc.DownloadString(url.Text);

            agi.HtmlDocument doc = new agi.HtmlDocument();
            doc.LoadHtml(html);

            agi.HtmlNode id_main = doc.GetElementbyId("main");

            string title    = id_main.SelectSingleNode("//h1").InnerText;   //제목
            string re_title = "";

            String[] result_title;

            if (title.Contains("&raquo; "))
            {
                result_title = title.Split(' ');

                /*for (int r = 0; r < result_title.Length; r++)
                 * {
                 *  if (result_title[r] != "&raquo;")
                 *  {
                 *      if (r != 0) re_title += " ";
                 *      //title = result_title[r];
                 *      re_title += result_title[r];
                 *  } else
                 *  {
                 *      break;
                 *  }
                 * }*/
                re_title += result_title[0];
            }
            else
            {
                re_title = title;
            }
            //result_context.Text = title + "\r\n";

            int now_num = 1;
            int data_i  = 1;

            String[] now_split   = new String[] { };
            var      result_date = "";

            //실제날짜 추출
            var    ani_date        = doc.DocumentNode.SelectSingleNode("//div[@class='tidHeader']//table/tr/td[1]").InnerHtml;
            String result_ani_date = ani_date.ToString().Split('月')[0];

            //result_context.Text = ani_date.ToString().Split('月')[0] + "\r\n";

            for (;;)
            {
                result_date = doc.DocumentNode.SelectSingleNode("//table[@class='data']//tr[" + data_i + "]/td[1]").InnerText;       //날짜 분기
                if (data_i == 3)
                {
                    if (result_date.Contains('~'))
                    {
                        now_split = doc.DocumentNode.SelectSingleNode("//table[@class='data']//tr[" + data_i + "]/td[1]").InnerText.Split('~');
                        break;
                    }
                }
                if (data_i == 4)
                {
                    if (result_date.Contains('~'))
                    {
                        now_split = doc.DocumentNode.SelectSingleNode("//table[@class='data']//tr[" + data_i + "]/td[1]").InnerText.Split('~');
                        break;
                    }
                }
                if (data_i == 5)
                {
                    if (result_date.Contains('~'))
                    {
                        now_split = doc.DocumentNode.SelectSingleNode("//table[@class='data']//tr[" + data_i + "]/td[1]").InnerText.Split('~');
                        break;
                    }
                }
                data_i++;
            }

            String[] now_split2 = now_split[0].Split('-');
            if (Convert.ToInt32(now_split2[1]) == 4)
            {
                now_num = 2;
            }
            if (Convert.ToInt32(now_split2[1]) == 7)
            {
                now_num = 3;
            }
            if (Convert.ToInt32(now_split2[1]) == 10)
            {
                now_num = 4;
            }
            String nowyear = now_split2[0];



            ArrayList ch_ = new ArrayList();

            foreach (var cell in doc.DocumentNode.SelectNodes("//td[@class='ch']"))
            {
                if (cell.InnerText != "&nbsp;")
                {
                    ch_.Add(cell.InnerText);
                }
            }
            ArrayList now_date  = new ArrayList();
            int       ch_num    = 1;
            var       team_date = "";

            foreach (var cell in doc.DocumentNode.SelectNodes("//table[@class='schedule']/tbody/tr"))
            {
                foreach (var cell2 in cell.SelectNodes(".//td").Count.ToString())
                {
                    //result_context.Text += Convert.ToInt32(cell2);
                    if (Convert.ToInt32(cell2) == 53)
                    {
                        if (cell.SelectSingleNode(".//td[3]").InnerText != "&nbsp;")
                        {
                            //result_context.Text += doc.DocumentNode.SelectSingleNode("//table[@class='schedule']/tbody/tr[" + ch_num + "]/td[1]").InnerText + "\r\n";
                            team_date = doc.DocumentNode.SelectSingleNode("//table[@class='schedule']/tbody/tr[" + ch_num + "]/td[1]").InnerText.Split(' ')[0];
                            now_date.Add(doc.DocumentNode.SelectSingleNode("//table[@class='schedule']/tbody/tr[" + ch_num + "]/td[1]").InnerText.Split(' ')[0]);
                        }
                    }
                    else if (Convert.ToInt32(cell2) == 52)
                    {
                        if (cell.SelectSingleNode(".//td[2]").InnerText != "&nbsp;")
                        {
                            //result_context.Text += team_date + "\r\n";
                            now_date.Add(team_date);
                        }
                    }
                    else if (Convert.ToInt32(cell2) == 50)
                    {
                        team_date = doc.DocumentNode.SelectSingleNode("//table[@class='schedule']/tbody/tr[" + ch_num + "]/td[1]").InnerText.Split(' ')[0] + "\r\n";
                    }
                    ch_num++;
                }
            }
            ArrayList subtitle_all     = new ArrayList();
            ArrayList subtitle_num     = new ArrayList();
            ArrayList subtitle_content = new ArrayList();
            ArrayList numsss           = new ArrayList();
            int       numssd           = 0;

            foreach (var cell in doc.DocumentNode.SelectNodes("//td[@class='subtitle']"))
            {
                if (cell.InnerText != "&nbsp;")
                {
                    //MessageBox.Show(cell.InnerText.Substring(0, 1));
                    if (cell.InnerText.Substring(0, 1) == "^")
                    {
                        subtitle_all.Add(cell.InnerText);
                        subtitle_num.Add(cell.InnerText.Split('#')[1].Split(' ')[0]);
                        if (1 < cell.InnerText.Split(' ').Length)
                        {
                            string cells_text = "";
                            for (int c = 1; c < cell.InnerText.Split(' ').Length; c++)
                            {
                                if (c != 1)
                                {
                                    cells_text += " ";
                                }
                                cells_text += cell.InnerText.Split(' ')[c].Replace("!", "!").Replace("?", "?");
                            }
                            subtitle_content.Add(cells_text);
                        }
                        else
                        {
                            subtitle_content.Add(cell.InnerText.Split(' ')[1]);
                        }
                        numsss.Add(numssd);
                        numssd++;
                    }
                    else if (cell.InnerText.Split(' ')[0].Contains("#"))
                    {
                        //result_context.Text += cell.InnerText + "\r\n";
                        subtitle_all.Add(cell.InnerText);
                        subtitle_num.Add(cell.InnerText.Split(' ')[0].Replace("#", ""));
                        if (1 < cell.InnerText.Split(' ').Length)
                        {
                            string cells_text = "";
                            for (int c = 1; c < cell.InnerText.Split(' ').Length; c++)
                            {
                                if (c != 1)
                                {
                                    cells_text += " ";
                                }
                                cells_text += cell.InnerText.Split(' ')[c].Replace("!", "!").Replace("?", "?");
                            }
                            subtitle_content.Add(cells_text);
                        }
                        else
                        {
                            subtitle_content.Add(cell.InnerText.Split(' ')[1]);
                        }
                        numsss.Add(numssd);
                        numssd++;
                    }
                    else
                    {
                        numssd++;
                        continue;
                    }
                }
            }



            int       notget = 0;
            ArrayList flag_  = new ArrayList();

            foreach (var cell in doc.DocumentNode.SelectNodes("//td[@class='flag nobr']"))
            {
                if ((int)numsss[0] <= (int)notget)
                {
                    //result_context.Text += cell.InnerText.Replace("!", "").Replace("●", "").Replace("再", "").Replace("Update", "").Replace(" &nbsp;", "").Trim() + "\r\n";
                    flag_.Add("[" + cell.InnerText.Replace("!", "").Replace("●", "").Replace("注", "").Replace("再", "").Replace("Update", "").Replace(" &nbsp;", "").Trim() + "] ");
                }
                notget++;
            }

            ArrayList date_sort      = new ArrayList();
            ArrayList date_text_sort = new ArrayList();
            int       now_info       = 0;

            for (int i = 0; i < subtitle_all.Count; i++)
            {
                Application.DoEvents();
                //MessageBox.Show(subtitle_num[i].ToString());
                int    nums       = Convert.ToInt32(subtitle_num[i]);
                String resert_num = "";

                if (nums < 10)
                {
                    resert_num = "0" + Convert.ToString(subtitle_num[i]);
                }
                else
                {
                    resert_num = Convert.ToString(subtitle_num[i]);
                }

                string clb_string = "";
                for (int x = 0; x <= brocas.CheckedItems.Count - 1; x++)
                {
                    clb_string = clb_string + brocas.CheckedItems[x].ToString();
                }
                String flag = "";
                if (Convert.ToString(flag_[i]) == "[] ")
                {
                    flag = "";
                }
                else
                {
                    flag = Convert.ToString(flag_[i]);
                }

                if (result_context.Text.Contains("第" + resert_num + "話 ") || ch_[i].ToString().Contains(clb_string))
                {
                    continue;
                }
                else
                {
                    if (all_radio.Checked)
                    {    //전체
                        string sub_content = "";
                        if (subtitle_content[i].ToString() != "")
                        {
                            sub_content = "〔 " + subtitle_content[i].ToString() + " 〕";
                        }
                        result_context.Text += "(" + nowyear + "Q" + now_num + ") " + re_title + " - 第" + resert_num + "話 " + flag + sub_content;
                        result_context.Text += clb_string + " 1280x720 x264 \r\n";
                    }
                    else if (now_radio.Checked)
                    { //현재
                        DateTime time_1 = new DateTime(Convert.ToInt32(result_ani_date.Split('年')[0]), Convert.ToInt32(result_ani_date.Split('年')[1]), Convert.ToInt32(now_date[i]), 0, 0, 0);
                        //result_context.Text += now_date_text.Text.Split('-')[0]+ now_date_text.Text.Split('-')[1]+ now_date_text.Text.Split('-')[2];
                        int      year   = Convert.ToInt32(DateTime.Now.ToString("yyyy"));
                        int      month  = Convert.ToInt32(DateTime.Now.ToString("MM"));
                        int      day    = Convert.ToInt32(DateTime.Now.ToString("dd"));
                        DateTime time_2 = new DateTime(year, month, day, 0, 0, 0);
                        if (DateTime.Compare(time_1, time_2) <= 0)
                        {
                            now_info = 1;
                            string sub_content = "";
                            if (subtitle_content[i].ToString() != "")
                            {
                                sub_content = "〔 " + subtitle_content[i].ToString() + " 〕";
                            }

                            //date_sort.Add(Convert.ToInt32(DateTime.Now.ToString("dd")) - Convert.ToInt32(now_date[i])+ "(" + nowyear + "Q" + now_num + ") " + re_title + " - 第" + resert_num + "話 " + flag + sub_content + clb_string + " 1280x720 x264");
                            result_context.Text += "(" + nowyear + "Q" + now_num + ") " + re_title + " - 第" + resert_num + "話 " + flag + " " + sub_content;
                            result_context.Text += clb_string + " 1280x720 x264 \r\n";
                        }
                    }

                    else //미래
                    {
                        DateTime time_1 = new DateTime(Convert.ToInt32(result_ani_date.Split('年')[0]), Convert.ToInt32(result_ani_date.Split('年')[1]), Convert.ToInt32(now_date[i]), 0, 0, 0);
                        //result_context.Text += now_date_text.Text.Split('-')[0]+ now_date_text.Text.Split('-')[1]+ now_date_text.Text.Split('-')[2];
                        int      year   = Convert.ToInt32(DateTime.Now.ToString("yyyy"));
                        int      month  = Convert.ToInt32(DateTime.Now.ToString("MM"));
                        int      day    = Convert.ToInt32(DateTime.Now.ToString("dd"));
                        DateTime time_2 = new DateTime(year, month, day, 0, 0, 0);
                        if (DateTime.Compare(time_1, time_2) > 0)
                        {
                            string sub_content = "";
                            if (subtitle_content[i].ToString() != "")
                            {
                                sub_content = "〔 " + subtitle_content[i].ToString() + " 〕";
                            }
                            result_context.Text += "(" + nowyear + "Q" + now_num + ") " + re_title + " - 第" + resert_num + "話 " + flag + sub_content;
                            result_context.Text += clb_string + " 1280x720 x264 \r\n";
                        }
                    }
                }
            }

            if (now_info == 1)
            {
                string[] tempArray = result_context.Lines;
                result_context.Text = tempArray[result_context.Lines.Length - 2];
                now_info            = 0;
            }

            if (result_context.Text == "")
            {
                MessageBox.Show("추출결과가 없습니다.");
            }
            //result_context.Text += subtitle_.Count;
        }
Exemplo n.º 47
0
        /// <summary>
        /// Downloads the page at <c>url.Text</c>, collects every listed episode whose
        /// broadcast time is not in the future and appends one formatted line per
        /// episode number to <c>result_context</c>.
        /// </summary>
        /// <remarks>
        /// When an expected page element (title, date-range row, ProgList columns) is
        /// missing, the "no results" message box is shown and the method returns instead
        /// of failing with a <see cref="NullReferenceException"/>.
        /// </remarks>
        public void start_conversion_time()
        {
            const string noResultMessage = "추출결과가 없습니다.";

            result_context.Text = "";

            // WebClient is IDisposable; release it as soon as the page has been fetched.
            string html;
            using (WebClient wc = new WebClient())
            {
                wc.Encoding = Encoding.UTF8;
                html        = wc.DownloadString(url.Text);
            }

            agi.HtmlDocument doc = new agi.HtmlDocument();
            doc.LoadHtml(html);

            // ---- Title ---------------------------------------------------------
            agi.HtmlNode id_main   = doc.GetElementbyId("main");
            agi.HtmlNode titleNode = (id_main == null) ? null : id_main.SelectSingleNode("//h1");
            if (titleNode == null)
            {
                MessageBox.Show(noResultMessage);
                return;
            }

            // Everything from the first '&' on (the "&raquo; ..." suffix) is dropped.
            // The former '年' branch was always overwritten by this one and could throw
            // on a title without a space, so it has been removed.
            string title    = titleNode.InnerText;
            string re_title = title.Contains("&raquo; ") ? title.Split('&')[0].Trim() : title;

            // ---- Year and quarter ----------------------------------------------
            // The date range ("start~end") is looked for in rows 3 to 5 of the data table.
            string[] rangeParts = null;
            for (int row = 3; row <= 5; row++)
            {
                agi.HtmlNode rangeCell = doc.DocumentNode.SelectSingleNode("//table[@class='data']//tr[" + row + "]/td[1]");
                if (rangeCell != null && rangeCell.InnerText.IndexOf('~') >= 0)
                {
                    rangeParts = rangeCell.InnerText.Split('~');
                    break;
                }
            }
            if (rangeParts == null)
            {
                MessageBox.Show(noResultMessage);
                return;
            }

            string[] startDate = rangeParts[0].Split('-');
            int      startMonth;
            if (startDate.Length < 2 || !int.TryParse(startDate[1], out startMonth))
            {
                MessageBox.Show(noResultMessage);
                return;
            }

            String nowyear = startDate[0];
            int    now_num;
            switch (startMonth)
            {
                case 4:  now_num = 2; break;
                case 7:  now_num = 3; break;
                case 10: now_num = 4; break;
                default: now_num = 1; break;
            }

            // ---- ProgList columns ----------------------------------------------
            var dateCells     = doc.DocumentNode.SelectNodes("//table[@id='ProgList']/tr/td[2]");
            var episodeCells  = doc.DocumentNode.SelectNodes("//table[@id='ProgList']/tr/td[4]");
            var subtitleCells = doc.DocumentNode.SelectNodes("//table[@id='ProgList']/tr/td[5]");
            var flagCells     = doc.DocumentNode.SelectNodes("//table[@id='ProgList']/tr/td[6]");
            if (dateCells == null || episodeCells == null || subtitleCells == null || flagCells == null)
            {
                // SelectNodes yields null when nothing matches.
                MessageBox.Show(noResultMessage);
                return;
            }

            // Flags: already formatted the way they are inserted into the output line
            // (" [x] " when a flag remains after cleaning, a single space otherwise).
            List<string> flags = new List<string>();
            foreach (var cell in flagCells)
            {
                string marker = cell.InnerText.Replace("!", "").Replace("●", "").Replace("注", "").Replace("再", "").Replace("Update", "").Replace(" &nbsp;", "").Trim();
                flags.Add(marker == "" ? " " : " [" + marker + "] ");
            }

            // Subtitles, with the text of an embedded notice/comment div removed.
            List<string> subtitles = new List<string>();
            foreach (var cell in subtitleCells)
            {
                if (cell.InnerText == "&nbsp;")
                {
                    // Keep an empty placeholder so the list stays aligned with the row
                    // index; skipping the cell shifted every later subtitle up one row.
                    subtitles.Add("");
                    continue;
                }

                agi.HtmlNode annotation = cell.SelectSingleNode(".//div[@class='peNotice']")
                                          ?? cell.SelectSingleNode(".//div[@class='peComment']");
                string text = cell.InnerText;
                if (annotation != null && annotation.InnerText != "")
                {
                    // string.Replace rejects an empty search string, hence the check.
                    text = text.Replace(annotation.InnerText, "");
                }

                // NOTE(review): as written these two replacements are no-ops; presumably
                // the search strings were full-width punctuation originally - confirm.
                subtitles.Add(text.Replace("!", "!").Replace("?", "?"));
            }

            // Broadcast day ("yyyy-MM-dd") and time ("HH:mm", possibly with an arrow suffix).
            List<string> days  = new List<string>();
            List<string> times = new List<string>();
            foreach (var cell in dateCells)
            {
                string   raw       = cell.InnerText;
                string[] afterDay  = raw.Split(')');
                days.Add(raw.Split('(')[0]);
                times.Add(afterDay.Length > 1 ? afterDay[1].Replace("&nbsp;", "").Replace(" ", "") : "");
            }

            // Suffix built from the checked broadcaster items.
            string clb_string = "";
            foreach (object checkedItem in brocas.CheckedItems)
            {
                clb_string = clb_string + checkedItem.ToString();
            }

            // "Now", truncated to the minute; taken once so every row is compared
            // against the same instant.
            DateTime current = DateTime.Now;
            DateTime now     = new DateTime(current.Year, current.Month, current.Day, current.Hour, current.Minute, 0);

            // Episode number -> output line; the first row seen for a number wins.
            Dictionary <int, string> Rs_Array_LIst = new Dictionary <int, string>();

            int rowCount = Math.Min(Math.Min(episodeCells.Count, times.Count), Math.Min(flags.Count, subtitles.Count));
            for (int cell_num = 0; cell_num < rowCount; cell_num++)
            {
                System.Windows.Forms.Application.DoEvents();

                // Rows whose date or time cannot be parsed are skipped.
                string[] hm = times[cell_num].Split(':');
                DateTime airDay;
                int      hours;
                int      minutes;
                if (hm.Length < 2
                    || !DateTime.TryParseExact(days[cell_num].Trim(), "yyyy-M-d", System.Globalization.CultureInfo.InvariantCulture, System.Globalization.DateTimeStyles.None, out airDay)
                    || !int.TryParse(hm[0], out hours)
                    || !int.TryParse(hm[1].Split('↑', '↓')[0], out minutes))
                {
                    continue;
                }

                // Hours of 24 and above denote the small hours of the following day, so
                // they are added to midnight rather than reduced by 24 on the same day.
                DateTime time_1 = airDay.AddHours(hours).AddMinutes(minutes);
                if (time_1 > now)
                {
                    continue;   // not broadcast yet
                }

                string numbers = episodeCells[cell_num].InnerText;
                int    episode;
                if (numbers == "" || !int.TryParse(numbers, out episode) || episode < 0)
                {
                    continue;   // row without a usable episode number
                }
                if (episode < 10)
                {
                    numbers = "0" + numbers;
                }

                if (result_context.Text.Contains(" - 第" + numbers + "話 " + subtitles[cell_num]))
                {
                    continue;
                }

                string ch_str = (subtitles[cell_num] == "") ? "" : "〔 " + subtitles[cell_num] + " 〕 ";

                // Explicit duplicate check instead of swallowing the exception from Add.
                if (!Rs_Array_LIst.ContainsKey(episode))
                {
                    Rs_Array_LIst.Add(episode, "(" + nowyear + "Q" + now_num + ") " + re_title + " - 第" + numbers + "話" + flags[cell_num] + ch_str + clb_string + " 1280x720 x264" + "\r\n");
                }
            }

            foreach (String re_line in Rs_Array_LIst.Values)
            {
                Application.DoEvents();
                result_context.AppendText(re_line);
            }
        }
Exemplo n.º 48
0
        /// <summary>
        /// Loads the product page at <paramref name="url"/> and copies cover image URL,
        /// title, price and description into a new <c>BookModels</c>.
        /// </summary>
        /// <param name="url">Address of the page to load; also stored as <c>AmazonUrl</c>.</param>
        /// <returns>
        /// The populated book. Any value whose element is missing from the page (or whose
        /// price text is not a number) is simply left at its default.
        /// </returns>
        public static BookModels GetBookDetails(string url)
        {
            var book = new BookModels();

            book.AmazonUrl = url;

            var webGet  = new HtmlWeb();
            var htmlDoc = webGet.Load(url);

            htmlDoc.OptionFixNestedTags = true;

            // Parse errors reported by the loader are tolerated; whatever could be
            // parsed is scraped.
            if (htmlDoc.DocumentNode == null)
            {
                return(book);
            }

            HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
            if (bodyNode == null)
            {
                return(book);
            }

            // Cover image: two known element ids, tried in order.
            var image = bodyNode.SelectSingleNode("//img[@id='main-image']")
                        ?? bodyNode.SelectSingleNode("//img[@id='imgBlkFront']");
            if (image != null)
            {
                string src = image.GetAttributeValue("src", string.Empty);
                if (src.Length > 0)
                {
                    // Cut the "._..." suffix when present; otherwise keep the URL as it is
                    // (Substring with a -1 index used to throw here).
                    int suffixStart = src.IndexOf("._", StringComparison.Ordinal);
                    book.CoverUrl = (suffixStart >= 0) ? src.Substring(0, suffixStart) + ".jpg" : src;
                }
            }

            // Title: span#btAsinTitle first, h1#title as fallback.
            book.Title = bodyNode.Descendants("span")
                         .Where(x => x.Id == "btAsinTitle")
                         .Select(s => s.InnerText)
                         .FirstOrDefault();
            if (string.IsNullOrEmpty(book.Title))
            {
                book.Title = bodyNode.Descendants("h1")
                             .Where(x => x.Id == "title")
                             .Select(s => s.InnerText)
                             .FirstOrDefault();
            }

            var price = bodyNode.SelectSingleNode("//b[@class='priceLarge']");
            if (price != null)
            {
                // The page text uses '.' as decimal separator, so parse with the invariant
                // culture instead of whatever culture the current thread happens to have.
                string  priceText = price.InnerText.Trim().Replace("$", string.Empty).Replace("\n", string.Empty);
                decimal amount;
                if (decimal.TryParse(priceText, System.Globalization.NumberStyles.Number, System.Globalization.CultureInfo.InvariantCulture, out amount))
                {
                    book.Price = amount;
                }
            }

            var description = bodyNode.SelectSingleNode("//div[@id='postBodyPS']");
            if (description != null)
            {
                book.Description = description.InnerText;
            }

            return(book);
        }
Exemplo n.º 49
0
        /// <summary>
        /// Search Shelfari for series info, scrape series page, and return next title in series.
        /// </summary>
        /// <param name="searchHtmlDoc">
        /// Book's Shelfari page, pre-downloaded. When a series link is found, the series
        /// page is loaded into this same document, replacing its previous content.
        /// </param>
        /// <returns>
        /// Title of the next book in the series, or an empty string when the page has no
        /// series info, the book is the last one, or the next entry cannot be located.
        /// </returns>
        private string GetNextInSeriesTitle2(HtmlAgilityPack.HtmlDocument searchHtmlDoc)
        {
            //Check if book's Shelfari page contains series info
            HtmlAgilityPack.HtmlNode seriesSpan = searchHtmlDoc.DocumentNode.SelectSingleNode("//span[@class='series']");
            if (seriesSpan == null)
            {
                return("");
            }

            //Series name and book number; the number is the text after the last space
            string series = seriesSpan.InnerText.Trim();
            int    currentSeriesIndex;
            Int32.TryParse(series.Substring(series.LastIndexOf(' ') + 1), out currentSeriesIndex);

            //Parse series Shelfari URL
            HtmlAgilityPack.HtmlNode seriesLink = seriesSpan.SelectSingleNode("//span[@class='series']/a[@href]");
            string seriesURL = (seriesLink == null) ? "" : seriesLink.GetAttributeValue("href", "");
            if (seriesURL == "")
            {
                return("");
            }

            //Read the short name before the document is reloaded below
            string seriesShort = (seriesSpan.FirstChild == null) ? "" : seriesSpan.FirstChild.InnerText.Trim();

            //The URL is passed as-is: running it through String.Format would throw on
            //(or alter) an address that contains braces.
            searchHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(seriesURL));

            //Parse number of books in series and convert to integer
            int currentSeriesCount = 0;
            HtmlAgilityPack.HtmlNode countNode = searchHtmlDoc.DocumentNode.SelectSingleNode("//h2[@class='f_m']");
            if (countNode != null && countNode.FirstChild != null)
            {
                Match countMatch = Regex.Match(countNode.FirstChild.InnerText.Trim(), @"\d+");
                if (countMatch.Success)
                {
                    Int32.TryParse(countMatch.Value, out currentSeriesCount);
                }
            }

            //Check if there is a next book
            if (currentSeriesIndex >= currentSeriesCount)
            {
                return("");
            }

            //Add series name and book number to log
            main.Log(String.Format("This is book {0} of {1} in the {2} Series...",
                                   currentSeriesIndex, currentSeriesCount, seriesShort));

            var seriesItems = searchHtmlDoc.DocumentNode.SelectNodes(".//ol/li");
            if (seriesItems == null)
            {
                return("");
            }

            //Match the next number as a whole number only, so that e.g. "2" no longer
            //matches an entry labelled 12 or 20.
            Regex nextNumber = new Regex(@"(?<!\d)" + (currentSeriesIndex + 1) + @"(?!\d)");

            foreach (HtmlAgilityPack.HtmlNode seriesItem in seriesItems)
            {
                HtmlAgilityPack.HtmlNode label = seriesItem.SelectSingleNode(".//div/span[@class='series bold']");
                if (label == null || !nextNumber.IsMatch(label.InnerText))
                {
                    continue;
                }

                HtmlAgilityPack.HtmlNode titleLink = seriesItem.SelectSingleNode(".//h3/a");
                if (titleLink == null)
                {
                    continue;
                }

                //Parse title of the next book
                string nextTitle = titleLink.InnerText.Trim();
                //Add next book in series to log
                main.Log(String.Format("The next book in this series is {0}!", nextTitle));
                return(nextTitle);
            }

            return("");
        }
Exemplo n.º 50
0
        /// <summary>
        /// Search Shelfari page for possible series info, returning the next title in the series without downloading any other pages.
        /// Also records page count / reading time, review and reader counts, and the
        /// series name, position and neighbouring titles on <c>curBook</c> and the related fields.
        /// TODO: Un-yuckify all the return paths without nesting a ton of ifs
        /// </summary>
        /// <param name="searchHtmlDoc">Book's Shelfari page, pre-downloaded</param>
        /// <returns>
        /// Title of the following book, or an empty string when the first-edition module
        /// is missing, no standard series is listed, or no "followed by" text is found.
        /// </returns>
        private string GetNextInSeriesTitle(HtmlAgilityPack.HtmlDocument searchHtmlDoc)
        {
            //Added estimated reading time and page count from Shelfari, for now...
            HtmlAgilityPack.HtmlNode pageNode = searchHtmlDoc.DocumentNode.SelectSingleNode("//div[@id='WikiModule_FirstEdition']");
            if (pageNode == null)
            {
                return("");
            }
            HtmlAgilityPack.HtmlNode node1 = pageNode.SelectSingleNode(".//div/div");
            if (node1 == null)
            {
                return("");
            }

            //Parse page count and multiply by average reading time.
            //Digits with optional ",ddd" groups: the old alternation tried plain \d+ first
            //and therefore captured only "1" out of "1,234".
            Match match1 = Regex.Match(node1.InnerText, @"Page Count: (\d+(?:,\d+)*)");
            if (match1.Success)
            {
                string   pageCount = match1.Groups[1].Value;
                double   minutes   = int.Parse(pageCount, NumberStyles.AllowThousands, CultureInfo.InvariantCulture) * 1.2890625;
                TimeSpan span      = TimeSpan.FromMinutes(minutes);
                //TimeSpan.Hours wraps at 24, so use the whole number of total hours.
                int wholeHours = (int)span.TotalHours;
                main.Log(String.Format("Typical time to read: {0} hours and {1} minutes ({2} pages)"
                                       , wholeHours, span.Minutes, pageCount));
                curBook.pagesInBook    = pageCount;
                curBook.readingHours   = wholeHours.ToString();
                curBook.readingMinutes = span.Minutes.ToString();
            }

            //Added highlighted passage from Shelfari, dummy info for now...
            HtmlAgilityPack.HtmlNode members = searchHtmlDoc.DocumentNode.SelectSingleNode("//ul[@class='tabs_n tn1']");
            int highlights = 0;

            if (members != null)
            {
                Match reviews = Regex.Match(members.InnerText, @"Reviews \((\d+(?:,\d+)*)\)");
                if (reviews.Success)
                {
                    curBook.popularPassages = reviews.Groups[1].Value;
                }
                Match readers = Regex.Match(members.InnerText, @"Readers \((\d+(?:,\d+)*)\)");
                if (readers.Success)
                {
                    curBook.popularHighlights = readers.Groups[1].Value;
                    highlights = int.Parse(readers.Groups[1].Value, NumberStyles.AllowThousands, CultureInfo.InvariantCulture);
                }
                string textPassages = curBook.popularPassages == "1"
                    ? String.Format("{0} passage has ", curBook.popularPassages)
                    : String.Format("{0} passages have ", curBook.popularPassages);
                string textHighlights = curBook.popularHighlights == "1"
                    ? String.Format("{0} time", curBook.popularHighlights)
                    : String.Format("{0} times", curBook.popularHighlights);

                main.Log(String.Format("Popular Highlights: {0}been highlighted {1}"
                                       , textPassages, textHighlights));
            }

            //If no "highlighted passages" found from Shelfari, add to log
            if (highlights == 0)
            {
                main.Log("Popular Highlights: No highlighted passages have been found for this book");
                curBook.popularPassages   = "";
                curBook.popularHighlights = "";
            }

            //Check if book series is available and displayed in Series & Lists on Shelfari page.
            HtmlAgilityPack.HtmlNode seriesNode = searchHtmlDoc.DocumentNode.SelectSingleNode("//div[@id='WikiModule_Series']/div");
            if (seriesNode == null)
            {
                return("");
            }
            var seriesTypes = seriesNode.SelectNodes(".//div");
            if (seriesTypes == null)
            {
                return("");
            }

            //If multiple Series found, find and use standard series.
            foreach (HtmlAgilityPack.HtmlNode seriesType in seriesTypes)
            {
                if (!seriesType.InnerText.Contains("(standard series)", StringComparison.OrdinalIgnoreCase)
                    || seriesType.InnerText.Contains("(Reading Order)", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }
                Match position = Regex.Match(seriesType.InnerText, @"This is book (\d+) of (\d+)");
                if (!position.Success)
                {
                    continue;
                }

                HtmlAgilityPack.HtmlNode nameLink = seriesType.ChildNodes["a"];
                if (nameLink != null)
                {
                    curBook.seriesName = nameLink.InnerText.Trim();
                }
                main.Log("About the series: " + seriesType.InnerText.Replace(". (standard series)", ""));
                curBook.seriesPosition = position.Groups[1].Value;
                curBook.totalInSeries  = position.Groups[2].Value;

                HtmlAgilityPack.HtmlNode seriesInfo = seriesNode.SelectSingleNode(".//p");

                //Parse preceding book
                if (seriesInfo != null && seriesInfo.InnerText.Contains("Preceded by ", StringComparison.OrdinalIgnoreCase))
                {
                    string precededBy = null;
                    Match  preceding  = Regex.Match(seriesInfo.InnerText, @"Preceded by (.*),", RegexOptions.IgnoreCase);
                    if (!preceding.Success)
                    {
                        preceding = Regex.Match(seriesInfo.InnerText, @"Preceded by (.*)\.", RegexOptions.IgnoreCase);
                    }
                    if (preceding.Success)
                    {
                        precededBy    = preceding.Groups[1].Value;
                        previousTitle = precededBy;
                    }
                    main.Log("Preceded by: " + previousTitle);
                    //Grab Shelfari Kindle edition link for this book
                    HtmlAgilityPack.HtmlNode previousLink = FindSeriesLink(seriesInfo, precededBy);
                    if (previousLink != null)
                    {
                        previousShelfariUrl = previousLink.GetAttributeValue("href", "") +
                                              "/editions?binding=Kindle";
                    }
                }

                // Check if book is the last in the series
                if (!curBook.seriesPosition.Equals(curBook.totalInSeries)
                    && seriesInfo != null
                    && seriesInfo.InnerText.Contains("followed by ", StringComparison.OrdinalIgnoreCase))
                {
                    //Parse following book
                    Match following = Regex.Match(seriesInfo.InnerText, @"followed by (.*)\.", RegexOptions.IgnoreCase);
                    if (following.Success)
                    {
                        string followedBy = following.Groups[1].Value;
                        main.Log("Followed by: " + followedBy);
                        //Grab Shelfari Kindle edition link for this book. The link is
                        //looked up by title: taking the first anchor returned the
                        //preceding book's link whenever both books were listed.
                        HtmlAgilityPack.HtmlNode nextLink = FindSeriesLink(seriesInfo, followedBy);
                        if (nextLink != null)
                        {
                            nextShelfariUrl = nextLink.GetAttributeValue("href", "") + "/editions?binding=Kindle";
                        }
                        return(followedBy);
                    }
                }

                //Stop after first standard series is found maybe
                //add popup (already started implimentaton) in
                //future to pick which standard series you
                //want to use, not sure if worthwhile though.
                //eg: http://www.shelfari.com/books/37598923
                break;
            }
            return("");
        }

        /// <summary>
        /// Picks, among the direct child anchors of <paramref name="seriesInfo"/>, the one
        /// whose text equals <paramref name="title"/>; falls back to the first child
        /// anchor, and returns null when there is none.
        /// </summary>
        private static HtmlAgilityPack.HtmlNode FindSeriesLink(HtmlAgilityPack.HtmlNode seriesInfo, string title)
        {
            HtmlAgilityPack.HtmlNode firstAnchor = null;
            foreach (HtmlAgilityPack.HtmlNode child in seriesInfo.ChildNodes)
            {
                if (!String.Equals(child.Name, "a", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }
                if (firstAnchor == null)
                {
                    firstAnchor = child;
                }
                if (title != null && child.InnerText.Trim() == title.Trim())
                {
                    return(child);
                }
            }
            return(firstAnchor);
        }
Exemplo n.º 51
0
        /// <summary>
        /// Handles Enter in <c>textBox1</c>: searches Google for the entered serial,
        /// shows the right-hand result block (or the plain search page when there is
        /// none), and adds the serial to the tally list or increases its count.
        /// </summary>
        /// <param name="sender">Control that raised the event (unused).</param>
        /// <param name="e">Key data; the key press is suppressed once Enter was handled.</param>
        private void textBox1_KeyDown(object sender, KeyEventArgs e)
        {
            if (e.KeyCode != Keys.Enter)
            {
                return;
            }

            String serial = "INVALID_SERIAL";
            if (textBox1.Text.Length > 0)
            {
                serial = textBox1.Text;
            }

            // Escape the serial so spaces, '&', '#', ... cannot break the query string.
            string searchUrl = "https://www.google.com/search?q=" + Uri.EscapeDataString(serial);

            try
            {
                webBrowser1.AllowNavigation = true;
                string movieTitle = "UNKNOWN";
                string imgSrc     = "UNKNOWN";

                HtmlWeb web = new HtmlWeb();
                document = web.Load(searchUrl);

                HtmlAgilityPack.HtmlNode bodyNode = document.DocumentNode.SelectSingleNode("//td[@id='rhs_block']");
                richTextBox1.Text = document.DocumentNode.InnerHtml;

                if (bodyNode != null && bodyNode.InnerText.Length > 1)
                {
                    webBrowser1.DocumentText = bodyNode.InnerHtml;

                    // First image inside the block that carries a src attribute.
                    var images = bodyNode.SelectNodes(".//img");
                    if (images != null)
                    {
                        foreach (var image in images)
                        {
                            var src = image.GetAttributeValue("src", null);
                            if (src != null)
                            {
                                imgSrc = src;
                                break;
                            }
                        }
                    }

                    textBox2.Enabled = false;
                    normalQuery      = true;

                    HtmlAgilityPack.HtmlNode titleNode = bodyNode.SelectSingleNode("//div[@class='_B5d']");
                    if (titleNode != null)
                    {
                        movieTitle = titleNode.InnerText;
                    }
                    else
                    {
                        // No title found: re-enable textBox2.
                        textBox2.Enabled = true;
                    }
                }
                else
                {
                    // No usable result block (a missing block used to raise a
                    // NullReferenceException): show the ordinary search page instead.
                    normalQuery      = false;
                    textBox2.Enabled = true;
                    webBrowser1.Navigate(searchUrl);
                }

                // Tally the serial. This used to sit inside the else branch above (its
                // closing brace had ended up in a line comment), so the title and image
                // scraped in the other branch were never stored.
                string entryPrefix = serial + " (";
                bool   alreadyListed = false;
                for (int i = 0; i < list.Count; ++i)
                {
                    string entry = list[i][0].ToString();
                    int    open  = entry.LastIndexOf('(');

                    // Entries look like "<serial> (<count>)"; require the whole serial to
                    // match so that "123" is not counted under "1234".
                    if (!entry.StartsWith(entryPrefix, StringComparison.Ordinal) || open != entryPrefix.Length - 1)
                    {
                        continue;
                    }

                    int count = Int32.Parse(entry.Substring(open + 1, entry.Length - open - 2));
                    list[i][0] = entry.Substring(0, open) + "(" + (count + 1) + ")";

                    // Rebuild both views from the list so the new count is shown.
                    listView1.Items.Clear();
                    listBox2.Items.Clear();
                    foreach (String[] j in list)
                    {
                        listView1.Items.Add(j[0]);
                        listBox2.Items.Add(j[1]);
                    }
                    alreadyListed = true;
                    break;
                }

                if (!alreadyListed)
                {
                    list.Add(new String[] { serial + " (1)", movieTitle, imgSrc });
                    listView1.Items.Add(serial + " (1)");
                    listBox2.Items.Add(movieTitle);
                }
            }
            catch (Exception)
            {
                // NOTE(review): this text is cleared again by the assignment right below.
                textBox1.Text = "INVALID SERIAL";
            }
            textBox1.Text      = "";
            e.SuppressKeyPress = true;
        }
        /// <summary>
        /// Walks the gallery list pages starting at <c>initPage</c>, accumulates per-user
        /// statistics (post count, reply count, view count, recommend count) for posts dated
        /// between <c>initDate</c> and <c>endDate</c>, then ranks the users by post count.
        /// </summary>
        /// <remarks>
        /// Raises <c>newPageHappened</c> after each page that was processed successfully,
        /// <c>ErrorOccured</c> (with the raw page HTML) for each page that could not be parsed,
        /// and <c>CrawlingEnded</c> with the sorted ranking. Finally passes a timestamped file
        /// name under the "temp-data" directory to <c>SaveResult</c>.
        /// The loop ends when <c>endPage</c> is reached or the last post of a page is older
        /// than <c>initDate</c>.
        /// </remarks>
        public void Crawler()
        {
            // Posts whose number is >= the last post number of the previous page are skipped;
            // start with a bound larger than any expected post number.
            int previousPageGallNum = 1000000000;

            Console.WriteLine(initDate.ToString() + endDate.ToString());

            string url = gallUrl + "&page=";

            //Dictionary value => count, replyNum, gallCount, gallRecommend
            Dictionary <UserInfo, int[]> userDic = new Dictionary <UserInfo, int[]>();

            int currentPage = this.initPage;

            // WebClient is IDisposable: release it as soon as the crawl loop is finished.
            using (var client = new WebClient())
            {
                client.Encoding = System.Text.Encoding.UTF8;

                while (true)
                {
                    string text;
                    try
                    {
                        text = client.DownloadString(url + currentPage.ToString());
                        if (string.IsNullOrEmpty(text))
                        {
                            continue;
                        }
                    }
                    catch
                    {
                        // Best-effort: a failed download retries the same page.
                        continue;
                    }


                    hap.HtmlDocument textHap = new hap.HtmlDocument();
                    textHap.LoadHtml(text);

                    hap.HtmlNodeCollection nicks = textHap.DocumentNode.SelectNodes("//tr[@class='ub-content us-post']");

                    // SelectNodes returns null (not an empty collection) when no row matches;
                    // treat that page exactly like a page that failed to parse.
                    bool pageFailed = (nicks == null);
                    if (!pageFailed)
                    {
                        try
                        {
                            foreach (hap.HtmlNode nick in nicks)
                            {
                                int      gallNum, replyNum, gallCount, gallRecommend;
                                DateTime gallDate; string subject;

                                gallNum  = GetOnlyInt(nick.SelectSingleNode("./td[@class='gall_num']").InnerText);
                                // Fixed machine format: parse with the invariant culture so the result does
                                // not depend on the user's calendar or time separator.
                                gallDate = DateTime.ParseExact(nick.SelectSingleNode("./td[@class='gall_date']").Attributes["title"].Value,
                                                               "yyyy-MM-dd HH:mm:ss", System.Globalization.CultureInfo.InvariantCulture);
                                Console.WriteLine(gallNum.ToString() + " " + gallDate.ToString());
                                if (gallNum >= previousPageGallNum)
                                {
                                    Console.WriteLine(previousPageGallNum.ToString() + " " + gallNum.ToString());
                                    Console.WriteLine("번호 에러");
                                    continue;
                                }
                                if (DateTime.Compare(gallDate, initDate) < 0 || DateTime.Compare(gallDate, endDate) > 0)
                                {
                                    Console.WriteLine("날짜 에러");
                                    continue;
                                }

                                hap.HtmlNode user         = nick.SelectSingleNode("./td[@class='gall_writer ub-writer']");
                                UserInfo     tempUserInfo = new UserInfo(user.Attributes["data-nick"].Value);
                                // An empty data-uid selects the data-ip value as identity, otherwise the uid is used.
                                if (user.Attributes["data-uid"].Value == "")
                                {
                                    tempUserInfo.setFluidNick(user.Attributes["data-ip"].Value);
                                }
                                else
                                {
                                    tempUserInfo.setFixedNick(user.Attributes["data-uid"].Value);
                                }

                                //replyNum and subject are in <td class='gall_tit ub-word'></td>
                                hap.HtmlNode subjectNode = nick.SelectSingleNode("./td[2]");
                                try
                                {
                                    // When the second cell is the 'gall_subject' column the title cell is the third one.
                                    if (subjectNode.Attributes["class"].Value == "gall_subject")
                                    {
                                        subjectNode = nick.SelectSingleNode("./td[3]");
                                    }
                                    subject = subjectNode.SelectSingleNode("./a[1]").InnerText;
                                    // A second anchor is the reply-count box.
                                    if (subjectNode.SelectNodes("./a").Count == 2)
                                    {
                                        replyNum = GetOnlyInt(subjectNode.SelectSingleNode("./a[@class='reply_numbox']/span").InnerText);
                                    }
                                    else
                                    {
                                        replyNum = 0;
                                    }
                                }
                                catch
                                {
                                    // A row without a readable title still counts; use placeholder values.
                                    subject  = "NullSubjectException";
                                    replyNum = 0;
                                }
                                gallCount     = GetOnlyInt(nick.SelectSingleNode("./td[@class='gall_count']").InnerText);
                                gallRecommend = GetOnlyInt(nick.SelectSingleNode("./td[@class='gall_recommend']").InnerText);


                                //Dictionary value => count, replyNum, gallCount, gallRecommend
                                int[] stats;
                                if (userDic.TryGetValue(tempUserInfo, out stats))
                                {
                                    stats[0] += 1;
                                    stats[1] += replyNum;
                                    stats[2] += gallCount;
                                    stats[3] += gallRecommend;
                                }
                                else
                                {
                                    userDic.Add(tempUserInfo, new int[] { 1, replyNum, gallCount, gallRecommend });
                                }
                                UserData tempUserData = new UserData(tempUserInfo);
                                tempUserData.DataInput(gallNum, replyNum, gallCount, gallRecommend, gallDate, subject);
                                //gallDatas.Add(tempUserData);
                            }
                        }
                        catch
                        {
                            pageFailed = true;
                        }
                    }

                    if (pageFailed)
                    {
                        if (ErrorOccured != null)
                        {
                            ErrorOccured(text, null);
                        }
                        // Honor endPage on the error path as well; otherwise a run of unparsable
                        // pages (e.g. past the last real page) would keep the loop going forever.
                        if (currentPage >= endPage)
                        {
                            break;
                        }
                        currentPage++;
                        continue;
                    }

                    // The last row of the page drives both the duplicate filter and the stop condition.
                    hap.HtmlNode lastRow = nicks[nicks.Count - 1];
                    previousPageGallNum = GetOnlyInt(lastRow.SelectSingleNode("./td[@class='gall_num']").InnerText);
                    DateTime currentDate = DateTime.ParseExact(lastRow.SelectSingleNode("./td[@class='gall_date']").Attributes["title"].Value,
                                                               "yyyy-MM-dd HH:mm:ss", System.Globalization.CultureInfo.InvariantCulture);
                    if (currentPage >= endPage || DateTime.Compare(currentDate, initDate) < 0)
                    {
                        break;
                    }
                    else
                    {
                        // Progress payload: display text, date of the last row, pages done so far.
                        System.Collections.ArrayList arr = new System.Collections.ArrayList();
                        string str = currentPage.ToString() + " 페이지, 날짜: " + currentDate.ToString();
                        arr.Add(str); arr.Add(currentDate); arr.Add(currentPage - initPage);
                        if (newPageHappened != null)
                        {
                            newPageHappened(arr, null);
                        }
                        currentPage++;
                    }
                }
            }

            //Dictionary value => count, replyNum, gallCount, gallRecommend
            foreach (KeyValuePair <UserInfo, int[]> user in userDic)
            {
                UserInfo tempUser = user.Key;
                tempUser.count         = user.Value[0];
                tempUser.replyNum      = user.Value[1];
                tempUser.gallCount     = user.Value[2];
                tempUser.gallRecommend = user.Value[3];
                UserRank tempUserRank = new UserRank(tempUser, user.Value[0], user.Value[1], user.Value[2], user.Value[3]);
                userList.Add(tempUserRank);
            }
            var sorted = from userRank in userList
                         orderby userRank.count descending
                         select userRank;

            userList = sorted.ToList <UserRank>();
            if (CrawlingEnded != null)
            {
                CrawlingEnded(userList, null);
            }
            string tempDataDir = Directory.GetCurrentDirectory() + "\\temp-data\\";

            Directory.CreateDirectory(tempDataDir);
            string filename = tempDataDir + gallId + DateTime.Now.ToString("_yyyy-MM-dd_HH-mm-ss");

            SaveResult(filename);
        }
Exemplo n.º 53
0
        /// <summary>
        /// Downloads the magazine issue page from xxdy.qikan.com and extracts the URL of the
        /// cover image found inside the <c>div class="cover1_box"</c> element.
        /// </summary>
        /// <param name="owner">Not used by this method.</param>
        /// <param name="strISSN">ISSN placed in the <c>issn</c> query parameter.</param>
        /// <param name="strYear">Year placed in the <c>year</c> query parameter.</param>
        /// <param name="strIssueNo">Issue number placed in the <c>periodNum</c> query parameter.</param>
        /// <param name="cookie">Cookie container used for the request; created when null is passed in.</param>
        /// <param name="strImageUrl">Receives the value of the cover image's <c>src</c> attribute; empty on failure.</param>
        /// <param name="strError">Receives a description of the failure; empty on success.</param>
        /// <returns>1 when a cover image URL was found; -1 on any failure (see <paramref name="strError"/>).</returns>
        public static int GetCoverImageUrl(
            IWin32Window owner,
            string strISSN,
            string strYear,
            string strIssueNo,
            ref CookieContainer cookie,
            out string strImageUrl,
            out string strError)
        {
            strError    = "";
            strImageUrl = "";

            // Escape the caller-supplied values so reserved characters ('&', '#', spaces, ...)
            // cannot break or extend the query string. Null is treated as empty, as before.
            string strUrl = "http://xxdy.qikan.com/MagInfo.aspx?issn=" + Uri.EscapeDataString(strISSN ?? "")
                            + "&year=" + Uri.EscapeDataString(strYear ?? "")
                            + "&periodNum=" + Uri.EscapeDataString(strIssueNo ?? "");

            if (cookie == null)
            {
                cookie = new CookieContainer();
            }
            WebClientEx webClient = new WebClientEx(cookie);

            // Send browser-like request headers.
            // NOTE(review): "Accept-Encoding: gzip, deflate" is requested while the body below is
            // decoded directly as UTF-8; presumably WebClientEx enables automatic decompression - confirm.
            webClient.Headers.Add("Accept", "text/html, application/xhtml+xml, image/jxr, */*");
            webClient.Headers.Add("Accept-Encoding", "gzip, deflate");

            webClient.Headers.Add("Accept-Language", "zh-Hans-CN, zh-Hans; q=0.8, en-US; q=0.5, en; q=0.3");
            webClient.Headers.Add("Host", "xxdy.qikan.com");
            webClient.Headers.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393");
            try
            {
                byte[] byteArray  = webClient.DownloadData(new Uri(strUrl));
                string strContent = Encoding.UTF8.GetString(byteArray);
                HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
                htmlDoc.OptionFixNestedTags = true;
                htmlDoc.LoadHtml(strContent);

                if (htmlDoc.DocumentNode == null)
                {
                    strError = "htmlDoc.DocumentNode == null";
                    return(-1);
                }

                /*
                 * Markup this method looks for (abbreviated):
                 *
                 * <div class="cover1">
                 *   <div class="cover1_box">
                 *     <a href="../../MagInfo.aspx?issn=1674-3121&year=2013&periodNum=7">
                 *       <img src="http://img.qikan.com.cn/qkimages/xxdy/xxdy201307-l.jpg" width="190" height="270" border="0" />
                 *     </a>
                 *     ...
                 */
                HtmlAgilityPack.HtmlNode cover1_box = htmlDoc.DocumentNode.SelectSingleNode("//div[@class='cover1_box']");

                if (cover1_box == null)
                {
                    strError = "cover1_box 没有找到";
                    return(-1);
                }

                // "*/img": an <img> that is a grandchild of cover1_box (wrapped in any child element).
                HtmlNode img = cover1_box.SelectSingleNode("*/img");
                if (img == null)
                {
                    // SelectSingleNode returns null when nothing matches; report it explicitly
                    // instead of surfacing a NullReferenceException message.
                    strError = "img element not found under cover1_box";
                    return(-1);
                }

                string src = img.GetAttributeValue("src", "");
                if (string.IsNullOrEmpty(src))
                {
                    // Do not report success with an empty URL.
                    strError = "img element under cover1_box has no src attribute";
                    return(-1);
                }

                strImageUrl = src;
                return(1);
            }
            catch (Exception ex)
            {
                // Download or parsing failure: surface the message through strError.
                strError = "异常: " + ex.Message;
                return(-1);
            }
        }
Exemplo n.º 54
0
        /// <summary>
        /// Parses the <c>#content</c> element of a fetched page into <c>patternObject</c>
        /// (title, categories, links and their relation types, cleaned content) and writes the
        /// object as JSON to the file named by <c>Pattern.GetFileName</c>.
        /// </summary>
        /// <param name="response">The fetched page; its <c>#content</c> element and <c>FinalUrl</c> are used.</param>
        public override void Parse(Response response)
        {
            //Create a new HTMLAglityPack document
            HtmlDocument ContentDocument = new HtmlDocument();

            //load the #content of the page into the document
            ContentDocument.LoadHtml(response.Css("#content").First().OuterHtml);
            HtmlAgilityPack.HtmlNode BodyNode = ContentDocument.DocumentNode;

            // NOTE: every XPath below starts with "//", so it is evaluated against the whole
            // loaded document even when it is invoked on ContentNode.
            HtmlAgilityPack.HtmlNode HeadingNode = BodyNode.SelectSingleNode("//*[@id=\"firstHeading\"]");
            patternObject.Title = HeadingNode.InnerHtml;
            HtmlAgilityPack.HtmlNode ContentNode = BodyNode.SelectSingleNode("//*[@id=\"mw-content-text\"]");

            //remove the "toc" section to save space and later client-side processing time
            HtmlAgilityPack.HtmlNode TocNode = ContentNode.SelectSingleNode("//*[@id=\"toc\"]");
            if (TocNode != null)
            {
                TocNode.Remove();
            }

            //strip HTML comments; SelectNodes returns null (not an empty collection) when there are none
            HtmlAgilityPack.HtmlNodeCollection CommentNodes = ContentNode.SelectNodes("//comment()");
            if (CommentNodes != null)
            {
                foreach (var node in CommentNodes)
                {
                    node.Remove();
                }
            }

            //put the page heading at the top of the content node
            ContentNode.PrependChild(HeadingNode);

            //set the patternObject's title (again, now that comments have been stripped)
            patternObject.Title = HeadingNode.InnerHtml;

            //SelectNodes returns null when the page has no links at all
            HtmlAgilityPack.HtmlNodeCollection LinkNodes = ContentNode.SelectNodes("//a/@href");
            if (LinkNodes != null)
            {
                foreach (var link in LinkNodes)
                {
                    string Href = link.Attributes["href"].Value;

                    //skip if this is a redlink (page doesn't exist).
                    if (Href.Contains("redlink=1"))
                    {
                        continue;
                    }
                    //skip if this links to this page
                    if (Href.Split('#').First() == response.FinalUrl)
                    {
                        continue;
                    }

                    //if any of the links ancestor nodes is the "category links" part of the page
                    if (link.Ancestors().Any(node => node.Id == "catlinks"))
                    {
                        if (link.InnerText != "Categories") //if it is not the "categories" special page
                        {
                            //add it to the patterns list of categories
                            patternObject.Categories.Add(link.InnerText);
                        }
                    }
                    else //assume its a normal text-body link
                    {
                        //register the link if we don't already know about it
                        patternObject.CreateOrGetPatternLink(link.InnerText);
                    }

                    //add relation info if this is a relation link
                    HtmlAgilityPack.HtmlNode SectionHeadingNode = GetNodeReleventPageHeading(link, "h2");
                    if (SectionHeadingNode != null && SectionHeadingNode.InnerText == "Relations")
                    {
                        //get the relation type of this relation; without an h3 heading there is no type to record
                        HtmlAgilityPack.HtmlNode RelationHeadingNode = GetNodeReleventPageHeading(link, "h3");
                        if (RelationHeadingNode != null)
                        {
                            String RelationName = RelationHeadingNode.InnerText;

                            //if there is a relevant h4 heading that starts after the h3 heading
                            HtmlAgilityPack.HtmlNode SubHeadingNode = GetNodeReleventPageHeading(link, "h4");
                            if (SubHeadingNode != null &&
                                RelationHeadingNode.InnerStartIndex < SubHeadingNode.InnerStartIndex)
                            {
                                //assume it is a "with x" sub-category of relation for the "Can Instantiate" section
                                RelationName = RelationHeadingNode.InnerText + " " + SubHeadingNode.InnerText;
                            }

                            //add the relevent relation to this link
                            patternObject.CreateOrGetPatternLink(link.InnerText).Type.Add(RelationName);
                        }
                    }
                }
            }

            //get a cleaned copy of the #content HTML for giving in the JSON data
            patternObject.Content = ProcessPageContentToString(ContentNode);

            string Json = JsonConvert.SerializeObject(patternObject);

            File.WriteAllText(Pattern.GetFileName(patternObject.Title), Json);
        }
Exemplo n.º 55
0
 /// <summary>
 /// Evaluates <c>_LabelData.XPath</c> against the node returned by
 /// <c>GetTempParentNode()</c> and hands back the single node it selects.
 /// </summary>
 /// <returns>The result of <c>SelectSingleNode</c> for that XPath expression.</returns>
 protected HtmlAgilityPack.HtmlNode GetCurrentNode()
 {
     return GetTempParentNode().SelectSingleNode(_LabelData.XPath);
 }