예제 #1
0
        /// <summary>
        /// Extracts the numeric characters of a price from an HTML fragment.
        /// </summary>
        /// <param name="html">HTML markup to parse.</param>
        /// <returns>
        /// The characters of the first <c>span</c> inside the first element with
        /// class <c>price</c> for which <see cref="char.IsNumber(char)"/> is true,
        /// concatenated in their original order; an empty string when there is no
        /// such element or it contains no <c>span</c>.
        /// </returns>
        public string Scrap(string html)
        {
            HtmlParser    parser = new HtmlParser();
            IHtmlDocument doc    = parser.Parse(html);

            IElement priceElement = doc.GetElementsByClassName("price").FirstOrDefault();
            if (priceElement == null)
            {
                return("");
            }

            // A price container without a <span> previously made First() throw;
            // treat it the same way as "no price found".
            IElement priceSpan = priceElement.GetElementsByTagName("span").FirstOrDefault();
            if (priceSpan == null)
            {
                return("");
            }

            // Keep only the numeric characters (drops currency symbols, separators, spaces).
            return(string.Concat(priceSpan.TextContent.Where(c => char.IsNumber(c))));
        }
예제 #2
0
        /// <summary>
        /// Entry point: reads the page at the given URL, locates the viewer settings
        /// embedded in a <c>jQuery.extend</c> script, downloads the page list they
        /// point to and hands every page to <c>DownloadPage</c> before calling
        /// <c>BuildDocument</c>.
        /// </summary>
        /// <param name="args">Exactly one argument: the URL of the page to grab.</param>
        /// <remarks>
        /// Expected failure conditions (missing script, missing options, missing page
        /// list) print a short message and return; any other exception is written to
        /// the console by the catch-all at the end.
        /// </remarks>
        static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine("Usage: grab <url>");
                return;
            }

            try
            {
                // Tunables come from configuration, falling back to 3000 and 4.
                _sleep = ConfigurationUtility.GetInt32("sleep", 3000);
                Console.WriteLine($"Sleep={_sleep}ms");
                _scale = ConfigurationUtility.GetInt32("scale", 4);
                Console.WriteLine($"Scale={_scale}x");

                _pageUrl = Url.Create(args[0]);

                // Output file name: last segment of the URL path with a .pdf extension.
                _pdfFileName = Path.ChangeExtension
                               (
                    Path.GetFileNameWithoutExtension(_pageUrl.Path),
                    ".pdf"
                               );

                _browsingConfiguration = Configuration.Default.WithDefaultLoader();
                _browsingContext       = BrowsingContext.New(_browsingConfiguration);
                // Main is synchronous, so the asynchronous load is awaited by blocking.
                IDocument document = _browsingContext.OpenAsync(_pageUrl).Result;
                Console.WriteLine("HTML downloaded");
                string selector = "script";
                IHtmlCollection <IElement> scripts = document.QuerySelectorAll(selector);
                Console.WriteLine($"Scripts: {scripts.Length}");

                // FirstOrDefault instead of First: a page without the settings script
                // is an expected condition, not an exceptional one.
                IElement scriptElement = scripts.FirstOrDefault
                                         (
                    e => e.InnerHtml.StartsWith("jQuery.extend", StringComparison.Ordinal)
                                         );
                if (ReferenceEquals(scriptElement, null))
                {
                    Console.WriteLine("Settings script not found");
                    return;
                }

                // The settings object is the text between the first '{' and the last '}'.
                string json       = scriptElement.InnerHtml;
                int    offset     = json.IndexOf('{');
                int    lastBrace  = json.LastIndexOf('}');
                if (offset < 0 || lastBrace < offset)
                {
                    Console.WriteLine("Settings script contains no JSON object");
                    return;
                }
                json = json.Substring(offset, lastBrace - offset + 1);

                JObject root  = JObject.Parse(json);
                JValue  token = (JValue)root.SelectToken("$.diva.1.options.objectData");
                if (ReferenceEquals(token, null))
                {
                    Console.WriteLine("Not a book");
                    return;
                }
                string dataUrl = (string)token.Value;
                Console.WriteLine($"Data URL={dataUrl}");

                token = (JValue)root.SelectToken("$.diva.1.options.iipServerURL");
                if (ReferenceEquals(token, null))
                {
                    Console.WriteLine("No ServerUrl");
                    return;
                }
                _serverUrl = (string)token.Value;
                Console.WriteLine($"Server URL={_serverUrl}");

                token = (JValue)root.SelectToken("$.diva.1.options.imageDir");
                if (ReferenceEquals(token, null))
                {
                    Console.WriteLine("No imageDir");
                    return;
                }
                _imageDir = (string)token.Value;
                Console.WriteLine($"Image dir={_imageDir}");

                // Second request: the JSON document whose "pgs" array lists the pages.
                _webClient = new WebClient();
                json       = _webClient.DownloadString(dataUrl);
                root       = JObject.Parse(json);
                JArray array = root.SelectToken("pgs") as JArray;
                if (ReferenceEquals(array, null))
                {
                    // Previously a missing/non-array "pgs" surfaced as a null dereference
                    // or an invalid cast.
                    Console.WriteLine("No pages");
                    return;
                }
                _pages = array.ToObject <Page[]>();
                Console.WriteLine($"Total pages={_pages.Length}");

                // Page numbers passed to DownloadPage are 1-based.
                int pageNumber = 1;
                foreach (Page page in _pages)
                {
                    // Pause only after DownloadPage reports true.
                    if (DownloadPage(pageNumber, page))
                    {
                        Thread.Sleep(_sleep);
                    }

                    pageNumber++;
                }
                BuildDocument();
            }
            catch (Exception exception)
            {
                Console.WriteLine(exception);
            }
        }
예제 #3
0
 /// <summary>
 /// Checks whether the node list consists solely of the fixed "list is empty" notice.
 /// </summary>
 /// <param name="torrentNodes">Nodes to inspect.</param>
 /// <returns>
 /// <c>true</c> when there is exactly one node and its text content equals the
 /// expected notice; otherwise <c>false</c>.
 /// </returns>
 private bool NoHitAndRun(IHtmlCollection <IElement> torrentNodes)
 {
     // Hungarian notice; roughly "you have seeded back everything you downloaded
     // according to the rules, so your list is empty".
     const string emptyListNotice = "Az általad letöltött anyagokat a szabályoknak megfelelően visszaosztottad, a listád ennek köszönhetően üres.";

     if (torrentNodes.Count() != 1)
     {
         return(false);
     }

     string onlyNodeText = torrentNodes.First().TextContent;
     return(onlyNodeText == emptyListNotice);
 }
예제 #4
0
 /// <summary>
 /// Tries to read an integer rank from the first <c>span</c> inside the first node.
 /// </summary>
 /// <param name="nodes">Candidate nodes; only the first one is inspected.</param>
 /// <param name="rank">
 /// The parsed rank on success; 0 when the method returns <c>false</c>.
 /// </param>
 /// <returns>
 /// <c>true</c> when the span's inner HTML, with trailing '.' characters removed,
 /// parses as an integer; <c>false</c> when it does not, when <paramref name="nodes"/>
 /// is empty, or when the first node contains no <c>span</c>.
 /// </returns>
 private static bool TryParseRank(IHtmlCollection <IElement> nodes, out int rank)
 {
     // FirstOrDefault / null-conditional access keep the "Try" contract: missing
     // markup yields false instead of an exception.
     string rankText = nodes.FirstOrDefault()?.QuerySelector("span")?.InnerHtml;
     if (rankText == null)
     {
         rank = 0;
         return false;
     }

     return int.TryParse(rankText.TrimEnd('.'), out rank);
 }