Example #1
0
        /// <summary>
        /// Parses the given HTML source and collects every element matched by the selectors in <c>Settings.Tags</c>.
        /// </summary>
        /// <param name="source">HTML markup to parse. The parsed document is kept and reused as long as the same source is passed again.</param>
        /// <returns>A <see cref="Parsing"/> holding the collected tags and the UTC time at which they were collected.</returns>
        public async Task<Parsing> ParseAsync(string source)
        {
            // Re-parse only when nothing usable is cached or the source text changed
            if (string.IsNullOrWhiteSpace(_source) || !Equals(_source, source))
            {
                _source = source;
                var domParser = new AngleSharp.Parser.Html.HtmlParser();
                _document = await domParser.ParseAsync(source);
            }

            Parsing result = new Parsing();
            List<Tag> tags = new List<Tag>();

            // Snapshot of the field so the worker below operates on one consistent document
            var document = _document;

            await Task.Run(() =>
            {
                // Without a document there is nothing to query; the result simply carries no tags
                if (document != null)
                {
                    foreach (string selector in Settings.Tags)
                    {
                        foreach (IElement tag in document.QuerySelectorAll(selector))
                        {
                            tags.Add(new Tag()
                            {
                                // Invariant casing keeps tag names stable regardless of the current culture
                                Name = tag.TagName.ToLowerInvariant(),
                                Attributes = ParseTag(tag),
                                Parsing = result
                            });
                        }
                    }
                }

                result.Date = DateTime.UtcNow;
                result.Tags = tags;
            });

            return result;
        }
Example #2
0
        /// <summary>
        /// Collects every element of the document matched by the selectors in <c>Settings.Tags</c>.
        /// </summary>
        /// <param name="document">The parsed document to query. When it is <c>null</c> the result contains no tags.</param>
        /// <returns>A <see cref="Parsing"/> holding the collected tags and the UTC time at which they were collected.</returns>
        public async Task<Parsing> ParseAsync(IHtmlDocument document)
        {
            Parsing result = new Parsing();
            List<Tag> tags = new List<Tag>();

            await Task.Run(() =>
            {
                // A null document yields an empty tag list instead of failing inside the loop
                if (document != null)
                {
                    foreach (string selector in Settings.Tags)
                    {
                        foreach (AngleSharp.Dom.IElement tag in document.QuerySelectorAll(selector))
                        {
                            tags.Add(new Tag()
                            {
                                Name = tag.TagName,
                                Attributes = ParseTag(tag),
                                Parsing = result
                            });
                        }
                    }
                }

                result.Date = DateTime.UtcNow;
                result.Tags = tags;
            });

            return result;
        }
Example #3
0
        /// <summary>
        /// Builds a user from the account settings page and the profile settings page.
        /// </summary>
        /// <param name="accountSettingsPage">The account settings page, from which the user name and the email address are read.</param>
        /// <param name="profileSettingsPage">The profile settings page, from which the full name and the avatar URI are read.</param>
        /// <returns>Returns a new user populated with the values read from both pages.</returns>
        /// <exception cref="NineGagException">Thrown when any of the values cannot be read from its page.</exception>
        public static User FromHtml(IHtmlDocument accountSettingsPage, IHtmlDocument profileSettingsPage)
        {
            User parsedUser = new User();

            // Account settings: user name and email address. Any failure is wrapped in a NineGagException.
            try
            {
                IElement settingsForm = accountSettingsPage.QuerySelector("#setting");

                IElement loginInput = settingsForm
                    .QuerySelectorAll("input")
                    .FirstOrDefault(field => field.GetAttribute("name") == "login_name");
                parsedUser.UserName = loginInput.GetAttribute("value");

                IElement emailInput = settingsForm
                    .QuerySelectorAll("input")
                    .FirstOrDefault(field => field.GetAttribute("name") == "email");
                parsedUser.EmailAddress = emailInput.GetAttribute("value");
            }
            catch (Exception parseError)
            {
                throw new NineGagException("The user name and the email address could not be parsed. This could be an indicator, that the 9GAG website is down or its content has changed. If this problem keeps coming, then please report this problem to 9GAG or the maintainer of the library.", parseError);
            }

            // Profile settings: full name and avatar image. Any failure is wrapped in a NineGagException.
            try
            {
                IElement fullNameInput = profileSettingsPage
                    .QuerySelectorAll("input")
                    .FirstOrDefault(field => field.GetAttribute("name") == "fullName");
                parsedUser.FullName = fullNameInput.GetAttribute("value");

                IElement avatarImage = profileSettingsPage.QuerySelector("#jsid-profile-avatar");
                parsedUser.AvatarUri = new Uri(avatarImage.GetAttribute("src"), UriKind.Absolute);
            }
            catch (Exception parseError)
            {
                throw new NineGagException("The full name and the avatar image could not be parsed. This could be an indicator, that the 9GAG website is down or its content has changed. If this problem keeps coming, then please report this problem to 9GAG or the maintainer of the library.", parseError);
            }

            return parsedUser;
        }
Example #4
0
 /// <summary>
 /// Returns the text of the first element matching <paramref name="selector"/> whose text contains
 /// <paramref name="textContent"/>, with carriage returns removed and surrounding whitespace trimmed.
 /// </summary>
 /// <param name="html">The document to search; may be <c>null</c>.</param>
 /// <param name="selector">The CSS selector of the candidate elements.</param>
 /// <param name="textContent">The text fragment the element must contain.</param>
 /// <returns>The cleaned text, or an empty string when the document is <c>null</c> or no element matches.</returns>
 protected string GetPartialWhere(IHtmlDocument html, string selector, string textContent)
 {
     // FirstOrDefault (not First) so that "no match" falls through to the empty-string fallback instead of throwing
     var match = html?.QuerySelectorAll(selector)
                 .FirstOrDefault(m => m.TextContent.Contains(textContent));

     return match?.TextContent?.Replace("\r", string.Empty).Trim() ?? string.Empty;
 }
Example #5
0
        /// <summary>
        /// Returns the element at the given 1-based position among the elements matching the selector.
        /// </summary>
        /// <param name="html">The document to search; may be <c>null</c>.</param>
        /// <param name="selector">The CSS selector of the candidate elements.</param>
        /// <param name="index">The 1-based position of the wanted match; defaults to the first match.</param>
        /// <returns>The element, or <c>null</c> when the document is <c>null</c> or the position is outside the matches.</returns>
        protected IElement GetHtmlElement(IHtmlDocument html, string selector, int index = 1)
        {
            var matches = html?.QuerySelectorAll(selector);

            // Covers the null document, the empty result and an out-of-range position with the same "not found" answer
            if (matches == null || index < 1 || index > matches.Length)
            {
                return null;
            }

            return matches[index - 1];
        }
Example #6
0
        /// <summary>
        /// Analyzes the page and retrieves the JSON.
        /// </summary>
        /// <param name="source">The page source that is handed to <c>HtmlParser.ParseDocument</c>.</param>
        /// <returns>A <c>RemotePlaylistInfo</c>; the freshly created, unfilled instance when the video list could not be queried.</returns>
        private RemotePlaylistInfo AnalyzePage(string source)
        {
            var series = new RemotePlaylistInfo();

            IHtmlDocument?document            = HtmlParser.ParseDocument(source);
            IHtmlCollection <IElement>?videos = document?.QuerySelectorAll(".SeriesVideoListContainer-video");

            // No document means no video collection: return the unfilled info object
            if (videos is null)
            {
                return(series);
            }

            // Owner name is the element's inner HTML; owner ID is the last "/"-separated segment of its href, "0" when either is missing
            IElement?ownerElm  = document?.QuerySelector(".SeriesAdditionalContainer-ownerName");
            string   ownerName = ownerElm?.InnerHtml ?? string.Empty;
            int      ownerID   = int.Parse((ownerElm?.GetAttribute("href")?.Split("/")[^ 1]) ?? "0");
Example #7
0
        /// <summary>
        /// Queries the document once per selector in <c>Settings.Tags</c> and maps each selector to the parsed
        /// attribute sets of the elements it matched.
        /// </summary>
        /// <param name="document">The parsed document to query. When it is <c>null</c> every selector maps to an empty list.</param>
        /// <returns>A dictionary keyed by selector; each value holds one <c>ParseTag</c> result per matched element.</returns>
        public async Task<Dictionary<string, List<StringDictionary>>> Parse(IHtmlDocument document)
        {
            Dictionary<string, List<StringDictionary>> result = new Dictionary<string, List<StringDictionary>>();

            foreach (string tag in Settings.Tags)
            {
                List<StringDictionary> elements = new List<StringDictionary>();

                // A null document contributes no elements instead of failing inside the loop
                if (document != null)
                {
                    foreach (AngleSharp.Dom.IElement item in document.QuerySelectorAll(tag))
                    {
                        elements.Add(ParseTag(item));
                    }
                }

                // Indexer assignment: a selector listed twice in the settings overwrites its entry rather than throwing
                result[tag] = elements;
            }

            return result;
        }
Example #8
0
        /// <summary>
        /// Receives a document and analyzes it in a JavaScript-like way (through CSS selector queries).
        /// </summary>
        /// <param name="document">The document to analyze; may be <c>null</c>.</param>
        /// <returns>The collected item information; it contains no items when the document is <c>null</c>.</returns>
        private INiconicoIchibaInfo GetNiconicoIchibaInfo(IHtmlDocument? document)
        {
            var result = new NiconicoIchibaInfo();

            IHtmlCollection<IElement>? entries = document?.QuerySelectorAll(".IchibaMainItem");
            if (entries is null)
            {
                return result;
            }

            foreach (IElement entry in entries)
            {
                // Name and link both come from the same element
                IElement? nameElement = entry.QuerySelector(".IchibaMainItem_Name");
                string? name = nameElement?.InnerHtml;
                string? linkUrl = nameElement?.GetAttribute("href");
                if (name is null || linkUrl is null)
                {
                    continue;
                }

                string? categoryText = entry.QuerySelector(".IchibaMainItem_Info_Category")?.InnerHtml;
                if (categoryText is null)
                {
                    continue;
                }

                string? thumbnailUrl = entry.QuerySelector(".IchibaMainItem_Thumbnail img")?.GetAttribute("src");
                if (thumbnailUrl is null)
                {
                    continue;
                }

                // The price is the only optional value: a missing price element is reported as "NaN"
                string priceText = entry.QuerySelector(".IchibaMainItem_Price_Number")?.InnerHtml ?? "NaN";

                result.IchibaItems.Add(new IchibaItem()
                {
                    Name     = name,
                    LinkUrl  = linkUrl,
                    Category = categoryText,
                    Price    = priceText,
                    ThumbUrl = thumbnailUrl,
                });
            }

            return result;
        }
Example #9
0
            /// <summary>
            /// Reads the release details (title, year, publish date, size, category and download link) from a page.
            /// Every field is extracted on a best-effort basis: a field that cannot be read is left at its default
            /// and does not prevent the remaining fields from being read.
            /// </summary>
            /// <param name="html">The parsed page.</param>
            /// <returns>The release with <c>IsMovie</c> set and every field that could be extracted.</returns>
            public MTReleaseInfo Extract(IHtmlDocument html)
            {
                var release = new MTReleaseInfo();

                release.IsMovie = true;
                var boldElements   = html.QuerySelectorAll("b");
                var titleElement   = html.QuerySelector("span>b");
                var qualityElement = html.QuerySelector("span:nth-child(4) > b:nth-child(1)");

                // Text of the node that follows the first <b> whose lower-cased text contains the label.
                // Throws when the label or its sibling is missing; each caller below swallows that on purpose.
                Func<string, string> valueAfterLabel = label =>
                    boldElements.First(b => b.TextContent.ToLower().Contains(label))
                                .NextSibling.TextContent.Trim();

                try
                {
                    // The title is everything in front of the first opening parenthesis
                    var title = titleElement.TextContent;
                    if (title.Contains("("))
                    {
                        title = title.Substring(0, title.IndexOf('(')).Trim();
                    }
                    release.TitleOriginal = title;
                }
                catch { /* best effort: title stays unset */ }

                try
                {
                    var year = valueAfterLabel("año");
                    release.Year           = Int32.Parse(year);
                    release.TitleOriginal += " (" + year + ")";
                }
                catch { /* best effort: year stays unset and the title is not extended */ }

                try
                {
                    // NOTE(review): Convert.ToDateTime uses the current culture; confirm the site's date format
                    release.PublishDate = Convert.ToDateTime(valueAfterLabel("fecha"));
                }
                catch { /* best effort: publish date stays unset */ }

                try
                {
                    var sizeText = valueAfterLabel("tamaño");

                    // Keep digits and separators only, then normalise a decimal comma so that both units
                    // are parsed the same way independent of the machine's culture
                    var number = float.Parse(
                        Regex.Replace(sizeText, @"[^0-9,.]", "").Replace(",", "."),
                        System.Globalization.CultureInfo.InvariantCulture);

                    long size;
                    if (sizeText.EndsWith("mb", StringComparison.OrdinalIgnoreCase))
                    {
                        size = ReleaseInfo.BytesFromMB(number);
                    }
                    else
                    {
                        size = ReleaseInfo.BytesFromGB(number);
                    }
                    release.Size = size;
                }
                catch { /* best effort: size stays unset */ }

                try
                {
                    release.CategoryText = valueAfterLabel("formato");
                }
                catch { /* best effort: category stays unset */ }

                try
                {
                    // A "(…3D…)" or "(…4K…)" marker in the title overrides the category; 4K wins over 3D
                    var title = titleElement.TextContent;
                    if (title.Contains("(") && title.Contains(")"))
                    {
                        if (title.Contains("3D"))
                        {
                            release.CategoryText = "3D";
                        }
                        if (title.Contains("4K"))
                        {
                            release.CategoryText = "4K";
                        }
                    }
                }
                catch { /* best effort: category keeps its previous value */ }

                try
                {
                    // Same override for a "[…3D…]" or "[…4K…]" marker in the secondary element; it is applied last
                    var title = qualityElement.TextContent;
                    if (title.Contains("[") && title.Contains("]"))
                    {
                        if (title.Contains("3D"))
                        {
                            release.CategoryText = "3D";
                        }
                        if (title.Contains("4K"))
                        {
                            release.CategoryText = "4K";
                        }
                    }
                }
                catch { /* best effort: category keeps its previous value */ }

                try
                {
                    var link = html.QuerySelector("a[href*=\"sec=descargas\"]").GetAttribute("href");
                    release.Link = new Uri(WebUri, link);
                    release.Guid = release.Link;
                }
                catch { /* best effort: link and guid stay unset */ }

                return release;
            }
Example #10
0
 /// <summary>
 /// Lists the targets of all anchors whose <c>href</c> contains "/peli-", resolved against <c>WebUri</c>.
 /// </summary>
 /// <param name="html">The parsed page to search.</param>
 /// <returns>A lazily evaluated sequence with one URI per matching anchor.</returns>
 public IEnumerable<Uri> Extract(IHtmlDocument html)
 {
     var movieAnchors = html.QuerySelectorAll("a[href*=\"/peli-\"]");

     return movieAnchors.Select(anchor => new Uri(WebUri, anchor.GetAttribute("href")));
 }
Example #11
0
 /// <summary>
 /// Lists the targets of all anchors whose <c>href</c> contains ".torrent", resolved against <c>WebUri</c>.
 /// </summary>
 /// <param name="html">The parsed page to search.</param>
 /// <returns>A lazily evaluated sequence with one URI per matching anchor.</returns>
 public IEnumerable<Uri> Extract(IHtmlDocument html)
 {
     var torrentAnchors = html.QuerySelectorAll("a[href*=\".torrent\"]");

     return torrentAnchors.Select(anchor => new Uri(WebUri, anchor.Attributes["href"].Value));
 }
Example #12
0
        /// <summary>
        /// Reads all movie details from a parsed movie page.
        /// </summary>
        /// <param name="doc">The parsed movie page.</param>
        /// <returns>The movie populated with the values read from the page.</returns>
        /// <exception cref="FormatException">
        /// Thrown when any step fails; the message names the field that was being read and the original
        /// failure is available as the inner exception.
        /// </exception>
        private static MovieData Parse(IHtmlDocument doc)
        {
            MovieData movie = new MovieData();

            // Tracks the field currently being read so a failure can be attributed to it
            StateEnum state = StateEnum.ID;

            try
            {
                // The ID is the last path segment of the og:url meta content
                IElement element = doc.QuerySelector("head > meta[property=og\\3A url]");
                movie.ID = int.Parse(new DirectoryInfo(element
                                                       .GetAttribute("content")).Name);

                state            = StateEnum.Poster;
                movie.PosterLink = doc.QuerySelector("head > link[rel=image_src]")
                                   .GetAttribute("href");

                state   = StateEnum.Localized;
                element = doc.QuerySelector("div#viewFilmInfoWrapper");
                movie.LocalizedTitle = WebUtility.HtmlDecode(
                    element.QuerySelector("h1.moviename-big > span").InnerHtml);

                // The original title has the "(video)", "(TV)" and "in 3D" markers (Russian text) removed
                state = StateEnum.Original;
                movie.OriginalTitle = Regex.Replace
                                          (WebUtility.HtmlDecode(element.QuerySelector("span.alternativeHeadline").InnerHtml),
                                          "\\([Вв]идео\\)|\\(ТВ\\)|в\\s3D",
                                          "");

                // The rows of the info table are addressed by position from here on
                state   = StateEnum.Year;
                element = element.QuerySelector("table.info");
                var elements = element
                               .QuerySelectorAll("tr");
                movie.Year = short.Parse(elements[0]
                                         .QuerySelector("a")
                                         .InnerHtml);

                state           = StateEnum.Country;
                movie.Countries = elements[1]
                                  .QuerySelectorAll("a")
                                  ?.Select(a => new CountryData(WebUtility.HtmlDecode(a.InnerHtml)))
                                  .ToList()
                                  ?? new List <CountryData>();

                state         = StateEnum.TagLine;
                movie.TagLine = WebUtility.HtmlDecode(elements[2]
                                                      .Children[1]
                                                      .InnerHtml);

                // Anchors whose content is "..." are skipped
                state           = StateEnum.Director;
                movie.Directors = elements[3]
                                  .QuerySelectorAll("a")
                                  ?.Where(a => !a.InnerHtml.Equals("..."))
                                  .Select(a => new PersonData(ConvertToPair(a)))
                                  .ToList()
                                  ?? new List <PersonData>();

                state = StateEnum.Screenwriter;
                movie.Screenwriters = elements[4]
                                      .QuerySelectorAll("a")
                                      ?.Where(a => !a.InnerHtml.Equals("..."))
                                      .Select(a => new PersonData(ConvertToPair(a)))
                                      .ToList()
                                      ?? new List <PersonData>();

                state        = StateEnum.Genre;
                movie.Genres = element
                               .QuerySelectorAll("span[itemprop=genre] > a")
                               ?.Select(a => new GenreData(WebUtility.HtmlDecode(a.InnerHtml)))
                               .ToList()
                               ?? new List <GenreData>();

                // The runtime is the first space-separated token of the time cell; 0 when it is not a number
                state = StateEnum.Runtime;
                if (!short.TryParse(element
                                    .QuerySelector("tr td.time")
                                    .InnerHtml
                                    .Split(' ')[0], out short runtime))
                {
                    runtime = 0;
                }
                movie.Runtime = runtime;

                state        = StateEnum.Actor;
                movie.Actors = doc.QuerySelector("div#actorList > ul")
                               .Children
                               ?.Select(li => li.FirstElementChild)
                               .Where(li => !li.InnerHtml.Equals("..."))
                               .Select(li => new PersonData(ConvertToPair(li)))
                               .ToList()
                               ?? new List <PersonData>();

                // A missing synopsis is reported as "-"
                state           = StateEnum.Storyline;
                movie.Storyline = WebUtility.HtmlDecode(doc.QuerySelector("div.film-synopsys")
                                                        ?.InnerHtml
                                                        ?? "-");

                // Ratings fall back to 0 when absent or not parseable
                state    = StateEnum.RatingKP;
                elements = doc.QuerySelectorAll("div.block_2 > div");
                if (!float.TryParse(elements[0]
                                    .QuerySelector("span.rating_ball")
                                    ?.InnerHtml
                                    ?? "0", NumberStyles.Any, CultureInfo.InvariantCulture, out float rate))
                {
                    rate = 0;
                }
                movie.RatingKP = rate;

                state = StateEnum.RatingIMDB;
                if (!float.TryParse(elements[1]
                                    .InnerHtml
                                    .Split(' ')[1], NumberStyles.Any, CultureInfo.InvariantCulture, out rate))
                {
                    rate = 0;
                }
                movie.RatingIMDB = rate;

                return(movie);
            }
            catch (Exception exception)
            {
                // Keep the root cause attached so the failure can be diagnosed, not only located
                throw new FormatException("Error occurred when parsing " + state.ToString(), exception);
            }
        }
Example #13
0
        /// <summary>
        /// Reads the game name, optional server name, category and the option lists of all form groups from the
        /// page and merges them into <c>Form1.GameCategoriesDictionary</c> (game → category → server → option lists).
        /// Nothing is changed when the page has no <c>h1</c>, no active category, or the server entry already exists.
        /// </summary>
        /// <param name="document">The parsed page.</param>
        public static void Parse(IHtmlDocument document)
        {
            var hItems = document.QuerySelectorAll("h1");
            if (hItems.Length == 0)
            {
                return;
            }

            var catName = document.QuerySelectorAll("a.active div.inside div.counter-param");
            if (catName.Length == 0)
            {
                return;
            }

            // The heading reads "<category> <game>" with an optional " (<server>)" suffix
            string category = catName[0].TextContent;
            string fullName = hItems[0].TextContent.Replace(category + " ", "");
            string serv     = "Any";

            // Split off the server only when the parentheses are really there and something precedes them;
            // otherwise the heading is kept whole and the server stays "Any"
            int openParen = fullName.IndexOf('(');
            if (fullName.Contains(")") && openParen > 0)
            {
                serv     = fullName.Split('(')[1].Replace(")", "");
                fullName = fullName.Substring(0, openParen - 1); // also drops the character in front of "("
            }

            Dictionary<string, List<Dictionary<string, string[]>>> dic =
                new Dictionary<string, List<Dictionary<string, string[]>>>();

            foreach (var val in document.QuerySelectorAll("div[class = 'form-group']"))
            {
                // The options are expected below the first child of the group; groups without children are skipped
                var firstChild = val.FirstElementChild;
                if (firstChild == null)
                {
                    continue;
                }

                var options = firstChild.Children.Where(x => x.LocalName == "option").ToArray();
                if (options.Length == 0)
                {
                    continue;
                }

                // The first option names the sub-category, the remaining ones are its values
                Dictionary<string, string[]> tmpDic = new Dictionary<string, string[]>
                {
                    { options[0].TextContent, options.Skip(1).Select(option => option.TextContent).ToArray() }
                };

                List<Dictionary<string, string[]>> servEntries;
                if (!dic.TryGetValue(serv, out servEntries))
                {
                    servEntries = new List<Dictionary<string, string[]>>();
                    dic.Add(serv, servEntries);
                }
                servEntries.Add(tmpDic);
            }

            // Unknown game: register game → category → parsed servers
            if (!Form1.GameCategoriesDictionary.ContainsKey(fullName))
            {
                Dictionary<string, Dictionary<string, List<Dictionary<string, string[]>>>> globalDictionary =
                    new Dictionary<string, Dictionary<string, List<Dictionary<string, string[]>>>>();
                globalDictionary.Add(category, dic);
                Form1.GameCategoriesDictionary.Add(fullName, globalDictionary);
                return;
            }

            // Known game, unknown category: register category → parsed servers
            var categories = Form1.GameCategoriesDictionary[fullName];
            if (!categories.ContainsKey(category))
            {
                categories.Add(category, dic);
                return;
            }

            // Known category: add the server once; an existing entry is never overwritten
            var servers = categories[category];
            if (servers.ContainsKey(serv))
            {
                return;
            }

            // A page without option lists still registers the server, with a single empty option dictionary
            servers.Add(serv, dic.ContainsKey(serv)
                ? dic[serv]
                : new List<Dictionary<string, string[]>>() { new Dictionary<string, string[]>() });
        }
Example #14
0
        /// <summary>
        /// Points every <c>img</c> of the document at a file inside the output directory, downloads or copies the
        /// referenced images there, registers the stored images in the OPF file and finally stores the markup of
        /// the document body in the chapter.
        /// </summary>
        /// <param name="doc">The document whose images are embedded; its <c>img</c> sources are rewritten in place.</param>
        /// <param name="opfFile">The OPF file that receives one item per stored image.</param>
        /// <param name="chapter">The chapter whose content is replaced by the rewritten body markup.</param>
        /// <param name="outputDir">The directory the image files are written to.</param>
        protected virtual async Task EmbedImagesAsync(IHtmlDocument doc, OpfFile opfFile, Chapter chapter, string outputDir)
        {
            // Each distinct image URI is stored exactly once; the value is the file it is written to
            var images = new Dictionary<Uri, string>();
            string outputDirName = new DirectoryInfo(outputDir).Name;

            foreach (var img in doc.QuerySelectorAll("img"))
            {
                string src = img.GetAttribute("src");

                // An image without a source has nothing to embed
                if (string.IsNullOrEmpty(src))
                    continue;

                // Protocol-relative source ("//host/path"): make it an absolute http address
                if (src.StartsWith("//"))
                {
                    src = src.Substring(2);

                    if (!(src.StartsWith("http://") || src.StartsWith("https://")))
                        src = "http://" + src;
                }

                Uri uri;
                if (!Uri.TryCreate(src, UriKind.RelativeOrAbsolute, out uri))
                    continue;

                // Drop the query string. UriBuilder only works on absolute URIs, relative ones are kept as they are.
                if (uri.IsAbsoluteUri)
                    uri = new UriBuilder(uri) { Query = string.Empty }.Uri;

                // Reuse the file of an image that was already seen, so that every <img> pointing at the same
                // URI references the one file that is actually stored
                string path;
                if (!images.TryGetValue(uri, out path))
                {
                    // Path.GetExtension already includes the leading dot
                    string fileName = (Path.GetRandomFileName() + Path.GetExtension(uri.ToString())).ToValidFilePath();

                    // Skip only this image; the remaining images and the chapter content are still processed
                    if (string.IsNullOrEmpty(fileName))
                        continue;

                    path = Path.Combine(outputDir, fileName);
                    images.Add(uri, path);
                }

                string filePath = Path.Combine(outputDirName, Path.GetFileName(path)).Replace(@"\", "/");
                img.SetAttribute("src", filePath);
            }

            // One client for all downloads instead of one per image
            using (HttpClient client = new HttpClient())
            {
                var tasks = new List<Task>();

                foreach (var img in images)
                {
                    tasks.Add(Task.Run(async () =>
                    {
                        Uri uri = img.Key;
                        string path = img.Value;
                        string outputPath = Path.Combine(outputDirName, Path.GetFileName(path)).Replace(@"\", "/");

                        if (uri.IsAbsoluteUri && !uri.IsFile)
                        {
                            // Remote image: any download problem silently skips the image
                            try
                            {
                                using (HttpResponseMessage resp = await client.GetAsync(uri))
                                {
                                    resp.EnsureSuccessStatusCode();

                                    string mediaType = resp.Content.Headers.ContentType.MediaType.ToLowerInvariant();

                                    // Only JPEG and PNG images are embedded
                                    if (mediaType != MediaType.JpegType && mediaType != MediaType.PngType)
                                        return;

                                    if (File.Exists(path))
                                        return;

                                    using (FileStream fs = new FileStream(path, FileMode.CreateNew))
                                        await resp.Content.CopyToAsync(fs);
                                }
                            }
                            catch (Exception)
                            {
                                return;
                            }
                        }
                        else
                        {
                            // Local image: a file URI has to be turned back into a file system path first
                            string localPath = uri.IsAbsoluteUri ? uri.LocalPath : uri.ToString();

                            // A missing source file gets no manifest entry, like a failed download
                            if (!File.Exists(localPath))
                                return;

                            File.Copy(localPath, path);
                        }

                        MediaType mType = MediaType.FromExtension(Path.GetExtension(path));

                        if (mType == null)
                            return;

                        // NOTE(review): AddItem is called from several tasks at once; confirm OpfFile tolerates that
                        opfFile.AddItem(new OpfItem(outputPath, StringUtilities.GenerateRandomString(),
                            mType), false);
                    }));
                }

                await Task.WhenAll(tasks);
            }

            chapter.Content = doc.QuerySelector("body").ChildNodes.ToHtml(new XmlMarkupFormatter());
        }
Example #15
0
        /// <summary>
        /// Finds the headings <c>h1</c> … <c>h&lt;_level&gt;</c> in the HTML content of every input, creates one
        /// document per heading and attaches the heading documents to the input under <c>_metadataKey</c>.
        /// </summary>
        /// <param name="inputs">The documents to process.</param>
        /// <param name="context">The execution context used to create the heading and result documents.</param>
        /// <returns>
        /// The inputs unchanged when no metadata key is configured; otherwise one document per input, where an
        /// input whose HTML could not be parsed is passed through as it is.
        /// </returns>
        public IEnumerable <Common.Documents.IDocument> Execute(IReadOnlyList <Common.Documents.IDocument> inputs, IExecutionContext context)
        {
            if (string.IsNullOrWhiteSpace(_metadataKey))
            {
                return(inputs);
            }

            // Build the query: "h1,h2,…" up to the configured level
            StringBuilder query = new StringBuilder();

            for (int level = 1; level <= _level; level++)
            {
                if (level > 1)
                {
                    query.Append(",");
                }
                query.Append("h");
                query.Append(level);
            }

            // The selector is the same for every input, so it is rendered once
            string selector = query.ToString();

            // Process documents
            HtmlParser parser = new HtmlParser();

            return(inputs.AsParallel().Select(context, input =>
            {
                // Parse the HTML content
                IHtmlDocument htmlDocument = input.ParseHtml(parser);
                if (htmlDocument == null)
                {
                    return input;
                }

                // Evaluate the query and create the holding nodes; each heading links back to the one before it
                Heading previousHeading = null;
                List <Heading> headings = htmlDocument
                                          .QuerySelectorAll(selector)
                                          .Select(x =>
                {
                    previousHeading = new Heading
                    {
                        Element = x,
                        Previous = previousHeading,
                        Level = int.Parse(x.NodeName.Substring(1))
                    };
                    return previousHeading;
                })
                                          .ToList();

                // Build the tree from the bottom-up, so children exist before their parent's document is created
                for (int level = _level; level >= 1; level--)
                {
                    int currentLevel = level;
                    foreach (Heading heading in headings.Where(x => x.Level == currentLevel))
                    {
                        // Get the parent: the closest preceding heading with a smaller level
                        Heading parent = null;
                        if (currentLevel > 1)
                        {
                            parent = heading.Previous;
                            while (parent != null && parent.Level >= currentLevel)
                            {
                                parent = parent.Previous;
                            }
                        }

                        // Create the document
                        MetadataItems metadata = new MetadataItems();
                        if (_levelKey != null)
                        {
                            metadata.Add(_levelKey, heading.Level);
                        }
                        if (_idKey != null && heading.Element.HasAttribute("id"))
                        {
                            metadata.Add(_idKey, heading.Element.GetAttribute("id"));
                        }
                        if (_headingKey != null)
                        {
                            metadata.Add(_headingKey, heading.Element.InnerHtml);
                        }
                        if (_childrenKey != null)
                        {
                            metadata.Add(_childrenKey, heading.Children.AsReadOnly());
                        }
                        if (_parentKey != null)
                        {
                            // Resolved lazily: the parent's document does not exist yet at this point
                            metadata.Add(_parentKey, new CachedDelegateMetadataValue(_ => parent?.Document));
                        }
                        heading.Document = context.GetDocument(heading.Element.InnerHtml, metadata);

                        // Add to parent
                        parent?.Children.Add(heading.Document);
                    }
                }

                // With nesting only the top-most headings are exposed. The smallest level is determined once
                // instead of once per heading.
                IEnumerable <Heading> exposed = headings;
                if (_nesting && headings.Count > 0)
                {
                    int topLevel = headings.Min(x => x.Level);
                    exposed = headings.Where(x => x.Level == topLevel);
                }

                return context.GetDocument(input,
                                           new MetadataItems
                {
                    {
                        _metadataKey,
                        exposed
                        .Select(x => x.Document)
                        .ToArray()
                    }
                });
            }));
        }
Example #16
0
 /// <summary>
 /// Returns the value of one attribute for every element matching a selector.
 /// </summary>
 /// <param name="document">The document to query.</param>
 /// <param name="s">The CSS selector of the elements.</param>
 /// <param name="a">The name of the attribute to read from each element.</param>
 /// <returns>A lazily evaluated sequence with one attribute value per matched element.</returns>
 public static IEnumerable<string> QuerySelectorAttributes(this IHtmlDocument document, string s, string a)
 {
     // The lambda parameter must not reuse the name of the selector parameter "s"
     return document.QuerySelectorAll(s).Select(element => element.GetAttribute(a));
 }
Example #17
0
 /// <summary>
 /// Collects the rows of the <c>#torrentTable</c> body on a search page, leaving out the first row.
 /// </summary>
 /// <param name="dom">The parsed search page.</param>
 /// <returns>The collection of rows after the first one.</returns>
 private IHtmlCollection<IElement> FindTorrentRows(IHtmlDocument dom)
 {
     var tableRows = dom.QuerySelectorAll("#torrentTable > tbody > tr");

     return tableRows.Skip(1).ToCollection();
 }
Example #18
0
        /// <inheritdoc />
        public override async Task <IEnumerable <IDocument> > ExecuteAsync(IExecutionContext context)
        {
            HtmlParser htmlParser = new HtmlParser();

            // Every engine handed out by the pool first runs the highlight script:
            // the user-supplied file when one is configured, the embedded resource otherwise.
            IJavaScriptEnginePool enginePool = context.GetJavaScriptEnginePool(engine =>
            {
                if (!string.IsNullOrWhiteSpace(_highlightJsFile))
                {
                    engine.ExecuteFile(_highlightJsFile);
                    return;
                }

                engine.ExecuteResource("highlight-all.js", typeof(HighlightCode));
            });

            using (enginePool)
            {
                IEnumerable <IDocument> highlighted = await context.Inputs.ParallelSelectAsync(async input =>
                {
                    try
                    {
                        using (Stream inputStream = input.GetStream())
                        using (IHtmlDocument htmlDocument = await htmlParser.ParseAsync(inputStream))
                        {
                            foreach (var codeElement in htmlDocument.QuerySelectorAll(_codeQuerySelector))
                            {
                                // Elements that already carry the "hljs" class are left untouched.
                                bool alreadyHighlighted = codeElement.ClassList.Contains("hljs");
                                if (!alreadyHighlighted)
                                {
                                    try
                                    {
                                        HighlightElement(enginePool, codeElement);
                                    }
                                    catch (Exception innerEx)
                                    {
                                        // A failure on one element is only logged; the remaining elements are still processed.
                                        bool asWarning = innerEx.Message.Contains("Unknown language: ") && _warnOnMissingLanguage;
                                        if (asWarning)
                                        {
                                            context.LogWarning($"Exception while highlighting source code: {innerEx.Message}");
                                        }
                                        else
                                        {
                                            context.LogInformation($"Exception while highlighting source code: {innerEx.Message}");
                                        }
                                    }
                                }
                            }

                            // Serialize the (possibly modified) DOM into a fresh content stream and clone the input around it.
                            using (Stream contentStream = await context.GetContentStreamAsync())
                            using (StreamWriter writer = contentStream.GetWriter())
                            {
                                htmlDocument.ToHtml(writer, HtmlMarkupFormatter.Instance);
                                writer.Flush();
                                return(input.Clone(context.GetContentProvider(contentStream)));
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // Any other failure falls back to returning the input document unchanged.
                        context.LogWarning("Exception while highlighting source code for {0}: {1}", input.ToSafeDisplayString(), ex.Message);
                        return(input);
                    }
                });

                // Materialize the results before disposing the JS engine
                return(highlighted.ToList());
            }
        }
        /// <summary>
        /// Looks up the description text of an achievement on a Steam community achievements page.
        /// </summary>
        /// <remarks>
        /// The parsed page is kept in <c>htmlDocument</c>; pages are only downloaded while that field is null.
        /// The <c>/id/{SteamUser}</c> address is tried first. When that page contains no
        /// <c>div.achieveRow</c> element, the <c>/profiles/{userId}</c> address is tried as well.
        /// </remarks>
        /// <param name="SteamUser">User name inserted into the <c>/id/</c> address.</param>
        /// <param name="userId">User id inserted into the <c>/profiles/</c> address.</param>
        /// <param name="AppId">Application id inserted into both addresses.</param>
        /// <param name="DisplayName">Achievement title to look for (compared trimmed and lower-cased).</param>
        /// <param name="Lang">Value sent in the <c>Steam_Language</c> cookie.</param>
        /// <returns>
        /// The inner HTML of the <c>h5</c> of the first row whose <c>h3</c> matches
        /// <paramref name="DisplayName"/>, or an empty string when nothing matches.
        /// </returns>
        private string FindHiddenDescription(string SteamUser, string userId, string AppId, string DisplayName, string Lang)
        {
            if (htmlDocument == null)
            {
                logger.Debug($"SuccessStory - Load profil data for {SteamUser} - {AppId}");
                string url = string.Format(@"https://steamcommunity.com/id/{0}/stats/{1}/?tab=achievements",
                                           SteamUser, AppId);
                string ResultWeb = DownloadAchievementsPage(url, Lang);

                if (!ResultWeb.IsNullOrEmpty())
                {
                    htmlDocument = new HtmlParser().Parse(ResultWeb);

                    if (htmlDocument.QuerySelectorAll("div.achieveRow").Length == 0)
                    {
                        logger.Debug($"SuccessStory - Load profil data for {userId} - {AppId}");
                        url = string.Format(@"https://steamcommunity.com/profiles/{0}/stats/{1}/?tab=achievements",
                                            userId, AppId);
                        ResultWeb = DownloadAchievementsPage(url, Lang);

                        // Only replace the document when the fallback actually delivered content;
                        // otherwise the (row-less) first page stays cached, as before.
                        if (!ResultWeb.IsNullOrEmpty())
                        {
                            htmlDocument = new HtmlParser().Parse(ResultWeb);
                        }
                    }
                }
            }

            if (htmlDocument != null)
            {
                // Normalise the wanted title once instead of once per row.
                string wantedTitle = DisplayName?.Trim().ToLower();

                foreach (var achieveRow in htmlDocument.QuerySelectorAll("div.achieveRow"))
                {
                    // Rows without a title element cannot match; skip them instead of dereferencing null.
                    var titleElement = achieveRow.QuerySelector("h3");
                    if (titleElement == null)
                    {
                        continue;
                    }

                    if (titleElement.InnerHtml.Trim().ToLower() == wantedTitle)
                    {
                        return(achieveRow.QuerySelector("h5")?.InnerHtml ?? "");
                    }
                }
            }

            return("");
        }

        /// <summary>
        /// Downloads one achievements page with the <c>Steam_Language</c> cookie set.
        /// Returns an empty string when the download raises a <see cref="WebException"/>;
        /// protocol errors other than HTTP 400 and HTTP 503 are logged.
        /// </summary>
        private string DownloadAchievementsPage(string url, string lang)
        {
            try
            {
                var cookies = new List <Cookie> { new Cookie("Steam_Language", lang) };
                return HttpDownloader.DownloadString(url, cookies, Encoding.UTF8);
            }
            catch (WebException ex)
            {
                if (ex.Status == WebExceptionStatus.ProtocolError && ex.Response != null)
                {
                    var resp = (HttpWebResponse)ex.Response;
                    switch (resp.StatusCode)
                    {
                    case HttpStatusCode.BadRequest:             // HTTP 400
                    case HttpStatusCode.ServiceUnavailable:     // HTTP 503
                        break;

                    default:
                        Common.LogError(ex, "SuccessStory", $"Failed to load from {url}. ");
                        break;
                    }
                }

                return "";
            }
        }
        /// <summary>
        /// Downloads a single Parimatch event page and parses it into a <see cref="Bet"/>.
        /// </summary>
        /// <remarks>
        /// The main odds table fills the returned bet itself. When the page has exactly three
        /// table bodies, the rows of the third one with class <c>bk</c> are parsed into
        /// <c>bet.Parts</c>, and for tennis the per-game rows are parsed into <c>bet.Games</c>.
        /// </remarks>
        /// <param name="href">Page address appended to <c>_url</c>; also stored as the bet's <c>URL</c>.</param>
        /// <returns>The parsed bet.</returns>
        /// <exception cref="ArgumentException">
        /// The page has fewer than two odds tables, the first team cell contains "угловые",
        /// or the header and a data row have different column counts.
        /// </exception>
        /// <exception cref="ArgumentNullException">The second table body has no first child row.</exception>
        private Bet ParsePage(string href)
        {
#if DEBUGPAGE
            Stopwatch sw = new Stopwatch();
            sw.Start();
#endif
            HttpRequest req = new HttpRequest
            {
                CharacterSet = Encoding.GetEncoding(1251),
                UserAgent    =
                    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36 OPR/42.0.2393.94"
            };
            if (UsingProxy)
            {
                req.Proxy = ProxyClient.Parse(ProxyList.GetRnd());
                req.Proxy.ConnectTimeout   = 1500;
                req.Proxy.ReadWriteTimeout = 1500;
            }

            var response = req.Get($"{_url}{href}").ToString();

            HtmlParser parser = new HtmlParser();

            IHtmlDocument document = parser.Parse(response);
            IElement      oddsList = document.GetElementById("oddsList");
            Bet           bet      = new Bet
            {
                Bookmaker = BookmakerType.Parimatch,
                Site      = "P",
                SportType = SportTypeHelper.Parse(oddsList.Children[0].ChildNodes[1].ChildNodes[2].TextContent),
                Groupe    = oddsList.Children[0].ChildNodes[1].ChildNodes[2].TextContent,
                Time      = "0",
                URL       = href
            };


            List <IElement> tables = document.QuerySelectorAll("div#oddsList div.wrapper > table > tbody").ToList();

            // ToList() never returns null, so the old null check could not fire. What can actually
            // go wrong is a page with fewer than the two table bodies (header + main row) used below.
            if (tables.Count < 2)
            {
                throw new ArgumentException("Parimatch: Ошибка получения таблицы со ставками");
            }

            IElement table = tables[1].Children.FirstOrDefault();
            if (table == null)
            {
                throw new ArgumentNullException(nameof(table));
            }



            // Awkward markup: sometimes there is an image between the team names,
            // sometimes the team names are wrapped in extra tags.
            bet.Team1 = table.Children[1].ChildNodes[0].TextContent;
            if (bet.Team1.Contains("угловые"))
            {
                throw new ArgumentException("Parimatch: статика");
            }
            // The second team is the first following child node that has any non-blank text.
            for (int i = 1; i < table.Children[1].ChildNodes.Length; i++)
            {
                if (!String.IsNullOrWhiteSpace(table.Children[1].ChildNodes[i].TextContent))
                {
                    bet.Team2 = table.Children[1].ChildNodes[i].TextContent;
                    break;
                }
            }

            bet.Name = bet.Team1 + " - " + bet.Team2;
            // Fetch the table header row and the main table row.
            IElement tableHeader   = tables[0].Children.FirstOrDefault();
            IElement tableMainDesc = tables[1].Children.FirstOrDefault();

            if (tableMainDesc != null)
            {
                tableMainDesc = FixTable(tableMainDesc);
            }


            if (tableHeader == null ||
                tableMainDesc == null ||
                tableHeader.ChildElementCount != tableMainDesc.ChildElementCount)
            {
                throw new ArgumentException("Parimatch main table is corrupt");
            }


            ParseBet(bet, tableHeader, tableMainDesc);



            if (tables.Count == 3)
            {
                IHtmlCollection <IElement> dopTables =
                    tables[2].Children.Where(x => x.Attributes["class"] != null && x.Attributes["class"].Value == "bk")
                    .ToCollection();

                for (int i = 0; i < dopTables.Length; i++)
                {
                    var dopTable = dopTables[i];

                    // Rows carrying the "betting suspended" notice are skipped.
                    if (dopTable.TextContent.Contains("Прием ставок приостановлен"))
                    {
                        continue;
                    }
                    dopTable = FixTable(dopTable);
                    if (dopTable.ChildElementCount != tableHeader.ChildElementCount)
                    {
                        throw new ArgumentException();
                    }

                    var dopBet = bet.ShortCopy();

                    // The time-part token is the text before the first '-' of the second cell;
                    // basketball halves are mapped to the fixed tokens "11" and "12".
                    var t = dopTable.Children[1].TextContent.Split('-').First();
                    if (dopBet.SportType == SportType.Баскетбол)
                    {
                        if (dopTable.Children[1].TextContent.Contains("половина"))
                        {
                            if (dopTable.Children[1].TextContent.Contains("Первая половина:"))
                            {
                                t = "11";
                            }
                            else if (dopTable.Children[1].TextContent.Contains("Вторая половина:"))
                            {
                                t = "12";
                            }
                        }
                    }

                    var timePart = SportTimePartHelper.Parse(t);

                    if (timePart == SportTimePart.Nan)
                    {
                        continue;
                    }

                    ParseBet(dopBet, tableHeader, dopTable);
                    bet.Parts[timePart] = dopBet;
                }



                if (bet.SportType == SportType.Теннис)
                {
                    // Built once for all rows instead of once per row.
                    Regex gameWinPattern = new Regex("Сет . гейм .: победа");

                    foreach (IElement tr in tables[2].Children)
                    {
                        if (gameWinPattern.IsMatch(tr.TextContent))
                        {
                            var     data  = tr.Children[1].Children[0].TextContent.Split(' ');
                            GameBet gmBet = new GameBet();
                            gmBet.Set        = SportTimePartHelper.Parse(data[1]);
                            gmBet.Team1      = bet.Team1;
                            gmBet.Team2      = bet.Team2;
                            gmBet.GameNumber = (TenisGamePart)Enum.Parse(typeof(TenisGamePart), data[3].Replace(":", ""));
                            gmBet.Coef1      = TryFloatParse(tr.Children[1].Children[1].Children[0].Children.FirstOrDefault()?.Children.FirstOrDefault()?.TextContent);
                            // The element id is only read when a coefficient was actually parsed.
                            if (gmBet.Coef1 != 0)
                            {
                                gmBet.Coef1o = tr.Children[1].Children[1].Children[0].Children[0].Children[0].Id;
                            }
                            gmBet.Coef2 = TryFloatParse(tr.Children[1].Children[2].Children[0].Children.FirstOrDefault()?.Children.FirstOrDefault()?.TextContent);
                            if (gmBet.Coef2 != 0)
                            {
                                gmBet.Coef2o = tr.Children[1].Children[2].Children[0].Children[0].Children[0].Id;
                            }

                            bet.Games.Add(gmBet);
                        }
                    }
                }
            }



#if DEBUGPAGE
            Console.WriteLine(bet);
            Console.WriteLine($"Parimatch pageparse: {href} {sw.Elapsed} мс");
#endif

            return(bet);
        }
Example #21
0
 /// <summary>
 /// Returns the text content of every element matched by <c>LinkQuerySelector</c>.
 /// </summary>
 /// <param name="html">Document to search.</param>
 /// <returns>One string per matched element, in match order.</returns>
 private List <string> LinkTexts(IHtmlDocument html)
 {
     var texts = new List <string>();
     foreach (var link in html.QuerySelectorAll(LinkQuerySelector))
     {
         texts.Add(link.TextContent);
     }

     return texts;
 }
Example #22
0
 /// <summary>
 /// Reads an attribute from the n-th element matching a selector.
 /// </summary>
 /// <param name="html">Document to search; a null document yields null.</param>
 /// <param name="selector">CSS selector passed to <c>QuerySelectorAll</c>.</param>
 /// <param name="attribute">Name of the attribute to read.</param>
 /// <param name="index">One-based position of the wanted match (default 1, the first match).</param>
 /// <returns>The attribute value, or null when the document or the selected element is null.</returns>
 protected string GetAttribute(IHtmlDocument html, string selector, string attribute, int index = 1)
 {
     if (html == null)
     {
         return null;
     }

     // Callers count from 1; the collection is indexed from 0.
     var matches = html.QuerySelectorAll(selector);
     var target  = matches[index - 1];

     return target?.GetAttribute(attribute);
 }
        /// <summary>
        /// Stores every lot of a tender, together with its customer, purchase objects and
        /// application guarantee, in the database.
        /// </summary>
        /// <param name="lots">Lot elements of the tender page.</param>
        /// <param name="connect">Connection all commands are executed on.</param>
        /// <param name="idTender">Id of the tender the lots belong to.</param>
        /// <param name="customerId">
        /// Customer id used when a lot names no customer; replaced by the looked-up or newly
        /// inserted customer id once a lot does name one.
        /// </param>
        /// <param name="purObjInfo">Stored as the lot name and used as purchase name when the lot has no subject text.</param>
        /// <param name="document">Whole tender page; queried for <c>table.tableUnit tbody tr</c> rows.</param>
        private void GetLots(IHtmlCollection <IElement> lots, MySqlConnection connect, int idTender, int customerId,
                             string purObjInfo, IHtmlDocument document)
        {
            foreach (var lot in lots)
            {
                var lotNumT = (lot.QuerySelector("div.procedure__lot-header span")?.TextContent ?? "").Trim();
                lotNumT = lotNumT.GetDataFromRegex(@"Лот (\d+)");
                int.TryParse(lotNumT, out var lotNum);
                // A missing or unparsable lot number (TryParse leaves 0) falls back to 1.
                if (lotNum == 0)
                {
                    lotNum = 1;
                }
                var currency = (lot.QuerySelector("td:contains('Валюта:') +  td")?.TextContent ?? "").Trim();
                var nmckT    = (lot.QuerySelector("td:contains('Начальная цена:') +  td")?.TextContent ?? "0.0")
                               .Trim();
                var nmck    = SharedTekTorg.ParsePrice(nmckT);
                var purName =
                    (lot.QuerySelector("td:contains('Предмет договора:') +  td")?.TextContent ?? "").Trim();
                if (string.IsNullOrEmpty(purName))
                {
                    purName = purObjInfo;
                }

                var insertLot =
                    $"INSERT INTO {AppBuilder.Prefix}lot SET id_tender = @id_tender, lot_number = @lot_number, max_price = @max_price, currency = @currency, lot_name = @lot_name";
                int idLot;
                using (var cmd18 = new MySqlCommand(insertLot, connect))
                {
                    cmd18.Prepare();
                    cmd18.Parameters.AddWithValue("@id_tender", idTender);
                    cmd18.Parameters.AddWithValue("@lot_number", lotNum);
                    cmd18.Parameters.AddWithValue("@max_price", nmck);
                    cmd18.Parameters.AddWithValue("@currency", currency);
                    // NOTE(review): the lot name is bound to purObjInfo, not to purName — confirm this is intended.
                    cmd18.Parameters.AddWithValue("@lot_name", purObjInfo);
                    cmd18.ExecuteNonQuery();
                    idLot = (int)cmd18.LastInsertedId;
                }

                // NOTE(review): the "0.0" fallback means a lot without a customer cell is looked up
                // (and inserted) as a customer literally named "0.0" — confirm this is intended.
                var customerFullName =
                    (lot.QuerySelector("td:contains('Заказчик:') +  td")?.TextContent ?? "0.0").Trim();
                if (!string.IsNullOrEmpty(customerFullName))
                {
                    var selectCustomer =
                        $"SELECT id_customer FROM {AppBuilder.Prefix}customer WHERE full_name = @full_name";
                    var customerFound = false;
                    using (var cmd13 = new MySqlCommand(selectCustomer, connect))
                    {
                        cmd13.Prepare();
                        cmd13.Parameters.AddWithValue("@full_name", customerFullName);
                        // The reader is closed before the insert below runs on the same connection.
                        using (var reader7 = cmd13.ExecuteReader())
                        {
                            if (reader7.HasRows)
                            {
                                reader7.Read();
                                customerId    = (int)reader7["id_customer"];
                                customerFound = true;
                            }
                        }
                    }

                    if (!customerFound)
                    {
                        var insertCustomer =
                            $"INSERT INTO {AppBuilder.Prefix}customer SET reg_num = @reg_num, full_name = @full_name, is223=1";
                        using (var cmd14 = new MySqlCommand(insertCustomer, connect))
                        {
                            cmd14.Prepare();
                            // New customers get a random GUID as registration number.
                            var customerRegNumber = Guid.NewGuid().ToString();
                            cmd14.Parameters.AddWithValue("@reg_num", customerRegNumber);
                            cmd14.Parameters.AddWithValue("@full_name", customerFullName);
                            cmd14.ExecuteNonQuery();
                            customerId = (int)cmd14.LastInsertedId;
                        }
                    }
                }

                var objectsP = document.QuerySelectorAll(
                    "table.tableUnit tbody tr");
                if (objectsP.Length == 0)
                {
                    // No item table on the page: store one purchase object built from the lot itself.
                    var okpd2Temp =
                        (lot.QuerySelector("td:contains('Код классификатора ОКДП/ОКПД2') +  td")?.TextContent ?? "")
                        .Trim();
                    var okpd2Code            = okpd2Temp.GetDataFromRegex(@"^(\d[\.|\d]*\d)");
                    var okpd2GroupCode       = 0;
                    var okpd2GroupLevel1Code = "";
                    if (!String.IsNullOrEmpty(okpd2Code))
                    {
                        GetOkpd(okpd2Code, out okpd2GroupCode, out okpd2GroupLevel1Code);
                    }

                    var okpdName = okpd2Temp.GetDataFromRegex(@"^\d[\.|\d]*\d (.*)$");
                    if (!string.IsNullOrEmpty(purName))
                    {
                        var insertLotitem =
                            $"INSERT INTO {AppBuilder.Prefix}purchase_object SET id_lot = @id_lot, id_customer = @id_customer, name = @name, sum = @sum, okpd2_code = @okpd2_code, okpd2_group_code = @okpd2_group_code, okpd2_group_level1_code = @okpd2_group_level1_code, okpd_name = @okpd_name";
                        using (var cmd19 = new MySqlCommand(insertLotitem, connect))
                        {
                            cmd19.Prepare();
                            cmd19.Parameters.AddWithValue("@id_lot", idLot);
                            cmd19.Parameters.AddWithValue("@id_customer", customerId);
                            cmd19.Parameters.AddWithValue("@name", purName);
                            cmd19.Parameters.AddWithValue("@sum", nmck);
                            cmd19.Parameters.AddWithValue("@okpd2_code", okpd2Code);
                            cmd19.Parameters.AddWithValue("@okpd2_group_code", okpd2GroupCode);
                            cmd19.Parameters.AddWithValue("@okpd2_group_level1_code", okpd2GroupLevel1Code);
                            cmd19.Parameters.AddWithValue("@okpd_name", okpdName);
                            cmd19.ExecuteNonQuery();
                        }
                    }
                }
                else
                {
                    // One purchase object per row of the item table.
                    foreach (var po in objectsP)
                    {
                        var okpd2Temp =
                            (po.QuerySelector("td:nth-of-type(5)")?.TextContent ?? "")
                            .Trim();
                        var okpd2Code            = okpd2Temp.GetDataFromRegex(@"^(\d[\.|\d]*\d)");
                        var okpd2GroupCode       = 0;
                        var okpd2GroupLevel1Code = "";
                        if (!String.IsNullOrEmpty(okpd2Code))
                        {
                            GetOkpd(okpd2Code, out okpd2GroupCode, out okpd2GroupLevel1Code);
                        }

                        var okpdName = okpd2Temp.GetDataFromRegex(@"^\d[\.|\d]*\d (.*)$");
                        var poName   = (po.QuerySelector("td:nth-of-type(1)")?.TextContent ?? "")
                                       .Trim();
                        var okei = (po.QuerySelector("td:nth-of-type(3)")?.TextContent ?? "")
                                   .Trim();
                        var quant = (po.QuerySelector("td:nth-of-type(4)")?.TextContent ?? "")
                                    .Trim();
                        if (!string.IsNullOrEmpty(purName))
                        {
                            // The statement previously contained a stray tab inside "@quantity_value",
                            // so the placeholder did not match the parameter bound below.
                            var insertLotitem =
                                $"INSERT INTO {AppBuilder.Prefix}purchase_object SET id_lot = @id_lot, id_customer = @id_customer, name = @name, sum = @sum, okpd2_code = @okpd2_code, okpd2_group_code = @okpd2_group_code, okpd2_group_level1_code = @okpd2_group_level1_code, okpd_name = @okpd_name, okei = @okei, quantity_value = @quantity_value, customer_quantity_value = @customer_quantity_value";
                            using (var cmd19 = new MySqlCommand(insertLotitem, connect))
                            {
                                cmd19.Prepare();
                                cmd19.Parameters.AddWithValue("@id_lot", idLot);
                                cmd19.Parameters.AddWithValue("@id_customer", customerId);
                                cmd19.Parameters.AddWithValue("@name", poName);
                                cmd19.Parameters.AddWithValue("@sum", "");
                                cmd19.Parameters.AddWithValue("@okpd2_code", okpd2Code);
                                cmd19.Parameters.AddWithValue("@okpd2_group_code", okpd2GroupCode);
                                cmd19.Parameters.AddWithValue("@okpd2_group_level1_code", okpd2GroupLevel1Code);
                                cmd19.Parameters.AddWithValue("@okpd_name", okpdName);
                                cmd19.Parameters.AddWithValue("@okei", okei);
                                cmd19.Parameters.AddWithValue("@quantity_value", quant);
                                cmd19.Parameters.AddWithValue("@customer_quantity_value", quant);
                                cmd19.ExecuteNonQuery();
                            }
                        }
                    }
                }


                var appGuarAt = (lot.QuerySelector("td:contains('Обеспечение заявки:') +  td")?.TextContent ?? "")
                                .Trim();
                var appGuarA = SharedTekTorg.ParsePrice(appGuarAt);
                // A customer requirement row is only written when a non-zero guarantee amount was parsed.
                if (appGuarA != 0.0m)
                {
                    var insertCustomerRequirement =
                        $"INSERT INTO {AppBuilder.Prefix}customer_requirement SET id_lot = @id_lot, id_customer = @id_customer, application_guarantee_amount = @application_guarantee_amount, max_price = @max_price";
                    using (var cmd16 = new MySqlCommand(insertCustomerRequirement, connect))
                    {
                        cmd16.Prepare();
                        cmd16.Parameters.AddWithValue("@id_lot", idLot);
                        cmd16.Parameters.AddWithValue("@id_customer", customerId);
                        cmd16.Parameters.AddWithValue("@application_guarantee_amount", appGuarA);
                        cmd16.Parameters.AddWithValue("@max_price", nmck);
                        cmd16.ExecuteNonQuery();
                    }
                }
            }
        }
Example #24
0
        /// <summary>
        /// Rewrites every <c>img</c> source in the document to a local file inside the output
        /// directory, fetches those files, registers them in the OPF file and stores the
        /// resulting body markup in the chapter.
        /// </summary>
        /// <param name="doc">Document whose images are embedded; its <c>img</c> elements are modified.</param>
        /// <param name="opfFile">Receives one item per image whose media type could be determined.</param>
        /// <param name="chapter">Its <c>Content</c> is set to the serialized child nodes of <c>body</c>.</param>
        /// <param name="outputDir">Directory the image files are written to.</param>
        protected virtual async Task EmbedImagesAsync(IHtmlDocument doc, OpfFile opfFile, Chapter chapter, string outputDir)
        {
            var tasks  = new List <Task>();
            var images = new Dictionary <Uri, string>();

            // Document-relative references use only the last segment of the output directory.
            string outputDirName = new DirectoryInfo(outputDir).Name;

            foreach (var img in doc.QuerySelectorAll("img"))
            {
                string src = img.GetAttribute("src");

                // An <img> without a usable src has nothing to embed.
                if (string.IsNullOrWhiteSpace(src))
                {
                    continue;
                }

                // Protocol-relative addresses ("//host/path") are turned into http addresses.
                if (src.StartsWith("//", StringComparison.Ordinal))
                {
                    src = src.Substring(2);

                    if (!(src.StartsWith("http://", StringComparison.Ordinal) || src.StartsWith("https://", StringComparison.Ordinal)))
                    {
                        src = "http://" + src;
                    }
                }

                Uri uri;
                if (!Uri.TryCreate(src, UriKind.RelativeOrAbsolute, out uri))
                {
                    continue;
                }

                // Drop the query string. UriBuilder only supports absolute URIs, so relative
                // references are kept as they are instead of throwing.
                if (uri.IsAbsoluteUri)
                {
                    UriBuilder ub = new UriBuilder(uri)
                    {
                        Query = string.Empty
                    };
                    uri = ub.Uri;
                }

                // An image referenced several times is stored once; every reference must
                // point at that one file, otherwise later references would be dangling.
                string path;
                if (!images.TryGetValue(uri, out path))
                {
                    string fileName = $"{Path.GetRandomFileName()}.{Path.GetExtension(uri.ToString())}".ToValidFilePath();

                    // One unusable name must not abort the whole chapter; skip just this image.
                    if (string.IsNullOrEmpty(fileName))
                    {
                        continue;
                    }

                    path = Path.Combine(outputDir, fileName);
                    images.Add(uri, path);
                }

                string filePath = Path.Combine(outputDirName, Path.GetFileName(path)).Replace(@"\", "/");
                img.SetAttribute("src", filePath);
            }

            // A single HttpClient is shared by all downloads of this call.
            using (HttpClient client = new HttpClient())
            {
                foreach (var entry in images)
                {
                    tasks.Add(Task.Run(async() =>
                    {
                        Uri uri           = entry.Key;
                        string path       = entry.Value;
                        string outputPath = Path.Combine(outputDirName, Path.GetFileName(path)).Replace(@"\", "/");
                        string src        = uri.ToString();

                        if (uri.IsAbsoluteUri && !uri.IsFile)
                        {
                            try
                            {
                                using (HttpResponseMessage resp = await client.GetAsync(src))
                                {
                                    resp.EnsureSuccessStatusCode();

                                    string mediaType = resp.Content.Headers.ContentType.MediaType.ToLower();

                                    // Only JPEG and PNG responses are stored.
                                    if (mediaType != MediaType.JpegType && mediaType != MediaType.PngType)
                                    {
                                        return;
                                    }

                                    if (File.Exists(path))
                                    {
                                        return;
                                    }

                                    using (FileStream fs = new FileStream(path, FileMode.CreateNew))
                                    {
                                        await resp.Content.CopyToAsync(fs);
                                    }
                                }
                            }
                            catch (Exception)
                            {
                                // Best effort: an image that cannot be downloaded is simply left out.
                                return;
                            }
                        }
                        else if (File.Exists(src))
                        {
                            File.Copy(src, path);
                        }

                        MediaType mType = MediaType.FromExtension(Path.GetExtension(path));

                        if (mType == null)
                        {
                            return;
                        }

                        // NOTE(review): AddItem is called from several tasks at once — confirm OpfFile tolerates that.
                        opfFile.AddItem(new OpfItem(outputPath, StringUtilities.GenerateRandomString(),
                                                    mType), false);
                    }));
                }

                await Task.WhenAll(tasks);
            }

            chapter.Content = doc.QuerySelector("body").ChildNodes.ToHtml(new XmlMarkupFormatter());
        }
Example #25
0
 /// <summary>
 /// Counts the elements matched by <c>.gallery__main-viewer-list li</c>.
 /// </summary>
 /// <param name="document">Document to search.</param>
 /// <returns>The number of matches, formatted as a string.</returns>
 private static string getImagesFromDOM(IHtmlDocument document)
 {
     var galleryItems = document.QuerySelectorAll(".gallery__main-viewer-list li");
     var itemCount    = galleryItems.Length;

     return itemCount.ToString();
 }
Example #26
0
 /// <summary>
 /// Forwards a selector query to the wrapped document <c>_doc</c>.
 /// </summary>
 /// <param name="selectors">Selector string passed through unchanged.</param>
 /// <returns>The collection returned by the wrapped document.</returns>
 public IHtmlCollection <IElement> QuerySelectorAll(string selectors) =>
 _doc.QuerySelectorAll(selectors);
Example #27
0
        /// <summary>
        /// Returns every element of the document matched by <c>div.itemBox</c>.
        /// </summary>
        /// <param name="doc">Document to search.</param>
        /// <returns>The matched elements.</returns>
        public IHtmlCollection <IElement> GetAnimeElements(IHtmlDocument doc)
        {
            return(doc.QuerySelectorAll("div.itemBox"));
        }
Example #28
0
        /// <summary>
        /// Decides whether a response indicates that a login is required.
        /// </summary>
        /// <param name="Result">Response to inspect; a redirect always counts as "login needed".</param>
        /// <param name="document">Parsed response body, searched with the configured test selector.</param>
        /// <returns>
        /// True for a redirect, or when a login test selector is configured and matches nothing;
        /// false otherwise.
        /// </returns>
        protected bool CheckIfLoginIsNeeded(WebClientStringResult Result, IHtmlDocument document)
        {
            if (Result.IsRedirect)
            {
                return true;
            }

            // Without a login definition, a test, or a test selector there is nothing to check.
            var testSelector = Definition.Login?.Test?.Selector;
            if (testSelector == null)
            {
                return false;
            }

            // The selector marks content of a logged-in page; no match means login is needed.
            return document.QuerySelectorAll(testSelector).Length == 0;
        }
Example #29
0
        /// <summary>
        /// Runs a search for the text in <c>keyvalue</c>, loads every result page and shows
        /// the first 30 entries in <c>datagrid1</c>.
        /// </summary>
        /// <remarks>
        /// The total hit count is read from the first result page; pages hold 30 entries each.
        /// Pages are fetched one after another on a worker task, updating the progress after each.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private async void Button_Click(object sender, RoutedEventArgs e)
        {
            if (string.IsNullOrEmpty(keyvalue.Text))
            {
                return;
            }
            var tempstr = await client.GetStringAsync("http://www.daihema.com/s/name/" + keyvalue.Text);

            IHtmlDocument tempdoc = await parser.ParseDocumentAsync(tempstr);

            var num = tempdoc.QuerySelector(".search-page")?.QuerySelectorAll(".red.sep-both05.bold")?.LastOrDefault()?.TextContent;

            // A missing, zero or non-numeric count is treated as "no results" instead of
            // letting a FormatException escape from this async void handler.
            if (!int.TryParse(num, out var allcount) || allcount <= 0)
            {
                MessageBox.Show("搜索结果为空", "Error", MessageBoxButton.OK, MessageBoxImage.Error);
                return;
            }

            pages = allcount % 30 == 0 ? allcount / 30 : allcount / 30 + 1;
            AllDatas.Clear();
            // Read the search text here: it is needed on the worker task below.
            var key = keyvalue.Text;

            ModifyProgress(0);
            await Task.Run(async() =>
            {
                for (int i = 1; i <= pages; i++)
                {
                    // Awaited rather than blocked on with .Result, so no pool thread is tied up during I/O.
                    var str = await client.GetStringAsync("http://www.daihema.com/s/name/" + key + "/" + i);

                    IHtmlDocument doc = await parser.ParseDocumentAsync(str);
                    var results       = doc.QuerySelectorAll(".row").Select(o =>
                    {
                        DateTime temp = ParsePublishDate(o.QuerySelector(".small").TextContent);
                        return(new BaiduFileModel()
                        {
                            Title = o.QuerySelector("a").GetAttribute("title"),
                            Author = o.QuerySelector(".small").LastChild.TextContent,
                            Size = o.QuerySelector(".size").TextContent,
                            Time = temp.ToShortDateString(),
                            Url = o.QuerySelector("a").GetAttribute("href"),
                        });
                    });
                    AllDatas = new ObservableCollection <BaiduFileModel>(AllDatas.Concat(results));

                    // Computed from the page index so truncation does not accumulate
                    // (adding 100 / pages each time stays at 0 for more than 100 pages).
                    ModifyProgress(i * 100 / pages);
                }
            });

            // Initialise the paging controls.
            des.Content = $"共{num}条数据";
            ModifyProgress(100);
            page1.CurrentPage     = "1";
            page1.TotalPage       = pages + "";
            // NOTE(review): Time holds a short-date string, so this sort is textual, not chronological — confirm.
            AllDatas              = new ObservableCollection <BaiduFileModel>(AllDatas.OrderByDescending(o => o.Time).ToList());
            datagrid1.ItemsSource = AllDatas.Take(30);
        }

        /// <summary>
        /// Extracts the publish date that precedes the "发布" marker in a result row's text.
        /// Returns the default <see cref="DateTime"/> when the marker or a parsable date is missing.
        /// </summary>
        private static DateTime ParsePublishDate(string text)
        {
            var markerIndex = text.IndexOf("发布", StringComparison.Ordinal);
            if (markerIndex < 5)
            {
                // Marker missing, or too little text before it to hold even the short form.
                return default;
            }

            // Prefer the 10-character form; fall back to the 5 characters directly before
            // the marker when the long form is not available or is not a plain date.
            var data = markerIndex >= 10 ? text.Substring(markerIndex - 10, 10) : text.Substring(markerIndex - 5, 5);
            if (data.Contains(" ") || data.Contains("今天"))
            {
                data = text.Substring(markerIndex - 5, 5);
            }

            DateTime parsed;
            if (data.Length == 10)
            {
                return DateTime.TryParse(data, out parsed) ? parsed : default;
            }

            if (data.Contains(":"))
            {
                // A time of day is mapped to today's date.
                return DateTime.Now.Date;
            }

            // Otherwise the current year is prepended before parsing.
            return DateTime.TryParse(DateTime.Now.Year + "-" + data, out parsed) ? parsed : default;
        }
Example #30
0
        /// <summary>
        /// Runs <c>PlayerIdRegex</c> against the text content of the last
        /// <c>script</c> element in the document and returns the matched text.
        /// </summary>
        /// <param name="doc">The parsed HTML document to inspect.</param>
        /// <returns>The value of the regex match.</returns>
        private static string PlayerId(IHtmlDocument doc)
        {
            var scripts    = doc.QuerySelectorAll("script");
            var scriptText = scripts.Last().TextContent;
            var match      = PlayerIdRegex.Match(scriptText);

            return match.Value;
        }
        /// <summary>
        /// Parses the input document's HTML, collects the elements selected by
        /// <c>GetHeadingQuery(value)</c>, creates one document per heading and stores
        /// the resulting documents in a clone of the input under the configured metadata key.
        /// </summary>
        /// <param name="input">The document whose HTML content is inspected.</param>
        /// <param name="context">The execution context used to obtain content streams and create documents.</param>
        /// <param name="value">The heading level passed to <c>GetHeadingQuery</c>; must be between 1 and 6.</param>
        /// <returns>
        /// The unmodified input when no metadata key is configured or HTML parsing yields no document;
        /// otherwise a clone of the input carrying the heading documents.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="value"/> is less than 1 or greater than 6.
        /// </exception>
        protected override async Task <IEnumerable <Common.IDocument> > ExecuteConfigAsync(Common.IDocument input, IExecutionContext context, int value)
        {
            // Return the original document if no metadata key
            if (string.IsNullOrWhiteSpace(_metadataKey))
            {
                return(input.Yield());
            }

            // Parse the HTML content
            IHtmlDocument htmlDocument = await input.ParseHtmlAsync(context, HtmlParser);

            if (htmlDocument == null)
            {
                return(input.Yield());
            }

            // Validate the level
            if (value < 1)
            {
                throw new ArgumentException("Heading level cannot be less than 1");
            }
            if (value > 6)
            {
                throw new ArgumentException("Heading level cannot be greater than 6");
            }

            // Evaluate the query and create the holding nodes. Each node keeps a link to the
            // heading that precedes it in query order so parents can be found by walking backwards.
            Heading        previousHeading = null;
            List <Heading> headings        = htmlDocument
                                             .QuerySelectorAll(GetHeadingQuery(value))
                                             .Select(x =>
            {
                previousHeading = new Heading
                {
                    Element  = x,
                    Previous = previousHeading,
                    Level    = int.Parse(x.NodeName.Substring(1))
                };
                return(previousHeading);
            })
                                             .ToList();

            // Build the tree from the bottom-up. Deeper levels are processed first, so a heading's
            // Children list is already filled in by the time its own document is created.
            for (int level = value; level >= 1; level--)
            {
                int currentLevel = level;
                foreach (Heading heading in headings.Where(x => x.Level == currentLevel))
                {
                    // Get the parent: the nearest preceding heading with a smaller level
                    Heading parent = null;
                    if (currentLevel > 1)
                    {
                        parent = heading.Previous;
                        while (parent != null && parent.Level >= currentLevel)
                        {
                            parent = parent.Previous;
                        }
                    }

                    // Create the document
                    MetadataItems metadata = new MetadataItems();
                    if (_levelKey != null)
                    {
                        metadata.Add(_levelKey, heading.Level);
                    }
                    if (_idKey != null && heading.Element.HasAttribute("id"))
                    {
                        metadata.Add(_idKey, heading.Element.GetAttribute("id"));
                    }
                    if (_headingKey != null)
                    {
                        metadata.Add(_headingKey, heading.Element.InnerHtml);
                    }
                    if (_childrenKey != null)
                    {
                        metadata.Add(_childrenKey, heading.Children.AsReadOnly());
                    }

                    using (Stream contentStream = await context.GetContentStreamAsync())
                    {
                        using (StreamWriter writer = contentStream.GetWriter())
                        {
                            heading.Element.ChildNodes.ToHtml(writer, ProcessingInstructionFormatter.Instance);
                            writer.Flush();
                            heading.Document = context.CreateDocument(metadata, context.GetContentProvider(contentStream, MediaTypes.Html));
                        }
                    }

                    // Add to parent
                    parent?.Children.Add(heading.Document);
                }
            }

            // Choose which headings to expose. When nesting, only headings at the smallest level
            // present are returned. The minimum is computed once instead of once per heading, and
            // the empty case is skipped because Enumerable.Min throws on an empty sequence.
            IEnumerable <Heading> selected = headings;
            if (_nesting && headings.Count > 0)
            {
                int topLevel = headings.Min(x => x.Level);
                selected = headings.Where(x => x.Level == topLevel);
            }

            return(input
                   .Clone(new MetadataItems
            {
                {
                    _metadataKey,
                    selected
                    .Select(x => x.Document)
                    .ToArray()
                }
            })
                   .Yield());
        }
Example #32
0
 /// <summary>
 /// Queries the document with the given selector and returns the text that
 /// <c>GetTextContent</c> produces for the requested index.
 /// </summary>
 /// <param name="html">The document to query; may be null.</param>
 /// <param name="selector">The CSS selector to evaluate.</param>
 /// <param name="index">The index forwarded to <c>GetTextContent</c> (defaults to 1).</param>
 /// <returns>The text content, or an empty string when <c>GetTextContent</c> returns null.</returns>
 protected string GetPartial(IHtmlDocument html, string selector, int index = 1)
 {
     var matches = html?.QuerySelectorAll(selector);
     var text    = GetTextContent(matches, index);

     return text ?? string.Empty;
 }
Example #33
0
    /// <summary>
    /// Asserts that the selector matches exactly one element in the document and
    /// that this element is assignable to <see cref="IHtmlAnchorElement"/>.
    /// </summary>
    /// <param name="selector">The CSS selector to evaluate.</param>
    /// <param name="document">The document to query.</param>
    /// <returns>The single matched element as an anchor element.</returns>
    public static IHtmlAnchorElement HasLink(string selector, IHtmlDocument document)
    {
        var matches = document.QuerySelectorAll(selector);
        var only    = Assert.Single(matches);

        return Assert.IsAssignableFrom <IHtmlAnchorElement>(only);
    }