示例#1
0
 /// <summary>
 /// Refreshes the list of work categories by scraping the "/jobs/" page of the site.
 /// </summary>
 /// <remarks>
 /// Every anchor matching <c>.collapse li a[data-category_id]</c> is appended to
 /// <c>Objects.Category.Categories</c>; the list is not cleared first.
 /// Any exception raised while downloading or parsing is swallowed and only reported through the log.
 /// </remarks>
 /// <returns><c>true</c> when the page was downloaded and parsed, <c>false</c> when any step threw.</returns>
 public bool UpdateWorkCategory()
 {
     Log.ProcessMessage("Пытаемся обновить список категорий");
     try
     {
         // Blocking download of the page markup
         var response = http.GetAsync(Domain + "/jobs/").Result;
         string pageSource = response.Content.ReadAsStringAsync().Result;

         var document = new HtmlParser().ParseDocument(pageSource);
         var categoryLinks = document.QuerySelectorAll(".collapse li a[data-category_id]");

         foreach (var link in categoryLinks)
         {
             var category = new Objects.Category
             {
                 Name = link.TextContent,
                 Href = link.GetAttribute("href")
             };
             Objects.Category.Categories.Add(category);
         }

         Log.GoodMessage("Обновили список категорий");
         return true;
     }
     catch
     {
         Log.ExMessage("Не удалось обновить список категорий");
         return false;
     }
 }
示例#2
0
        /// <summary>
        /// Converts the session elements of one track into <see cref="Track"/> entries and appends
        /// them to <paramref name="program"/>.
        /// </summary>
        /// <remarks>
        /// Elements whose <c>h3</c> text is exactly "skip" are ignored. A session without a speaker
        /// name (element missing or empty) is recorded with the speaker "Networking"; a session
        /// without a <c>p</c> element gets an empty description.
        /// </remarks>
        /// <param name="program">List that receives the new entries.</param>
        /// <param name="trackElements">Session elements of a single track.</param>
        /// <param name="trackId">Identifier stored on every created entry.</param>
        /// <returns>The same <paramref name="program"/> instance, for chaining.</returns>
        /// <exception cref="FormatException">The start time of a session is not in "H:mm" format.</exception>
        private static List <Track> AddProgramItems(
            List <Track> program, AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> trackElements, string trackId)
        {
            foreach (var trackElement in trackElements)
            {
                var topic = trackElement.QuerySelector("h3").TextContent.Trim();

                if (topic == "skip")
                {
                    continue;
                }

                // QuerySelector returns null when nothing matches; treat a missing speaker
                // element the same way as an empty one instead of crashing.
                var speakerElement = trackElement.QuerySelector("div.speakername .full-name");
                var fullName       = speakerElement != null ? speakerElement.TextContent.Trim() : string.Empty;

                if (string.IsNullOrEmpty(fullName))
                {
                    fullName = "Networking";
                }

                var descriptionElement = trackElement.QuerySelector("p");
                var description        = descriptionElement != null ? descriptionElement.TextContent.Trim() : string.Empty;

                // The time element holds a range such as "9:00-9:45"; only the start is kept.
                var startTime = trackElement.QuerySelector("time").TextContent.Trim().Split('-')[0];

                program.Add(new Track
                {
                    TrackId     = trackId,
                    Time        = DateTime.ParseExact(startTime, "H:mm", CultureInfo.InvariantCulture, DateTimeStyles.None),
                    FullName    = fullName,
                    Topic       = CleanText(topic),
                    Description = CleanText(description)
                });
            }

            return program;
        }
示例#3
0
 /// <summary>
 /// Collects all tasks listed on a category page.
 /// </summary>
 /// <remarks>
 /// Each link matching <c>div.row.set_href .title a</c> is resolved through
 /// <c>GetTaskFromLink</c>; links for which that call returns <c>null</c> are skipped.
 /// </remarks>
 /// <param name="link">Page link without the domain part.</param>
 /// <returns>The tasks found on the page, or <c>null</c> when downloading or parsing threw.</returns>
 public List <Objects.Task> GetTasksFromPage(string link)
 {
     Log.ProcessMessage("Пытаемся получить список заданий со страницы " + link);
     try
     {
         // Blocking download of the page markup
         var response = http.GetAsync(Domain + link).Result;
         string pageSource = response.Content.ReadAsStringAsync().Result;

         var document  = new HtmlParser().ParseDocument(pageSource);
         var taskLinks = document.QuerySelectorAll("div.row.set_href .title a");

         var collected = new List <Objects.Task>();
         foreach (var taskLink in taskLinks)
         {
             var parsed = GetTaskFromLink(taskLink.GetAttribute("href"));
             if (parsed != null)
             {
                 collected.Add(parsed);
             }
         }

         Log.GoodMessage("Получили список заданий со страницы " + link);
         return collected;
     }
     catch
     {
         Log.ExMessage("Не удалось получить список заданий со страницы " + link);
         return null;
     }
 }
示例#4
0
        /// <summary>
        /// Finds the statement table that belongs to a named landmark element and hands it to the
        /// index-based <c>ExtractTableFromHTML</c> overload.
        /// </summary>
        /// <remarks>
        /// The landmark is the first element whose <c>name</c> attribute equals
        /// <paramref name="statementTitleHref"/>. If the landmark sits inside a table, that table is
        /// used; otherwise the first table at or after the landmark is used. When either the landmark
        /// or the table cannot be found a message is written to the console and nothing is extracted.
        /// </remarks>
        /// <param name="elements">Flat collection of document elements to search.</param>
        /// <param name="statementTitleHref">Expected value of the landmark's <c>name</c> attribute.</param>
        /// <param name="outputPath">Forwarded unchanged to the index-based overload.</param>
        /// <param name="rowHeadOverrides">Forwarded unchanged to the index-based overload.</param>
        /// <param name="config">Forwarded unchanged to the index-based overload.</param>
        public static void ExtractTableFromHTML(AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> elements, string statementTitleHref, string outputPath, IDictionary <string, string> rowHeadOverrides, IDictionary <string, string> config)
        {
            // Find the href'd element
            int iElement = 0;

            while (iElement < elements.Length && elements[iElement].GetAttribute("name") != statementTitleHref)
            {
                ++iElement;
            }
            if (iElement == elements.Length)
            {
                Console.WriteLine("Unable to find expected element with name {0}", statementTitleHref);
                // Without a landmark there is nothing to anchor the table search on; continuing
                // would index one past the end of the collection.
                return;
            }

            // See if that landmark is contained by a table (assumed, then, to be the statement table)
            int statementTableIndex = findContainingElementByType(elements, iElement, "table");

            if (statementTableIndex == -1)
            {
                // Landmark is not contained in a table, so statement table is assumed to be first table following the landmark.
                statementTableIndex = findFollowingElementByType(elements, iElement, "table");
            }
            if (statementTableIndex == -1)
            {
                Console.WriteLine("No landmarked table found");
                return;
            }

            ExtractTableFromHTML(elements, statementTableIndex, outputPath, rowHeadOverrides, config);
        }
示例#5
0
        /// <summary>
        /// Debugging aid: walks the elements from <paramref name="startFrom"/> onwards and prints a
        /// one-line note for every table and for every element whose text starts with one of a few
        /// known landmark phrases.
        /// </summary>
        /// <param name="elements">Elements to inspect.</param>
        /// <param name="startFrom">Index of the first element to inspect.</param>
        static void quickSummaryPrint(AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> elements, int startFrom = 0)
        {
            for (int index = startFrom; index < elements.Length; ++index)
            {
                var    current = elements[index];
                string text    = current.TextContent;

                if (current.TagName.ToLower() == "table")
                {
                    int rowCount  = current.QuerySelectorAll("tr").Length;
                    int cellCount = current.QuerySelectorAll("td").Length;
                    Console.WriteLine("{0}: table: rows: {1}, cells: {2}", index, rowCount, cellCount);

                    // Preview: at most the first 80 characters, flattened onto one line
                    string preview = text.Substring(0, Math.Min(80, text.Length));
                    Console.WriteLine(preview.Replace("\r", "").Replace("\n", " ").Trim());
                }

                bool isNotesTitle = Regex.IsMatch(text, "^(?:Condensed )?notes to .*consolidated (?:condensed )?financial statements|^Supplemental Financial Data", RegexOptions.IgnoreCase);
                if (isNotesTitle)
                {
                    Console.WriteLine("{0}: Found notes section title: {1}", index, text);
                }

                bool isUnitedStates = Regex.IsMatch(text, @"^united states", RegexOptions.IgnoreCase);
                if (isUnitedStates)
                {
                    Console.WriteLine("{0}: Found United States", index);
                }

                bool isTocLandmark = Regex.IsMatch(text, @"^table of contents|^index", RegexOptions.IgnoreCase);
                if (isTocLandmark)
                {
                    Console.WriteLine("{0}: Found TOC landmark: {1}", index, text);
                }
            }
        }
示例#6
0
        /// <summary>
        /// Reads product page URLs from a text file, scrapes each page and stores the result in MongoDB.
        /// </summary>
        /// <remarks>
        /// For every URL the heading (<c>h2.item-content__el-heading</c>), the price
        /// (<c>div.item__price-once</c>) and the specification list
        /// (<c>dl.specifications-list__spec</c>) are extracted and inserted as one <c>Item</c> into
        /// the "Phone" collection of the "Kaspi_Store" database.
        /// NOTE(review): the method is <c>async void</c>, so callers can neither await it nor observe
        /// its exceptions; the signature is kept only for compatibility with existing callers.
        /// NOTE(review): the input path and the connection string are hard-coded.
        /// </remarks>
        public async void ParserPhones()
        {
            string[] pageUrls   = File.ReadAllLines(@"C:\Users\aizhi\Desktop\parsed.txt");
            var      client     = new MongoClient("mongodb://localhost:27017");
            var      db         = client.GetDatabase("Kaspi_Store");
            var      collection = db.GetCollection <Item>("Phone");

            Console.WriteLine("Adding into database started...");

            // One HttpClient for the whole run; creating a client per request leaks sockets.
            using (var httpClient = new HttpClient())
            {
                foreach (string pageUrl in pageUrls)
                {
                    IHtmlDocument document;

                    // Response and body stream are released as soon as the document is parsed.
                    using (HttpResponseMessage response = await httpClient.GetAsync(pageUrl))
                    using (var body = await response.Content.ReadAsStreamAsync())
                    {
                        HtmlParser parser = new HtmlParser();
                        document = parser.ParseDocument(body);
                    }

                    // The heading markup carries a fixed-length prefix: drop its first 20 characters.
                    string header = document.QuerySelector("h2.item-content__el-heading").InnerHtml;
                    header = header.Substring(20);

                    // The trimmed price text ends with a 2-character suffix that is cut off.
                    string price = document.QuerySelector("div.item__price-once").InnerHtml;
                    price = price.Trim();
                    price = price.Substring(0, price.Length - 2);

                    Dictionary <string, string> Props = new Dictionary <string, string>();
                    foreach (var node in document.QuerySelectorAll("dl.specifications-list__spec"))
                    {
                        string propName = node.QuerySelector("span").InnerHtml;
                        string propAttr = node.QuerySelector("dd").InnerHtml.Trim();
                        propAttr        = propAttr.Replace("&nbsp;", " ");
                        Props[propName] = propAttr;
                    }

                    Item itemData = new Item
                    {
                        itemHeader = header,
                        itemPrice  = price,
                        itemProps  = Props
                    };
                    await collection.InsertOneAsync(itemData);

                    Console.WriteLine(header);
                }
            }
            Console.WriteLine("Adding completed");
        }
示例#7
0
 /// <summary>
 /// Prints how many decks were found, followed by the title of each deck on its own line.
 /// </summary>
 /// <param name="decks">Deck elements taken from the page.</param>
 private static void ShowDecks(AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> decks)
 {
     Console.WriteLine($"Found {decks.Length} decks on page.");

     foreach (var deckElement in decks)
     {
         Console.WriteLine(GetTitle(deckElement));
     }
 }
示例#8
0
        /// <summary>
        /// Advances from <paramref name="startingIndex"/> over the run of consecutive elements whose
        /// trimmed text equals the trimmed <paramref name="landmark"/>.
        /// </summary>
        /// <param name="elementsToScan">Elements to walk through.</param>
        /// <param name="startingIndex">Index at which the run is expected to begin.</param>
        /// <param name="landmark">Text to skip over; compared after trimming both sides.</param>
        /// <returns>
        /// Index of the first element that does not match, or the collection length when every
        /// remaining element matches.
        /// </returns>
        public static int skipPastLandmark(AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> elementsToScan, int startingIndex, string landmark)
        {
            int position = startingIndex;

            while (position < elementsToScan.Length)
            {
                if (elementsToScan[position].TextContent.Trim() != landmark.Trim())
                {
                    break;
                }
                ++position;
            }
            return position;
        }
示例#9
0
        /// <summary>
        /// Advances from <paramref name="startingIndex"/> over the run of consecutive elements whose
        /// trimmed text matches <paramref name="pattern"/>.
        /// </summary>
        /// <param name="elementsToScan">Elements to walk through.</param>
        /// <param name="startingIndex">Index at which the run is expected to begin.</param>
        /// <param name="pattern">Regular expression applied to each element's trimmed text.</param>
        /// <param name="regexOptions">Options passed to the regular expression.</param>
        /// <returns>
        /// Index of the first element that does not match, or the collection length when every
        /// remaining element matches.
        /// </returns>
        public static int skipPastRegex(AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> elementsToScan, int startingIndex, string pattern, RegexOptions regexOptions)
        {
            int position = startingIndex;

            while (position < elementsToScan.Length
                   && Regex.IsMatch(elementsToScan[position].TextContent.Trim(), pattern, regexOptions))
            {
                ++position;
            }
            return position;
        }
示例#10
0
 /// <summary>
 /// Searches forward from <paramref name="startingIndex"/> for the first element whose text is
 /// exactly <paramref name="landmark"/> (no trimming, case-sensitive).
 /// </summary>
 /// <param name="elementsToScan">Elements to search.</param>
 /// <param name="startingIndex">Index of the first element to examine.</param>
 /// <param name="landmark">Exact text to look for.</param>
 /// <returns>Index of the matching element, or -1 when there is none.</returns>
 public static int findLandmark(AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> elementsToScan, int startingIndex, string landmark)
 {
     int position = startingIndex;

     while (position < elementsToScan.Length)
     {
         if (elementsToScan[position].TextContent == landmark)
         {
             return position;
         }
         ++position;
     }
     return -1;  // Not found
 }
示例#11
0
 /// <summary>
 /// Searches forward from <paramref name="startingIndex"/> (inclusive) for the first element with
 /// the given tag name.
 /// </summary>
 /// <param name="elementsToScan">Elements to search.</param>
 /// <param name="startingIndex">Index of the first element to examine.</param>
 /// <param name="elementType">Tag name to look for; compared case-insensitively.</param>
 /// <returns>Index of the matching element, or -1 when there is none.</returns>
 public static int findFollowingElementByType(AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> elementsToScan, int startingIndex, string elementType)
 {
     for (int iElement = startingIndex; iElement < elementsToScan.Length; ++iElement)
     {
         // Ordinal comparison: lower-casing with the current culture breaks tag names that
         // contain "I" (e.g. "DIV", "LI") under cultures such as Turkish.
         if (string.Equals(elementsToScan[iElement].TagName, elementType, System.StringComparison.OrdinalIgnoreCase))
         {
             return iElement;
         }
     }
     return -1;  // Not found
 }
示例#12
0
        /// <summary>
        /// Downloads the page at <c>book.Url</c> and fills in the book's duration, online-text link
        /// and chapter list.
        /// </summary>
        /// <remarks>
        /// Chapter columns are located by looking for the words "chapter", "section", "reader",
        /// "time", "language" and "source" in the table header. A column that is missing from the
        /// header, or a row that is too short to contain it, yields an empty string for that field.
        /// When the page has no chapter table the chapter list stays empty.
        /// </remarks>
        /// <param name="book">Book that supplies the URL and receives the parsed metadata.</param>
        /// <param name="htmlParser">HTML parser.</param>
        /// <param name="webClient">Web page downloader.</param>
        private static void LoadBookMeta(Book book, HtmlParser htmlParser, HtmlWeb webClient)
        {
            book.Chapters = new List <Chapter>();
            string        innerHtml = webClient.LoadFromWebAsync(book.Url).GetAwaiter().GetResult().DocumentNode.InnerHtml;
            IHtmlDocument document  = htmlParser.Parse(innerHtml);

            // The duration is read from the fifth <dd> of the page; leave it untouched when the
            // page has fewer entries instead of indexing out of range.
            var bookInfoSide = document.QuerySelectorAll("dd").Select(x => x.TextContent).ToArray();
            if (bookInfoSide.Length > 4)
            {
                book.Duration = bookInfoSide[4];
            }

            var bookTextNode = document.QuerySelectorAll("a")
                               .FirstOrDefault(x => string.Equals(x.TextContent, "online text", System.StringComparison.OrdinalIgnoreCase));
            if (bookTextNode != null)
            {
                book.OnlineText = bookTextNode.GetAttribute("href");
            }

            var chapterTable = document.QuerySelector("table.chapter-download");
            var tableBody    = chapterTable != null ? chapterTable.QuerySelector("tbody") : null;
            var tableHead    = chapterTable != null ? chapterTable.QuerySelector("thead") : null;
            if (tableBody == null || tableHead == null)
            {
                return; // no chapter table on this page
            }

            var columns = tableHead.QuerySelectorAll("th").Select(x => x.TextContent).ToList();

            int chapterIndex    = FindColumnIndex(columns, "chapter");
            int sectionIndex    = FindColumnIndex(columns, "section");
            int readerIndex     = FindColumnIndex(columns, "reader");
            int durationIndex   = FindColumnIndex(columns, "time");
            int sourceTextIndex = FindColumnIndex(columns, "source");

            foreach (var chapterNode in tableBody.QuerySelectorAll("tr"))
            {
                var cells = chapterNode.QuerySelectorAll("td").ToArray();

                var chapterCell  = GetCellOrNull(cells, chapterIndex);
                var sectionCell  = GetCellOrNull(cells, sectionIndex);
                var readerCell   = GetCellOrNull(cells, readerIndex);
                var durationCell = GetCellOrNull(cells, durationIndex);
                var sourceCell   = GetCellOrNull(cells, sourceTextIndex);

                var chapterLink = chapterCell != null ? chapterCell.QuerySelector("a") : null;
                var readerLink  = readerCell != null ? readerCell.QuerySelector("a") : null;

                book.Chapters.Add(new Chapter
                {
                    AudioLink  = chapterLink != null ? chapterLink.GetAttribute("href") : string.Empty,
                    Section    = sectionCell != null ? sectionCell.TextContent : string.Empty,
                    Duration   = durationCell != null ? durationCell.TextContent : string.Empty,
                    Name       = chapterLink != null ? chapterLink.TextContent : string.Empty,
                    Reader     = readerLink != null ? readerLink.TextContent : "",
                    // NOTE(review): the href is read from the cell itself, not from a link inside it — confirm this is intended.
                    TextSource = sourceCell != null ? sourceCell.GetAttribute("href") : string.Empty
                });
            }
        }

        /// <summary>Returns the index of the first header whose text contains <paramref name="keyword"/> (case-insensitive), or -1.</summary>
        private static int FindColumnIndex(List <string> columns, string keyword)
        {
            return columns.FindIndex(x => x.IndexOf(keyword, System.StringComparison.OrdinalIgnoreCase) >= 0);
        }

        /// <summary>Returns the cell at <paramref name="index"/>, or <c>null</c> when the index is outside the row.</summary>
        private static AngleSharp.Dom.IElement GetCellOrNull(AngleSharp.Dom.IElement[] cells, int index)
        {
            return index >= 0 && index < cells.Length ? cells[index] : null;
        }
示例#13
0
        /// <summary>
        /// Searches forward from <paramref name="startingIndex"/> for the first element whose text
        /// matches <paramref name="pattern"/>.
        /// </summary>
        /// <param name="elementsToScan">Elements to search.</param>
        /// <param name="startingIndex">Index of the first element to examine.</param>
        /// <param name="pattern">Regular expression applied to each element's text.</param>
        /// <param name="regexOptions">Options passed to the regular expression.</param>
        /// <param name="elementLimit">
        /// Maximum number of elements to examine; a negative value means "up to the end of the collection".
        /// </param>
        /// <returns>Index of the matching element, or -1 when there is none within the limit.</returns>
        public static int findByRegex(AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> elementsToScan, int startingIndex, string pattern, RegexOptions regexOptions, int elementLimit = -1)
        {
            // Never scan past the end of the collection, even when the limit reaches beyond it.
            int lastElement = elementsToScan.Length;

            if (elementLimit >= 0 && elementLimit < lastElement - startingIndex)
            {
                lastElement = startingIndex + elementLimit;
            }

            for (int iElement = startingIndex; iElement < lastElement; ++iElement)
            {
                if (Regex.IsMatch(elementsToScan[iElement].TextContent, pattern, regexOptions))
                {
                    return iElement;
                }
            }
            return -1;  // Not found
        }
示例#14
0
        /// <summary>
        /// Rebuilds <c>article</c> as the markup of the last anchor found in <c>text</c>, a line
        /// break, and the result of <c>GetLsi()</c>.
        /// </summary>
        /// <returns>The new value of <c>article</c>.</returns>
        private string getSmall()
        {
            article = GetLsi();

            var htmlParser = new AngleSharp.Parser.Html.HtmlParser();
            var links      = htmlParser.Parse(text).QuerySelectorAll("a");

            // Only the last anchor of the document ends up in the result
            string lastLinkHtml = links.Length > 0 ? links[links.Length - 1].OuterHtml : string.Empty;

            article = lastLinkHtml + Environment.NewLine + article;
            return article;
        }
示例#15
0
        /// <summary>
        /// Builds one <c>CarModelDto</c> per element: the display name comes from the element text,
        /// the value from its lower-cased <c>title</c> attribute with the brand removed and spaces
        /// replaced by dashes.
        /// </summary>
        /// <param name="data">Elements describing the models of one brand.</param>
        /// <param name="carBrand">Brand name that is stripped from each title.</param>
        /// <returns>The generated list, wrapped in an already-completed task.</returns>
        private async Task <List <CarModelDto> > GenerateModelList(AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> data, string carBrand)
        {
            _logger.Log($"Returned {data.Length} model entries for selected brand: {carBrand}");

            var models = new List <CarModelDto>();
            foreach (var entry in data)
            {
                string displayName = entry.TextContent.Replace("  ", string.Empty);

                string loweredTitle = entry.GetAttribute("title").ToLower();
                string brandToken   = "  " + carBrand.ToLower() + " ";
                string slug         = loweredTitle.Replace(brandToken, string.Empty).Replace(" ", "-");

                models.Add(new CarModelDto()
                {
                    ModelName  = displayName,
                    ModelValue = slug
                });
            }

            return await Task.FromResult(models);
        }
示例#16
0
        /// <summary>
        /// Searches forward from <paramref name="startingIndex"/> (inclusive) for the first element
        /// with the given tag name.
        /// </summary>
        /// <param name="elementsToScan">Elements to search.</param>
        /// <param name="startingIndex">Index of the first element to examine.</param>
        /// <param name="type">Tag name to look for; compared case-insensitively.</param>
        /// <param name="elementLimit">
        /// Maximum number of elements to examine; a negative value means "up to the end of the collection".
        /// </param>
        /// <returns>Index of the matching element, or -1 when there is none within the limit.</returns>
        public static int findNextElementOfType(AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> elementsToScan, int startingIndex, string type, int elementLimit = -1)
        {
            // Never scan past the end of the collection, even when the limit reaches beyond it.
            int lastElement = elementsToScan.Length;

            if (elementLimit >= 0 && elementLimit < lastElement - startingIndex)
            {
                lastElement = startingIndex + elementLimit;
            }

            for (int iElement = startingIndex; iElement < lastElement; ++iElement)
            {
                // Ordinal comparison avoids culture-specific lower-casing of tag names.
                if (string.Equals(elementsToScan[iElement].TagName, type, System.StringComparison.OrdinalIgnoreCase))
                {
                    return iElement;
                }
            }
            return -1;  // Not found
        }
示例#17
0
        /// <summary>
        /// Runs every selector in <c>Settings.Tags</c> against the document and converts each matched
        /// element with <c>ParseTag</c>.
        /// </summary>
        /// <param name="document">Document to query; when <c>null</c>, every selector maps to an empty list.</param>
        /// <returns>A dictionary keyed by selector, holding the parsed elements for that selector.</returns>
        public async Task <Dictionary <string, List <StringDictionary> > > Parse(IHtmlDocument document)
        {
            Dictionary <string, List <StringDictionary> > result = new Dictionary <string, List <StringDictionary> >();

            foreach (string tag in Settings.Tags)
            {
                List <StringDictionary> elements = new List <StringDictionary>();

                // The null-conditional call yields null for a null document; enumerating that
                // would throw, so an absent document simply contributes no elements.
                AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> items = document?.QuerySelectorAll(tag);
                if (items != null)
                {
                    foreach (AngleSharp.Dom.IElement item in items)
                    {
                        elements.Add(ParseTag(item));
                    }
                }
                result.Add(tag, elements);
            }
            return result;
        }
示例#18
0
        /// <summary>
        /// Sanity check that the page has not changed format: the text of the table's <c>th</c>
        /// cells must equal <paramref name="expected"/>, in the same order.
        /// </summary>
        /// <param name="expected">Header names the table is expected to have.</param>
        /// <param name="given">Table whose header cells are checked.</param>
        /// <returns>Always <c>true</c>; a mismatch is reported by throwing.</returns>
        /// <exception cref="Exception">The header names differ from <paramref name="expected"/>.</exception>
        private static bool verifyHeaderNames(List <string> expected, IHtmlTableElement given)
        {
            List <string> headerNames = new List <string>();

            foreach (AngleSharp.Dom.IElement headerCell in given.QuerySelectorAll("th"))
            {
                headerNames.Add(headerCell.TextContent);
            }

            if (!expected.SequenceEqual(headerNames))
            {
                // Both lists are wrapped in '|' so leading and trailing whitespace stays visible.
                throw new Exception("Headers do not match. Expected: |" + String.Join(",", expected) + "| but found |" + String.Join(",", headerNames) + "|");
            }
            return true;
        }
示例#19
0
        /// <summary>
        /// Click handler: reloads the file, builds <c>article</c> from <c>text</c> followed by the
        /// result of <c>GetLsi()</c>, copies it to the clipboard, shows it in the browser control and
        /// fills the name/URL boxes from the anchors of the article (the last anchor wins).
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            readFile();

            article  = text;
            article += Environment.NewLine + GetLsi();

            Clipboard.SetText(article);
            webBrowser1.DocumentText = article;

            var htmlParser = new AngleSharp.Parser.Html.HtmlParser();
            var links      = htmlParser.Parse(article).QuerySelectorAll("a");

            // Each anchor overwrites the previous values, so the boxes end up showing the last one
            foreach (var link in links)
            {
                wName.Text = link.InnerHtml;
                wUrl.Text  = link.GetAttribute("href");
            }
        }
        /// <inheritdoc />
        /// <remarks>
        /// Versions are scraped from the package page: the cell that follows the first table header
        /// starting with "Versions" is searched for <c>a</c> and <c>strong</c> elements, whose text
        /// (trimmed, lower-cased) is taken as a version. Failures are logged and rethrown.
        /// </remarks>
        public override async Task <IEnumerable <string> > EnumerateVersionsAsync(PackageURL purl, bool useCache = true, bool includePrerelease = true)
        {
            Logger.Trace("EnumerateVersions {0}", purl?.ToString());
            if (purl == null || purl.Name is null)
            {
                return Array.Empty <string>();
            }

            try
            {
                string     packageName = purl.Name;
                HttpClient httpClient  = CreateHttpClient();

                // The response is released once the body has been read.
                using System.Net.Http.HttpResponseMessage html = await httpClient.GetAsync($"{ENV_HACKAGE_ENDPOINT}/package/{packageName}");

                html.EnsureSuccessStatusCode();
                HtmlParser parser = new();
                AngleSharp.Html.Dom.IHtmlDocument document = await parser.ParseDocumentAsync(await html.Content.ReadAsStringAsync());

                List <string> versionList = new();
                foreach (AngleSharp.Dom.IElement th in document.QuerySelectorAll("th"))
                {
                    if (!th.TextContent.StartsWith("Versions", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // A header without a following cell carries no versions; keep looking.
                    AngleSharp.Dom.IElement?td = th.NextElementSibling;
                    if (td is null)
                    {
                        continue;
                    }

                    foreach (AngleSharp.Dom.IElement version in td.QuerySelectorAll("a,strong"))
                    {
                        // Version strings are machine-readable, so lower-case them culture-independently.
                        string versionString = version.TextContent.ToLowerInvariant().Trim();
                        Logger.Debug("Identified {0} version {1}.", packageName, versionString);
                        versionList.Add(versionString);
                    }
                    break;
                }

                return SortVersions(versionList.Distinct());
            }
            catch (Exception ex)
            {
                Logger.Debug("Unable to enumerate versions: {0}", ex.Message);
                throw;
            }
        }
示例#21
0
        /// <summary>
        /// Scrapes the Billboard current-albums chart and returns the title and artist of each entry.
        /// </summary>
        /// <remarks>
        /// The result array always has 100 slots; slots beyond the number of scraped entries stay
        /// <c>null</c>, and entries beyond the 100th are ignored.
        /// </remarks>
        /// <returns>A JSON object with <c>success = true</c> and the <c>albums</c> array.</returns>
        public JsonResult ScrapeAlbumChart()
        {
            string topAlbumsUrl = "https://www.billboard.com/charts/current-albums";

            Billboard_Album[] topAlbums = new Billboard_Album[100];

            IHtmlDocument document;

            // Response and body stream are released as soon as the document has been parsed.
            using (HttpResponseMessage request = client.GetAsync(topAlbumsUrl).Result)
            using (Stream response = request.Content.ReadAsStreamAsync().Result)
            {
                HtmlParser parser = new HtmlParser();
                document = parser.ParseDocument(response);
            }

            AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> albums = document.GetElementsByClassName("chart-list-item__first-row chart-list-item__cursor-pointer");

            // Never write past the fixed-size result array, however many rows the page contains.
            int count = albums.Length < topAlbums.Length ? albums.Length : topAlbums.Length;

            for (int i = 0; i < count; i++)
            {
                var album         = albums[i];
                var artistElement = album.GetElementsByClassName("chart-list-item__artist")[0];

                string title = album.GetElementsByClassName("chart-list-item__title-text")[0].TextContent.Trim();

                // Some albums wrap the artist name in a link, some don't.
                string artist = artistElement.ChildElementCount > 0
                                ? artistElement.FirstElementChild.TextContent.Trim()
                                : artistElement.TextContent.Trim();

                topAlbums[i] = new Billboard_Album
                {
                    Title  = title,
                    Artist = artist
                };
            }

            return Json(new
            {
                success = true,
                albums = topAlbums
            });
        }
示例#22
0
        /// <summary>
        /// Click handler: starts <c>article</c> from <c>GetLsi()</c>, then prepends one Markdown
        /// link line (<c>[text](href)</c>) for every anchor found in <c>text</c>, and copies the
        /// result to the clipboard.
        /// </summary>
        private void button10_Click(object sender, EventArgs e)
        {
            article = GetLsi();

            var htmlParser = new AngleSharp.Parser.Html.HtmlParser();
            var links      = htmlParser.Parse(text).QuerySelectorAll("a");

            foreach (var link in links)
            {
                string target = link.GetAttribute("href");
                string label  = link.InnerHtml;

                // Each link line goes in front, so the last anchor ends up first
                string markdownLink = string.Concat("[", label, "]", "(", target, ")");
                article = markdownLink + Environment.NewLine + article;
            }

            Clipboard.SetText(article);
        }
示例#23
0
        /// <summary>
        /// Extracts a usable URL from the first element of <paramref name="urlData"/>.
        /// </summary>
        /// <param name="urlData">Candidate link elements; only the first one is inspected.</param>
        /// <returns>
        /// <c>CommonTags.NotDefined</c> when there is no element, the element has no <c>href</c>, or
        /// the <c>href</c> equals <c>CommonTags.JavascriptVoid</c>; the <c>href</c> prefixed with
        /// "https:" when it is protocol-relative ("//..."); otherwise the <c>href</c> unchanged.
        /// </returns>
        private string ProcessUrl(AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> urlData)
        {
            var firstLink = urlData.FirstOrDefault();

            if (firstLink == null)
            {
                return CommonTags.NotDefined;
            }

            // GetAttribute returns null when the attribute is absent.
            string url = firstLink.GetAttribute("href");

            if (url == null || url == CommonTags.JavascriptVoid)
            {
                return CommonTags.NotDefined;
            }

            if (url.StartsWith("//", System.StringComparison.Ordinal))
            {
                return $"https:{url}";
            }

            return url;
        }
示例#24
0
        /// <summary>
        /// Locates a sequence of landmarks one after another, each search starting where the
        /// previous landmark ended.
        /// </summary>
        /// <param name="elementsToScan">Elements to search.</param>
        /// <param name="startingIndex">Index at which the search for the first landmark begins.</param>
        /// <param name="landmarks">Landmark texts, in the order they are expected to appear.</param>
        /// <returns>
        /// Index of the last landmark; -1 when any landmark is missing; <paramref name="startingIndex"/>
        /// when the list is null or empty.
        /// </returns>
        public static int findLandmarks(AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> elementsToScan, int startingIndex, IList <string> landmarks)
        {
            bool nothingToFind = landmarks == null || landmarks.Count == 0;
            if (nothingToFind)
            {
                return startingIndex;
            }

            int position   = startingIndex;
            int finalIndex = landmarks.Count - 1;

            for (int i = 0; i <= finalIndex; ++i)
            {
                string current = landmarks[i];

                position = findLandmark(elementsToScan, position, current);
                if (position == -1)
                {
                    return -1; // Could not find one of the landmarks
                }

                // Move beyond every landmark except the last one, whose index is the result
                if (i < finalIndex)
                {
                    position = skipPastLandmark(elementsToScan, position, current);
                }
            }
            return position;
        }
示例#25
0
        /// <summary>
        /// Finds the nearest element of the given type that contains the element at
        /// <paramref name="startingIndex"/> (the element itself counts) and returns its index in
        /// <paramref name="elementsToScan"/>.
        /// </summary>
        /// <param name="elementsToScan">Elements among which the container is looked up.</param>
        /// <param name="startingIndex">Index of the element whose ancestors are examined.</param>
        /// <param name="elementType">Tag name of the wanted container; compared case-insensitively.</param>
        /// <returns>
        /// Index of the containing element, or -1 when <paramref name="startingIndex"/> is out of
        /// range, no such ancestor exists, or the ancestor is not part of <paramref name="elementsToScan"/>.
        /// </returns>
        public static int findContainingElementByType(AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> elementsToScan, int startingIndex, string elementType)
        {
            // Callers may pass the "not found" position of a previous search.
            if (startingIndex < 0 || startingIndex >= elementsToScan.Length)
            {
                return -1;
            }

            // Walk up the ancestor chain; ordinal comparison avoids culture-specific lower-casing.
            var element = elementsToScan[startingIndex];

            while (element != null && !string.Equals(element.TagName, elementType, System.StringComparison.OrdinalIgnoreCase))
            {
                element = element.ParentElement;
            }
            if (element == null)
            {
                return -1;   // No containing element of this type found.
            }

            // Converting back to index numbers is fugly.
            for (int iElement = 0; iElement < elementsToScan.Length; ++iElement)
            {
                if (elementsToScan[iElement] == element)
                {
                    return iElement;
                }
            }
            Debug.Assert(false, "findContainingElementByType could not find element known to be in elementsToScan");
            return -1;
        }
示例#26
0
 /// <summary>
 /// Writes one text file per deck into <paramref name="outputFolder"/> and echoes the same
 /// content to the console.
 /// </summary>
 /// <remarks>
 /// Each file is named after the deck title (characters that are not allowed in file names are
 /// replaced with '_') and contains two comment lines with the title and source URL, followed by
 /// one "<c>{amount}x {cardName}</c>" line per card.
 /// </remarks>
 /// <param name="outputFolder">Folder that receives the files.</param>
 /// <param name="pageUrl">URL recorded in each file as the source.</param>
 /// <param name="decks">Deck elements taken from the page.</param>
 private static void WriteDeckDefinitionFiles(string outputFolder, string pageUrl, AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> decks)
 {
     foreach (var deck in decks)
     {
         string deckName = GetTitle(deck);
         Console.WriteLine($"\n{deckName}");

         // The title comes from the page, so it may contain characters a file name cannot.
         string safeName = deckName;
         foreach (char invalid in Path.GetInvalidFileNameChars())
         {
             safeName = safeName.Replace(invalid, '_');
         }

         // Path.Combine uses the platform's separator instead of a hard-coded backslash.
         string fileName = Path.Combine(outputFolder, safeName + ".txt");
         var    cards    = deck.QuerySelectorAll("div.sorted-by-overview-container span.row");
         using (StreamWriter writer = new StreamWriter(fileName))
         {
             writer.WriteLine("// " + deckName);
             writer.WriteLine("// Source: " + pageUrl);
             foreach (var card in cards)
             {
                 int    amount   = int.Parse(card.QuerySelector("span.card-count").TextContent);
                 string cardName = card.QuerySelector("span.card-name a").TextContent;
                 string line     = $"{amount}x {cardName}";
                 Console.WriteLine(line);
                 writer.WriteLine(line);
             }
         }
     }
 }
        /// <summary>
        /// Opens the goods-detail page of a SKU and collects every product image on it, including
        /// each image's pixel dimensions.
        /// </summary>
        /// <remarks>
        /// Only images whose alternative text is "商品图片" or "商品详情图片" are considered. Each image
        /// is downloaded synchronously to read its width and height. If any step throws, collection
        /// stops and the entries gathered so far are returned.
        /// </remarks>
        /// <param name="skuid">SKU identifier used in the page URL and stored on every entry.</param>
        /// <returns>The photo entries collected before completion or the first failure.</returns>
        private async Task <List <Models.SkuPhotoInfo> > getSkuPhotoInfoAsync(string skuid)
        {
            List <Models.SkuPhotoInfo> skuPhotoInfos = new List <Models.SkuPhotoInfo>();

            try
            {
                var config  = Configuration.Default.WithDefaultLoader();
                var address = "https://66123123.com/Goods/GoodsDetail?id=" + skuid;
                var context = BrowsingContext.New(config);
                AngleSharp.Dom.Document document = (AngleSharp.Dom.Document) await context.OpenAsync(address);

                AngleSharp.Dom.IHtmlCollection <AngleSharp.Html.Dom.IHtmlImageElement> images = document.Images;
                List <AngleSharp.Html.Dom.IHtmlImageElement> imgs =
                    images.Where(i => i.AlternativeText == "商品图片" || i.AlternativeText == "商品详情图片").ToList();
                foreach (AngleSharp.Html.Dom.IHtmlImageElement item in imgs)
                {
                    Models.SkuPhotoInfo skuPhotoInfo = new Models.SkuPhotoInfo
                    {
                        skuId      = skuid,
                        photoUrl   = item.Source,
                        photoTitle = item.AlternativeText,
                        skuUrl     = address
                    };

                    WebRequest request = WebRequest.Create(item.Source);
                    request.Credentials = CredentialCache.DefaultCredentials;

                    // Response, stream and decoded image all hold native resources; release each
                    // as soon as the dimensions have been read.
                    using (WebResponse response = request.GetResponse())
                    using (Stream s = response.GetResponseStream())
                    using (System.Drawing.Image image = System.Drawing.Image.FromStream(s))
                    {
                        skuPhotoInfo.photoHeight = image.Height;
                        skuPhotoInfo.photoWidth  = image.Width;
                    }
                    skuPhotoInfos.Add(skuPhotoInfo);
                }
            }
            catch (Exception ex)
            {
                // Best effort: keep what was collected, but do not lose the failure silently.
                string errorMsg = "爬取图片报异常:" + ex.Message;
                System.Diagnostics.Debug.WriteLine(errorMsg);
            }
            return skuPhotoInfos;
        }
示例#28
0
        /// <inheritdoc />
        /// <remarks>
        /// The latest version is read from the cell following the "Version:" cell of the package
        /// index page; older versions are derived from the ".tar.gz" links of the package's archive
        /// listing. Failures are logged and rethrown.
        /// </remarks>
        public override async Task <IEnumerable <string> > EnumerateVersionsAsync(PackageURL purl, bool useCache = true, bool includePrerelease = true)
        {
            Logger.Trace("EnumerateVersions {0}", purl?.ToString());
            if (purl == null || purl.Name is null)
            {
                return new List <string>();
            }

            try
            {
                string        packageName = purl.Name;
                List <string> versionList = new();
                HttpClient    httpClient  = CreateHttpClient();
                HtmlParser    parser      = new();

                // Get the latest version
                AngleSharp.Html.Dom.IHtmlDocument document;
                using (System.Net.Http.HttpResponseMessage indexResponse = await httpClient.GetAsync($"{ENV_CRAN_ENDPOINT}/web/packages/{packageName}/index.html"))
                {
                    indexResponse.EnsureSuccessStatusCode();
                    document = await parser.ParseDocumentAsync(await indexResponse.Content.ReadAsStringAsync());
                }

                AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> tds = document.QuerySelectorAll("td");
                for (int i = 0; i < tds.Length; i++)
                {
                    if (tds[i].TextContent == "Version:")
                    {
                        // The value lives in the next cell; a label in the very last cell has none.
                        if (i + 1 < tds.Length)
                        {
                            string?value = tds[i + 1]?.TextContent?.Trim();
                            if (value != null)
                            {
                                versionList.Add(value);
                            }
                        }
                        break;
                    }
                }

                // Get the remaining versions
                using (System.Net.Http.HttpResponseMessage archiveResponse = await httpClient.GetAsync($"{ENV_CRAN_ENDPOINT}/src/contrib/Archive/{packageName}/"))
                {
                    archiveResponse.EnsureSuccessStatusCode();
                    document = await parser.ParseDocumentAsync(await archiveResponse.Content.ReadAsStringAsync());
                }

                foreach (AngleSharp.Dom.IElement anchor in document.QuerySelectorAll("a"))
                {
                    string?href = anchor.GetAttribute("href");
                    if (href?.Contains(".tar.gz") ?? false)
                    {
                        // Strip the extension and the "<package>_" prefix from the archive file name.
                        string version = href.Replace(".tar.gz", "");
                        version = version.Replace(packageName + "_", "").Trim();
                        Logger.Debug("Identified {0} version {1}.", packageName, version);
                        versionList.Add(version);
                    }
                }
                return SortVersions(versionList.Distinct());
            }
            catch (Exception ex)
            {
                Logger.Debug("Unable to enumerate versions: {0}", ex.Message);
                throw;
            }
        }
示例#29
0
        /// <summary>
        /// Gathers metadata text for the given package by concatenating the contents of every
        /// <c>.dsc</c> file linked from each base URL returned by <c>GetBaseURLs</c>. While no
        /// content has been collected yet, a search against <c>ENV_UBUNTU_ENDPOINT</c> is used as
        /// a fallback and the pages behind its <c>a.resultlink</c> anchors are appended instead.
        /// </summary>
        /// <param name="purl">The package to look up; its <c>Name</c> must be non-null.</param>
        /// <param name="useCache">Forwarded to <c>GetHttpStringCache</c> for the listing and search requests.</param>
        /// <returns>
        /// <c>null</c> when <paramref name="purl"/> or its name is null; otherwise the collected
        /// text (possibly empty). Errors while fetching are logged at debug level and not rethrown.
        /// </returns>
        public override async Task <string?> GetMetadataAsync(PackageURL purl, bool useCache = true)
        {
            Logger.Trace("GetMetadata {0}", purl?.ToString());

            if (purl == null || purl.Name == null)
            {
                return(null);
            }

            StringBuilder metadataContent = new();
            HttpClient    httpClient      = CreateHttpClient();

            foreach (string distroUrlPrefix in GetBaseURLs(purl))
            {
                try
                {
                    string? html = await GetHttpStringCache(httpClient, distroUrlPrefix, useCache : useCache, neverThrow : true);

                    if (html != null)
                    {
                        AngleSharp.Html.Dom.IHtmlDocument document = await new HtmlParser().ParseDocumentAsync(html);
                        foreach (AngleSharp.Dom.IElement anchor in document.QuerySelectorAll("a"))
                        {
                            // Anchors without an href yield null here; skip them instead of
                            // throwing, which would abandon the rest of this listing.
                            string? anchorHref = anchor.GetAttribute("href");
                            if (anchorHref == null || !anchorHref.EndsWith(".dsc", StringComparison.Ordinal))
                            {
                                continue;
                            }

                            Logger.Debug("Found a .dsc file: {0}", anchorHref);
                            string? dscContent = await GetHttpStringCache(httpClient, distroUrlPrefix + anchorHref, neverThrow : true);

                            if (dscContent == null)
                            {
                                continue;
                            }
                            metadataContent.AppendLine(dscContent);
                        }
                    }
                }
                catch (Exception ex)
                {
                    Logger.Debug("Error obtaining .dsc file for {0}: {1}", purl.ToString(), ex.Message);
                }

                // Fallback to packages.ubuntu.com if we haven't seen any .dsc files
                if (metadataContent.Length == 0)
                {
                    try
                    {
                        // Escape the name so characters such as '+' survive as part of the query value.
                        string keywords = Uri.EscapeDataString(purl.Name);
                        string? searchResults = await GetHttpStringCache(httpClient, $"{ENV_UBUNTU_ENDPOINT}/search?keywords={keywords}&searchon=names&exact=1&suite=all&section=all", useCache);

                        if (!string.IsNullOrEmpty(searchResults))
                        {
                            HtmlParser parser = new();
                            AngleSharp.Html.Dom.IHtmlDocument document = await parser.ParseDocumentAsync(searchResults);

                            AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> anchorItems = document.QuerySelectorAll("a.resultlink");

                            foreach (AngleSharp.Dom.IElement anchorItem in anchorItems)
                            {
                                // A result link without a usable href would otherwise resolve to
                                // the endpoint root, which is not package metadata.
                                string? metadataUrl = anchorItem.GetAttribute("href");
                                if (string.IsNullOrEmpty(metadataUrl))
                                {
                                    continue;
                                }

                                string? metadataPage = await GetHttpStringCache(httpClient, $"{ENV_UBUNTU_ENDPOINT}/{metadataUrl}");
                                if (metadataPage != null)
                                {
                                    metadataContent.AppendLine(metadataPage);
                                }
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        Logger.Debug(ex, "Error fetching Ubuntu metadata: {0}", ex.Message);
                    }
                }
            }

            return(metadataContent.ToString());
        }
示例#30
0
        /// <summary>
        /// Parses one HTML table into rows of cells, resolves row-head hierarchy, flattens the
        /// numeric cells into (attribute name, column heading, value) records and writes the
        /// distinct records to <paramref name="outputPath"/>, one per line.
        /// </summary>
        /// <param name="elements">Collection of candidate table elements.</param>
        /// <param name="tableIndex">Index into <paramref name="elements"/> of the table to extract.</param>
        /// <param name="outputPath">Path of the text file to create (overwritten if it exists).</param>
        /// <param name="rowHeadOverrides">Passed through to <c>BuildComplexRowHeads</c>.</param>
        /// <param name="config">Passed through to <c>BuildComplexRowHeads</c>.</param>
        /// <remarks>
        /// If no column headings can be extracted, a "FATAL" message is written to the console
        /// and the method returns without creating the output file.
        /// </remarks>
        public static void ExtractTableFromHTML(AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> elements, int tableIndex, string outputPath, IDictionary <string, string> rowHeadOverrides, IDictionary <string, string> config)
        {
            var statementTable = elements[tableIndex];

            // Parse statement table just found into a 2d matrix (actually a list of TableRow objects, which contain a list of TableCell objects)
            var tableData = new List <TableRow>();

            foreach (var rowElement in statementTable.QuerySelectorAll("TR"))
            {
                var rowData = new TableRow();

                foreach (var cellElement in rowElement.QuerySelectorAll("TD"))
                {
                    // Extract cell value
                    TableCell tableCell = new TableCell(cellElement);

                    // Duplicate cell value across all cols it spans. A missing, malformed or
                    // non-positive colspan counts as 1 so the cell is never dropped
                    // (Int32.TryParse writes 0 to its out argument when parsing fails).
                    int colSpan     = 1;
                    var colSpanAttr = cellElement.Attributes.FirstOrDefault(a => a.Name == "colspan");
                    if (colSpanAttr != null)
                    {
                        int parsedSpan;
                        if (Int32.TryParse(colSpanAttr.Value, out parsedSpan) && parsedSpan > 0)
                        {
                            colSpan = parsedSpan;
                        }
                    }
                    for (int j = 0; j < colSpan; ++j)
                    {
                        rowData.AddCell(tableCell);
                    }
                }

                tableData.Add(rowData);
            }

            // For diagnostic purposes save a csv of the parsed table
//            writeTableToFile(outputPath + ".tbl", tableData);

            // Extract the column Headings from the table into list
            IList <string> columnHeadings = ExtractColumnHeadings(tableData);

            if (columnHeadings == null)
            {
                // Without headings no record can be built; stop here rather than fail later
                // with a NullReferenceException on the first numeric cell.
                Console.WriteLine("FATAL: Cannot find any qualifying heading rows in table");
                return;
            }

            // Post-process the rowheads in the table, calculating its relative indentation level (compared to the rest of the rowheads)
            CalcRowheadIndentationLevels(tableData);

            // Post-process the rowheads, linking each to any parents it has, based on rules and clues.
            BuildComplexRowHeads(tableData, rowHeadOverrides, config);

            // Flatten matrix to a list of tuples using some rules
            List <FlattenedRow> results = new List <FlattenedRow>();

            foreach (var row in tableData)
            {
                // Create the attribute name for this row from its row head and those of its parents,
                // outermost parent first, separated by '|'.
                string attributeName = row.RowHead.Text;
                TableRow ancestor    = row.parentRow;
                while (ancestor != null && ancestor.RowHead.Text.Length > 0)
                {
                    attributeName = ancestor.RowHead.Text + "|" + attributeName;
                    ancestor      = ancestor.parentRow;
                }
                if (attributeName.Length == 0)
                {
                    continue;                               // Assumption: rows without attribute names should be skipped.
                }
                // Standardize attribute name format: only single spaces between words
                attributeName = ConsolidateWhitespace(attributeName);

                // Scan columns, starting at index 1 (index 0 is not treated as a data column).
                int nCols = row.Cells.Count;
                for (int iCol = 1; iCol < nCols; ++iCol)
                {
                    var col = row.Cells[iCol];
                    string colContent = col.Text;

                    // Process numeric columns only.  Exclude centered (heading) cols.
                    // NOTE(review): the pattern is unanchored, so any text containing a digit qualifies.
                    if (col.HorizontalAlignment == TableCell.HORIZONTAL_ALIGNMENT.CENTER || !Regex.IsMatch(colContent, @"\(?\d+\)?"))
                    {
                        continue;
                    }

                    // Convert (xxx) to -xxx.  Drop comma separators
                    colContent = colContent.Replace('(', '-').Replace(")", "").Replace(",", "");

                    // Get the heading for this col
                    string heading = columnHeadings[iCol];

                    // Create the tuple with this data and add it to the results list.
                    results.Add(new FlattenedRow(attributeName, heading, colContent));
                }
            }

            // HTML column spans can result in duplicated entries: get rid of them.
            var distinctFlatRows = results.Distinct();

            // Write the flattened matrix out to file
            using (StreamWriter fsw = File.CreateText(outputPath))
            {
                foreach (var record in distinctFlatRows)
                {
                    fsw.Write(record + "\r\n");
                }
            }
        }