/// <summary>
/// Builds a lookup from class id to registration event target for every row of
/// <paramref name="classesTable"/> except the first one, which is skipped.
/// </summary>
/// <param name="classesTable">Table whose rows are parsed in parallel.</param>
/// <returns>A dictionary keyed by class id; a duplicate id makes the insertion throw.</returns>
public static Dictionary <int, string> ParseRegestrationEventTargets(IHtmlTableElement classesTable)
        {
            var collected = new ConcurrentBag <(int ClassId, string ClassRegistrationEventTarget)>();
            var dataRows  = classesTable.Rows.Skip(1);

            // Rows are parsed concurrently, so results land in the bag in no particular order.
            Parallel.ForEach(dataRows, row =>
            {
                collected.Add(ParseClassIdAndRegistrationEventTarget(row));
            });

            var byClassId = new Dictionary <int, string>();
            foreach (var pair in collected)
            {
                byClassId.Add(pair.ClassId, pair.ClassRegistrationEventTarget);
            }

            return byClassId;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Fetches the weapon table from <c>baseURL + weaponURL</c>, checks its header names,
        /// converts every row into a <c>Weapon</c> and writes the list to <c>weapon.json</c>
        /// as indented JSON.
        /// </summary>
        private static async Task scrapeWeapons()
        {
            var headerNames = new List <string> { "Type", "Id", "Name", "ModelType", "MainModel", "PartModel" };
            IHtmlTableElement weaponTable = await getTable(baseURL + weaponURL);

            verifyHeaderNames(headerNames, weaponTable);

            // Cell order follows the verified header order above.
            List <Weapon> weapons = getRows(weaponTable)
                                    .Select(row => new Weapon
            {
                weapon_type = (Weapon.WEAPON_TYPE)Enum.Parse(typeof(Weapon.WEAPON_TYPE), row.Cells[0].InnerHtml, true),
                ID          = Int32.Parse(row.Cells[1].InnerHtml),
                name        = row.Cells[2].InnerHtml,
                model_type  = (Weapon.MODEL_TYPE)Enum.Parse(typeof(Weapon.MODEL_TYPE), row.Cells[3].InnerHtml, true),
                main_model  = row.Cells[4].InnerHtml,
                part_model  = row.Cells[5].InnerHtml
            })
                                    .ToList();

            // The file is opened (and truncated) before serialization runs, as before.
            using (var writer = new StreamWriter("weapon.json", append: false))
            {
                string json = JsonConvert.SerializeObject(weapons, Formatting.Indented);
                writer.Write(json);
            }
        }
        /// <summary>
        /// Parses every row of <paramref name="classesTable"/> except the first into a <c>UClass</c>.
        /// </summary>
        /// <param name="classesTable">Table whose rows are parsed in parallel.</param>
        /// <param name="classYear">Year forwarded unchanged to <c>ParseClass</c>.</param>
        /// <param name="classSemester">Semester forwarded unchanged to <c>ParseClass</c>.</param>
        /// <returns>The parsed classes; the order is not tied to the row order.</returns>
        public static UClass[] ParseClassesTable(IHtmlTableElement classesTable, int classYear = 0, USemester classSemester = USemester.Unknown)
        {
            ConcurrentBag <UClass> parsed = new ConcurrentBag <UClass>();
            var dataRows = classesTable.Rows.Skip(1);

            Parallel.ForEach(dataRows, row =>
            {
                UClass parsedClass = ParseClass(row, classYear, classSemester);
                parsed.Add(parsedClass);
            });

            return parsed.ToArray();
        }
Exemplo n.º 4
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/>, parses it as HTML and returns the
        /// single table it is expected to contain.
        /// </summary>
        /// <param name="url">Absolute URL of the page to download.</param>
        /// <returns>The only <c>table</c> element found in the parsed document.</returns>
        /// <exception cref="HttpRequestException">The request failed or returned a non-success status code.</exception>
        /// <exception cref="InvalidOperationException">The page does not contain exactly one table.</exception>
        private static async Task <IHtmlTableElement> getTable(string url)
        {
            // The client, response and body stream are all disposable; release them as soon as
            // the document has been parsed. (A locally created, never-cancelled
            // CancellationTokenSource was removed: its checks could never fire.)
            using (HttpClient httpClient = new HttpClient())
            using (HttpResponseMessage response = await httpClient.GetAsync(url))
            {
                // Fail with the real HTTP error instead of parsing an error page.
                response.EnsureSuccessStatusCode();

                using (Stream body = await response.Content.ReadAsStreamAsync())
                {
                    HtmlParser    parser   = new HtmlParser();
                    IHtmlDocument document = parser.ParseDocument(body);

                    //A sanity check to ensure that there are no unexpected changes
                    List <AngleSharp.Dom.IElement> tables = document.All.Where(elem => "table".Equals(elem.TagName, StringComparison.InvariantCultureIgnoreCase)).ToList();

                    if (tables.Count != 1)
                    {
                        throw new InvalidOperationException("Unexpect number of tables: " + tables.Count + ", expected 1");
                    }

                    return((IHtmlTableElement)tables[0]);
                }
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Fetches the armor table from <c>baseURL + armorURL</c>, checks its header names,
        /// converts every row into an <c>Armor</c> and writes the list to <c>armor.json</c>
        /// as indented JSON.
        /// </summary>
        private static async Task scrapeArmor()
        {
            var headerNames = new List <string> { "Type", "ArmorId", "Name", "LayeredId", "Male", "Female" };
            IHtmlTableElement armorTable = await getTable(baseURL + armorURL);

            verifyHeaderNames(headerNames, armorTable);

            var armorPieces = new List <Armor>();

            // Cell order follows the verified header order above.
            foreach (IHtmlTableRowElement armorRow in getRows(armorTable))
            {
                var cells = armorRow.Cells;
                armorPieces.Add(new Armor
                {
                    type            = (Armor.ARMOR_SLOT)Enum.Parse(typeof(Armor.ARMOR_SLOT), cells[0].InnerHtml, true),
                    ID              = Int32.Parse(cells[1].InnerHtml),
                    name            = cells[2].InnerHtml,
                    layered_ID      = Int32.Parse(cells[3].InnerHtml),
                    male_location   = cells[4].InnerHtml,
                    female_location = cells[5].InnerHtml
                });
            }

            // The file is opened (and truncated) before serialization runs, as before.
            using (var writer = new StreamWriter("armor.json", append: false))
            {
                string json = JsonConvert.SerializeObject(armorPieces, Formatting.Indented);
                writer.Write(json);
            }
        }
Exemplo n.º 6
0
 /// <summary>
 /// Creates one promo set per distinct release id found in the first cell of each row.
 /// </summary>
 /// <param name="table">Table to scan; may be null.</param>
 /// <returns>Release id to promo set, or an empty dictionary when <paramref name="table"/> is null.</returns>
 private Dictionary <string, R4UReleaseSet> CreatePRSets(IHtmlTableElement table)
 {
     if (table == null)
     {
         return new Dictionary <string, R4UReleaseSet>();
     }

     // The release id is capture group 1 of releaseIDMatcher applied to the first cell's text.
     var releaseIds = table.Rows
                      .Select(row => releaseIDMatcher.Match(row.Cells[0].TextContent).Groups[1].Value)
                      .Distinct();

     return releaseIds.ToDictionary(rid => rid, rid => CreatePromoSet(rid));
 }
Exemplo n.º 7
0
        //Sanity check to ensure the page hasn't changed format
        /// <summary>
        /// Compares the text of every <c>th</c> cell under <paramref name="given"/> with
        /// <paramref name="expected"/>, in order.
        /// </summary>
        /// <param name="expected">Header names in the order they must appear.</param>
        /// <param name="given">Table whose header cells are inspected.</param>
        /// <returns>Always <c>true</c>; a mismatch is reported by throwing.</returns>
        /// <exception cref="InvalidOperationException">The header names differ from <paramref name="expected"/>.</exception>
        private static bool verifyHeaderNames(List <string> expected, IHtmlTableElement given)
        {
            List <string> headerNames = given.QuerySelectorAll("th")
                                        .Select(headerCell => headerCell.TextContent)
                                        .ToList();

            if (!expected.SequenceEqual(headerNames))
            {
                // Both lists are wrapped in '|' so leading/trailing whitespace in a name stays visible.
                throw new InvalidOperationException("Headers do not match. Expected: |" + String.Join(",", expected) + "| but found |" + String.Join(",", headerNames) + "|");
            }
            return(true);
        }
        /// <summary>
        /// Folds the rows of the first body of <paramref name="foodTable"/> into food items,
        /// carrying the most recently seen section heading along as fold state.
        /// </summary>
        /// <param name="foodTable">Table whose first body supplies the rows; it must have at least one body.</param>
        /// <param name="date">Forwarded unchanged to <c>ParseMealFoodItem</c>.</param>
        /// <param name="logger">Receives a debug line with the row count and a trace line with the table HTML; also forwarded to <c>ParseMealFoodItem</c>.</param>
        // NOTE(review): this definition is cut off in the visible source (the Aggregate call is
        // never closed), so what is finally extracted from the fold result cannot be confirmed here.
        public static IEnumerable <FoodItem> ParseTable(IHtmlTableElement foodTable, LocalDate date, ILogger logger)
        {
            var allRows = foodTable.Bodies.First().Rows;
            // Fold state: the current section name (initially "none") and the items gathered so far.
            var seed    = (currentSection : "none", rows : Enumerable.Empty <FoodItem>());

            logger.LogDebug($"found {allRows.Length} row(s) in food table");
            logger.LogTrace($"parsing food table {foodTable.ToHtml()}");

            return(allRows.Aggregate(seed,
                                     (acc, currentRow) =>
            {
                var(currentSection, rows) = acc;

                // A section row only replaces the current section name with its trimmed text;
                // any other row appends whatever ParseMealFoodItem yields for it under that section.
                return IsSectionRow(currentRow)
                        ? (currentRow.TextContent.Trim(), rows)
                        : (currentSection, rows.Concat(ParseMealFoodItem(currentRow, currentSection, date, logger)));
            },
Exemplo n.º 9
0
        /// <summary>
        /// Reads one test-case description out of <paramref name="table"/> and, when the file it
        /// refers to exists on disk, appends the populated entry to <paramref name="list"/>.
        /// </summary>
        /// <param name="table">Table whose <c>td</c> cells at positions 2, 4, 6 and 7 hold the rules, file name, collection and text.</param>
        /// <param name="list">Receives the entry; left untouched when the resolved file does not exist.</param>
        static void Inspect(IHtmlTableElement table, List <Entry> list)
        {
            var entry = new Entry();
            var cells = table.QuerySelectorAll("table td");
            var path  = String.Empty;

            entry.Rules      = cells[2].TextContent;
            entry.FileName   = cells[4].TextContent;
            entry.Collection = cells[6].TextContent;
            entry.Text       = cells[7].TextContent.Trim();

            // Collection names are fixed identifiers, so compare ordinally rather than
            // with the current culture's linguistic rules.
            if (entry.Collection.StartsWith("IBM", StringComparison.Ordinal))
            {
                path = IBMPath(entry.FileName);
            }
            else if (entry.Collection.StartsWith("James Clark", StringComparison.Ordinal))
            {
                path = XmlTestPath(entry.FileName);
            }
            else if (entry.Collection.StartsWith("Sun", StringComparison.Ordinal))
            {
                path = SunPath(entry.FileName);
            }
            else if (entry.Collection.StartsWith("OASIS", StringComparison.Ordinal))
            {
                path = OasisPath(entry.FileName);
            }
            else if (entry.Collection.StartsWith("Fuji", StringComparison.Ordinal))
            {
                path = XmlFujiPath(entry.FileName);
            }
            else
            {
                // Every other collection resolves through EdUniPath.
                path = EdUniPath(entry.FileName);
            }

            if (File.Exists(path))
            {
                entry.Content = File.ReadAllText(path);
                list.Add(entry);
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Builds the table model from <paramref name="e"/>: rows made only of header cells become
        /// the column titles, and the rows after the first <c>headings.Count</c> rows become the data rows.
        /// </summary>
        /// <param name="e">Table element to read.</param>
        /// <exception cref="HtmlElementException">Two heading rows have a different number of cells.</exception>
        private HtmlTable(IHtmlTableElement e)
        {
            // Query the rows once; every branch below works on this same list.
            var allRows = e.QuerySelectorAll("tr").Select(row => (IHtmlTableRowElement)row).ToList();

            // A heading row is any row whose children are all header cells.
            var headings = allRows.Where(x => x.Children.All(y => y is IHtmlTableHeaderCellElement)).ToList();

            if (headings.Any())
            {
                var rowChildren = headings.Select(x => (Row: x, Children: RowChildren(x).ToList())).ToList();

                var num = rowChildren.First().Children.Count;
                foreach (var(row, children) in rowChildren.Skip(1))
                {
                    if (children.Count != num)
                    {
                        throw new HtmlElementException(row,
                                                       $"Expected all of the rows to have the same amount of cells ({num}). But this one has {children.Count}");
                    }
                }

                // Column i's title is the trimmed text of cell i of every heading row, joined with spaces.
                ColumnTitles = Linq.Range(num)
                               .Select(i => rowChildren.Select(x => x.Children[i]))
                               .Select(x => x.Select(y => y.TextContent.Trim()).Join(" ").Trim())
                               .ToList();

                _rows = allRows.Skip(headings.Count).ToList();
            }
            else
            {
                ColumnTitles = Array.Empty <string>();

                _rows = allRows;
            }

            // De-duplicate by title, not by index: presumably Enumerate yields unique indices
            // (TODO confirm), in which case comparing them removes nothing and a repeated title
            // makes ToDictionary throw. The first column carrying a given title wins.
            _columnTitleToIndex = ColumnTitles.Enumerate()
                                  .Distinct((i1, i2) => i1.Elem == i2.Elem)
                                  .ToDictionary(tup => tup.Elem, tup => tup.Index);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Loads the saved film page, locates the table with id <c>details</c> and reads the text
        /// that follows each bold label (title, year, country, genres, duration) in the second
        /// cell of its first row.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static async Task Main(string[] args)
        {
            Console.WriteLine("Hello World!");
            var config = Configuration.Default;

            //Create a new context for evaluating webpages with the given config
            var context = BrowsingContext.New(config);

            //Parse the document from the content of a response to a virtual request
            IDocument document;
            // Close the file handle as soon as the document has been built from it.
            using (Stream pageStream = File.OpenRead("..\\..\\..\\..\\TestParser\\filmPage.html"))
            {
                document = await context.OpenAsync(req => req.Content(pageStream));
            }

            // Materialize once so the document is not walked again for the count and the element.
            var filmInfoTables = document.All.Where(t => t.LocalName == "table" && t.Id == "details").ToList();

            if (filmInfoTables.Count == 1)
            {
                IHtmlTableElement filmInfoTable = (IHtmlTableElement)filmInfoTables[0];
                var    htmlTableCellElement     = filmInfoTable.Rows[0].Cells[1];
                var    boldElements             = htmlTableCellElement.ChildNodes.Where(t => t is IHtmlElement element && element.NodeName == "B").ToArray();
                INode  boldElName     = boldElements.FirstOrDefault(t => t.TextContent.Contains("Название"));
                INode  boldElYear     = boldElements.FirstOrDefault(t => t.TextContent.Contains("Год"));
                INode  boldElCountry  = boldElements.FirstOrDefault(t => t.TextContent.Contains("Страна"));
                INode  boldElGenres   = boldElements.FirstOrDefault(t => t.TextContent.Contains("Жанр"));
                INode  boldElDuration = boldElements.FirstOrDefault(t => t.TextContent.Contains("Продолжительность"));

                // The value is the node right after its label; either the label or that node may be missing.
                string filmTitle      = boldElName?.NextSibling?.TextContent;
                string filmYear       = boldElYear?.NextSibling?.TextContent;
                string filmCountry    = boldElCountry?.NextSibling?.TextContent;
                string filmGenres     = boldElGenres?.NextSibling?.TextContent;
                string filmDuration   = boldElDuration?.NextSibling?.TextContent;

                //Release year: 1999
                //Country: USA
                //Genre: detective, crime, drama, thriller
                //Duration: 01:28:45
            }
        }
Exemplo n.º 12
0
        /// <summary>
        /// Returns a newly constructed <c>Table</c>; <paramref name="element"/> is not read.
        /// </summary>
        Table Render(IHtmlTableElement element) => new Table();
Exemplo n.º 13
0
        /// <summary>
        /// Reads one test-case description out of <paramref name="table"/> and, when the file it
        /// refers to exists on disk, appends the populated entry to <paramref name="list"/>.
        /// </summary>
        /// <param name="table">Table whose <c>td</c> cells at positions 2, 4, 6 and 7 hold the rules, file name, collection and text.</param>
        /// <param name="list">Receives the entry; left untouched when the resolved file does not exist.</param>
        static void Inspect(IHtmlTableElement table, List<Entry> list)
        {
            var cells = table.QuerySelectorAll("table td");
            var entry = new Entry();
            entry.Rules = cells[2].TextContent;
            entry.FileName = cells[4].TextContent;
            entry.Collection = cells[6].TextContent;
            entry.Text = cells[7].TextContent.Trim();

            // Collection names are fixed identifiers: match their prefixes ordinally so the
            // result does not depend on the current culture.
            string collection = entry.Collection;
            string path;

            if (collection.StartsWith("IBM", StringComparison.Ordinal))
            {
                path = IBMPath(entry.FileName);
            }
            else if (collection.StartsWith("James Clark", StringComparison.Ordinal))
            {
                path = XmlTestPath(entry.FileName);
            }
            else if (collection.StartsWith("Sun", StringComparison.Ordinal))
            {
                path = SunPath(entry.FileName);
            }
            else if (collection.StartsWith("OASIS", StringComparison.Ordinal))
            {
                path = OasisPath(entry.FileName);
            }
            else if (collection.StartsWith("Fuji", StringComparison.Ordinal))
            {
                path = XmlFujiPath(entry.FileName);
            }
            else
            {
                // Every other collection resolves through EdUniPath.
                path = EdUniPath(entry.FileName);
            }

            if (File.Exists(path))
            {
                entry.Content = File.ReadAllText(path);
                list.Add(entry);
            }
        }
Exemplo n.º 14
0
        /// <summary>
        /// Returns the rows of the first <c>tbody</c> found under <paramref name="table"/> as a list.
        /// </summary>
        /// <param name="table">Table to read; it must contain a <c>tbody</c>.</param>
        private static List <IHtmlTableRowElement> getRows(IHtmlTableElement table) =>
            ((IHtmlTableSectionElement)table.QuerySelector("tbody")).Rows.ToList();
Exemplo n.º 15
0
 /// <summary>
 /// Wraps construction of an <c>HtmlTable</c> from <paramref name="e"/> in a <c>Try</c>;
 /// the constructor call is handed over as a delegate rather than invoked here.
 /// </summary>
 /// <param name="e">Table element to build the model from.</param>
 public static Try <HtmlTable, HtmlElementException> Create(IHtmlTableElement e)
 {
     return new Try <HtmlTable, HtmlElementException>(() => new HtmlTable(e));
 }
Exemplo n.º 16
0
        /// <summary>
        /// Parses a financial statement page into a <c>FinancialInfo</c>: the multiplier, the
        /// statement date (taken from the header row) and one item per remaining table row.
        /// </summary>
        /// <param name="content">HTML of the page.</param>
        /// <param name="linktype">Selects the page layout: Bovespa, or otherwise CVM.</param>
        /// <param name="categoria">Stored on the result unchanged.</param>
        /// <param name="tipo">Stored on the result unchanged.</param>
        /// <returns>The parsed information, or <c>null</c> when the page states that the data does not exist.</returns>
        /// <exception cref="InvalidOperationException">The page does not have the expected structure.</exception>
        private FinancialInfo ParseFinInfo(string content, DocLinkInfo.LinkTypeEnum linktype, FinInfoCategoria categoria, FinInfoTipo tipo)
        {
            var parser = new HtmlParser();
            var doc    = parser.Parse(content);

            FinancialInfo finInfo = new FinancialInfo();

            finInfo.Categoria = categoria;
            finInfo.Tipo      = tipo;

            bool isBovespa = linktype == DocLinkInfo.LinkTypeEnum.Bovespa;
            IHtmlTableElement table = null;

            if (isBovespa)
            {
                var div = doc.QuerySelector("div.ScrollMaker");

                if (div == null)
                {
                    var scripts = doc.QuerySelectorAll("script");
                    if (scripts.Any(s => s.TextContent.Contains("Não Possui Dados para Carregar a Página")))
                    {
                        // data does not exist
                        return(null);
                    }

                    // Neither the data container nor the "no data" marker is present:
                    // report it explicitly instead of failing on a null dereference below.
                    throw new InvalidOperationException("Bovespa page has no 'div.ScrollMaker' element and no 'no data' notice.");
                }

                table = div.FirstElementChild as IHtmlTableElement;

                // the element right before the table's container is the line holding the multiplier
                var multiplierLine = div.PreviousElementSibling;
                if (multiplierLine != null && multiplierLine.TextContent.Contains("Mil"))
                {
                    finInfo.Multiplicador = 1000;
                }
            }
            else // CVM
            {
                var title = doc.QuerySelector("#TituloTabelaSemBorda");
                if (title == null)
                {
                    throw new InvalidOperationException("CVM page has no '#TituloTabelaSemBorda' element.");
                }

                if (title.TextContent.Contains("Reais Mil"))
                {
                    finInfo.Multiplicador = 1000;
                }

                table = title.NextElementSibling as IHtmlTableElement;

                if (table != null && table.InnerHtml.Contains("Justificativa para a não prestação da informação"))
                {
                    // data does not exist
                    return(null);
                }
            }

            if (table == null)
            {
                // The 'as' casts above yield null when the expected element is missing or is not a table.
                throw new InvalidOperationException("Financial data table was not found where expected.");
            }

            // Dates on the page are written as dd/MM/yyyy; build the culture once for all rows.
            var ptBr = new CultureInfo("pt-BR");

            foreach (var row in table.Rows)
            {
                bool isTopLine = isBovespa
                                 ? row.GetAttribute("valign") == "top"
                                 : row.Cells[0].TextContent.Trim() == "Conta";

                if (isTopLine) // header row
                {
                    // take the date from the third cell,
                    // e.g. "Valor do Trimestre Atual 01/04/2009 a 30/06/2009"
                    var text       = row.Cells[2].TextContent;
                    var iUltimoNum = text.LastIndexOfNum();
                    // the date is the 10 characters that end at the last digit
                    var start      = iUltimoNum - 9;
                    var datetext   = text.Substring(start, 10).Trim();
                    finInfo.Data = DateTime.ParseExact(datetext, "dd/MM/yyyy", ptBr);
                }
                else
                {
                    var           codconta  = row.Cells[0].TextContent;
                    var           nomeconta = row.Cells[1].TextContent;
                    var           valortext = row.Cells[2].TextContent;
                    FinancialItem item      = new FinancialItem();
                    item.Conta = codconta.Trim();
                    item.Nome  = nomeconta.Trim();
                    item.Valor = ParseValor(valortext.Trim());
                    finInfo.Items.Add(item);
                }
            }

            return(finInfo);
        }