/// <summary>
/// Builds an <c>FIDCItem</c> from a label/value node pair and appends it to
/// <paramref name="result"/>'s <c>Items</c>.
/// </summary>
/// <param name="result">Result whose <c>Items</c> collection receives the new item.</param>
/// <param name="labelNode">Node holding the label text; its <c>id</c> attribute, when present, becomes the item key.</param>
/// <param name="valueTextNode">Node whose inner text becomes the item value.</param>
/// <param name="previousItem">Item used as the starting point for the parent lookup; may be null.</param>
/// <returns><c>true</c> when an item was added; <c>false</c> when either node is null.</returns>
private static bool prepareItem(FIDCResult result, HtmlNode labelNode, HtmlNode valueTextNode, FIDCItem previousItem)
{
    if (labelNode == null || valueTextNode == null)
    {
        return false;
    }

    // Key: the label's "id" attribute, or empty when it has none.
    // The name is compared ordinally; ToLower() is culture-sensitive and would
    // not match "ID" under Turkish-style casing rules.
    HtmlAttribute keyAttr = labelNode.Attributes.FirstOrDefault(
        x => string.Equals(x.Name, "id", System.StringComparison.OrdinalIgnoreCase));
    string key = (keyAttr != null) ? keyAttr.Value : "";

    // Label and value are the raw inner text of their nodes; cleaning happens below.
    string label = labelNode.InnerText;
    string value = valueTextNode.InnerText;

    // Parent: the nesting level is derived from the label text.
    // NOTE(review): StartsWithCounter(" ") presumably counts leading spaces and
    // CleanString(false) presumably keeps them - confirm against the helpers.
    int level = labelNode.InnerText.CleanString(false).StartsWithCounter(" ");
    FIDCItem parent = (previousItem != null && level > 0)
        ? previousItem.GetParentFromLevel(level - 1)
        : null;

    result.Items.Add(new FIDCItem()
    {
        Key = key.CleanString(),
        Label = label.CleanString(),
        Value = value.CleanString(),
        Parent = parent
    });

    return true;
}
/// <summary>
/// Fetches the page addressed by <c>URLSeed</c> plus the current filters and
/// scrapes it into an <c>FIDCResult</c>.
/// </summary>
/// <remarks>
/// Three areas of the page are read, in this order:
/// the header table (<c>tbCabecalhoInfo</c>), the body table (<c>tbCorpoInfo</c>)
/// and the two horizontal tables (<c>dgInfoCotaSenior</c>, <c>dgInfoCotaSubrd</c>).
/// Rows or tables that cannot be interpreted are reported through
/// <c>System.Diagnostics.Debug</c> and skipped.
/// </remarks>
/// <returns>
/// A result with <c>Id</c> 0, the current local time as <c>TimeStamp</c> and the
/// scraped items; the item list is empty when no document could be obtained.
/// </returns>
private FIDCResult Load()
{
    // Each filter is appended as "&" + its URL parameter.
    // NOTE(review): assumes URLSeed already carries a query string - confirm.
    string finalURL = URLSeed;
    foreach (ScrapperFilter filter in this.Filters)
    {
        finalURL += "&" + filter.ToURLParameter();
    }

    HtmlDocument doc = Scrapper.GetDocument(finalURL);

    FIDCResult result = new FIDCResult()
    {
        Id = 0,
        TimeStamp = DateTime.Now,
        Items = new List<FIDCItem>()
    };

    // Nothing to scrape: hand back the empty result instead of dereferencing null.
    if (doc == null || doc.DocumentNode == null)
    {
        return result;
    }

    HtmlNode root = doc.DocumentNode;

    // ---- Header: label in td[1], value as a span or a selected <option> in td[2].
    HtmlNodeCollection headerRows = root.SelectNodes("//table[@id='tbCabecalhoInfo']/tr");
    if (headerRows != null)
    {
        foreach (HtmlNode row in headerRows)
        {
            HtmlNode labelNode = row.SelectSingleNode("td[1]/span");
            HtmlNode valueTextNode = row.SelectSingleNode("td[2]/span");
            HtmlNode valueSelectNode = row.SelectSingleNode("td[2]/select/option[@selected]");

            if (labelNode == null || (valueTextNode == null && valueSelectNode == null))
            {
                System.Diagnostics.Debug.WriteLine("Row Not Processed : " + row.InnerHtml);
                continue;
            }

            // Key: the label's "id" attribute (ordinal, case-insensitive match).
            HtmlAttribute keyAttr = labelNode.Attributes.FirstOrDefault(
                x => string.Equals(x.Name, "id", System.StringComparison.OrdinalIgnoreCase));
            string key = (keyAttr != null) ? keyAttr.Value : "";

            string label = labelNode.InnerText;

            // Value: the span text wins; otherwise the selected option's value.
            string value;
            if (valueTextNode != null)
            {
                value = valueTextNode.InnerText;
            }
            else
            {
                HtmlAttribute valueAttr = valueSelectNode.Attributes.FirstOrDefault(
                    x => string.Equals(x.Name, "value", System.StringComparison.OrdinalIgnoreCase));

                // An <option> without a value attribute uses its text as its value in HTML,
                // so fall back to the text rather than dereferencing a missing attribute.
                value = (valueAttr != null) ? valueAttr.Value : valueSelectNode.InnerText;
            }

            result.Items.Add(new FIDCItem()
            {
                Key = key.CleanString(),
                Label = label.CleanString(),
                Value = value.CleanString()
            });
        }
    }

    // ---- Body: hierarchical rows; each row's parent is resolved from the previous item.
    HtmlNodeCollection bodyRows = root.SelectNodes("//table[@id='tbCorpoInfo']/tr");
    if (bodyRows != null)
    {
        FIDCItem previous = null;
        foreach (HtmlNode row in bodyRows)
        {
            HtmlNode labelNode = row.SelectSingleNode("td[1]/span");
            HtmlNode valueTextNode = row.SelectSingleNode("td[2]/span");

            if (labelNode == null)
            {
                System.Diagnostics.Debug.WriteLine("Row Not Processed : " + row.InnerHtml);
                continue;
            }

            HtmlAttribute keyAttr = labelNode.Attributes.FirstOrDefault(
                x => string.Equals(x.Name, "id", System.StringComparison.OrdinalIgnoreCase));
            string key = (keyAttr != null) ? keyAttr.Value.CleanString() : "";

            string label = labelNode.InnerText.CleanString();

            // Rows without a value span are kept, with a null value.
            string value = (valueTextNode != null) ? valueTextNode.InnerText.CleanString() : null;

            // NOTE(review): StartsWithCounter(" ") presumably counts leading spaces,
            // i.e. the indentation encodes the nesting depth - confirm against the helper.
            int level = labelNode.InnerText.CleanString(false).StartsWithCounter(" ");
            FIDCItem parent = (previous != null && level > 0)
                ? previous.GetParentFromLevel(level - 1)
                : null;

            System.Diagnostics.Debug.WriteLine(level + " ::: " + label + " ::: " + labelNode.InnerText);

            FIDCItem item = new FIDCItem() { Parent = parent, Key = key, Label = label, Value = value };
            result.Items.Add(item);
            previous = item;
        }
    }

    // ---- Horizontal tables: labels in row 1, values in row 2, both tables required.
    HtmlNodeCollection horizontalTables = root.SelectNodes("//table[@id='dgInfoCotaSenior'] | //table[@id='dgInfoCotaSubrd']");
    if (horizontalTables == null || horizontalTables.Count != 2)
    {
        System.Diagnostics.Debug.WriteLine("TABELAS HORIZONTAIS: ERROR!!!");
        return result;
    }

    foreach (HtmlNode table in horizontalTables)
    {
        // The caption is the last row of the element immediately preceding the table.
        HtmlNode titleNode = table.SelectSingleNode("preceding-sibling::*[1]/tr[last()]");
        if (titleNode == null)
        {
            // Without a caption there is no parent item to hang the cells on.
            System.Diagnostics.Debug.WriteLine("Table Not Processed : " + table.InnerHtml);
            continue;
        }

        FIDCItem parent = new FIDCItem()
        {
            Key = "Anexo I - " + titleNode.InnerText.ToKey(),
            Label = "Anexo I - " + titleNode.InnerText.CleanString()
        };
        result.Items.Add(parent);

        HtmlNodeCollection labelCol = table.SelectNodes("tr[1]/td");
        HtmlNodeCollection valuesCol = table.SelectNodes("tr[2]/td");
        if (labelCol == null || valuesCol == null)
        {
            continue;
        }

        // Pair cells by position; surplus cells on either row are ignored.
        int itemNumber = System.Math.Min(labelCol.Count, valuesCol.Count);

        // These cells are not given a key, so the key stays empty.
        string key = "";
        for (int i = 0; i < itemNumber; i++)
        {
            string label = labelCol[i].InnerText;
            string value = valuesCol[i].InnerText;

            result.Items.Add(new FIDCItem()
            {
                Parent = parent,
                Key = key.CleanString(),
                Label = label.CleanString(),
                Value = value.CleanString()
            });
        }
    }

    return result;
}
/// <summary>
/// Fetches the page addressed by <c>URLSeed</c> plus the current filters and
/// scrapes every table in it into an <c>FIDCResult</c>.
/// </summary>
/// <remarks>
/// Each table is classified by the widest row it contains: exactly two cells
/// makes it vertical (label/value rows), more than three makes it horizontal.
/// Only vertical tables are turned into items; horizontal tables are recognised
/// but not processed, and all other tables are skipped.
/// </remarks>
/// <returns>
/// A result with <c>Id</c> 0, the current local time as <c>TimeStamp</c> and the
/// scraped items; the item list is empty when no document or table was found.
/// </returns>
private FIDCResult Load()
{
    FIDCResult result = new FIDCResult()
    {
        Id = 0,
        TimeStamp = DateTime.Now,
        Items = new List<FIDCItem>()
    };

    // Each filter is appended as "&" + its URL parameter.
    // NOTE(review): assumes URLSeed already carries a query string - confirm.
    string finalURL = URLSeed;
    foreach (ScrapperFilter filter in this.Filters)
    {
        finalURL += "&" + filter.ToURLParameter();
    }

    // For offline debugging a saved page was previously loaded here instead, via
    // Scrapper.GetDocumentFromFile(AppDomain.CurrentDomain.BaseDirectory + "Services\\TestData\\Mensal489.html").
    HtmlDocument doc = Scrapper.GetDocument(finalURL);
    if (doc == null || doc.DocumentNode == null)
    {
        return result;
    }

    // SelectNodes yields null (not an empty collection) when nothing matches.
    HtmlNodeCollection tables = doc.DocumentNode.SelectNodes("//table");
    if (tables == null)
    {
        return result;
    }

    FIDCItem previous = null;
    foreach (HtmlNode table in tables)
    {
        HtmlNodeCollection rows = table.SelectNodes("tr");
        if (rows == null)
        {
            // A table without direct <tr> children has no style and nothing to read.
            continue;
        }

        // Classify the table by its widest row.
        int columns = 0;
        foreach (HtmlNode row in rows)
        {
            HtmlNodeCollection cols = row.SelectNodes("td");
            if (cols != null && cols.Count > columns)
            {
                columns = cols.Count;
            }
        }

        // NOTE(review): three-column tables stay NotDefined and are skipped - confirm this is intended.
        TableStyle style = TableStyle.NotDefined;
        if (columns == 2)
        {
            style = TableStyle.Vertical;
        }
        else if (columns > 3)
        {
            style = TableStyle.Horizontal;
        }

        if (style == TableStyle.Horizontal)
        {
            // Horizontal tables are not processed.
            continue;
        }

        if (style != TableStyle.Vertical)
        {
            continue;
        }

        foreach (HtmlNode row in rows)
        {
            HtmlNodeCollection labelNodes = row.SelectNodes("td[1]/span");
            HtmlNodeCollection valueTextNodes = row.SelectNodes("td[2]/span");
            if (labelNodes == null || valueTextNodes == null)
            {
                continue;
            }

            if (labelNodes.Count == 1 && valueTextNodes.Count == 1)
            {
                // Plain row: td[1] is the label, td[2] the value.
                if (prepareItem(result, labelNodes[0], valueTextNodes[0], previous))
                {
                    previous = result.Items.Last();
                }
            }
            else if (labelNodes.Count == 2)
            {
                // Double row: each cell holds its own label span followed by a value span.
                if (prepareItem(result, labelNodes[0], labelNodes[1], previous))
                {
                    previous = result.Items.Last();
                }

                // The second cell is only read when it really has both spans;
                // indexing it blindly would throw on a short cell.
                if (valueTextNodes.Count >= 2
                    && prepareItem(result, valueTextNodes[0], valueTextNodes[1], previous))
                {
                    previous = result.Items.Last();
                }
            }
        }
    }

    return result;
}