/// <summary>
/// Parses the HTML of a data-element page into a <see cref="DataElement"/>.
/// </summary>
/// <remarks>
/// The element number is taken from the document text after the "ELEMENT NO." marker.
/// The document is then passed through <c>helpers.SplitTables</c> and
/// <c>helpers.MergeTablesOnHeaders</c>, and each resulting node is mapped onto the
/// <see cref="DataElement"/> property that corresponds to the node's <c>type</c>.
/// Format and classification sections that cannot be parsed are reported as a
/// "SystemIssue" entry instead of aborting the whole conversion.
/// </remarks>
/// <param name="html">Raw HTML of the page to convert.</param>
/// <returns>The populated <see cref="DataElement"/>.</returns>
public static DataElement ConvertHTML2DataElement(string html)
{
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);

    // Read the element number before the tables are split, keeping the original
    // order of operations (SplitTables receives the document itself).
    string elementNumber = ExtractElementNumber(doc.DocumentNode.InnerText);

    List<HtmlNode> tables = helpers.SplitTables(doc);
    List<ExtendedHTMLNode> tablesMerged = helpers.MergeTablesOnHeaders(tables);

    DataElement de = new DataElement();
    de.ElementNumber = elementNumber;

    foreach (ExtendedHTMLNode d in tablesMerged)
    {
        if (d.type == ElementType.Vers)
        {
            de.Version = StripLabel(d.node.InnerText, "VERSION:");
        }
        else if (d.type == ElementType.FYear)
        {
            de.FirstYear = StripLabel(d.node.InnerText, "FIRST YEAR:");
        }
        else if (d.type == ElementType.LYear)
        {
            de.LastYear = StripLabel(d.node.InnerText, "LAST YEAR:");
        }
        else if (d.type == ElementType.Fld)
        {
            de.FieldName = StripLabel(d.node.InnerText, "FIELD NAME:");
        }
        else if (d.type == ElementType.EleNM)
        {
            de.ElementName = StripLabel(d.node.InnerText, "ELEMENT NAME:");
        }
        else if (d.type == ElementType.Desc)
        {
            de.Description = StripLabel(d.node.InnerText, "DESCRIPTION:");
        }
        else if (d.type == ElementType.Frmt)
        {
            de.CodeFormat = ParseCodeFormat(d.node.InnerText, d.type.ToString());
        }
        else if (d.type == ElementType.Clas)
        {
            List<string> paragraphs = d.node.InnerHtml.GetParagraphsListFromHtml();
            de.Classification = ParseClassification(paragraphs, d.type.ToString());
        }
        else if (d.type == ElementType.CoNt)
        {
            de.CodingNotes = d.node.InnerText;
        }
        else if (d.type == ElementType.InFi)
        {
            // This section supplies both the input-file list and the change
            // history that follows the "VERSION" entry.
            List<string> paragraphs = d.node.InnerHtml.GetParagraphsListFromHtml();
            int versionIndex;
            de.InputFiles = ParseInputFiles(paragraphs, out versionIndex);
            de.ChangeHistory = ParseChangeHistory(paragraphs, versionIndex);
        }
        else if (d.type == ElementType.CHis)
        {
            // Not mapped onto the DataElement; only dumped to the console.
            System.Console.WriteLine(d.type.ToString());
            System.Console.WriteLine(d.node.InnerHtml);
        }
    }

    return de;
}

/// <summary>Returns up to three characters that follow "ELEMENT NO." plus one separator character, or an empty string when the marker is absent.</summary>
private static string ExtractElementNumber(string documentText)
{
    const string marker = "ELEMENT NO.";
    const int numberLength = 3;

    if (string.IsNullOrEmpty(documentText))
    {
        return string.Empty;
    }

    int markerIndex = documentText.IndexOf(marker, System.StringComparison.Ordinal);
    if (markerIndex < 0)
    {
        // Previously a missing marker silently produced characters 11..13 of the text.
        return string.Empty;
    }

    // Marker length (11) plus one separator character: the original fixed offset of 12.
    int start = markerIndex + marker.Length + 1;
    if (start >= documentText.Length)
    {
        return string.Empty;
    }

    int length = System.Math.Min(numberLength, documentText.Length - start);
    return documentText.Substring(start, length);
}

/// <summary>Removes every occurrence of <paramref name="label"/> from <paramref name="text"/> and trims the result.</summary>
private static string StripLabel(string text, string label)
{
    return text.Replace(label, "").Trim();
}

/// <summary>Builds the "SystemIssue" entry used to flag a section that could not be read.</summary>
private static KeyValue CreateSystemIssue(string sectionName)
{
    // Message text (including the missing space) is kept exactly as before.
    return new KeyValue { Attr = "SystemIssue", Value = "Error reading" + sectionName };
}

/// <summary>Splits the format text into its "Data Type", "Units" and "Width" entries, or returns a single "SystemIssue" entry when the labels are missing or out of order.</summary>
private static List<KeyValue> ParseCodeFormat(string text, string sectionName)
{
    const string dataTypeLabel = "Data Type:";
    const string unitsLabel = "Units:";
    const string widthLabel = "Width:";

    List<KeyValue> fields = new List<KeyValue>();

    int dataTypeIndex = text.IndexOf(dataTypeLabel, System.StringComparison.Ordinal);
    int unitsIndex = text.IndexOf(unitsLabel, System.StringComparison.Ordinal);
    int widthIndex = text.IndexOf(widthLabel, System.StringComparison.Ordinal);

    bool labelsInOrder = dataTypeIndex >= 0
        && unitsIndex >= dataTypeIndex + dataTypeLabel.Length
        && widthIndex >= unitsIndex + unitsLabel.Length;

    if (!labelsInOrder)
    {
        // Unchecked Substring calls used to throw here and abort the whole conversion.
        fields.Add(CreateSystemIssue(sectionName));
        return fields;
    }

    fields.Add(ReadFormatField(text, dataTypeLabel, dataTypeIndex, unitsIndex));
    fields.Add(ReadFormatField(text, unitsLabel, unitsIndex, widthIndex));
    fields.Add(ReadFormatField(text, widthLabel, widthIndex, text.Length));
    return fields;
}

/// <summary>Creates one format entry: the label without its colon as <c>Attr</c>, and the trimmed text between the label and <paramref name="endIndex"/> as <c>Value</c>.</summary>
private static KeyValue ReadFormatField(string text, string label, int labelIndex, int endIndex)
{
    int valueStart = labelIndex + label.Length;

    // The whole span up to the next label is taken and then trimmed, so a value
    // that directly abuts the next label no longer loses its last character.
    return new KeyValue
    {
        Attr = label.Substring(0, label.Length - 1),
        Value = text.Substring(valueStart, endIndex - valueStart).Trim()
    };
}

/// <summary>Pairs the non-blank paragraphs after the first one into attribute/value entries, skipping duplicates.</summary>
private static List<KeyValue> ParseClassification(List<string> paragraphs, string sectionName)
{
    List<KeyValue> entries = new List<KeyValue>();
    bool skipNextNonBlank = false;

    // Index 0 is skipped, as in the original loop (presumably the section heading — confirm).
    for (int i = 1; i < paragraphs.Count; i++)
    {
        if (string.IsNullOrWhiteSpace(paragraphs[i]))
        {
            continue;
        }

        if (skipNextNonBlank)
        {
            // The non-blank paragraph after an attribute is not treated as a new attribute.
            skipNextNonBlank = false;
            continue;
        }

        if (i + 1 >= paragraphs.Count)
        {
            // A trailing attribute with no following paragraph used to be detected
            // by catching the out-of-range exception; the reported entry is the same.
            entries.Add(CreateSystemIssue(sectionName));
            break;
        }

        KeyValue entry = new KeyValue { Attr = paragraphs[i], Value = paragraphs[i + 1] };
        if (!entries.Contains(entry))
        {
            entries.Add(entry);
        }

        skipNextNonBlank = true;
    }

    return entries;
}

/// <summary>Collects the distinct paragraphs that precede the first "VERSION" entry, excluding the known headings, and reports the index of that entry.</summary>
private static List<string> ParseInputFiles(List<string> paragraphs, out int versionIndex)
{
    // Hoisted out of the loop; the original rebuilt this array for every paragraph.
    string[] excludeWords = new string[] { "INPUT FILES:", "HEP - Student", "HEP - Staff", "HEP - Applications and Offers" };

    List<string> inputFiles = new List<string>();

    // NOTE(review): stays 0 when no "VERSION" entry exists, exactly as before, so the
    // change-history scan then starts at index 3 — confirm this fallback is intended.
    versionIndex = 0;

    for (int i = 0; i < paragraphs.Count; i++)
    {
        string paragraph = paragraphs[i];
        if (paragraph == null)
        {
            continue;
        }

        if (paragraph == "VERSION")
        {
            versionIndex = i;
            break;
        }

        if (!excludeWords.Contains(paragraph) && !inputFiles.Contains(paragraph))
        {
            inputFiles.Add(paragraph);
        }
    }

    return inputFiles;
}

/// <summary>Reads version / revision date / reporting year triples that start three entries after <paramref name="versionIndex"/>.</summary>
private static List<ChangeRecord> ParseChangeHistory(List<string> paragraphs, int versionIndex)
{
    // Three entries are skipped from the "VERSION" entry (presumably the column
    // headings — confirm); every record consumes three entries.
    const int headerEntries = 3;
    const int entriesPerRecord = 3;

    List<ChangeRecord> history = new List<ChangeRecord>();
    int i = versionIndex + headerEntries;

    // The bounds check replaces the swallowed out-of-range exception that used to
    // end the loop; an incomplete trailing record is still dropped.
    while (i + entriesPerRecord <= paragraphs.Count)
    {
        if (string.IsNullOrWhiteSpace(paragraphs[i]))
        {
            // The original loop never advanced past a blank entry and hung forever.
            i++;
            continue;
        }

        history.Add(new ChangeRecord
        {
            Version = paragraphs[i],
            RevisionDate = paragraphs[i + 1],
            ReportingYear = paragraphs[i + 2]
        });

        // Advance past all three consumed entries. The original advanced by two,
        // which made each record start on the previous record's reporting year.
        i += entriesPerRecord;
    }

    return history;
}
/// <summary>
/// Determines whether <paramref name="obj"/> is a <see cref="KeyValue"/> whose
/// <c>Attr</c> and <c>Value</c> are both equal to this instance's.
/// </summary>
/// <param name="obj">The object to compare with; may be <c>null</c> or of any type.</param>
/// <returns>
/// <c>true</c> when <paramref name="obj"/> is a <see cref="KeyValue"/> with equal
/// <c>Attr</c> and <c>Value</c>; otherwise <c>false</c>. Never throws.
/// </returns>
public override bool Equals(object obj)
{
    // NOTE(review): no matching GetHashCode override is visible in this chunk —
    // confirm one exists so hash-based collections stay consistent with Equals.

    // The original cast unconditionally, which threw for null and for foreign types.
    if (!(obj is KeyValue))
    {
        return false;
    }

    KeyValue item = (KeyValue)obj;

    // object.Equals tolerates null members, where the instance call threw a NullReferenceException.
    return object.Equals(this.Attr, item.Attr) && object.Equals(this.Value, item.Value);
}