/// <summary>
/// Parses a block of text line by line and loads every recognised line into a table.
/// </summary>
/// <remarks>
/// The text is split on <see cref="Environment.NewLine"/> and empty entries are dropped.
/// Each line is first matched against <c>ContentExpression</c> (when one is set) and, on
/// success, loaded into the main <c>DataTable</c>. A line that was not loaded that way is
/// offered to each entry of <c>Parsers</c> in order; the first parser whose condition regex
/// matches and whose column conditions all pass loads the line into its own table. A line
/// that nobody loads is handed to <c>AddMisRead</c>.
/// </remarks>
/// <param name="lines">The raw text to parse; one record per line.</param>
protected virtual void ParseAndLoadLines(string lines)
{
    string[] lineSeparators = { Environment.NewLine };

    foreach (string readLine in lines.Split(lineSeparators, StringSplitOptions.RemoveEmptyEntries))
    {
        bool bLineParsed = false;

        // First choice: the main content expression, loading into the main table.
        if (ContentExpression != null)
        {
            // Match once and inspect Success instead of running IsMatch followed by Match.
            var contentMatch = ContentExpression.Match(readLine);
            if (contentMatch.Success)
            {
                // A content row always starts from a fresh dictionary.
                rowDict = new Dictionary<string, object>();

                bool bImportRow = TryFillRowDictionary(
                    contentMatch,
                    ContentExpression.GetGroupNames(),
                    DataTable,
                    name => _regexColumns.Find(r => r.ColumnName == name));

                if (bImportRow)
                {
                    AddRowFromRowDictionary(DataTable);
                    bLineParsed = true;
                }
            }
        }

        // Second choice: the conditional parsers, tried in order until one loads the line.
        if (!bLineParsed)
        {
            foreach (ConditionalRegexParser crp in Parsers)
            {
                if (!crp.ConditionRegex.IsMatch(readLine))
                {
                    continue;
                }

                DataTable crpDataTable = Tables[crp.TableName];

                // NOTE(review): rowDict is intentionally NOT reset here (the original code did
                // not reset it either), so values left by an earlier row remain available.
                // NOTE(review): the parse regex result is used without checking Success; a
                // failed match yields empty group values - confirm that this is intended.
                var parserMatch = crp.parseRegex.Match(readLine);

                bool bImportRow = TryFillRowDictionary(
                    parserMatch,
                    crp.parseRegex.GetGroupNames(),
                    crpDataTable,
                    name => crp.RegexColumns.Find(r => r.ColumnName == name));

                if (bImportRow)
                {
                    AddRowFromRowDictionary(crpDataTable);
                    bLineParsed = true;
                    break; // Only the first successful parser handles a line.
                }
            }
        }

        if (!bLineParsed)
        {
            AddMisRead(readLine);
        }
    }
}

/// <summary>
/// Copies the named groups of <paramref name="match"/> into <c>rowDict</c>, converting each
/// value to the data type of the equally named column of <paramref name="table"/>.
/// </summary>
/// <param name="match">The regex match whose groups are read.</param>
/// <param name="groupNames">All group names of the regex that produced the match.</param>
/// <param name="table">The table whose column types drive the conversion.</param>
/// <param name="findColumn">Looks up the column definition for a group name; may return null.</param>
/// <returns>
/// <c>false</c> as soon as a group value fails its column's value-matching condition
/// (groups handled before that point have already been written to <c>rowDict</c>);
/// otherwise <c>true</c>.
/// </returns>
private bool TryFillRowDictionary(Match match, string[] groupNames, DataTable table, Func<string, RegexColumn> findColumn)
{
    foreach (string groupName in groupNames)
    {
        // Skip the default group and purely numeric (unnamed) groups.
        if (groupName == DefaultGroup || Int16.TryParse(groupName, out short _))
        {
            continue;
        }

        string rawValue = match.Groups[groupName].Value;

        // A group without a column definition has no condition to satisfy; previously this
        // case dereferenced null and threw a NullReferenceException.
        RegexColumn curRegexColumn = findColumn(groupName);
        if (curRegexColumn != null
            && !String.IsNullOrWhiteSpace(curRegexColumn.ValueMatchingCondition)
            && !Regex.IsMatch(rawValue, curRegexColumn.ValueMatchingCondition))
        {
            return false;
        }

        DataColumn targetColumn = table.Columns[groupName];
        if (targetColumn == null)
        {
            // Groups that do not correspond to a table column are ignored.
            continue;
        }

        // Surrounding double quotes are not part of the stored value.
        string fieldValue = rawValue.Trim('\"');

        if (targetColumn.DataType == typeof(int))
        {
            rowDict[groupName] = Convert.ToInt32(fieldValue);
        }
        else if (targetColumn.DataType == typeof(double))
        {
            rowDict[groupName] = Convert.ToDouble(fieldValue);
        }
        else if (targetColumn.DataType == typeof(DateTime))
        {
            rowDict[groupName] = Convert.ToDateTime(fieldValue);
        }
        else
        {
            rowDict[groupName] = fieldValue;
        }
    }

    return true;
}

/// <summary>
/// Creates a new row in <paramref name="table"/> from <c>rowDict</c>, adds it, and then
/// refreshes <c>rowDict</c> from the row that was just added.
/// </summary>
/// <param name="table">The table that receives the new row.</param>
private void AddRowFromRowDictionary(DataTable table)
{
    DataRow newRow = table.NewRow();
    PopulateDictionaryToRow(newRow);
    table.Rows.Add(newRow);
    PopulateRowToDictionary(table.Rows[table.Rows.Count - 1]);
}
/// <summary>
/// Extracts every <c>&lt;TABLE&gt;</c> element from an HTML string and adds one
/// <see cref="DataTable"/> per element to <c>this.Tables</c>.
/// </summary>
/// <remarks>
/// The table name defaults to "Table" plus the next table number and is overridden by an
/// <c>id=</c> attribute, or by a <c>name=</c> attribute when both are present. Columns come
/// from the <c>&lt;TH&gt;</c> cells when the table contains any; otherwise they are named
/// "Column 1", "Column 2", ... after the cell count of the first row. A row is skipped when
/// it has no <c>&lt;TD&gt;</c> cells or when a cell fails the value-matching condition of the
/// equally named entry in <c>_regexColumns</c>.
/// </remarks>
/// <param name="HTML">The HTML text to scan for tables.</param>
private void ConvertHTMLTablesToDataSet(string HTML)
{
    const string tableExpression = "<TABLE[^>]*>(.*?)</TABLE>";
    const string headerExpression = "(<TH>|<TH[\\s]>)(.*?)</TH>";
    const string rowExpression = "(<TR>|<TR[\\s]>)(.*?)</TR>";
    const string columnExpression = "(<TD>|<TD[\\s]>)(.*?)</TD>";
    const RegexOptions options = RegexOptions.Multiline | RegexOptions.Singleline | RegexOptions.IgnoreCase;

    foreach (Match tableMatch in Regex.Matches(HTML, tableExpression, options))
    {
        string tableHtml = tableMatch.Value;

        DataTable dt = new DataTable
        {
            TableName = "Table" + (this.Tables.Count + 1).ToString()
        };

        // "name=" takes precedence over "id="; each pattern is evaluated only once.
        Match tableNameMatch = Regex.Match(tableHtml, "name=(?<TableName>.\\w+)");
        if (!tableNameMatch.Success)
        {
            tableNameMatch = Regex.Match(tableHtml, "id=(?<TableName>.\\w+)");
        }
        if (tableNameMatch.Success)
        {
            dt.TableName = tableNameMatch.Groups["TableName"].Value.Trim('"');
        }

        bool headersExist = tableHtml.IndexOf("<TH", StringComparison.OrdinalIgnoreCase) >= 0;
        MatchCollection rows = Regex.Matches(tableHtml, rowExpression, options);

        if (headersExist)
        {
            // One column per distinct header text.
            foreach (Match header in Regex.Matches(tableHtml, headerExpression, options))
            {
                string headerText = header.Groups[2].Value;
                if (!dt.Columns.Contains(headerText))
                {
                    dt.Columns.Add(headerText);
                }
            }
        }
        else
        {
            // Size the table after the first row. The count is computed once (it used to be
            // re-evaluated on every loop iteration) and a table without any row now simply
            // gets no columns instead of failing with an ArgumentOutOfRangeException.
            int defaultColumnCount = rows.Count > 0
                ? Regex.Matches(rows[0].Value, columnExpression, options).Count
                : 0;

            for (int iColumns = 1; iColumns <= defaultColumnCount; iColumns++)
            {
                dt.Columns.Add("Column " + iColumns);
            }
        }

        int iCurrentRow = 0;
        foreach (Match row in rows)
        {
            // The first row of a table with headers is the header row, not data.
            if (!(iCurrentRow == 0 && headersExist))
            {
                DataRow dr = dt.NewRow();
                int iCurrentColumn = 0;

                MatchCollection cells = Regex.Matches(row.Value, columnExpression, options);
                bool bImportRow = cells.Count > 0;

                foreach (Match cell in cells)
                {
                    // Cells beyond the last known column are ignored.
                    if (iCurrentColumn < dt.Columns.Count)
                    {
                        string cellText = cell.Groups[2].Value;

                        if (_regexColumns != null)
                        {
                            string columnName = dt.Columns[iCurrentColumn].ColumnName;
                            RegexColumn curRegexColumn = _regexColumns.Find(r => r.ColumnName == columnName);

                            if (curRegexColumn != null
                                && !String.IsNullOrWhiteSpace(curRegexColumn.ValueMatchingCondition)
                                && !Regex.IsMatch(cellText, curRegexColumn.ValueMatchingCondition))
                            {
                                // The value violates the column's condition: drop the whole row.
                                bImportRow = false;
                                break;
                            }
                        }

                        dr[iCurrentColumn] = cellText;
                    }

                    iCurrentColumn++;
                }

                if (bImportRow)
                {
                    dt.Rows.Add(dr);
                }
            }

            // Progress is reported for every row element, including a skipped header row.
            SendMessageToCallingApplicationHandler(iCurrentRow, "Processed record for Table [" + dt.TableName + "]");
            iCurrentRow++;
        }

        this.Tables.Add(dt);
    }
}
/// <summary>
/// Interprets one configuration node either as a conditional parser (an <c>IF</c> element)
/// or as a column definition, and registers the result.
/// </summary>
/// <remarks>
/// Nodes that are not XML elements (comments, text, whitespace) are ignored; they expose no
/// attribute collection and previously caused a <see cref="NullReferenceException"/>.
/// Element and attribute names are compared case-insensitively using invariant casing so the
/// outcome does not depend on the current culture.
/// </remarks>
/// <param name="columnBuilder">The builder that receives column definitions.</param>
/// <param name="childNode">The configuration node to interpret.</param>
/// <param name="separator">The field separator inherited from the caller; a SEPARATOR attribute on the node overrides it.</param>
private void ParseColumnOrParser(RegexColumnBuilder columnBuilder, XmlNode childNode, string separator = "")
{
    if (childNode.NodeType != XmlNodeType.Element)
    {
        return;
    }

    if (childNode.Name.ToUpperInvariant() == "IF")
    {
        ParseConditionalParserNode(childNode, separator);
    }
    else
    {
        ParseColumnNode(columnBuilder, childNode, separator);
    }
}

/// <summary>
/// Builds a <c>ConditionalRegexParser</c> from an <c>IF</c> element and adds it to <c>Parsers</c>.
/// </summary>
/// <param name="ifNode">The <c>IF</c> element.</param>
/// <param name="separator">The inherited field separator, unless overridden by the element.</param>
private void ParseConditionalParserNode(XmlNode ifNode, string separator)
{
    string strCondition = String.Empty;
    string strTableName = TableName;
    string strTableStructureType = String.Empty;

    // Conditional table level attributes.
    foreach (XmlAttribute xAttr in ifNode.Attributes)
    {
        switch (xAttr.Name.ToUpperInvariant())
        {
            case "SEPARATOR":
                separator = xAttr.Value;
                break;
            case "CONDITION":
                strCondition = xAttr.Value;
                break;
            case "TABLENAME":
                strTableName = xAttr.Value;
                break;
            case "TABLESTRUCTURETYPE":
                strTableStructureType = xAttr.Value;
                break;
        }
    }

    // The column layout comes from the element's own children unless it refers to a table
    // structure type, in which case that structure's node supplies the children.
    XmlNode tableNode = ifNode;
    if (!String.IsNullOrWhiteSpace(strTableStructureType))
    {
        tableNode = Configuration.GetDataTableNode(strTableStructureType);
    }

    RegexColumnBuilder conditionalRCB = new RegexColumnBuilder(separator);
    foreach (XmlNode subNode in tableNode.ChildNodes)
    {
        ParseColumnOrParser(conditionalRCB, subNode, separator);
    }

    ConditionalRegexParser crp = new ConditionalRegexParser()
    {
        ConditionRegex = new Regex(strCondition),
        TableName = strTableName,
        parseRegex = conditionalRCB.CreateRegularExpression(),
        RegexColumns = conditionalRCB.Columns
    };
    Parsers.Add(crp);
}

/// <summary>
/// Reads a column definition element and adds the matching column to <paramref name="columnBuilder"/>.
/// </summary>
/// <remarks>
/// Precedence: auto-increment column, then expression column, then foreign-key column, and
/// finally a regular regex column. Condition, uniqueness, display name, description and
/// default value are applied to regular regex columns only.
/// </remarks>
/// <param name="columnBuilder">The builder that receives the column.</param>
/// <param name="columnNode">The element describing the column; its name is the column name.</param>
/// <param name="separator">The inherited field separator, unless overridden by the element.</param>
private void ParseColumnNode(RegexColumnBuilder columnBuilder, XmlNode columnNode, string separator)
{
    string prefix = "";
    string suffix = "";
    string strCondition = String.Empty;
    bool hasDoubleQuotes = false;
    bool bAutoIncrement = false;
    Int32 intStartValue = 1;
    Int32 intIncrement = 1;
    bool bForeignKey = false;
    bool bPrimaryKey = false;
    string strExpression = String.Empty;
    string strDisplayName = columnNode.Name;
    string strDescription = String.Empty;
    string strDefault = String.Empty;
    int columnLength = 0;
    RegexColumnType rct = RegexColumnType.STRING;

    // Column level attributes.
    foreach (XmlAttribute xAttr in columnNode.Attributes)
    {
        switch (xAttr.Name.ToUpperInvariant())
        {
            case "SEPARATOR":
                separator = xAttr.Value;
                break;
            case "PREFIX":
                prefix = xAttr.Value;
                break;
            case "SUFFIX":
                suffix = xAttr.Value;
                break;
            case "QUOTES":
                hasDoubleQuotes = Boolean.Parse(xAttr.Value);
                break;
            case "LENGTH":
                columnLength = Int16.Parse(xAttr.Value);
                break;
            case "TYPE":
                rct = (RegexColumnType)Enum.Parse(typeof(RegexColumnType), xAttr.Value);
                break;
            case "CONDITION":
                strCondition = xAttr.Value;
                break;
            case "AUTOINCREMENT":
                bAutoIncrement = Boolean.Parse(xAttr.Value);
                break;
            case "STARTVALUE":
            case "START":
            case "SEED":
                intStartValue = Int32.Parse(xAttr.Value);
                break;
            case "INCREMENT":
                intIncrement = Int32.Parse(xAttr.Value);
                break;
            case "EXPRESSION":
                strExpression = xAttr.Value;
                break;
            case "FOREIGNKEY":
                bForeignKey = Boolean.Parse(xAttr.Value);
                break;
            case "UNIQUE":
            case "PRIMARYKEY":
            case "PRIMARY":
                bPrimaryKey = Boolean.Parse(xAttr.Value);
                break;
            case "DISPLAYNAME":
            case "CAPTION":
                strDisplayName = xAttr.Value;
                break;
            case "DESCRIPTION":
                strDescription = xAttr.Value;
                break;
            case "DEFAULT":
                strDefault = xAttr.Value;
                break;
        }
    }

    // An element name made up solely of underscores stands for an unnamed column.
    string strColumnName = columnNode.Name;
    if (strColumnName.Trim('_') == String.Empty)
    {
        strColumnName = String.Empty;
    }

    if (bAutoIncrement)
    {
        columnBuilder.AddColumn(strColumnName, bAutoIncrement, intStartValue, intIncrement);
        return;
    }

    if (!String.IsNullOrEmpty(strExpression))
    {
        columnBuilder.AddColumn(strColumnName, rct, strExpression);
        return;
    }

    if (bForeignKey)
    {
        columnBuilder.AddColumn(strColumnName, bForeignKey);
        return;
    }

    // This is a regular column with a regex: add it to the column builder.
    if (!String.IsNullOrEmpty(separator))
    {
        if (hasDoubleQuotes)
        {
            columnBuilder.AddColumn('\"' + strColumnName + '\"', separator[0], rct);
        }
        else if (columnNode.NextSibling == null)
        {
            // The last node takes whatever is left on the line.
            // NOTE(review): a trailing comment or whitespace node also counts as a sibling
            // here, so the column before it is not treated as last - confirm this is intended.
            columnBuilder.AddColumn(strColumnName, ".*", rct);
        }
        else
        {
            // Everything up to the next separator or line feed.
            columnBuilder.AddColumn(strColumnName, "[^" + columnBuilder.RegexFormattedOutput(separator[0]) + "\\n]*", prefix, suffix, rct);
        }
    }
    else if (columnLength > 0)
    {
        columnBuilder.AddColumn(strColumnName, columnLength, rct);
    }
    else
    {
        columnBuilder.AddColumn(strColumnName, ".*", rct);
    }

    // The remaining settings are applied to the column that was just added.
    RegexColumn addedColumn = columnBuilder.Columns[columnBuilder.Columns.Count - 1];

    if (!String.IsNullOrWhiteSpace(strCondition))
    {
        addedColumn.ValueMatchingCondition = strCondition;
    }

    if (bPrimaryKey)
    {
        addedColumn.IsUnique = bPrimaryKey;
    }

    if (strDisplayName != strColumnName)
    {
        addedColumn.DisplayName = strDisplayName;
    }

    if (!String.IsNullOrEmpty(strDescription))
    {
        addedColumn.Description = strDescription;
    }

    if (!String.IsNullOrEmpty(strDefault))
    {
        addedColumn.Default = strDefault;
    }
}