コード例 #1
0
ファイル: RegexDataset.cs プロジェクト: xuan2261/EasyETL.Net
        /// <summary>
        /// Parses a block of text line by line and loads every line that yields a row into a data table.
        /// </summary>
        /// <remarks>
        /// Each non-empty line is first matched against <c>ContentExpression</c>; a successful match that
        /// passes all column value conditions is appended to <c>DataTable</c>. Otherwise the conditional
        /// parsers in <c>Parsers</c> are tried in order and the first one that produces a row wins. Lines
        /// that produce no row at all are handed to <c>AddMisRead</c>.
        /// </remarks>
        /// <param name="lines">Text to parse; lines are split on <c>Environment.NewLine</c> and empty lines are dropped.</param>
        protected virtual void ParseAndLoadLines(string lines)
        {
            foreach (string readLine in lines.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries))
            {
                bool bLineParsed = false;

                // Run the content regex once and inspect Success instead of IsMatch followed by Match.
                Match contentMatch = (ContentExpression != null) ? ContentExpression.Match(readLine) : null;
                if ((contentMatch != null) && contentMatch.Success)
                {
                    // A content line always starts from an empty dictionary.
                    rowDict = new Dictionary<string, object>();
                    bool bImportRow = TryLoadGroupsIntoRowDict(
                        contentMatch,
                        ContentExpression.GetGroupNames(),
                        groupName => _regexColumns?.Find(r => r.ColumnName == groupName),
                        DataTable);

                    if (bImportRow)
                    {
                        DataRow newRow = DataTable.NewRow();
                        PopulateDictionaryToRow(newRow);
                        DataTable.Rows.Add(newRow);
                        PopulateRowToDictionary(DataTable.Rows[DataTable.Rows.Count - 1]);
                        bLineParsed = true;
                    }
                }

                if (!bLineParsed)
                {
                    foreach (ConditionalRegexParser crp in Parsers)
                    {
                        if (!crp.ConditionRegex.IsMatch(readLine))
                        {
                            continue;
                        }

                        DataTable crpDataTable = Tables[crp.TableName];
                        Match     parserMatch  = crp.parseRegex.Match(readLine);

                        // rowDict is intentionally NOT reset here: values written by earlier lines stay
                        // in the dictionary, exactly as in the original implementation.
                        bool bImportRow = TryLoadGroupsIntoRowDict(
                            parserMatch,
                            crp.parseRegex.GetGroupNames(),
                            groupName => crp.RegexColumns?.Find(r => r.ColumnName == groupName),
                            crpDataTable);

                        if (bImportRow)
                        {
                            DataRow newRow = crpDataTable.NewRow();
                            PopulateDictionaryToRow(newRow);
                            crpDataTable.Rows.Add(newRow);
                            PopulateRowToDictionary(crpDataTable.Rows[crpDataTable.Rows.Count - 1]);
                            bLineParsed = true;
                            break; // the first parser that produces a row wins
                        }
                    }
                }

                if (!bLineParsed)
                {
                    AddMisRead(readLine);
                }
            }
        }

        /// <summary>
        /// Copies the named capture groups of <paramref name="match"/> into the <c>rowDict</c> field,
        /// converting each value to the data type of the same-named column in <paramref name="targetTable"/>.
        /// </summary>
        /// <param name="match">Regex match whose groups are read.</param>
        /// <param name="groupNames">Group names to process; the default group and numeric names are skipped.</param>
        /// <param name="findColumn">Looks up the column metadata for a group name; may return null.</param>
        /// <param name="targetTable">Table whose column types drive the value conversion.</param>
        /// <returns>
        /// <c>false</c> as soon as a value fails its column's <c>ValueMatchingCondition</c> (values written
        /// before that point stay in <c>rowDict</c>); otherwise <c>true</c>.
        /// </returns>
        private bool TryLoadGroupsIntoRowDict(Match match, string[] groupNames, Func<string, RegexColumn> findColumn, DataTable targetTable)
        {
            foreach (string groupName in groupNames)
            {
                // Only explicitly named groups map to columns.
                if ((groupName == DefaultGroup) || Int16.TryParse(groupName, out short _))
                {
                    continue;
                }

                string rawValue = match.Groups[groupName].Value;

                // A group without column metadata simply has no value condition to enforce
                // (previously this case threw a NullReferenceException).
                RegexColumn regexColumn = findColumn(groupName);
                if ((regexColumn != null)
                    && !String.IsNullOrWhiteSpace(regexColumn.ValueMatchingCondition)
                    && !Regex.IsMatch(rawValue, regexColumn.ValueMatchingCondition))
                {
                    return false;
                }

                DataColumn targetColumn = targetTable.Columns[groupName];
                if (targetColumn == null)
                {
                    continue;
                }

                // Surrounding double quotes are not part of the value.
                string fieldValue = rawValue.Trim('\"');
                Type   columnType = targetColumn.DataType;
                if (columnType == typeof(int))
                {
                    rowDict[groupName] = Convert.ToInt32(fieldValue);
                }
                else if (columnType == typeof(double))
                {
                    rowDict[groupName] = Convert.ToDouble(fieldValue);
                }
                else if (columnType == typeof(DateTime))
                {
                    rowDict[groupName] = Convert.ToDateTime(fieldValue);
                }
                else
                {
                    rowDict[groupName] = fieldValue;
                }
            }

            return true;
        }
コード例 #2
0
        /// <summary>
        /// Extracts every HTML table found in <paramref name="HTML"/> and adds each one to <c>this.Tables</c>.
        /// </summary>
        /// <remarks>
        /// Columns are named after the table's TH cells when any are found; otherwise they are named
        /// "Column 1..n" based on the number of TD cells in the first row. When headers exist the first
        /// row is treated as the header row and is not imported. A data row is dropped when it has no
        /// cells or when a cell fails the <c>ValueMatchingCondition</c> of the matching entry in
        /// <c>_regexColumns</c>. The table name defaults to "Table" plus the next table index and is
        /// replaced by a <c>name=</c> or, failing that, an <c>id=</c> value found in the table markup.
        /// </remarks>
        /// <param name="HTML">HTML text to scan for tables.</param>
        private void ConvertHTMLTablesToDataSet(string HTML)
        {
            const RegexOptions HtmlOptions = RegexOptions.Multiline | RegexOptions.Singleline | RegexOptions.IgnoreCase;
            const string TableExpression = "<TABLE[^>]*>(.*?)</TABLE>";
            // The opening tag may carry attributes (e.g. <td class="x">). The tag name must be followed
            // by '>' or whitespace so that <THEAD>/<TRACK> are not mistaken for <TH>/<TR>.
            // Group 2 remains the cell/row content, as before.
            const string HeaderExpression = "(<TH(?:\\s[^>]*)?>)(.*?)</TH>";
            const string RowExpression    = "(<TR(?:\\s[^>]*)?>)(.*?)</TR>";
            const string ColumnExpression = "(<TD(?:\\s[^>]*)?>)(.*?)</TD>";

            // Get a match for all the tables in the HTML
            MatchCollection tableMatches = Regex.Matches(HTML, TableExpression, HtmlOptions);

            foreach (Match tableMatch in tableMatches)
            {
                string tableHtml = tableMatch.Value;

                DataTable dt = new DataTable
                {
                    TableName = "Table" + (this.Tables.Count + 1).ToString()
                };

                // A name= attribute takes precedence over an id= attribute.
                Match idMatch   = Regex.Match(tableHtml, "id=(?<TableName>.\\w+)");
                Match nameMatch = Regex.Match(tableHtml, "name=(?<TableName>.\\w+)");
                Match tableNameMatch = nameMatch.Success ? nameMatch : (idMatch.Success ? idMatch : null);
                if (tableNameMatch != null)
                {
                    dt.TableName = tableNameMatch.Groups["TableName"].ToString().Trim('"');
                }

                MatchCollection headers = Regex.Matches(tableHtml, HeaderExpression, HtmlOptions);
                MatchCollection rows    = Regex.Matches(tableHtml, RowExpression, HtmlOptions);

                // Headers exist only if at least one TH cell was actually matched; a bare "<TH"
                // substring test would also fire for <THEAD> and leave the table without columns.
                bool headersExist = headers.Count > 0;
                if (headersExist)
                {
                    foreach (Match header in headers)
                    {
                        string headerText = header.Groups[2].ToString();
                        if (!dt.Columns.Contains(headerText))
                        {
                            dt.Columns.Add(headerText);
                        }
                    }
                }
                else if (rows.Count > 0)
                {
                    // Count the cells of the first row once instead of re-running the regexes on
                    // every loop iteration; a table without rows simply gets no columns.
                    int columnCount = Regex.Matches(rows[0].Value, ColumnExpression, HtmlOptions).Count;
                    for (int iColumns = 1; iColumns <= columnCount; iColumns++)
                    {
                        dt.Columns.Add("Column " + iColumns);
                    }
                }

                int iCurrentRow = 0;
                foreach (Match row in rows)
                {
                    // Only import the row if it isn't the header row
                    if (!(iCurrentRow == 0 && headersExist))
                    {
                        DataRow dr             = dt.NewRow();
                        int     iCurrentColumn = 0;
                        MatchCollection cells  = Regex.Matches(row.Value, ColumnExpression, HtmlOptions);
                        bool    bImportRow     = cells.Count > 0;

                        foreach (Match cell in cells)
                        {
                            // Cells beyond the known columns are ignored.
                            if (iCurrentColumn < dt.Columns.Count)
                            {
                                string cellText = cell.Groups[2].ToString();
                                if (_regexColumns != null)
                                {
                                    string      columnName     = dt.Columns[iCurrentColumn].ColumnName;
                                    RegexColumn curRegexColumn = _regexColumns.Find(r => r.ColumnName == columnName);
                                    if ((curRegexColumn != null)
                                        && !String.IsNullOrWhiteSpace(curRegexColumn.ValueMatchingCondition)
                                        && !Regex.IsMatch(cellText, curRegexColumn.ValueMatchingCondition))
                                    {
                                        // The value violates the column's condition: drop the whole row.
                                        bImportRow = false;
                                        break;
                                    }
                                }

                                dr[iCurrentColumn] = cellText;
                            }
                            iCurrentColumn++;
                        }

                        if (bImportRow)
                        {
                            dt.Rows.Add(dr);
                        }
                    }

                    SendMessageToCallingApplicationHandler(iCurrentRow, "Processed record for Table [" + dt.TableName + "]");
                    iCurrentRow++;
                }

                // Add the DataTable to the DataSet
                this.Tables.Add(dt);
            }
        }
コード例 #3
0
ファイル: RegexDataset.cs プロジェクト: xuan2261/EasyETL.Net
        /// <summary>
        /// Translates one configuration node into either a conditional parser (an <c>IF</c> element) or a
        /// column definition on <paramref name="columnBuilder"/> (any other element).
        /// </summary>
        /// <remarks>
        /// An <c>IF</c> element builds its own <c>RegexColumnBuilder</c> from its child nodes (or from the
        /// node returned by <c>Configuration.GetDataTableNode</c> when a table structure type is given) and
        /// registers a <c>ConditionalRegexParser</c> in <c>Parsers</c>. For any other element the column
        /// kind is chosen in this order: auto-increment, expression, foreign key, regular regex column.
        /// Condition, uniqueness, display name, description and default are applied to regular columns only.
        /// Element and attribute names are matched case-insensitively, independent of the current culture.
        /// Non-element nodes (comments, text, whitespace) are ignored.
        /// </remarks>
        /// <param name="columnBuilder">Builder that receives the column definition.</param>
        /// <param name="childNode">Configuration node to interpret.</param>
        /// <param name="separator">Field separator inherited from the caller; a SEPARATOR attribute on the node overrides it.</param>
        private void ParseColumnOrParser(RegexColumnBuilder columnBuilder, XmlNode childNode, string separator = "")
        {
            // XmlNode.Attributes is null for anything that is not an element, so comments or text
            // nodes reached through ChildNodes would otherwise crash the attribute loops below.
            if (childNode.NodeType != XmlNodeType.Element)
            {
                return;
            }

            if (String.Equals(childNode.Name, "IF", StringComparison.OrdinalIgnoreCase))
            {
                string strCondition          = String.Empty;
                string strTableName          = TableName;
                string strTableStructureType = String.Empty;
                //Conditional Table level attributes...
                foreach (XmlAttribute xAttr in childNode.Attributes)
                {
                    // Invariant casing: culture-sensitive ToUpper() breaks names containing 'i' under e.g. tr-TR.
                    switch (xAttr.Name.ToUpperInvariant())
                    {
                    case "SEPARATOR":
                        separator = xAttr.Value;
                        break;

                    case "CONDITION":
                        strCondition = xAttr.Value;
                        break;

                    case "TABLENAME":
                        strTableName = xAttr.Value;
                        break;

                    case "TABLESTRUCTURETYPE":
                        strTableStructureType = xAttr.Value;
                        break;
                    }
                }
                RegexColumnBuilder conditionalRCB = new RegexColumnBuilder(separator);

                // The columns come from the IF element itself unless it refers to a shared table structure.
                XmlNode tableNode = childNode;
                if (!String.IsNullOrWhiteSpace(strTableStructureType))
                {
                    // NOTE(review): GetDataTableNode is not visible here; if it can return null for an
                    // unknown structure type, the ChildNodes access below will fail - confirm.
                    tableNode = Configuration.GetDataTableNode(strTableStructureType);
                }

                foreach (XmlNode subNode in tableNode.ChildNodes)
                {
                    ParseColumnOrParser(conditionalRCB, subNode, separator);
                }
                ConditionalRegexParser crp = new ConditionalRegexParser()
                {
                    ConditionRegex = new Regex(strCondition), TableName = strTableName, parseRegex = conditionalRCB.CreateRegularExpression(), RegexColumns = conditionalRCB.Columns
                };
                Parsers.Add(crp);
            }
            else
            {
                string          prefix          = "";
                string          suffix          = "";
                string          strCondition    = String.Empty;
                bool            hasDoubleQuotes = false;
                bool            bAutoIncrement  = false;
                Int32           intStartValue   = 1;
                Int32           intIncrement    = 1;
                bool            bForeignKey     = false;
                bool            bPrimaryKey     = false;
                string          strExpression   = String.Empty;
                string          strDisplayName  = childNode.Name;
                string          strDescription  = String.Empty;
                string          strDefault      = String.Empty;
                int             columnLength    = 0;
                RegexColumnType rct             = RegexColumnType.STRING;
                //Column level attributes...
                foreach (XmlAttribute xAttr in childNode.Attributes)
                {
                    // Invariant casing: culture-sensitive ToUpper() breaks names containing 'i' under e.g. tr-TR.
                    switch (xAttr.Name.ToUpperInvariant())
                    {
                    case "SEPARATOR":
                        separator = xAttr.Value;
                        break;

                    case "PREFIX":
                        prefix = xAttr.Value;
                        break;

                    case "SUFFIX":
                        suffix = xAttr.Value;
                        break;

                    case "QUOTES":
                        hasDoubleQuotes = Boolean.Parse(xAttr.Value);
                        break;

                    case "LENGTH":
                        // Parsed as Int32 to match the variable's type; Int16 rejected lengths above 32767.
                        columnLength = Int32.Parse(xAttr.Value);
                        break;

                    case "TYPE":
                        rct = (RegexColumnType)Enum.Parse(typeof(RegexColumnType), xAttr.Value);
                        break;

                    case "CONDITION":
                        strCondition = xAttr.Value;
                        break;

                    case "AUTOINCREMENT":
                        bAutoIncrement = Boolean.Parse(xAttr.Value);
                        break;

                    case "STARTVALUE":
                    case "START":
                    case "SEED":
                        intStartValue = Int32.Parse(xAttr.Value);
                        break;

                    case "INCREMENT":
                        intIncrement = Int32.Parse(xAttr.Value);
                        break;

                    case "EXPRESSION":
                        strExpression = xAttr.Value;
                        break;

                    case "FOREIGNKEY":
                        bForeignKey = Boolean.Parse(xAttr.Value);
                        break;

                    case "UNIQUE":
                    case "PRIMARYKEY":
                    case "PRIMARY":
                        bPrimaryKey = Boolean.Parse(xAttr.Value);
                        break;

                    case "DISPLAYNAME":
                    case "CAPTION":
                        strDisplayName = xAttr.Value;
                        break;

                    case "DESCRIPTION":
                        strDescription = xAttr.Value;
                        break;

                    case "DEFAULT":
                        strDefault = xAttr.Value;
                        break;
                    }
                }
                bool   bColumnAdded  = false;
                string strColumnName = childNode.Name;
                // An element name made up only of underscores denotes an unnamed column.
                if (strColumnName.Trim('_') == String.Empty)
                {
                    strColumnName = String.Empty;
                }
                if (bAutoIncrement)
                {
                    columnBuilder.AddColumn(strColumnName, bAutoIncrement, intStartValue, intIncrement);
                    bColumnAdded = true;
                }
                if (!bColumnAdded && !String.IsNullOrEmpty(strExpression))
                {
                    columnBuilder.AddColumn(strColumnName, rct, strExpression);
                    bColumnAdded = true;
                }

                if ((!bColumnAdded) && (bForeignKey))
                {
                    columnBuilder.AddColumn(strColumnName, bForeignKey);
                    bColumnAdded = true;
                }

                if (!bColumnAdded) //This is a regular column with regex... let us add this to the column builder...
                {
                    if (!String.IsNullOrEmpty(separator))
                    {
                        if (hasDoubleQuotes)
                        {
                            columnBuilder.AddColumn('\"' + strColumnName + '\"', separator[0], rct);
                        }
                        else
                        {
                            if (childNode.NextSibling == null)
                            {
                                // Last column: take everything that is left on the line.
                                columnBuilder.AddColumn(strColumnName, ".*", rct);
                            }
                            else
                            {
                                // Any run of characters up to the next separator or end of line.
                                columnBuilder.AddColumn(strColumnName, "[^" + columnBuilder.RegexFormattedOutput(separator[0]) + "\\n]*", prefix, suffix, rct);
                            }
                        }
                    }
                    else
                    {
                        if (columnLength > 0)
                        {
                            columnBuilder.AddColumn(strColumnName, columnLength, rct);
                        }
                        else
                        {
                            columnBuilder.AddColumn(strColumnName, ".*", rct);
                        }
                    }

                    // The remaining settings are applied to the column that was just appended.
                    RegexColumn addedColumn = columnBuilder.Columns[columnBuilder.Columns.Count - 1];

                    if (!String.IsNullOrWhiteSpace(strCondition))
                    {
                        //There is a condition to be matched with the value... let us set it to the last column added...
                        addedColumn.ValueMatchingCondition = strCondition;
                    }

                    if (bPrimaryKey)
                    {
                        addedColumn.IsUnique = bPrimaryKey;
                    }

                    if (strDisplayName != strColumnName)
                    {
                        addedColumn.DisplayName = strDisplayName;
                    }
                    if (!String.IsNullOrEmpty(strDescription))
                    {
                        addedColumn.Description = strDescription;
                    }
                    if (!String.IsNullOrEmpty(strDefault))
                    {
                        addedColumn.Default = strDefault;
                    }
                }
            }
        }