/// <summary>
/// Loads the table-level profile settings from <paramref name="xNode"/> and rebuilds the
/// column schema from its child nodes.
/// </summary>
/// <remarks>
/// Recognised attributes (matched case-insensitively): Separator, TableName,
/// TableStructureType, HasHeader and SkipFirstRow. When TableStructureType is set, the
/// child nodes are taken from the node returned by
/// <c>Configuration.GetDataTableNode</c> instead of from <paramref name="xNode"/> itself.
/// </remarks>
/// <param name="xNode">Profile node to read; a null node is ignored.</param>
/// <exception cref="FormatException">
/// HasHeader or SkipFirstRow holds a value that <see cref="Boolean.Parse(string)"/> rejects.
/// </exception>
public override void LoadProfileSettings(XmlNode xNode)
{
    if (xNode == null)
    {
        return;
    }

    bool hasHeaderRow = false;
    bool skipFirstRow = false;
    string separator = "";
    string tableStructureType = String.Empty;

    // Table level attributes...
    // XmlNode.Attributes is null for anything that is not an element, so guard the loop.
    if (xNode.Attributes != null)
    {
        foreach (XmlAttribute xAttr in xNode.Attributes)
        {
            // Invariant upper-casing: with a culture-sensitive ToUpper() the Turkish
            // casing rules turn "SkipFirstRow" into "SKİPFİRSTROW" and the attribute
            // would be silently ignored.
            switch (xAttr.Name.ToUpperInvariant())
            {
                case "SEPARATOR":
                    separator = xAttr.Value;
                    break;
                case "TABLENAME":
                    TableName = xAttr.Value;
                    break;
                case "TABLESTRUCTURETYPE":
                    tableStructureType = xAttr.Value;
                    break;
                case "HASHEADER":
                    hasHeaderRow = Boolean.Parse(xAttr.Value);
                    break;
                case "SKIPFIRSTROW":
                    skipFirstRow = Boolean.Parse(xAttr.Value);
                    break;
            }
        }
    }

    UseFirstRowNamesAsColumnNames = hasHeaderRow;
    SkipFirstRow = skipFirstRow;

    RegexColumnBuilder rcb = new RegexColumnBuilder(separator);

    XmlNode tableNode = xNode;
    if (!String.IsNullOrWhiteSpace(tableStructureType))
    {
        // NOTE(review): GetDataTableNode is not visible from here; if it can return null
        // for an unknown structure type the HasChildNodes access below will throw - confirm.
        tableNode = Configuration.GetDataTableNode(tableStructureType);
    }

    if (tableNode.HasChildNodes)
    {
        foreach (XmlNode childNode in tableNode.ChildNodes)
        {
            ParseColumnOrParser(rcb, childNode, separator);
        }
    }

    ColumnBuilder = rcb;
    BuildRegexSchemaIntoDataSet();
}
/// <summary>
/// Creates a data set for a delimited text file, optionally taking the column names from
/// the first row of the file.
/// </summary>
/// <param name="fileName">Path of the text file to load.</param>
/// <param name="fieldSeparator">
/// Field separator. When blank and the header row is read from the file, the separator is
/// detected from that row.
/// </param>
/// <param name="tableName">Value assigned to <c>TableName</c>.</param>
/// <param name="useFirstRowNamesAsColumns">
/// When true the first row is treated as a header: if <paramref name="fileName"/> exists the
/// column names are read from it, otherwise a <c>FirstRowExpression</c> is built from the separator.
/// </param>
/// <param name="skipFirstRow">Initial value of <c>SkipFirstRow</c>; forced to true when a header row is used.</param>
/// <param name="columnNames">Explicit column names; replaced by the header row when one is read.</param>
public RegexDataSet(string fileName = "", string fieldSeparator = "", string tableName = "Table1", bool useFirstRowNamesAsColumns = true, bool skipFirstRow = false, params string[] columnNames)
{
    TableName = tableName;
    UseFirstRowNamesAsColumnNames = useFirstRowNamesAsColumns;
    SkipFirstRow = skipFirstRow;

    if (useFirstRowNamesAsColumns)
    {
        if ((!String.IsNullOrWhiteSpace(fileName)) && (File.Exists(fileName)))
        {
            using (StreamReader sr = new StreamReader(fileName))
            {
                // ReadLine returns null for an empty file; in that case there is no header
                // to parse and the caller-supplied column names (if any) are kept.
                string firstRow = sr.ReadLine();
                if (firstRow != null)
                {
                    if (String.IsNullOrWhiteSpace(fieldSeparator))
                    {
                        // The separator is the character following the leading run of
                        // letters, digits, underscores and double quotes. Because of regex
                        // backtracking, a row made up only of such characters yields its
                        // last character.
                        Regex regexSeparator = new Regex("^([a-zA-Z0-9_\"]*)(?<Separator>.)");
                        Match match = regexSeparator.Match(firstRow);
                        if (match.Success)
                        {
                            fieldSeparator = match.Groups["Separator"].ToString();
                        }
                    }

                    // Without a separator there is nothing to split on; indexing [0] would throw.
                    if (!String.IsNullOrEmpty(fieldSeparator))
                    {
                        columnNames = firstRow.Split(fieldSeparator[0]);
                    }
                }
            }
            UseFirstRowNamesAsColumnNames = false;
        }
        else
        {
            FirstRowExpression = new Regex(@"([^" + fieldSeparator + @"\n]*)[" + fieldSeparator + @"\w]");
        }
        SkipFirstRow = true;
    }

    // columnNames is null when a caller passes null explicitly for the params array.
    if (columnNames != null && columnNames.Length > 0)
    {
        ColumnBuilder = new RegexColumnBuilder(fieldSeparator, columnNames);
        Fill(fileName);
    }
    else
    {
        ColumnBuilder = null;
    }
}
/// <summary>
/// Fills the data set from <paramref name="textFile"/>. When a header row is expected, the
/// first line is read up front to derive the column names (and, if no delimiter is
/// configured, the delimiter) before delegating to the base implementation.
/// </summary>
/// <param name="textFile">Stream containing the delimited text.</param>
public override void Fill(Stream textFile)
{
    UseFirstRowNamesAsColumnNames = _hasHeaderRow;

    if (_hasHeaderRow)
    {
        string firstRow;

        // leaveOpen = true: the stream is handed to base.Fill afterwards and must stay usable.
        using (StreamReader sr = new StreamReader(textFile, Encoding.UTF8, true, 1024, true))
        {
            firstRow = sr.ReadLine();
        }

        // ReadLine returns null for an empty stream; there is no header to parse then.
        if (firstRow != null)
        {
            if (String.IsNullOrWhiteSpace(_delimiter))
            {
                // The delimiter is the character following the leading run of letters,
                // digits, underscores and double quotes in the header row.
                Regex regexSeparator = new Regex("^([a-zA-Z0-9_\"]*)(?<Separator>.)");
                Match match = regexSeparator.Match(firstRow);
                if (match.Success)
                {
                    _delimiter = match.Groups["Separator"].ToString();
                }
            }

            // Detection fails on an empty header line; indexing an empty/null delimiter would throw.
            if (!String.IsNullOrEmpty(_delimiter))
            {
                string[] columnNames = firstRow.Split(_delimiter[0]);
                ColumnBuilder = new RegexColumnBuilder(_delimiter, columnNames);
            }
        }

        SkipFirstRow = true;
    }

    base.Fill(textFile);
}
/// <summary>
/// Reads <c>TextFile</c> from the beginning, builds the schema and passes every data line
/// to <c>ProcessRowObject</c>.
/// </summary>
/// <remarks>
/// If a header row is requested but neither a <c>FirstRowExpression</c> nor any regex
/// columns are configured, the separator and column names are derived from the first line.
/// Otherwise, when the header is not skipped, the first line is matched against
/// <c>FirstRowExpression</c> and the captured values become the final column names.
/// Disposing the reader at the end also closes <c>TextFile</c>.
/// </remarks>
/// <exception cref="ApplicationException"><c>TextFile</c> is null.</exception>
/// <exception cref="RegexDataSetException">
/// The header row must be parsed but <c>FirstRowExpression</c> is missing or does not match it.
/// </exception>
public override void Fill()
{
    if (TextFile == null)
    {
        throw new ApplicationException("No stream available to convert to a DataSet");
    }

    TextFile.Seek(0, SeekOrigin.Begin);
    using (StreamReader sr = new StreamReader(TextFile))
    {
        var readLine = sr.ReadLine();
        var isFirstLine = true;
        int lineNumber = 1;
        SendMessageToCallingApplicationHandler(0, "Loading First Line");

        if (UseFirstRowNamesAsColumnNames && (FirstRowExpression == null) && ((_regexColumns == null) || (_regexColumns.Count == 0)))
        {
            // readLine is null for an empty stream; Regex.Match(null) would throw, so an
            // empty stream is treated the same way as a header without a detectable separator.
            if (readLine != null)
            {
                Regex regexSeparator = new Regex("^([a-zA-Z0-9_\"]*)(?<Separator>.)");
                Match match = regexSeparator.Match(readLine);
                if (match.Success)
                {
                    string fieldSeparator = match.Groups["Separator"].ToString();
                    string[] columnNames = readLine.Split(fieldSeparator[0]);
                    ColumnBuilder = new RegexColumnBuilder(fieldSeparator, columnNames);
                }
            }

            UseFirstRowNamesAsColumnNames = false;
            SkipFirstRow = true;
        }

        SendMessageToCallingApplicationHandler(lineNumber, "Building Schema...");
        BuildRegexSchemaIntoDataSet();

        while (readLine != null)
        {
            if (isFirstLine && UseFirstRowNamesAsColumnNames && !SkipFirstRow)
            {
                SendMessageToCallingApplicationHandler(lineNumber, "Building First Row...");
                if (FirstRowExpression == null)
                {
                    throw new RegexDataSetException(
                        "FirstRowExpression is not set, but UseFirstRowNamesAsColumnNames is set to true");
                }

                // Match once and reuse the result instead of IsMatch followed by Match.
                var m = FirstRowExpression.Match(readLine);
                if (!m.Success)
                {
                    throw new RegexDataSetException(
                        "The first row in the file does not match the FirstRowExpression");
                }

                // Remember the header text of every named group; the columns are renamed
                // after all rows have been processed.
                foreach (var sGroup in FirstRowExpression.GetGroupNames())
                {
                    if (sGroup != DefaultGroup)
                    {
                        DataTable.Columns[sGroup].ExtendedProperties.Add(NewName, m.Groups[sGroup].Value);
                    }
                }
            }
            else if (!(isFirstLine && SkipFirstRow))
            {
                ProcessRowObject(readLine);
            }

            SendMessageToCallingApplicationHandler(lineNumber, "Processed line");
            readLine = sr.ReadLine();
            lineNumber += 1;
            isFirstLine = false;
        }
    }

    if (!UseFirstRowNamesAsColumnNames)
    {
        return;
    }

    // Apply the header names captured from the first row.
    foreach (DataColumn column in DataTable.Columns)
    {
        if (column.ExtendedProperties.ContainsKey(NewName))
        {
            column.ColumnName = column.ExtendedProperties[NewName].ToString();
        }
    }
}
/// <summary>
/// Interprets one child node of a table definition: an <c>IF</c> element registers a
/// conditional parser, any other element adds a column to <paramref name="columnBuilder"/>.
/// Nodes that are not elements (comments, text, whitespace) are ignored.
/// </summary>
/// <param name="columnBuilder">Builder that receives the column described by the node.</param>
/// <param name="childNode">Node to interpret.</param>
/// <param name="separator">Separator inherited from the parent; a Separator attribute on the node overrides it.</param>
private void ParseColumnOrParser(RegexColumnBuilder columnBuilder, XmlNode childNode, string separator = "")
{
    // Only elements carry attributes; XmlNode.Attributes is null for comments and text
    // nodes, which would make the attribute loops below throw.
    if (childNode == null || childNode.NodeType != XmlNodeType.Element)
    {
        return;
    }

    // Invariant casing: under Turkish casing rules "if".ToUpper() is "İF", not "IF".
    if (childNode.Name.ToUpperInvariant() == "IF")
    {
        AddConditionalParserFromNode(childNode, separator);
    }
    else
    {
        AddColumnFromNode(columnBuilder, childNode, separator);
    }
}

/// <summary>
/// Builds a <c>ConditionalRegexParser</c> from an <c>IF</c> element and adds it to <c>Parsers</c>.
/// </summary>
private void AddConditionalParserFromNode(XmlNode ifNode, string separator)
{
    string strCondition = String.Empty;
    string strTableName = TableName;
    string strTableStructureType = String.Empty;

    // Conditional Table level attributes...
    foreach (XmlAttribute xAttr in ifNode.Attributes)
    {
        switch (xAttr.Name.ToUpperInvariant())
        {
            case "SEPARATOR":
                separator = xAttr.Value;
                break;
            case "CONDITION":
                strCondition = xAttr.Value;
                break;
            case "TABLENAME":
                strTableName = xAttr.Value;
                break;
            case "TABLESTRUCTURETYPE":
                strTableStructureType = xAttr.Value;
                break;
        }
    }

    RegexColumnBuilder conditionalRCB = new RegexColumnBuilder(separator);

    // The columns come either from the IF element itself or from a shared table structure.
    XmlNode tableNode = ifNode;
    if (!String.IsNullOrWhiteSpace(strTableStructureType))
    {
        tableNode = Configuration.GetDataTableNode(strTableStructureType);
    }

    foreach (XmlNode subNode in tableNode.ChildNodes)
    {
        ParseColumnOrParser(conditionalRCB, subNode, separator);
    }

    ConditionalRegexParser crp = new ConditionalRegexParser()
    {
        ConditionRegex = new Regex(strCondition),
        TableName = strTableName,
        parseRegex = conditionalRCB.CreateRegularExpression(),
        RegexColumns = conditionalRCB.Columns
    };
    Parsers.Add(crp);
}

/// <summary>
/// Adds the column described by <paramref name="columnNode"/> to <paramref name="columnBuilder"/>.
/// </summary>
private void AddColumnFromNode(RegexColumnBuilder columnBuilder, XmlNode columnNode, string separator)
{
    string prefix = "";
    string suffix = "";
    string strCondition = String.Empty;
    bool hasDoubleQuotes = false;
    bool bAutoIncrement = false;
    Int32 intStartValue = 1;
    Int32 intIncrement = 1;
    bool bForeignKey = false;
    bool bPrimaryKey = false;
    string strExpression = String.Empty;
    string strDisplayName = columnNode.Name;
    string strDescription = String.Empty;
    string strDefault = String.Empty;
    int columnLength = 0;
    RegexColumnType rct = RegexColumnType.STRING;

    // Column level attributes...
    foreach (XmlAttribute xAttr in columnNode.Attributes)
    {
        switch (xAttr.Name.ToUpperInvariant())
        {
            case "SEPARATOR":
                separator = xAttr.Value;
                break;
            case "PREFIX":
                prefix = xAttr.Value;
                break;
            case "SUFFIX":
                suffix = xAttr.Value;
                break;
            case "QUOTES":
                hasDoubleQuotes = Boolean.Parse(xAttr.Value);
                break;
            case "LENGTH":
                columnLength = Int16.Parse(xAttr.Value);
                break;
            case "TYPE":
                rct = (RegexColumnType)Enum.Parse(typeof(RegexColumnType), xAttr.Value);
                break;
            case "CONDITION":
                strCondition = xAttr.Value;
                break;
            case "AUTOINCREMENT":
                bAutoIncrement = Boolean.Parse(xAttr.Value);
                break;
            case "STARTVALUE":
            case "START":
            case "SEED":
                intStartValue = Int32.Parse(xAttr.Value);
                break;
            case "INCREMENT":
                intIncrement = Int32.Parse(xAttr.Value);
                break;
            case "EXPRESSION":
                strExpression = xAttr.Value;
                break;
            case "FOREIGNKEY":
                bForeignKey = Boolean.Parse(xAttr.Value);
                break;
            case "UNIQUE":
            case "PRIMARYKEY":
            case "PRIMARY":
                bPrimaryKey = Boolean.Parse(xAttr.Value);
                break;
            case "DISPLAYNAME":
            case "CAPTION":
                strDisplayName = xAttr.Value;
                break;
            case "DESCRIPTION":
                strDescription = xAttr.Value;
                break;
            case "DEFAULT":
                strDefault = xAttr.Value;
                break;
        }
    }

    // A node name consisting only of underscores is passed on as an empty column name.
    string strColumnName = columnNode.Name;
    if (strColumnName.Trim('_') == String.Empty)
    {
        strColumnName = String.Empty;
    }

    // Precedence: auto-increment, then expression, then foreign key. These column kinds
    // do not receive the condition/unique/display-name/description/default settings below.
    if (bAutoIncrement)
    {
        columnBuilder.AddColumn(strColumnName, bAutoIncrement, intStartValue, intIncrement);
        return;
    }

    if (!String.IsNullOrEmpty(strExpression))
    {
        columnBuilder.AddColumn(strColumnName, rct, strExpression);
        return;
    }

    if (bForeignKey)
    {
        columnBuilder.AddColumn(strColumnName, bForeignKey);
        return;
    }

    // This is a regular column with regex... let us add this to the column builder...
    if (!String.IsNullOrEmpty(separator))
    {
        if (hasDoubleQuotes)
        {
            columnBuilder.AddColumn('\"' + strColumnName + '\"', separator[0], rct);
        }
        else
        {
            // The last column takes the rest of the line. Look for a following *element*
            // so that a trailing comment or whitespace node does not hide the last column.
            bool isLastColumn = true;
            for (XmlNode sibling = columnNode.NextSibling; sibling != null; sibling = sibling.NextSibling)
            {
                if (sibling.NodeType == XmlNodeType.Element)
                {
                    isLastColumn = false;
                    break;
                }
            }

            if (isLastColumn)
            {
                columnBuilder.AddColumn(strColumnName, ".*", rct);
            }
            else
            {
                columnBuilder.AddColumn(strColumnName, "[^" + columnBuilder.RegexFormattedOutput(separator[0]) + "\\n]*", prefix, suffix, rct);
            }
        }
    }
    else
    {
        if (columnLength > 0)
        {
            columnBuilder.AddColumn(strColumnName, columnLength, rct);
        }
        else
        {
            columnBuilder.AddColumn(strColumnName, ".*", rct);
        }
    }

    // The remaining settings are applied to the column that was just added.
    RegexColumn addedColumn = columnBuilder.Columns[columnBuilder.Columns.Count - 1];

    if (!String.IsNullOrWhiteSpace(strCondition))
    {
        // There is a condition to be matched with the value...
        addedColumn.ValueMatchingCondition = strCondition;
    }

    if (bPrimaryKey)
    {
        addedColumn.IsUnique = bPrimaryKey;
    }

    if (strDisplayName != strColumnName)
    {
        addedColumn.DisplayName = strDisplayName;
    }

    if (!String.IsNullOrEmpty(strDescription))
    {
        addedColumn.Description = strDescription;
    }

    if (!String.IsNullOrEmpty(strDefault))
    {
        addedColumn.Default = strDefault;
    }
}
/// <summary>
/// Creates a data set whose columns are defined by an already configured column builder.
/// </summary>
/// <param name="columnBuilder">Builder assigned to <c>ColumnBuilder</c>.</param>
public RegexDataSet(RegexColumnBuilder columnBuilder)
{
    this.ColumnBuilder = columnBuilder;
}