/// <summary>
/// Rebuilds the data table schema from the named groups of ContentExpression.
/// Every group other than DefaultGroup becomes a column. The column is typed
/// as string unless an entry in _regexColumns with the same ColumnName
/// supplies a different type through ColumnTypeAsType.
/// </summary>
private void BuildRegexSchemaIntoDataSet()
{
    bool hasExpression = ContentExpression != null;
    if (!hasExpression && !ContentExpressionHasChanged)
    {
        return;
    }

    RemoveDataTable();

    // The expression may have been changed to null: the old table is still
    // removed (as before), but there are no groups to build columns from.
    // Previously this path dereferenced the null expression and threw.
    if (!hasExpression)
    {
        return;
    }

    foreach (string groupName in ContentExpression.GetGroupNames())
    {
        if (groupName == DefaultGroup)
        {
            continue;
        }

        DataColumn column = new DataColumn { DataType = typeof(string) };
        if (_regexColumns != null)
        {
            // The first configured column with a matching name decides the type.
            foreach (RegexColumn regexColumn in _regexColumns)
            {
                if (regexColumn.ColumnName == groupName)
                {
                    column.DataType = regexColumn.ColumnTypeAsType;
                    break;
                }
            }
        }

        column.ColumnName = groupName;
        DataTable.Columns.Add(column);
    }
}
/// <summary>
/// Reads every line of the text file and tries to match it with the
/// content regular expression. Every matching line is added as a new
/// row of the data table; every other line is passed to AddMisRead.
/// When UseFirstRowNamesAsColumnNames is set and SkipFirstRow is not,
/// the first line is matched with FirstRowExpression instead and its
/// group values become the column names once all rows are loaded.
/// </summary>
/// <exception cref="ApplicationException">TextFile is null.</exception>
/// <exception cref="TextFileDataSetException">
/// The first row must supply column names, but FirstRowExpression is
/// not set or does not match the first line.
/// </exception>
public void Fill()
{
    BuildRegexSchemaIntoDataSet();
    if (TextFile == null)
    {
        throw new ApplicationException("No stream available to convert to a DataSet");
    }

    TextFile.Seek(0, SeekOrigin.Begin);

    // NOTE(review): the reader is not disposed. Disposing a StreamReader also
    // closes the underlying stream, so a `using` here would close TextFile;
    // confirm whether callers reuse the stream before changing this.
    StreamReader reader = new StreamReader(this.TextFile);

    bool isFirstLine = true;
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        if (isFirstLine && UseFirstRowNamesAsColumnNames && !SkipFirstRow)
        {
            if (FirstRowExpression == null)
            {
                throw new TextFileDataSetException("FirstRowExpression is not set, but UseFirstRowNamesAsColumnNames is set to true");
            }

            // Match once and inspect Success instead of IsMatch followed by Match.
            Match headerMatch = FirstRowExpression.Match(line);
            if (!headerMatch.Success)
            {
                throw new TextFileDataSetException("The first row in the file does not match the FirstRowExpression");
            }

            // Remember the new name on each column; the rename itself happens
            // after all rows are loaded, because rows are addressed by group name.
            foreach (string groupName in FirstRowExpression.GetGroupNames())
            {
                if (groupName != DefaultGroup)
                {
                    DataTable.Columns[groupName].ExtendedProperties.Add(NewName, headerMatch.Groups[groupName].Value);
                }
            }
        }
        else if (!(isFirstLine && SkipFirstRow))
        {
            Match contentMatch = ContentExpression.Match(line);
            if (contentMatch.Success)
            {
                DataRow newRow = DataTable.NewRow();
                foreach (string groupName in ContentExpression.GetGroupNames())
                {
                    if (groupName == DefaultGroup)
                    {
                        continue;
                    }

                    string rawValue = contentMatch.Groups[groupName].Value;
                    Type columnType = newRow.Table.Columns[groupName].DataType;
                    if (columnType == typeof(int))
                    {
                        newRow[groupName] = Convert.ToInt32(rawValue);
                    }
                    else if (columnType == typeof(double))
                    {
                        newRow[groupName] = Convert.ToDouble(rawValue);
                    }
                    else if (columnType == typeof(DateTime))
                    {
                        newRow[groupName] = Convert.ToDateTime(rawValue);
                    }
                    else
                    {
                        newRow[groupName] = rawValue;
                    }
                }

                DataTable.Rows.Add(newRow);
            }
            else
            {
                AddMisRead(line);
            }
        }
        // A skipped first row is neither loaded nor reported as a misread.

        isFirstLine = false;
    }

    if (UseFirstRowNamesAsColumnNames)
    {
        foreach (DataColumn column in DataTable.Columns)
        {
            if (column.ExtendedProperties.ContainsKey(NewName))
            {
                column.ColumnName = column.ExtendedProperties[NewName].ToString();
            }
        }
    }
}
/// <summary>
/// Parses a block of text line by line and loads every recognised line as a row.
/// A line is first matched against ContentExpression; a resulting row goes into
/// DataTable. If that produces no row, the parsers in Parsers are tried in order
/// and the first one that yields a row adds it to the table named by its TableName.
/// Lines that produce no row at all are passed to AddMisRead.
/// </summary>
/// <param name="lines">
/// Text to parse. It is split on Environment.NewLine and empty entries are skipped.
/// </param>
protected virtual void ParseAndLoadLines(string lines)
{
    string[] lineSeparators = { Environment.NewLine };
    foreach (string readLine in lines.Split(lineSeparators, StringSplitOptions.RemoveEmptyEntries))
    {
        bool lineParsed = false;

        // Match once and inspect Success instead of IsMatch followed by Match.
        Match contentMatch = ContentExpression?.Match(readLine);
        if (contentMatch != null && contentMatch.Success)
        {
            // A content row starts from a fresh value dictionary.
            rowDict = new Dictionary<string, object>();
            bool importRow = TryCollectValues(
                contentMatch,
                ContentExpression.GetGroupNames(),
                name => _regexColumns?.Find(r => r.ColumnName == name),
                DataTable);
            if (importRow)
            {
                AppendRow(DataTable);
                lineParsed = true;
            }
        }

        if (!lineParsed)
        {
            foreach (ConditionalRegexParser parser in Parsers)
            {
                if (!parser.ConditionRegex.IsMatch(readLine))
                {
                    continue;
                }

                // rowDict is not reset for parser rows (it was not in the original
                // either), so values already in it are kept alongside the new ones.
                DataTable parserTable = Tables[parser.TableName];
                Match parserMatch = parser.parseRegex.Match(readLine);
                bool importRow = TryCollectValues(
                    parserMatch,
                    parser.parseRegex.GetGroupNames(),
                    name => parser.RegexColumns?.Find(r => r.ColumnName == name),
                    parserTable);
                if (importRow)
                {
                    AppendRow(parserTable);
                    lineParsed = true;
                    break; // the first parser that yields a row wins
                }
            }
        }

        if (!lineParsed)
        {
            AddMisRead(readLine);
        }
    }

    // Stores the converted group values of a match in rowDict. Returns false as
    // soon as a value fails the ValueMatchingCondition of its configured column.
    bool TryCollectValues(Match match, IEnumerable<string> groupNames, Func<string, RegexColumn> findColumn, DataTable table)
    {
        foreach (string groupName in groupNames)
        {
            // Skip the default group and groups whose name is just a number.
            if (groupName == DefaultGroup || Int16.TryParse(groupName, out short _))
            {
                continue;
            }

            string rawValue = match.Groups[groupName].Value;

            // A group without a configured RegexColumn has no value condition.
            // The schema builder allows such groups, so the lookup must tolerate
            // a missing column instead of dereferencing null.
            string condition = findColumn(groupName)?.ValueMatchingCondition;
            if (!String.IsNullOrWhiteSpace(condition) && !Regex.IsMatch(rawValue, condition))
            {
                return false;
            }

            DataColumn targetColumn = table.Columns[groupName];
            if (targetColumn != null)
            {
                rowDict[groupName] = ConvertField(targetColumn.DataType, rawValue.Trim('\"'));
            }
        }

        return true;
    }

    // Converts the text to the column's type for int, double and DateTime columns;
    // any other column type receives the text unchanged.
    object ConvertField(Type columnType, string text)
    {
        if (columnType == typeof(int))
        {
            return Convert.ToInt32(text);
        }

        if (columnType == typeof(double))
        {
            return Convert.ToDouble(text);
        }

        if (columnType == typeof(DateTime))
        {
            return Convert.ToDateTime(text);
        }

        return text;
    }

    // Creates a row, fills it through PopulateDictionaryToRow, adds it to the
    // table, then passes the stored row to PopulateRowToDictionary.
    void AppendRow(DataTable table)
    {
        DataRow newRow = table.NewRow();
        PopulateDictionaryToRow(newRow);
        table.Rows.Add(newRow);
        PopulateRowToDictionary(table.Rows[table.Rows.Count - 1]);
    }
}
/// <summary>
/// Creates a token that holds the given expression.
/// </summary>
/// <param name="expression">Expression stored in the token's expression field.</param>
public SubstitutionToken(ContentExpression expression)
{
    this.expression = expression;
}