// when left.CurrentRecord is always != null.
// joiner also does table conversion if necessary.

/// <summary>
/// Prepares a joiner for two record sources: wraps the current record of each
/// side in a ColumnWrapper, computes the output column names (all left columns
/// followed by the right side's non-key columns) and builds the filler string
/// used for an empty right side.
/// </summary>
/// <param name="left">Left source; its CurrentRecord and CurrentSourceName are read.</param>
/// <param name="right">Right source; its CurrentRecord and CurrentSourceName are read.</param>
/// <param name="columnSeparator">Column separator handed to both wrappers and used for the filler.</param>
public RecordJoiner(InternalRecordSource left, InternalRecordSource right, char columnSeparator) {
    _separator = columnSeparator;

    _leftWrapper = new ColumnWrapper(left.CurrentRecord, left.CurrentSourceName, _separator);
    _rightWrapper = new ColumnWrapper(right.CurrentRecord, right.CurrentSourceName, _separator);
    _leftWrapper.SetRecord(left.CurrentRecord);
    _rightWrapper.SetRecord(right.CurrentRecord);

    string[] leftNames = _leftWrapper.ColumnNames;
    string[] rightNames = _rightWrapper.ColumnNames;
    int[] rightKeyNos = _rightWrapper.KeyColumnNos;

    // Mark the right-side key columns by position. The key columns are dropped
    // from the right side since they are redundant information. Marking by
    // position (instead of walking KeyColumnNos with a single forward cursor)
    // works for empty, unordered or repeated key column numbers and keeps the
    // size of the output array in step with the number of names stored in it.
    bool[] isRightKey = new bool[rightNames.Length];
    int numRightDataColumns = rightNames.Length;
    foreach (int keyNo in rightKeyNos) {
        if (keyNo >= 0 && keyNo < isRightKey.Length && !isRightKey[keyNo]) {
            isRightKey[keyNo] = true;
            numRightDataColumns--;
        }
    }

    _outputColumnNames = new string[leftNames.Length + numRightDataColumns];

    // Left columns are copied unchanged; remember their names so clashing
    // right-side names can be qualified below.
    Hashtable leftNameSet = new Hashtable();
    int newCol = 0;
    foreach (string name in leftNames) {
        _outputColumnNames[newCol++] = name;
        leftNameSet[name] = 1;
    }

    for (int i = 0; i < rightNames.Length; i++) {
        if (isRightKey[i]) continue; // skip keys

        string name = rightNames[i];
        if (leftNameSet.ContainsKey(name)) {
            // Same name as a left column: prefix with the right source's file name.
            name = Path.GetFileNameWithoutExtension(right.CurrentSourceName) + ":" + name;
        }
        _outputColumnNames[newCol++] = name;
    }

    // create empty columns for right side: one separator per right data column
    for (int i = 0; i < numRightDataColumns; i++) {
        _emptyRightColumns += _separator;
    }
}
// Emits, for every incoming record, a new DataRecord whose Key is built from
// the columns selected by KeyExpression, joined with _separator.
// The column wrapper is created lazily from the first record seen; when that
// first record is a header line it is consumed and nothing is emitted for it.
public override void ProcessRecord(DataRecord record, RecordAccepter accepter) {
    if (_wrapper == null) {
        HintMessageToConsole();

        bool headerLine = false;
        TableRecord tableRecord = record as TableRecord;
        if (tableRecord != null) {
            _wrapper = new ColumnWrapper(tableRecord);
        } else {
            headerLine = ColumnWrapper.HasHeaders(KeyExpression);
            _wrapper = new ColumnWrapper(record, _sourceName, _separator, headerLine);
        }
        _keyColumnNos = _wrapper.GetColumnNos(KeyExpression);

        // eat up this record containing headers
        if (headerLine) {
            return;
        }
    }

    DataRecord keyOnlyRecord = new DataRecord();
    _wrapper.SetRecord(record);

    // build the key: selected columns in order, separator between them
    _stringBuilder.Length = 0;
    int keyIndex = 0;
    while (keyIndex < _keyColumnNos.Length) {
        if (keyIndex > 0) {
            _stringBuilder.Append(_separator);
        }
        _wrapper.AppendColumn(_keyColumnNos[keyIndex], _stringBuilder);
        keyIndex++;
    }

    keyOnlyRecord.Key = _stringBuilder.ToString();
    accepter.AddRecord(keyOnlyRecord);
}
// Converts incoming records to TableRecords keyed by KeyExpression.
//
// _caseNum selects the per-record behavior:
//   0: null conversion (requested key columns == incoming key columns)
//   1: incoming records are table records; only the key columns are replaced
//   2: everything else; a new TableRecord is built from the wrapped columns
//
// A null conversion is costly, potentially very costly. Rather than rebuilding
// the whole tree without the conversion (which could also involve sorting
// nodes), the user is told to remove it and the process exits.
public override void ProcessRecord(DataRecord record, RecordAccepter accepter) {
    // One-time setup driven by the first record.
    if (_wrapper == null) {
        HintMessageToConsole();

        bool headerLine = false;
        int detectedCase;
        TableRecord firstAsTable = record as TableRecord;
        if (firstAsTable != null) {
            _wrapper = new ColumnWrapper(firstAsTable);
            detectedCase = 1; // table record
        } else {
            detectedCase = 2; // everything else
            headerLine = ColumnWrapper.HasHeaders(KeyExpression);
            _wrapper = new ColumnWrapper(record, _sourceName, _columnSeparator, headerLine);

            if (_columnNames == null) {
                _columnNames = _wrapper.ColumnNames; // use default
            } else if (_columnNames.Length != _wrapper.ColumnNames.Length) {
                // column names were provided by the filter user but do not fit
                Console.WriteLine("too few column names provided");
                Environment.Exit(1);
            }
        }

        // Making table records is costly. When the requested key columns match
        // the current ones, _caseNum is left untouched (the null-conversion case).
        _keyColumnNos = _wrapper.GetColumnNos(KeyExpression);
        int[] incomingKeyNos = _wrapper.KeyColumnNos;
        bool keysDiffer = _keyColumnNos.Length != incomingKeyNos.Length;
        for (int k = 0; !keysDiffer && k < _keyColumnNos.Length; k++) {
            keysDiffer = _keyColumnNos[k] != incomingKeyNos[k];
        }
        if (keysDiffer) {
            _caseNum = detectedCase;
        }

        // Flat files that define headers are special-cased: the header line is
        // dropped from the input here, so a null conversion is allowed.
        if (headerLine) {
            _caseNum = 2;
            return; // eat up this record containing headers
        }
    }

    int mode = _caseNum;
    if (mode == 0) {
        // null conversion, see comments above
        Console.WriteLine("Null-table conversions are costly (i.e. key expression equal to incoming key).");
        Console.WriteLine("Remove unnecessary 'ToTable(" + KeyExpression + ")' from expression.");
        Environment.Exit(1);
    } else if (mode == 1) {
        // table record: re-key in place and pass it on
        TableRecord rekeyed = record as TableRecord;
        rekeyed.KeyColumnNos = _keyColumnNos;
        accepter.AddRecord(rekeyed);
    } else if (mode == 2) {
        _wrapper.SetRecord(record);

        // ignore null records: no key no data
        if (record.Key.Length == 0 && record.Data == null) {
            return;
        }

        TableRecord built = new TableRecord(_columnNames, _columnSeparator);
        built.KeyColumnNos = _keyColumnNos;

        // all wrapped columns, tab-delimited
        _stringBuilder.Length = 0;
        int columnIndex = 0;
        while (columnIndex < _wrapper.ColumnNames.Length) {
            if (columnIndex > 0) {
                _stringBuilder.Append('\t');
            }
            _wrapper.AppendColumn(columnIndex, _stringBuilder);
            columnIndex++;
        }

        built.DelimitedColumns = _stringBuilder.ToString();
        accepter.AddRecord(built);
    }
}
// Converts each incoming record into a CountRecord whose Key is built from the
// columns selected by KeyExpression and whose Count comes from one of:
//   case 0: the incoming CountRecord's own Count (the record is a CountRecord
//           and the count column is the last column)
//   case 1: the constant 1 (no count expression given)
//   case 2: the count column parsed as ulong
//
// The wrapper, key columns, count column and case are determined once, from the
// first record seen. A first record that is a header line is consumed without
// producing output. Records with an empty key are skipped.
//
// A count expression ending in '!' makes unparsable counts count as zero
// (tallied in _numParseErrors); otherwise an unparsable count prints an error
// and exits the process.
public override void ProcessRecord(DataRecord record, RecordAccepter accepter) {
    if (_wrapper == null) {
        bool hasHeaders = false;
        HintMessageToConsole();

        if (record is TableRecord) {
            _wrapper = new ColumnWrapper(record as TableRecord);
        } else {
            hasHeaders = ColumnWrapper.HasHeaders(KeyExpression);
            _wrapper = new ColumnWrapper(record, _sourceName, _separator, hasHeaders);
        }
        _keyColumnNos = _wrapper.GetColumnNos(KeyExpression);

        // If the count expression ends in a bang we allow ulong parse errors,
        // otherwise we abort on an error. The marker is a literal character, so
        // compare ordinally rather than with the current culture.
        if (_countExpression != null && _countExpression.EndsWith("!", StringComparison.Ordinal)) {
            _interpretAsZero = true;
            _countExpression = _countExpression.TrimEnd('!');
        }

        if (_countExpression != null) {
            int[] countColumnNos = _wrapper.GetColumnNos(_countExpression);
            if (countColumnNos.Length != 1) {
                Console.WriteLine("Illegal Count Column expression");
                Environment.Exit(1);
            }

            _countColumnNo = countColumnNos[0];
            if (_countColumnNo > _wrapper.ColumnNames.Length - 1) {
                Console.WriteLine("Illegal Count Column expression");
                Environment.Exit(1);
            }
        }

        if (record is CountRecord && _countColumnNo == _wrapper.ColumnNames.Length - 1) {
            // count record and the count column is the last column
            _caseNum = 0;
        } else if (_countExpression == null) {
            // no expression given: use 1
            _caseNum = 1;
        } else {
            _caseNum = 2;
        }

        if (hasHeaders) return; // eat up this record containing headers
    }

    // Ignore blank lines coming in (records without a key).
    if (record.Key.Length == 0) return;

    CountRecord outRecord = new CountRecord();
    _wrapper.SetRecord(record);

    // build the key
    _stringBuilder.Length = 0;
    for (int i = 0; i < _keyColumnNos.Length; i++) {
        if (i != 0) _stringBuilder.Append(_separator);
        _wrapper.AppendColumn(_keyColumnNos[i], _stringBuilder);
    }
    outRecord.Key = _stringBuilder.ToString();

    // Case 0 is special-cased because it avoids converting the count from
    // ulong to string and back to ulong.
    switch (_caseNum) {
        case 0:
            outRecord.Count = ((CountRecord)record).Count;
            break;

        case 1:
            outRecord.Count = 1;
            break;

        case 2: {
            _stringBuilder.Length = 0;
            _wrapper.AppendColumn(_countColumnNo, _stringBuilder);
            string countText = _stringBuilder.ToString();

            // TryParse instead of Parse inside a catch-all: bad input is an
            // expected condition here, and unrelated exceptions must not be
            // silently turned into a zero count.
            ulong parsedCount;
            if (ulong.TryParse(countText, out parsedCount)) {
                outRecord.Count = parsedCount;
            } else {
                if (!_interpretAsZero) {
                    Console.WriteLine("Illegal ulong string '{0}'.\nTo interpret as zero: count column expression = ${1}!", countText, _countColumnNo + 1);
                    Environment.Exit(-1);
                }
                outRecord.Count = 0;
                _numParseErrors++;
            }
            break;
        }
    }

    accepter.AddRecord(outRecord);
}