コード例 #1
0
ファイル: RecordReduction.cs プロジェクト: zbxzc35/BoostTree
        /// <summary>
        /// Merges runs of consecutive records that share the same key bytes and
        /// hands each finished record (reduced or untouched) to the accepter.
        /// </summary>
        /// <param name="record">
        /// The next input record, or null on the final call so that any
        /// pending state can be flushed.
        /// </param>
        /// <param name="accepter">Receives at most one finished record per call.</param>
        public void ProcessRecord(DataRecord record, RecordAccepter accepter)
        {
            // The very first record is only remembered; nothing can be emitted
            // until there is a following record to compare it with.
            if (_prevInputRecord == null) {
                _prevInputRecord = record;
                return;
            }

            _currentInputRecord = record;

            // A null record (the final flush call) never matches the previous key.
            bool keysMatch = record != null
                && TMSNStoreUtils.Utf8BytesCompare(_prevInputRecord.KeyBytes, _currentInputRecord.KeyBytes) == 0;

            DataRecord finishedRecord = null;

            if (keysMatch) {
                // First match of a run: the previous record becomes the accumulator.
                if (!_reductionOccured) {
                    _accumulatorRecord = _prevInputRecord;
                    _reductionOccured = true;
                }

                // Fold the current record in; nothing is emitted while the run continues.
                ((IReducableRecord)_accumulatorRecord).ReduceData((IReducableRecord)_currentInputRecord);
            }
            else if (_reductionOccured) {
                // The run ended: emit the accumulator and reset for the next run.
                _reductionOccured = false;
                finishedRecord = (DataRecord)_accumulatorRecord;
            }
            else {
                // The previous record had no duplicates, so it passes through as is.
                finishedRecord = (DataRecord)_prevInputRecord;
            }

            // Advance the one-record window.
            _prevInputRecord = _currentInputRecord;

            if (finishedRecord != null) {
                accepter.AddRecord(finishedRecord);
            }
        }
コード例 #2
0
ファイル: RecordFilter.cs プロジェクト: zbxzc35/BoostTree
        /// <summary>
        /// Emits one record per entry of <c>_source.Statistics</c> (the entry is
        /// used as the record key) and then marks the accepter as done.
        /// The incoming record itself is not used.
        /// </summary>
        /// <param name="record">Ignored by this filter.</param>
        /// <param name="accepter">Receives the statistic records and the done flag.</param>
        public void ProcessRecord(DataRecord record, RecordAccepter accepter)
        {
            foreach (string statistic in _source.Statistics) {
                DataRecord statisticRecord = new DataRecord();
                statisticRecord.Key = statistic;

                accepter.AddRecord(statisticRecord);
            }

            // Everything has been emitted in one go; no further input is needed.
            accepter.IsDone = true;
        }
コード例 #3
0
ファイル: RecordFilter.cs プロジェクト: zbxzc35/BoostTree
 /// <summary>
 /// Finish hook with an intentionally empty body: no records are added to
 /// the accepter and no state is touched.
 /// </summary>
 /// <param name="accepter">Unused.</param>
 public void Finish(RecordAccepter accepter)
 {
 }
コード例 #4
0
ファイル: RecordFilter.cs プロジェクト: zbxzc35/BoostTree
 /// <summary>
 /// Default per-record hook. The base implementation does nothing; it is
 /// virtual so that derived filters can override it.
 /// </summary>
 /// <param name="record">Unused by the base implementation.</param>
 /// <param name="accepter">Unused by the base implementation.</param>
 public virtual void ProcessRecord(DataRecord record, RecordAccepter accepter)
 {
 }
コード例 #5
0
ファイル: RecordFilter.cs プロジェクト: zbxzc35/BoostTree
 /// <summary>
 /// Default finish hook. The base implementation does nothing; it is
 /// virtual so that derived filters can override it.
 /// </summary>
 /// <param name="accepter">Unused by the base implementation.</param>
 public virtual void Finish(RecordAccepter accepter)
 {
 }
コード例 #6
0
ファイル: RecordFilter.cs プロジェクト: zbxzc35/BoostTree
        /// <summary>
        /// Creates a driver around <paramref name="filter"/>, decides how sort
        /// information is propagated, and builds the process-tree comment.
        /// </summary>
        /// <param name="filter">The filter this driver runs.</param>
        public RecordFilterDriver(IRecordFilter filter)
        {
            _filter = filter;
            _recordAccepter = new RecordAccepter();

            // Some filters (ToTable, ToCount, ...) can bless the input as sorted
            // (the responsibility falls on the user to be correct).
            //
            // If the filter is a SortHintingFilter and has a hint, the driver
            // takes its sorting from the hint instead of passing the input's
            // sorting through. Otherwise the input sorting is passed through
            // only when the filter preserves key order.
            SortHintingFilter hintingFilter = filter as SortHintingFilter;

            if (hintingFilter != null && hintingFilter.HaveHint) {
                _passThruInputSorting = false;
                _sorting.IsSorted = hintingFilter.IsSorted;
                _sorting.IsSortedAscending = hintingFilter.IsSortedAscending;
            }

            else {
                _passThruInputSorting = filter.KeyOrderIsPreserved;
            }

            // Strip the well-known namespace prefix from the type name. The
            // comparison is ordinal because type names are identifiers, not
            // linguistic text, and the prefix length is derived from the
            // constant rather than hard-coded.
            const string namespacePrefix = "Microsoft.TMSN.Data.";

            _filterType = _filter.GetType().ToString();
            if (_filterType.StartsWith(namespacePrefix, System.StringComparison.Ordinal)) {
                _filterType = _filterType.Remove(0, namespacePrefix.Length);
            }

            ProcessTreeComment = "[filter:" + _filterType + "]";
        }
コード例 #7
0
ファイル: RecordFilter.cs プロジェクト: zbxzc35/BoostTree
        /// <summary>
        /// Called for each record (line). Keeps a uniform random sample of the
        /// input in <c>_recordArray</c> using reservoir sampling; nothing is
        /// given to the accepter here.
        /// </summary>
        /// <param name="record">The next input record.</param>
        /// <param name="accepter">Not used; no records are output until the input is done.</param>
        public void ProcessRecord(DataRecord record, RecordAccepter accepter)
        {
            // While the reservoir still has room, every record is kept.
            if (_rank < _recordArray.Length) {
                _recordArray[_rank] = record;
            }

            else {
                // Pick a slot uniformly from 0 to _rank INCLUSIVE. NextDouble()
                // is in [0, 1), so the multiplier must be (_rank + 1); using
                // _rank alone never yields _rank itself, which made the first
                // record past capacity always replace an entry and biased the
                // sample toward later records.
                long slot = (long)(_random.NextDouble() * (double)(_rank + 1));
                if (slot < _recordArray.Length) {
                    _recordArray[slot] = record;
                }
            }

            // _rank counts the records seen so far.
            _rank++;
        }
コード例 #8
0
ファイル: RecordFilter.cs プロジェクト: zbxzc35/BoostTree
        /// <summary>
        /// Called at the end of input. Outputs the records held in
        /// <c>_recordArray</c>, limited to the number of records actually seen
        /// when the input was too short to fill the array.
        /// </summary>
        /// <param name="accepter">Receives the kept records in array order.</param>
        public void Finish(RecordAccepter accepter)
        {
            int capacity = _recordArray.Length;

            // Fewer records than slots: only the first _rank entries are populated.
            int numKept = _rank < capacity ? (int)_rank : capacity;

            for (int index = 0; index < numKept; index++) {
                accepter.AddRecord(_recordArray[index]);
            }
        }
コード例 #9
0
ファイル: RecordFilter.cs プロジェクト: zbxzc35/BoostTree
        /// <summary>
        /// Passes records through until <c>_recordLimit</c> records have been
        /// accepted; after that the record is dropped and the accepter is
        /// marked as done.
        /// </summary>
        /// <param name="record">The next input record.</param>
        /// <param name="accepter">Receives the record, or the done flag once the limit is hit.</param>
        public void ProcessRecord(DataRecord record, RecordAccepter accepter)
        {
            // Still under the limit: forward the record and count it.
            if (_numRecords < _recordLimit) {
                accepter.AddRecord(record);
                _numRecords++;
                return;
            }

            // Limit reached: signal that no more input is wanted.
            accepter.IsDone = true;
        }
コード例 #10
0
ファイル: RecordFilter.cs プロジェクト: zbxzc35/BoostTree
 /// <summary>
 /// Finish hook with nothing to do: no records are added to the accepter.
 /// </summary>
 /// <param name="accepter">Unused.</param>
 public void Finish(RecordAccepter accepter)
 {
     // do nothing
 }
コード例 #11
0
ファイル: RecordReduction.cs プロジェクト: zbxzc35/BoostTree
 /// <summary>
 /// Finishes processing by calling <c>ProcessRecord</c> with a null record.
 /// </summary>
 /// <param name="accepter">Passed straight through to <c>ProcessRecord</c>.</param>
 public void Finish(RecordAccepter accepter)
 {
     // A null record is the end-of-input signal for ProcessRecord.
     ProcessRecord(null, accepter);
 }
コード例 #12
0
ファイル: TableJoin.cs プロジェクト: zbxzc35/BoostTree
        /// <summary>
        /// Converts an incoming record into a <c>TableRecord</c> keyed by the
        /// columns named in <c>KeyExpression</c> and adds it to the accepter.
        /// </summary>
        /// <remarks>
        /// The first call (while <c>_wrapper</c> is null) builds the column
        /// wrapper, resolves the key columns and selects <c>_caseNum</c>.
        /// Later calls only run the switch at the bottom. Several error paths
        /// write to the console and terminate the process via
        /// <c>Environment.Exit(1)</c>.
        /// </remarks>
        /// <param name="record">The next input record.</param>
        /// <param name="accepter">Receives the resulting table record, if any.</param>
        public override void ProcessRecord(DataRecord record, RecordAccepter accepter)
        {
            // caseNum 0: null conversion (key expression == incoming key)
            // caseNum 1: table record
            // caseNum 2: everything else

            // So, the thing is a null conversion is costly, potentially super
            // costly.  Instead of trying to rebuild the whole tree without
            // conversion (which brought with it potential sorting nodes as
            // well) we will error and message the user to remove the null
            // conversion.

            // initialize the wrapper (one-time setup on the first record)
            if (_wrapper == null) {
                bool hasHeaders = false;
                int caseNum = 0;

                HintMessageToConsole();

                // case 1: table record.
                if (record is TableRecord) {
                    TableRecord t = record as TableRecord;
                    _wrapper = new ColumnWrapper(t);
                    caseNum = 1; // table record
                }

                else {
                    caseNum = 2; // everything else
                    hasHeaders = ColumnWrapper.HasHeaders(KeyExpression);
                    _wrapper = new ColumnWrapper(record, _sourceName, _columnSeparator, hasHeaders);

                    // if columnNames provided by filter user
                    if (_columnNames != null) {
                        // NOTE(review): this fires on ANY length mismatch, so the
                        // "too few" message is also printed when too many names
                        // are given -- confirm whether the wording should change.
                        if (_columnNames.Length != _wrapper.ColumnNames.Length) {
                            Console.WriteLine("too few column names provided");
                            Environment.Exit(1);
                        }
                    }

                    else {
                        _columnNames = _wrapper.ColumnNames; // use default
                    }
                }

                // making table records is costly.  If the key columns have not
                // changed there is no reason to do the conversion.  (since
                // sorting and joining work the same for all records).

                _keyColumnNos = _wrapper.GetColumnNos(KeyExpression);
                int[] currentKeyColumnNos = _wrapper.KeyColumnNos;

                // _keyColumnNos == the requested new key columns
                // _caseNum is only assigned when the requested key columns differ
                // from the current ones (in count or in any position). Otherwise
                // it keeps its prior value -- presumably the field default of 0,
                // i.e. the "null conversion" case; verify against the declaration.
                if (_keyColumnNos.Length == currentKeyColumnNos.Length) {
                    for (int i = 0; i < _keyColumnNos.Length; i++) {
                        if (_keyColumnNos[i] != currentKeyColumnNos[i])
                            _caseNum = caseNum;
                    }
                }

                else _caseNum = caseNum;

                // we special case flat files converting to tables allowing null conversions
                // since when they define headers ToTable is evaluating and dropping them
                // from the input.
                if (hasHeaders) {
                    _caseNum = 2;
                    return;  // eat up this record containing headers
                }
            }

            switch (_caseNum) {
                case 0: // null conversion see comments above
                    Console.WriteLine("Null-table conversions are costly (i.e. key expression equal to incoming key).");
                    Console.WriteLine("Remove unnecessary 'ToTable(" + KeyExpression + ")' from expression.");
                    Environment.Exit(1);
                    break;

                case 1: // table record
                    // The incoming table record is re-keyed in place and forwarded.
                    // NOTE(review): 'as' yields null if a later record is not a
                    // TableRecord, which would throw on the next line -- confirm
                    // that the input is homogeneous.
                    TableRecord outRecord = record as TableRecord;
                    outRecord.KeyColumnNos = _keyColumnNos;
                    accepter.AddRecord(outRecord);
                    break;

                case 2: // everything else
                    _wrapper.SetRecord(record);

                    // ignore null records: no key no data
                    if (record.Key.Length == 0 && record.Data == null)
                        return;

                    // A fresh TableRecord is allocated per input record (outRecord
                    // is the local declared in case 1; switch sections share scope).
                    outRecord = new TableRecord(_columnNames, _columnSeparator);
                    //if (_outRecord == null) {
                    //	_outRecord = new TableRecord(_columnNames, _columnSeparator);
                    //}

                    outRecord.KeyColumnNos = _keyColumnNos;
                    _stringBuilder.Length = 0;

                    // Join every wrapped column with a tab.
                    // NOTE(review): the delimiter here is a literal tab while the
                    // TableRecord is built with _columnSeparator -- confirm that
                    // DelimitedColumns is always expected to be tab-delimited.
                    for (int i = 0; i < _wrapper.ColumnNames.Length; i++) {
                        if (i != 0) _stringBuilder.Append('\t');
                        _wrapper.AppendColumn(i, _stringBuilder);
                    }

                    outRecord.DelimitedColumns = _stringBuilder.ToString();
                    accepter.AddRecord(outRecord);
                    break;
            }
        }
コード例 #13
0
ファイル: TableJoin.cs プロジェクト: zbxzc35/BoostTree
        /// <summary>
        /// Emits a new <c>DataRecord</c> whose key is built from the columns
        /// selected by <c>KeyExpression</c>, joined with <c>_separator</c>.
        /// </summary>
        /// <remarks>
        /// The first call builds the column wrapper and resolves the key column
        /// numbers. When the key expression indicates headers, that first
        /// record is consumed without producing output.
        /// </remarks>
        /// <param name="record">The next input record.</param>
        /// <param name="accepter">Receives the key-only output record.</param>
        public override void ProcessRecord(DataRecord record, RecordAccepter accepter)
        {
            // One-time setup on the first record.
            if (_wrapper == null) {
                HintMessageToConsole();

                bool firstRecordIsHeader = false;
                TableRecord tableRecord = record as TableRecord;

                if (tableRecord != null) {
                    _wrapper = new ColumnWrapper(tableRecord);
                }
                else {
                    firstRecordIsHeader = ColumnWrapper.HasHeaders(KeyExpression);
                    _wrapper = new ColumnWrapper(record, _sourceName, _separator, firstRecordIsHeader);
                }

                _keyColumnNos = _wrapper.GetColumnNos(KeyExpression);

                // The header record carries no data, so it is swallowed here.
                if (firstRecordIsHeader) {
                    return;
                }
            }

            DataRecord keyRecord = new DataRecord();

            _wrapper.SetRecord(record);

            // Assemble the key: selected columns separated by _separator.
            _stringBuilder.Length = 0;

            int position = 0;
            foreach (int columnNo in _keyColumnNos) {
                if (position > 0) {
                    _stringBuilder.Append(_separator);
                }

                _wrapper.AppendColumn(columnNo, _stringBuilder);
                position++;
            }

            keyRecord.Key = _stringBuilder.ToString();
            accepter.AddRecord(keyRecord);
        }
コード例 #14
0
ファイル: TableJoin.cs プロジェクト: zbxzc35/BoostTree
 /// <summary>
 /// Finish override with an intentionally empty body: no records are added
 /// to the accepter.
 /// </summary>
 /// <param name="accepter">Unused.</param>
 public override void Finish(RecordAccepter accepter)
 {
 }
コード例 #15
0
ファイル: TableJoin.cs プロジェクト: zbxzc35/BoostTree
        /// <summary>
        /// Converts an incoming record into a <c>CountRecord</c> whose key is
        /// built from the <c>KeyExpression</c> columns and whose count comes
        /// from the count column (or is 1 when no count expression is given).
        /// </summary>
        /// <remarks>
        /// The first call builds the column wrapper, resolves key and count
        /// columns, and selects <c>_caseNum</c>:
        /// 0 = the input is a CountRecord and the count column is its last column;
        /// 1 = no count expression, every key counts as 1;
        /// 2 = the count is parsed from the text of the count column.
        /// A count expression ending in '!' makes unparsable counts become zero
        /// (tallied in <c>_numParseErrors</c>) instead of terminating the process.
        /// </remarks>
        /// <param name="record">The next input record.</param>
        /// <param name="accepter">Receives the resulting count record.</param>
        public override void ProcessRecord(DataRecord record, RecordAccepter accepter)
        {
            // One-time setup on the first record.
            if (_wrapper == null) {
                bool hasHeaders = false;

                HintMessageToConsole();

                if (record is TableRecord) {
                    _wrapper = new ColumnWrapper(record as TableRecord);
                }

                else {
                    hasHeaders = ColumnWrapper.HasHeaders(KeyExpression);
                    _wrapper = new ColumnWrapper(record, _sourceName, _separator, hasHeaders);
                }

                _keyColumnNos = _wrapper.GetColumnNos(KeyExpression);

                // if the countColumnExpression ends in a bang! we allow ulong parse errors.
                // otherwise we abort on an error.
                if (_countExpression != null && _countExpression.EndsWith("!")) {
                    _interpretAsZero = true;
                    _countExpression = _countExpression.TrimEnd('!');
                }

                if (_countExpression != null) {
                    // The count expression must resolve to exactly one existing column.
                    int[] countColumnNos = _wrapper.GetColumnNos(_countExpression);
                    if (countColumnNos.Length != 1) {
                        Console.WriteLine("Illegal Count Column expression");
                        Environment.Exit(1);
                    }

                    _countColumnNo = countColumnNos[0];
                    if (_countColumnNo > _wrapper.ColumnNames.Length - 1) {
                        Console.WriteLine("Illegal Count Column expression");
                        Environment.Exit(1);
                    }
                }

                // if countRecord and count column is last column.
                if (record is CountRecord && _countColumnNo == _wrapper.ColumnNames.Length - 1) {
                    _caseNum = 0;
                }

                // if no expression given use 1
                else if (_countExpression == null) {
                    _caseNum = 1;
                }

                else _caseNum = 2;

                if (hasHeaders) return; // eat up this record containing headers
            }

            // not sure if this is the best way to ignore blank lines coming in.
            if (record.Key.Length == 0) return;

            CountRecord outRecord = new CountRecord();
            _wrapper.SetRecord(record);

            // build the key
            _stringBuilder.Length = 0;

            for (int i = 0; i < _keyColumnNos.Length; i++) {
                if (i != 0) _stringBuilder.Append(_separator);
                _wrapper.AppendColumn(_keyColumnNos[i], _stringBuilder);
            }

            outRecord.Key = _stringBuilder.ToString();

            // we special case 0, because then we can avoid converting from ulong to string
            // and back to ulong.
            switch (_caseNum) {
                case 0:
                    outRecord.Count = ((CountRecord)record).Count;
                    break;

                case 1:
                    outRecord.Count = 1;
                    break;

                case 2:
                    _stringBuilder.Length = 0;
                    _wrapper.AppendColumn(_countColumnNo, _stringBuilder);
                    string countText = _stringBuilder.ToString();

                    // TryParse instead of Parse inside a bare catch: bad counts are
                    // an expected input condition in '!' mode, so they should not
                    // cost an exception per record, and unrelated exceptions
                    // should not be swallowed.
                    ulong parsedCount;
                    if (ulong.TryParse(countText, out parsedCount)) {
                        outRecord.Count = parsedCount;
                    }
                    else {
                        if (!_interpretAsZero) {
                            Console.WriteLine("Illegal ulong string '{0}'.\nTo interpret as zero: count column expression = ${1}!", countText, _countColumnNo+1);
                            Environment.Exit(-1);
                        }

                        // '!' mode: count the bad value as zero and keep the record.
                        outRecord.Count = 0;

                        _numParseErrors++;
                    }
                    break;
            }

            accepter.AddRecord(outRecord);
        }
コード例 #16
0
ファイル: TableJoin.cs プロジェクト: zbxzc35/BoostTree
 /// <summary>
 /// When zero-interpretation of bad counts is enabled, reports on standard
 /// error how many values were interpreted as zero. No records are added
 /// to the accepter.
 /// </summary>
 /// <param name="accepter">Unused.</param>
 public override void Finish(RecordAccepter accepter)
 {
     if (_interpretAsZero) {
         // A separating space is needed; without it the number runs straight
         // into the text (e.g. "5interpretted zeros").
         Console.Error.WriteLine(_numParseErrors + " interpretted zeros");
     }
 }