Ejemplo n.º 1
0
        /// <summary>
        /// Advances to the next record.  Records are drawn from the current input; when
        /// that input is exhausted and fewer than two inputs have been opened, the next
        /// input in _inputList is opened and read instead.
        /// </summary>
        /// <returns>True if a record was read, false otherwise.</returns>
        public override bool MoveNext()
        {
            // first call: open the input at the current position and step the counter
            if (_source == null) {
                int firstIndex = _currentSourceNo++;
                _source = (InternalRecordSource)_inputList[firstIndex];
            }

            _notDone = _source.MoveNext();

            // the current input ran dry: switch to the following input if one remains
            bool anotherInputLeft = _currentSourceNo < 2;
            if (!_notDone && anotherInputLeft) {
                int nextIndex = _currentSourceNo++;
                _source = (InternalRecordSource)_inputList[nextIndex];
                _notDone = _source.MoveNext();
            }

            // expose whatever the active input currently holds
            CurrentRecord = _source.CurrentRecord;
            return _notDone;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Writes records from <paramref name="input"/> to the output stream until the
        /// input is exhausted or the stream position reaches <paramref name="maxFileSize"/>.
        /// Each record is written as a variable-length key size, the key bytes, a
        /// variable-length data size and the data bytes (size 0 when there is no data).
        /// The output stream is always closed and reset before returning, even when
        /// writing throws, so a failed write does not leak the file handle.
        /// </summary>
        /// <param name="input">source to pull records from.</param>
        /// <param name="maxFileSize">stream position at which to stop, or -1 for no limit.</param>
        /// <returns>
        /// True if writing stopped because the size limit was reached (there may be more
        /// to write), false if the input was exhausted first.
        /// </returns>
        internal bool WriteRecordFileMaxSize(InternalRecordSource input, long maxFileSize)
        {
            bool done = false;

            try {
                // initialize: open the stream and write the input's properties first
                if (_outputStream == null) {
                    OpenOutputStream();
                    _outputWriter = new VariableLengthBinaryWriter(_outputStream);
                    input.WriteProperties(_outputStream);
                }

                while (!done && input.MoveNext()) {
                    DataRecord record = input.CurrentRecord;

                    // output key
                    _outputWriter.WriteVariableLength((uint)record.KeyBytes.Length);
                    _outputWriter.Write(record.KeyBytes);

                    // output data
                    if (record.Data != null) {
                        _outputWriter.WriteVariableLength((uint)record.Data.Length);
                        _outputWriter.Write(record.Data);
                    }
                    else {
                        _outputWriter.WriteVariableLength((uint)0);
                    }

                    // the limit is checked after a full record so records are never split
                    if (maxFileSize != -1 && _outputStream.Position >= maxFileSize) {
                        done = true;
                    }
                }
            }
            finally {
                // release the file even if a write failed part-way through
                if (_outputStream != null) {
                    _outputStream.Close();
                    _outputStream = null;
                }
            }

            // we return notDone to our caller, i.e. we return true if there is more
            // to be written, false if we wrote it all.
            return done;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Chooses random records from source by inserting a RandomFilter into the
        /// pipeline.  If the current source is a LoggingSource the filter is inserted
        /// underneath it; otherwise the filter becomes the new source.
        /// </summary>
        /// <param name="numToKeep">number of random records to pass through from source.</param>
        /// <param name="seed">a seed to the random number generator.</param>
        public void Random(int numToKeep, int seed)
        {
            RandomFilter filter = new RandomFilter(numToKeep, seed);
            RecordFilterDriver filterDriver = new RecordFilterDriver(filter);

            // Scale the byte estimate by the fraction of records kept.  A source that
            // reports no records would make the division produce Infinity/NaN, and casting
            // that to long yields an unspecified value, so fall back to a zero estimate.
            long sourceRecordsEstimate = InternalSource.TotalRecordsEstimate;
            long bytesEstimate = 0;
            if (sourceRecordsEstimate > 0) {
                double fractionKept = (double)numToKeep / (double)sourceRecordsEstimate;
                bytesEstimate = (long)(fractionKept * InternalSource.TotalRecordBytesEstimate);
            }

            bool insertBelowLogger = InternalSource is LoggingSource;

            if (insertBelowLogger) {
                // read from whatever the logger was reading from
                filterDriver.AddInput(InternalSource.Inputs[0]);
            }
            else {
                filterDriver.AddInput(InternalSource);
            }

            // unfortunately must be tweaked from outside after AddInput
            filterDriver.TotalRecordsEstimate = (long)numToKeep;
            filterDriver.TotalRecordBytesEstimate = bytesEstimate;

            if (insertBelowLogger) {
                // re-point the logger at the filter so logging stays outermost
                InternalSource.ClearInputs();
                InternalSource.AddInput(filterDriver);
            }
            else {
                InternalSource = filterDriver;
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Advances to the next record produced by the filter.  Input records are pushed
        /// through _filter until the record accepter holds at least one record, the input
        /// is exhausted, or the accepter reports that the filter is done.  Exceptions
        /// thrown by the filter are counted and logged to Console.Error rather than
        /// propagated.
        /// </summary>
        /// <returns>True if a record is available in CurrentRecord, false otherwise.</returns>
        public override bool MoveNext()
        {
            // initialize
            if (_input == null) {
                _input = (InternalRecordSource)_inputList[0];
            }

            // fill up the accepter with at least one record
            while (_recordAccepter.Empty && _notDone) {

                _notDone = _input.MoveNext();

                // run the current record through the filter
                if (_notDone) {
                    // put a try catch around user code
                    try {
                        _filter.ProcessRecord(_input.CurrentRecord, _recordAccepter);
                    }

                    catch (Exception ex) {
                        _caughtErrors++;
                        Console.Error.WriteLine(_caughtErrors + " caught " + _filterType + " error(s)");
                        Console.Error.WriteLine(ex.Message + " " + _notDone + " " + _recordAccepter.IsDone);
                        Console.Error.WriteLine(ex.StackTrace);
                    }

                    _notDone = !_recordAccepter.IsDone; // filter may elect to finish early
                }

                // there is a finish call because the filter might have state
                // this gives it a chance to output one last record.
                else {
                    try {
                        _filter.Finish(_recordAccepter);
                    }

                    // report the failure instead of swallowing it silently; the
                    // iteration still ends normally, matching the ProcessRecord handling
                    catch (Exception ex) {
                        _caughtErrors++;
                        Console.Error.WriteLine(_caughtErrors + " caught " + _filterType + " error(s)");
                        Console.Error.WriteLine(ex.Message);
                        Console.Error.WriteLine(ex.StackTrace);
                    }
                }
            }

            // grab the record out of the accepter
            bool notDone = _recordAccepter.MoveNext();
            if (!notDone) return false;

            CurrentRecord = _recordAccepter.CurrentRecord;

            return true;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Adds an input into the InternalRecordSource.  At this point we set our properties to be a function
        /// of our input's properties.  If the component doesn't want properties to propagate then it should
        /// set the appropriate _passThruInput... to false.
        /// </summary>
        /// <param name="input">the source to append to the input list; must not be null.</param>
        /// <exception cref="System.ArgumentNullException">if <paramref name="input"/> is null.</exception>
        public void AddInput(InternalRecordSource input)
        {
            // reject null before touching any state: otherwise the null would already be
            // in _inputList when the property reads below fail
            if (input == null) {
                throw new System.ArgumentNullException("input");
            }

            _inputList.Add(input);
            if (!_propertiesSet) SetProperties();

            // our estimates are the sum over all inputs added so far
            _totalRecordsEstimate += input.TotalRecordsEstimate;
            _totalRecordBytesEstimate += input.TotalRecordBytesEstimate;

            // default is our output has the same record type as the input
            _recordInfo.Update(input._recordInfo);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Advances the wrapped input by one record and writes progress messages to
        /// Console.Error: a "Begin Read" line on the first call, a percentage/rate line
        /// each time the record count reaches the next log threshold, and an "End Read"
        /// line when the input reports no more records.
        /// </summary>
        /// <returns>The value returned by the wrapped input's MoveNext.</returns>
        public override bool MoveNext()
        {
            bool notDone = true;

            // first call: bind the input, copy its estimates and announce the read
            if (_source == null) {
                _source = (InternalRecordSource)_inputList[0];
                notDone = _source.MoveNext();
                CurrentRecord = _source.CurrentRecord;

                TotalRecordBytesEstimate = _source.TotalRecordBytesEstimate;
                TotalRecordsEstimate = _source.TotalRecordsEstimate;

                if (_displayString == null) {

                    if (_source is InternalUserSource) {
                        _displayString = "[UserSource]";
                    }

                    else {
                        _displayString = "[" + _source.StorageType + ":" + _source.CurrentSourceName + "]";
                    }
                }

                Console.Error.Write(_displayString + "\tBegin Read\t" + TotalRecordsEstimate + " records");

                // for these source types the record count is only an estimate, so it is
                // re-estimated once below from the bytes actually seen
                if (_source is FlatFileMapper || _source is DirectoryMapper || _source is InternalUserSource) {
                    Console.Error.Write(" (estimate)");
                    _doEstimate = true;
                }

                Console.Error.WriteLine();

                _logInterval = TotalRecordsEstimate / _numLogs;
                if (_logInterval == 0) _logInterval = 1;

                _nextLogCount = _logInterval;
                _beginDateTime = DateTime.UtcNow;
            }

            else {
                notDone = _source.MoveNext();
            }

            CurrentRecord = _source.CurrentRecord;

            if (_recordCount == _nextLogCount) {

                Console.Error.Write(_displayString + "\tBegin Read\t");
                double percent = 100.0 * _recordCount / TotalRecordsEstimate;
                Console.Error.Write("{0:0.00}% done\t", percent);

                TimeSpan span = DateTime.UtcNow - _beginDateTime;
                _beginDateTime = DateTime.UtcNow;

                // TotalSeconds is the whole elapsed interval; Seconds is only the 0-59
                // component and under-reports anything longer than a minute.  The division
                // is done in floating point so slow rates do not truncate to zero.
                double elapsedSeconds = span.TotalSeconds;
                double recsPerSecond = (elapsedSeconds > 0.0)
                    ? _logInterval / elapsedSeconds
                    : (double)_logInterval; // no measurable time elapsed
                Console.Error.WriteLine("{0:0.00} recs/sec", recsPerSecond);

                if (_doEstimate) {
                    double aveBytesPerRecord = (double)_lineBytes / (double)(_recordCount + 1); // div by 0
                    TotalRecordsEstimate = (long)(((double)TotalRecordBytesEstimate) / aveBytesPerRecord);
                    long recordsLeft = TotalRecordsEstimate - _recordCount;
                    _logInterval = recordsLeft / (_numLogs - 1);

                    // only reestimate one time
                    _doEstimate = false;
                }

                _nextLogCount += _logInterval;
            }

            _recordCount++;

            if (!notDone) {
                Console.Error.WriteLine(_displayString + "\tEnd Read\t" + DateTime.Now);
            }

            // while still estimating, accumulate key lengths as the bytes-per-record sample
            if (_doEstimate && notDone) {
                _lineBytes += _source.CurrentRecord.Key.Length;
            }

            return notDone;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Builds the device that combines all inputs and stores it in _output: a
        /// SortedRecordMerger when Sorting.IsSorted is set, otherwise a
        /// RecordSorterReducer.  Every source in Inputs is added to that device.
        /// </summary>
        private void _Initialize()
        {
            // The inputs are assumed to be either all sorted or all unsorted.  Some of
            // them may be empty (e.g. empty buckets).

            InternalRecordSource combiner;

            if (!Sorting.IsSorted) {
                // NOTE(review): ascending order is hard coded here - confirm that is intended
                combiner = new RecordSorterReducer(_tempDir, true, _reductionEnabled);
            }
            else {
                combiner = new SortedRecordMerger();
            }

            // feed every input into the combining device
            foreach (InternalRecordSource input in Inputs) {
                combiner.AddInput(input);
            }

            _output = combiner;
        }
Ejemplo n.º 8
0
 /// <summary>
 /// Stores <paramref name="input"/> in the _output field.
 /// </summary>
 /// <param name="input">the record source to store.</param>
 public void SetInput(InternalRecordSource input)
 {
     this._output = input;
 }
Ejemplo n.º 9
0
        /// <summary>
        /// Finalizes the writing of a TStore to disk.  Every record from the input (or
        /// from the sorter/reducer when records were pushed) is added to the main store;
        /// the first record of each key group is also added to a key cache written with
        /// the same on-disk structure.  The writers are always finished and the
        /// sorter/reducer closed, including when the input turned out to be empty.
        /// </summary>
        public void Write()
        {
            // we use the same disk structure to store the cache
            TMSNStoreWriter keyCacheWriter = new TMSNStoreWriter(_outputDir, true);
            uint cacheKeyGroupSize = 32; // we fix this one
            keyCacheWriter.KeyGroupSize = cacheKeyGroupSize;

            // if we are in the push mode use the sorterReducer that the records were pushed into.
            if (_input == null) {
                _input = _sorterReducer;
            }
            // Note, _input can be null if the user never pushed anything onto this writer.
            if (_input != null)
            {
                while (_input.MoveNext())
                {
                    // initialize file lengths.  We wait till after the first MoveNext so props are set
                    if (_numWritten == 0)
                    {
                        TotalRecordBytesEstimate = _input.TotalRecordBytesEstimate;
                        TotalRecordsEstimate = _input.TotalRecordsEstimate;
                        Initialize();

                        keyCacheWriter.TotalRecordsEstimate = TotalRecordsEstimate / (long)KeyGroupSize;
                        keyCacheWriter.TotalRecordBytesEstimate = TotalRecordBytesEstimate / (long)KeyGroupSize;
                        keyCacheWriter.Initialize();
                    }

                    DataRecord record = _input.CurrentRecord;

                    // write first key of group to cache
                    if (_numWritten % KeyGroupSize == 0)
                    {
                        // write the key of the first group member to the cache.
                        keyCacheWriter.AddRecord(record.KeyBytes, record.Data, false);
                    }

                    // write to main store
                    AddRecord(record.KeyBytes, record.Data, true);
                }

                // Padding and record-info only make sense when something was written.
                // An empty input skips them but still falls through to the cleanup below,
                // the same path taken when there is no input at all.
                if (_numWritten != 0)
                {
                    // if needed, pad the last group written with null entries so that
                    // we don't have to special-case reading the last group.
                    long unfullGroupNumMembers = _numWritten % KeyGroupSize;
                    if (unfullGroupNumMembers > 0)
                    {
                        uint numPadEntries = (uint)(KeyGroupSize - unfullGroupNumMembers);
                        for (int i = 0; i < numPadEntries; i++)
                        {
                            _keyFileWriter.Write((byte)0); // overlap
                            _keyFileWriter.Write((byte)0); // neu
                            _keyFileWriter.Write((byte)0); // dataLen
                        }
                    }

                    // take the input source and write its properties to disk
                    string recordInfoFile = Path.Combine(_outputDir, "record-info");
                    using (Stream sw = new FileStream(recordInfoFile, FileMode.Create))
                    {
                        _input.TotalRecordsEstimate = _numWritten;
                        _input.TotalRecordBytesEstimate = _keyFileStream.Position;
                        _input.WriteProperties(sw);
                    }
                }
            }

            Finish();
            keyCacheWriter.Finish();
            if (_sorterReducer != null) _sorterReducer.Close();
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds a joiner for the two sources.  The output column names are all left
        /// column names followed by the right column names that are not right key
        /// columns (key columns are redundant with the left side).  A right column whose
        /// name already appears on the left is prefixed with the right source's file
        /// name (without extension) and a colon.
        /// </summary>
        /// <remarks>
        /// Original notes: to be used when left.CurrentRecord is always != null; the
        /// joiner also does table conversion if necessary.
        /// </remarks>
        /// <param name="left">left source; its CurrentRecord and CurrentSourceName are read.</param>
        /// <param name="right">right source; its CurrentRecord and CurrentSourceName are read.</param>
        /// <param name="columnSeparator">separator character used between columns.</param>
        public RecordJoiner(InternalRecordSource left, InternalRecordSource right, char columnSeparator)
        {
            Hashtable nameHash = new Hashtable();
            _separator = columnSeparator;

            _leftWrapper = new ColumnWrapper(left.CurrentRecord, left.CurrentSourceName, _separator);
            _rightWrapper = new ColumnWrapper(right.CurrentRecord, right.CurrentSourceName, _separator);

            _leftWrapper.SetRecord(left.CurrentRecord);
            _rightWrapper.SetRecord(right.CurrentRecord);

            // In calculating new column names we remove the Key Columns
            // from the right side since this is redundant information.
            int numRightKeys = _rightWrapper.KeyColumnNos.Length;
            int numRightDataColumns = _rightWrapper.ColumnNames.Length - numRightKeys;

            _outputColumnNames = new string[_leftWrapper.ColumnNames.Length + numRightDataColumns];

            // left columns are copied as-is and remembered for clash detection
            int newCol = 0;
            foreach (string name in _leftWrapper.ColumnNames) {
                _outputColumnNames[newCol++] = name;
                nameHash[name] = 1;
            }

            // j walks the right key column numbers in step with i; the bounds check
            // keeps a right side without key columns from indexing an empty array
            int j = 0;
            for (int i = 0; i < _rightWrapper.ColumnNames.Length; i++) {

                // skip keys
                if (j < numRightKeys && i == _rightWrapper.KeyColumnNos[j]) {
                    j++;
                    continue;
                }

                string name = _rightWrapper.ColumnNames[i];

                // disambiguate names that clash with a left column
                if (nameHash[name] != null) {
                    name = Path.GetFileNameWithoutExtension(right.CurrentSourceName) + ":" + name;
                }

                _outputColumnNames[newCol++] = name;
            }

            // create empty columns for right side: one separator per non-key right column
            for (int i = 0; i < numRightDataColumns; i++) {
                _emptyRightColumns += _separator;
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Advances to the next record.  If a record was previously stashed in
        /// _nextRecord it is returned (and the stash cleared) without touching the
        /// input; otherwise the input is advanced.
        /// </summary>
        /// <returns>True if a stashed record was returned, else the input's MoveNext result.</returns>
        public override bool MoveNext()
        {
            // bind to the first input on the first call
            if (_source == null) {
                _source = Inputs[0];
            }

            // nothing stashed: pull straight from the input
            if (_nextRecord == null) {
                bool notDone = _source.MoveNext();
                CurrentRecord = _source.CurrentRecord;
                return notDone;
            }

            // we already retrieved the next record and stuck it here; hand it out once
            CurrentRecord = _nextRecord;
            _nextRecord = null;
            return true;
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Advances iteration.  Each call consumes one left record, advances the right
        /// side for as long as the key comparison is negative, and then passes both
        /// records to the joiner together with a flag telling whether the keys compared
        /// equal.  A record is produced for every left record.
        /// </summary>
        /// <returns>False if at end of iteration, true otherwise.</returns>
        public override bool MoveNext()
        {
            // first call: bind both inputs and prime the right side so that a
            // record is there waiting to be compared to
            if (_leftSource == null) {
                _leftSource = (InternalRecordSource)_inputList[0];
                _rightSource = (InternalRecordSource)_inputList[1];

                _rightNotDone = _rightSource.MoveNext();
                if (_rightNotDone) {
                    _currentRightRecord = _rightSource.CurrentRecord;
                }
            }

            _leftNotDone = _leftSource.MoveNext();
            if (!_leftNotDone) {
                return false;
            }
            _currentLeftRecord = _leftSource.CurrentRecord;

            // Don't need a match to create a joiner
            if (_joiner == null) {
                _joiner = new RecordJoiner(_leftSource, _rightSource, _tableColumnSeparator);
            }

            // The first pass compares against the right record we already hold (dups
            // are allowed on the left side); later passes advance the right side first.
            int diff;
            bool advanceRight = false;
            do {
                if (advanceRight && _rightNotDone) {
                    _rightSource.MoveNextHint = _currentLeftRecord.Key;
                    _rightNotDone = _rightSource.MoveNext();
                    _currentRightRecord = _rightNotDone ? _rightSource.CurrentRecord : null;
                }

                // no right record left: use a positive diff to break out of the loop
                diff = (_currentRightRecord == null)
                    ? 1
                    : TMSNStoreUtils.Utf8BytesCompare(_currentLeftRecord.KeyBytes, _currentRightRecord.KeyBytes);

                advanceRight = true;
            } while (diff < 0);

            bool keysMatch = (diff == 0);
            CurrentRecord = _joiner.Join(_currentLeftRecord, _currentRightRecord, keysMatch);
            return true;
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Advances Iteration.  Left records are consumed until one is found whose key
        /// compares equal to a right record; that pair is joined and becomes
        /// CurrentRecord.  Iteration ends as soon as either side is exhausted.
        /// </summary>
        /// <returns>False if at end of iteration, true otherwise.</returns>
        public override bool MoveNext()
        {
            // first call: bind both inputs and prime the right side
            if (_leftSource == null) {
                _leftSource = (InternalRecordSource)_inputList[0];
                _rightSource = (InternalRecordSource)_inputList[1];

                _rightNotDone = _rightSource.MoveNext();
                if (!_rightNotDone) {
                    return false;
                }
            }

            for (;;) {
                // advance the left side first
                _leftNotDone = _leftSource.MoveNext();
                if (!_leftNotDone) {
                    return false;
                }

                // The first pass compares against the right record already in place
                // (dups are allowed on the left side); later passes advance the right.
                int diff;
                bool advanceRight = false;
                do {
                    if (advanceRight && _rightNotDone) {
                        _rightSource.MoveNextHint = _leftSource.CurrentRecord.Key;
                        _rightNotDone = _rightSource.MoveNext();
                        if (!_rightNotDone) {
                            return false;
                        }
                    }

                    diff = TMSNStoreUtils.Utf8BytesCompare(_leftSource.CurrentRecord.KeyBytes, _rightSource.CurrentRecord.KeyBytes);
                    advanceRight = true;
                } while (diff < 0);

                // no match for this left record: try the next one
                if (diff != 0) {
                    continue;
                }

                // there's a match, join the right to the left
                if (_joiner == null) {
                    _joiner = new RecordJoiner(_leftSource, _rightSource, _tableColumnSeparator);
                }

                CurrentRecord = _joiner.Join(_leftSource.CurrentRecord, _rightSource.CurrentRecord, true);
                return true;
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Redirects Console.Error according to _logFile and, when logging is enabled,
        /// wraps <paramref name="source"/> in a LoggingSource.
        /// </summary>
        /// <param name="source">the source to optionally wrap.</param>
        /// <returns>
        /// <paramref name="source"/> itself when _logFile is null, otherwise a
        /// LoggingSource that has <paramref name="source"/> as its input.
        /// </returns>
        private InternalRecordSource _AddLogging(InternalRecordSource source)
        {
            // _logFile == null -> no logging
            // _logFile == "stderr" -> log to Console.Error
            // _logFile == "whatever" -> log to whatever file

            if (_logFile == null) {
                // discard anything written to Console.Error
                StreamWriter err = new StreamWriter(Stream.Null);
                Console.SetError(err);
                return source; // return source with no logging
            }

            // if != stderr
            if (!_logFile.Equals("stderr")) {
                StreamWriter err = new StreamWriter(_logFile);

                // Nothing in this method closes the writer, so without auto-flush
                // buffered log lines could be lost when the process exits.
                err.AutoFlush = true;
                Console.SetError(err);
            }

            LoggingSource logger = new LoggingSource();
            logger.AddInput(source); // insert the logger

            return logger;
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Inserts whatever is needed so that this source's output is sorted in the
        /// requested direction and, optionally, reduced.  If the input is not already
        /// sorted as requested a RecordSorterReducer is inserted; if it is and reduction
        /// is enabled a ReduceFilter is inserted; otherwise nothing changes.
        /// </summary>
        /// <param name="sortAscending">true to ask for ascending order, false for descending.</param>
        /// <param name="reductionEnabled">true to reduce records.</param>
        private void _SortReduce(bool sortAscending, bool reductionEnabled)
        {
            // Sort states are encoded as numbers for easy comparison:
            // no sort = 0, sort ascending = 1, sort descending = 2

            int askSortNum = sortAscending ? 1 : 2; // we are asking for sorting on our output

            bool inputIsSorted = InternalSource.Sorting.IsSorted;
            bool inputIsSortedAscending = InternalSource.Sorting.IsSortedAscending;

            int inputSortNum;
            if (!inputIsSorted) {
                inputSortNum = 0;
            }
            else if (inputIsSortedAscending) {
                inputSortNum = 1;
            }
            else {
                inputSortNum = 2;
            }

            // (Aside: one might ask, why not just have a separate sorter and reducer?
            // Reduction is combined in the sorter so that reduction can happen in
            // memory before temp files are written to disk.)

            bool alreadySortedAsAsked = (askSortNum == inputSortNum);

            // don't insert a sorter or reducer
            if (alreadySortedAsAsked && !reductionEnabled) {
                return;
            }

            if (alreadySortedAsAsked) {
                // order is already right, so only reduction is needed
                IRecordFilter reducer = new ReduceFilter();
                RecordFilterDriver driver = new RecordFilterDriver(reducer);
                driver.AddInput(InternalSource);
                InternalSource = driver;
            }

            else {
                RecordSorterReducer sr = new RecordSorterReducer(Processor.TempDir, sortAscending, reductionEnabled);
                sr.MaxMemorySize = Processor.MaxMemorySize;
                sr.DeleteTempFiles = Processor.DeleteTempFiles;

                // pipe our current input into the sorterReducer and make the
                // sorterReducer the output of this source
                sr.AddInput(InternalSource);
                InternalSource = sr;
            }
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Adds a sorted input to the merge heap.  The input is advanced to its first
        /// record; an empty input is ignored.  Otherwise it is placed at the end of
        /// _heapArray and moved up while _MergeCompare says it orders before its parent.
        /// </summary>
        /// <param name="input">the source to add; must report itself as sorted.</param>
        private void _AddInput(InternalRecordSource input)
        {
            if (!input.Sorting.IsSorted) {
                throw new Exception("can't merge non-sortable record source");
            }

            // nothing to merge from an empty input
            if (!input.MoveNext()) {
                return;
            }

            // append at the next free slot
            int slot = _numSources++;
            _heapArray[slot] = input;

            int parentSlot = _Parent(slot);

            // bubble the new entry up while it compares less than its parent
            while (slot != 0) {
                bool lessThanParent = _MergeCompare(
                    _heapArray[slot].CurrentRecord.KeyBytes,
                    _heapArray[parentSlot].CurrentRecord.KeyBytes);

                if (!lessThanParent) {
                    break;
                }

                _Swap(slot, parentSlot);
                slot = _Parent(slot);
                parentSlot = _Parent(slot);
            }
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Returns a constructor object for the record type held in _type.  The built-in
        /// record types map to their dedicated constructor classes.  For any other type,
        /// SourceCode is compiled in memory and an instance of
        /// Microsoft.TMSN.Data.UserRecordConstructor from the compiled assembly is returned.
        /// </summary>
        /// <param name="source">asked for the location of an external assembly defining the type.</param>
        /// <returns>A RecordConstructor for _type.</returns>
        internal RecordConstructor GetRecordConstructor(InternalRecordSource source)
        {
            // built-in record types
            if (_type == typeof(DataRecord)) {
                return new RecordConstructor();
            }

            if (_type == typeof(StringRecord)) {
                return new StringRecordConstructor();
            }

            if (_type == typeof(CountRecord)) {
                return new CountRecordConstructor();
            }

            if (_type == typeof(TableRecord)) {
                return new TableRecordConstructor(_tableColumnNames, KeyColumnNos, TableColumnSeparator);
            }

            if (_type == typeof(BipartiteRecord)) {
                return new BipartiteRecordConstructor();
            }

            // user-defined record type: compile a constructor for it
            Assembly entryAssembly = Assembly.GetEntryAssembly();

            CSharpCodeProvider codeProvider = new CSharpCodeProvider();
            CompilerParameters compilerParams = new CompilerParameters();

            // if we're a webservice there is no entry assembly
            if (entryAssembly != null) {
                string binDir = Path.GetDirectoryName(entryAssembly.Location);
                compilerParams.ReferencedAssemblies.Add(Path.Combine(binDir, "TMSNStore.dll"));
            }

            // reference the assembly that the type was loaded from
            Assembly typeAssembly = Assembly.GetAssembly(_type);
            compilerParams.ReferencedAssemblies.Add(typeAssembly.Location);

            // the assemblyLocation could be external and the source knows where
            string assemblyLocation = source.GetAssemblyLocation(_type.ToString());
            if (assemblyLocation != null) {
                compilerParams.ReferencedAssemblies.Add(assemblyLocation);
            }

            compilerParams.GenerateExecutable = false;
            compilerParams.GenerateInMemory = true;
            compilerParams.IncludeDebugInformation = true;

            #if DOTNET2
            CompilerResults results = codeProvider.CompileAssemblyFromSource(compilerParams, SourceCode);
            #else
            CompilerResults results = codeProvider.CreateCompiler().CompileAssemblyFromSource(compilerParams, SourceCode);
            #endif

            // NOTE(review): a compile failure dumps the source and errors to stdout and
            // terminates the whole process - confirm that is acceptable for library callers
            if (results.Errors.HasErrors) {
                Console.WriteLine(SourceCode);
                foreach (CompilerError error in results.Errors) {
                    Console.WriteLine(error.ErrorText);
                }
                Environment.Exit(-1);
            }

            // instantiate the generated constructor through its parameterless constructor
            Assembly compiledAssembly = results.CompiledAssembly;
            Type constructorType = compiledAssembly.GetType("Microsoft.TMSN.Data.UserRecordConstructor");
            ConstructorInfo constInfo = constructorType.GetConstructor(new Type[0]);

            return (RecordConstructor)constInfo.Invoke(null);
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Shared construction logic: records the input, output directory and file set
        /// name, creates the output directory if it does not exist, and opens the key
        /// file and the "-index" file for writing.
        /// </summary>
        /// <param name="input">source to write; may be null, otherwise it must be sorted.</param>
        /// <param name="outputDir">directory that receives the store files.</param>
        /// <param name="fileSetName">name of the key file; the index file is this name plus "-index".</param>
        private void _Construct(InternalRecordSource input, string outputDir, string fileSetName)
        {
            bool inputIsUnsorted = (input != null) && !input.Sorting.IsSorted;
            if (inputIsUnsorted) {
                throw new Exception("can't output non-sorted input as TMSNStore");
            }

            _input = input;
            _outputDir = outputDir;
            _fileSetName = fileSetName;

            // if directory doesn't exist make it
            bool directoryMissing = !Directory.Exists(outputDir);
            if (directoryMissing) {
                Directory.CreateDirectory(outputDir);
            }

            // both files are opened with the same 16 MB stream buffer
            const int streamBufferSize = 1024 * 1024 * 16;

            // create fileSetName file
            string keyFilePath = Path.Combine(outputDir, _fileSetName);
            _keyFileStream = new FileStream(keyFilePath, FileMode.Create, FileAccess.Write, FileShare.None, streamBufferSize);
            _keyFileWriter = new VariableLengthBinaryWriter(_keyFileStream);

            // create index file
            string indexFilePath = Path.Combine(outputDir, _fileSetName + "-index");
            _indexFileStream = new FileStream(indexFilePath, FileMode.Create, FileAccess.Write, FileShare.None, streamBufferSize);
            _indexFileWriter = new BinaryWriter(_indexFileStream);
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Writes TableRecords in a special way: first a header line with the column
        /// names (unless SuppressTableHeaders is set), then one line of data per record.
        /// The key is one of the columns in the data; key columns are marked in the
        /// header with "=Key." followed by their 1-based position among the keys.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the <paramref name="output"/> and <paramref name="writer"/>
        /// parameters are never read; the method works on the _output and
        /// _flatFileWriter fields instead - confirm that callers pass the same objects.
        /// </remarks>
        /// <param name="output">not used by this method.</param>
        /// <param name="writer">not used by this method.</param>
        private void _OutputTable(InternalRecordSource output, StreamWriter writer)
        {
            TableRecord record = _output.CurrentRecord as TableRecord;
            char separator = record.TableColumnSeparator;

            if (!SuppressTableHeaders) {
                for (int col = 0; col < record.ColumnNames.Length; col++) {
                    // separator goes between columns, not before the first one
                    if (col > 0) {
                        _flatFileWriter.Write(separator);
                    }

                    _flatFileWriter.Write(record.ColumnNames[col]);

                    // mark the keys with =Key.<keyColumnNo>
                    for (int keyIdx = 0; keyIdx < record.KeyColumnNos.Length; keyIdx++) {
                        if (record.KeyColumnNos[keyIdx] != col) {
                            continue;
                        }

                        _flatFileWriter.Write("=Key.");
                        _flatFileWriter.Write(keyIdx + 1);
                    }
                }

                _flatFileWriter.WriteLine();
            }

            // the current record is written before the source is advanced
            bool moreRecords = true;
            while (moreRecords) {
                _flatFileWriter.WriteLine(_output.CurrentRecord.DataAsString);
                moreRecords = _output.MoveNext();
            }
        }
Ejemplo n.º 20
0
 /// <summary>
 /// Creates a writer for <paramref name="input"/> that writes into
 /// <paramref name="outputDir"/> using the file set name "keys-data".
 /// </summary>
 /// <param name="input">source passed on to _Construct.</param>
 /// <param name="outputDir">output directory passed on to _Construct.</param>
 internal TMSNStoreWriter(InternalRecordSource input, string outputDir)
 {
     const string fileSetName = "keys-data";
     _Construct(input, outputDir, fileSetName);
 }
Ejemplo n.º 21
0
        /// <summary>
        /// Completes sorting and selects the source that output is read from.  When
        /// there is more than one sorted run (temp files plus any in-memory records)
        /// they are merged with a SortedRecordMerger.  A single temp file is read
        /// directly, and otherwise the in-memory records are the output.  If internal
        /// reduction is enabled a ReduceFilter is placed on top.
        /// </summary>
        private void _Finish()
        {
            if (_doThreading) {
                _SortAndWriteThreaded();
                // wait on all previous threads, sorts and writes
                foreach (Thread t in _threads) {
                    t.Join();
                }
                _threads.Clear();
            }

            else {
                // don't write the last one to disk
                _recordsToSort.Sort();
                _sortedRecords = _recordsToSort;
            }

            bool haveInMemoryRecords = _sortedRecords != null && _sortedRecords.NumRecords != 0;

            int numToMerge = _numTempFiles;
            if (haveInMemoryRecords) {
                numToMerge++;
            }

            if (numToMerge > 1) {
                _merger = new SortedRecordMerger();

                if (haveInMemoryRecords) {
                    _merger.AddInput(_sortedRecords);
                }

                for (int i = 0; i < _numTempFiles; i++) {
                    InternalRecordSource source = new InternalRecordFileReader(GetTempFilename(i));
                    _merger.AddInput(source);
                }

                _output = _merger;
            }

            else if (_numTempFiles == 1) {
                // All records live in the single temp file and nothing is held in memory.
                // Read that file directly; falling back to the in-memory buffer here
                // would silently drop every record that was written to disk.
                _output = new InternalRecordFileReader(GetTempFilename(0));
            }

            else {
                // NOTE(review): _sortedRecords may still be null here if nothing was ever
                // sorted in threaded mode - the code below would then fail; confirm upstream.
                _output = _sortedRecords;
            }

            // set up reduction filter for reduction across merged sources
            if (_internalReductionEnabled) {
                ReduceFilter reducer = new ReduceFilter();
                RecordFilterDriver filterDriver = new RecordFilterDriver(reducer);
                filterDriver.AddInput(_output);
                _output = filterDriver;
            }

            // this is kind of a hack till i figure out how these should be set
            TotalRecordBytesEstimate = _output.TotalRecordBytesEstimate;
            TotalRecordsEstimate = _output.TotalRecordsEstimate;
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Advances to the next left record that passes the filter.  With
        /// _passThruOnMatch set, a left record passes when its key compares equal to a
        /// right record (left AND right); with it clear, a left record passes when no
        /// right record compares equal (left AND NOT right).
        /// </summary>
        /// <returns>True if CurrentRecord holds a passing left record, false at end of iteration.</returns>
        public override bool MoveNext()
        {
            // first call: bind both inputs
            if (_leftSource == null) {
                _leftSource = (InternalRecordSource)_inputList[0];
                _rightSource = (InternalRecordSource)_inputList[1];
            }

            for (;;) {
                _leftNotDone = _leftSource.MoveNext();
                if (!_leftNotDone) {
                    return false;
                }
                _currentLeftRecord = _leftSource.CurrentRecord;

                // The first pass reuses the right record we already hold, if any (dups
                // are allowed on the left side); later passes advance the right side.
                int diff;
                bool mayReuseRight = true;
                do {
                    bool needNewRight = !mayReuseRight || _currentRightRecord == null;
                    if (needNewRight) {
                        if (_rightNotDone) {
                            _rightNotDone = _rightSource.MoveNext();
                        }

                        // passThruOnMatch == left & right
                        // !passThruOnMatch == left &! right

                        // if left & right then when right is done we're done.
                        // if left &! right when right is done keep going so we
                        // can emit all the lefts that come after the last right.
                        if (!_rightNotDone) {
                            if (_passThruOnMatch) {
                                return false;
                            }

                            CurrentRecord = _currentLeftRecord;
                            return true;
                        }

                        _currentRightRecord = _rightSource.CurrentRecord;
                    }

                    diff = TMSNStoreUtils.Utf8BytesCompare(_currentLeftRecord.KeyBytes, _currentRightRecord.KeyBytes);
                    mayReuseRight = false;
                } while (diff < 0);

                // emit the left record when "keys matched" agrees with the pass-through
                // mode; otherwise move on to the next left record
                bool keysMatch = (diff == 0);
                if (keysMatch == _passThruOnMatch) {
                    CurrentRecord = _currentLeftRecord;
                    return true;
                }
            }
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Advances to the next record whose key matches _query: an exact match, or a prefix
        /// match when _prefix is set. If the input is a TMSNStoreRecordSource (optionally
        /// wrapped in a LoggingSource) the lookup is delegated to the store reader; otherwise
        /// the input is scanned record by record.
        /// </summary>
        /// <returns>true when CurrentRecord holds a matching record; false when no more matches exist.</returns>
        public override bool MoveNext()
        {
            if (_tstoreEnumer == null) {
                InternalRecordSource source = (InternalRecordSource)Inputs[0];

                TotalRecordBytesEstimate = 0; // unknown
                TotalRecordsEstimate = 0; // unknown

                // if a logging source is in the way, look around it.
                if (source is LoggingSource && source.Inputs[0] is TMSNStoreRecordSource) {
                    source = source.Inputs[0];
                }

                if (source is TMSNStoreRecordSource) {
                    TMSNStoreReader tstoreReader = ((TMSNStoreRecordSource)source).GetReader();

                    if (_prefix) {
                        _tstoreEnumer = tstoreReader.GetMatchingPrefix(_query).GetEnumerator();
                    }

                    else {
                        _tstoreEnumer = tstoreReader.GetMatch(_query).GetEnumerator();
                    }
                }

                else {
                    // not a store: fall back to scanning. source is still Inputs[0] here,
                    // already cast above, so no unchecked 'as' cast is needed.
                    _source = source;
                }
            }

            // store-backed lookup
            if (_tstoreEnumer != null) {
                // Only read Current after a successful MoveNext; its value is undefined
                // (and some enumerators throw) once the enumeration has finished.
                if (!_tstoreEnumer.MoveNext()) return false;

                CurrentRecord = (DataRecord)_tstoreEnumer.Current;
                return true;
            }

            // linear scan
            while (_source.MoveNext()) {
                string key = _source.CurrentRecord.Key;

                // Ordinal comparison: a culture-sensitive StartsWith can disagree with the
                // ordinal Equals used for exact matches.
                bool isMatch = _prefix
                    ? key.StartsWith(_query, System.StringComparison.Ordinal)
                    : key.Equals(_query);

                if (isMatch) {
                    CurrentRecord = _source.CurrentRecord;
                    return true;
                }
            }

            return false;
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Produces the next record of a two-way merge of the left and right inputs. The side
        /// that supplied the previous output is advanced first, then the two current keys are
        /// compared (scaled by _ascendingFactor) to pick the next output. Equal keys take the
        /// right side.
        /// </summary>
        /// <returns>true when CurrentRecord has been set; false once both inputs are exhausted.</returns>
        public override bool MoveNext()
        {
            // bind the two inputs on the first call
            if (_leftSource == null) {
                _leftSource = (InternalRecordSource)_inputList[0];
                _rightSource = (InternalRecordSource)_inputList[1];
            }

            // refill whichever side(s) were flagged for advancing
            if (_advanceLeft) {
                _leftNotDone = _leftSource.MoveNext();
                _advanceLeft = false;
            }

            if (_advanceRight) {
                _rightNotDone = _rightSource.MoveNext();
                _advanceRight = false;
            }

            // nothing left on either side
            if (!_leftNotDone && !_rightNotDone) return false;

            bool takeLeft;

            if (!_leftNotDone) {
                // only the right side still has records
                takeLeft = false;
            }

            else if (!_rightNotDone) {
                // only the left side still has records
                takeLeft = true;
            }

            else {
                // both sides have a record: let the key comparison decide
                int order = _ascendingFactor * TMSNStoreUtils.Utf8BytesCompare(_leftSource.CurrentRecord.KeyBytes, _rightSource.CurrentRecord.KeyBytes);
                takeLeft = order > 0;
            }

            if (takeLeft) {
                _currentOutputSource = _leftSource;
                _advanceLeft = true;
            }

            else {
                _currentOutputSource = _rightSource;
                _advanceRight = true;
            }

            CurrentRecord = _currentOutputSource.CurrentRecord;
            return true;
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Pulls records from the input and runs them through the filter until the record
        /// accepter holds at least one output record, then hands that record out. When the
        /// input runs dry the filter's Finish method is invoked so it can emit a last record.
        /// </summary>
        /// <returns>true when CurrentRecord has been set from the accepter; false when nothing is left.</returns>
        /// <exception cref="Exception">
        /// Thrown when the lazily built filter's KeyOrderIsPreserved differs from _passThruInputSorting.
        /// </exception>
        public override bool MoveNext()
        {
            // initialize
            if (_input == null) {
                _input = (InternalRecordSource)_inputList[0];
            }

            // get at least one record into the accepter
            while (_recordAccepter.Empty && _notDone) {

                _notDone = _input.MoveNext();

                // run the current record through the filter
                if (_notDone) {

                    // build the filter from the first record's type and put it in place
                    if (_filter == null) {
                        _filter = _GetFilter(_input.CurrentRecord.GetType().ToString(), _assemblyLocations);

                        // we already determined if the filter's KeyOrderIsPreserved
                        // make sure we were right
                        if (_passThruInputSorting != _filter.KeyOrderIsPreserved) {
                            throw new Exception("user filter error: improper KeyOrderIsPreserved detection");
                        }
                    }

                    // put a try catch around user code; failures are counted, not propagated
                    try {
                        _filter.ProcessRecord(_input.CurrentRecord, _recordAccepter);
                    }

                    catch {
                        _caughtErrors++;
                        // report the first error and then every 1000th
                        if (_caughtErrors == 1 || (_caughtErrors % 1000 == 0)) {
                            Console.WriteLine(_caughtErrors + " caught " + _filterType + " error(s) d");
                        }
                    }

                    _notDone = !_recordAccepter.IsDone; // filter may elect to finish early
                }

                // there is a finish call because the filter might have state;
                // this gives it a chance to output one last record.
                // If the input was empty and the filter was never built there is nothing to
                // finish. Skipping the call avoids a NullReferenceException that would be
                // swallowed below and miscounted as a user-filter error.
                else if (_filter != null) {
                    try {
                        _filter.Finish(_recordAccepter);
                    }
                    catch {
                        _caughtErrors++;
                    }
                }
            }

            // grab a record from the accepter
            if (!_recordAccepter.MoveNext()) return false;

            CurrentRecord = _recordAccepter.CurrentRecord;
            return true;
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Drains <paramref name="input"/> into the output stream. Each record is written as a
        /// variable-length key size, the key bytes, a variable-length data size and the data
        /// bytes (size 0 when the record has no data). The source's properties are written
        /// when the stream is opened, and the byte/record estimates once all records are out.
        /// The stream is always closed before returning, including when a write fails.
        /// </summary>
        /// <param name="input">Record source to write out; it is enumerated to the end.</param>
        internal void Write(InternalRecordSource input)
        {
            try {
                while (input.MoveNext()) {
                    DataRecord record = input.CurrentRecord;

                    // we need to do a moveNext on our input in order for RecordInstance
                    // to be valid, so the stream is opened lazily inside the loop.
                    if (_outputStream == null) {
                        OpenOutputStream();
                        input.WriteProperties(_outputStream);
                        _outputWriter = new VariableLengthBinaryWriter(_outputStream);
                    }

                    // output key
                    _outputWriter.WriteVariableLength((uint)record.KeyBytes.Length);
                    _outputWriter.Write(record.KeyBytes);

                    // output data
                    if (record.Data != null) {
                        _outputWriter.WriteVariableLength((uint)record.Data.Length);
                        _outputWriter.Write(record.Data);
                    }
                    else {
                        _outputWriter.WriteVariableLength((uint)0);
                    }

                    // slow. do elsewhere _totalRecordBytesEstimate = _outputStream.Position;
                    _totalRecordsEstimate++;
                }

                // Empty input: the loop never opened the stream, so dereferencing it below
                // would throw. Open it now so the properties and estimates are still written.
                // NOTE(review): assumes WriteProperties tolerates a source whose MoveNext
                // returned false on the first call -- confirm.
                if (_outputStream == null) {
                    OpenOutputStream();
                    input.WriteProperties(_outputStream);
                }

                _totalRecordBytesEstimate = _outputStream.Position;
                InternalRecordSource.WriteEstimates(_outputStream, _totalRecordBytesEstimate, _totalRecordsEstimate);
            }
            finally {
                // release the stream even when a write throws, so the handle is not leaked
                // and a later call starts with a fresh stream.
                if (_outputStream != null) {
                    _outputStream.Close();
                    _outputStream = null;
                }
            }
        }
Ejemplo n.º 27
0
 /// <summary>
 /// Creates the pseudo-filter, remembering the record source it is given.
 /// </summary>
 /// <param name="source">Record source stored in the _source field.</param>
 public StatisticsPseudoFilter(InternalRecordSource source)
 {
     this._source = source;
 }
Ejemplo n.º 28
0
        /// <summary>
        /// Truncates the number of records coming from the record source.
        /// </summary>
        /// <remarks>
        /// A LimitFilter is placed on top of the current source. If the current source is a
        /// LoggingSource, the filter is inserted underneath it instead. The filter driver's
        /// record estimate is set to <paramref name="recordLimit"/>. Its byte estimate is the
        /// source's byte estimate scaled by the fraction of records kept, never scaled above
        /// the source's own estimate.
        /// </remarks>
        /// <param name="recordLimit">Number of records to limit source to.</param>
        public void Limit(long recordLimit)
        {
            LimitFilter filter = new LimitFilter(recordLimit);
            RecordFilterDriver filterDriver = new RecordFilterDriver(filter);

            // A records estimate of 0 cannot be used as a divisor: the division would yield
            // Infinity/NaN and the cast to long a meaningless byte estimate. In that case
            // keep the source's byte estimate unscaled.
            double sourceRecords = (double)InternalSource.TotalRecordsEstimate;
            double fractionKept = 1.0;
            if (sourceRecords > 0) {
                fractionKept = (double)recordLimit / sourceRecords;

                // limiting can only remove records, so never scale the bytes up
                if (fractionKept > 1.0) fractionKept = 1.0;
            }

            long bytesEstimate = (long)(fractionKept * InternalSource.TotalRecordBytesEstimate);

            if (InternalSource is LoggingSource) {
                // splice the filter in beneath the logging source
                filterDriver.AddInput(InternalSource.Inputs[0]);
                filterDriver.TotalRecordsEstimate = recordLimit; // unfortunately must be tweaked from outside after AddInput
                filterDriver.TotalRecordBytesEstimate = bytesEstimate;
                InternalSource.ClearInputs();
                InternalSource.AddInput(filterDriver);
            }

            else {
                // the filter becomes the new top of the chain
                filterDriver.AddInput(InternalSource);
                filterDriver.TotalRecordsEstimate = recordLimit; // unfortunately must be tweaked from outside after AddInput
                filterDriver.TotalRecordBytesEstimate = bytesEstimate;
                InternalSource = filterDriver;
            }
        }