/// <summary>
/// Advances to the next record, reading the inputs one after another in the
/// order they appear in the input list.
/// </summary>
/// <returns>False once every input is exhausted, true otherwise.</returns>
public override bool MoveNext() {
    // lazily pick up the first input on the first call
    if (_source == null) {
        _source = (InternalRecordSource)_inputList[_currentSourceNo++];
    }

    _notDone = _source.MoveNext();

    // When the current input runs dry, fall through to the following ones.
    // The bound is the real number of inputs (not a hard-coded 2) and we loop
    // rather than try only once, so empty inputs are stepped over and a
    // single-input list simply ends instead of indexing past the list.
    while (!_notDone && _currentSourceNo < _inputList.Count) {
        _source = (InternalRecordSource)_inputList[_currentSourceNo++];
        _notDone = _source.MoveNext();
    }

    CurrentRecord = _source.CurrentRecord;
    return _notDone;
}
/// <summary>
/// Writes records from <paramref name="input"/> to the output file until the
/// input is exhausted or the file position reaches <paramref name="maxFileSize"/>,
/// then closes the file.
/// </summary>
/// <param name="input">source of the records to write.</param>
/// <param name="maxFileSize">position at which writing stops; -1 disables the limit.</param>
/// <returns>
/// True if writing stopped because the size limit was reached (i.e. there may be
/// more to write), false if the input was written in full.
/// </returns>
internal bool WriteRecordFileMaxSize(InternalRecordSource input, long maxFileSize) {
    // first use: open the file and lead with the input's properties
    if (_outputStream == null) {
        OpenOutputStream();
        _outputWriter = new VariableLengthBinaryWriter(_outputStream);
        input.WriteProperties(_outputStream);
    }

    bool hitSizeLimit = false;
    while (!hitSizeLimit && input.MoveNext()) {
        DataRecord current = input.CurrentRecord;

        // key: length prefix followed by the bytes
        byte[] keyBytes = current.KeyBytes;
        _outputWriter.WriteVariableLength((uint)keyBytes.Length);
        _outputWriter.Write(keyBytes);

        // data: length prefix followed by the bytes, or a bare zero length when absent
        byte[] dataBytes = current.Data;
        if (dataBytes == null) {
            _outputWriter.WriteVariableLength((uint)0);
        }
        else {
            _outputWriter.WriteVariableLength((uint)dataBytes.Length);
            _outputWriter.Write(dataBytes);
        }

        // the size check happens after the record is written, so the file may
        // end at or beyond the limit
        hitSizeLimit = (maxFileSize != -1) && (_outputStream.Position >= maxFileSize);
    }

    _outputStream.Close();
    _outputStream = null;

    return hitSizeLimit;
}
/// <summary>
/// Chooses random records from source.
/// </summary>
/// <param name="numToKeep">number of random records to pass through from source.</param>
/// <param name="seed">a seed to the random number generator.</param>
public void Random(int numToKeep, int seed) {
    RandomFilter filter = new RandomFilter(numToKeep, seed);
    RecordFilterDriver filterDriver = new RecordFilterDriver(filter);

    // Scale the input's byte estimate by the fraction of records kept. When the
    // input's record estimate is zero, or we keep at least as many records as the
    // input is estimated to hold, fall back to the input's own byte estimate
    // instead of dividing by zero or scaling by more than 1.
    long inputRecords = InternalSource.TotalRecordsEstimate;
    long inputBytes = InternalSource.TotalRecordBytesEstimate;
    long bytesEstimate;
    if (numToKeep <= 0) {
        bytesEstimate = 0;
    }
    else if (inputRecords <= 0 || numToKeep >= inputRecords) {
        bytesEstimate = inputBytes;
    }
    else {
        double fractionKept = (double)numToKeep / (double)inputRecords;
        bytesEstimate = (long)(fractionKept * inputBytes);
    }

    if (InternalSource is LoggingSource) {
        // splice the filter in between the logging source and its input
        filterDriver.AddInput(InternalSource.Inputs[0]);
        filterDriver.TotalRecordsEstimate = (long)numToKeep; // unfortunately must be tweaked from outside after AddInput
        filterDriver.TotalRecordBytesEstimate = bytesEstimate;
        InternalSource.ClearInputs();
        InternalSource.AddInput(filterDriver);
    }
    else {
        // the filter becomes the new head of the chain
        filterDriver.AddInput(InternalSource);
        filterDriver.TotalRecordsEstimate = (long)numToKeep; // unfortunately must be tweaked from outside after AddInput
        filterDriver.TotalRecordBytesEstimate = bytesEstimate;
        InternalSource = filterDriver;
    }
}
/// <summary>
/// Advances to the next record emitted by the filter, feeding it input records
/// until it has produced at least one output or the input is finished.
/// </summary>
/// <returns>False when no further record is available, true otherwise.</returns>
public override bool MoveNext() {
    // lazily bind to the first input
    if (_input == null) {
        _input = (InternalRecordSource)_inputList[0];
    }

    // keep feeding the filter until the accepter holds a record or we are done
    while (_recordAccepter.Empty && _notDone) {
        _notDone = _input.MoveNext();

        if (!_notDone) {
            // Input exhausted. The filter might hold state, so give it a
            // chance to output one last record.
            try {
                _filter.Finish(_recordAccepter);
            }
            catch {
                _caughtErrors++;
            }
            continue;
        }

        // user code runs inside a try/catch; every failure is counted and logged
        try {
            _filter.ProcessRecord(_input.CurrentRecord, _recordAccepter);
        }
        catch (Exception ex) {
            _caughtErrors++;
            Console.Error.WriteLine(_caughtErrors + " caught " + _filterType + " error(s)");
            Console.Error.WriteLine(ex.Message + " " + _notDone + " " + _recordAccepter.IsDone);
            Console.Error.WriteLine(ex.StackTrace);
        }

        // the filter may elect to finish early
        _notDone = !_recordAccepter.IsDone;
    }

    // hand out whatever the accepter holds, if anything
    if (!_recordAccepter.MoveNext()) {
        return false;
    }

    CurrentRecord = _recordAccepter.CurrentRecord;
    return true;
}
/// <summary>
/// Registers an input with this InternalRecordSource and derives our own
/// properties from it: the input's size estimates are added to ours and our
/// record info is updated from the input's. A component that does not want
/// properties to propagate should set the appropriate _passThruInput... to false.
/// </summary>
/// <param name="input">the record source to read from.</param>
public void AddInput(InternalRecordSource input) {
    _inputList.Add(input);

    if (!_propertiesSet) {
        SetProperties();
    }

    // estimates accumulate across all inputs
    _totalRecordsEstimate += input.TotalRecordsEstimate;
    _totalRecordBytesEstimate += input.TotalRecordBytesEstimate;

    // by default our output has the same record type as the input
    _recordInfo.Update(input._recordInfo);
}
/// <summary>
/// Advances the wrapped input by one record and writes progress information to
/// Console.Error: a "Begin Read" line on the first call, a progress line each
/// time the record count reaches the next log threshold, and an "End Read"
/// line when the input is exhausted.
/// </summary>
/// <returns>False if at end of iteration, true otherwise.</returns>
public override bool MoveNext() {
    bool notDone;

    if (_source == null) {
        // first call: bind the input, copy its estimates and announce the read
        _source = (InternalRecordSource)_inputList[0];
        notDone = _source.MoveNext();
        CurrentRecord = _source.CurrentRecord;

        TotalRecordBytesEstimate = _source.TotalRecordBytesEstimate;
        TotalRecordsEstimate = _source.TotalRecordsEstimate;

        if (_displayString == null) {
            if (_source is InternalUserSource) {
                _displayString = "[UserSource]";
            }
            else {
                _displayString = "[" + _source.StorageType + ":" + _source.CurrentSourceName + "]";
            }
        }

        Console.Error.Write(_displayString + "\tBegin Read\t" + TotalRecordsEstimate + " records");

        // for these source kinds the count is flagged as an estimate and is
        // recomputed once from the bytes actually seen (see below)
        if (_source is FlatFileMapper || _source is DirectoryMapper || _source is InternalUserSource) {
            Console.Error.Write(" (estimate)");
            _doEstimate = true;
        }
        Console.Error.WriteLine();

        _logInterval = TotalRecordsEstimate / _numLogs;
        if (_logInterval == 0) _logInterval = 1;
        _nextLogCount = _logInterval;
        _beginDateTime = DateTime.UtcNow;
    }
    else {
        notDone = _source.MoveNext();
    }

    CurrentRecord = _source.CurrentRecord;

    if (_recordCount == _nextLogCount) {
        Console.Error.Write(_displayString + "\tBegin Read\t");
        double percent = 100.0 * _recordCount / TotalRecordsEstimate;
        Console.Error.Write("{0:0.00}% done\t", percent);

        // Use the full elapsed time. TimeSpan.Seconds is only the 0-59 seconds
        // component, and dividing by it was an integer division.
        DateTime now = DateTime.UtcNow;
        double elapsedSeconds = (now - _beginDateTime).TotalSeconds;
        _beginDateTime = now;
        double recsPerSecond = _logInterval;
        if (elapsedSeconds > 0.0) {
            recsPerSecond = _logInterval / elapsedSeconds;
        }
        Console.Error.WriteLine("{0:0.##} recs/sec", recsPerSecond);

        if (_doEstimate) {
            double aveBytesPerRecord = (double)_lineBytes / (double)(_recordCount + 1); // +1 avoids div by 0

            // only re-estimate when we actually measured some bytes; otherwise
            // the division below would produce infinity
            if (aveBytesPerRecord > 0.0) {
                TotalRecordsEstimate = (long)(((double)TotalRecordBytesEstimate) / aveBytesPerRecord);
                long recordsLeft = TotalRecordsEstimate - _recordCount;

                // guard against _numLogs == 1, which would divide by zero
                long logsLeft = _numLogs - 1;
                if (logsLeft < 1) logsLeft = 1;
                _logInterval = recordsLeft / logsLeft;
            }

            // only reestimate one time
            _doEstimate = false;
        }

        _nextLogCount += _logInterval;
    }

    _recordCount++;

    if (!notDone) {
        Console.Error.WriteLine(_displayString + "\tEnd Read\t" + DateTime.Now);
    }

    // while still estimating, accumulate the key lengths seen so far
    if (_doEstimate && notDone) {
        _lineBytes += _source.CurrentRecord.Key.Length;
    }

    return notDone;
}
/// <summary>
/// Creates the component that combines all inputs into one output: a
/// SortedRecordMerger when Sorting.IsSorted is set, otherwise a
/// RecordSorterReducer. Every input is added to it and it becomes _output.
/// </summary>
private void _Initialize() {
    // The inputs are assumed to be either all sorted or all unsorted. Some of
    // them may be empty (e.g. empty buckets).
    InternalRecordSource combiner;
    if (!Sorting.IsSorted) {
        // is it right to hard code sortAscending!?!?
        combiner = new RecordSorterReducer(_tempDir, true, _reductionEnabled);
    }
    else {
        combiner = new SortedRecordMerger();
    }

    // add the sources
    foreach (InternalRecordSource upstream in Inputs) {
        combiner.AddInput(upstream);
    }

    _output = combiner;
}
/// <summary>
/// Stores the given record source in _output.
/// </summary>
/// <param name="input">the record source to store.</param>
public void SetInput(InternalRecordSource input)
{
    this._output = input;
}
/// <summary>
/// Finalizes the writing of a TStore to disk: drains the input into the main
/// store, writes the first key of every key group into a key cache, pads the
/// last key group, and writes the input's properties to a "record-info" file.
/// </summary>
public void Write() {
    // the key cache uses the same disk structure as the store itself, with a
    // fixed key group size
    TMSNStoreWriter keyCacheWriter = new TMSNStoreWriter(_outputDir, true);
    uint cacheKeyGroupSize = 32;
    keyCacheWriter.KeyGroupSize = cacheKeyGroupSize;

    // push mode: read from the sorterReducer that the records were pushed into
    if (_input == null) {
        _input = _sorterReducer;
    }

    // _input is still null here if the user never pushed anything onto this writer
    if (_input != null) {
        while (_input.MoveNext()) {
            // Sizes are initialized lazily, after the first MoveNext, so that
            // the input's properties are set.
            if (_numWritten == 0) {
                TotalRecordBytesEstimate = _input.TotalRecordBytesEstimate;
                TotalRecordsEstimate = _input.TotalRecordsEstimate;
                Initialize();

                keyCacheWriter.TotalRecordsEstimate = TotalRecordsEstimate / (long)KeyGroupSize;
                keyCacheWriter.TotalRecordBytesEstimate = TotalRecordBytesEstimate / (long)KeyGroupSize;
                keyCacheWriter.Initialize();
            }

            DataRecord current = _input.CurrentRecord;

            // the first member of each key group also goes into the cache
            if (_numWritten % KeyGroupSize == 0) {
                keyCacheWriter.AddRecord(current.KeyBytes, current.Data, false);
            }

            // every record goes into the main store
            AddRecord(current.KeyBytes, current.Data, true);
        }

        // NOTE(review): this early return skips Finish(), keyCacheWriter.Finish()
        // and _sorterReducer.Close() below — confirm that is intended for empty input.
        if (_numWritten == 0) return;

        // Pad the last group with null entries so that reading the last group
        // needs no special case.
        long unfullGroupNumMembers = _numWritten % KeyGroupSize;
        if (unfullGroupNumMembers > 0) {
            uint numPadEntries = (uint)(KeyGroupSize - unfullGroupNumMembers);
            for (uint remaining = numPadEntries; remaining > 0; remaining--) {
                _keyFileWriter.Write((byte)0); // overlap
                _keyFileWriter.Write((byte)0); // neu
                _keyFileWriter.Write((byte)0); // dataLen
            }
        }

        // write the input's properties to disk, with the sizes replaced by
        // what was actually written
        string recordInfoFile = Path.Combine(_outputDir, "record-info");
        using (Stream infoStream = new FileStream(recordInfoFile, FileMode.Create)) {
            _input.TotalRecordsEstimate = _numWritten;
            _input.TotalRecordBytesEstimate = _keyFileStream.Position;
            _input.WriteProperties(infoStream);
        }
    }

    Finish();
    keyCacheWriter.Finish();

    if (_sorterReducer != null) {
        _sorterReducer.Close();
    }
}
// when left.CurrentRecord is always != null.
// joiner also does table conversion if necessary.
/// <summary>
/// Prepares a joiner for the two sources: wraps the current record of each side
/// in a ColumnWrapper, computes the output column names (all left columns, then
/// the right columns minus the right key columns) and builds the string of
/// separators that stands in for the right side's non-key columns.
/// </summary>
/// <param name="left">left source; its CurrentRecord and CurrentSourceName are read.</param>
/// <param name="right">right source; its CurrentRecord and CurrentSourceName are read.</param>
/// <param name="columnSeparator">column separator handed to both wrappers.</param>
public RecordJoiner(InternalRecordSource left, InternalRecordSource right, char columnSeparator) {
    _separator = columnSeparator;

    _leftWrapper = new ColumnWrapper(left.CurrentRecord, left.CurrentSourceName, _separator);
    _rightWrapper = new ColumnWrapper(right.CurrentRecord, right.CurrentSourceName, _separator);
    _leftWrapper.SetRecord(left.CurrentRecord);
    _rightWrapper.SetRecord(right.CurrentRecord);

    // The right side's key columns are redundant information, so they are
    // left out of the output.
    int rightNonKeyCount = _rightWrapper.ColumnNames.Length - _rightWrapper.KeyColumnNos.Length;
    _outputColumnNames = new string[_leftWrapper.ColumnNames.Length + rightNonKeyCount];

    // left columns come first; remember their names to detect clashes below
    Hashtable seenNames = new Hashtable();
    int outCol = 0;
    foreach (string leftName in _leftWrapper.ColumnNames) {
        _outputColumnNames[outCol++] = leftName;
        seenNames[leftName] = 1;
    }

    // then the right columns, skipping the keys
    int keyCursor = 0;
    for (int col = 0; col < _rightWrapper.ColumnNames.Length; col++) {
        if (col == _rightWrapper.KeyColumnNos[keyCursor]) {
            // move on to the next key column number, if there is one
            if (keyCursor + 1 < _rightWrapper.KeyColumnNos.Length) keyCursor++;
            continue;
        }

        string rightName = _rightWrapper.ColumnNames[col];

        // a name that already exists on the left is prefixed with the right
        // source's file name (without extension)
        if (seenNames[rightName] != null) {
            rightName = Path.GetFileNameWithoutExtension(right.CurrentSourceName) + ":" + rightName;
        }

        _outputColumnNames[outCol++] = rightName;
    }

    // create empty columns for the right side: one separator per non-key column
    for (int i = 0; i < rightNonKeyCount; i++) {
        _emptyRightColumns += _separator;
    }
}
/// <summary>
/// Advances to the next record. A record parked in _nextRecord is handed out
/// first; otherwise the input is advanced.
/// </summary>
/// <returns>False if at end of iteration, true otherwise.</returns>
public override bool MoveNext() {
    if (_source == null) {
        _source = Inputs[0];
    }

    if (_nextRecord == null) {
        // nothing parked: advance the input
        bool notDone = _source.MoveNext();
        CurrentRecord = _source.CurrentRecord;
        return notDone;
    }

    // the next record was already retrieved and stuck here; hand it out and reset
    CurrentRecord = _nextRecord;
    _nextRecord = null;
    return true;
}
/// <summary>
/// Advances iteration. Every left record produces an output record; it is joined
/// against the right record with the same key when there is one.
/// </summary>
/// <returns>False if at end of iteration, true otherwise.</returns>
public override bool MoveNext() {
    if (_leftSource == null) {
        _leftSource = (InternalRecordSource)_inputList[0];
        _rightSource = (InternalRecordSource)_inputList[1];

        // advance the right side from the start so that a record is there
        // waiting to be compared to
        _rightNotDone = _rightSource.MoveNext();
        if (_rightNotDone) {
            _currentRightRecord = _rightSource.CurrentRecord;
        }
    }

    _leftNotDone = _leftSource.MoveNext();
    if (!_leftNotDone) {
        return false;
    }
    _currentLeftRecord = _leftSource.CurrentRecord;

    // a match is not needed to create the joiner
    if (_joiner == null) {
        _joiner = new RecordJoiner(_leftSource, _rightSource, _tableColumnSeparator);
    }

    // Advance the right side until its key is no longer below the left key.
    // The first comparison uses the right record already in hand, since
    // duplicates are allowed on the left side.
    bool advanceRight = false;
    int diff;
    do {
        if (advanceRight && _rightNotDone) {
            _rightSource.MoveNextHint = _currentLeftRecord.Key;
            _rightNotDone = _rightSource.MoveNext();
            if (_rightNotDone) {
                _currentRightRecord = _rightSource.CurrentRecord;
            }
            else {
                _currentRightRecord = null;
            }
        }

        if (_currentRightRecord == null) {
            diff = 1; // no right record: break out of the loop
        }
        else {
            diff = TMSNStoreUtils.Utf8BytesCompare(_currentLeftRecord.KeyBytes, _currentRightRecord.KeyBytes);
        }

        advanceRight = true;
    } while (diff < 0);

    CurrentRecord = _joiner.Join(_currentLeftRecord, _currentRightRecord, (diff == 0));
    return true;
}
/// <summary>
/// Advances Iteration. Only left records whose key equals the current right
/// record's key produce an output record; iteration ends as soon as either
/// side is exhausted.
/// </summary>
/// <returns>False if at end of iteration, true otherwise.</returns>
public override bool MoveNext() {
    if (_leftSource == null) {
        _leftSource = (InternalRecordSource)_inputList[0];
        _rightSource = (InternalRecordSource)_inputList[1];

        // prime the right side; with nothing on the right there is nothing to join
        _rightNotDone = _rightSource.MoveNext();
        if (!_rightNotDone) {
            return false;
        }
    }

    for (;;) {
        // advance the left side first
        _leftNotDone = _leftSource.MoveNext();
        if (!_leftNotDone) {
            return false;
        }

        // Advance the right side until its key is no longer below the left key.
        // The first comparison uses the right record already in hand, since
        // duplicates are allowed on the left side.
        bool advanceRight = false;
        int diff;
        do {
            if (advanceRight && _rightNotDone) {
                _rightSource.MoveNextHint = _leftSource.CurrentRecord.Key;
                _rightNotDone = _rightSource.MoveNext();
                if (!_rightNotDone) {
                    return false;
                }
            }

            diff = TMSNStoreUtils.Utf8BytesCompare(_leftSource.CurrentRecord.KeyBytes, _rightSource.CurrentRecord.KeyBytes);
            advanceRight = true;
        } while (diff < 0);

        // no match: try the next left record
        if (diff != 0) {
            continue;
        }

        // match: join the right to the left
        if (_joiner == null) {
            _joiner = new RecordJoiner(_leftSource, _rightSource, _tableColumnSeparator);
        }

        CurrentRecord = _joiner.Join(_leftSource.CurrentRecord, _rightSource.CurrentRecord, true);
        return true;
    }
}
/// <summary>
/// Redirects Console.Error according to _logFile and, when logging is enabled,
/// wraps the given source in a LoggingSource.
/// </summary>
/// <param name="source">the source to (optionally) wrap.</param>
/// <returns>
/// The source itself when _logFile is null, otherwise a LoggingSource reading
/// from it.
/// </returns>
private InternalRecordSource _AddLogging(InternalRecordSource source) {
    // _logFile == null       -> no logging
    // _logFile == "stderr"   -> log to Console.Error
    // _logFile == "whatever" -> log to whatever file
    if (_logFile == null) {
        // discard everything written to Console.Error
        StreamWriter sink = new StreamWriter(Stream.Null);
        Console.SetError(sink);
        return source; // return source with no logging
    }

    if (!_logFile.Equals("stderr")) {
        StreamWriter logWriter = new StreamWriter(_logFile);

        // A StreamWriter buffers its output and nothing here ever flushes it,
        // so buffered log text could be lost when the process exits. Flush
        // after every write instead.
        logWriter.AutoFlush = true;
        Console.SetError(logWriter);
    }

    // insert the logger
    LoggingSource logger = new LoggingSource();
    logger.AddInput(source);
    return logger;
}
/// <summary>
/// Makes sure the output of this source is sorted in the requested direction
/// and, if asked, reduced. Inserts a RecordSorterReducer when the input's sort
/// state differs from the requested one, a ReduceFilter when only reduction is
/// needed, and nothing when neither is needed.
/// </summary>
/// <param name="sortAscending">true to request ascending order, false for descending.</param>
/// <param name="reductionEnabled">true to request reduction.</param>
private void _SortReduce(bool sortAscending, bool reductionEnabled) {
    // Encode sort states as numbers for easy comparison:
    // no sort = 0, sort ascending = 1, sort descending = 2
    int requestedSort = sortAscending ? 1 : 2;

    bool inputIsSorted = InternalSource.Sorting.IsSorted;
    bool inputIsSortedAscending = InternalSource.Sorting.IsSortedAscending;
    int inputSort = 0;
    if (inputIsSorted) {
        inputSort = inputIsSortedAscending ? 1 : 2;
    }

    // (aside: one might ask, why not just have a separate sorter and reducer.
    // Reduction is combined in the sorter so that reduction can happen in
    // memory before temp files are written to disk.)
    if (requestedSort != inputSort) {
        RecordSorterReducer sorter = new RecordSorterReducer(Processor.TempDir, sortAscending, reductionEnabled);
        sorter.MaxMemorySize = Processor.MaxMemorySize;
        sorter.DeleteTempFiles = Processor.DeleteTempFiles;

        // pipe our current input into the sorterReducer and make it our output
        InternalRecordSource upstream = InternalSource;
        sorter.AddInput(upstream);
        this.InternalSource = sorter;
        return;
    }

    // already sorted as requested: nothing to insert unless reduction is wanted
    if (!reductionEnabled) {
        return;
    }

    IRecordFilter reducer = new ReduceFilter();
    RecordFilterDriver reducerDriver = new RecordFilterDriver(reducer);
    reducerDriver.AddInput(InternalSource);
    InternalSource = reducerDriver;
}
/// <summary>
/// Advances the input to its first record and inserts it into the heap array,
/// moving it up while _MergeCompare ranks its key ahead of its parent's key.
/// Inputs that have no records are not added.
/// </summary>
/// <param name="input">a sorted record source.</param>
/// <exception cref="Exception">thrown when the input is not sorted.</exception>
private void _AddInput(InternalRecordSource input) {
    if (!input.Sorting.IsSorted) {
        throw new Exception("can't merge non-sortable record source");
    }

    // an empty input contributes nothing
    if (!input.MoveNext()) {
        return;
    }

    // append at the end of the heap ...
    int child = _numSources++;
    _heapArray[child] = input;
    int parent = _Parent(child);

    // ... and sift up while the new entry compares ahead of its parent
    while (child != 0
           && _MergeCompare(_heapArray[child].CurrentRecord.KeyBytes, _heapArray[parent].CurrentRecord.KeyBytes)) {
        _Swap(child, parent);
        child = _Parent(child);
        parent = _Parent(child);
    }
}
/// <summary>
/// Returns a RecordConstructor for the record type held in _type. The built-in
/// record types map to their dedicated constructors; for any other type,
/// SourceCode is compiled in memory and the resulting
/// Microsoft.TMSN.Data.UserRecordConstructor is instantiated. On compile errors
/// the source and the errors are printed and the process exits with code -1.
/// </summary>
/// <param name="source">asked for the location of the assembly that declares the record type.</param>
/// <returns>a constructor for records of type _type.</returns>
internal RecordConstructor GetRecordConstructor(InternalRecordSource source) {
    // built-in record types need no compilation
    if (_type == typeof(DataRecord)) {
        return new RecordConstructor();
    }
    if (_type == typeof(StringRecord)) {
        return new StringRecordConstructor();
    }
    if (_type == typeof(CountRecord)) {
        return new CountRecordConstructor();
    }
    if (_type == typeof(TableRecord)) {
        return new TableRecordConstructor(_tableColumnNames, KeyColumnNos, TableColumnSeparator);
    }
    if (_type == typeof(BipartiteRecord)) {
        return new BipartiteRecordConstructor();
    }

    CSharpCodeProvider codeProvider = new CSharpCodeProvider();
    CompilerParameters parms = new CompilerParameters();

    // reference TMSNStore.dll next to the entry assembly; when running as a
    // webservice there is no entry assembly
    Assembly entryAssembly = Assembly.GetEntryAssembly();
    if (entryAssembly != null) {
        string binDir = Path.GetDirectoryName(entryAssembly.Location);
        parms.ReferencedAssemblies.Add(Path.Combine(binDir, "TMSNStore.dll"));
    }

    // reference the loaded assembly that declares the record type
    Assembly typeAssembly = Assembly.GetAssembly(_type);
    parms.ReferencedAssemblies.Add(typeAssembly.Location);

    // the assembly could also be external, in which case the source knows where
    string assemblyLocation = source.GetAssemblyLocation(_type.ToString());
    if (assemblyLocation != null) {
        parms.ReferencedAssemblies.Add(assemblyLocation);
    }

    parms.GenerateExecutable = false;
    parms.GenerateInMemory = true;
    parms.IncludeDebugInformation = true;

#if DOTNET2
    CompilerResults results = codeProvider.CompileAssemblyFromSource(parms, SourceCode);
#else
    CompilerResults results = codeProvider.CreateCompiler().CompileAssemblyFromSource(parms, SourceCode);
#endif

    if (results.Errors.HasErrors) {
        Console.WriteLine(SourceCode);
        foreach (CompilerError compileError in results.Errors) {
            Console.WriteLine(compileError.ErrorText);
        }
        Environment.Exit(-1);
    }

    // instantiate the generated constructor through its parameterless constructor
    Assembly compiled = results.CompiledAssembly;
    Type constructorType = compiled.GetType("Microsoft.TMSN.Data.UserRecordConstructor");
    ConstructorInfo defaultCtor = constructorType.GetConstructor(new Type[0]);
    return (RecordConstructor)defaultCtor.Invoke(null);
}
/// <summary>
/// Shared construction logic: validates the input, records the settings,
/// creates the output directory if needed and opens the key file and the
/// index file for writing.
/// </summary>
/// <param name="input">the records to write; may be null, otherwise it must be sorted.</param>
/// <param name="outputDir">directory that receives the files.</param>
/// <param name="fileSetName">name of the key file; the index file is this name plus "-index".</param>
/// <exception cref="Exception">thrown when a non-null input is not sorted.</exception>
private void _Construct(InternalRecordSource input, string outputDir, string fileSetName) {
    bool unsortedInput = (input != null) && !input.Sorting.IsSorted;
    if (unsortedInput) {
        throw new Exception("can't output non-sorted input as TMSNStore");
    }

    _input = input;
    _outputDir = outputDir;
    _fileSetName = fileSetName;

    // make the directory if it doesn't exist
    if (!Directory.Exists(outputDir)) {
        Directory.CreateDirectory(outputDir);
    }

    // both files are (re)created and written through a 16 MB buffer
    const int bufferSize = 1024 * 1024 * 16;

    string keyFile = Path.Combine(outputDir, _fileSetName);
    _keyFileStream = new FileStream(keyFile, FileMode.Create, FileAccess.Write, FileShare.None, bufferSize);
    _keyFileWriter = new VariableLengthBinaryWriter(_keyFileStream);

    string indexFile = Path.Combine(outputDir, _fileSetName + "-index");
    _indexFileStream = new FileStream(indexFile, FileMode.Create, FileAccess.Write, FileShare.None, bufferSize);
    _indexFileWriter = new BinaryWriter(_indexFileStream);
}
// TableRecords are output in a special way: first the column names, then only
// the data. The key is one of the columns in the data.
// NOTE(review): neither parameter is used; the method reads _output and writes
// to _flatFileWriter — confirm callers pass those same objects.
private void _OutputTable(InternalRecordSource output, StreamWriter writer) {
    TableRecord first = _output.CurrentRecord as TableRecord;
    char separator = first.TableColumnSeparator;

    if (!SuppressTableHeaders) {
        // header line: column names joined by the separator
        for (int col = 0; col < first.ColumnNames.Length; col++) {
            if (col > 0) {
                _flatFileWriter.Write(separator);
            }
            _flatFileWriter.Write(first.ColumnNames[col]);

            // mark the keys with =Key.<keyColumnNo> (1-based position among the keys)
            for (int keyIdx = 0; keyIdx < first.KeyColumnNos.Length; keyIdx++) {
                if (first.KeyColumnNos[keyIdx] != col) {
                    continue;
                }
                _flatFileWriter.Write("=Key.");
                _flatFileWriter.Write(keyIdx + 1);
            }
        }
        _flatFileWriter.WriteLine();
    }

    // the current record is written before the source is advanced
    do {
        _flatFileWriter.WriteLine(_output.CurrentRecord.DataAsString);
    } while (_output.MoveNext());
}
/// <summary>
/// Creates a writer that stores the records of the given input in outputDir,
/// using "keys-data" as the file set name.
/// </summary>
/// <param name="input">the records to write.</param>
/// <param name="outputDir">directory that receives the store files.</param>
internal TMSNStoreWriter(InternalRecordSource input, string outputDir)
{
    const string fileSetName = "keys-data";
    _Construct(input, outputDir, fileSetName);
}
/// <summary>
/// Completes sorting and sets up _output: the in-memory sorted records alone
/// when there is only one thing to read, otherwise a SortedRecordMerger over
/// the in-memory records and all temp files. Optionally wraps the result in a
/// ReduceFilter and copies the size estimates from the final output.
/// </summary>
private void _Finish() {
    if (!_doThreading) {
        // the last batch is sorted in memory and is not written to disk
        _recordsToSort.Sort();
        _sortedRecords = _recordsToSort;
    }
    else {
        _SortAndWriteThreaded();

        // wait on all previous threads, sorts and writes
        foreach (Thread worker in _threads) {
            worker.Join();
        }
        _threads.Clear();
    }

    // the in-memory records count as one more source when there are any
    bool haveInMemoryRecords = (_sortedRecords != null) && (_sortedRecords.NumRecords != 0);
    int numToMerge = _numTempFiles;
    if (haveInMemoryRecords) {
        numToMerge++;
    }

    if (numToMerge <= 1) {
        // NOTE(review): when there is exactly one temp file and no in-memory
        // records, this path does not read the temp file — confirm that state
        // cannot occur.
        _output = _sortedRecords;
    }
    else {
        _merger = new SortedRecordMerger();

        if (haveInMemoryRecords) {
            _merger.AddInput(_sortedRecords);
        }

        for (int fileNo = 0; fileNo < _numTempFiles; fileNo++) {
            InternalRecordSource tempFileReader = new InternalRecordFileReader(GetTempFilename(fileNo));
            _merger.AddInput(tempFileReader);
        }

        _output = _merger;
    }

    // set up reduction filter for reduction across merged sources
    if (_internalReductionEnabled) {
        ReduceFilter reducer = new ReduceFilter();
        RecordFilterDriver reducerDriver = new RecordFilterDriver(reducer);
        reducerDriver.AddInput(_output);
        _output = reducerDriver;
    }

    // this is kind of a hack until it is figured out how these should be set
    TotalRecordBytesEstimate = _output.TotalRecordBytesEstimate;
    TotalRecordsEstimate = _output.TotalRecordsEstimate;
}
/// <summary>
/// Advances to the next left record that passes the filter against the right
/// side. With _passThruOnMatch set, left records whose key matches a right key
/// are passed through (left &amp; right); otherwise left records without a
/// matching right key are passed through (left &amp;! right).
/// </summary>
/// <returns>False if at end of iteration, true otherwise.</returns>
public override bool MoveNext() {
    if (_leftSource == null) {
        _leftSource = (InternalRecordSource)_inputList[0];
        _rightSource = (InternalRecordSource)_inputList[1];
    }

    for (;;) {
        _leftNotDone = _leftSource.MoveNext();
        if (!_leftNotDone) {
            return false;
        }
        _currentLeftRecord = _leftSource.CurrentRecord;

        // Advance the right side until its key is no longer below the left key.
        // The first comparison uses the right record already in hand (when
        // there is one), since duplicates are allowed on the left side.
        bool firstTime = true;
        int diff;
        do {
            if (!firstTime || _currentRightRecord == null) {
                if (_rightNotDone) {
                    _rightNotDone = _rightSource.MoveNext();
                }

                // left & right: once right is done we are done.
                // left &! right: once right is done keep going, so that all the
                // lefts that come after the last right are emitted.
                if (!_rightNotDone) {
                    if (_passThruOnMatch) {
                        return false;
                    }
                    CurrentRecord = _currentLeftRecord;
                    return true;
                }

                _currentRightRecord = _rightSource.CurrentRecord;
            }

            diff = TMSNStoreUtils.Utf8BytesCompare(_currentLeftRecord.KeyBytes, _currentRightRecord.KeyBytes);
            firstTime = false;
        } while (diff < 0);

        // emit on a match when passing matches through, and on a mismatch when
        // passing mismatches through; otherwise try the next left record
        bool matched = (diff == 0);
        if (matched == _passThruOnMatch) {
            CurrentRecord = _currentLeftRecord;
            return true;
        }
    }
}
/// <summary>
/// Advances to the next record whose key matches _query: keys that start with
/// the query when _prefix is set, keys equal to the query otherwise. When the
/// input is a TMSNStoreRecordSource (possibly behind a LoggingSource) the
/// store's own lookup is used; any other input is scanned record by record.
/// </summary>
/// <returns>False if at end of iteration, true otherwise.</returns>
public override bool MoveNext() {
    if (_tstoreEnumer == null) {
        InternalRecordSource source = (InternalRecordSource)Inputs[0];
        TotalRecordBytesEstimate = 0; // don't know
        TotalRecordsEstimate = 0; // don't know

        // if a logging source is in the way look around it
        if (source is LoggingSource && source.Inputs[0] is TMSNStoreRecordSource) {
            source = source.Inputs[0];
        }

        if (source is TMSNStoreRecordSource) {
            TMSNStoreReader tstoreReader = ((TMSNStoreRecordSource)source).GetReader();
            if (_prefix) {
                _tstoreEnumer = tstoreReader.GetMatchingPrefix(_query).GetEnumerator();
            }
            else {
                _tstoreEnumer = tstoreReader.GetMatch(_query).GetEnumerator();
            }
        }
        else {
            _source = Inputs[0] as InternalRecordSource;
        }
    }

    if (_tstoreEnumer != null) {
        bool notDone = _tstoreEnumer.MoveNext();

        // Current is only meaningful while MoveNext returns true; some
        // enumerators throw when it is read past the end.
        if (notDone) {
            CurrentRecord = (DataRecord)_tstoreEnumer.Current;
        }
        return notDone;
    }

    // fallback: scan the input for matching keys
    while (_source.MoveNext()) {
        string key = _source.CurrentRecord.Key;

        // Keys are compared ordinally; a culture-sensitive StartsWith could
        // report matches that differ from an exact character comparison.
        bool isMatch = _prefix
            ? key.StartsWith(_query, StringComparison.Ordinal)
            : key.Equals(_query);

        if (isMatch) {
            CurrentRecord = _source.CurrentRecord;
            return true;
        }
    }

    return false;
}
/// <summary>
/// Advances to the next record of the two-way merge: whichever side was emitted
/// last is advanced, then one of the two current records is chosen as output.
/// </summary>
/// <returns>False once both sides are exhausted, true otherwise.</returns>
public override bool MoveNext() {
    if (_leftSource == null) {
        _leftSource = (InternalRecordSource)_inputList[0];
        _rightSource = (InternalRecordSource)_inputList[1];
    }

    // step whichever side(s) were flagged by the previous call
    if (_advanceLeft) {
        _leftNotDone = _leftSource.MoveNext();
        _advanceLeft = false;
    }
    if (_advanceRight) {
        _rightNotDone = _rightSource.MoveNext();
        _advanceRight = false;
    }

    bool takeLeft;
    if (!_leftNotDone) {
        // left is done; if right is done too there is nothing left to emit
        if (!_rightNotDone) {
            return false;
        }
        takeLeft = false;
    }
    else if (!_rightNotDone) {
        // only left has records
        takeLeft = true;
    }
    else {
        // both sides have a record: compare keys.
        // NOTE(review): a positive product selects the left record; whether
        // that gives the intended order depends on the sign convention of
        // _ascendingFactor — confirm.
        int diff = _ascendingFactor * TMSNStoreUtils.Utf8BytesCompare(_leftSource.CurrentRecord.KeyBytes, _rightSource.CurrentRecord.KeyBytes);
        takeLeft = diff > 0;
    }

    if (takeLeft) {
        _currentOutputSource = _leftSource;
        _advanceLeft = true;
    }
    else {
        _currentOutputSource = _rightSource;
        _advanceRight = true;
    }

    CurrentRecord = _currentOutputSource.CurrentRecord;
    return true;
}
/// <summary>
/// Advances to the next record emitted by the user filter. The filter is built
/// when the first input record arrives, then input records are fed to it until
/// it has produced at least one output or the input is finished.
/// </summary>
/// <returns>False when no further record is available, true otherwise.</returns>
/// <exception cref="Exception">
/// thrown when the filter's KeyOrderIsPreserved differs from _passThruInputSorting.
/// </exception>
public override bool MoveNext() {
    // lazily bind to the first input
    if (_input == null) {
        _input = (InternalRecordSource)_inputList[0];
    }

    // get at least one record into the accepter
    while (_recordAccepter.Empty && _notDone) {
        _notDone = _input.MoveNext();

        if (!_notDone) {
            // Input exhausted. The filter might hold state, so give it a
            // chance to output one last record.
            try {
                _filter.Finish(_recordAccepter);
            }
            catch {
                _caughtErrors++;
            }
            continue;
        }

        // build the filter and put it in place, based on the record type
        if (_filter == null) {
            _filter = _GetFilter(_input.CurrentRecord.GetType().ToString(), _assemblyLocations);

            // KeyOrderIsPreserved was determined earlier; make sure that
            // determination was right
            if (_passThruInputSorting != _filter.KeyOrderIsPreserved) {
                throw new Exception("user filter error: improper KeyOrderIsPreserved detection");
            }
        }

        // user code runs inside a try/catch; the first failure and every
        // 1000th one are reported
        try {
            _filter.ProcessRecord(_input.CurrentRecord, _recordAccepter);
        }
        catch {
            _caughtErrors++;
            bool shouldReport = (_caughtErrors == 1) || (_caughtErrors % 1000 == 0);
            if (shouldReport) {
                Console.WriteLine(_caughtErrors + " caught " + _filterType + " error(s) d");
            }
        }

        // the filter may elect to finish early
        _notDone = !_recordAccepter.IsDone;
    }

    // grab a record from the accepter, if it holds one
    if (!_recordAccepter.MoveNext()) {
        return false;
    }

    CurrentRecord = _recordAccepter.CurrentRecord;
    return true;
}
/// <summary>
/// Writes every record of the input to the output file, followed by the final
/// size estimates, and closes the file. The file is opened (and the input's
/// properties written) when the first record arrives. If the input yields no
/// records and no stream is open, nothing is written.
/// </summary>
/// <param name="input">source of the records to write.</param>
internal void Write(InternalRecordSource input) {
    while (input.MoveNext()) {
        DataRecord record = input.CurrentRecord;

        // The stream is opened only after a MoveNext on the input, which is
        // needed for RecordInstance to be valid.
        if (_outputStream == null) {
            OpenOutputStream();
            input.WriteProperties(_outputStream);
            _outputWriter = new VariableLengthBinaryWriter(_outputStream);
        }

        // output key
        _outputWriter.WriteVariableLength((uint)record.KeyBytes.Length);
        _outputWriter.Write(record.KeyBytes);

        // output data
        if (record.Data != null) {
            _outputWriter.WriteVariableLength((uint)record.Data.Length);
            _outputWriter.Write(record.Data);
        }
        else {
            _outputWriter.WriteVariableLength((uint)0);
        }

        // slow. do elsewhere
        _totalRecordBytesEstimate = _outputStream.Position;
        _totalRecordsEstimate++;
    }

    // Empty input: the stream was never opened, so there is nothing to
    // finalize. Without this guard the code below dereferenced a null stream.
    if (_outputStream == null) {
        return;
    }

    _totalRecordBytesEstimate = _outputStream.Position;
    InternalRecordSource.WriteEstimates(_outputStream, _totalRecordBytesEstimate, _totalRecordsEstimate);

    _outputStream.Close();
    _outputStream = null;
}
/// <summary>
/// Creates the filter and stores the given record source in _source.
/// </summary>
/// <param name="source">the record source to store.</param>
public StatisticsPseudoFilter(InternalRecordSource source)
{
    this._source = source;
}
/// <summary>
/// Truncates the number of records coming from the record source.
/// </summary>
/// <param name="recordLimit">Number of records to limit source to.</param>
public void Limit(long recordLimit) {
    LimitFilter filter = new LimitFilter(recordLimit);
    RecordFilterDriver filterDriver = new RecordFilterDriver(filter);

    // Scale the input's byte estimate by the fraction of records kept. When the
    // input's record estimate is zero, or the limit is at least as large as the
    // input is estimated to be, fall back to the input's own byte estimate
    // instead of dividing by zero or scaling by more than 1.
    long inputRecords = InternalSource.TotalRecordsEstimate;
    long inputBytes = InternalSource.TotalRecordBytesEstimate;
    long bytesEstimate;
    if (recordLimit <= 0) {
        bytesEstimate = 0;
    }
    else if (inputRecords <= 0 || recordLimit >= inputRecords) {
        bytesEstimate = inputBytes;
    }
    else {
        double fractionKept = (double)recordLimit / (double)inputRecords;
        bytesEstimate = (long)(fractionKept * inputBytes);
    }

    if (InternalSource is LoggingSource) {
        // splice the filter in between the logging source and its input
        filterDriver.AddInput(InternalSource.Inputs[0]);
        filterDriver.TotalRecordsEstimate = recordLimit; // unfortunately must be tweaked from outside after AddInput
        filterDriver.TotalRecordBytesEstimate = bytesEstimate;
        InternalSource.ClearInputs();
        InternalSource.AddInput(filterDriver);
    }
    else {
        // the filter becomes the new head of the chain
        filterDriver.AddInput(InternalSource);
        filterDriver.TotalRecordsEstimate = recordLimit; // unfortunately must be tweaked from outside after AddInput
        filterDriver.TotalRecordBytesEstimate = bytesEstimate;
        InternalSource = filterDriver;
    }
}