/// <summary>
/// Flushes the currently buffered sorted records to the next numbered temp
/// file on disk, creating the temp directory on demand. This is where
/// reduction happens, if it happens: when internal reduction is enabled a
/// ReduceFilter is spliced in front of the writer so records are reduced
/// before they ever hit disk.
/// </summary>
private void _WriteSortedRecords() {
    if (!Directory.Exists(_tempDir)) {
        Directory.CreateDirectory(_tempDir);
    }

    InternalRecordSource source = _recordsToWrite;

    // The whole reason we have a sorterReducer and not just a sorter is so
    // that we can reduce before writing our temp files to disk.
    if (_internalReductionEnabled) {
        RecordFilterDriver reducerDriver = new RecordFilterDriver(new ReduceFilter());
        reducerDriver.AddInput(source);
        source = reducerDriver;
    }

    RecordFileWriter tempFileWriter = new RecordFileWriter(GetTempFilename(_numTempFiles));
    _numTempFiles++;

    try {
        tempFileWriter.Write(source);
    }
    finally {
        // Release the buffered records even if the write fails.
        _recordsToWrite.Close();
    }
}
/// <summary>
/// Writes records from the output source to <paramref name="filePath"/>,
/// stopping once the file reaches <paramref name="maxFileSize"/>.
/// Assumes that on subsequent calls of this method the input source is
/// still positioned where the previous call left it.
/// </summary>
/// <param name="maxFileSize">Maximum size in bytes of the file to produce.</param>
/// <param name="filePath">Destination path for the record file.</param>
/// <returns>The result of the underlying size-bounded write (presumably
/// whether more records remain — confirm against RecordFileWriter).</returns>
public bool WriteRecordsMaxSize(long maxFileSize, string filePath) {
    RecordFileWriter boundedWriter = new RecordFileWriter(filePath);
    return boundedWriter.WriteRecordFileMaxSize(_output, maxFileSize);
}
/// <summary>
/// Routes a raw record to the appropriate output bucket. One writer per
/// output file is constructed lazily on the first call. Records are bucketed
/// by configured boundary begin-strings when present, written straight
/// through when there is a single bucket, and otherwise routed by a hash of
/// the record bytes.
/// </summary>
/// <param name="rawRecord">The serialized record bytes to add.</param>
public void AddRawRecord(byte[] rawRecord) {
    // Lazily construct the per-file writers on the first record.
    if (_recordFileWriters == null) {
        _recordFileWriters = new RecordFileWriter[_recordFilesOut.Length];
        for (int fileNo = 0; fileNo < _recordFilesOut.Length; fileNo++) {
            RecordFileWriter bucketWriter = new RecordFileWriter(_recordFilesOut[fileNo], _recordInstance);
            bucketWriter.AddedRecordsInSortedOrder = _isSorted;
            _recordFileWriters[fileNo] = bucketWriter;
        }

        // The hasher could already have been set by the customHasher property.
        if (_hasher == null) {
            _hasher = new TMSNStoreHasher();
        }
    }

    if (_bucketBoundaryBeginBytes != null) {
        // Bucketing on begin-strings: advance the current boundary while the
        // next boundary's begin bytes still compare >= this record.
        // NOTE(review): _currentBoundaryNo only moves forward, which looks
        // like it assumes records arrive in sorted order — confirm.
        while (_currentBoundaryNo != _bucketBoundaryBeginBytes.Length - 1
               && TMSNStoreUtils.Utf8BytesCompare(_bucketBoundaryBeginBytes[_currentBoundaryNo + 1], rawRecord) >= 0) {
            _currentBoundaryNo++;
        }
        _recordFileWriters[_currentBoundaryNo].AddRawRecord(rawRecord);
    }
    else if (_recordFileWriters.Length == 1) {
        // Single bucket: no routing decision needed.
        _recordFileWriters[0].AddRawRecord(rawRecord);
    }
    else {
        // Multiple buckets: route by record hash. The default hasher
        // respects null termination.
        uint bucketNo = _hasher.GetHashCode(rawRecord, rawRecord.Length) % (uint)_recordFileWriters.Length;
        _recordFileWriters[bucketNo].AddRawRecord(rawRecord);
    }
}
/// <summary>
/// Writes the output source to the destination described by the output uri,
/// dispatching on the uri's storage type (TStore, flat file, or record file).
/// Begin/End progress lines are logged to stderr. The output source is
/// closed even when the write fails.
/// </summary>
/// <exception cref="Exception">Thrown when no output uri was supplied to the
/// constructor.</exception>
public void Write() {
    if (_outputUri == null) {
        throw new Exception("must use constructor with OutputUri parameter when using this method");
    }

    TStorageType outputType = _outputUri.StorageType;

    Console.Error.Write("[" + outputType + ":" + _outputUri.FilePath + "]\tBegin Write\t");
    Console.Error.WriteLine(DateTime.Now);
    Console.Error.Flush();

    try {
        switch (outputType) {
            case TStorageType.TStore:
                TMSNStoreWriter twriter = new TMSNStoreWriter(_output, _outputUri.FilePath);
                twriter.KeyGroupSize = TStoreGroupSize;
                twriter.Write();
                break;

            case TStorageType.FlatFile:
                _WriteFlatFile(_outputUri.FilePath);
                break;

            case TStorageType.RecordFile:
                RecordFileWriter writer = new RecordFileWriter(_outputUri.FilePath);
                writer.Write(_output);
                break;
        }
    }
    finally {
        // BUGFIX: previously _output.Close() was skipped when the write
        // threw, leaking whatever resources the source holds. Always close.
        _output.Close();
    }

    Console.Error.Write("[" + outputType + ":" + _outputUri.FilePath + "]\tEnd Write\t");
    Console.Error.WriteLine(DateTime.Now);
    Console.Error.Flush();
}