コード例 #1
0
ファイル: Sorting.cs プロジェクト: zbxzc35/BoostTree
        // Completes the sort and selects the source that downstream readers
        // will pull sorted records from (_output).
        //
        //  * Threaded mode: hands the remaining work to _SortAndWriteThreaded()
        //    and joins every outstanding worker thread.
        //  * Non-threaded mode: sorts the last chunk in memory and keeps it
        //    there instead of spilling it to a temp file.
        //
        // The in-memory chunk (if it holds records) and all temp files are then
        // combined: several sources go through a SortedRecordMerger, a single
        // source is used directly.  If internal reduction is enabled, a
        // ReduceFilter is layered on top of the result.
        private void _Finish()
        {
            if (_doThreading) {
                _SortAndWriteThreaded();

                // wait on all previous threads, sorts and writes
                foreach (Thread t in _threads) {
                    t.Join();
                }
                _threads.Clear();
            }

            else {
                // don't write the last one to disk
                _recordsToSort.Sort();
                _sortedRecords = _recordsToSort;
            }

            // Evaluated once: is there a non-empty sorted chunk still in memory?
            bool hasInMemoryRecords = _sortedRecords != null && _sortedRecords.NumRecords != 0;

            int numToMerge = _numTempFiles;
            if (hasInMemoryRecords) {
                numToMerge++;
            }

            if (numToMerge > 1) {
                _merger = new SortedRecordMerger();

                if (hasInMemoryRecords) {
                    _merger.AddInput(_sortedRecords);
                }

                for (int i = 0; i < _numTempFiles; i++) {
                    InternalRecordSource source = new InternalRecordFileReader(GetTempFilename(i));
                    _merger.AddInput(source);
                }

                _output = _merger;
            }

            else if (_numTempFiles == 1) {
                // numToMerge <= 1 with one temp file means there are no
                // in-memory records: the temp file is the only sorted source,
                // so read it directly.  Previously this case fell through to
                // _sortedRecords (empty or null) and the file was never read.
                _output = new InternalRecordFileReader(GetTempFilename(0));
            }

            else {
                // NOTE(review): _sortedRecords can be null here if nothing was
                // kept in memory and no temp file exists; the estimate reads
                // below would then fail -- confirm callers never reach this
                // with no input at all.
                _output = _sortedRecords;
            }

            // set up reduction filter for reduction across merged sources
            if (_internalReductionEnabled) {
                ReduceFilter reducer = new ReduceFilter();
                RecordFilterDriver filterDriver = new RecordFilterDriver(reducer);
                filterDriver.AddInput(_output);
                _output = filterDriver;
            }

            // Stop-gap (per the original author): mirror the output's estimates
            // onto this object until there is a better place to set them.
            TotalRecordBytesEstimate = _output.TotalRecordBytesEstimate;
            TotalRecordsEstimate = _output.TotalRecordsEstimate;
        }
コード例 #2
0
ファイル: Sorting.cs プロジェクト: zbxzc35/BoostTree
        // Spills the current chunk (_recordsToWrite) to the next temp file.
        // If internal reduction is enabled the chunk is routed through a
        // ReduceFilter on its way out, so reduction happens before the records
        // reach the disk -- the reason sorting and reducing live in one class.
        private void _WriteSortedRecords()
        {
            bool tempDirMissing = !Directory.Exists(_tempDir);
            if (tempDirMissing) {
                Directory.CreateDirectory(_tempDir);
            }

            InternalRecordSource chunk = _recordsToWrite;

            // what actually gets written: the raw chunk, or the chunk seen
            // through a reducing filter driver
            InternalRecordSource toWrite;
            if (!_internalReductionEnabled) {
                toWrite = chunk;
            }
            else {
                ReduceFilter reduceFilter = new ReduceFilter();
                RecordFilterDriver reducingDriver = new RecordFilterDriver(reduceFilter);
                reducingDriver.AddInput(chunk);
                toWrite = reducingDriver;
            }

            // the temp file is named from the current count, then the count is
            // advanced before the write starts
            RecordFileWriter tempFileWriter = new RecordFileWriter(GetTempFilename(_numTempFiles));
            ++_numTempFiles;

            try {
                tempFileWriter.Write(toWrite);
            }
            finally {
                // the chunk is closed whether or not the write succeeded
                _recordsToWrite.Close();
            }
        }
コード例 #3
0
ファイル: TStoreProcessor.cs プロジェクト: zbxzc35/BoostTree
 /// <summary>
 /// Turns a record source into a source of DataRecords that describe the
 /// input source itself.  The input's own records are ignored entirely;
 /// what flows out is information about the source (name, estimated size,
 /// sorted-ness, and so on).
 /// </summary>
 /// <param name="input">Input record source.</param>
 /// <returns>A new record source whose internal source is the statistics filter driver.</returns>
 public RecordSource GetStatistics(RecordSource input)
 {
     // Implemented like any other filter: a pseudo-filter that is handed the
     // input's internal source, wrapped in a driver fed by that same source.
     RecordFilterDriver statisticsDriver =
         new RecordFilterDriver(new StatisticsPseudoFilter(input.InternalSource));
     statisticsDriver.AddInput(input.InternalSource);

     RecordSource statisticsSource = new RecordSource(this);
     statisticsSource.InternalSource = statisticsDriver;
     return statisticsSource;
 }
コード例 #4
0
ファイル: TStoreProcessor.cs プロジェクト: zbxzc35/BoostTree
        /// <summary>
        /// Applies a caller-supplied filter to a RecordSource.
        /// </summary>
        /// <param name="input">The input record source</param>
        /// <param name="filter">The user provided recordFilter</param>
        /// <returns>A new RecordSource, backed by the filter driver, for further processing</returns>
        public RecordSource Filter(RecordSource input, IRecordFilter filter)
        {
            // A filter that also implements ISetRecordSource is handed the
            // input RecordSource so the filter itself has access to it.
            ISetRecordSource sourceAware = filter as ISetRecordSource;
            if (sourceAware != null) {
                sourceAware.Source = input;
            }

            RecordFilterDriver driver = new RecordFilterDriver(filter);
            driver.AddInput(input.InternalSource);

            RecordSource filtered = new RecordSource(this);
            filtered.InternalSource = driver;
            return filtered;
        }
コード例 #5
0
ファイル: TStoreProcessor.cs プロジェクト: zbxzc35/BoostTree
        // Ensures this source's output is sorted in the requested direction
        // and, optionally, reduced.
        //
        // Sort state is encoded as a small number so it can be compared
        // directly: 0 = not sorted, 1 = sorted ascending, 2 = sorted descending.
        //
        //  * input already in the requested order, no reduction -> nothing to do
        //  * input already in the requested order, reduction    -> ReduceFilter only
        //  * anything else -> RecordSorterReducer (sorts, and reduces if asked)
        //
        // Reduction is built into the sorter rather than being a separate
        // stage so that it can happen in memory before temp files are written
        // to disk.
        private void _SortReduce(bool sortAscending, bool reductionEnabled)
        {
            int requestedOrder = sortAscending ? 1 : 2;

            bool inputIsSorted = InternalSource.Sorting.IsSorted;
            bool inputIsSortedAscending = InternalSource.Sorting.IsSortedAscending;

            int currentOrder;
            if (!inputIsSorted) {
                currentOrder = 0;
            }
            else if (inputIsSortedAscending) {
                currentOrder = 1;
            }
            else {
                currentOrder = 2;
            }

            if (requestedOrder == currentOrder) {
                if (!reductionEnabled) {
                    // already in the right order and nothing to reduce:
                    // don't insert a sorter or reducer
                    return;
                }

                // order is fine, so a plain reducing filter is enough
                IRecordFilter reduceFilter = new ReduceFilter();
                RecordFilterDriver reduceDriver = new RecordFilterDriver(reduceFilter);
                reduceDriver.AddInput(InternalSource);
                InternalSource = reduceDriver;
                return;
            }

            // order differs: put a sorter/reducer in front of the current source
            RecordSorterReducer sorterReducer =
                new RecordSorterReducer(Processor.TempDir, sortAscending, reductionEnabled);
            sorterReducer.MaxMemorySize = Processor.MaxMemorySize;
            sorterReducer.DeleteTempFiles = Processor.DeleteTempFiles;

            InternalRecordSource upstream = InternalSource; // current input
            sorterReducer.AddInput(upstream);               // feed it to the sorter/reducer
            this.InternalSource = sorterReducer;            // which becomes this source's output
        }
コード例 #6
0
ファイル: TStoreProcessor.cs プロジェクト: zbxzc35/BoostTree
        /// <summary>
        /// Chooses random records from source.
        /// </summary>
        /// <remarks>
        /// A RandomFilter wrapped in a RecordFilterDriver is spliced into the
        /// pipeline.  If the current internal source is a LoggingSource, the
        /// driver is inserted between the logging source and its first input,
        /// and the logging source remains the internal source; otherwise the
        /// driver becomes the new internal source.
        /// The driver's record estimate is set to <paramref name="numToKeep"/>
        /// and its byte estimate is the input's byte estimate scaled by
        /// numToKeep / (input record estimate).  When the input's record
        /// estimate is not positive that ratio is undefined, so the input's
        /// byte estimate is carried through unchanged.
        /// </remarks>
        /// <param name="numToKeep">number of random records to pass through from source.</param>
        /// <param name="seed">a seed to the random number generator.</param>
        public void Random(int numToKeep, int seed)
        {
            RandomFilter filter = new RandomFilter(numToKeep, seed);
            RecordFilterDriver filterDriver = new RecordFilterDriver(filter);

            long bytesEstimate;
            if (InternalSource.TotalRecordsEstimate > 0) {
                double fractionKept = (double)numToKeep / (double)InternalSource.TotalRecordsEstimate;
                bytesEstimate = (long)(fractionKept * InternalSource.TotalRecordBytesEstimate);
            }

            else {
                // Dividing by a zero estimate would give Infinity/NaN, and
                // casting that to long produces an unspecified value.  Fall
                // back to the input's own byte estimate instead.
                bytesEstimate = (long)InternalSource.TotalRecordBytesEstimate;
            }

            if (InternalSource is LoggingSource) {
                // splice the filter in underneath the logging source
                filterDriver.AddInput(InternalSource.Inputs[0]);
                filterDriver.TotalRecordsEstimate = (long)numToKeep; // unfortunately must be tweaked from outside after AddInput
                filterDriver.TotalRecordBytesEstimate = bytesEstimate;
                InternalSource.ClearInputs();
                InternalSource.AddInput(filterDriver);
            }

            else {
                filterDriver.AddInput(InternalSource);
                filterDriver.TotalRecordsEstimate = (long)numToKeep; // unfortunately must be tweaked from outside after AddInput
                filterDriver.TotalRecordBytesEstimate = bytesEstimate;
                InternalSource = filterDriver;
            }
        }
コード例 #7
0
ファイル: TStoreProcessor.cs プロジェクト: zbxzc35/BoostTree
        /// <summary>
        /// Truncates the number of records coming from the record source.
        /// </summary>
        /// <remarks>
        /// A LimitFilter wrapped in a RecordFilterDriver is spliced into the
        /// pipeline.  If the current internal source is a LoggingSource, the
        /// driver is inserted between the logging source and its first input,
        /// and the logging source remains the internal source; otherwise the
        /// driver becomes the new internal source.
        /// The driver's record estimate is set to <paramref name="recordLimit"/>
        /// and its byte estimate is the input's byte estimate scaled by
        /// recordLimit / (input record estimate).  When the input's record
        /// estimate is not positive that ratio is undefined, so the input's
        /// byte estimate is carried through unchanged.
        /// </remarks>
        /// <param name="recordLimit">Number of records to limit source to.</param>
        public void Limit(long recordLimit)
        {
            LimitFilter filter = new LimitFilter(recordLimit);
            RecordFilterDriver filterDriver = new RecordFilterDriver(filter);

            long bytesEstimate;
            if (InternalSource.TotalRecordsEstimate > 0) {
                double fractionKept = (double)recordLimit / (double)InternalSource.TotalRecordsEstimate;
                bytesEstimate = (long)(fractionKept * InternalSource.TotalRecordBytesEstimate);
            }

            else {
                // Dividing by a zero estimate would give Infinity/NaN, and
                // casting that to long produces an unspecified value.  Fall
                // back to the input's own byte estimate instead.
                bytesEstimate = (long)InternalSource.TotalRecordBytesEstimate;
            }

            if (InternalSource is LoggingSource) {
                // splice the filter in underneath the logging source
                filterDriver.AddInput(InternalSource.Inputs[0]);
                filterDriver.TotalRecordsEstimate = recordLimit; // unfortunately must be tweaked from outside after AddInput
                filterDriver.TotalRecordBytesEstimate = bytesEstimate;
                InternalSource.ClearInputs();
                InternalSource.AddInput(filterDriver);
            }

            else {
                filterDriver.AddInput(InternalSource);
                filterDriver.TotalRecordsEstimate = recordLimit; // unfortunately must be tweaked from outside after AddInput
                filterDriver.TotalRecordBytesEstimate = bytesEstimate;
                InternalSource = filterDriver;
            }
        }