Exemple #1
0
        /// <summary>
        /// Builds a page by parsing comma-separated integer records.
        /// </summary>
        /// <remarks>
        /// Parsing stops at the first <c>null</c> entry of <paramref name="records"/>. The first
        /// record determines how many fields are read from every record; extra fields in later
        /// records are ignored. When no record is parsed the record array is not allocated here.
        /// </remarks>
        /// <param name="fieldToSortBy">Value forwarded unchanged to each created <c>Record</c>.</param>
        /// <param name="records">Raw record lines, one comma-separated record per element.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="records"/> is <c>null</c>.</exception>
        /// <exception cref="System.FormatException">
        /// A record has fewer fields than the first record, or a field is not a valid integer.
        /// </exception>
        public Page(int fieldToSortBy, string[] records)
        {
            if (records == null)
            {
                throw new System.ArgumentNullException("records");
            }

            for (int i = 0; i < records.Length && records[i] != null; i++)
            {
                string[] fieldsToParse = records[i].Split(',');
                if (_records == null)
                {
                    // The first record fixes the field count and triggers allocation of the page storage.
                    _numberOfFields = fieldsToParse.Length;
                    _records = new Record[records.Length];
                }

                // Fail with a descriptive error instead of an IndexOutOfRangeException below.
                if (fieldsToParse.Length < _numberOfFields)
                {
                    throw new System.FormatException(
                        "Record at index " + i + " has " + fieldsToParse.Length
                        + " field(s) but " + _numberOfFields + " were expected.");
                }

                int[] fields = new int[_numberOfFields];

                for (int j = 0; j < _numberOfFields; j++)
                {
                    fields[j] = int.Parse(fieldsToParse[j]);
                }

                _records[i] = new Record(fieldToSortBy, fields);
            }
        }
Exemple #2
0
        /// <summary>
        /// Repeatedly merges the sorted runs of the previous iteration until at most one run
        /// remains, then returns the file name of the last run of the final iteration.
        /// </summary>
        /// <remarks>
        /// Each iteration merges the previous iteration's runs in sets of up to B-1 runs, where B
        /// is <c>_numberOfBufferPages</c>: the first B-1 slots of <c>_pool</c> hold one input page
        /// per run and the last slot collects merged output. Every merged set is written to a new
        /// temporary file (<see cref="System.IO.Path.GetTempFileName"/>).
        /// NOTE(review): <c>FirstIteration</c> presumably creates the initial sorted runs and
        /// initializes <c>_iteration</c>; its body is not visible here.
        /// NOTE(review): nothing in this method closes the readers obtained through
        /// <c>GetReader()</c> or deletes temporary files of earlier iterations; confirm whether
        /// that cleanup happens elsewhere.
        /// </remarks>
        /// <param name="filename">Input file name, passed to <c>FirstIteration</c>.</param>
        /// <param name="fieldIndexToSortBy">Index of the field whose value orders the records.</param>
        /// <param name="fieldCount">Passed to the <c>Page</c> constructor that creates the output page.</param>
        /// <returns>The <c>Filename</c> of the last sorted run of the most recent iteration.</returns>
        public string Sort(string filename, int fieldIndexToSortBy, int fieldCount)
        {
            FirstIteration(filename, fieldIndexToSortBy);

            // The last buffer page collects merged output; every slot before it is an input page,
            // so this value is also the number of input slots (B-1).
            int outputPageIndex = _numberOfBufferPages - 1;

            while (_iterationToSortedruns[_iteration - 1].Count > 1)
            {
                Console.WriteLine("\nCommencing iteration {0} \n", _iteration);

                var previousRuns = _iterationToSortedruns[_iteration - 1];
                int sortedrunIndex = 0;
                int sortedrunsUsed = 0;
                _pool = new Page[_numberOfBufferPages];

                // Load the first page of each of the first B-1 sorted runs of the last iteration.
                // sortedrunsUsed is recorded for every loaded run, so it is also correct when the
                // previous iteration produced B-1 runs or fewer.
                for (int i = 0; i < previousRuns.Count && i < outputPageIndex; i++)
                {
                    _pool[i] = new Page(fieldIndexToSortBy, ReadPage(previousRuns[i].GetReader()));
                    sortedrunsUsed = i + 1;
                }

                // Current record index within each input page and within the output page.
                int[] pointers = new int[_numberOfBufferPages];
                // Start with an output page that contains 0 records.
                _pool[outputPageIndex] = new Page(fieldCount, _pageSizeInRecords);

                // Initialize the current iteration's run list with its first output run.
                int producedSortedruns = 0;
                var currentRuns = new List<SortedRun>();
                _iterationToSortedruns[_iteration] = currentRuns;
                string newSortedrunFilename = Path.GetTempFileName();
                var sortedRun = new SortedRun();
                sortedRun.Filename = newSortedrunFilename;
                sortedRun.GetWriter();
                currentRuns.Add(sortedRun);

                int pageCount = 0;
                bool finishedMergingSet = false;

                // Loop while there are still sorted runs from the previous iteration to merge.
                while (!finishedMergingSet)
                {
                    // The current set is finished when no input page has a record at its pointer.
                    finishedMergingSet = true;
                    for (int i = 0; i < outputPageIndex; i++)
                    {
                        if (_pool[i] != null && _pool[i].Records != null && _pool[i].Records[pointers[i]] != null)
                        {
                            finishedMergingSet = false;
                            break;
                        }
                    }
                    if (finishedMergingSet)
                    {
                        // Close the latest sorted run written.
                        currentRuns[producedSortedruns++].Writer.Close();

                        // Load the first page of up to B-1 further runs of the previous iteration,
                        // continuing from the first run that has not been read yet.
                        sortedrunIndex = sortedrunsUsed;
                        for (int i = sortedrunsUsed; i < previousRuns.Count; i++)
                        {
                            int slot = i - sortedrunIndex;
                            if (slot == outputPageIndex) // Use up to B-1 sortedruns
                            {
                                break;
                            }
                            _pool[slot] = new Page(fieldIndexToSortBy, ReadPage(previousRuns[i].GetReader()));
                            // A freshly loaded page must be read from its first record.
                            pointers[slot] = 0;
                            sortedrunsUsed++;
                        }

                        // No more sorted runs? Leave the loop (finishedMergingSet is still true).
                        if (sortedrunIndex == sortedrunsUsed)
                        {
                            continue;
                        }

                        // Slots that were not refilled still hold pages of fully consumed runs;
                        // drop them so they cannot take part in the next merge.
                        for (int staleSlot = sortedrunsUsed - sortedrunIndex; staleSlot < outputPageIndex; staleSlot++)
                        {
                            _pool[staleSlot] = null;
                            pointers[staleSlot] = 0;
                        }

                        foreach (var item in _pool.Take(outputPageIndex))
                        {
                            if (item != null && item.Records != null)
                            {
                                finishedMergingSet = false;
                            }
                        }
                        if (finishedMergingSet)
                        {
                            continue;
                        }

                        // There are more sorted runs: prepare a new merged sorted run.
                        String tempFilename = Path.GetTempFileName();
                        currentRuns.Add(new SortedRun() { Filename = tempFilename });
                        pageCount = 0;
                    }

                    // Fill the output page until it is full or the inputs run dry.
                    while (pointers[outputPageIndex] < _pageSizeInRecords)
                    {
                        Record smallest = null;
                        int firstPageThatIsNotEmpty = 0;
                        // Find the first input page that still has a record; only input slots are
                        // scanned, the output page is never a merge candidate.
                        for (int i = 0; i < outputPageIndex; i++)
                        {
                            if (_pool[i] != null && _pool[i].Records != null && _pool[i].Records[pointers[i]] != null)
                            {
                                // Choose the first non-null record as the initial smallest.
                                smallest = new Record(_pool[i].Records[pointers[i]]);
                                firstPageThatIsNotEmpty = i;
                                break;
                            }
                        }

                        // Ran out of values?
                        if (smallest == null)
                        {
                            break;
                        }

                        int pageIndexContainingLowestKey = firstPageThatIsNotEmpty;
                        // Search the remaining input pages for a strictly smaller key.
                        for (int i = firstPageThatIsNotEmpty + 1; i < outputPageIndex; i++)
                        {
                            if (_pool[i] != null && _pool[i].Records != null
                                && _pool[i].Records[pointers[i]] != null
                                && _pool[i].Records[pointers[i]].Fields[fieldIndexToSortBy] < smallest[fieldIndexToSortBy])
                            {
                                smallest = _pool[i].Records[pointers[i]];
                                pageIndexContainingLowestKey = i;
                            }
                        }

                        // Append the smallest record to the output page and advance both pointers.
                        _pool[outputPageIndex].Records[pointers[outputPageIndex]] = smallest;
                        pointers[outputPageIndex]++;
                        pointers[pageIndexContainingLowestKey]++;

                        if (pointers[pageIndexContainingLowestKey] == _pageSizeInRecords)
                        {
                            // Input page consumed: read the next page of the same sorted run
                            // and start its pointer over.
                            _pool[pageIndexContainingLowestKey] = new Page(fieldIndexToSortBy,
                                ReadPage(previousRuns[sortedrunIndex + pageIndexContainingLowestKey].GetReader()));

                            pointers[pageIndexContainingLowestKey] = 0;
                        }
                    }

                    // Completed another page: write it to the current sorted run of this iteration.
                    _pool[outputPageIndex].WritePage(currentRuns[producedSortedruns].GetWriter());
                    pageCount++;
                    Console.WriteLine("Wrote page {0} of sorted run #{1}", pageCount, producedSortedruns);
                    // Reset the output page so that it contains 0 records.
                    _pool[outputPageIndex] = new Page(fieldCount, _pageSizeInRecords);
                    pointers[outputPageIndex] = 0;
                }
                _iteration++;
            }

            // Index the most recent iteration explicitly rather than relying on the enumeration
            // order of the iteration-to-runs map.
            return _iterationToSortedruns[_iteration - 1].Last().Filename;
        }
Exemple #3
0
 /// <summary>
 /// Creates a record from an existing one.
 /// </summary>
 /// <remarks>
 /// <c>FieldToSortBy</c> is copied, and the field storage is set to whatever
 /// <c>record.Fields</c> returns; no copy of the field values is made here.
 /// </remarks>
 /// <param name="record">The record to copy from.</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="record"/> is <c>null</c>.</exception>
 public Record(Record record)
 {
     if (record == null)
     {
         throw new System.ArgumentNullException("record");
     }

     FieldToSortBy = record.FieldToSortBy;
     _fields = record.Fields;
 }