/// <summary>
/// Builds a page from comma-separated record lines.
/// </summary>
/// <param name="fieldToSortBy">Passed unchanged to every <c>Record</c> created for this page.</param>
/// <param name="records">
/// Record lines. Entries are consumed in order and parsing stops at the first null entry.
/// </param>
/// <remarks>
/// The record storage is allocated (with <c>records.Length</c> slots) only once a first
/// non-null line is seen; that line also fixes the number of fields parsed from every
/// following line. When <paramref name="records"/> is empty or starts with a null entry,
/// this constructor leaves the record storage unassigned.
/// </remarks>
/// <exception cref="System.FormatException">A field is not a valid integer.</exception>
/// <exception cref="System.IndexOutOfRangeException">
/// A line has fewer fields than the first line of the page.
/// </exception>
public Page(int fieldToSortBy, string[] records)
{
    for (int row = 0; row < records.Length; row++)
    {
        string line = records[row];
        if (line == null)
        {
            // A null entry marks the end of the used part of the page.
            break;
        }

        string[] tokens = line.Split(',');

        if (_records == null)
        {
            // First line of the page: it decides the field count and triggers allocation.
            _numberOfFields = tokens.Length;
            _records = new Record[records.Length];
        }

        int[] values = new int[_numberOfFields];
        for (int column = 0; column < values.Length; column++)
        {
            values[column] = int.Parse(tokens[column]);
        }

        _records[row] = new Record(fieldToSortBy, values);
    }
}
/// <summary>
/// Sorts the records of a file with a multi-way external merge sort and returns the
/// name of the file holding the final sorted run.
/// </summary>
/// <remarks>
/// <c>FirstIteration</c> is called first; it is presumably responsible for producing the
/// initial sorted runs and for initialising <c>_iteration</c> (not visible here - confirm).
/// Every following iteration merges the runs of the previous iteration in sets of up to
/// B-1 runs, where B is <c>_numberOfBufferPages</c>: pool slots 0..B-2 each hold the
/// current page of one input run and the last slot collects the merged output page.
/// Iterations repeat until the previous iteration produced at most one run.
/// </remarks>
/// <param name="filename">File to sort; forwarded to <c>FirstIteration</c>.</param>
/// <param name="fieldIndexToSortBy">Index of the integer field used as the sort key.</param>
/// <param name="fieldCount">Field count used when creating the empty output page.</param>
/// <returns>The filename of the last sorted run of the last iteration performed.</returns>
public string Sort(string filename, int fieldIndexToSortBy, int fieldCount)
{
    FirstIteration(filename, fieldIndexToSortBy);

    // Slots 0..B-2 are input pages, slot B-1 is the output page.
    int inputSlots = _numberOfBufferPages - 1;
    int outputSlot = _numberOfBufferPages - 1;

    while (_iterationToSortedruns[_iteration - 1].Count > 1)
    {
        Console.WriteLine("\nCommencing iteration {0} \n", _iteration);

        var previousRuns = _iterationToSortedruns[_iteration - 1];
        int sortedrunIndex = 0; // index (in previousRuns) of the first run of the set being merged
        int sortedrunsUsed = 0; // number of runs of the previous iteration loaded so far
        _pool = new Page[_numberOfBufferPages];

        // Load the first page of each of the first (up to) B-1 sorted runs.
        // The counter always reflects the runs actually loaded, so a later
        // "next set" load never re-reads runs that were already consumed.
        for (; sortedrunsUsed < previousRuns.Count && sortedrunsUsed < inputSlots; sortedrunsUsed++)
        {
            _pool[sortedrunsUsed] = new Page(fieldIndexToSortBy, ReadPage(previousRuns[sortedrunsUsed].GetReader()));
        }

        // Read position inside every input page plus the write position of the output page.
        int[] pointers = new int[_numberOfBufferPages];

        // Start with an empty output page.
        _pool[outputSlot] = new Page(fieldCount, _pageSizeInRecords);

        // Register the first sorted run produced by this iteration.
        int producedSortedruns = 0;
        _iterationToSortedruns[_iteration] = new List<SortedRun>();
        string newSortedrunFilename = Path.GetTempFileName();
        var sortedRun = new SortedRun();
        sortedRun.Filename = newSortedrunFilename;
        sortedRun.GetWriter();
        _iterationToSortedruns[_iteration].Add(sortedRun);

        int pageCount = 0;
        bool finishedMergingSet = false;

        // Each pass writes one output page; the loop ends when no runs are left to merge.
        while (!finishedMergingSet)
        {
            finishedMergingSet = true;
            for (int i = 0; i < inputSlots; i++)
            {
                if (HasPendingRecord(i, pointers))
                {
                    finishedMergingSet = false;
                    break;
                }
            }

            if (finishedMergingSet)
            {
                // The current set is exhausted: close the sorted run that was just written.
                _iterationToSortedruns[_iteration][producedSortedruns++].Writer.Close();

                // Continue with the first run that has not been read yet.
                sortedrunIndex = sortedrunsUsed;
                if (sortedrunIndex >= previousRuns.Count)
                {
                    // No runs left; finishedMergingSet is still true, so the loop ends.
                    continue;
                }

                // Forget the pages and read positions of the finished set. Without this a
                // pointer left in the middle of a partial last page would make the merge
                // skip the first records of the run loaded into the same slot.
                for (int i = 0; i < inputSlots; i++)
                {
                    _pool[i] = null;
                    pointers[i] = 0;
                }

                // Load the first page of the next (up to) B-1 runs.
                for (int slot = 0; slot < inputSlots && sortedrunsUsed < previousRuns.Count; slot++)
                {
                    Page firstPage = new Page(fieldIndexToSortBy, ReadPage(previousRuns[sortedrunsUsed].GetReader()));
                    _pool[slot] = firstPage;
                    sortedrunsUsed++;

                    if (firstPage.Records != null)
                    {
                        finishedMergingSet = false;
                    }
                }

                if (finishedMergingSet)
                {
                    // Nothing to merge in the pages just loaded.
                    continue;
                }

                // There is more data: start a new merged sorted run.
                string tempFilename = Path.GetTempFileName();
                _iterationToSortedruns[_iteration].Add(new SortedRun() { Filename = tempFilename });
                pageCount = 0;
            }

            // Fill the output page with the smallest pending records of the input pages.
            while (pointers[outputSlot] < _pageSizeInRecords)
            {
                Record smallest = null;
                int pageIndexContainingLowestKey = -1;

                // Only the input slots are candidates; the output page is never a source.
                // The strict comparison keeps the lowest slot on equal keys.
                for (int i = 0; i < inputSlots; i++)
                {
                    if (!HasPendingRecord(i, pointers))
                    {
                        continue;
                    }

                    Record candidate = _pool[i].Records[pointers[i]];
                    if (smallest == null
                        || candidate.Fields[fieldIndexToSortBy] < smallest[fieldIndexToSortBy])
                    {
                        smallest = candidate;
                        pageIndexContainingLowestKey = i;
                    }
                }

                // Ran out of values?
                if (smallest == null)
                {
                    break;
                }

                // Move the record to the output page and advance both positions.
                _pool[outputSlot].Records[pointers[outputSlot]] = smallest;
                pointers[outputSlot]++;
                pointers[pageIndexContainingLowestKey]++;

                if (pointers[pageIndexContainingLowestKey] == _pageSizeInRecords)
                {
                    // Input page consumed: fetch the next page of the same run and restart its position.
                    _pool[pageIndexContainingLowestKey] = new Page(
                        fieldIndexToSortBy,
                        ReadPage(previousRuns[sortedrunIndex + pageIndexContainingLowestKey].GetReader()));
                    pointers[pageIndexContainingLowestKey] = 0;
                }
            }

            // Completed another page: append it to the sorted run currently being produced.
            _pool[outputSlot].WritePage(_iterationToSortedruns[_iteration][producedSortedruns].GetWriter());
            pageCount++;
            Console.WriteLine("Wrote page {0} of sorted run #{1}", pageCount, producedSortedruns);

            // Reset the output page so it contains 0 records again.
            _pool[outputSlot] = new Page(fieldCount, _pageSizeInRecords);
            pointers[outputSlot] = 0;
        }

        _iteration++;
    }

    // Look the final iteration up by key instead of relying on the enumeration order
    // of the iteration map.
    return _iterationToSortedruns[_iteration - 1].Last().Filename;
}

// Tells whether the pool page in the given slot still has a record at its current read position.
private bool HasPendingRecord(int slot, int[] pointers)
{
    return _pool[slot] != null
        && _pool[slot].Records != null
        && _pool[slot].Records[pointers[slot]] != null;
}
/// <summary>
/// Copy constructor: creates a record carrying the same sort-field setting and the same
/// field values as <paramref name="record"/>.
/// </summary>
/// <param name="record">The record to copy from; must not be null.</param>
/// <remarks>
/// Whatever <c>record.Fields</c> returns is stored as-is; no copy of the field storage is
/// made here, so the new record may share it with the source record.
/// </remarks>
public Record(Record record)
{
    this.FieldToSortBy = record.FieldToSortBy;
    this._fields = record.Fields;
}