Beispiel #1
0
 /// <summary>
 /// Adds the chunk to <c>_listOfCurrentItemsToProcess</c> unless the collection already contains it.
 /// </summary>
 /// <param name="chunkInfoItem">Chunk to register.</param>
 private void AddCurrentItemIfNotExist(ChunkInfoItem chunkInfoItem)
 {
     var alreadyRegistered = _listOfCurrentItemsToProcess.Contains(chunkInfoItem);
     if (alreadyRegistered)
     {
         // Nothing to do: the chunk is already in the collection.
         return;
     }

     _listOfCurrentItemsToProcess.Add(chunkInfoItem);
 }
Beispiel #2
0
        /// <summary>
        /// Reads lines from the reader until either the input ends or the buffer holds
        /// <c>AllowedCountOfFileLinesToSortInMemory</c> entries, sorts them, records the chunk
        /// boundaries and line count, and writes the chunk to disk.
        /// </summary>
        /// <param name="streamReader">Reader positioned at the lines to consume; must not be null.</param>
        /// <returns>The chunk descriptor for the lines that were read.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="streamReader"/> is null.</exception>
        private ChunkInfoItem ReadAndSortInitialChunk(StreamReader streamReader)
        {
            if (streamReader == null)
            {
                throw new ArgumentNullException(nameof(streamReader));
            }

            // Number lines are deliberately not padded here because their length can change later.
            var chunk = new ChunkInfoItem();

            var currentLine = streamReader.ReadLine();
            while (currentLine != null)
            {
                // Every line consumed here counts toward the running total of initial lines.
                _initialCountOfLines++;

                chunk.Buffer.Add(LinePreProcessingFunc(currentLine));

                // Stop as soon as the in-memory limit is reached; the remaining input stays unread.
                if (chunk.Buffer.Count >= AllowedCountOfFileLinesToSortInMemory)
                {
                    break;
                }

                currentLine = streamReader.ReadLine();
            }

            chunk.Buffer.Sort(_comparer);

            // Boundaries and count must be captured before the buffer is handed to WriteBufferToFile.
            chunk.CountOfLinesInFile = chunk.Buffer.Count;
            chunk.FirstPart = chunk.Buffer.FirstOrDefault();
            chunk.LastPart = chunk.Buffer.LastOrDefault();

            WriteBufferToFile(chunk);

            return chunk;
        }
Beispiel #3
0
        /// <summary>
        /// Handles a brand-new chunk: sorts its buffer, records the line count and the
        /// first/last entries, then writes the buffer to disk.
        /// </summary>
        /// <param name="treeItemToProcess">Chunk whose buffer holds the lines to sort and write.</param>
        private void ProcessNewChunk(ChunkInfoItem treeItemToProcess)
        {
            treeItemToProcess.Buffer.Sort(_comparer);

            // Capture the metadata from the sorted buffer before it is written out.
            treeItemToProcess.CountOfLinesInFile = treeItemToProcess.Buffer.Count;
            treeItemToProcess.LastPart = treeItemToProcess.Buffer.LastOrDefault();
            treeItemToProcess.FirstPart = treeItemToProcess.Buffer.FirstOrDefault();

            WriteBufferToFile(treeItemToProcess);
        }
Beispiel #4
0
        /// <summary>
        /// Writes the chunk's buffer into two new files under <c>_tempFolderForChunksPath</c>
        /// (one for the string parts, one for the number parts, line by line in buffer order),
        /// stores the file paths and resulting file lengths on the chunk, and clears the buffer.
        /// </summary>
        /// <param name="item">Chunk whose buffer is written; its path, length and buffer members are updated.</param>
        private void WriteBufferToFile(ChunkInfoItem item)
        {
            // Each file gets its own GUID-based name inside the temp folder.
            var stringPath = Path.Combine(_tempFolderForChunksPath, Guid.NewGuid().ToString());
            var numberPath = Path.Combine(_tempFolderForChunksPath, Guid.NewGuid().ToString());

            item.StringFilePath = stringPath;
            item.NumberFilePath = numberPath;

            using (var stringWriter = new StreamWriter(stringPath))
            using (var numberWriter = new StreamWriter(numberPath))
            {
                // Line i of the string file and line i of the number file belong to the same entry.
                foreach (var part in item.Buffer)
                {
                    stringWriter.WriteLine(part.StringPart);
                    numberWriter.WriteLine(part.NumberPart);
                }
            }

            // Lengths are read only after both writers are disposed, so the data is flushed.
            item.StringFileLength = new FileInfo(stringPath).Length;
            item.NumberFileLength = new FileInfo(numberPath).Length;

            item.Buffer.Clear();
        }
Beispiel #5
0
        /// <summary>
        /// Re-sorts two chunks together: reads both chunks from disk, sorts the combined lines,
        /// splits the result into two halves, writes each half as a new chunk, replaces the two
        /// original nodes in <c>_linkedlist</c> with the new chunks and deletes the old chunk files.
        /// </summary>
        /// <param name="first">Node of the first chunk; the new lower-half chunk is inserted before it.</param>
        /// <param name="second">Node of the second chunk; the new upper-half chunk is inserted after it.</param>
        /// <returns>The linked-list node that holds the new lower-half chunk.</returns>
        private LinkedListNode<ChunkInfoItem> SortChunks(LinkedListNode<ChunkInfoItem> first, LinkedListNode<ChunkInfoItem> second)
        {
            var parts = new List<StringNumberPart>();

            // Load both chunks, first then second. Each chunk stores its string parts and
            // number parts in two parallel files that are read in lock-step.
            foreach (var chunk in new[] { first.Value, second.Value })
            {
                using (var stringReader = new StreamReader(chunk.StringFilePath))
                using (var numberReader = new StreamReader(chunk.NumberFilePath))
                {
                    string stringLine;
                    while ((stringLine = stringReader.ReadLine()) != null)
                    {
                        var numberLine = numberReader.ReadLine();
                        parts.Add(new StringNumberPart(stringLine, numberLine));
                    }
                }
            }

            parts.Sort(_comparer);

            // Split the sorted lines in half; with an odd count the upper half gets the extra line.
            var firstPartOfList = parts.Count / 2;
            var lastPartOfList  = parts.Count - firstPartOfList;

            var firstList = parts.GetRange(0, firstPartOfList);
            var lastList  = parts.GetRange(firstPartOfList, lastPartOfList);

            var newFirstItem = new ChunkInfoItem
            {
                FirstPart          = firstList.FirstOrDefault(),
                LastPart           = firstList.LastOrDefault(),
                CountOfLinesInFile = firstList.Count,
                Buffer             = firstList,
            };

            WriteBufferToFile(newFirstItem);

            var newLastItem = new ChunkInfoItem
            {
                FirstPart          = lastList.FirstOrDefault(),
                LastPart           = lastList.LastOrDefault(),
                CountOfLinesInFile = lastList.Count,
                Buffer             = lastList,
            };

            WriteBufferToFile(newLastItem);

            // Insert the replacements around the old nodes before unlinking the old nodes.
            var result = _linkedlist.AddBefore(first, newFirstItem);

            _linkedlist.AddAfter(second, newLastItem);

            _linkedlist.Remove(first);
            _linkedlist.Remove(second);

            // Both old chunks are fully replaced, so all four of their files must go.
            // Deleting only one file per chunk would leave orphaned files in the temp folder.
            File.Delete(first.Value.StringFilePath);
            File.Delete(first.Value.NumberFilePath);
            File.Delete(second.Value.StringFilePath);
            File.Delete(second.Value.NumberFilePath);

            Console.WriteLine($"{DateTime.Now}. Sorting existing chunks to ensure correct order.");

            return result;
        }
Beispiel #6
0
        /// <summary>
        /// Merges a chunk's pending in-memory buffer with the lines already stored in its files:
        /// reads the stored lines, appends the buffer, sorts everything, splits the result into
        /// two new chunks, writes them, swaps them into <c>_linkedlist</c> in place of the old
        /// node and deletes the old chunk files.
        /// </summary>
        /// <param name="treeItemToProcess">Chunk that is present in the linked list and has buffered lines.</param>
        /// <exception cref="InvalidOperationException">Thrown when the chunk is not found in the linked list.</exception>
        private void ProcessExistingChunkWithBuffer(ChunkInfoItem treeItemToProcess)
        {
            var existingNode = _linkedlist.Find(treeItemToProcess);
            if (existingNode == null)
            {
                throw new InvalidOperationException("Cannot find mandatory node");
            }

            // Pre-size the list for the stored lines plus the buffered ones.
            var expectedCount = (int)treeItemToProcess.CountOfLinesInFile + treeItemToProcess.Buffer.Count;
            var merged = new List<StringNumberPart>(expectedCount);

            using (var stringReader = new StreamReader(treeItemToProcess.StringFilePath))
            using (var numberReader = new StreamReader(treeItemToProcess.NumberFilePath))
            {
                // The string file drives the loop; the number file is read in lock-step with it.
                var storedString = stringReader.ReadLine();
                while (storedString != null)
                {
                    var storedNumber = numberReader.ReadLine();
                    merged.Add(new StringNumberPart(storedString, storedNumber));
                    storedString = stringReader.ReadLine();
                }
            }

            merged.AddRange(treeItemToProcess.Buffer);
            treeItemToProcess.Buffer.Clear();

            merged.Sort(_comparer);

            // Split in half; with an odd count the upper half receives the extra line.
            var lowerCount = merged.Count / 2;
            var upperCount = merged.Count - lowerCount;

            var lowerHalf = merged.GetRange(0, lowerCount);
            var upperHalf = merged.GetRange(lowerCount, upperCount);

            var lowerChunk = new ChunkInfoItem();
            lowerChunk.FirstPart = lowerHalf.FirstOrDefault();
            lowerChunk.LastPart = lowerHalf.LastOrDefault();
            lowerChunk.CountOfLinesInFile = lowerHalf.Count;
            lowerChunk.Buffer = lowerHalf;

            WriteBufferToFile(lowerChunk);

            var upperChunk = new ChunkInfoItem();
            upperChunk.FirstPart = upperHalf.FirstOrDefault();
            upperChunk.LastPart = upperHalf.LastOrDefault();
            upperChunk.CountOfLinesInFile = upperHalf.Count;
            upperChunk.Buffer = upperHalf;

            WriteBufferToFile(upperChunk);

            // Place the two new chunks on either side of the old node, then unlink the old node.
            _linkedlist.AddBefore(existingNode, lowerChunk);
            _linkedlist.AddAfter(existingNode, upperChunk);
            _linkedlist.Remove(existingNode);

            // The old chunk's files are no longer referenced by any node.
            File.Delete(existingNode.Value.StringFilePath);
            File.Delete(existingNode.Value.NumberFilePath);

            Console.WriteLine($"{DateTime.Now}. Processed buffer and split chunk to two new. Chunks count: {_linkedlist.Count}");
        }
Beispiel #7
0
        /// <summary>
        /// Routes a line to the chunk it belongs to by walking <c>_linkedlist</c> from the front
        /// and comparing the line with each chunk's <c>FirstPart</c> and <c>LastPart</c>.
        /// The line is appended to the matching chunk's buffer (updating the chunk boundary when
        /// the line sits on or before it), or a new chunk is appended after the last node when
        /// the line sorts after every existing chunk. The affected chunk is registered through
        /// <c>AddCurrentItemIfNotExist</c>.
        /// </summary>
        /// <remarks>
        /// If the linked list is empty the loop body never runs and the line is not stored anywhere.
        /// </remarks>
        /// <param name="linePart">The line to place into a chunk.</param>
        private void ProcessInitialLinePart(StringNumberPart linePart)
        {
            for (var node = _linkedlist.First; node != null; node = node.Next)
            {
                var chunkInfoItem = node.Value;

                var compareToFirst = _comparer.Compare(linePart, chunkInfoItem.FirstPart);
                var compareToLast  = _comparer.Compare(linePart, chunkInfoItem.LastPart);

                // Line sorts at or before the chunk's first entry: absorb it here and move the
                // lower boundary, rather than creating a separate chunk in front. This avoids
                // producing many small chunks (which cost more to sort and merge later); which
                // strategy is faster depends on the input data.
                if (compareToFirst <= 0 && compareToLast < 0)
                {
                    chunkInfoItem.Buffer.Add(linePart);
                    chunkInfoItem.FirstPart = linePart;
                    AddCurrentItemIfNotExist(chunkInfoItem);
                    return;
                }

                // Line equals both boundaries (the chunk's first and last entries compare equal
                // to it). It belongs to this chunk and no boundary changes. Without this branch
                // such a line would match nothing and be lost when this is the last node.
                if (compareToFirst == 0 && compareToLast == 0)
                {
                    chunkInfoItem.Buffer.Add(linePart);
                    AddCurrentItemIfNotExist(chunkInfoItem);
                    return;
                }

                // Line lies strictly inside the chunk's range.
                if (compareToFirst > 0 && compareToLast < 0)
                {
                    chunkInfoItem.Buffer.Add(linePart);
                    AddCurrentItemIfNotExist(chunkInfoItem);
                    return;
                }

                // Line equals the chunk's last entry: absorb it and refresh the upper boundary.
                if (compareToFirst > 0 && compareToLast == 0)
                {
                    chunkInfoItem.Buffer.Add(linePart);
                    chunkInfoItem.LastPart = linePart;
                    AddCurrentItemIfNotExist(chunkInfoItem);
                    return;
                }

                // Line sorts after this chunk and there is no further chunk to try:
                // start a new single-line chunk at the end of the list.
                if (compareToFirst > 0 && compareToLast > 0 && node.Next == null)
                {
                    var newAfterChunkInfo = new ChunkInfoItem();
                    newAfterChunkInfo.Buffer.Add(linePart);
                    newAfterChunkInfo.FirstPart = linePart;
                    newAfterChunkInfo.LastPart  = linePart;

                    _linkedlist.AddAfter(node, newAfterChunkInfo);
                    AddCurrentItemIfNotExist(newAfterChunkInfo);
                    return;
                }

                // Otherwise fall through and examine the next chunk.
            }
        }