/// <summary>
/// Returns the bytes of the split described by <paramref name="byteInterval"/>, with the
/// start and end of the split adjusted around newline characters.
/// </summary>
/// <remarks>
/// <para>
/// A split that does not begin at offset 0 checks the character just before its first byte;
/// when that character is not a newline, <c>readUntilNewLine</c> is used to move the start
/// forward (presumably past the partial line that belongs to the previous split - confirm
/// against <c>readUntilNewLine</c>).
/// </para>
/// <para>
/// A split that does not end at the file size is extended with the bytes returned by
/// <c>readUntilNewLine</c> starting at <c>byteInterval.Second + 1</c> whenever the last byte
/// read is not a newline.
/// </para>
/// </remarks>
/// <param name="byteInterval">First and last byte offsets of the requested split.</param>
/// <param name="filePath">Path of the file to read from.</param>
/// <returns>The bytes selected for this split; empty when nothing remains after skipping the leading partial line.</returns>
public List<byte> processBytes(PADIMapNoReduce.Pair<long, long> byteInterval, string filePath)
{
    List<byte> result = new List<byte>();
    long fileSize = new FileInfo(filePath).Length;

    bool isFirstSplit = byteInterval.First == 0;
    bool isLastSplit = byteInterval.Second == fileSize;

    long start = byteInterval.First;
    if (!isFirstSplit)
    {
        // The split starts in the middle of a line unless the previous byte is a newline.
        if (getCharFromBytePosition(byteInterval.First - 1, filePath) != '\n')
        {
            readUntilNewLine(byteInterval.First, filePath, out start);
            start++;
        }

        // NOTE(review): the last split gives up on ">=" while a middle split gives up on ">".
        // The asymmetry is preserved from the original code - confirm whether it is intended.
        bool nothingLeft = isLastSplit
            ? start >= byteInterval.Second
            : start > byteInterval.Second;
        if (nothingLeft)
        {
            return result;
        }
    }

    result.AddRange(readByteInterval(new PADIMapNoReduce.Pair<long, long>(start, byteInterval.Second), filePath));

    // Inspect the last byte that was actually read. The previous code passed that byte's
    // value to getCharFromBytePosition as if it were a file offset, which tested an
    // unrelated position in the file.
    bool endsOnNewLine = result.Count > 0 && result[result.Count - 1] == (byte)'\n';
    if (!isLastSplit && result.Count > 0 && !endsOnNewLine)
    {
        long ignoredEnd;
        result.AddRange(readUntilNewLine(byteInterval.Second + 1, filePath, out ignoredEnd));
    }

    return result;
}
/// <summary>
/// Creates a split from an identifier and a pair of <c>long</c> values.
/// </summary>
/// <param name="i">Value stored as the split id.</param>
/// <param name="pair">Pair stored with the split (presumably the byte interval it covers - confirm against callers).</param>
public FileSplit(int i, PADIMapNoReduce.Pair<long, long> pair)
{
    this.pair = pair;
    splitId = i;
}
/// <summary>
/// Reads the bytes of <paramref name="filePath"/> located between the offsets
/// <c>byteInterval.First</c> and <c>byteInterval.Second</c>, both inclusive.
/// </summary>
/// <param name="byteInterval">First and last byte offsets to read.</param>
/// <param name="filePath">Path of the file to read from.</param>
/// <returns>
/// The bytes found in the interval. The array is empty when <c>First</c> is greater than
/// <c>Second</c>, and shorter than the interval when the interval extends past the end of
/// the file (no zero padding is returned for bytes that do not exist).
/// </returns>
private byte[] readByteInterval(PADIMapNoReduce.Pair<long, long> byteInterval, string filePath)
{
    if (byteInterval.First > byteInterval.Second)
    {
        return new byte[0];
    }

    byte[] bytes = new byte[byteInterval.Second - byteInterval.First + 1];
    int totalRead = 0;

    using (FileStream stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        stream.Seek(byteInterval.First, SeekOrigin.Begin);

        // A single Read call may return fewer bytes than requested, so keep reading
        // until the buffer is full or the end of the file is reached.
        while (totalRead < bytes.Length)
        {
            int read = stream.Read(bytes, totalRead, bytes.Length - totalRead);
            if (read == 0)
            {
                break;
            }
            totalRead += read;
        }
    }

    // Drop the unread tail instead of returning zero bytes that are not in the file.
    if (totalRead < bytes.Length)
    {
        System.Array.Resize(ref bytes, totalRead);
    }

    return bytes;
}