/// <summary>
/// Creates the worker object for one segment of the concatenated source data.
/// The constructor only records its arguments; it performs no I/O itself.
/// </summary>
/// <param name="source">the complete list of source files</param>
/// <param name="firstSource">index into <paramref name="source"/> of the first file in this segment</param>
/// <param name="startPosition">position within the first file at which this segment begins</param>
/// <param name="lastSource">index into <paramref name="source"/> of the last file in this segment</param>
/// <param name="endPosition">position within the last file at which this segment ends</param>
/// <param name="dest">name of the stream this segment is written to</param>
/// <param name="breakAtLines">line-boundary flag, stored as given</param>
/// <param name="textMode">text-mode flag, stored as given</param>
/// <param name="next">the worker for the following segment, or <c>null</c> if there is none</param>
public StoreThread(string[] source, int firstSource, long startPosition, int lastSource, long endPosition, string dest, bool breakAtLines, bool textMode, StoreThread next)
{
    // Where the data comes from and where it goes.
    this.source = source;
    this.dest = dest;

    // The range of the input covered by this segment.
    this.firstSource = firstSource;
    this.startPosition = startPosition;
    this.lastSource = lastSource;
    this.endPosition = endPosition;

    // Transfer options.
    this.breakAtLines = breakAtLines;
    this.textMode = textMode;

    // Link to the worker for the segment that follows this one.
    this.next = next;
}
/// <summary>
/// Store concatenated data to Cosmos with enhanced speed.
/// </summary>
/// <param name="source">the source files or wildcard patterns (normally local);
/// <c>null</c> is treated as an empty list</param>
/// <param name="destination">the Cosmos stream name to store the data to; it must start
/// with "cosmos://" (compared without regard to case)</param>
/// <param name="breakAtLines">if true, break extents only at the end of lines;
/// otherwise, break at exact byte limits</param>
/// <remarks>
/// <para>
/// This will blast data onto Cosmos at higher speed than a normal copy.
/// It is helpful to set the <see cref="ExtentSize"/> and <see cref="StoreParallelLevel"/>
/// parameters to match the system being used.
/// </para>
/// <para>
/// The order of the input files will be preserved, and the order within each file is
/// also preserved.
/// </para>
/// <para>
/// Input whose total length does not exceed <see cref="ExtentSize"/> is copied directly
/// in a single pass. Larger input is divided into segments; each segment is written to a
/// temporary stream named <c>destination + ".store_" + index</c>, the temporary streams
/// are concatenated into the destination, and they are then deleted.
/// </para>
/// </remarks>
/// <exception cref="ArgumentException">The destination is not a valid Cosmos streamname,
/// or no source files are specified.</exception>
/// <exception cref="IOException">The stream cannot be written or the source cannot be read.</exception>
public static void Store(string[] source, string destination, bool breakAtLines)
{
    if (source == null)
    {
        source = new string[0];
    }

    // Ordinal, case-insensitive prefix test: the scheme is an identifier, not
    // linguistic text, so the current culture must not influence the result.
    if (destination == null ||
        destination.Length == 0 ||
        !destination.StartsWith("cosmos://", StringComparison.OrdinalIgnoreCase))
    {
        throw new ArgumentException("destination is not a valid Cosmos streamname: " + destination, "destination");
    }

    int parallelLevel = StoreParallelLevel;
    int extentSize = ExtentSize;
    bool breakFiles = StoreBreakFiles;

    // Capture the caller's request before breakAtLines is possibly cleared below.
    bool textMode = breakAtLines;

    // Always use at least one worker; a level below 1 would otherwise yield a
    // negative array size or no segments at all.
    if (parallelLevel < 1)
    {
        parallelLevel = 1;
    }

    // hack to account for boundaries:
    if (breakAtLines)
    {
        extentSize = extentSize - 8 * 1024;
    }

    if (!breakFiles)
    {
        // might as well, for efficiency:
        // (this actually affects the encoding transformation and such, since
        // it turns it into a binary transfer...)
        breakAtLines = false;
    }

    // expand sources:
    ArrayList fullSource = new ArrayList();
    for (int i = 0; i < source.Length; i++)
    {
        fullSource.AddRange(IOUtil.ExpandWildcards(source[i]));
    }
    source = (string[])fullSource.ToArray(typeof(string));

    // handle empty case:
    // (an earlier version created an empty destination stream here instead of throwing)
    if (source.Length == 0)
    {
        throw new ArgumentException("No source files specified.", "source");
    }

    // get lengths:
    // this is a problem - gzip streams do not give the correct length!!
    long[] lengths = new long[source.Length];
    long totalLength = 0;
    for (int i = 0; i < source.Length; i++)
    {
        lengths[i] = IOUtil.GetLength(source[i]);
        if (lengths[i] < 0)
        {
            // The length is unknown. Rather than rejecting the file, substitute one
            // extent as an estimate. The estimate must go into this file's own slot
            // so that a negative value is never added to totalLength.
            lengths[i] = ExtentSize;
        }
        totalLength += lengths[i];
    }

    // handle empty case:
    if (source.Length == 1 && lengths[0] == 0)
    {
        // Opening and immediately disposing the stream creates an empty destination.
        using (Stream empty = ZStreamOut.Open(destination))
        {
        }
        return;
    }

    // handle simple case of small input:
    // we could make this fast, also, at the cost of unfull extents...
    if (totalLength <= ExtentSize)
    {
        if (textMode)
        {
            // Line-by-line copy through the text reader/writer pair.
            using (StreamWriter d = ZStreamWriter.Open(destination))
            {
                string line;
                for (int i = 0; i < source.Length; i++)
                {
                    using (StreamReader s = ZStreamReader.Open(source[i]))
                    {
                        while ((line = s.ReadLine()) != null)
                        {
                            d.WriteLine(line);
                        }
                    }
                }
            }
        }
        else
        {
            // Raw byte copy.
            using (Stream d = ZStreamOut.Open(destination))
            {
                byte[] buf = new byte[256 * 1024];
                int count;
                for (int i = 0; i < source.Length; i++)
                {
                    using (Stream s = ZStreamIn.Open(source[i]))
                    {
                        while ((count = s.Read(buf, 0, buf.Length)) > 0)
                        {
                            d.Write(buf, 0, count);
                        }
                    }
                }
            }
        }
        return;
    }

    // we will split this up if possible...
    // Never use more segments than there are (adjusted) extents of data.
    if (parallelLevel > Math.Ceiling(totalLength / (double)extentSize))
    {
        parallelLevel = (int)Math.Ceiling(totalLength / (double)extentSize);
    }

    // For each segment: the index of the file it starts in and the offset within it.
    int[] startFile = new int[parallelLevel];
    long[] startPos = new long[parallelLevel];

    long sum = 0;           // bytes of the overall input accounted for so far
    long sumWithinCur = 0;  // bytes of the current file already accounted for
    int curSource = 0;      // index of the current file

    for (int i = 0; i < parallelLevel; i++)
    {
        // Offset in the overall input at which segment i should ideally begin.
        long target = (long)(i * (totalLength / (double)parallelLevel));

        // Skip whole files for as long as the rest of the current file ends before the target.
        while (sum < target)
        {
            if (sum + lengths[curSource] - sumWithinCur < target)
            {
                sum += lengths[curSource] - sumWithinCur;
                curSource++;
                sumWithinCur = 0;
                if (curSource >= source.Length)
                {
                    break;
                }
            }
            else
            {
                break;
            }
        }

        if (curSource >= source.Length)
        {
            // we made a mistake. remove a level...
            // (should not happen)
            // Keep only the segments assigned so far and shrink the arrays to match.
            parallelLevel = i;
            int[] oldStartFile = startFile;
            long[] oldStartPos = startPos;
            startFile = new int[parallelLevel];
            startPos = new long[parallelLevel];
            Array.Copy(oldStartFile, startFile, startFile.Length);
            Array.Copy(oldStartPos, startPos, startPos.Length);
            break;
        }

        startFile[i] = curSource;
        startPos[i] = breakFiles ? target - sum + sumWithinCur : 0;

        if (breakFiles)
        {
            // The segment starts inside the current file, exactly at the target.
            sumWithinCur += (target - sum);
            sum = target;
        }
        else
        {
            // Segments start on file boundaries only, so consume the whole file.
            // should really pull back, not advance!!!
            sum += lengths[curSource];
            curSource++;
            sumWithinCur = 0;
        }
    }

    // Spin off the copies
    // threadpool or explicit threads? Or async delegate calls?
    // Built back to front so that each worker can be handed its successor.
    StoreThread[] copyThreads = new StoreThread[parallelLevel];
    StoreThread next = null;
    for (int i = copyThreads.Length - 1; i >= 0; i--)
    {
        int endFile;
        long endPos;
        if (i == startFile.Length - 1)
        {
            // The last segment runs to the end of the last file.
            endFile = source.Length - 1;
            endPos = long.MaxValue;
        }
        else if (startPos[i + 1] == 0)
        {
            // The next segment starts at the beginning of a file, so this one
            // ends with the whole of the preceding file.
            endFile = startFile[i + 1] - 1;
            endPos = long.MaxValue;
        }
        else
        {
            // The next segment starts mid-file; this one stops at that offset.
            endFile = startFile[i + 1];
            endPos = startPos[i + 1];
        }

        copyThreads[i] = new StoreThread(source, startFile[i], startPos[i], endFile, endPos,
            destination + ".store_" + i, breakAtLines, textMode, next);
        next = copyThreads[i];
    }

    // Start every worker, then call End on each in turn.
    for (int i = 0; i < copyThreads.Length; i++)
    {
        copyThreads[i].Start();
    }
    for (int i = 0; i < copyThreads.Length; i++)
    {
        copyThreads[i].End();
    }

    // If any worker failed, remove all temporary streams and report the failure.
    for (int i = 0; i < copyThreads.Length; i++)
    {
        if (copyThreads[i].HasError)
        {
            for (int j = 0; j < copyThreads.Length; j++)
            {
                try
                {
                    Delete(copyThreads[j].FileName);
                }
                catch
                {
                    // Best-effort cleanup: a failed delete must not mask the copy error.
                }
            }
            throw new IOException("Could not copy source data.");
        }
    }

    try
    {
        string[] threadSources = new string[copyThreads.Length];
        for (int i = 0; i < copyThreads.Length; i++)
        {
            try
            {
                threadSources[i] = copyThreads[i].FileName;
            }
            catch
            {
                // Deliberately ignored, as before; the entry stays null if
                // FileName cannot be read.
            }
        }
        Concatenate(destination, threadSources);
    }
    finally
    {
        // Always remove the temporary streams, whether or not concatenation succeeded.
        for (int i = 0; i < copyThreads.Length; i++)
        {
            try
            {
                Delete(copyThreads[i].FileName);
            }
            catch
            {
                // Best-effort cleanup.
            }
        }
    }
}