Ejemplo n.º 1
0
			/// <summary>
			/// Creates a store worker description by recording every argument in the
			/// field of the same name. No other work is performed here.
			/// </summary>
			/// <param name="source">the list of source names this worker indexes into</param>
			/// <param name="firstSource">index into <paramref name="source"/> of the first source</param>
			/// <param name="startPosition">position associated with the first source</param>
			/// <param name="lastSource">index into <paramref name="source"/> of the last source</param>
			/// <param name="endPosition">position associated with the last source</param>
			/// <param name="dest">the destination name for this worker</param>
			/// <param name="breakAtLines">stored as-is; presumably selects line-aligned breaks (confirm against the worker body)</param>
			/// <param name="textMode">stored as-is; presumably selects text rather than binary transfer (confirm against the worker body)</param>
			/// <param name="next">another <c>StoreThread</c> linked from this one (may be null)</param>
			public StoreThread(string[] source, int firstSource, long startPosition, int lastSource, long endPosition,
				string dest, bool breakAtLines, bool textMode, StoreThread next)
			{
				// Where to read from and where to write to.
				this.source = source;
				this.dest = dest;

				// The range of input assigned to this worker.
				this.firstSource = firstSource;
				this.startPosition = startPosition;
				this.lastSource = lastSource;
				this.endPosition = endPosition;

				// Transfer options.
				this.breakAtLines = breakAtLines;
				this.textMode = textMode;

				// Link to the neighbouring worker.
				this.next = next;
			}
Ejemplo n.º 2
0
		/// <summary>
		/// Store concatenated data to Cosmos with enhanced speed.
		/// </summary>
		/// <param name="source">the source files or wildcard patterns (normally local);
		/// null is treated as an empty list</param>
		/// <param name="destination">the Cosmos stream name to store the data to;
		/// must start with "cosmos://" (case-insensitive)</param>
		/// <param name="breakAtLines">if true, break extents only at the end of lines;
		/// otherwise, break at exact byte limits</param>
		/// <remarks>
		/// <para>
		/// This will blast data onto Cosmos at higher speed than a normal copy.
		/// It is helpful to set the <see cref="ExtentSize"/> and <see cref="StoreParallelLevel"/>
		/// parameters to match the system being used.
		/// </para>
		/// <para>
		/// The order of the input files will be preserved, and the order within each file is
		/// also preserved.
		/// </para>
		/// <para>
		/// If the combined input length does not exceed <see cref="ExtentSize"/>, the data is
		/// copied directly in a single pass. Otherwise the input is divided into up to
		/// <see cref="StoreParallelLevel"/> segments, each segment is written to a temporary
		/// stream named <c>destination + ".store_" + index</c>, the temporary streams are
		/// concatenated into <paramref name="destination"/>, and the temporary streams are
		/// then deleted (best effort).
		/// </para>
		/// <para>
		/// A source whose length cannot be determined (reported as negative) is assumed to be
		/// <see cref="ExtentSize"/> bytes long when planning the segments.
		/// </para>
		/// </remarks>
		/// <exception cref="ArgumentException">The destination is not a valid Cosmos streamname,
		/// or no source files are specified.</exception>
		/// <exception cref="IOException">The stream cannot be written or the source cannot be read.</exception>
		public static void Store(string[] source, string destination, bool breakAtLines)
		{
			if (source == null)
			{
				source = new string[0];
			}
			if (destination == null || destination.Length == 0 || !destination.ToLower().StartsWith("cosmos://"))
			{
				throw new ArgumentException("destination is not a valid Cosmos streamname: " + destination, "destination");
			}

			// Snapshot the tunables once so the whole plan uses consistent values.
			int parallelLevel = StoreParallelLevel;
			int extentSize = ExtentSize;
			bool breakFiles = StoreBreakFiles;
			// Text mode follows the caller's request even if breakAtLines is cleared below.
			bool textMode = breakAtLines;
			// hack to account for boundaries: leave 8KB of slack per extent
			// when extents may only end on line boundaries.
			if (breakAtLines)
			{
				extentSize = extentSize - 8*1024;
			}
			if (!breakFiles)
			{
				// might as well, for efficiency:
				// (this actually affects the encoding transformation and such, since
				// it turns it into a binary transfer...)
				breakAtLines = false;
			}

			// expand sources (each entry may be a wildcard pattern):
			ArrayList fullSource = new ArrayList();
			for (int i = 0; i < source.Length; i++)
			{
				fullSource.AddRange(IOUtil.ExpandWildcards(source[i]));
			}
			source = (string[])fullSource.ToArray(typeof(string));

			// handle empty case: nothing matched, so there is nothing to store.
			// (Creating an empty destination instead was considered and rejected.)
			if (source.Length == 0)
			{
				throw new ArgumentException("No source files specified.", "source");
			}

			// get lengths:
			// this is a problem - gzip streams do not give the correct length!!
			long[] lengths = new long[source.Length];
			long totalLength = 0;
			for (int i = 0; i < source.Length; i++)
			{
				lengths[i] = IOUtil.GetLength(source[i]);
				if (lengths[i] < 0)
				{
					// Unknown length: rather than refusing the file, assume it is one
					// extent long. This must update the CURRENT entry - writing to a
					// fixed index would corrupt another file's length (or run off the
					// end of the array for a single source) and leave a negative value
					// to be summed into totalLength.
					lengths[i] = ExtentSize;
				}
				totalLength += lengths[i];
			}

			// handle empty case: a single zero-length source yields an empty destination.
			if (source.Length == 1 && lengths[0] == 0)
			{
				using (Stream empty = ZStreamOut.Open(destination))
				{
				}
				return;
			}

			// handle simple case of small input:
			// we could make this fast, also, at the cost of unfull extents...
			if (totalLength <= ExtentSize)
			{
				if (textMode)
				{
					// Line-by-line copy through the text reader/writer pair.
					using (StreamWriter d = ZStreamWriter.Open(destination))
					{
						string line;
						for (int i = 0; i < source.Length; i++)
						{
							using (StreamReader s = ZStreamReader.Open(source[i]))
							{
								while ((line = s.ReadLine()) != null)
								{
									d.WriteLine(line);
								}
							}
						}
					}
				}
				else
				{
					// Raw byte copy through a fixed 256KB buffer.
					using (Stream d = ZStreamOut.Open(destination))
					{
						byte[] buf = new byte[256*1024];
						int count;
						for (int i = 0; i < source.Length; i++)
						{
							using (Stream s = ZStreamIn.Open(source[i]))
							{
								while ((count = s.Read(buf, 0, buf.Length)) > 0)
								{
									d.Write(buf, 0, count);
								}
							}
						}
					}
				}
				return;
			}

			// we will split this up if possible...
			// There is no point in running more workers than there are extents of data.
			double maxUsefulLevel = Math.Ceiling(totalLength / (double)extentSize);
			if (parallelLevel > maxUsefulLevel)
			{
				parallelLevel = (int)maxUsefulLevel;
			}

			// Plan the segments: segment i starts in file startFile[i] at offset startPos[i].
			int[] startFile = new int[parallelLevel];
			long[] startPos = new long[parallelLevel];
			long sum = 0;            // bytes of input already assigned to earlier segments
			long sumWithinCur = 0;   // bytes of the current file already assigned
			int curSource = 0;
			for (int i = 0; i < parallelLevel; i++)
			{
				// Ideal global byte offset at which segment i should begin.
				long target = (long)(i * (totalLength / (double)parallelLevel));
				// Skip whole files that end before the target offset.
				while (sum < target)
				{
					if (sum + lengths[curSource] - sumWithinCur < target)
					{
						sum += lengths[curSource] - sumWithinCur;
						curSource++;
						sumWithinCur = 0;
						if (curSource >= source.Length)  break;
					}
					else
					{
						break;
					}
				}
				if (curSource >= source.Length)
				{
					// we made a mistake. remove a level...
					// (should not happen)
					// Ran out of files: keep only the segments planned so far.
					parallelLevel = i;
					int[] oldStartFile = startFile;
					long[] oldStartPos = startPos;
					startFile = new int[parallelLevel];
					startPos = new long[parallelLevel];
					Array.Copy(oldStartFile, startFile, startFile.Length);
					Array.Copy(oldStartPos, startPos, startPos.Length);
					break;
				}
				startFile[i] = curSource;
				// When files may not be split, a segment always begins at the start of a file.
				startPos[i] = breakFiles ? target - sum + sumWithinCur : 0;
				if (breakFiles)
				{
					sumWithinCur += (target - sum);
					sum = target;
				}
				else
				{
					// should really pull back, not advance!!!
					sum += lengths[curSource];
					curSource++;
					sumWithinCur = 0;
				}
			}

			// Spin off the copies
			// threadpool or explicit threads? Or async delegate calls?
			// Built back-to-front so each worker can be handed its successor.
			StoreThread[] copyThreads = new StoreThread[parallelLevel];
			StoreThread next = null;
			for (int i = copyThreads.Length - 1; i >= 0; i--)
			{
				int endFile;
				long endPos;
				if (i == startFile.Length - 1)
				{
					// Last segment: runs through the end of the last file.
					endFile = source.Length - 1;
					endPos = long.MaxValue;
				}
				else
				{
					if (startPos[i+1] == 0)
					{
						// Next segment starts on a file boundary: end with the previous file.
						endFile = startFile[i+1] - 1;
						endPos = long.MaxValue;
					}
					else
					{
						// Next segment starts mid-file: stop where it begins.
						endFile = startFile[i+1];
						endPos = startPos[i+1];
					}
				}
				copyThreads[i] = new StoreThread(source, startFile[i], startPos[i],
					endFile, endPos, destination + ".store_" + i, breakAtLines, textMode, next);
				next = copyThreads[i];
			}

			// Start every worker before waiting on any of them.
			for (int i = 0; i < copyThreads.Length; i++)
			{
				copyThreads[i].Start();
			}
			for (int i = 0; i < copyThreads.Length; i++)
			{
				copyThreads[i].End();
			}

			// If any worker failed, discard all partial output and report the failure.
			for (int i = 0; i < copyThreads.Length; i++)
			{
				if (copyThreads[i].HasError)
				{
					for (int j = 0; j < copyThreads.Length; j++)
					{
						try
						{
							Delete(copyThreads[j].FileName);
						}
						catch
						{
							// Best-effort cleanup: a temporary stream that cannot be
							// deleted must not mask the IOException thrown below.
						}
					}
					throw new IOException("Could not copy source data.");
				}
			}

			try
			{
				// Stitch the per-worker streams together, in order, into the destination.
				string[] threadSources = new string[copyThreads.Length];
				for (int i = 0; i < copyThreads.Length; i++)
				{
					try
					{
						threadSources[i] = copyThreads[i].FileName;
					}
					catch
					{
						// Deliberately ignored (kept from the original); the entry stays null.
					}
				}
				Concatenate(destination, threadSources);
			}
			finally
			{
				// Always remove the temporary streams, whether or not concatenation succeeded.
				for (int i = 0; i < copyThreads.Length; i++)
				{
					try
					{
						Delete(copyThreads[i].FileName);
					}
					catch
					{
						// Best-effort cleanup; do not replace an in-flight exception.
					}
				}
			}
		}