/// <summary>
/// Creates a vertex reader over the input ports in the half-open range
/// [<paramref name="startPort"/>, <paramref name="endPort"/>).
/// </summary>
/// <param name="denv">Vertex environment; supplies the native handle and the KeepInputPortOrder setting.</param>
/// <param name="readerFactory">Factory used to create one record reader per input port.</param>
/// <param name="startPort">Number of the first input port (inclusive).</param>
/// <param name="endPort">Port number one past the last input port (exclusive).</param>
public HpcVertexReader(HpcLinqVertexEnv denv, HpcLinqFactory<T> readerFactory, UInt32 startPort, UInt32 endPort)
{
    this.m_dvertexEnv = denv;
    this.m_nativeHandle = denv.NativeHandle;
    this.m_readerFactory = readerFactory;
    this.m_startPort = startPort;
    this.m_numberOfInputs = endPort - startPort;

    // Start from the identity permutation of the input indices.
    // NOTE(review): NumberOfInputs is presumably a property over m_numberOfInputs -- confirm.
    this.m_portPermArray = new UInt32[this.NumberOfInputs];
    for (UInt32 i = 0; i < this.NumberOfInputs; i++)
    {
        this.m_portPermArray[i] = i;
    }

    if (!denv.KeepInputPortOrder)
    {
        // The shuffle is seeded with the current process id. Process is IDisposable,
        // so release it as soon as the id has been read instead of leaking the handle.
        int seed;
        using (System.Diagnostics.Process currentProcess = System.Diagnostics.Process.GetCurrentProcess())
        {
            seed = currentProcess.Id;
        }
        Random rdm = new Random(seed);

        // Shuffle from the back: swap a randomly chosen element of the first
        // 'max' entries into position max - 1, then shrink the range by one.
        Int32 max = (Int32)this.NumberOfInputs;
        for (UInt32 i = 1; i < this.NumberOfInputs; i++)
        {
            int idx = rdm.Next(max);
            UInt32 n = this.m_portPermArray[max - 1];
            this.m_portPermArray[max - 1] = this.m_portPermArray[idx];
            this.m_portPermArray[idx] = n;
            max--;
        }
    }

    // One record reader per input port, created in port order.
    this.m_readers = new HpcRecordReader<T>[this.NumberOfInputs];
    for (UInt32 i = 0; i < this.NumberOfInputs; i++)
    {
        this.m_readers[i] = this.m_readerFactory.MakeReader(this.m_nativeHandle, startPort + i);
    }
    this.m_isUsed = false;
}
/// <summary>
/// Creates a vertex writer bound to exactly one output port.
/// </summary>
/// <param name="denv">Vertex environment; supplies the native handle and the write buffer size.</param>
/// <param name="writerFactory">Factory used to create the record writer for the port.</param>
/// <param name="portNum">Number of the single output port to write to.</param>
public HpcVertexWriter(HpcLinqVertexEnv denv, HpcLinqFactory<T> writerFactory, UInt32 portNum)
{
    m_dvertexEnv = denv;
    m_nativeHandle = denv.NativeHandle;
    m_startPort = portNum;
    m_numberOfOutputs = 1;
    m_writerFactory = writerFactory;

    // The buffer size is queried before the writer is created, as the writer needs it.
    Int32 bufferSize = m_dvertexEnv.GetWriteBuffSize();
    HpcRecordWriter<T> singleWriter = writerFactory.MakeWriter(m_nativeHandle, portNum, bufferSize);

    // Store the lone writer in a one-element array.
    HpcRecordWriter<T>[] writerArray = new HpcRecordWriter<T>[1];
    writerArray[0] = singleWriter;
    m_writers = writerArray;
}
/// <summary>
/// Creates a vertex reader bound to exactly one input port.
/// </summary>
/// <param name="denv">Vertex environment; supplies the native handle.</param>
/// <param name="readerFactory">Factory used to create the record reader for the port.</param>
/// <param name="portNum">Number of the single input port to read from.</param>
public HpcVertexReader(HpcLinqVertexEnv denv, HpcLinqFactory<T> readerFactory, UInt32 portNum)
{
    m_dvertexEnv = denv;
    m_nativeHandle = denv.NativeHandle;
    m_readerFactory = readerFactory;
    m_startPort = portNum;
    m_numberOfInputs = 1;

    // With a single input the permutation holds only index 0
    // (a freshly allocated UInt32 array is zero-initialised).
    m_portPermArray = new UInt32[1];

    HpcRecordReader<T> singleReader = readerFactory.MakeReader(m_nativeHandle, portNum);
    HpcRecordReader<T>[] readerArray = new HpcRecordReader<T>[1];
    readerArray[0] = singleReader;
    m_readers = readerArray;

    m_isUsed = false;
}
/// <summary>
/// Creates a vertex writer over the output ports in the half-open range
/// [<paramref name="startPort"/>, <paramref name="endPort"/>).
/// </summary>
/// <param name="denv">Vertex environment; supplies the native handle and the write buffer size.</param>
/// <param name="writerFactory">Factory used to create one record writer per output port.</param>
/// <param name="startPort">Number of the first output port (inclusive).</param>
/// <param name="endPort">Port number one past the last output port (exclusive).</param>
public HpcVertexWriter(HpcLinqVertexEnv denv, HpcLinqFactory<T> writerFactory, UInt32 startPort, UInt32 endPort)
{
    m_dvertexEnv = denv;
    m_nativeHandle = denv.NativeHandle;
    m_startPort = startPort;
    m_numberOfOutputs = endPort - startPort;
    m_writerFactory = writerFactory;

    m_writers = new HpcRecordWriter<T>[m_numberOfOutputs];

    // Every writer is created with the same buffer size, so query it once.
    Int32 bufferSize = m_dvertexEnv.GetWriteBuffSize();

    // Slot 'offset' holds the writer for port (offset + startPort).
    UInt32 offset = 0;
    while (offset < m_numberOfOutputs)
    {
        m_writers[offset] = writerFactory.MakeWriter(m_nativeHandle, offset + startPort, bufferSize);
        offset++;
    }
}
/// <summary>
/// Lazily yields a random sample of the keys of <paramref name="source"/>.
/// Each record is sampled with probability SAMPLE_RATE. If the whole input is
/// consumed before 10 samples have been collected, every key is emitted instead.
/// </summary>
/// <typeparam name="T">Type of the source records.</typeparam>
/// <typeparam name="K">Type of the keys extracted from the records.</typeparam>
/// <param name="source">Records to sample from; enumerated exactly once.</param>
/// <param name="keySelector">Extracts the key from a record.</param>
/// <param name="denv">Vertex environment; its native handle is used to obtain the vertex id that seeds the sampler.</param>
/// <returns>The sampled keys, or all keys when sampling produced fewer than 10 of them.</returns>
public static IEnumerable<K> Phase1Sampling<T, K>(IEnumerable<T> source, Func<T, K> keySelector, HpcLinqVertexEnv denv)
{
    // Number of samples that must be collected before streaming sampling is trusted.
    const int MinSampleCount = 10;

    // note: vertexID is constant for each repetition of a specific vertex (eg in fail-and-retry scenarios)
    // this is very good as it ensures the sampling is idempotent w.r.t. retries.
    long vertexID = HpcLinqNative.GetVertexId(denv.NativeHandle);
    int seed = unchecked((int)(vertexID));
    long nEmitted = 0;
    Random rdm = new Random(seed);

    List<K> allSoFar = new List<K>();
    List<K> samples = new List<K>();

    // The enumerator is IDisposable: the using block guarantees it is released both
    // on normal completion and when the consumer stops iterating early.
    using (IEnumerator<T> sourceEnumerator = source.GetEnumerator())
    {
        // try to collect MinSampleCount samples, but keep all the records just in case
        while (sourceEnumerator.MoveNext())
        {
            T elem = sourceEnumerator.Current;
            K key = keySelector(elem);
            allSoFar.Add(key);
            if (rdm.NextDouble() < SAMPLE_RATE)
            {
                samples.Add(key);
                if (samples.Count >= MinSampleCount)
                {
                    break;
                }
            }
        }

        if (samples.Count >= MinSampleCount)
        {
            // we have lots of samples.. emit them and continue sampling
            allSoFar = null; // not needed; dropping it lets the buffered keys be collected.
            foreach (K key in samples)
            {
                yield return key;
                nEmitted++;
            }

            // Sample the rest of the input on the fly; the key is computed only for sampled records.
            while (sourceEnumerator.MoveNext())
            {
                T elem = sourceEnumerator.Current;
                if (rdm.NextDouble() < SAMPLE_RATE)
                {
                    yield return keySelector(elem);
                    nEmitted++;
                }
            }
        }
        else
        {
            // sampling didn't produce much, so emit all the records instead.
            DryadLinqLog.Add("Sampling produced only {0} records. Emitting all records instead.", samples.Count);

            // This branch is only reached after MoveNext() returned false in the first loop.
            Debug.Assert(sourceEnumerator.MoveNext() == false, "The source enumerator wasn't finished");
            samples = null; // the samples list is not needed.
            foreach (K key in allSoFar)
            {
                yield return key;
                nEmitted++;
            }
        }
    }

    DryadLinqLog.Add("Stage1 sampling: num keys emitted = {0}", nEmitted);
}