Esempio n. 1
0
        /// <summary>
        /// Creates a vertex writer that is bound to exactly one output port.
        /// </summary>
        /// <param name="denv">Vertex environment; supplies the native handle and the write buffer size.</param>
        /// <param name="writerFactory">Factory used to build the record writer for the port.</param>
        /// <param name="portNum">The port the single record writer is created for.</param>
        public DryadLinqVertexWriter(VertexEnv denv, DryadLinqFactory <T> writerFactory, UInt32 portNum)
        {
            m_dvertexEnv = denv;
            m_nativeHandle = denv.NativeHandle;
            m_writerFactory = writerFactory;

            // A single port: the port range starts at portNum and has length one.
            m_startPort = portNum;
            m_numberOfOutputs = 1;

            Int32 writeBufferSize = m_dvertexEnv.GetWriteBuffSize();
            DryadLinqRecordWriter<T> onlyWriter =
                writerFactory.MakeWriter(m_nativeHandle, portNum, writeBufferSize);

            m_writers = new DryadLinqRecordWriter<T>[1];
            m_writers[0] = onlyWriter;
        }
Esempio n. 2
0
        /// <summary>
        /// Creates a vertex reader that is bound to exactly one input port.
        /// </summary>
        /// <param name="denv">Vertex environment; supplies the native handle.</param>
        /// <param name="readerFactory">Factory used to build the record reader for the port.</param>
        /// <param name="portNum">The port the single record reader is created for.</param>
        public DryadLinqVertexReader(VertexEnv denv, DryadLinqFactory <T> readerFactory, UInt32 portNum)
        {
            m_dvertexEnv = denv;
            m_nativeHandle = denv.NativeHandle;
            m_readerFactory = readerFactory;

            // A single port: the port range starts at portNum and has length one.
            m_startPort = portNum;
            m_numberOfInputs = 1;
            m_keepInputPortOrder = false;

            // With one input the permutation is trivially { 0 }; a fresh
            // one-element UInt32 array is already zero-initialized.
            m_portPermArray = new UInt32[1];

            DryadLinqRecordReader<T> onlyReader = readerFactory.MakeReader(m_nativeHandle, portNum);
            m_readers = new DryadLinqRecordReader<T>[1];
            m_readers[0] = onlyReader;

            m_isUsed = false;
        }
Esempio n. 3
0
        /// <summary>
        /// Creates a vertex writer over the output ports in the half-open range
        /// [<paramref name="startPort"/>, <paramref name="endPort"/>), building one
        /// record writer per port.
        /// </summary>
        /// <param name="denv">Vertex environment; supplies the native handle and the write buffer size.</param>
        /// <param name="writerFactory">Factory used to build a record writer for each port.</param>
        /// <param name="startPort">First port (inclusive).</param>
        /// <param name="endPort">End of the port range (exclusive); must not be less than <paramref name="startPort"/>.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="endPort"/> is less than <paramref name="startPort"/>.
        /// </exception>
        public DryadLinqVertexWriter(VertexEnv denv, DryadLinqFactory <T> writerFactory, UInt32 startPort, UInt32 endPort)
        {
            // UInt32 subtraction wraps around by default, so an inverted range would
            // silently become a huge output count and fail later in the array
            // allocation. Reject it up front with a meaningful error instead.
            if (endPort < startPort)
            {
                throw new ArgumentOutOfRangeException(
                    "endPort", endPort, "endPort must be greater than or equal to startPort.");
            }

            this.m_dvertexEnv      = denv;
            this.m_nativeHandle    = denv.NativeHandle;
            this.m_startPort       = startPort;
            this.m_numberOfOutputs = endPort - startPort;
            this.m_writerFactory   = writerFactory;
            this.m_writers         = new DryadLinqRecordWriter <T> [this.m_numberOfOutputs];

            // The buffer size is queried once and shared by every writer.
            Int32 buffSize = this.m_dvertexEnv.GetWriteBuffSize();

            // Writer i serves port (startPort + i).
            for (UInt32 i = 0; i < this.m_numberOfOutputs; i++)
            {
                this.m_writers[i] = writerFactory.MakeWriter(this.m_nativeHandle, i + startPort, buffSize);
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Creates a vertex reader over the input ports in the half-open range
        /// [<paramref name="startPort"/>, <paramref name="endPort"/>), building one
        /// record reader per port and a permutation of the port indices.
        /// </summary>
        /// <param name="denv">Vertex environment; supplies the native handle.</param>
        /// <param name="readerFactory">Factory used to build a record reader for each port.</param>
        /// <param name="startPort">First port (inclusive).</param>
        /// <param name="endPort">End of the port range (exclusive); must not be less than <paramref name="startPort"/>.</param>
        /// <param name="keepInputPortOrder">
        /// When true the port permutation is the identity; when false it is shuffled
        /// using a random generator seeded with the current process id.
        /// </param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="endPort"/> is less than <paramref name="startPort"/>.
        /// </exception>
        public DryadLinqVertexReader(VertexEnv denv,
                                     DryadLinqFactory <T> readerFactory,
                                     UInt32 startPort,
                                     UInt32 endPort,
                                     bool keepInputPortOrder)
        {
            // UInt32 subtraction wraps around by default, so an inverted range would
            // silently become a huge input count and fail later in the array
            // allocations. Reject it up front with a meaningful error instead.
            if (endPort < startPort)
            {
                throw new ArgumentOutOfRangeException(
                    "endPort", endPort, "endPort must be greater than or equal to startPort.");
            }

            this.m_dvertexEnv         = denv;
            this.m_nativeHandle       = denv.NativeHandle;
            this.m_readerFactory      = readerFactory;
            this.m_startPort          = startPort;
            this.m_numberOfInputs     = endPort - startPort;
            this.m_keepInputPortOrder = keepInputPortOrder;

            // Start from the identity permutation.
            this.m_portPermArray = new UInt32[this.NumberOfInputs];
            for (UInt32 i = 0; i < this.NumberOfInputs; i++)
            {
                this.m_portPermArray[i] = i;
            }

            if (!keepInputPortOrder)
            {
                // Process is IDisposable; release it as soon as the id has been read.
                int seed;
                using (System.Diagnostics.Process current = System.Diagnostics.Process.GetCurrentProcess())
                {
                    seed = current.Id;
                }
                Random rdm = new Random(seed);

                // Fisher-Yates shuffle from the back: each step picks a random slot in
                // [0, remaining) and swaps it into the last unshuffled position.
                for (Int32 remaining = (Int32)this.NumberOfInputs; remaining > 1; remaining--)
                {
                    int    idx  = rdm.Next(remaining);
                    UInt32 last = this.m_portPermArray[remaining - 1];
                    this.m_portPermArray[remaining - 1] = this.m_portPermArray[idx];
                    this.m_portPermArray[idx]           = last;
                }
            }

            // Reader i serves port (startPort + i).
            this.m_readers = new DryadLinqRecordReader <T> [this.NumberOfInputs];
            for (UInt32 i = 0; i < this.NumberOfInputs; i++)
            {
                this.m_readers[i] = this.m_readerFactory.MakeReader(this.m_nativeHandle, startPort + i);
            }
            this.m_isUsed = false;
        }
Esempio n. 5
0
        /// <summary>
        /// Lazily produces a sample of the keys of <paramref name="source"/>.
        /// Each record's key is selected with probability SAMPLE_RATE. If fewer
        /// than ten keys have been selected by the time the source is exhausted,
        /// the keys of all records are emitted instead.
        /// </summary>
        /// <typeparam name="T">Record type of the source sequence.</typeparam>
        /// <typeparam name="K">Key type produced by <paramref name="keySelector"/>.</typeparam>
        /// <param name="source">The records to sample; enumerated exactly once.</param>
        /// <param name="keySelector">Extracts the key from a record.</param>
        /// <param name="denv">Vertex environment; its native handle is used to look up the vertex id that seeds the random generator.</param>
        /// <returns>The sampled keys, or every key when sampling selected fewer than ten.</returns>
        public static IEnumerable<K> Phase1Sampling<T, K>(IEnumerable<T> source,
                                                          Func<T, K> keySelector,
                                                          VertexEnv denv)
        {
            // Sampling is trusted only once it has produced this many keys;
            // below that every key seen is emitted instead.
            const int minSamples = 10;

            // note: vertexID is constant for each repetition of a specific vertex (eg in fail-and-retry scenarios)
            //       this is very good as it ensures the sampling is idempotent w.r.t. retries.
            long vertexID = DryadLinqNative.GetVertexId(denv.NativeHandle);
            int seed = unchecked((int)(vertexID));
            long nEmitted = 0;

            Random rdm = new Random(seed);

            List<K> allSoFar = new List<K>();
            List<K> samples = new List<K>();

            // The enumerator is disposed when iteration completes or when the
            // consumer disposes this iterator early.
            using (IEnumerator<T> sourceEnumerator = source.GetEnumerator())
            {
                // try to collect minSamples samples, but keep all the keys just in case
                while (sourceEnumerator.MoveNext())
                {
                    T elem = sourceEnumerator.Current;
                    K key = keySelector(elem);
                    allSoFar.Add(key);
                    if (rdm.NextDouble() < SAMPLE_RATE)
                    {
                        samples.Add(key);
                        if (samples.Count >= minSamples)
                        {
                            break;
                        }
                    }
                }

                if (samples.Count >= minSamples)
                {
                    // we have lots of samples.. emit them and continue sampling
                    allSoFar = null; // not needed any more; let it be collected.
                    foreach (K key in samples)
                    {
                        yield return key;
                        nEmitted++;
                    }
                    while (sourceEnumerator.MoveNext())
                    {
                        T elem = sourceEnumerator.Current;
                        if (rdm.NextDouble() < SAMPLE_RATE)
                        {
                            yield return keySelector(elem);
                            nEmitted++;
                        }
                    }
                }
                else
                {
                    // sampling didn't produce much, so emit all the records instead.
                    DryadLinqLog.AddInfo("Sampling produced only {0} records.  Emitting all records instead.", samples.Count);
                    // Debug.Assert is compiled out of non-DEBUG builds, so this extra
                    // MoveNext call only happens in debug builds.
                    Debug.Assert(sourceEnumerator.MoveNext() == false, "The source enumerator wasn't finished");
                    samples = null; // the samples list is not needed.
                    foreach (K key in allSoFar)
                    {
                        yield return key;
                        nEmitted++;
                    }
                }
            }

            DryadLinqLog.AddInfo("Stage1 sampling: num keys emitted = {0}", nEmitted);
        }