Ejemplo n.º 1
0
        /// <summary>
        /// Creates a vertex reader over the input ports in the half-open range
        /// [<paramref name="startPort"/>, <paramref name="endPort"/>), making one record
        /// reader per port and building a permutation of the port indices.
        /// </summary>
        /// <param name="denv">Vertex environment; supplies NativeHandle and KeepInputPortOrder.</param>
        /// <param name="readerFactory">Factory used to make one record reader per input port.</param>
        /// <param name="startPort">First input port (inclusive).</param>
        /// <param name="endPort">End of the input port range (exclusive).</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="endPort"/> is less than <paramref name="startPort"/>.
        /// </exception>
        public HpcVertexReader(HpcLinqVertexEnv denv, HpcLinqFactory <T> readerFactory, UInt32 startPort, UInt32 endPort)
        {
            // UInt32 subtraction wraps around silently, so an inverted range would
            // otherwise turn into a huge bogus input count. Reject it up front.
            if (endPort < startPort)
            {
                throw new ArgumentOutOfRangeException("endPort", "endPort must not be less than startPort.");
            }

            this.m_dvertexEnv     = denv;
            this.m_nativeHandle   = denv.NativeHandle;
            this.m_readerFactory  = readerFactory;
            this.m_startPort      = startPort;
            this.m_numberOfInputs = endPort - startPort;

            // Start from the identity permutation of the port indices.
            this.m_portPermArray = new UInt32[this.NumberOfInputs];
            for (UInt32 i = 0; i < this.NumberOfInputs; i++)
            {
                this.m_portPermArray[i] = i;
            }

            if (!denv.KeepInputPortOrder)
            {
                // The shuffle is seeded with the current process id. The Process
                // object is disposable, so release it as soon as the id is read.
                int seed;
                using (System.Diagnostics.Process currentProcess = System.Diagnostics.Process.GetCurrentProcess())
                {
                    seed = currentProcess.Id;
                }
                Random rdm = new Random(seed);

                // Fisher-Yates shuffle: pick a random slot among the first 'max'
                // entries, swap it into position max - 1, then shrink the range.
                for (Int32 max = (Int32)this.NumberOfInputs; max > 1; max--)
                {
                    int    idx = rdm.Next(max);
                    UInt32 n   = this.m_portPermArray[max - 1];
                    this.m_portPermArray[max - 1] = this.m_portPermArray[idx];
                    this.m_portPermArray[idx]     = n;
                }
            }

            // Readers are created in plain port order; the permutation array is
            // not consulted here.
            this.m_readers = new HpcRecordReader <T> [this.NumberOfInputs];
            for (UInt32 i = 0; i < this.NumberOfInputs; i++)
            {
                this.m_readers[i] = this.m_readerFactory.MakeReader(this.m_nativeHandle, startPort + i);
            }
            this.m_isUsed = false;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates a vertex writer bound to a single output port.
        /// </summary>
        /// <param name="denv">Vertex environment; supplies NativeHandle and the write buffer size.</param>
        /// <param name="writerFactory">Factory used to make the record writer for the port.</param>
        /// <param name="portNum">The output port to write to.</param>
        public HpcVertexWriter(HpcLinqVertexEnv denv, HpcLinqFactory <T> writerFactory, UInt32 portNum)
        {
            this.m_dvertexEnv    = denv;
            this.m_nativeHandle  = denv.NativeHandle;
            this.m_writerFactory = writerFactory;

            // Exactly one output, located at portNum.
            this.m_startPort       = portNum;
            this.m_numberOfOutputs = 1;

            // The buffer size is queried from the environment before the writer is made.
            Int32 writeBufferSize = this.m_dvertexEnv.GetWriteBuffSize();
            HpcRecordWriter <T> soleWriter =
                writerFactory.MakeWriter(this.m_nativeHandle, portNum, writeBufferSize);

            HpcRecordWriter <T>[] writerSlots = new HpcRecordWriter <T> [1];
            writerSlots[0] = soleWriter;
            this.m_writers = writerSlots;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Creates a vertex reader bound to a single input port.
        /// </summary>
        /// <param name="denv">Vertex environment; supplies NativeHandle.</param>
        /// <param name="readerFactory">Factory used to make the record reader for the port.</param>
        /// <param name="portNum">The input port to read from.</param>
        public HpcVertexReader(HpcLinqVertexEnv denv, HpcLinqFactory <T> readerFactory, UInt32 portNum)
        {
            this.m_dvertexEnv    = denv;
            this.m_nativeHandle  = denv.NativeHandle;
            this.m_readerFactory = readerFactory;

            // Exactly one input, located at portNum.
            this.m_startPort      = portNum;
            this.m_numberOfInputs = 1;

            // With a single input the port permutation is just the identity { 0 }.
            UInt32[] identityPermutation = new UInt32[1];
            identityPermutation[0] = 0;
            this.m_portPermArray = identityPermutation;

            HpcRecordReader <T> soleReader = readerFactory.MakeReader(this.m_nativeHandle, portNum);

            HpcRecordReader <T>[] readerSlots = new HpcRecordReader <T> [1];
            readerSlots[0] = soleReader;
            this.m_readers = readerSlots;

            this.m_isUsed = false;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Creates a vertex writer over the output ports in the half-open range
        /// [<paramref name="startPort"/>, <paramref name="endPort"/>), making one record
        /// writer per port.
        /// </summary>
        /// <param name="denv">Vertex environment; supplies NativeHandle and the write buffer size.</param>
        /// <param name="writerFactory">Factory used to make one record writer per output port.</param>
        /// <param name="startPort">First output port (inclusive).</param>
        /// <param name="endPort">End of the output port range (exclusive).</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="endPort"/> is less than <paramref name="startPort"/>.
        /// </exception>
        public HpcVertexWriter(HpcLinqVertexEnv denv, HpcLinqFactory <T> writerFactory, UInt32 startPort, UInt32 endPort)
        {
            // UInt32 subtraction wraps around silently, so an inverted range would
            // otherwise turn into a huge bogus output count. Reject it up front.
            if (endPort < startPort)
            {
                throw new ArgumentOutOfRangeException("endPort", "endPort must not be less than startPort.");
            }

            this.m_dvertexEnv      = denv;
            this.m_nativeHandle    = denv.NativeHandle;
            this.m_startPort       = startPort;
            this.m_numberOfOutputs = endPort - startPort;
            this.m_writerFactory   = writerFactory;
            this.m_writers         = new HpcRecordWriter <T> [this.m_numberOfOutputs];

            // The buffer size is queried once and shared by every writer.
            Int32 buffSize = this.m_dvertexEnv.GetWriteBuffSize();

            // Writer i is attached to port (startPort + i).
            for (UInt32 i = 0; i < this.m_numberOfOutputs; i++)
            {
                this.m_writers[i] = writerFactory.MakeWriter(this.m_nativeHandle, i + startPort, buffSize);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Lazily yields a random sample of the keys of <paramref name="source"/>.
        /// Each record is sampled with probability SAMPLE_RATE. If fewer than 10 samples
        /// are obtained before the source is exhausted, the keys of all records are
        /// yielded instead.
        /// </summary>
        /// <typeparam name="T">Record type of the source sequence.</typeparam>
        /// <typeparam name="K">Key type produced by <paramref name="keySelector"/>.</typeparam>
        /// <param name="source">The records to sample.</param>
        /// <param name="keySelector">Extracts the key from a record.</param>
        /// <param name="denv">Vertex environment; its NativeHandle is used to look up the vertex id that seeds the random generator.</param>
        /// <returns>A deferred sequence of sampled keys (or of all keys when sampling yielded too few).</returns>
        public static IEnumerable <K> Phase1Sampling <T, K>(IEnumerable <T> source,
                                                            Func <T, K> keySelector,
                                                            HpcLinqVertexEnv denv)
        {
            // note: vertexID is constant for each repetition of a specific vertex (eg in fail-and-retry scenarios)
            //       this is very good as it ensure the sampling is idempotent w.r.t. retries.

            // Number of samples that must be collected before switching to
            // streaming mode (emit samples as they are drawn).
            const int minSamples = 10;

            long vertexID = HpcLinqNative.GetVertexId(denv.NativeHandle);
            int  seed     = unchecked ((int)(vertexID));
            long nEmitted = 0;

            Random rdm = new Random(seed);

            List <K> allSoFar = new List <K>();
            List <K> samples  = new List <K>();

            // The enumerator is disposable; the using block guarantees it is released
            // when this iterator completes or when the consumer disposes it early.
            using (IEnumerator <T> sourceEnumerator = source.GetEnumerator())
            {
                // try to collect minSamples samples, but keep all the records just in case
                while (sourceEnumerator.MoveNext())
                {
                    T elem = sourceEnumerator.Current;
                    K key  = keySelector(elem);
                    allSoFar.Add(key);
                    if (rdm.NextDouble() < SAMPLE_RATE)
                    {
                        samples.Add(key);
                        if (samples.Count >= minSamples)
                        {
                            break;
                        }
                    }
                }

                if (samples.Count >= minSamples)
                {
                    // we have lots of samples.. emit them and continue sampling
                    allSoFar = null; // not needed.
                    foreach (K key in samples)
                    {
                        yield return(key);

                        nEmitted++;
                    }
                    // From here on the key is only computed for records that are sampled.
                    while (sourceEnumerator.MoveNext())
                    {
                        T elem = sourceEnumerator.Current;
                        if (rdm.NextDouble() < SAMPLE_RATE)
                        {
                            yield return(keySelector(elem));

                            nEmitted++;
                        }
                    }
                }
                else
                {
                    // sampling didn't produce much, so emit all the records instead.
                    DryadLinqLog.Add("Sampling produced only {0} records.  Emitting all records instead.", samples.Count);
                    // Debug.Assert is compiled out of non-DEBUG builds, so this extra
                    // MoveNext() call only happens in DEBUG builds.
                    Debug.Assert(sourceEnumerator.MoveNext() == false, "The source enumerator wasn't finished");
                    samples = null; // the samples list is not needed.
                    foreach (K key in allSoFar)
                    {
                        yield return(key);

                        nEmitted++;
                    }
                }
            }

            DryadLinqLog.Add("Stage1 sampling: num keys emitted = {0}", nEmitted);
        }