protected override bool MoveNextCore()
            {
                // Advance the input. An exhausted input ends the cursor; a row that does not open
                // a new group is reported as a plain successful move.
                bool advanced = _input.MoveNext();
                if (!advanced || !_newGroupInInputCursorDel())
                {
                    return advanced;
                }

                // On the first group the auxiliary cursor has not been moved yet. On later groups
                // it is already positioned at the start of the next group.
                if (_groupCursor.State == CursorState.NotStarted)
                {
                    // Both cursors are expected to hold the same number of rows, so a successful
                    // move on _input implies a successful move here as well.
                    bool started = _groupCursor.MoveNext() && _newGroupInGroupCursorDel();
                    Ch.Assert(started);
                }

                // Feed every row of the group, read through the auxiliary cursor, to the parent.
                for (; _groupCursor.State != CursorState.Done && !_newGroupInGroupCursorDel(); _groupCursor.MoveNext())
                {
                    TLabel label = default(TLabel);
                    TScore score = default(TScore);
                    _labelGetter(ref label);
                    _scoreGetter(ref score);
                    _parent.ProcessExample(_state, label, score);
                }

                _parent.UpdateState(_state);
                return true;
            }
Exemplo n.º 2
0
            /// <summary>
            /// Moves to the next output row. Each input row is emitted as many times as its replica
            /// count says; rows whose count is zero or negative are skipped.
            /// </summary>
            /// <returns>True if a row is available, false once the input cursor is exhausted.</returns>
            public bool MoveNext()
            {
                // Replicas of the current input row remain: emit one more without moving the input.
                if (_copy > 0)
                {
                    _copy--;
                    return true;
                }

                if (!_inputCursor.MoveNext())
                {
                    return false;
                }

                // First draw for the new row: cached count when a cache exists, otherwise a capped Poisson draw.
                // NOTE(review): unlike the skip loop below, this draw never consults _classGetter - confirm this is intended.
                if (_cache == null)
                {
                    _copy = NextPoisson(_lambda, _rand);
                    _copy = Math.Min(_copy, _maxReplica);
                }
                else
                {
                    _idGetter(ref _currentId);
                    _copy = _cache[_currentId];
                }

                // Keep advancing while the current row was assigned no replica.
                while (_copy <= 0)
                {
                    if (!_inputCursor.MoveNext())
                    {
                        return false;
                    }

                    if (_cache != null)
                    {
                        _idGetter(ref _currentId);
                        _copy = _cache[_currentId];
                        continue;
                    }

                    if (_classGetter != null)
                    {
                        // Only rows whose class equals _classValue are resampled; others get one copy.
                        _classGetter(ref _currentCl);
                        _copy = _currentCl.Equals(_classValue) ? NextPoisson(_lambda, _rand) : 1;
                    }
                    else
                    {
                        _copy = NextPoisson(_lambda, _rand);
                    }
                    _copy = Math.Min(_copy, _maxReplica);
                }

                // Consume the first replica of the row we stopped on.
                _copy--;
                return true;
            }
            protected override bool MoveNextCore()
            {
                // Start the leading cursor lazily on the first call.
                if (_leadingCursor.State == CursorState.NotStarted)
                {
                    _leadingCursor.MoveNext();
                }

                // The previous call already ran the leading cursor off the end of the input.
                if (_leadingCursor.State == CursorState.Done)
                {
                    return false;
                }

                // Run the leading cursor to the end of the current group (or of the data), counting rows.
                int rowsInGroup = 0;
                for (; _leadingCursor.State == CursorState.Good && IsSameGroup(); _leadingCursor.MoveNext())
                {
                    rowsInGroup++;
                }

                // An empty group would require the leading cursor to be Done already, which was handled above.
                Ch.Assert(rowsInGroup > 0);

                // Size every active aggregator for the group before reading values into it.
                // REVIEW: this could be done lazily, but still all aggregators together.
                foreach (var aggregator in _aggregators)
                {
                    if (aggregator != null)
                    {
                        aggregator.SetSize(rowsInGroup);
                    }
                }

                // Let the trailing cursor catch up, feeding each row to the aggregators.
                for (int row = 0; row < rowsInGroup; row++)
                {
                    bool moved = _trailingCursor.MoveNext();
                    Ch.Assert(moved);

                    foreach (var aggregator in _aggregators)
                    {
                        if (aggregator != null)
                        {
                            aggregator.ReadValue(row);
                        }
                    }
                }

                return true;
            }
Exemplo n.º 4
0
        /// <summary>
        /// Reads the count column of <paramref name="confusionMatrix"/> and builds one
        /// <see cref="ConfusionMatrix"/> for every consecutive run of <c>VectorSize</c> rows.
        /// </summary>
        /// <param name="env">Host environment used for assertions and exception creation.</param>
        /// <param name="confusionMatrix">Data view holding the count column and its slot names.</param>
        /// <returns>The matrices in the order they appear in the data view.</returns>
        /// <remarks>
        /// Throws the exception produced by <c>env.Except</c> when the count column is missing.
        /// A trailing run of fewer than <c>VectorSize</c> rows is not turned into a matrix.
        /// </remarks>
        internal static List <ConfusionMatrix> Create(IHostEnvironment env, IDataView confusionMatrix)
        {
            Contracts.AssertValue(env);
            env.AssertValue(confusionMatrix);

            if (!confusionMatrix.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.Count, out int countColumn))
            {
                throw env.Except($"ConfusionMatrix data view did not contain a {nameof(MetricKinds.ColumnNames.Count)} column.");
            }

            // Class names come from the slot-name metadata of the count column.
            var slots = default(VBuffer <ReadOnlyMemory <char> >);
            confusionMatrix.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, countColumn, ref slots);
            var slotsValues = slots.GetValues();

            string[] classNames = new string[slotsValues.Length];
            for (int i = 0; i < slotsValues.Length; i++)
            {
                classNames[i] = slotsValues[i].ToString();
            }

            ColumnType type = confusionMatrix.Schema.GetColumnType(countColumn);
            env.Assert(type.IsVector);

            List <ConfusionMatrix> confusionMatrices = new List <ConfusionMatrix>();

            // The cursor is disposable; release it as soon as all rows have been read.
            using (IRowCursor cursor = confusionMatrix.GetRowCursor(col => col == countColumn))
            {
                ValueGetter <VBuffer <double> > countGetter = cursor.GetGetter <VBuffer <double> >(countColumn);
                VBuffer <double> countValues = default;

                int valuesRowIndex = 0;
                double[,] elements = null;

                while (cursor.MoveNext())
                {
                    // A fresh matrix starts every VectorSize rows.
                    if (valuesRowIndex == 0)
                    {
                        elements = new double[type.VectorSize, type.VectorSize];
                    }

                    countGetter(ref countValues);
                    ReadOnlySpan <double> values = countValues.GetValues();
                    for (int i = 0; i < values.Length; i++)
                    {
                        elements[valuesRowIndex, i] = values[i];
                    }

                    valuesRowIndex++;

                    if (valuesRowIndex == type.VectorSize)
                    {
                        valuesRowIndex = 0;
                        confusionMatrices.Add(new ConfusionMatrix(elements, classNames));
                    }
                }
            }

            return confusionMatrices;
        }
Exemplo n.º 5
0
            /// <summary>
            /// Producer loop: repeatedly receives a requested row count from <c>_toProduce</c>, advances
            /// <c>_input</c> up to that many rows while filling every pipe, and posts the number of rows
            /// actually produced to <c>_toConsume</c>. A posted value of 0 acts as the end sentinel.
            /// </summary>
            /// <remarks>
            /// Any exception is stored in <c>_producerTaskException</c> and followed by the sentinel
            /// instead of being rethrown from this task.
            /// </remarks>
            private async Task LoopProducerWorker()
            {
                try
                {
                    // Position within _pipeIndices; wraps to 0 at the end of the array.
                    int circularIndex = 0;
                    for (; ;)
                    {
                        int requested = await _toProduce.ReceiveAsync();

                        if (requested == 0)
                        {
                            // We had some sort of early exit. Just go out, do not post even the
                            // sentinel to the consumer, as nothing will be consumed any more.
                            return;
                        }
                        Ch.Assert(requested >= _blockSize);
                        // Declared outside the loop because the count is needed after it ends.
                        int numRows;
                        for (numRows = 0; numRows < requested; ++numRows)
                        {
                            Ch.Assert(0 <= circularIndex & circularIndex < _pipeIndices.Length);
                            if (!_input.MoveNext())
                            {
                                // Input exhausted: numRows stays below requested.
                                break;
                            }
                            int pipeIndex = _pipeIndices[circularIndex++];
                            Ch.Assert(0 <= pipeIndex & pipeIndex < _pipeIndices.Length);
                            // Every pipe stores the current row at the same pipe index.
                            for (int c = 0; c < _pipes.Length; ++c)
                            {
                                _pipes[c].Fill(pipeIndex);
                            }
                            if (circularIndex == _pipeIndices.Length)
                            {
                                circularIndex = 0;
                            }
                        }
                        // Tell the consumer how many rows are ready (may be 0 when the input was already exhausted).
                        PostAssert(_toConsume, numRows);
                        if (numRows < requested)
                        {
                            // We've reached the end of the cursor. Send the sentinel, then exit.
                            // This assumes that the receiver will receive things in Post order
                            // (so that the sentinel is received, after the last Post).
                            // When numRows is 0 the post above already was the sentinel.
                            if (numRows > 0)
                            {
                                PostAssert(_toConsume, 0);
                            }
                            return;
                        }
                    }
                }
                catch (Exception ex)
                {
                    _producerTaskException = ex;
                    // Send the sentinel in this case as well, the field will be checked.
                    PostAssert(_toConsume, 0);
                }
            }
Exemplo n.º 6
0
            /// <summary>
            /// Advances the input cursor and, when a row is available, retrieves its neighbors.
            /// </summary>
            /// <returns>True if the input cursor moved to a row, false otherwise.</returns>
            public bool MoveNext()
            {
                if (_inputCursor.MoveNext())
                {
                    RetrieveNeighbors();
                    return true;
                }

                return false;
            }
Exemplo n.º 7
0
            protected override bool MoveNextCore()
            {
                for (; ;)
                {
                    // A live sub cursor that still has a row means this move succeeded.
                    if (_subCursor != null && _subCursor.MoveNext())
                    {
                        return true;
                    }

                    // Release the exhausted sub cursor, if there is one.
                    _subCursor?.Dispose();
                    _subCursor = null;

                    // No more files: the whole cursor is done.
                    if (!TryGetNextPathAndValues(out string path, out string relativePath, out List <string> values))
                    {
                        return false;
                    }

                    IDataLoader loader = null;
                    try
                    {
                        // Build a loader for the next file from the stored sub-loader bytes.
                        loader = _parent.CreateLoaderFromBytes(_parent._subLoaderBytes, new MultiFileSource(path));
                    }
                    catch (Exception e)
                    {
                        Ch.Warning($"Failed to load file {path} due to a loader exception. Moving on to the next file. Ex: {e.Message}");
                        continue;
                    }

                    _subCursor = loader.GetRowCursor(col => _subActive[col]);

                    try
                    {
                        UpdateSubGetters();
                        UpdateColumnValues(relativePath, values);
                    }
                    catch (InvalidOperationException e)
                    {
                        // This file cannot be used; drop its cursor so the next iteration picks another file.
                        Ch.Warning(MessageSensitivity.Schema, e.Message);
                        _subCursor?.Dispose();
                        _subCursor = null;
                    }
                }
            }
            protected override bool MoveNextCore()
            {
                // Pull input rows until RunLambda accepts one, or the input runs out.
                bool accepted;
                do
                {
                    if (!_input.MoveNext())
                    {
                        return false;
                    }
                    RunLambda(out accepted);
                }
                while (!accepted);

                return true;
            }
Exemplo n.º 9
0
        /// <summary>
        /// Picks the number of rows per block. Without a byte bound this is the configured row bound;
        /// otherwise rows are read from <paramref name="data"/> until the summed per-column estimates
        /// exceed the byte bound, the row bound is reached, or the data ends.
        /// </summary>
        /// <param name="data">Data view to sample rows from.</param>
        /// <param name="actives">Codecs of the columns being written.</param>
        /// <returns>A row count of at least 1.</returns>
        private int RowsPerBlockHeuristic(IDataView data, ColumnCodec[] actives)
        {
            // No size bound configured: fall back to the row bound.
            if (!_maxBytesPerBlock.HasValue)
            {
                _host.Assert(_maxRowsPerBlock.HasValue && _maxRowsPerBlock.Value > 0); // argument validation should have ensured this
                return _maxRowsPerBlock.Value;
            }
            long byteLimit = _maxBytesPerBlock.Value;

            // Cursor set-up: only the source columns of the active codecs are requested.
            var sourceColumns = new HashSet <int>(actives.Select(codec => codec.SourceIndex));
            Random shuffleRand = data.CanShuffle ? new TauswortheHybrid(_host.Rand) : null;

            // EstimatorCore is generic; grab its open definition so it can be closed per column type.
            EstimatorDelegate prototype = EstimatorCore <int>;
            MethodInfo openMethod = prototype.GetMethodInfo().GetGenericMethodDefinition();

            using (IRowCursor cursor = data.GetRowCursor(sourceColumns.Contains, shuffleRand))
            {
                // Slots 2 and 3 are read back after each reflective call.
                object[] callArgs = new object[] { cursor, null, null, null };
                var writers = new IValueWriter[actives.Length];
                var estimators = new Func <long> [actives.Length];
                for (int c = 0; c < actives.Length; ++c)
                {
                    var column = actives[c];
                    callArgs[1] = column;
                    openMethod.MakeGenericMethod(column.Codec.Type.RawType).Invoke(this, callArgs);
                    estimators[c] = (Func <long>)callArgs[2];
                    writers[c] = (IValueWriter)callArgs[3];
                }

                // We can't really support more than this.
                int rowLimit = _maxRowsPerBlock.GetValueOrDefault(int.MaxValue);
                int rows = 0;
                // Running out of rows simply means one block per column.
                for (; rows < rowLimit && cursor.MoveNext(); rows++)
                {
                    if (estimators.Sum(estimate => estimate()) > byteLimit)
                    {
                        break;
                    }
                }

                // Possible that even a single row exceeds the "limit".
                return Math.Max(1, rows);
            }
        }
Exemplo n.º 10
0
            /// <summary>
            /// Writes an optional header line and then one line per cursor row, tracking the smallest
            /// and largest value of <c>_dstBase</c> seen across the data lines.
            /// </summary>
            /// <param name="cursor">Cursor supplying the rows to write.</param>
            /// <param name="count">Incremented once for every data line written.</param>
            /// <param name="minLen">Smallest <c>_dstBase</c> over the data lines; <c>int.MaxValue</c> if there were none.</param>
            /// <param name="maxLen">Largest <c>_dstBase</c> over the data lines; 0 if there were none.</param>
            public void Run(IRowCursor cursor, ref long count, out int minLen, out int maxLen)
            {
                minLen = int.MaxValue;
                maxLen = 0;

                Action <StringBuilder, int> writeValue = (sb, index) => AppendItem(sb, index, _pipes[_col].Default);
                Action <StringBuilder, int> writeHeaderCell = (sb, index) => AppendItem(sb, index, "");

                if (_hasHeader)
                {
                    StartLine();
                    while (_col < _pipes.Length)
                    {
                        int headerLen;
                        _pipes[_col].WriteHeader(writeHeaderCell, out headerLen);
                        Contracts.Assert(headerLen >= 0);
                        EndColumn(headerLen);
                    }
                    EndLine("\"\"");
                    _writer.WriteLine();
                }

                for (; cursor.MoveNext(); count++)
                {
                    // Begin a new line, which also begins its first column.
                    StartLine();

                    while (_col < _pipes.Length)
                    {
                        int dataLen;
                        _pipes[_col].WriteData(writeValue, out dataLen);
                        Contracts.Assert(dataLen >= 0);
                        EndColumn(dataLen);
                    }

                    minLen = Math.Min(minLen, _dstBase);
                    maxLen = Math.Max(maxLen, _dstBase);

                    EndLine();
                    _writer.WriteLine();
                }
            }
Exemplo n.º 11
0
            protected override bool MoveNextCore()
            {
                Ch.AssertValue(_currentCursor);

                for (; ;)
                {
                    if (_currentCursor.MoveNext())
                    {
                        return true;
                    }

                    // The current source is exhausted: release its cursor.
                    _currentCursor.Dispose();
                    _currentCursor = null;

                    _currentSourceIndex++;
                    if (_currentSourceIndex >= Sources.Length)
                    {
                        return false;
                    }

                    // Open the next source with the same active columns and refresh the id getter.
                    _currentCursor = Sources[_currentSourceIndex].GetRowCursor(c => IsColumnActive(c));
                    _currentIdGetter = _currentCursor.GetIdGetter();
                }
            }
Exemplo n.º 12
0
        /// <summary>
        /// Builds a single <see cref="ConfusionMatrix"/> from the count column of
        /// <paramref name="confusionMatrix"/>, one matrix row per data view row.
        /// </summary>
        /// <param name="env">Host environment used for assertions and exception creation.</param>
        /// <param name="confusionMatrix">Data view holding the count column and its slot names.</param>
        /// <returns>The matrix together with the class names taken from the slot-name metadata.</returns>
        /// <remarks>
        /// Throws the exception produced by <c>env.Except</c> when the count column is missing.
        /// </remarks>
        internal static ConfusionMatrix Create(IHostEnvironment env, IDataView confusionMatrix)
        {
            Contracts.AssertValue(env);
            env.AssertValue(confusionMatrix);

            if (!confusionMatrix.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.Count, out int countColumn))
            {
                // The exception must be thrown; merely creating it would let execution continue
                // with countColumn left at its default value.
                throw env.Except($"ConfusionMatrix data view did not contain a {nameof(MetricKinds.ColumnNames.Count)} column.");
            }

            ColumnType type = confusionMatrix.Schema.GetColumnType(countColumn);

            env.Assert(type.IsVector);

            double[,] elements = new double[type.VectorSize, type.VectorSize];

            // The cursor is disposable; release it as soon as all rows have been read.
            using (IRowCursor cursor = confusionMatrix.GetRowCursor(col => col == countColumn))
            {
                ValueGetter <VBuffer <double> > countGetter = cursor.GetGetter <VBuffer <double> >(countColumn);
                VBuffer <double> countValues = default;

                int valuesRowIndex = 0;

                while (cursor.MoveNext())
                {
                    countGetter(ref countValues);
                    // NOTE(review): indexes Values by logical Length, which presumably requires a dense buffer - confirm.
                    for (int i = 0; i < countValues.Length; i++)
                    {
                        elements[valuesRowIndex, i] = countValues.Values[i];
                    }

                    valuesRowIndex++;
                }
            }

            var slots = default(VBuffer <DvText>);

            confusionMatrix.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, countColumn, ref slots);
            string[] classNames = new string[slots.Count];
            for (int i = 0; i < slots.Count; i++)
            {
                classNames[i] = slots.Values[i].ToString();
            }

            return new ConfusionMatrix(elements, classNames);
        }
 /// <summary>
 /// Reads every row of <paramref name="cursor"/>, adds its feature vector to a new
 /// <see cref="KdTree"/> and records the row's label and weight under the row id.
 /// The cursor is disposed before returning.
 /// </summary>
 /// <param name="data">Not used by this method.</param>
 /// <param name="cursor">Cursor to read; disposed by this method.</param>
 /// <param name="featureIndex">Column index of the feature vector.</param>
 /// <param name="labelIndex">Column index of the label, or a negative value for none.</param>
 /// <param name="idIndex">Column index of the row id, or a negative value to number rows by insertion count.</param>
 /// <param name="weightIndex">Column index of the weight, or a negative value to keep the weight at 1.</param>
 /// <param name="labelsWeights">Receives the (label, weight) pair stored for each row id.</param>
 /// <param name="args">Supplies the distance and seed passed to the tree.</param>
 /// <returns>The populated tree.</returns>
 private static KdTree BuildKDTree <TLabel>(IDataView data, IRowCursor cursor,
                                            int featureIndex, int labelIndex, int idIndex, int weightIndex,
                                            out Dictionary <long, Tuple <TLabel, float> > labelsWeights, NearestNeighborsArguments args)
     where TLabel : IComparable <TLabel>
 {
     using (cursor)
     {
         var readFeatures = cursor.GetGetter <VBuffer <float> >(featureIndex);
         var readLabel    = labelIndex >= 0 ? cursor.GetGetter <TLabel>(labelIndex) : null;
         var readWeight   = weightIndex >= 0 ? cursor.GetGetter <float>(weightIndex) : null;
         var readId       = idIndex >= 0 ? cursor.GetGetter <long>(idIndex) : null;
         var tree         = new KdTree(distance: args.distance, seed: args.seed);
         labelsWeights = new Dictionary <long, Tuple <TLabel, float> >();

         // Buffers are declared once and passed by ref to the getters on every row.
         VBuffer <float> features = new VBuffer <float>();
         TLabel          label    = default(TLabel);
         float           weight   = 1;
         long            rowId    = default(long);

         while (cursor.MoveNext())
         {
             readFeatures(ref features);
             if (readLabel != null)
             {
                 readLabel(ref label);
             }
             if (readWeight != null)
             {
                 readWeight(ref weight);
             }

             // Without an id column, rows are numbered by how many entries were stored so far.
             if (readId == null)
             {
                 rowId = labelsWeights.Count;
             }
             else
             {
                 readId(ref rowId);
             }

             labelsWeights[rowId] = Tuple.Create(label, weight);
             tree.Add(new PointIdFloat(rowId, features, true));
         }

         return tree;
     }
 }
Exemplo n.º 14
0
            /// <summary>
            /// Advances the input cursor, reads the input value and fills the shaking values
            /// with the configured algorithm.
            /// </summary>
            /// <returns>True if a row is available, false once the input cursor is exhausted.</returns>
            public bool MoveNext()
            {
                if (!_inputCursor.MoveNext())
                {
                    return false;
                }

                _inputGetter(ref _inputValue);

                // Only the exhaustive algorithm is handled here.
                if (_args.algo != ShakeInputAlgorithm.exhaustive)
                {
                    throw Contracts.Except("Not available algo {0}", _args.algo);
                }

                FillShakingValuesExhaustive();
                return true;
            }
Exemplo n.º 15
0
        /// <summary>
        /// Rebuilds <c>_cacheReplica</c>: for every row of <paramref name="cur"/>, stores under the row id
        /// a Poisson-drawn replica count when the row's class equals <paramref name="valueClass"/>, and 1 otherwise.
        /// Warns on <paramref name="ch"/> when no row matched the class.
        /// </summary>
        void LoadCache <TClass>(IRandom rand, IRowCursor cur, int classColumn, TClass valueClass, IChannel ch)
        {
            _cacheReplica = new Dictionary <UInt128, int>();
            var classCounts = new Dictionary <TClass, long>();
            var readId      = cur.GetIdGetter();
            var readClass   = cur.GetGetter <TClass>(classColumn);

            UInt128 rowId    = default(UInt128);
            TClass  rowClass = default(TClass);
            long    matching = 0;
            long    others   = 0;

            while (cur.MoveNext())
            {
                readClass(ref rowClass);
                readId(ref rowId);

                // Per-class row histogram (a missing key reads back as 0).
                long seen;
                classCounts.TryGetValue(rowClass, out seen);
                classCounts[rowClass] = seen + 1;

                int replicas;
                if (!rowClass.Equals(valueClass))
                {
                    replicas = 1;
                    others++;
                }
                else
                {
                    replicas = NextPoisson(_args.lambda, rand);
                    matching++;
                }

                _cacheReplica[rowId] = replicas;
            }

            if (matching == 0)
            {
                ch.Warning("Resample on a condition never happened: nbIn={0} nbOut={1}", matching, others);
            }
        }
Exemplo n.º 16
0
 /// <summary>Forwards the move to the wrapped input and returns its result.</summary>
 public bool MoveNext() => _input.MoveNext();
Exemplo n.º 17
0
        /// <summary>
        /// Compares <paramref name="curs1"/> row by row against <paramref name="view2"/>, reading
        /// <paramref name="view2"/> through one single-column cursor per column.
        /// </summary>
        /// <param name="curs1">Reference cursor; every column is expected to be active.</param>
        /// <param name="view2">Data view to compare against.</param>
        /// <param name="exactTypes">Passed to <c>EqualTypes</c> when comparing column types.</param>
        /// <param name="exactDoubles">Passed to <c>GetColumnComparer</c> when building value comparers.</param>
        /// <param name="checkId">When true, row ids are compared for every per-column cursor as well.</param>
        /// <returns>True when both sides have the same rows and values; otherwise the result of <c>Failed()</c>.</returns>
        protected bool CheckSameValues(IRowCursor curs1, IDataView view2, bool exactTypes = true, bool exactDoubles = true, bool checkId = true)
        {
            Contracts.Assert(curs1.Schema.ColumnCount == view2.Schema.ColumnCount);

            // Get a cursor for each column.
            int colLim  = curs1.Schema.ColumnCount;
            var cursors = new IRowCursor[colLim];

            try
            {
                for (int col = 0; col < colLim; col++)
                {
                    // curs1 should have all columns active (for simplicity of the code here).
                    Contracts.Assert(curs1.IsColumnActive(col));

                    // Capture a per-iteration copy: the `for` variable is shared by all iterations,
                    // so a predicate evaluated after this loop would otherwise see colLim.
                    int activeCol = col;
                    cursors[col] = view2.GetRowCursor(c => c == activeCol);
                }

                // Get the comparison delegates for each column.
                Func <bool>[] comps = new Func <bool> [colLim];
                // We have also one ID comparison delegate for each cursor.
                Func <bool>[] idComps = new Func <bool> [cursors.Length];
                for (int col = 0; col < colLim; col++)
                {
                    Contracts.Assert(cursors[col] != null);
                    var type1 = curs1.Schema.GetColumnType(col);
                    var type2 = cursors[col].Schema.GetColumnType(col);
                    if (!EqualTypes(type1, type2, exactTypes))
                    {
                        Fail("Different types");
                        return Failed();
                    }
                    comps[col] = GetColumnComparer(curs1, cursors[col], col, type1, exactDoubles);
                    ValueGetter <UInt128> idGetter;
                    idComps[col] = checkId ? GetIdComparer(curs1, cursors[col], out idGetter) : null;
                }

                for (; ;)
                {
                    // Move every cursor in lock step; any disagreement means a row-count mismatch.
                    bool f1 = curs1.MoveNext();
                    for (int col = 0; col < colLim; col++)
                    {
                        bool f2 = cursors[col].MoveNext();
                        if (f1 != f2)
                        {
                            if (f1)
                            {
                                Fail("Left has more rows at position: {0}", curs1.Position);
                            }
                            else
                            {
                                // Report the cursor that actually disagreed, not a fixed index.
                                Fail("Right {0} has more rows at position: {1}", col, cursors[col].Position);
                            }
                            return Failed();
                        }
                    }

                    if (!f1)
                    {
                        return true;
                    }

                    for (int col = 0; col < colLim; col++)
                    {
                        Contracts.Assert(curs1.Position == cursors[col].Position);
                        var comp = comps[col];
                        if (comp != null && !comp())
                        {
                            Fail("Different values in column {0} of row {1}", col, curs1.Position);
                            return Failed();
                        }
                        comp = idComps[col];
                        if (comp != null && !comp())
                        {
                            Fail("Different values in ID values for column {0} cursor of row {1}", col, curs1.Position);
                            return Failed();
                        }
                    }
                }
            }
            finally
            {
                // Dispose whatever cursors were created, even on an early return or exception.
                for (int col = 0; col < colLim; col++)
                {
                    var c = cursors[col];
                    if (c != null)
                    {
                        c.Dispose();
                    }
                }
            }
        }
Exemplo n.º 18
0
        /// <summary>
        /// Constructs the transform from user arguments: validates the hash bit count, builds the
        /// per-column settings, and, for columns that request it, scans <paramref name="input"/> once
        /// to build the inverse-hash key-value metadata.
        /// </summary>
        /// <param name="env">Host environment; checked for null before the base constructor runs.</param>
        /// <param name="args">Transform arguments; <c>args.Column</c> supplies the column definitions.</param>
        /// <param name="input">Input data view, also read when inverse hashes are requested.</param>
        public HashTransform(IHostEnvironment env, Arguments args, IDataView input)
            : base(Contracts.CheckRef(env, nameof(env)), RegistrationName, env.CheckRef(args, nameof(args)).Column,
                   input, TestType)
        {
            // Valid range is [NumBitsMin, NumBitsLim).
            if (args.HashBits < NumBitsMin || args.HashBits >= NumBitsLim)
            {
                throw Host.ExceptUserArg(nameof(args.HashBits), "hashBits should be between {0} and {1} inclusive", NumBitsMin, NumBitsLim - 1);
            }

            _exes = new ColInfoEx[Infos.Length];
            // Parallel lists: indices of columns wanting an inverse hash, and their max counts.
            // Left null until the first such column is found.
            List <int> invertIinfos        = null;
            List <int> invertHashMaxCounts = null;

            for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
            {
                _exes[iinfo] = new ColInfoEx(args, args.Column[iinfo]);
                int invertHashMaxCount = GetAndVerifyInvertHashMaxCount(args, args.Column[iinfo], _exes[iinfo]);
                if (invertHashMaxCount > 0)
                {
                    Utils.Add(ref invertIinfos, iinfo);
                    Utils.Add(ref invertHashMaxCounts, invertHashMaxCount);
                }
            }

            _types = InitColumnTypes();

            if (Utils.Size(invertIinfos) > 0)
            {
                // Build the invert hashes for all columns for which it was requested.
                // Only the source columns of those infos are activated on the cursor.
                var srcs = new HashSet <int>(invertIinfos.Select(i => Infos[i].Source));
                using (IRowCursor srcCursor = input.GetRowCursor(srcs.Contains))
                {
                    using (var ch = Host.Start("Invert hash building"))
                    {
                        InvertHashHelper[] helpers  = new InvertHashHelper[invertIinfos.Count];
                        Action             disposer = null;
                        for (int i = 0; i < helpers.Length; ++i)
                        {
                            int iinfo = invertIinfos[i];
                            Host.Assert(_types[iinfo].ItemType.KeyCount > 0);
                            var dstGetter = GetGetterCore(ch, srcCursor, iinfo, out disposer);
                            Host.Assert(disposer == null);
                            var ex       = _exes[iinfo];
                            var maxCount = invertHashMaxCounts[i];
                            helpers[i] = InvertHashHelper.Create(srcCursor, Infos[iinfo], ex, maxCount, dstGetter);
                        }
                        // Single pass over the data: every helper processes every row.
                        while (srcCursor.MoveNext())
                        {
                            for (int i = 0; i < helpers.Length; ++i)
                            {
                                helpers[i].Process();
                            }
                        }
                        // Sized by all columns; only the entries of inverse-hash columns are filled below.
                        _keyValues = new VBuffer <DvText> [_exes.Length];
                        _kvTypes   = new ColumnType[_exes.Length];
                        for (int i = 0; i < helpers.Length; ++i)
                        {
                            _keyValues[invertIinfos[i]] = helpers[i].GetKeyValuesMetadata();
                            Host.Assert(_keyValues[invertIinfos[i]].Length == _types[invertIinfos[i]].ItemType.KeyCount);
                            _kvTypes[invertIinfos[i]] = new VectorType(TextType.Instance, _keyValues[invertIinfos[i]].Length);
                        }
                        ch.Done();
                    }
                }
            }
            SetMetadata();
        }
Exemplo n.º 19
0
 /// <summary>
 /// Advances the cursor at <c>cursorIndex</c>; when it cannot move, switches to the following
 /// cursor and positions it at offset -1. Returns whether the incremented position is below Count.
 /// </summary>
 public bool MoveNext()
 {
     currentCursor = cursors[cursorIndex];
     bool advanced = currentCursor.MoveNext();
     if (!advanced) {
         // Switch to the next cursor in the set and place it before its first element.
         // NOTE(review): no bounds check on the incremented index, and the new cursor is left
         // before its first element for this call - confirm both are intended.
         cursorIndex++;
         currentCursor = cursors[cursorIndex];
         cursorOffset = -1;
         currentCursor.MoveTo(cursorOffset);
     }
     position++;
     return position < Count;
 }
 /// <summary>Forwards the move to the wrapped cursor and returns its result.</summary>
 public bool MoveNext() => _cursor.MoveNext();
Exemplo n.º 21
0
 /// <summary>Forwards the move to the wrapped input and returns its result.</summary>
 public bool MoveNext()
 {
     return _input.MoveNext();
 }
 /// <summary>Forwards the move to the wrapped input cursor and returns its result.</summary>
 public bool MoveNext() => _inputCursor.MoveNext();
Exemplo n.º 23
0
        /// <summary>
        /// Advances two cursors in lockstep and verifies that they produce the same rows.
        /// </summary>
        /// <param name="curs1">Left cursor; its schema determines the number of columns compared.</param>
        /// <param name="curs2">Right cursor; asserted to have the same column count as <paramref name="curs1"/>.</param>
        /// <param name="exactTypes">Forwarded to <c>EqualTypes</c> when comparing column types.</param>
        /// <param name="exactDoubles">Forwarded to <c>GetColumnComparer</c> when building value comparers.</param>
        /// <param name="checkId">When true, row IDs are compared via the delegate from <c>GetIdComparer</c>.</param>
        /// <param name="checkIdCollisions">
        /// When true, every row ID is recorded and repeated IDs are counted as collisions.
        /// </param>
        /// <returns>
        /// The result of <c>Failed()</c> on the first type, row-count or value mismatch; otherwise
        /// true when both cursors ended together and no ID collision was counted, false if any was.
        /// </returns>
        protected bool CheckSameValues(IRowCursor curs1, IRowCursor curs2, bool exactTypes, bool exactDoubles, bool checkId, bool checkIdCollisions = true)
        {
            Contracts.Assert(curs1.Schema.ColumnCount == curs2.Schema.ColumnCount);

            // Get the comparison delegates for each column. Columns that are not active in
            // both cursors keep a null entry and are skipped during the row comparison.
            int colLim = curs1.Schema.ColumnCount;

            Func<bool>[] comps = new Func<bool>[colLim];
            for (int col = 0; col < colLim; col++)
            {
                var f1 = curs1.IsColumnActive(col);
                var f2 = curs2.IsColumnActive(col);

                if (f1 && f2)
                {
                    var type1 = curs1.Schema.GetColumnType(col);
                    var type2 = curs2.Schema.GetColumnType(col);
                    if (!EqualTypes(type1, type2, exactTypes))
                    {
                        Fail("Different types");
                        return Failed();
                    }
                    comps[col] = GetColumnComparer(curs1, curs2, col, type1, exactDoubles);
                }
            }

            ValueGetter<UInt128> idGetter = null;
            Func<bool> idComp = checkId ? GetIdComparer(curs1, curs2, out idGetter) : null;
            HashSet<UInt128> idsSeen = null;

            // Collision tracking needs an ID getter even when IDs are not being compared.
            if (checkIdCollisions && idGetter == null)
            {
                idGetter = curs1.GetIdGetter();
            }
            long idCollisions = 0;
            UInt128 id = default(UInt128);

            for (; ;)
            {
                bool f1 = curs1.MoveNext();
                bool f2 = curs2.MoveNext();
                if (f1 != f2)
                {
                    if (f1)
                    {
                        Fail("Left has more rows at position: {0}", curs1.Position);
                    }
                    else
                    {
                        Fail("Right has more rows at position: {0}", curs2.Position);
                    }
                    return Failed();
                }

                if (!f1)
                {
                    // Both cursors are exhausted: report the accumulated collisions, if any.
                    if (idCollisions > 0)
                    {
                        Fail("{0} id collisions among {1} items", idCollisions, Utils.Size(idsSeen) + idCollisions);
                    }
                    return idCollisions == 0;
                }

                if (checkIdCollisions)
                {
                    idGetter(ref id);
                    if (!Utils.Add(ref idsSeen, id))
                    {
                        // Count every repeated ID so the final report states the true total.
                        idCollisions++;
                    }
                }

                Contracts.Assert(curs1.Position == curs2.Position);

                for (int col = 0; col < colLim; col++)
                {
                    var comp = comps[col];
                    if (comp != null && !comp())
                    {
                        Fail("Different values in column {0} of row {1}", col, curs1.Position);
                        return Failed();
                    }
                }

                // The ID comparison is per row, not per column: run it once, and also
                // when the schema has no columns at all.
                if (idComp != null && !idComp())
                {
                    Fail("Different values in ID of row {0}", curs1.Position);
                    return Failed();
                }
            }
        }
Exemplo n.º 24
0
        /// <summary>
        /// Reads every row of <paramref name="data"/> through a cursor, writes the active columns
        /// into per-column pipes, and hands each finished block to <paramref name="toCompress"/>.
        /// Records the total row count in <c>_rowCount</c> and completes the collection on success.
        /// Any exception is reported through <paramref name="exMarshaller"/> instead of propagating.
        /// </summary>
        /// <param name="toCompress">Collection receiving one <c>Block</c> per column per block index.</param>
        /// <param name="data">Data view the rows are read from.</param>
        /// <param name="activeColumns">Codecs of the columns to fetch; their source indices select the cursor's columns.</param>
        /// <param name="rowsPerBlock">Number of rows per block; asserted to be positive.</param>
        /// <param name="sw">Stopwatch; only validated as non-null in this method.</param>
        /// <param name="ch">Channel used for assertions.</param>
        /// <param name="pch">Optional progress channel; may be null.</param>
        /// <param name="exMarshaller">Receives exceptions and supplies the token passed to <c>toCompress.Add</c>.</param>
        private void FetchWorker(BlockingCollection <Block> toCompress, IDataView data,
                                 ColumnCodec[] activeColumns, int rowsPerBlock, Stopwatch sw, IChannel ch, IProgressChannel pch, ExceptionMarshaller exMarshaller)
        {
            Contracts.AssertValue(ch);
            Contracts.AssertValueOrNull(pch);
            ch.AssertValue(exMarshaller);
            try
            {
                ch.AssertValue(toCompress);
                ch.AssertValue(data);
                ch.AssertValue(activeColumns);
                ch.AssertValue(sw);
                ch.Assert(rowsPerBlock > 0);

                // This worker pulls rows from the cursor and stores them into blocks passed to toCompress.
                var sourceIndices = new HashSet<int>(activeColumns.Select(codec => codec.SourceIndex));
                long currentBlock = 0;
                int rowsLeftInBlock = rowsPerBlock;
                using (IRowCursor cursor = data.GetRowCursor(sourceIndices.Contains))
                {
                    // One write pipe per active column; all of them start their first block up front.
                    var pipes = new WritePipe[activeColumns.Length];
                    for (int i = 0; i < activeColumns.Length; i++)
                    {
                        pipes[i] = WritePipe.Create(this, cursor, activeColumns[i]);
                    }
                    for (int i = 0; i < pipes.Length; i++)
                    {
                        pipes[i].BeginBlock();
                    }

                    // Captured by the progress callback below, so it must stay a local that is updated in place.
                    long rows = 0;
                    if (pch != null)
                    {
                        pch.SetHeader(new ProgressHeader(new[] { "rows" }), e => e.SetProgress(0, rows));
                    }

                    while (cursor.MoveNext())
                    {
                        for (int i = 0; i < pipes.Length; i++)
                        {
                            pipes[i].FetchAndWrite();
                        }

                        rowsLeftInBlock--;
                        if (rowsLeftInBlock == 0)
                        {
                            // The block is full: emit it for every column and immediately open the next one.
                            for (int i = 0; i < pipes.Length; i++)
                            {
                                // REVIEW: It may be better if EndBlock got moved to a different worker thread.
                                toCompress.Add(new Block(pipes[i].EndBlock(), i, currentBlock), exMarshaller.Token);
                                pipes[i].BeginBlock();
                            }
                            rowsLeftInBlock = rowsPerBlock;
                            currentBlock++;
                        }

                        rows++;
                    }

                    // Flush the trailing, partially filled block if any rows were written to it.
                    if (rowsPerBlock > rowsLeftInBlock)
                    {
                        for (int i = 0; i < pipes.Length; i++)
                        {
                            toCompress.Add(new Block(pipes[i].EndBlock(), i, currentBlock), exMarshaller.Token);
                        }
                    }

                    Contracts.Assert(rows == (currentBlock + 1) * rowsPerBlock - rowsLeftInBlock);
                    _rowCount = rows;
                    if (pch != null)
                    {
                        pch.Checkpoint(rows);
                    }
                }

                // Reached only when no exception occurred while fetching.
                toCompress.CompleteAdding();
            }
            catch (Exception ex)
            {
                exMarshaller.Set("cursoring", ex);
            }
        }