/// <summary>
/// Trains a linear model by repeatedly handing the loaded data to the native SymSGD learner,
/// optionally starting from the weights and bias of an existing predictor.
/// </summary>
/// <param name="ch">Channel used for argument checks and informational messages.</param>
/// <param name="data">Training data; the vector size of its feature column fixes the number of weights.</param>
/// <param name="predictor">Optional initial model. When null, a freshly created dense weight vector and a bias of 0 are used.</param>
/// <param name="weightSetCount">Not read by this method.</param>
/// <returns>The predictor created from the trained weights and bias.</returns>
private TPredictor TrainCore(IChannel ch, RoleMappedData data, LinearModelParameters predictor, int weightSetCount)
        {
            int numFeatures   = data.Schema.Feature.Value.Type.GetVectorSize();
            var cursorFactory = new FloatLabelCursor.Factory(data, CursOpt.Label | CursOpt.Features);
            int numThreads    = 1;

            ch.CheckUserArg(numThreads > 0, nameof(_options.NumberOfThreads),
                            "The number of threads must be either null or a positive integer.");

            VBuffer <float> weights = default;
            float           bias    = 0.0f;

            if (predictor != null)
            {
                // Warm start: copy the existing model and make sure the weights are dense,
                // because the native learner receives them as a flat span.
                predictor.GetFeatureWeights(ref weights);
                VBufferUtils.Densify(ref weights);
                bias = predictor.Bias;
            }
            else
            {
                weights = VBufferUtils.CreateDense <float>(numFeatures);
            }

            var weightsEditor = VBufferEditor.CreateFromBuffer(ref weights);

            // Reference: Parasail. SymSGD.
            // A null option means the corresponding value is tuned by the learner; the value
            // passed by ref below is then only a starting point.
            bool tuneLR = _options.LearningRate == null;
            var  lr     = _options.LearningRate ?? 1.0f;

            bool tuneNumLocIter = (_options.UpdateFrequency == null);
            var  numLocIter     = _options.UpdateFrequency ?? 1;

            var l2Const = _options.L2Regularization;
            var piw     = _options.PositiveInstanceWeight;

            // This is state of the learner that is shared with the native code.
            State    state         = new State();
            GCHandle stateGCHandle = default;

            try
            {
                // Pinned so the address handed to the native code stays valid for the whole training run.
                stateGCHandle = GCHandle.Alloc(state, GCHandleType.Pinned);

                state.TotalInstancesProcessed = 0;
                using (InputDataManager inputDataManager = new InputDataManager(this, cursorFactory, ch))
                {
                    bool shouldInitialize = true;
                    using (Host.StartProgressChannel("Preprocessing"))
                    {
                        inputDataManager.LoadAsMuchAsPossible();
                    }

                    int iter = 0;
                    if (inputDataManager.IsFullyLoaded)
                    {
                        ch.Info("Data fully loaded into memory.");
                    }
                    using (var pch = Host.StartProgressChannel("Training"))
                    {
                        if (inputDataManager.IsFullyLoaded)
                        {
                            pch.SetHeader(new ProgressHeader(new[] { "iterations" }),
                                          entry => entry.SetProgress(0, state.PassIteration, _options.NumberOfIterations));
                            // If fully loaded, call the SymSGDNative and do not come back until learned for all iterations.
                            Native.LearnAll(inputDataManager, tuneLR, ref lr, l2Const, piw, weightsEditor.Values, ref bias, numFeatures,
                                            _options.NumberOfIterations, numThreads, tuneNumLocIter, ref numLocIter, _options.Tolerance, _options.Shuffle, shouldInitialize,
                                            stateGCHandle, ch.Info);
                            shouldInitialize = false;
                        }
                        else
                        {
                            pch.SetHeader(new ProgressHeader(new[] { "iterations" }),
                                          entry => entry.SetProgress(0, iter, _options.NumberOfIterations));

                            // Since we loaded data in batch sizes, multiple passes over the loaded data is feasible.
                            int numPassesForABatch = inputDataManager.Count / 10000;
                            while (iter < _options.NumberOfIterations)
                            {
                                // We want to train on the final passes thoroughly (without learning on the same batch multiple times)
                                // This is for fine tuning the AUC. Experimentally, we found that 1 or 2 passes is enough
                                int numFinalPassesToTrainThoroughly = 2;
                                // We also do not want to learn for more passes than what the user asked
                                int numPassesForThisBatch = Math.Min(numPassesForABatch, _options.NumberOfIterations - iter - numFinalPassesToTrainThoroughly);
                                // If all of this leaves us with 0 passes, then set numPassesForThisBatch to 1
                                numPassesForThisBatch = Math.Max(1, numPassesForThisBatch);
                                state.PassIteration   = iter;
                                Native.LearnAll(inputDataManager, tuneLR, ref lr, l2Const, piw, weightsEditor.Values, ref bias, numFeatures,
                                                numPassesForThisBatch, numThreads, tuneNumLocIter, ref numLocIter, _options.Tolerance, _options.Shuffle, shouldInitialize,
                                                stateGCHandle, ch.Info);
                                shouldInitialize = false;

                                // Check if we are done with going through the data
                                if (inputDataManager.FinishedTheLoad)
                                {
                                    // Only a completed sweep over the whole data set advances the iteration counter.
                                    iter += numPassesForThisBatch;
                                    // Check if more passes are left
                                    if (iter < _options.NumberOfIterations)
                                    {
                                        inputDataManager.RestartLoading(_options.Shuffle, Host);
                                    }
                                }

                                // If more passes are left, load as much as possible
                                if (iter < _options.NumberOfIterations)
                                {
                                    inputDataManager.LoadAsMuchAsPossible();
                                }
                            }
                        }

                        // Maps back the dense features that are mislocated
                        if (numThreads > 1)
                        {
                            Native.MapBackWeightVector(weightsEditor.Values, stateGCHandle);
                        }
                        // NOTE(review): this runs only on the success path; if LearnAll throws, the
                        // native-side state is not deallocated here. Confirm whether that is intended.
                        Native.DeallocateSequentially(stateGCHandle);
                    }
                }
            }
            finally
            {
                if (stateGCHandle.IsAllocated)
                {
                    stateGCHandle.Free();
                }
            }
            return(CreatePredictor(weights, bias));
        }
            /// <summary>
            /// Builds a getter that flattens the vector of text values in the mapped source column
            /// into a single vector of character codes.
            /// </summary>
            /// <remarks>
            /// Two layouts exist, selected by <c>_parent._isSeparatorStartEnd</c>:
            /// either every non-empty text gets its own start/end markers, or the whole vector is
            /// treated as one stream with a unit separator between pieces and one marker pair around it.
            /// Markers are emitted only when <c>_parent._useMarkerChars</c> is set.
            /// </remarks>
            /// <param name="input">Row to read the source column from.</param>
            /// <param name="iinfo">Index of the output column; mapped to the source column through <c>ColMapNewToOld</c>.</param>
            /// <returns>The getter producing the flattened character vector.</returns>
            private ValueGetter <VBuffer <ushort> > MakeGetterVec(DataViewRow input, int iinfo)
            {
                Host.AssertValue(input);

                var sourceColumn = input.Schema[ColMapNewToOld[iinfo]];
                int vectorSize   = sourceColumn.Type.GetVectorSize();
                Contracts.Assert(vectorSize >= 0);

                var getSrc = input.GetGetter <VBuffer <ReadOnlyMemory <char> > >(sourceColumn);
                var src    = default(VBuffer <ReadOnlyMemory <char> >);

                if (_parent._isSeparatorStartEnd)
                {
                    // Each non-empty text is wrapped in its own start/end marker pair.
                    return (ref VBuffer <ushort> dst) =>
                    {
                        getSrc(ref src);

                        bool addMarkers = _parent._useMarkerChars;
                        var  texts      = src.GetValues();

                        // First pass: compute the exact output length.
                        int total = 0;
                        foreach (var text in texts)
                        {
                            if (text.IsEmpty)
                            {
                                continue;
                            }
                            total += text.Length;
                            if (addMarkers)
                            {
                                total += TextMarkersCount;
                            }
                        }

                        var editor = VBufferEditor.Create(ref dst, total);
                        if (total > 0)
                        {
                            // Second pass: write markers and characters.
                            int pos = 0;
                            foreach (var text in texts)
                            {
                                if (text.IsEmpty)
                                {
                                    continue;
                                }
                                if (addMarkers)
                                {
                                    editor.Values[pos++] = TextStartMarker;
                                }
                                foreach (char c in text.Span)
                                {
                                    editor.Values[pos++] = c;
                                }
                                if (addMarkers)
                                {
                                    editor.Values[pos++] = TextEndMarker;
                                }
                            }
                            Contracts.Assert(pos == total);
                        }

                        dst = editor.Commit();
                    };
                }

                // ReadOnlyMemory can be a result of either concatenating text columns together
                // or application of word tokenizer before char tokenizer in TextFeaturizingEstimator.
                //
                // The vector is considered a single text stream: start and end markers are written
                // once around the whole output, and a UnitSeparator precedes every non-empty piece
                // whose slot index is greater than zero.
                return (ref VBuffer <ushort> dst) =>
                {
                    getSrc(ref src);

                    bool addMarkers = _parent._useMarkerChars;
                    var  pieces     = src.GetValues();

                    // First pass: compute the exact output length.
                    int total = 0;
                    for (int i = 0; i < pieces.Length; i++)
                    {
                        var piece = pieces[i];
                        if (piece.IsEmpty)
                        {
                            continue;
                        }
                        total += piece.Length;
                        if (i > 0)
                        {
                            total += 1;  // room for the UnitSeparator written before this piece
                        }
                    }
                    if (addMarkers)
                    {
                        total += TextMarkersCount;
                    }

                    var editor = VBufferEditor.Create(ref dst, total);
                    if (total > 0)
                    {
                        int pos = 0;
                        if (addMarkers)
                        {
                            editor.Values[pos++] = TextStartMarker;
                        }

                        for (int i = 0; i < pieces.Length; i++)
                        {
                            var piece = pieces[i];
                            if (piece.IsEmpty)
                            {
                                continue;
                            }
                            if (i > 0)
                            {
                                editor.Values[pos++] = UnitSeparator;
                            }
                            foreach (char c in piece.Span)
                            {
                                editor.Values[pos++] = c;
                            }
                        }

                        if (addMarkers)
                        {
                            editor.Values[pos++] = TextEndMarker;
                        }

                        Contracts.Assert(pos == total);
                    }

                    dst = editor.Commit();
                };
            }
Ejemplo n.º 3
0
        /// <inheritdoc/>
        /// <remarks>
        /// Makes one pass over the rows produced by <paramref name="cursorFactory"/>. For every row, each
        /// non-label class gets a dual update applied through a compare-and-swap (retried up to
        /// 2 * <paramref name="numThreads"/> times); the accumulated opposite update is then applied to
        /// the label class. Weights and biases are modified in place, either directly (L1 threshold of 0)
        /// or through the L1 intermediate buffers followed by soft-thresholding.
        /// </remarks>
        private protected override void TrainWithoutLock(IProgressChannelProvider progress, FloatLabelCursor.Factory cursorFactory, Random rand,
                                                         IdToIdxLookup idToIdx, int numThreads, DualsTableBase duals, Float[] biasReg, Float[] invariants, Float lambdaNInv,
                                                         VBuffer <Float>[] weights, Float[] biasUnreg, VBuffer <Float>[] l1IntermediateWeights, Float[] l1IntermediateBias, Float[] featureNormSquared)
        {
            Contracts.AssertValueOrNull(progress);
            Contracts.Assert(Args.L1Threshold.HasValue);
            Contracts.AssertValueOrNull(idToIdx);
            Contracts.AssertValueOrNull(invariants);
            Contracts.AssertValueOrNull(featureNormSquared);
            // One weight vector per class.
            int numClasses = Utils.Size(weights);

            Contracts.Assert(Utils.Size(biasReg) == numClasses);
            Contracts.Assert(Utils.Size(biasUnreg) == numClasses);

            // Upper bound on compare-and-swap attempts for a single dual variable.
            int  maxUpdateTrials = 2 * numThreads;
            var  l1Threshold     = Args.L1Threshold.Value;
            // With a zero threshold the L1 intermediate buffers are bypassed entirely.
            bool l1ThresholdZero = l1Threshold == 0;
            var  lr = Args.BiasLearningRate * Args.L2Const.Value;

            // Progress reporting is optional; 'using' on a null channel is a no-op.
            var pch = progress != null?progress.StartProgressChannel("Dual update") : null;

            using (pch)
                using (var cursor = Args.Shuffle ? cursorFactory.Create(rand) : cursorFactory.Create())
                {
                    long rowCount = 0;
                    if (pch != null)
                    {
                        pch.SetHeader(new ProgressHeader("examples"), e => e.SetProgress(0, rowCount));
                    }

                    Func <RowId, long> getIndexFromId = GetIndexFromIdGetter(idToIdx, biasReg.Length);
                    while (cursor.MoveNext())
                    {
                        long  idx = getIndexFromId(cursor.Id);
                        // Duals are laid out row-major: numClasses consecutive entries per row.
                        long  dualIndexInitPos = idx * numClasses;
                        var   features         = cursor.Features;
                        var   label            = (int)cursor.Label;
                        Float invariant;
                        Float normSquared;
                        if (invariants != null)
                        {
                            // Use the values precomputed by the caller for this row.
                            invariant = invariants[idx];
                            Contracts.AssertValue(featureNormSquared);
                            normSquared = featureNormSquared[idx];
                        }
                        else
                        {
                            normSquared = VectorUtils.NormSquared(in features);
                            // NOTE(review): presumably the +1 accounts for the bias acting as an extra
                            // constant feature when it has no separate learning rate -- confirm.
                            if (Args.BiasLearningRate == 0)
                            {
                                normSquared += 1;
                            }

                            invariant = _loss.ComputeDualUpdateInvariant(2 * normSquared * lambdaNInv * GetInstanceWeight(cursor));
                        }

                        // The output for the label class using current weights and bias.
                        var labelOutput    = WDot(in features, in weights[label], biasReg[label] + biasUnreg[label]);
                        var instanceWeight = GetInstanceWeight(cursor);

                        // This will be the new dual variable corresponding to the label class.
                        Float labelDual = 0;

                        // This will be used to update the weights and regularized bias corresponding to the label class.
                        Float labelPrimalUpdate = 0;

                        // This will be used to update the unregularized bias corresponding to the label class.
                        Float labelAdjustment = 0;

                        // Iterates through all classes.
                        for (int iClass = 0; iClass < numClasses; iClass++)
                        {
                            // Skip the dual/weights/bias update for label class. Will be taken care of at the end.
                            if (iClass == label)
                            {
                                continue;
                            }

                            var weightsEditor = VBufferEditor.CreateFromBuffer(ref weights[iClass]);
                            // Only needed on the L1 path; left as default when the threshold is zero.
                            var l1IntermediateWeightsEditor =
                                !l1ThresholdZero?VBufferEditor.CreateFromBuffer(ref l1IntermediateWeights[iClass]) :
                                    default;

                            // Loop trials for compare-and-swap updates of duals.
                            // In general, concurrent update conflict to the same dual variable is rare
                            // if data is shuffled.
                            // If every trial fails, this class is left untouched for the current row.
                            for (int numTrials = 0; numTrials < maxUpdateTrials; numTrials++)
                            {
                                long dualIndex  = iClass + dualIndexInitPos;
                                var  dual       = duals[dualIndex];
                                // Margin between the label class (including updates accumulated so far
                                // for this row) and the current class.
                                var  output     = labelOutput + labelPrimalUpdate * normSquared - WDot(in features, in weights[iClass], biasReg[iClass] + biasUnreg[iClass]);
                                var  dualUpdate = _loss.DualUpdate(output, 1, dual, invariant, numThreads);

                                // The successive over-relaxation approach to adjust the sum of dual variables (biasReg) to zero.
                                // Reference to details: http://stat.rutgers.edu/home/tzhang/papers/ml02_dual.pdf, pp. 16-17.
                                var adjustment = l1ThresholdZero ? lr * biasReg[iClass] : lr * l1IntermediateBias[iClass];
                                dualUpdate -= adjustment;
                                bool success = false;
                                // The dual is replaced only if it still holds the value read above;
                                // 'success' records whether the swap took place.
                                duals.ApplyAt(dualIndex, (long index, ref Float value) =>
                                              success = Interlocked.CompareExchange(ref value, dual + dualUpdate, dual) == dual);

                                if (success)
                                {
                                    // Note: dualConstraint[iClass] = lambdaNInv * (sum of duals[iClass])
                                    var primalUpdate = dualUpdate * lambdaNInv * instanceWeight;
                                    // The label class accumulates the negation of everything applied to the other classes.
                                    labelDual         -= dual + dualUpdate;
                                    labelPrimalUpdate += primalUpdate;
                                    biasUnreg[iClass] += adjustment * lambdaNInv * instanceWeight;
                                    labelAdjustment   -= adjustment;

                                    if (l1ThresholdZero)
                                    {
                                        // No L1: update the weights and regularized bias directly.
                                        VectorUtils.AddMult(in features, weightsEditor.Values, -primalUpdate);
                                        biasReg[iClass] -= primalUpdate;
                                    }
                                    else
                                    {
                                        // Iterative shrinkage-thresholding (aka. soft-thresholding)
                                        // Update v=denseWeights as if there's no L1
                                        // Thresholding: if |v[j]| < threshold, turn off weights[j]
                                        // If not, shrink: w[j] = v[j] - sign(v[j]) * threshold
                                        l1IntermediateBias[iClass] -= primalUpdate;
                                        // The regularized bias is soft-thresholded here only when the bias learning rate is 0.
                                        if (Args.BiasLearningRate == 0)
                                        {
                                            biasReg[iClass] = Math.Abs(l1IntermediateBias[iClass]) - l1Threshold > 0.0
                                        ? l1IntermediateBias[iClass] - Math.Sign(l1IntermediateBias[iClass]) * l1Threshold
                                        : 0;
                                        }

                                        var featureValues = features.GetValues();
                                        if (features.IsDense)
                                        {
                                            CpuMathUtils.SdcaL1UpdateDense(-primalUpdate, featureValues.Length, featureValues, l1Threshold, l1IntermediateWeightsEditor.Values, weightsEditor.Values);
                                        }
                                        else if (featureValues.Length > 0)
                                        {
                                            CpuMathUtils.SdcaL1UpdateSparse(-primalUpdate, featureValues.Length, featureValues, features.GetIndices(), l1Threshold, l1IntermediateWeightsEditor.Values, weightsEditor.Values);
                                        }
                                    }

                                    // Update applied; stop retrying for this class.
                                    break;
                                }
                            }
                        }

                        // Updating with label class weights and dual variable.
                        // Unlike the other classes, the label dual is written directly, without compare-and-swap.
                        duals[label + dualIndexInitPos] = labelDual;
                        biasUnreg[label] += labelAdjustment * lambdaNInv * instanceWeight;
                        if (l1ThresholdZero)
                        {
                            var weightsEditor = VBufferEditor.CreateFromBuffer(ref weights[label]);
                            VectorUtils.AddMult(in features, weightsEditor.Values, labelPrimalUpdate);
                            biasReg[label] += labelPrimalUpdate;
                        }
                        else
                        {
                            // Same soft-thresholding as above; here the regularized bias is always
                            // recomputed, regardless of the bias learning rate.
                            l1IntermediateBias[label] += labelPrimalUpdate;
                            var intermediateBias = l1IntermediateBias[label];
                            biasReg[label] = Math.Abs(intermediateBias) - l1Threshold > 0.0
                            ? intermediateBias - Math.Sign(intermediateBias) * l1Threshold
                            : 0;

                            var weightsEditor = VBufferEditor.CreateFromBuffer(ref weights[label]);
                            var l1IntermediateWeightsEditor = VBufferEditor.CreateFromBuffer(ref l1IntermediateWeights[label]);
                            var featureValues = features.GetValues();
                            if (features.IsDense)
                            {
                                CpuMathUtils.SdcaL1UpdateDense(labelPrimalUpdate, featureValues.Length, featureValues, l1Threshold, l1IntermediateWeightsEditor.Values, weightsEditor.Values);
                            }
                            else if (featureValues.Length > 0)
                            {
                                CpuMathUtils.SdcaL1UpdateSparse(labelPrimalUpdate, featureValues.Length, featureValues, features.GetIndices(), l1Threshold, l1IntermediateWeightsEditor.Values, weightsEditor.Values);
                            }
                        }

                        // Read by the progress callback registered above.
                        rowCount++;
                    }
                }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Creates the getters for the three output columns: the assigned cluster id, the cluster ids
        /// ordered by ascending score, and the scores in that same order.
        /// </summary>
        /// <param name="input">Row providing the score vector and the current position.</param>
        /// <param name="activeCols">Predicate telling which output columns are requested.</param>
        /// <param name="disposer">Always set to null; nothing needs to be disposed.</param>
        /// <returns>
        /// An array of three delegates indexed by output column; entries for columns that are
        /// not requested remain null.
        /// </returns>
        private protected override Delegate[] CreateGettersCore(Row input, Func <int, bool> activeCols, out Action disposer)
        {
            disposer = null;

            var result = new Delegate[3];

            bool anyRequested = activeCols(ClusterIdCol) || activeCols(SortedClusterCol) || activeCols(SortedClusterScoreCol);
            if (!anyRequested)
            {
                return result;
            }

            // State shared by all getters so the scores are fetched and ranked once per row.
            long lastPosition = -1;
            var  scores       = default(VBuffer <Single>);
            var  denseScores  = new Single[_numClusters];
            var  ranking      = new int[_numClusters];
            var  scoreGetter  = input.GetGetter <VBuffer <Single> >(ScoreIndex);

            // Re-reads the scores and recomputes the ranking when the row position has changed.
            void RefreshCache()
            {
                if (lastPosition == input.Position)
                {
                    return;
                }

                scoreGetter(ref scores);
                scores.CopyTo(denseScores);

                // Cluster indices ordered by ascending score.
                int next = 0;
                foreach (int cluster in Enumerable.Range(0, denseScores.Length).OrderBy(i => denseScores[i]))
                {
                    ranking[next++] = cluster;
                }
                lastPosition = input.Position;
            }

            if (activeCols(ClusterIdCol))
            {
                // Id of the first-ranked cluster, shifted by one.
                void GetClusterId(ref uint dst)
                {
                    RefreshCache();
                    dst = (uint)ranking[0] + 1;
                }
                result[ClusterIdCol] = (ValueGetter <uint>)GetClusterId;
            }

            if (activeCols(SortedClusterScoreCol))
            {
                void GetSortedScores(ref VBuffer <Single> dst)
                {
                    RefreshCache();
                    var editor = VBufferEditor.Create(ref dst, _numClusters);
                    for (int rank = 0; rank < _numClusters; rank++)
                    {
                        editor.Values[rank] = scores.GetItemOrDefault(ranking[rank]);
                    }
                    dst = editor.Commit();
                }
                result[SortedClusterScoreCol] = (ValueGetter <VBuffer <Single> >)GetSortedScores;
            }

            if (activeCols(SortedClusterCol))
            {
                void GetSortedClusters(ref VBuffer <uint> dst)
                {
                    RefreshCache();
                    var editor = VBufferEditor.Create(ref dst, _numClusters);
                    for (int rank = 0; rank < _numClusters; rank++)
                    {
                        editor.Values[rank] = (uint)ranking[rank] + 1;
                    }
                    dst = editor.Commit();
                }
                result[SortedClusterCol] = (ValueGetter <VBuffer <uint> >)GetSortedClusters;
            }

            return result;
        }
        /// <summary>
        /// Builds a getter that reads a vector of <typeparamref name="TSrc"/> values and produces a vector
        /// of hashes, one per output slot, each combining the source slots mapped to it.
        /// </summary>
        /// <typeparam name="TSrc">Item type of the input vector.</typeparam>
        /// <param name="input">Row input</param>
        /// <param name="iinfo">Index of the getter</param>
        /// <returns>The getter producing the hash vector; every hash is masked to the configured bit count and offset by one.</returns>
        private ValueGetter <VBuffer <uint> > ComposeGetterVecToVec <TSrc>(Row input, int iinfo)
        {
            Host.AssertValue(input);
            Host.Assert(Infos[iinfo].TypeSrc.IsVector);

            var getSrc       = GetSrcGetter <VBuffer <TSrc> >(input, iinfo);
            var hashFunction = ComposeHashDelegate <TSrc>();
            var src          = default(VBuffer <TSrc>);

            var     ex                = _exes[iinfo];
            int     outputCount       = ex.OutputValueCount;
            int     expectedSrcLength = Infos[iinfo].TypeSrc.VectorSize;
            int[][] slotMap           = ex.SlotMap;

            // REVIEW: consider adding a fix-zero functionality (subtract emptyTextHash from all hashes)
            var  mask     = (1U << ex.HashBits) - 1;
            var  hashSeed = ex.HashSeed;
            bool ordered  = ex.Ordered;

            // Scratch buffer for sparse inputs; allocated on first use and reused across calls.
            TSrc[] scratch = null;

            return (ref VBuffer <uint> dst) =>
            {
                getSrc(ref src);
                Host.Check(src.Length == expectedSrcLength);

                // Hashing always works on a dense view so that sparse and dense representations
                // of the same vector yield the same hash.
                // REVIEW: this performs poorly if only a fraction of sparse vector is used for hashing.
                // This scenario was unlikely at the time of writing.
                ReadOnlySpan <TSrc> values;
                if (!src.IsDense)
                {
                    scratch = scratch ?? new TSrc[expectedSrcLength];
                    src.CopyTo(scratch);
                    values = scratch;
                }
                else
                {
                    values = src.GetValues();
                }

                var editor = VBufferEditor.Create(ref dst, outputCount);

                for (int slot = 0; slot < outputCount; slot++)
                {
                    int[] sources = slotMap[slot];
                    uint  hash    = hashSeed;

                    for (int k = 0; k < sources.Length; k++)
                    {
                        int srcSlot = sources[k];

                        // REVIEW: some legacy code hashes 0 for srcSlot in ord- case, do we need to preserve this behavior?
                        if (ordered)
                        {
                            // Mix in the slot position so the same value in different slots hashes differently.
                            hash = Hashing.MurmurRound(hash, (uint)srcSlot);
                        }
                        hash = hashFunction(in values[srcSlot], hash);
                    }

                    // +1 to offset from zero, which has special meaning for KeyType
                    editor.Values[slot] = (Hashing.MixHash(hash) & mask) + 1;
                }

                dst = editor.Commit();
            };
        }
Ejemplo n.º 6
0
            private protected override sealed void TransformCore(ref TInput input, FixedSizeQueue <TInput> windowedBuffer, long iteration, ref VBuffer <Double> dst)
            {
                var outputLength = Parent.OutputLength;

                Host.Assert(outputLength >= 2);

                var   result   = VBufferEditor.Create(ref dst, outputLength);
                float rawScore = 0;

                for (int i = 0; i < outputLength; ++i)
                {
                    result.Values[i] = Double.NaN;
                }

                // Step 1: Computing the raw anomaly score
                result.Values[1] = ComputeRawAnomalyScore(ref input, windowedBuffer, iteration);

                if (Double.IsNaN(result.Values[1]))
                {
                    result.Values[0] = 0;
                }
                else
                {
                    if (WindowSize > 0)
                    {
                        // Step 2: Computing the p-value score
                        rawScore = (float)result.Values[1];
                        if (Parent.ThresholdScore == AlertingScore.RawScore)
                        {
                            switch (Parent.Side)
                            {
                            case AnomalySide.Negative:
                                rawScore = (float)(-result.Values[1]);
                                break;

                            case AnomalySide.Positive:
                                break;

                            default:
                                rawScore = (float)Math.Abs(result.Values[1]);
                                break;
                            }
                        }
                        else
                        {
                            result.Values[2] = ComputeKernelPValue(rawScore);

                            switch (Parent.Side)
                            {
                            case AnomalySide.Negative:
                                result.Values[2] = 1 - result.Values[2];
                                break;

                            case AnomalySide.Positive:
                                break;

                            default:
                                result.Values[2] = Math.Min(result.Values[2], 1 - result.Values[2]);
                                break;
                            }

                            // Keeping the p-value in the safe range
                            if (result.Values[2] < SequentialAnomalyDetectionTransformBase <TInput, TState> .MinPValue)
                            {
                                result.Values[2] = SequentialAnomalyDetectionTransformBase <TInput, TState> .MinPValue;
                            }
                            else if (result.Values[2] > SequentialAnomalyDetectionTransformBase <TInput, TState> .MaxPValue)
                            {
                                result.Values[2] = SequentialAnomalyDetectionTransformBase <TInput, TState> .MaxPValue;
                            }

                            RawScoreBuffer.AddLast(rawScore);

                            // Step 3: Computing the martingale value
                            if (Parent.Martingale != MartingaleType.None && Parent.ThresholdScore == AlertingScore.MartingaleScore)
                            {
                                Double martingaleUpdate = 0;
                                switch (Parent.Martingale)
                                {
                                case MartingaleType.Power:
                                    martingaleUpdate = Parent.LogPowerMartigaleBettingFunc(result.Values[2], Parent.PowerMartingaleEpsilon);
                                    break;

                                case MartingaleType.Mixture:
                                    martingaleUpdate = Parent.LogMixtureMartigaleBettingFunc(result.Values[2]);
                                    break;
                                }

                                if (LogMartingaleUpdateBuffer.Count == 0)
                                {
                                    for (int i = 0; i < LogMartingaleUpdateBuffer.Capacity; ++i)
                                    {
                                        LogMartingaleUpdateBuffer.AddLast(martingaleUpdate);
                                    }
                                    _logMartingaleValue += LogMartingaleUpdateBuffer.Capacity * martingaleUpdate;
                                }
                                else
                                {
                                    _logMartingaleValue += martingaleUpdate;
                                    _logMartingaleValue -= LogMartingaleUpdateBuffer.PeekFirst();
                                    LogMartingaleUpdateBuffer.AddLast(martingaleUpdate);
                                }

                                result.Values[3] = Math.Exp(_logMartingaleValue);
                            }
                        }
                    }

                    // Generating alert
                    bool alert = false;

                    if (RawScoreBuffer.IsFull)     // No alert until the buffer is completely full.
                    {
                        switch (Parent.ThresholdScore)
                        {
                        case AlertingScore.RawScore:
                            alert = rawScore >= Parent.AlertThreshold;
                            break;

                        case AlertingScore.PValueScore:
                            alert = result.Values[2] <= Parent.AlertThreshold;
                            break;

                        case AlertingScore.MartingaleScore:
                            alert = (Parent.Martingale != MartingaleType.None) && (result.Values[3] >= Parent.AlertThreshold);

                            if (alert)
                            {
                                if (_martingaleAlertCounter > 0)
                                {
                                    alert = false;
                                }
                                else
                                {
                                    _martingaleAlertCounter = Parent.WindowSize;
                                }
                            }

                            _martingaleAlertCounter--;
                            _martingaleAlertCounter = _martingaleAlertCounter < 0 ? 0 : _martingaleAlertCounter;
                            break;
                        }
                    }

                    result.Values[0] = Convert.ToDouble(alert);
                }

                dst = result.Commit();
            }
        // This converts in place. The logical length of the buffer is doubled and the result is always
        // sparse: a non-zero, non-NaN value at source slot i is kept at destination slot 2 * i, a NaN at
        // source slot i becomes a 1 at destination slot 2 * i + 1, and zero values are dropped.
        private static void FillValues(IExceptionContext ectx, ref VBuffer <Float> buffer)
        {
            int srcLength = buffer.Length;

            // The destination length is srcLength * 2, so reject lengths for which that would not fit in an int.
            ectx.Check(0 <= srcLength & srcLength < int.MaxValue / 2);

            var srcValues = buffer.GetValues();
            var editor    = VBufferEditor.Create(ref buffer, srcLength * 2, srcValues.Length);

            // Number of destination entries produced so far. It never gets ahead of the read position,
            // which is what the in-place conversion relies on (asserted in both loops below).
            int written = 0;

            if (!buffer.IsDense)
            {
                // Sparse input: each explicit value carries its own slot index.
                var srcIndices = buffer.GetIndices();
                int lastSlot   = -1;

                for (int pos = 0; pos < srcValues.Length; pos++)
                {
                    ectx.Assert(written <= pos);
                    var item = srcValues[pos];
                    if (item != 0)
                    {
                        int slot = srcIndices[pos];

                        // Indices are expected to be strictly increasing and within the source length.
                        ectx.Assert(lastSlot < slot & slot < srcLength);
                        lastSlot = slot;

                        bool isNa = Float.IsNaN(item);
                        editor.Values[written]  = isNa ? 1 : item;
                        editor.Indices[written] = isNa ? 2 * slot + 1 : 2 * slot;
                        written++;
                    }
                }
            }
            else
            {
                // Dense input: the position of a value is its slot index. We always produce sparse.
                for (int slot = 0; slot < srcValues.Length; slot++)
                {
                    ectx.Assert(written <= slot);
                    var item = srcValues[slot];
                    if (item != 0)
                    {
                        bool isNa = Float.IsNaN(item);
                        editor.Values[written]  = isNa ? 1 : item;
                        editor.Indices[written] = isNa ? 2 * slot + 1 : 2 * slot;
                        written++;
                    }
                }
            }

            ectx.Assert(0 <= written & written <= srcValues.Length);

            // Keep only the entries that were actually written.
            buffer = editor.CommitTruncated(written);
        }
        // Fills dst with the slot names of output column iinfo. Every source slot yields two output slots:
        // the even one carries the source name and the odd one carries the source name followed by
        // IndicatorSuffix. Throws the MetadataUtils.ExceptGetMetadata() exception when the output size is
        // zero or when the source column has no text slot names of the matching size.
        private void GetSlotNames(int iinfo, ref VBuffer <ReadOnlyMemory <char> > dst)
        {
            Host.Assert(0 <= iinfo && iinfo < Infos.Length);

            int slotCount = _types[iinfo].VectorSize;
            if (slotCount == 0)
            {
                throw MetadataUtils.ExceptGetMetadata();
            }

            var editor  = VBufferEditor.Create(ref dst, slotCount);
            var info    = Infos[iinfo];
            var srcType = info.TypeSrc;

            if (srcType.IsVector)
            {
                Host.Assert(srcType.IsKnownSizeVector);
                Host.Assert(slotCount == 2 * srcType.VectorSize);

                // REVIEW: Do we need to verify that there is metadata or should we just call GetMetadata?
                var namesType = Source.Schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, info.Source);
                bool usable   = namesType != null && namesType.VectorSize == srcType.VectorSize && namesType.ItemType.IsText;
                if (!usable)
                {
                    throw MetadataUtils.ExceptGetMetadata();
                }

                var srcNames = default(VBuffer <ReadOnlyMemory <char> >);
                Source.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, info.Source, ref srcNames);

                // We both assert and check. If this fails, there is a bug somewhere (possibly in this code
                // but more likely in the implementation of Base). On the other hand, we don't want to proceed
                // if we've received garbage.
                Host.Check(srcNames.Length == srcType.VectorSize, "Unexpected slot name vector size");

                var nameBuilder = new StringBuilder();
                int next        = 0;
                foreach (var entry in srcNames.Items(all: true))
                {
                    Host.Assert(0 <= next && next < slotCount);
                    Host.Assert(next % 2 == 0);

                    nameBuilder.Clear();
                    if (!entry.Value.IsEmpty)
                    {
                        nameBuilder.AppendMemory(entry.Value);
                    }
                    else
                    {
                        // Unnamed source slot: fall back to its bracketed source index.
                        nameBuilder.Append('[').Append(next / 2).Append(']');
                    }

                    // Build "<name><suffix>" once; the plain name is a prefix slice of that same string.
                    int baseLength = nameBuilder.Length;
                    nameBuilder.Append(IndicatorSuffix);
                    var withSuffix = nameBuilder.ToString().AsMemory();

                    editor.Values[next]     = withSuffix.Slice(0, baseLength);
                    editor.Values[next + 1] = withSuffix;
                    next += 2;
                }
                Host.Assert(next == slotCount);
            }
            else
            {
                // Non-vector source: exactly two slots, named after the source column itself.
                Host.Assert(slotCount == 2);
                var columnName = Source.Schema.GetColumnName(info.Source);
                editor.Values[0] = columnName.AsMemory();
                editor.Values[1] = (columnName + IndicatorSuffix).AsMemory();
            }

            dst = editor.Commit();
        }
        /// <summary>
        /// Builds a <see cref="DataViewSchema"/> with one column per operation in <paramref name="graph"/>
        /// that has at least one output and whose first output has an element type that
        /// <c>DnnUtils.Tf2MlNetTypeOrNull</c> can map. Each column is named after the operation and carries
        /// the operation's type (and, when it has inputs, the names of its upstream operations) as annotations.
        /// </summary>
        /// <param name="ectx">Exception context. Not used by the current implementation.</param>
        /// <param name="graph">The graph whose operations are enumerated.</param>
        /// <param name="opType">When not null, only operations whose <c>OpType</c> equals this value are included.</param>
        /// <returns>The schema built from the accepted operations.</returns>
        internal static DataViewSchema GetModelSchema(IExceptionContext ectx, Graph graph, string opType = null)
        {
            var schemaBuilder = new DataViewSchema.Builder();

            foreach (Operation op in graph)
            {
                if (opType != null && opType != op.OpType)
                {
                    continue;
                }

                // Operators which have NumOutputs <= 0 need to be filtered.
                // The 'GetTensorShape' method crashes TensorFlow runtime
                // (https://github.com/dotnet/machinelearning/issues/2156) when the operator has no outputs.
                // This guard runs before anything below touches output 0, so that neither the type query
                // nor the shape query is ever issued for an operator without outputs.
                if (op.NumOutputs <= 0)
                {
                    continue;
                }

                var tfType = op.OutputType(0);
                // Determine element type in Tensorflow tensor. For example, a vector of floats may get NumberType.R4 here.
                var mlType = DnnUtils.Tf2MlNetTypeOrNull(tfType);

                // If the type is not supported in ML.NET then we cannot represent it as a column in an Schema.
                // We also cannot output it with a TensorFlowTransform, so we skip it.
                if (mlType == null)
                {
                    continue;
                }

                // Construct the final ML.NET type of a Tensorflow variable.
                // The default is a vector of unknown size; a sized vector is used only when the shape is
                // non-empty, is not a single non-positive dimension, and every dimension after the first
                // is positive. A non-positive first dimension is dropped from the shape.
                var tensorShape = op.output.TensorShape.Dimensions;
                var columnType  = new VectorDataViewType(mlType);
                if (!(Utils.Size(tensorShape) == 1 && tensorShape[0] <= 0) &&
                    (Utils.Size(tensorShape) > 0 && tensorShape.Skip(1).All(x => x > 0)))
                {
                    columnType = new VectorDataViewType(mlType, tensorShape[0] > 0 ? tensorShape : tensorShape.Skip(1).ToArray());
                }

                // There can be at most two metadata fields.
                //  1. The first field always presents. Its value is this operator's type. For example,
                //     if an output is produced by an "Softmax" operator, the value of this field should be "Softmax".
                //  2. The second field stores operators whose outputs are consumed by this operator. In other words,
                //     these values are names of some upstream operators which should be evaluated before executing
                //     the current operator. It's possible that one operator doesn't need any input, so this field
                //     can be missing.
                var metadataBuilder = new DataViewSchema.Annotations.Builder();
                // Create the first metadata field.
                metadataBuilder.Add(TensorflowOperatorTypeKind, TextDataViewType.Instance, (ref ReadOnlyMemory <char> value) => value = op.OpType.AsMemory());
                if (op.NumInputs > 0)
                {
                    // Put upstream operators' names to an array (type: VBuffer) of string (type: ReadOnlyMemory<char>).
                    VBuffer <ReadOnlyMemory <char> > upstreamOperatorNames = default;
                    var bufferEditor = VBufferEditor.Create(ref upstreamOperatorNames, op.NumInputs);
                    for (int i = 0; i < op.NumInputs; ++i)
                    {
                        bufferEditor.Values[i] = op.inputs[i].op.name.AsMemory();
                    }
                    upstreamOperatorNames = bufferEditor.Commit(); // Used in metadata's getter.

                    // Create the second metadata field.
                    metadataBuilder.Add(TensorflowUpstreamOperatorsKind, new VectorDataViewType(TextDataViewType.Instance, op.NumInputs),
                                        (ref VBuffer <ReadOnlyMemory <char> > value) => { upstreamOperatorNames.CopyTo(ref value); });
                }

                schemaBuilder.AddColumn(op.name, columnType, metadataBuilder.ToAnnotations());
            }
            return(schemaBuilder.ToSchema());
        }
Ejemplo n.º 10
0
            // Reads the label column labelCol from the transposer and stores it in _labels as a VBuffer<int>,
            // setting _numLabels alongside. Int32, Single, Double and Boolean labels go through the
            // corresponding Bin* helper and are then densified and shifted by the reported minimum; any other
            // label type is handled as a key type through GetKeyLabels and is stored without shifting.
            private void GetLabels(Transposer trans, DataViewType labelType, int labelCol)
            {
                int min;
                int lim;
                var binned = default(VBuffer <int>);

                // Note: NAs have their own separate bin.
                if (labelType == NumberDataViewType.Int32)
                {
                    var raw = default(VBuffer <int>);
                    trans.GetSingleSlotValue(labelCol, ref raw);
                    BinInts(in raw, ref binned, _numBins, out min, out lim);
                }
                else if (labelType == NumberDataViewType.Single)
                {
                    var raw = default(VBuffer <Single>);
                    trans.GetSingleSlotValue(labelCol, ref raw);
                    BinSingles(in raw, ref binned, _numBins, out min, out lim);
                }
                else if (labelType == NumberDataViewType.Double)
                {
                    var raw = default(VBuffer <Double>);
                    trans.GetSingleSlotValue(labelCol, ref raw);
                    BinDoubles(in raw, ref binned, _numBins, out min, out lim);
                }
                else if (labelType is BooleanDataViewType)
                {
                    var raw = default(VBuffer <bool>);
                    trans.GetSingleSlotValue(labelCol, ref raw);
                    BinBools(in raw, ref binned);

                    // Fixed range for Boolean labels: lim - min == 3 label values.
                    min = -1;
                    lim = 2;
                }
                else
                {
                    ulong labelKeyCount = labelType.GetKeyCount();
                    Contracts.Assert(labelKeyCount < Utils.ArrayMaxSize);

                    // GetKeyLabels is generic over the key's raw type, which is only known at run time,
                    // so the closed method is constructed and invoked through reflection.
                    KeyLabelGetter <int> template = GetKeyLabels <int>;
                    var keyGetter = template.GetMethodInfo().GetGenericMethodDefinition().MakeGenericMethod(labelType.RawType);
                    var arguments = new object[] { trans, labelCol, labelType };

                    _labels    = (VBuffer <int>)keyGetter.Invoke(this, arguments);
                    _numLabels = labelType.GetKeyCountAsInt32(_host) + 1;

                    // No need to densify or shift in this case.
                    return;
                }

                // Every binned branch above reports its label range as [min, lim).
                _numLabels = lim - min;

                // Densify and shift labels so that the smallest one becomes zero.
                VBufferUtils.Densify(ref binned);
                Contracts.Assert(binned.IsDense);

                var editor  = VBufferEditor.CreateFromBuffer(ref binned);
                var shifted = editor.Values;
                for (int i = 0; i < binned.Length; i++)
                {
                    shifted[i] -= min;
                    Contracts.Assert(shifted[i] < _numLabels);
                }

                _labels = editor.Commit();
            }