private bool IsValid(IValueMapperDist mapper, ref VectorDataViewType inputType)
        {
            if (mapper == null)
            {
                return(false);
            }
            VectorDataViewType vectorType = mapper.InputType as VectorDataViewType;

            if (vectorType == null || !vectorType.IsKnownSize || vectorType.ItemType != NumberDataViewType.Single)
            {
                return(false);
            }
            if (inputType == null)
            {
                inputType = vectorType;
            }
            else if (inputType.Size != vectorType.Size)
            {
                return(false);
            }
            if (mapper.OutputType != NumberDataViewType.Single)
            {
                return(false);
            }
            if (mapper.DistType != NumberDataViewType.Single)
            {
                return(false);
            }
            return(true);
        }
Example #2
            public ImplVec(IHostEnvironment env, PredictorModel[] predictors, IMulticlassOutputCombiner combiner)
                : base(env, predictors, combiner, LoaderSignature, AnnotationUtils.Const.ScoreColumnKind.MulticlassClassification)
            {
                int classCount = CheckLabelColumn(Host, predictors, false);

                _scoreType = new VectorDataViewType(NumberDataViewType.Single, classCount);
            }
Example #3
            public ImplVec(IHostEnvironment env, ModelLoadContext ctx, string scoreColumnKind)
                : base(env, ctx, scoreColumnKind)
            {
                int classCount = CheckLabelColumn(Host, PredictorModels, false);

                _scoreType = new VectorDataViewType(NumberDataViewType.Single, classCount);
            }
 public SlotCursorImpl(IChannelProvider provider, SlotCursor cursor, VectorDataViewType typeDst)
     : base(provider, cursor)
 {
     Ch.AssertValue(typeDst);
     _getter = RowCursorUtils.GetLabelGetter(cursor);
     _type   = typeDst;
 }
Example #5
        private VectorDataViewType InitializeMappers(out IValueMapper[] mappers)
        {
            Host.AssertNonEmpty(Models);

            mappers = new IValueMapper[Models.Length];
            VectorDataViewType inputType = null;

            for (int i = 0; i < Models.Length; i++)
            {
                var vm = Models[i].Predictor as IValueMapper;
                if (!IsValid(vm, out VectorDataViewType vmInputType))
                {
                    throw Host.Except("Predictor does not implement expected interface");
                }
                if (vmInputType.Size > 0)
                {
                    if (inputType == null)
                    {
                        inputType = vmInputType;
                    }
                    else if (vmInputType.Size != inputType.Size)
                    {
                        throw Host.Except("Predictor input type mismatch");
                    }
                }
                mappers[i] = vm;
            }

            return(inputType ?? new VectorDataViewType(NumberDataViewType.Single));
        }
Example #6
 /// <summary>
 /// Instantiate new ensemble model from existing sub-models.
 /// </summary>
 /// <param name="env">The host environment.</param>
 /// <param name="kind">The prediction kind <see cref="PredictionKind"/></param>
 /// <param name="models">Array of sub-models that you want to ensemble together.</param>
 /// <param name="combiner">The combiner class to use to ensemble the models.</param>
 /// <param name="weights">The weights assigned to each model to be ensembled.</param>
 internal EnsembleModelParameters(IHostEnvironment env, PredictionKind kind,
                                  FeatureSubsetModel <float>[] models, IOutputCombiner <Single> combiner, Single[] weights = null)
     : base(env, LoaderSignature, models, combiner, weights)
 {
     PredictionKind = kind;
     _inputType     = InitializeMappers(out _mappers);
 }
Example #7
            /// <summary>
            /// Compute the output schema of a <see cref="GroupTransform"/> given an input schema.
            /// </summary>
            /// <param name="sourceSchema">Input schema.</param>
            /// <returns>The associated output schema produced by <see cref="GroupTransform"/>.</returns>
            private DataViewSchema BuildOutputSchema(DataViewSchema sourceSchema)
            {
                // Create the schema builder. We will sequentially add group columns and then aggregated columns.
                var schemaBuilder = new DataViewSchema.Builder();

                // Handle group(-key) columns. Those columns are used as keys to partition rows in the input data; specifically,
                // rows with the same key value will be merged into one row in the output data.
                foreach (var groupKeyColumnName in _groupColumns)
                {
                    schemaBuilder.AddColumn(groupKeyColumnName, sourceSchema[groupKeyColumnName].Type, sourceSchema[groupKeyColumnName].Annotations);
                }

                // Handle aggregated (aka keep) columns.
                foreach (var groupValueColumnName in _keepColumns)
                {
                    // Prepare column's metadata.
                    var metadataBuilder = new DataViewSchema.Annotations.Builder();
                    metadataBuilder.Add(sourceSchema[groupValueColumnName].Annotations,
                                        s => s == AnnotationUtils.Kinds.IsNormalized || s == AnnotationUtils.Kinds.KeyValues);

                    // Prepare column's type.
                    var aggregatedValueType = sourceSchema[groupValueColumnName].Type as PrimitiveDataViewType;
                    _ectx.CheckValue(aggregatedValueType, nameof(aggregatedValueType), "Columns being aggregated must be primitive types such as string, float, or integer");
                    var aggregatedResultType = new VectorDataViewType(aggregatedValueType);

                    // Add column into output schema.
                    schemaBuilder.AddColumn(groupValueColumnName, aggregatedResultType, metadataBuilder.ToAnnotations());
                }

                return(schemaBuilder.ToSchema());
            }
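For orientation, here is a minimal standalone sketch of the DataViewSchema.Builder API that BuildOutputSchema relies on; the column names and types below are illustrative placeholders, not values taken from GroupTransform.
            private static DataViewSchema BuildExampleSchema()
            {
                // Assemble a two-column schema: a text key column and a
                // variable-size vector of singles, using the same Builder calls as above.
                var builder = new DataViewSchema.Builder();
                builder.AddColumn("GroupKey", TextDataViewType.Instance);
                builder.AddColumn("Values", new VectorDataViewType(NumberDataViewType.Single));
                return builder.ToSchema();
            }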
            protected bool IsValid(IValueMapper mapper, ref VectorDataViewType inputType)
            {
                Contracts.AssertValueOrNull(mapper);
                Contracts.AssertValueOrNull(inputType);

                if (mapper == null)
                {
                    return(false);
                }
                if (mapper.OutputType != NumberDataViewType.Single)
                {
                    return(false);
                }
                if (!(mapper.InputType is VectorDataViewType mapperVectorType) || mapperVectorType.ItemType != NumberDataViewType.Single)
                {
                    return(false);
                }
                if (inputType == null)
                {
                    inputType = mapperVectorType;
                }
                else if (inputType.Size != mapperVectorType.Size)
                {
                    if (inputType.Size == 0)
                    {
                        inputType = mapperVectorType;
                    }
                    else if (mapperVectorType.Size != 0)
                    {
                        return(false);
                    }
                }
                return(true);
            }
                protected override void VerifyView(IDataView view)
                {
                    Host.AssertValue(view);
                    // This must have precisely one column, of type vector.
                    var schema = view.Schema;

                    Host.CheckDecode(schema.Count == 1);
                    var ttype = schema[0].Type;
                    VectorDataViewType vectorType = ttype as VectorDataViewType;

                    if (vectorType == null)
                    {
                        throw Host.ExceptDecode();
                    }
                    // We have no way to encode a type of zero length vectors per se in the case
                    // when there are no rows in the original dataset, but accept that if the vector
                    // count is "unknown" then it's really a zero-row dataset.
                    Host.CheckDecode(vectorType.Size == _parent._header.RowCount);
                    // This came from a binary IDV, so it must have an actual "row" count,
                    // though here that row count is really more of a "slot" count.
                    var rowCountNull = view.GetRowCount();

                    Host.Assert(rowCountNull.HasValue);
                    long rowCount = rowCountNull.Value;
                    // There must be one "row" per "slot" on the column this is a transpose of.
                    // Check that.
                    var type = _parent.Schema[_col].Type;

                    Host.CheckDecode(type.GetValueCount() == rowCount);
                    // The item types should be the same.
                    Host.CheckDecode(type.GetItemType().Equals(vectorType.ItemType));
                }
        private static CountAggregator GetVecAggregator(DataViewRow row, VectorDataViewType colType, int colSrc)
        {
            Func <DataViewRow, VectorDataViewType, int, CountAggregator> del = GetVecAggregator <int>;
            var methodInfo = del.GetMethodInfo().GetGenericMethodDefinition().MakeGenericMethod(colType.ItemType.RawType);

            return((CountAggregator)methodInfo.Invoke(null, new object[] { row, colType, colSrc }));
        }
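In effect, the reflection above only instantiates the generic helper for the column's runtime item type: for a vector column of ints it is equivalent to calling GetVecAggregator<int>(row, colType, colSrc) directly, which keeps a single non-generic entry point while still producing a strongly typed aggregator.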
        private PairwiseCouplingModelParameters(IHostEnvironment env, ModelLoadContext ctx)
            : base(env, RegistrationName, ctx)
        {
            // *** Binary format ***
            // int: number of classes
            _numClasses = ctx.Reader.ReadInt32();
            Host.CheckDecode(_numClasses > 0);

            long count = (long)_numClasses * (_numClasses + 1) / 2;

            Host.CheckDecode(count <= int.MaxValue);

            // Load the predictors.
            _predictors = new TDistPredictor[(int)count];
            int index = 0;

            for (int i = 0; i < _numClasses; i++)
            {
                for (int j = 0; j < i; j++)
                {
                    Host.Assert(index == GetIndex(i, j));
                    ctx.LoadModel <TDistPredictor, SignatureLoadModel>(Host, out _predictors[index++], string.Format(SubPredictorFmt2, i, j));
                }
                Host.Assert(index == GetIndex(i, i));
                ctx.LoadModel <TDistPredictor, SignatureLoadModel>(Host, out _predictors[index++], string.Format(SubPredictorFmt, i));
            }
            _inputType  = InitializeMappers(out _mappers);
            _outputType = new VectorDataViewType(NumberDataViewType.Single, _numClasses);
        }
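The loops above walk a lower-triangular layout: with _numClasses = k there are k * (k + 1) / 2 sub-predictors, and the assertions imply that the flat index of the pair (i, j) with j <= i is i * (i + 1) / 2 + j. GetIndex itself is not shown in this snippet, so the helper below is an inferred equivalent for illustration.
        // Inferred from the assertions above: flat index of sub-predictor (i, j), j <= i.
        private static int GetPairIndex(int i, int j) => i * (i + 1) / 2 + j;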
        /// <summary>
        /// Constructor. w, thetaPrime, theta must be dense <see cref="VBuffer{T}"/>s.
        /// Note that this takes over ownership of all such vectors.
        /// </summary>
        internal LdSvmModelParameters(IHostEnvironment env, VBuffer <float>[] w, VBuffer <float>[] thetaPrime, VBuffer <float>[] theta,
                                      float sigma, float[] biasW, float[] biasTheta, float[] biasThetaPrime, int treeDepth)
            : base(env, LoaderSignature)
        {
            // _numLeaf is a 32-bit signed integer.
            Host.Assert(treeDepth > 0 && treeDepth < 31);
            int numLeaf = 1 << treeDepth;

            Host.Assert(w.Length == numLeaf * 2 - 1);
            Host.Assert(w.All(v => v.IsDense));
            Host.Assert(w.All(v => v.Length == w[0].Length));
            Host.Assert(thetaPrime.Length == numLeaf * 2 - 1);
            Host.Assert(thetaPrime.All(v => v.IsDense));
            Host.Assert(thetaPrime.All(v => v.Length == thetaPrime[0].Length));
            Host.Assert(theta.Length == numLeaf - 1);
            Host.Assert(theta.All(v => v.IsDense));
            Host.Assert(theta.All(v => v.Length == theta[0].Length));
            Host.Assert(biasW.Length == numLeaf * 2 - 1);
            Host.Assert(biasTheta.Length == numLeaf - 1);
            Host.Assert(biasThetaPrime.Length == numLeaf * 2 - 1);
            Host.Assert((w[0].Length > 0) && (w[0].Length == thetaPrime[0].Length) && (w[0].Length == theta[0].Length));

            _numLeaf        = numLeaf;
            _sigma          = sigma;
            _w              = w;
            _thetaPrime     = thetaPrime;
            _theta          = theta;
            _biasW          = biasW;
            _biasTheta      = biasTheta;
            _biasThetaPrime = biasThetaPrime;

            InputType = new VectorDataViewType(NumberDataViewType.Single, _w[0].Length);

            AssertValid();
        }
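As a worked reading of the size assertions above: with treeDepth = 3, numLeaf = 1 << 3 = 8, so w, thetaPrime, biasW and biasThetaPrime must each contain 2 * 8 - 1 = 15 entries (one per tree node), theta and biasTheta contain 8 - 1 = 7 entries (one per internal node), and all w, theta and thetaPrime vectors share the same positive length, which becomes the model's input dimension.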
Example #13
 private EnsembleDistributionModelParameters(IHostEnvironment env, ModelLoadContext ctx)
     : base(env, RegistrationName, ctx)
 {
     PredictionKind       = (PredictionKind)ctx.Reader.ReadInt32();
     _probabilityCombiner = new Median(env);
     _inputType           = InitializeMappers(out _mappers);
     ComputeAveragedWeights(out _averagedWeights);
 }
Example #14
 /// <summary>
 /// Instantiate new ensemble model from existing sub-models.
 /// </summary>
 /// <param name="env">The host environment.</param>
 /// <param name="kind">The prediction kind <see cref="PredictionKind"/></param>
 /// <param name="models">Array of sub-models that you want to ensemble together.</param>
 /// <param name="combiner">The combiner class to use to ensemble the models.</param>
 /// <param name="weights">The weights assigned to each model to be ensembled.</param>
 internal EnsembleDistributionModelParameters(IHostEnvironment env, PredictionKind kind,
                                              FeatureSubsetModel <float>[] models, IOutputCombiner <Single> combiner, Single[] weights = null)
     : base(env, RegistrationName, models, combiner, weights)
 {
     PredictionKind       = kind;
     _probabilityCombiner = new Median(env);
     _inputType           = InitializeMappers(out _mappers);
     ComputeAveragedWeights(out _averagedWeights);
 }
Example #15
 private bool IsValid(IValueMapper mapper, out VectorDataViewType inputType)
 {
     if (mapper != null &&
         mapper.InputType is VectorDataViewType inputVectorType && inputVectorType.ItemType == NumberDataViewType.Single &&
         mapper.OutputType == NumberDataViewType.Single)
     {
         inputType = inputVectorType;
         return(true);
     }

     inputType = null;
     return(false);
 }
        private protected GamModelParametersBase(IHostEnvironment env, string name,
                                                 double[][] binUpperBounds, double[][] binEffects, double intercept, int numInputFeatures = -1, int[] shapeToInputMap = null)
            : base(env, name)
        {
            Host.CheckValue(binEffects, nameof(binEffects), "May not be null.");
            Host.CheckValue(binUpperBounds, nameof(binUpperBounds), "May not be null.");
            Host.CheckParam(binUpperBounds.Length == binEffects.Length, nameof(binUpperBounds), "Must have same number of features as binEffects");
            Host.CheckParam(binEffects.Length > 0, nameof(binEffects), "Must have at least one entry");
            Host.CheckParam(numInputFeatures == -1 || numInputFeatures > 0, nameof(numInputFeatures), "Must be greater than zero");
            Host.CheckParam(shapeToInputMap == null || shapeToInputMap.Length == binEffects.Length, nameof(shapeToInputMap), "Must have same number of features as binEffects");

            // Define the model basics
            Bias                   = intercept;
            _binUpperBounds        = binUpperBounds;
            _binEffects            = binEffects;
            NumberOfShapeFunctions = binEffects.Length;

            // For sparse inputs we have a fast lookup
            _binsAtAllZero  = new int[NumberOfShapeFunctions];
            _valueAtAllZero = 0;

            // Walk through each feature and perform checks / updates
            for (int i = 0; i < NumberOfShapeFunctions; i++)
            {
                // Check data validity
                Host.CheckValue(binEffects[i], nameof(binEffects), "Array contained null entries");
                Host.CheckParam(binUpperBounds[i].Length == binEffects[i].Length, nameof(binEffects), "Array contained wrong number of effect values");
                Host.CheckParam(Utils.IsMonotonicallyIncreasing(binUpperBounds[i]), nameof(binUpperBounds), "Array must be monotonically increasing");

                // Update the value at zero
                _valueAtAllZero += GetBinEffect(i, 0, out _binsAtAllZero[i]);
            }

            // Define the sparse mappings from/to input to/from shape functions
            _shapeToInputMap = shapeToInputMap;
            if (_shapeToInputMap == null)
            {
                _shapeToInputMap = Utils.GetIdentityPermutation(NumberOfShapeFunctions);
            }

            _numInputFeatures = numInputFeatures;
            if (_numInputFeatures == -1)
            {
                _numInputFeatures = NumberOfShapeFunctions;
            }
            _inputFeatureToShapeFunctionMap = new Dictionary <int, int>(_shapeToInputMap.Length);
            for (int i = 0; i < _shapeToInputMap.Length; i++)
            {
                Host.CheckParam(0 <= _shapeToInputMap[i] && _shapeToInputMap[i] < _numInputFeatures, nameof(_shapeToInputMap), "Contains out of range feature value");
                Host.CheckParam(!_inputFeatureToShapeFunctionMap.ContainsValue(_shapeToInputMap[i]), nameof(_shapeToInputMap), "Contains duplicate mappings");
                _inputFeatureToShapeFunctionMap[_shapeToInputMap[i]] = i;
            }

            _inputType  = new VectorDataViewType(NumberDataViewType.Single, _numInputFeatures);
            _outputType = NumberDataViewType.Single;
        }
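A minimal sketch of the per-feature lookup that _valueAtAllZero is built from, assuming the usual GAM convention that a value belongs to the first bin whose upper bound is greater than or equal to it; the helper name and the use of Array.BinarySearch are illustrative, since GetBinEffect itself is not shown in this snippet.
        // Sketch: locate the bin for a value and return its additive effect.
        // Assumes the last upper bound covers every remaining value.
        private static double LookUpBinEffect(double[] binUpperBounds, double[] binEffects, double value, out int bin)
        {
            bin = Array.BinarySearch(binUpperBounds, value);
            if (bin < 0)
                bin = ~bin; // index of the first upper bound greater than the value
            return binEffects[bin];
        }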
Example #17
        private List <OnnxVariableInfo> GetOnnxVariablesFromMetadata(IReadOnlyDictionary <string, NodeMetadata> nodeMetadata,
                                                                     IDictionary <string, int[]> shapeDictionary,
                                                                     Dictionary <string, DataViewType> typePool,
                                                                     Dictionary <string, Func <NamedOnnxValue, object> > casterPool)
        {
            var onnxVariableInfos = new List <OnnxVariableInfo>();

            foreach (var pair in nodeMetadata)
            {
                var name         = pair.Key;
                var meta         = pair.Value;
                var dataViewType = typePool[name];
                var caster       = casterPool?[name];

                if (name.StartsWith("mlnet.") &&
                    (name.EndsWith(".unusedInput") || name.EndsWith(".unusedOutput")))
                {
                    continue;
                }

                OnnxVariableInfo info = null;
                if (shapeDictionary != null && shapeDictionary.ContainsKey(name))
                {
                    if (!CheckOnnxShapeCompatibility(shapeDictionary[name].ToList(), meta.Dimensions.ToList()))
                    {
                        throw Contracts.ExceptParamValue(shapeDictionary[name], nameof(shapeDictionary),
                                                         "The specified shape " + string.Join(",", shapeDictionary[name]) +
                                                         " is not compatible with the shape " + string.Join(",", meta.Dimensions) +
                                                         " loaded from the ONNX model file. Only unknown dimension can replace or " +
                                                         "be replaced by another dimension.");
                    }

                    if (dataViewType is VectorDataViewType vectorType)
                    {
                        if (shapeDictionary[name].All(value => value > 0))
                        {
                            dataViewType = new VectorDataViewType(vectorType.ItemType, shapeDictionary[name]);
                        }
                        else
                        {
                            dataViewType = new VectorDataViewType(vectorType.ItemType);
                        }
                    }

                    info = new OnnxVariableInfo(name, shapeDictionary[name].ToList(), meta.ElementType, dataViewType, caster);
                }
                else
                {
                    // No user-specified shape is found, so the shape loaded from ONNX model file is used.
                    info = new OnnxVariableInfo(name, meta.Dimensions.ToList(), meta.ElementType, dataViewType, caster);
                }

                onnxVariableInfos.Add(info);
            }
            return(onnxVariableInfos);
        }
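The exception text above states the compatibility rule: a dimension may only replace, or be replaced by, an unknown dimension. Below is a standalone sketch of that check, under the assumption that non-positive values denote unknown dimensions (CheckOnnxShapeCompatibility itself is not shown in this snippet).
        private static bool ShapesCompatible(IReadOnlyList<int> userShape, IReadOnlyList<int> modelShape)
        {
            // Same rank, and every pair of known (positive) dimensions must agree.
            if (userShape.Count != modelShape.Count)
                return false;
            for (int i = 0; i < userShape.Count; i++)
            {
                if (userShape[i] > 0 && modelShape[i] > 0 && userShape[i] != modelShape[i])
                    return false;
            }
            return true;
        }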
Example #18
            public ColInfoEx(DataKind kind, bool hasKeyRange, DataViewType type, VectorDataViewType slotType)
            {
                Contracts.AssertValue(type);
                Contracts.AssertValueOrNull(slotType);
                Contracts.Assert(slotType == null || type.ItemType().Equals(slotType.ItemType()));

                Kind        = kind;
                HasKeyRange = hasKeyRange;
                TypeDst     = type;
                SlotTypeDst = slotType;
            }
        private OnnxTransformer(IHostEnvironment env, Options options, byte[] modelBytes = null) :
            base(Contracts.CheckRef(env, nameof(env)).Register(nameof(OnnxTransformer)))
        {
            Host.CheckValue(options, nameof(options));

            foreach (var col in options.InputColumns)
            {
                Host.CheckNonWhiteSpace(col, nameof(options.InputColumns));
            }
            foreach (var col in options.OutputColumns)
            {
                Host.CheckNonWhiteSpace(col, nameof(options.OutputColumns));
            }

            try
            {
                if (modelBytes == null)
                {
                    Host.CheckNonWhiteSpace(options.ModelFile, nameof(options.ModelFile));
                    Host.CheckIO(File.Exists(options.ModelFile), "Model file {0} does not exist.", options.ModelFile);
                    Model = new OnnxModel(options.ModelFile, options.GpuDeviceId, options.FallbackToCpu);
                }
                else
                {
                    Model = OnnxModel.CreateFromBytes(modelBytes, options.GpuDeviceId, options.FallbackToCpu);
                }
            }
            catch (OnnxRuntimeException e)
            {
                throw Host.Except(e, $"Error initializing model: {e}");
            }

            var modelInfo = Model.ModelInfo;

            Inputs      = (options.InputColumns.Count() == 0) ? Model.InputNames.ToArray() : options.InputColumns;
            Outputs     = (options.OutputColumns.Count() == 0) ? Model.OutputNames.ToArray() : options.OutputColumns;
            OutputTypes = new DataViewType[Outputs.Length];
            var numModelOutputs = Model.ModelInfo.OutputsInfo.Length;

            for (int i = 0; i < Outputs.Length; i++)
            {
                var idx = Model.OutputNames.IndexOf(Outputs[i]);
                if (idx < 0)
                {
                    throw Host.Except($"Column {Outputs[i]} doesn't match output node names of model");
                }

                var outputNodeInfo = Model.ModelInfo.OutputsInfo[idx];
                var shape          = outputNodeInfo.Shape;
                var dims           = AdjustDimensions(shape);
                OutputTypes[i] = new VectorDataViewType(OnnxUtils.OnnxToMlNetType(outputNodeInfo.Type), dims.ToArray());
            }
            _options = options;
        }
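For context, this constructor is normally reached through the ApplyOnnxModel catalog extension; a short usage sketch follows (the model path and column names are placeholders):
        var mlContext = new MLContext();
        var pipeline = mlContext.Transforms.ApplyOnnxModel(
            outputColumnNames: new[] { "softmaxout_1" },
            inputColumnNames: new[] { "data_0" },
            modelFile: "model.onnx",
            gpuDeviceId: null,
            fallbackToCpu: true);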
 private VectorDataViewType InitializeMappers(out IValueMapperDist[] mappers)
 {
     mappers = new IValueMapperDist[_predictors.Length];
     VectorDataViewType inputType = null;
     for (int i = 0; i < _predictors.Length; i++)
     {
         var vmd = _predictors[i] as IValueMapperDist;
         Host.Check(IsValid(vmd, ref inputType), "Predictor doesn't implement the expected interface");
         mappers[i] = vmd;
     }
     return inputType;
 }
        private protected GamModelParametersBase(IHostEnvironment env, string name, ModelLoadContext ctx)
            : base(env, name)
        {
            Host.CheckValue(ctx, nameof(ctx));

            BinaryReader reader = ctx.Reader;

            NumberOfShapeFunctions = reader.ReadInt32();
            Host.CheckDecode(NumberOfShapeFunctions >= 0);
            _numInputFeatures = reader.ReadInt32();
            Host.CheckDecode(_numInputFeatures >= 0);
            Bias = reader.ReadDouble();
            if (ctx.Header.ModelVerWritten == 0x00010001)
            {
                using (var ch = env.Start("GamWarningChannel"))
                    ch.Warning("GAMs models written prior to ML.NET 0.6 are loaded with an incorrect Intercept. For these models, subtract the value of the intercept from the prediction.");
            }

            _binEffects     = new double[NumberOfShapeFunctions][];
            _binUpperBounds = new double[NumberOfShapeFunctions][];
            _binsAtAllZero  = new int[NumberOfShapeFunctions];
            for (int i = 0; i < NumberOfShapeFunctions; i++)
            {
                _binEffects[i] = reader.ReadDoubleArray();
                Host.CheckDecode(Utils.Size(_binEffects[i]) >= 1);
            }
            for (int i = 0; i < NumberOfShapeFunctions; i++)
            {
                _binUpperBounds[i] = reader.ReadDoubleArray(_binEffects[i].Length);
                _valueAtAllZero   += GetBinEffect(i, 0, out _binsAtAllZero[i]);
            }
            int len = reader.ReadInt32();

            Host.CheckDecode(len >= 0);

            _inputFeatureToShapeFunctionMap = new Dictionary <int, int>(len);
            _shapeToInputMap = Utils.CreateArray(NumberOfShapeFunctions, -1);
            for (int i = 0; i < len; i++)
            {
                int key = reader.ReadInt32();
                Host.CheckDecode(0 <= key && key < _numInputFeatures);
                int val = reader.ReadInt32();
                Host.CheckDecode(0 <= val && val < NumberOfShapeFunctions);
                Host.CheckDecode(!_inputFeatureToShapeFunctionMap.ContainsKey(key));
                Host.CheckDecode(_shapeToInputMap[val] == -1);
                _inputFeatureToShapeFunctionMap[key] = val;
                _shapeToInputMap[val] = key;
            }

            _inputType  = new VectorDataViewType(NumberDataViewType.Single, _numInputFeatures);
            _outputType = NumberDataViewType.Single;
        }
Example #22
        public ClusteringPerInstanceEvaluator(IHostEnvironment env, DataViewSchema schema, string scoreCol, int numClusters)
            : base(env, schema, scoreCol, null)
        {
            CheckInputColumnTypes(schema);
            _numClusters = numClusters;

            _types = new DataViewType[3];
            var key = new KeyDataViewType(typeof(uint), _numClusters);

            _types[ClusterIdCol]          = key;
            _types[SortedClusterCol]      = new VectorDataViewType(key, _numClusters);
            _types[SortedClusterScoreCol] = new VectorDataViewType(NumberDataViewType.Single, _numClusters);
        }
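Concretely, with _numClusters = 3 the cluster-id column gets a key type over three values, while the two sorted columns are fixed-length-3 vectors: one of that key type and one of Single scores.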
Example #23
        /// <remarks>
        /// The unit test TestEntryPoints.LoadEntryPointModel() exercises the ReadIntArray(int size) code path below,
        /// as its ctx.Header.ModelVerWritten is 0x00010001; the persisted model loaded and executed
        /// by this unit test is located at test\data\backcompat\ep_model3.zip.
        /// </remarks>
        private NaiveBayesMulticlassModelParameters(IHostEnvironment env, ModelLoadContext ctx)
            : base(env, LoaderSignature, ctx)
        {
            // *** Binary format ***
            // int: _labelCount (read during reading of _labelHistogram in ReadLongArray())
            // long[_labelCount]: _labelHistogram
            // int: _featureCount
            // long[_labelCount][_featureCount]: _featureHistogram
            // double[_labelCount]: _absentFeaturesLogProb
            if (ctx.Header.ModelVerWritten >= 0x00010002)
            {
                _labelHistogram = ctx.Reader.ReadLongArray() ?? new long[0];
            }
            else
            {
                _labelHistogram = Array.ConvertAll(ctx.Reader.ReadIntArray() ?? new int[0], x => (long)x);
            }
            _labelCount = _labelHistogram.Length;

            foreach (int labelCount in _labelHistogram)
            {
                Host.CheckDecode(labelCount >= 0);
            }

            _featureCount = ctx.Reader.ReadInt32();
            Host.CheckDecode(_featureCount >= 0);
            _featureHistogram = new long[_labelCount][];
            for (int iLabel = 0; iLabel < _labelCount; iLabel += 1)
            {
                if (_labelHistogram[iLabel] > 0)
                {
                    if (ctx.Header.ModelVerWritten >= 0x00010002)
                    {
                        _featureHistogram[iLabel] = ctx.Reader.ReadLongArray(_featureCount);
                    }
                    else
                    {
                        _featureHistogram[iLabel] = Array.ConvertAll(ctx.Reader.ReadIntArray(_featureCount) ?? new int[0], x => (long)x);
                    }
                    for (int iFeature = 0; iFeature < _featureCount; iFeature += 1)
                    {
                        Host.CheckDecode(_featureHistogram[iLabel][iFeature] >= 0);
                    }
                }
            }

            _absentFeaturesLogProb = ctx.Reader.ReadDoubleArray(_labelCount);
            _totalTrainingCount    = _labelHistogram.Sum();
            _inputType             = new VectorDataViewType(NumberDataViewType.Single, _featureCount);
            _outputType            = new VectorDataViewType(NumberDataViewType.Single, _labelCount);
        }
 /// <summary>
 /// Returns a value getter delegate to fetch the value of column with the given columnIndex, from the row.
 /// This throws if the column is not active in this row, or if the type
 /// <typeparamref name="TValue"/> differs from this column's type.
 /// </summary>
 /// <typeparam name="TValue"> is the column's content type.</typeparam>
 /// <param name="column"> is the output column whose getter should be returned.</param>
 public override ValueGetter <TValue> GetGetter <TValue>(DataViewSchema.Column column)
 {
     if (column.Index == _bindings.outputColumn)
     {
         VectorDataViewType columnType = column.Type as VectorDataViewType;
         Delegate           getter     = Utils.MarshalInvoke(MakeVarLengthVectorGetter <int>, columnType.ItemType.RawType, _cursor);
         return(getter as ValueGetter <TValue>);
     }
     else
     {
         int inputIndex = _bindings.outputToInputMap[column.Index];
         return(_cursor.GetGetter <TValue>(_cursor.Schema[inputIndex]));
     }
 }
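A brief sketch of how a caller typically consumes such a getter; the column name "Features" and the VBuffer<float> element type are illustrative:
     var column = cursor.Schema["Features"];
     var getter = cursor.GetGetter<VBuffer<float>>(column);
     var buffer = default(VBuffer<float>);
     while (cursor.MoveNext())
     {
         getter(ref buffer); // fills the buffer for the current row
     }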
            private void SaveAsOnnxCore(OnnxContext ctx, int iinfo, string srcVariableName, string dstVariableName)
            {
                const int minimumOpSetVersion = 9;

                ctx.CheckOpSetVersion(minimumOpSetVersion, LoaderSignature);

                string       opType = "Tokenizer";
                DataViewType dataViewType;

                if (_isSourceVector[iinfo])
                {
                    dataViewType = new VectorDataViewType(TextDataViewType.Instance, _sourceVectorLength[iinfo]);
                }
                else
                {
                    dataViewType = TextDataViewType.Instance;
                }

                string tokenizerOutput = ctx.AddIntermediateVariable(dataViewType, "TokenizerOutput", true);
                var    node            = ctx.CreateNode(opType, srcVariableName, tokenizerOutput, ctx.GetNodeName(opType), "com.microsoft");

                node.AddAttribute("mark", _parent._useMarkerChars);
                node.AddAttribute("mincharnum", 1);
                node.AddAttribute("pad_value", "");
                node.AddAttribute("separators", new string[] { "" });

                opType = "Squeeze";
                var squeezeOutput = ctx.AddIntermediateVariable(dataViewType, "SqueezeOutput");

                node = ctx.CreateNode(opType, tokenizerOutput, squeezeOutput, ctx.GetNodeName(opType), "");
                node.AddAttribute("axes", new long[] { 1 });

                opType = "LabelEncoder";
                var labelEncoderOutput = ctx.AddIntermediateVariable(NumberDataViewType.Int64, "LabelEncoderOutput");

                node = ctx.CreateNode(opType, squeezeOutput, labelEncoderOutput, ctx.GetNodeName(opType));

                IEnumerable <string> charStrings = Enumerable.Range(0, 65535).Select(x => ((char)x).ToString());
                IEnumerable <long>   charValues  = Enumerable.Range(0, 65535).Select(x => Convert.ToInt64(x));

                node.AddAttribute("keys_strings", charStrings);
                node.AddAttribute("values_int64s", charValues);

                opType = "Cast";
                var castNode = ctx.CreateNode(opType, labelEncoderOutput, dstVariableName, ctx.GetNodeName(opType), "");
                var t        = InternalDataKindExtensions.ToInternalDataKind(DataKind.UInt16).ToType();

                castNode.AddAttribute("to", t);
            }
Example #26
            public VecValueWriter(DataViewRowCursor cursor, VectorDataViewType type, int source, char sep)
                : base(type.ItemType, source, sep)
            {
                _getSrc = cursor.GetGetter <VBuffer <T> >(cursor.Schema[source]);
                VectorDataViewType typeNames;

                if (type.IsKnownSize &&
                    (typeNames = cursor.Schema[source].Annotations.Schema.GetColumnOrNull(AnnotationUtils.Kinds.SlotNames)?.Type as VectorDataViewType) != null &&
                    typeNames.Size == type.Size && typeNames.ItemType is TextDataViewType)
                {
                    cursor.Schema[source].Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref _slotNames);
                    Contracts.Check(_slotNames.Length == typeNames.Size, "Unexpected slot names length");
                }
                _slotCount = type.Size;
            }
        private DataViewSchema ProcessInputSchema(DataViewSchema inputSchema, string lengthColumnName)
        {
            var builder = new DataViewSchema.Builder();

            for (int i = 0; i < inputSchema.Count; i++)
            {
                var name = inputSchema[i].Name;

                if (_columnNames.Contains(name))
                {
                    _bindings.vectorToInputMap.Add(i);
                }
                else if (name == lengthColumnName)
                {
                    _bindings.lengthColumn = i;
                }
                else
                {
                    builder.AddColumn(name, inputSchema[i].Type);
                    _bindings.outputToInputMap.Add(i);
                }
            }

            if (_bindings.vectorToInputMap.Count > 0)
            {
                var type = inputSchema[_bindings.vectorToInputMap[0]].Type as PrimitiveDataViewType;

                for (int i = 1; i < _bindings.vectorToInputMap.Count; i++)
                {
                    var nextType = inputSchema[_bindings.vectorToInputMap[i]].Type as PrimitiveDataViewType;
                    if (!nextType.Equals(type))
                    {
                        throw Contracts.Except("Input data types of the columns to vectorize must " +
                                               "all be of the same type. Found {0} and {1}.",
                                               type.ToString(),
                                               nextType.ToString());
                    }
                }

                var outputColumnType = new VectorDataViewType(type, 0);
                var outputColumnName = inputSchema[_bindings.vectorToInputMap[0]].Name;
                builder.AddColumn(outputColumnName, outputColumnType);

                _bindings.outputColumn = _bindings.outputToInputMap.Count;
            }

            return(builder.ToSchema());
        }
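Note that the combined output column is declared as new VectorDataViewType(type, 0): a dimension of zero marks the vector as having unknown size, so each output row may carry a different number of values, consistent with the per-row length column handled above.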
Example #28
        private VectorDataViewType[] GetTypesAndMetadata()
        {
            var md    = Metadata;
            var types = new VectorDataViewType[Infos.Length];

            for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
            {
                var type = Infos[iinfo].TypeSrc;

                // This ensures that our feature count doesn't overflow.
                Host.Check(type.GetValueCount() < int.MaxValue / 2);

                if (!(type is VectorDataViewType vectorType))
                {
                    types[iinfo] = new VectorDataViewType(NumberDataViewType.Single, 2);
                }
 /// <summary>
 /// Instantiates new model parameters from trained model.
 /// </summary>
 /// <param name="env">The host environment.</param>
 /// <param name="labelHistogram">The histogram of labels.</param>
 /// <param name="featureHistogram">The feature histogram.</param>
 /// <param name="featureCount">The number of features.</param>
 internal NaiveBayesMulticlassModelParameters(IHostEnvironment env, int[] labelHistogram, int[][] featureHistogram, int featureCount)
     : base(env, LoaderSignature)
 {
     Host.AssertValue(labelHistogram);
     Host.AssertValue(featureHistogram);
     Host.Assert(labelHistogram.Length == featureHistogram.Length);
     Host.Assert(featureHistogram.All(h => h == null || h.Length == featureCount));
     _labelHistogram        = labelHistogram;
     _featureHistogram      = featureHistogram;
     _totalTrainingCount    = _labelHistogram.Sum();
     _labelCount            = _labelHistogram.Length;
     _featureCount          = featureCount;
     _absentFeaturesLogProb = CalculateAbsentFeatureLogProbabilities(_labelHistogram, _featureHistogram, _featureCount);
     _inputType             = new VectorDataViewType(NumberDataViewType.Single, _featureCount);
     _outputType            = new VectorDataViewType(NumberDataViewType.Single, _labelCount);
 }
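As a concrete reading of the assertions above: with 3 classes and 10 features, labelHistogram has 3 entries, every non-null row of featureHistogram has 10 entries, _inputType becomes a vector of Single of length 10 and _outputType a vector of Single of length 3 (one score per class).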
            internal ImplRaw(TScalarPredictor[] predictors)
            {
                Contracts.CheckNonEmpty(predictors, nameof(predictors));

                Predictors = new IValueMapper[predictors.Length];
                VectorDataViewType inputType = null;

                for (int i = 0; i < predictors.Length; i++)
                {
                    var vm = predictors[i] as IValueMapper;
                    Contracts.Check(IsValid(vm, ref inputType), "Predictor doesn't implement the expected interface");
                    Predictors[i] = vm;
                }
                CanSavePfa = Predictors.All(m => (m as ISingleCanSavePfa)?.CanSavePfa == true);
                Contracts.AssertValue(inputType);
                InputType = inputType;
            }