/// <summary>
/// Checks whether <paramref name="mapper"/> takes a known-size vector of
/// <see cref="NumberDataViewType.Single"/> and reports Single output and distribution types,
/// with an input size that agrees with <paramref name="inputType"/>.
/// </summary>
/// <param name="mapper">The mapper to inspect. A null mapper is reported as invalid.</param>
/// <param name="inputType">
/// The input type accumulated so far. When null on entry it is set to the mapper's input type;
/// note that this assignment happens before the output and distribution types are checked.
/// </param>
/// <returns>True when every check passes, false otherwise.</returns>
private bool IsValid(IValueMapperDist mapper, ref VectorDataViewType inputType)
{
    if (mapper == null)
    {
        return false;
    }

    // The input must be a fixed-size vector of Single.
    if (!(mapper.InputType is VectorDataViewType mapperInput))
    {
        return false;
    }
    if (!mapperInput.IsKnownSize || mapperInput.ItemType != NumberDataViewType.Single)
    {
        return false;
    }

    // Adopt the first input type seen, otherwise require the same size.
    if (inputType == null)
    {
        inputType = mapperInput;
    }
    else if (inputType.Size != mapperInput.Size)
    {
        return false;
    }

    if (mapper.OutputType != NumberDataViewType.Single || mapper.DistType != NumberDataViewType.Single)
    {
        return false;
    }

    return true;
}
/// <summary>
/// Builds the instance from in-memory predictor models and a combiner, using the
/// multiclass classification score column kind.
/// </summary>
/// <param name="env">The host environment passed to the base constructor.</param>
/// <param name="predictors">The predictor models; also passed to <c>CheckLabelColumn</c>.</param>
/// <param name="combiner">The output combiner passed to the base constructor.</param>
public ImplVec(IHostEnvironment env, PredictorModel[] predictors, IMulticlassOutputCombiner combiner)
    : base(env, predictors, combiner, LoaderSignature, AnnotationUtils.Const.ScoreColumnKind.MulticlassClassification)
{
    // The score type is a Single vector sized by the value CheckLabelColumn returns.
    int scoreVectorSize = CheckLabelColumn(Host, predictors, false);
    _scoreType = new VectorDataViewType(NumberDataViewType.Single, scoreVectorSize);
}
/// <summary>
/// Builds the instance from a model load context; the base constructor receives the context
/// and the score column kind.
/// </summary>
/// <param name="env">The host environment passed to the base constructor.</param>
/// <param name="ctx">The load context passed to the base constructor.</param>
/// <param name="scoreColumnKind">The score column kind passed to the base constructor.</param>
public ImplVec(IHostEnvironment env, ModelLoadContext ctx, string scoreColumnKind)
    : base(env, ctx, scoreColumnKind)
{
    // The score type is a Single vector sized by the value CheckLabelColumn returns
    // for the already-available PredictorModels.
    int scoreVectorSize = CheckLabelColumn(Host, PredictorModels, false);
    _scoreType = new VectorDataViewType(NumberDataViewType.Single, scoreVectorSize);
}
/// <summary>
/// Wraps <paramref name="cursor"/>, remembering the destination vector type and the label
/// getter obtained from <see cref="RowCursorUtils.GetLabelGetter"/>.
/// </summary>
/// <param name="provider">The channel provider passed to the base constructor.</param>
/// <param name="cursor">The slot cursor being wrapped.</param>
/// <param name="typeDst">The destination vector type; asserted to be non-null.</param>
public SlotCursorImpl(IChannelProvider provider, SlotCursor cursor, VectorDataViewType typeDst)
    : base(provider, cursor)
{
    Ch.AssertValue(typeDst);

    _type = typeDst;
    _getter = RowCursorUtils.GetLabelGetter(cursor);
}
/// <summary>
/// Casts every model's predictor to <see cref="IValueMapper"/>, validates it, and works out
/// the input type the mappers share.
/// </summary>
/// <param name="mappers">Receives one mapper per entry of <c>Models</c>, in the same order.</param>
/// <returns>
/// The input type of the first mapper whose input size is greater than zero, or a Single vector
/// type built without a size when no mapper reports one.
/// </returns>
/// <remarks>
/// Throws (via <c>Host.Except</c>) when a predictor fails <c>IsValid</c> or when two mappers
/// with sizes greater than zero disagree on that size.
/// </remarks>
private VectorDataViewType InitializeMappers(out IValueMapper[] mappers)
{
    Host.AssertNonEmpty(Models);

    mappers = new IValueMapper[Models.Length];
    VectorDataViewType sharedInput = null;

    for (int m = 0; m < Models.Length; m++)
    {
        var candidate = Models[m].Predictor as IValueMapper;
        if (!IsValid(candidate, out VectorDataViewType candidateInput))
        {
            throw Host.Except("Predictor does not implement expected interface");
        }

        // Only mappers that report a positive size take part in the size agreement.
        bool hasSize = candidateInput.Size > 0;
        if (hasSize && sharedInput == null)
        {
            sharedInput = candidateInput;
        }
        else if (hasSize && candidateInput.Size != sharedInput.Size)
        {
            throw Host.Except("Predictor input type mismatch");
        }

        mappers[m] = candidate;
    }

    if (sharedInput != null)
    {
        return sharedInput;
    }

    return new VectorDataViewType(NumberDataViewType.Single);
}
/// <summary>
/// Creates a new ensemble model out of already-trained sub-models.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="kind">The <see cref="PredictionKind"/> this ensemble reports.</param>
/// <param name="models">The sub-models to ensemble together.</param>
/// <param name="combiner">The combiner used to merge the sub-model outputs.</param>
/// <param name="weights">Optional per-model weights; forwarded to the base constructor.</param>
internal EnsembleModelParameters(
    IHostEnvironment env,
    PredictionKind kind,
    FeatureSubsetModel<float>[] models,
    IOutputCombiner<Single> combiner,
    Single[] weights = null)
    : base(env, LoaderSignature, models, combiner, weights)
{
    PredictionKind = kind;

    // Populates _mappers and yields the input type stored in _inputType.
    _inputType = InitializeMappers(out _mappers);
}
/// <summary>
/// Computes the output schema of a <see cref="GroupTransform"/> for a given input schema.
/// </summary>
/// <param name="sourceSchema">Input schema.</param>
/// <returns>The output schema produced by <see cref="GroupTransform"/>.</returns>
private DataViewSchema BuildOutputSchema(DataViewSchema sourceSchema)
{
    // Group columns are added first, followed by the aggregated columns.
    var schemaBuilder = new DataViewSchema.Builder();

    // Group(-key) columns partition the input rows: rows sharing a key value are merged into
    // one output row. They keep their source type and annotations.
    foreach (var groupKeyColumnName in _groupColumns)
    {
        var keyColumn = sourceSchema[groupKeyColumnName];
        schemaBuilder.AddColumn(groupKeyColumnName, keyColumn.Type, keyColumn.Annotations);
    }

    // Aggregated (aka keep) columns become vectors of their source item type.
    foreach (var groupValueColumnName in _keepColumns)
    {
        var valueColumn = sourceSchema[groupValueColumnName];

        // Only the IsNormalized and KeyValues annotations are carried over.
        var metadataBuilder = new DataViewSchema.Annotations.Builder();
        metadataBuilder.Add(
            valueColumn.Annotations,
            kind => kind == AnnotationUtils.Kinds.IsNormalized || kind == AnnotationUtils.Kinds.KeyValues);

        // The source column must be primitive; the output column is a vector of that type.
        var aggregatedValueType = valueColumn.Type as PrimitiveDataViewType;
        _ectx.CheckValue(aggregatedValueType, nameof(aggregatedValueType), "Columns being aggregated must be primitive types such as string, float, or integer");
        var vectorOfValues = new VectorDataViewType(aggregatedValueType);

        schemaBuilder.AddColumn(groupValueColumnName, vectorOfValues, metadataBuilder.ToAnnotations());
    }

    return schemaBuilder.ToSchema();
}
/// <summary>
/// Checks whether <paramref name="mapper"/> maps a vector of <see cref="NumberDataViewType.Single"/>
/// to a Single output, and reconciles its input size with <paramref name="inputType"/>.
/// </summary>
/// <param name="mapper">The mapper to inspect. A null mapper is reported as invalid.</param>
/// <param name="inputType">
/// The input type accumulated so far. It is replaced by the mapper's input type when it is null,
/// or when its size is zero and the mapper's size is not.
/// </param>
/// <returns>
/// False when the mapper is null, has the wrong output or input type, or when both sizes are
/// non-zero and differ; true otherwise.
/// </returns>
protected bool IsValid(IValueMapper mapper, ref VectorDataViewType inputType)
{
    Contracts.AssertValueOrNull(mapper);
    Contracts.AssertValueOrNull(inputType);

    if (mapper == null || mapper.OutputType != NumberDataViewType.Single)
    {
        return false;
    }

    if (!(mapper.InputType is VectorDataViewType mapperVectorType))
    {
        return false;
    }
    if (mapperVectorType.ItemType != NumberDataViewType.Single)
    {
        return false;
    }

    bool adoptMapperType = inputType == null || (inputType.Size == 0 && mapperVectorType.Size != 0);
    if (adoptMapperType)
    {
        // Nothing recorded yet, or only a zero-size type recorded: take the mapper's type.
        inputType = mapperVectorType;
    }
    else if (inputType.Size != mapperVectorType.Size && mapperVectorType.Size != 0)
    {
        // Two different non-zero sizes cannot be reconciled.
        return false;
    }

    return true;
}
/// <summary>
/// Validates that <paramref name="view"/> has the expected layout: a single vector column whose
/// size, row count and item type agree with the parent's header and with column <c>_col</c>
/// of the parent's schema. Mismatches are reported through the host's decode checks.
/// </summary>
/// <param name="view">The data view to verify; asserted to be non-null.</param>
protected override void VerifyView(IDataView view)
{
    Host.AssertValue(view);

    // Exactly one column is expected, and it must be a vector.
    var viewSchema = view.Schema;
    Host.CheckDecode(viewSchema.Count == 1);
    if (!(viewSchema[0].Type is VectorDataViewType vectorType))
    {
        throw Host.ExceptDecode();
    }

    // A zero-length vector type cannot be encoded as such when the original dataset has no
    // rows, so a vector of "unknown" size is accepted as meaning a zero-row dataset.
    Host.CheckDecode(vectorType.Size == _parent._header.RowCount);

    // The view came from a binary IDV, so it has an actual "row" count, although here that
    // count is more like a "slot" count.
    var maybeRowCount = view.GetRowCount();
    Host.Assert(maybeRowCount.HasValue);
    long rowCount = maybeRowCount.Value;

    // There must be one "row" per "slot" of the column this is a transpose of.
    var transposedType = _parent.Schema[_col].Type;
    Host.CheckDecode(transposedType.GetValueCount() == rowCount);

    // Item types must agree.
    Host.CheckDecode(transposedType.GetItemType().Equals(vectorType.ItemType));
}
/// <summary>
/// Creates a <see cref="CountAggregator"/> for a vector column by invoking the generic
/// <c>GetVecAggregator&lt;T&gt;</c> overload through reflection, with <c>T</c> bound to the raw
/// type of the column's item type.
/// </summary>
/// <param name="row">The row passed to the generic overload.</param>
/// <param name="colType">The vector type of the column; its item raw type selects <c>T</c>.</param>
/// <param name="colSrc">The source column index passed to the generic overload.</param>
/// <returns>The aggregator returned by the generic overload.</returns>
private static CountAggregator GetVecAggregator(DataViewRow row, VectorDataViewType colType, int colSrc)
{
    // The <int> instantiation is only a handle for reaching the generic method definition.
    Func<DataViewRow, VectorDataViewType, int, CountAggregator> template = GetVecAggregator<int>;

    var genericDefinition = template.GetMethodInfo().GetGenericMethodDefinition();
    var typedMethod = genericDefinition.MakeGenericMethod(colType.ItemType.RawType);

    var arguments = new object[] { row, colType, colSrc };
    return (CountAggregator)typedMethod.Invoke(null, arguments);
}
/// <summary>
/// Loads the model from <paramref name="ctx"/>: reads the class count, then loads one
/// sub-predictor for every pair (i, j) with j &lt;= i, and finally initializes the mappers
/// and the output type.
/// </summary>
/// <param name="env">The host environment passed to the base constructor.</param>
/// <param name="ctx">The load context to read from.</param>
private PairwiseCouplingModelParameters(IHostEnvironment env, ModelLoadContext ctx)
    : base(env, RegistrationName, ctx)
{
    // *** Binary format ***
    // int: number of classes
    _numClasses = ctx.Reader.ReadInt32();
    Host.CheckDecode(_numClasses > 0);

    // n * (n + 1) / 2 predictors, multiplied as long and checked against the int range.
    long predictorCount = (long)_numClasses * (_numClasses + 1) / 2;
    Host.CheckDecode(predictorCount <= int.MaxValue);

    // Load the predictors, row by row: the off-diagonal pairs first, then the diagonal entry.
    _predictors = new TDistPredictor[(int)predictorCount];
    int slot = 0;
    for (int row = 0; row < _numClasses; row++)
    {
        for (int col = 0; col < row; col++)
        {
            Host.Assert(slot == GetIndex(row, col));
            string pairName = string.Format(SubPredictorFmt2, row, col);
            ctx.LoadModel<TDistPredictor, SignatureLoadModel>(Host, out _predictors[slot], pairName);
            slot++;
        }

        Host.Assert(slot == GetIndex(row, row));
        string diagonalName = string.Format(SubPredictorFmt, row);
        ctx.LoadModel<TDistPredictor, SignatureLoadModel>(Host, out _predictors[slot], diagonalName);
        slot++;
    }

    _inputType = InitializeMappers(out _mappers);
    _outputType = new VectorDataViewType(NumberDataViewType.Single, _numClasses);
}
/// <summary>
/// Constructor. <paramref name="w"/>, <paramref name="thetaPrime"/> and <paramref name="theta"/>
/// must be dense <see cref="VBuffer{T}"/>s.
/// Note that this takes over ownership of all such vectors.
/// </summary>
/// <remarks>
/// With <c>numLeaf = 1 &lt;&lt; treeDepth</c>, the asserts expect <c>2 * numLeaf - 1</c> entries in
/// <paramref name="w"/>, <paramref name="thetaPrime"/>, <paramref name="biasW"/> and
/// <paramref name="biasThetaPrime"/>, and <c>numLeaf - 1</c> entries in <paramref name="theta"/>
/// and <paramref name="biasTheta"/>. The input type is a Single vector as long as <c>w[0]</c>.
/// </remarks>
internal LdSvmModelParameters(IHostEnvironment env, VBuffer<float>[] w, VBuffer<float>[] thetaPrime, VBuffer<float>[] theta,
    float sigma, float[] biasW, float[] biasTheta, float[] biasThetaPrime, int treeDepth)
    : base(env, LoaderSignature)
{
    // _numLeaf is 32-bit signed integer.
    Host.Assert(treeDepth > 0 && treeDepth < 31);
    int numLeaf = 1 << treeDepth;
    int longLength = numLeaf * 2 - 1;
    int shortLength = numLeaf - 1;

    // w: expected length, dense, all the same dimension.
    Host.Assert(w.Length == longLength);
    Host.Assert(w.All(v => v.IsDense));
    Host.Assert(w.All(v => v.Length == w[0].Length));

    // thetaPrime: same expectations as w.
    Host.Assert(thetaPrime.Length == longLength);
    Host.Assert(thetaPrime.All(v => v.IsDense));
    Host.Assert(thetaPrime.All(v => v.Length == thetaPrime[0].Length));

    // theta: the shorter expected length, dense, all the same dimension.
    Host.Assert(theta.Length == shortLength);
    Host.Assert(theta.All(v => v.IsDense));
    Host.Assert(theta.All(v => v.Length == theta[0].Length));

    // Bias arrays line up with their vector arrays.
    Host.Assert(biasW.Length == longLength);
    Host.Assert(biasTheta.Length == shortLength);
    Host.Assert(biasThetaPrime.Length == longLength);

    // All three vector families share one non-zero dimension.
    Host.Assert((w[0].Length > 0) && (w[0].Length == thetaPrime[0].Length) && (w[0].Length == theta[0].Length));

    _numLeaf = numLeaf;
    _sigma = sigma;

    _w = w;
    _biasW = biasW;

    _theta = theta;
    _biasTheta = biasTheta;

    _thetaPrime = thetaPrime;
    _biasThetaPrime = biasThetaPrime;

    InputType = new VectorDataViewType(NumberDataViewType.Single, _w[0].Length);

    AssertValid();
}
/// <summary>
/// Loads the ensemble from <paramref name="ctx"/>: after the base constructor has consumed its
/// part, one more int is read and interpreted as the <see cref="PredictionKind"/>.
/// </summary>
/// <param name="env">The host environment; also used to create the <c>Median</c> combiner.</param>
/// <param name="ctx">The load context to read from.</param>
private EnsembleDistributionModelParameters(IHostEnvironment env, ModelLoadContext ctx)
    : base(env, RegistrationName, ctx)
{
    int serializedKind = ctx.Reader.ReadInt32();
    PredictionKind = (PredictionKind)serializedKind;

    _probabilityCombiner = new Median(env);

    // Populates _mappers and yields the input type stored in _inputType.
    _inputType = InitializeMappers(out _mappers);

    ComputeAveragedWeights(out _averagedWeights);
}
/// <summary>
/// Creates a new ensemble model out of already-trained sub-models.
/// </summary>
/// <param name="env">The host environment; also used to create the <c>Median</c> combiner.</param>
/// <param name="kind">The <see cref="PredictionKind"/> this ensemble reports.</param>
/// <param name="models">The sub-models to ensemble together.</param>
/// <param name="combiner">The combiner used to merge the sub-model outputs.</param>
/// <param name="weights">Optional per-model weights; forwarded to the base constructor.</param>
internal EnsembleDistributionModelParameters(
    IHostEnvironment env,
    PredictionKind kind,
    FeatureSubsetModel<float>[] models,
    IOutputCombiner<Single> combiner,
    Single[] weights = null)
    : base(env, RegistrationName, models, combiner, weights)
{
    PredictionKind = kind;

    _probabilityCombiner = new Median(env);

    // Populates _mappers and yields the input type stored in _inputType.
    _inputType = InitializeMappers(out _mappers);

    ComputeAveragedWeights(out _averagedWeights);
}
private bool IsValid(IValueMapper mapper, out VectorDataViewType inputType) { if (mapper != null && mapper.InputType is VectorDataViewType inputVectorType && inputVectorType.ItemType == NumberDataViewType.Single && mapper.OutputType == NumberDataViewType.Single) { inputType = inputVectorType; return(true); }
/// <summary>
/// Builds the model from in-memory shape functions.
/// </summary>
/// <param name="env">The host environment passed to the base constructor.</param>
/// <param name="name">The registration name passed to the base constructor.</param>
/// <param name="binUpperBounds">
/// Per shape function, the bin upper bounds. Must be non-null, have one entry per shape function,
/// and each entry must be non-null, monotonically increasing and as long as the matching
/// <paramref name="binEffects"/> entry.
/// </param>
/// <param name="binEffects">Per shape function, the effect of each bin. Must be non-null and non-empty.</param>
/// <param name="intercept">The value stored as <c>Bias</c>.</param>
/// <param name="numInputFeatures">
/// Number of input features, or -1 to use the number of shape functions.
/// </param>
/// <param name="shapeToInputMap">
/// For each shape function, the index of the input feature it applies to, or null for the identity
/// mapping. Every entry must be in range and no input feature may appear twice.
/// </param>
private protected GamModelParametersBase(IHostEnvironment env, string name,
    double[][] binUpperBounds, double[][] binEffects, double intercept, int numInputFeatures = -1, int[] shapeToInputMap = null)
    : base(env, name)
{
    Host.CheckValue(binEffects, nameof(binEffects), "May not be null.");
    Host.CheckValue(binUpperBounds, nameof(binUpperBounds), "May not be null.");
    Host.CheckParam(binUpperBounds.Length == binEffects.Length, nameof(binUpperBounds), "Must have same number of features as binEffects");
    Host.CheckParam(binEffects.Length > 0, nameof(binEffects), "Must have at least one entry");
    Host.CheckParam(numInputFeatures == -1 || numInputFeatures > 0, nameof(numInputFeatures), "Must be greater than zero");
    Host.CheckParam(shapeToInputMap == null || shapeToInputMap.Length == binEffects.Length, nameof(shapeToInputMap), "Must have same number of features as binEffects");

    // Define the model basics
    Bias = intercept;
    _binUpperBounds = binUpperBounds;
    _binEffects = binEffects;
    NumberOfShapeFunctions = binEffects.Length;

    // For sparse inputs we have a fast lookup
    _binsAtAllZero = new int[NumberOfShapeFunctions];
    _valueAtAllZero = 0;

    // Walk through each feature and perform checks / updates
    for (int i = 0; i < NumberOfShapeFunctions; i++)
    {
        // Check data validity. Both jagged arrays are dereferenced below, so both entries
        // must be checked for null to report a parameter error instead of a NullReferenceException.
        Host.CheckValue(binEffects[i], nameof(binEffects), "Array contained null entries");
        Host.CheckValue(binUpperBounds[i], nameof(binUpperBounds), "Array contained null entries");
        Host.CheckParam(binUpperBounds[i].Length == binEffects[i].Length, nameof(binEffects), "Array contained wrong number of effect values");
        Host.CheckParam(Utils.IsMonotonicallyIncreasing(binUpperBounds[i]), nameof(binUpperBounds), "Array must be monotonically increasing");

        // Update the value at zero
        _valueAtAllZero += GetBinEffect(i, 0, out _binsAtAllZero[i]);
    }

    // Define the sparse mappings from/to input to/from shape functions
    _shapeToInputMap = shapeToInputMap;
    if (_shapeToInputMap == null)
    {
        _shapeToInputMap = Utils.GetIdentityPermutation(NumberOfShapeFunctions);
    }

    _numInputFeatures = numInputFeatures;
    if (_numInputFeatures == -1)
    {
        _numInputFeatures = NumberOfShapeFunctions;
    }

    _inputFeatureToShapeFunctionMap = new Dictionary<int, int>(_shapeToInputMap.Length);
    for (int i = 0; i < _shapeToInputMap.Length; i++)
    {
        int inputIndex = _shapeToInputMap[i];
        Host.CheckParam(0 <= inputIndex && inputIndex < _numInputFeatures, nameof(_shapeToInputMap), "Contains out of range feature value");

        // The dictionary is keyed by input feature index (its values are shape function
        // indices), so a duplicate input feature is a duplicate KEY. Checking the values
        // would reject valid permutations such as { 1, 0 } and let real duplicates such as
        // { 2, 2 } silently overwrite each other.
        Host.CheckParam(!_inputFeatureToShapeFunctionMap.ContainsKey(inputIndex), nameof(_shapeToInputMap), "Contains duplicate mappings");
        _inputFeatureToShapeFunctionMap[inputIndex] = i;
    }

    _inputType = new VectorDataViewType(NumberDataViewType.Single, _numInputFeatures);
    _outputType = NumberDataViewType.Single;
}
/// <summary>
/// Builds one <see cref="OnnxVariableInfo"/> per entry of <paramref name="nodeMetadata"/>,
/// preferring a user-specified shape over the shape stored in the metadata.
/// </summary>
/// <param name="nodeMetadata">Node metadata keyed by node name.</param>
/// <param name="shapeDictionary">
/// Optional user-specified shapes keyed by node name. A shape found here must pass
/// <c>CheckOnnxShapeCompatibility</c> against the metadata dimensions.
/// </param>
/// <param name="typePool">The data view type of every node, keyed by node name.</param>
/// <param name="casterPool">Optional casters keyed by node name; may be null.</param>
/// <returns>The variable descriptions, in the enumeration order of <paramref name="nodeMetadata"/>.</returns>
/// <remarks>
/// Nodes named <c>mlnet.*.unusedInput</c> or <c>mlnet.*.unusedOutput</c> are skipped. Throws
/// (via <c>Contracts.ExceptParamValue</c>) when a user-specified shape is incompatible.
/// </remarks>
private List<OnnxVariableInfo> GetOnnxVariablesFromMetadata(IReadOnlyDictionary<string, NodeMetadata> nodeMetadata,
    IDictionary<string, int[]> shapeDictionary,
    Dictionary<string, DataViewType> typePool,
    Dictionary<string, Func<NamedOnnxValue, object>> casterPool)
{
    var onnxVariableInfos = new List<OnnxVariableInfo>();

    foreach (var pair in nodeMetadata)
    {
        var name = pair.Key;
        var meta = pair.Value;
        var dataViewType = typePool[name];
        var caster = casterPool?[name];

        // Node names are identifiers, not user-facing text: compare them ordinally so the
        // result does not depend on the current culture.
        if (name.StartsWith("mlnet.", StringComparison.Ordinal) &&
            (name.EndsWith(".unusedInput", StringComparison.Ordinal) ||
             name.EndsWith(".unusedOutput", StringComparison.Ordinal)))
        {
            continue;
        }

        // Single dictionary lookup instead of ContainsKey followed by repeated indexing.
        int[] userShape = null;
        bool hasUserShape = shapeDictionary != null && shapeDictionary.TryGetValue(name, out userShape);

        OnnxVariableInfo info;
        if (hasUserShape)
        {
            if (!CheckOnnxShapeCompatibility(userShape.ToList(), meta.Dimensions.ToList()))
            {
                throw Contracts.ExceptParamValue(userShape, nameof(shapeDictionary),
                    "The specified shape " + string.Join(",", userShape) +
                    " is not compatible with the shape " + string.Join(",", meta.Dimensions) +
                    " loaded from the ONNX model file. Only unknown dimension can replace or " +
                    "be replaced by another dimension.");
            }

            if (dataViewType is VectorDataViewType vectorType)
            {
                // A fully positive shape gives a vector type with those dimensions;
                // otherwise the vector type is built without dimensions.
                if (userShape.All(value => value > 0))
                {
                    dataViewType = new VectorDataViewType(vectorType.ItemType, userShape);
                }
                else
                {
                    dataViewType = new VectorDataViewType(vectorType.ItemType);
                }
            }

            info = new OnnxVariableInfo(name, userShape.ToList(), meta.ElementType, dataViewType, caster);
        }
        else
        {
            // No user-specified shape is found, so the shape loaded from ONNX model file is used.
            info = new OnnxVariableInfo(name, meta.Dimensions.ToList(), meta.ElementType, dataViewType, caster);
        }

        onnxVariableInfos.Add(info);
    }

    return onnxVariableInfos;
}
/// <summary>
/// Stores the conversion details of a column: the data kind, whether a key range is present,
/// the destination type and the optional destination slot type.
/// </summary>
/// <param name="kind">The data kind.</param>
/// <param name="hasKeyRange">Whether a key range is present.</param>
/// <param name="type">The destination type; asserted to be non-null.</param>
/// <param name="slotType">
/// The destination slot type, or null. When given, its item type is asserted to equal the item
/// type of <paramref name="type"/>.
/// </param>
public ColInfoEx(DataKind kind, bool hasKeyRange, DataViewType type, VectorDataViewType slotType)
{
    Contracts.AssertValue(type);
    Contracts.AssertValueOrNull(slotType);
    Contracts.Assert(slotType == null || type.ItemType().Equals(slotType.ItemType()));

    TypeDst = type;
    SlotTypeDst = slotType;
    Kind = kind;
    HasKeyRange = hasKeyRange;
}
/// <summary>
/// Creates the transformer: validates the options, loads the ONNX model from
/// <paramref name="modelBytes"/> when given (otherwise from <c>options.ModelFile</c>), and
/// resolves the input/output column names and the output column types.
/// </summary>
/// <param name="env">The host environment; checked to be non-null.</param>
/// <param name="options">The transformer options; checked to be non-null.</param>
/// <param name="modelBytes">The serialized model, or null to load it from <c>options.ModelFile</c>.</param>
/// <remarks>
/// When no input (resp. output) columns are specified, all model input (resp. output) names are
/// used. Throws (via <c>Host.Except</c>) when the model cannot be initialized or when a requested
/// output column is not one of the model's output names.
/// </remarks>
private OnnxTransformer(IHostEnvironment env, Options options, byte[] modelBytes = null)
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(OnnxTransformer)))
{
    Host.CheckValue(options, nameof(options));

    foreach (var col in options.InputColumns)
    {
        Host.CheckNonWhiteSpace(col, nameof(options.InputColumns));
    }
    foreach (var col in options.OutputColumns)
    {
        Host.CheckNonWhiteSpace(col, nameof(options.OutputColumns));
    }

    try
    {
        if (modelBytes == null)
        {
            Host.CheckNonWhiteSpace(options.ModelFile, nameof(options.ModelFile));
            Host.CheckIO(File.Exists(options.ModelFile), "Model file {0} does not exists.", options.ModelFile);
            Model = new OnnxModel(options.ModelFile, options.GpuDeviceId, options.FallbackToCpu);
        }
        else
        {
            Model = OnnxModel.CreateFromBytes(modelBytes, options.GpuDeviceId, options.FallbackToCpu);
        }
    }
    catch (OnnxRuntimeException e)
    {
        throw Host.Except(e, $"Error initializing model :{e.ToString()}");
    }

    // Fall back to every model input/output when the caller did not name any columns.
    Inputs = !options.InputColumns.Any() ? Model.InputNames.ToArray() : options.InputColumns;
    Outputs = !options.OutputColumns.Any() ? Model.OutputNames.ToArray() : options.OutputColumns;

    OutputTypes = new DataViewType[Outputs.Length];
    for (int i = 0; i < Outputs.Length; i++)
    {
        var idx = Model.OutputNames.IndexOf(Outputs[i]);
        if (idx < 0)
        {
            throw Host.Except($"Column {Outputs[i]} doesn't match output node names of model");
        }

        // The output type is a vector of the node's element type, with the dimensions
        // returned by AdjustDimensions for the node's shape.
        var outputNodeInfo = Model.ModelInfo.OutputsInfo[idx];
        var dims = AdjustDimensions(outputNodeInfo.Shape);
        OutputTypes[i] = new VectorDataViewType(OnnxUtils.OnnxToMlNetType(outputNodeInfo.Type), dims.ToArray());
    }

    _options = options;
}
/// <summary>
/// Casts every entry of <c>_predictors</c> to <see cref="IValueMapperDist"/>, validates it with
/// <c>IsValid</c>, and returns the input type accumulated by those validations.
/// </summary>
/// <param name="mappers">Receives one mapper per predictor, in the same order.</param>
/// <returns>
/// The input type left in place by the <c>IsValid</c> calls; null when there are no predictors.
/// </returns>
private VectorDataViewType InitializeMappers(out IValueMapperDist[] mappers)
{
    int predictorCount = _predictors.Length;
    mappers = new IValueMapperDist[predictorCount];

    VectorDataViewType sharedInput = null;
    for (int p = 0; p < predictorCount; p++)
    {
        var candidate = _predictors[p] as IValueMapperDist;
        bool accepted = IsValid(candidate, ref sharedInput);
        Host.Check(accepted, "Predictor doesn't implement the expected interface");
        mappers[p] = candidate;
    }

    return sharedInput;
}
/// <summary>
/// Loads the model from <paramref name="ctx"/>. The reads happen in this order: number of shape
/// functions, number of input features, bias, one bin-effect array per shape function, one
/// bin-upper-bound array per shape function, then the count and the (input feature, shape
/// function) pairs of the feature mapping.
/// </summary>
/// <param name="env">The host environment; also used to open the warning channel.</param>
/// <param name="name">The registration name passed to the base constructor.</param>
/// <param name="ctx">The load context to read from; checked to be non-null.</param>
private protected GamModelParametersBase(IHostEnvironment env, string name, ModelLoadContext ctx)
    : base(env, name)
{
    Host.CheckValue(ctx, nameof(ctx));
    BinaryReader input = ctx.Reader;

    NumberOfShapeFunctions = input.ReadInt32();
    Host.CheckDecode(NumberOfShapeFunctions >= 0);

    _numInputFeatures = input.ReadInt32();
    Host.CheckDecode(_numInputFeatures >= 0);

    Bias = input.ReadDouble();

    // Models written with version 0x00010001 only trigger a warning about their intercept.
    if (ctx.Header.ModelVerWritten == 0x00010001)
    {
        using (var ch = env.Start("GamWarningChannel"))
        {
            ch.Warning("GAMs models written prior to ML.NET 0.6 are loaded with an incorrect Intercept. For these models, subtract the value of the intercept from the prediction.");
        }
    }

    _binEffects = new double[NumberOfShapeFunctions][];
    _binUpperBounds = new double[NumberOfShapeFunctions][];
    _binsAtAllZero = new int[NumberOfShapeFunctions];

    // All effect arrays come first; each must hold at least one value.
    for (int shape = 0; shape < NumberOfShapeFunctions; shape++)
    {
        _binEffects[shape] = input.ReadDoubleArray();
        Host.CheckDecode(Utils.Size(_binEffects[shape]) >= 1);
    }

    // Then the upper bounds, each as long as the matching effect array.
    for (int shape = 0; shape < NumberOfShapeFunctions; shape++)
    {
        _binUpperBounds[shape] = input.ReadDoubleArray(_binEffects[shape].Length);
        _valueAtAllZero += GetBinEffect(shape, 0, out _binsAtAllZero[shape]);
    }

    int mappingCount = input.ReadInt32();
    Host.CheckDecode(mappingCount >= 0);

    _inputFeatureToShapeFunctionMap = new Dictionary<int, int>(mappingCount);
    _shapeToInputMap = Utils.CreateArray(NumberOfShapeFunctions, -1);
    for (int pair = 0; pair < mappingCount; pair++)
    {
        int featureIndex = input.ReadInt32();
        Host.CheckDecode(0 <= featureIndex && featureIndex < _numInputFeatures);

        int shapeIndex = input.ReadInt32();
        Host.CheckDecode(0 <= shapeIndex && shapeIndex < NumberOfShapeFunctions);

        // Neither side of the mapping may have been seen before.
        Host.CheckDecode(!_inputFeatureToShapeFunctionMap.ContainsKey(featureIndex));
        Host.CheckDecode(_shapeToInputMap[shapeIndex] == -1);

        _inputFeatureToShapeFunctionMap[featureIndex] = shapeIndex;
        _shapeToInputMap[shapeIndex] = featureIndex;
    }

    _inputType = new VectorDataViewType(NumberDataViewType.Single, _numInputFeatures);
    _outputType = NumberDataViewType.Single;
}
/// <summary>
/// Creates the per-instance clustering evaluator and prepares the types of its three output
/// columns: a <c>uint</c> key of <paramref name="numClusters"/> values, a vector of that key,
/// and a vector of Single, both vectors of length <paramref name="numClusters"/>.
/// </summary>
/// <param name="env">The host environment passed to the base constructor.</param>
/// <param name="schema">The input schema; validated by <c>CheckInputColumnTypes</c>.</param>
/// <param name="scoreCol">The score column name passed to the base constructor.</param>
/// <param name="numClusters">The number of clusters.</param>
public ClusteringPerInstanceEvaluator(IHostEnvironment env, DataViewSchema schema, string scoreCol, int numClusters)
    : base(env, schema, scoreCol, null)
{
    CheckInputColumnTypes(schema);

    _numClusters = numClusters;
    _types = new DataViewType[3];

    var clusterKeyType = new KeyDataViewType(typeof(uint), _numClusters);
    _types[ClusterIdCol] = clusterKeyType;
    _types[SortedClusterCol] = new VectorDataViewType(clusterKeyType, _numClusters);
    _types[SortedClusterScoreCol] = new VectorDataViewType(NumberDataViewType.Single, _numClusters);
}
/// <summary>
/// Loads the model from <paramref name="ctx"/>. Models written with version 0x00010002 or later
/// store the histograms as long arrays; earlier models store int arrays, which are widened.
/// </summary>
/// <param name="env">The host environment passed to the base constructor.</param>
/// <param name="ctx">The load context to read from.</param>
/// <remarks>
/// The unit test TestEntryPoints.LoadEntryPointModel() exercises the ReadIntArray(int size) codepath below
/// as its ctx.Header.ModelVerWritten is 0x00010001, and the persistent model that gets loaded and executed
/// for this unit test is located at test\data\backcompat\ep_model3.zip.
/// </remarks>
private NaiveBayesMulticlassModelParameters(IHostEnvironment env, ModelLoadContext ctx)
    : base(env, LoaderSignature, ctx)
{
    // *** Binary format ***
    // int: _labelCount (read during reading of _labelHistogram in ReadLongArray())
    // long[_labelCount]: _labelHistogram
    // int: _featureCount
    // long[_labelCount][_featureCount]: _featureHistogram
    // double[_labelCount]: _absentFeaturesLogProb
    bool storedAsLong = ctx.Header.ModelVerWritten >= 0x00010002;

    if (storedAsLong)
    {
        _labelHistogram = ctx.Reader.ReadLongArray() ?? new long[0];
    }
    else
    {
        _labelHistogram = Array.ConvertAll(ctx.Reader.ReadIntArray() ?? new int[0], x => (long)x);
    }
    _labelCount = _labelHistogram.Length;

    // Iterate as long: an int iteration variable would silently truncate each 64-bit count
    // before the sign check, letting some negative values through and rejecting large valid ones.
    foreach (long labelCount in _labelHistogram)
    {
        Host.CheckDecode(labelCount >= 0);
    }

    _featureCount = ctx.Reader.ReadInt32();
    Host.CheckDecode(_featureCount >= 0);

    _featureHistogram = new long[_labelCount][];
    for (int iLabel = 0; iLabel < _labelCount; iLabel += 1)
    {
        // Feature histograms are only stored for labels with a positive count;
        // the remaining entries stay null.
        if (_labelHistogram[iLabel] > 0)
        {
            if (storedAsLong)
            {
                _featureHistogram[iLabel] = ctx.Reader.ReadLongArray(_featureCount);
            }
            else
            {
                _featureHistogram[iLabel] = Array.ConvertAll(ctx.Reader.ReadIntArray(_featureCount) ?? new int[0], x => (long)x);
            }

            for (int iFeature = 0; iFeature < _featureCount; iFeature += 1)
            {
                Host.CheckDecode(_featureHistogram[iLabel][iFeature] >= 0);
            }
        }
    }

    _absentFeaturesLogProb = ctx.Reader.ReadDoubleArray(_labelCount);
    _totalTrainingCount = _labelHistogram.Sum();
    _inputType = new VectorDataViewType(NumberDataViewType.Single, _featureCount);
    _outputType = new VectorDataViewType(NumberDataViewType.Single, _labelCount);
}
/// <summary>
/// Returns a value getter delegate for <paramref name="column"/>. For the vectorized output
/// column a variable-length vector getter is built for the column's item type; every other
/// column is served by the wrapped cursor's getter for the mapped input column.
/// </summary>
/// <typeparam name="TValue">The column's content type.</typeparam>
/// <param name="column">The output column whose getter should be returned.</param>
/// <returns>The getter for the requested column.</returns>
/// <exception cref="InvalidCastException">
/// The output column is requested with a <typeparamref name="TValue"/> that differs from the
/// type of the getter built for it, or the output column's type is not a vector type.
/// </exception>
public override ValueGetter<TValue> GetGetter<TValue>(DataViewSchema.Column column)
{
    if (column.Index != _bindings.outputColumn)
    {
        // Pass-through column: delegate to the underlying cursor.
        int inputIndex = _bindings.outputToInputMap[column.Index];
        return _cursor.GetGetter<TValue>(_cursor.Schema[inputIndex]);
    }

    // Direct casts instead of 'as': a type mismatch fails here with a clear exception rather
    // than returning a null getter that only fails once the caller invokes it.
    var columnType = (VectorDataViewType)column.Type;
    Delegate getter = Utils.MarshalInvoke(MakeVarLengthVectorGetter<int>, columnType.ItemType.RawType, _cursor);
    return (ValueGetter<TValue>)getter;
}
/// <summary>
/// Emits the ONNX nodes for column <paramref name="iinfo"/> as a chain of four nodes:
/// Tokenizer, Squeeze, LabelEncoder and Cast, reading <paramref name="srcVariableName"/> and
/// writing <paramref name="dstVariableName"/>.
/// </summary>
/// <param name="ctx">The ONNX context the nodes and intermediate variables are added to.</param>
/// <param name="iinfo">Index of the column being exported.</param>
/// <param name="srcVariableName">Name of the ONNX variable holding the input.</param>
/// <param name="dstVariableName">Name of the ONNX variable receiving the output.</param>
private void SaveAsOnnxCore(OnnxContext ctx, int iinfo, string srcVariableName, string dstVariableName)
{
    const int minimumOpSetVersion = 9;
    ctx.CheckOpSetVersion(minimumOpSetVersion, LoaderSignature);

    // Text, or a text vector of the recorded length when the source column is a vector.
    DataViewType textType = _isSourceVector[iinfo]
        ? (DataViewType)new VectorDataViewType(TextDataViewType.Instance, _sourceVectorLength[iinfo])
        : TextDataViewType.Instance;

    // 1. Tokenizer.
    string tokenizerOp = "Tokenizer";
    string tokenizerOutput = ctx.AddIntermediateVariable(textType, "TokenizerOutput", true);
    var tokenizerNode = ctx.CreateNode(tokenizerOp, srcVariableName, tokenizerOutput, ctx.GetNodeName(tokenizerOp), "com.microsoft");
    tokenizerNode.AddAttribute("mark", _parent._useMarkerChars);
    tokenizerNode.AddAttribute("mincharnum", 1);
    tokenizerNode.AddAttribute("pad_value", "");
    tokenizerNode.AddAttribute("separators", new string[] { "" });

    // 2. Squeeze on axis 1.
    string squeezeOp = "Squeeze";
    var squeezeOutput = ctx.AddIntermediateVariable(textType, "SqueezeOutput");
    var squeezeNode = ctx.CreateNode(squeezeOp, tokenizerOutput, squeezeOutput, ctx.GetNodeName(squeezeOp), "");
    squeezeNode.AddAttribute("axes", new long[] { 1 });

    // 3. LabelEncoder mapping each single-character string to its code value.
    //    Enumerable.Range(0, 65535) enumerates the values 0 through 65534.
    string encoderOp = "LabelEncoder";
    var labelEncoderOutput = ctx.AddIntermediateVariable(NumberDataViewType.Int64, "LabelEncoderOutput");
    var encoderNode = ctx.CreateNode(encoderOp, squeezeOutput, labelEncoderOutput, ctx.GetNodeName(encoderOp));
    IEnumerable<string> charStrings = Enumerable.Range(0, 65535).Select(code => ((char)code).ToString());
    IEnumerable<long> charValues = Enumerable.Range(0, 65535).Select(code => (long)code);
    encoderNode.AddAttribute("keys_strings", charStrings);
    encoderNode.AddAttribute("values_int64s", charValues);

    // 4. Cast to the type corresponding to DataKind.UInt16.
    string castOp = "Cast";
    var castNode = ctx.CreateNode(castOp, labelEncoderOutput, dstVariableName, ctx.GetNodeName(castOp), "");
    var targetType = InternalDataKindExtensions.ToInternalDataKind(DataKind.UInt16).ToType();
    castNode.AddAttribute("to", targetType);
}
/// <summary>
/// Creates a writer for the vector column at index <paramref name="source"/> of
/// <paramref name="cursor"/>. Slot names are loaded when the vector has a known size and the
/// column carries a slot-names annotation that is a text vector of that same size.
/// </summary>
/// <param name="cursor">The cursor the values and annotations are read from.</param>
/// <param name="type">The vector type of the column; its item type goes to the base constructor.</param>
/// <param name="source">Index of the source column in the cursor's schema.</param>
/// <param name="sep">The separator passed to the base constructor.</param>
public VecValueWriter(DataViewRowCursor cursor, VectorDataViewType type, int source, char sep)
    : base(type.ItemType, source, sep)
{
    _getSrc = cursor.GetGetter<VBuffer<T>>(cursor.Schema[source]);

    if (type.IsKnownSize)
    {
        var annotations = cursor.Schema[source].Annotations;
        var typeNames = annotations.Schema.GetColumnOrNull(AnnotationUtils.Kinds.SlotNames)?.Type as VectorDataViewType;

        bool usableSlotNames = typeNames != null
            && typeNames.Size == type.Size
            && typeNames.ItemType is TextDataViewType;
        if (usableSlotNames)
        {
            cursor.Schema[source].Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref _slotNames);
            Contracts.Check(_slotNames.Length == typeNames.Size, "Unexpected slot names length");
        }
    }

    _slotCount = type.Size;
}
/// <summary>
/// Builds the output schema from <paramref name="inputSchema"/> and records the column bindings.
/// Columns named in <c>_columnNames</c> are collected for vectorization, the column named
/// <paramref name="lengthColumnName"/> is recorded as the length column, and every other column
/// is passed through. When at least one column is vectorized, a single vector column is
/// appended, named after the first vectorized column.
/// </summary>
/// <param name="inputSchema">The schema of the input data.</param>
/// <param name="lengthColumnName">Name of the column recorded as the length column.</param>
/// <returns>The output schema.</returns>
/// <remarks>
/// Throws (via <c>Contracts.Except</c>) when a column to vectorize is not of a primitive type or
/// when the columns to vectorize do not all share the same type.
/// </remarks>
private DataViewSchema ProcessInputSchema(DataViewSchema inputSchema, string lengthColumnName)
{
    var builder = new DataViewSchema.Builder();
    for (int i = 0; i < inputSchema.Count; i++)
    {
        var name = inputSchema[i].Name;
        if (_columnNames.Contains(name))
        {
            _bindings.vectorToInputMap.Add(i);
        }
        else if (name == lengthColumnName)
        {
            _bindings.lengthColumn = i;
        }
        else
        {
            builder.AddColumn(name, inputSchema[i].Type);
            _bindings.outputToInputMap.Add(i);
        }
    }

    if (_bindings.vectorToInputMap.Count > 0)
    {
        var firstColumn = inputSchema[_bindings.vectorToInputMap[0]];
        var type = firstColumn.Type as PrimitiveDataViewType;
        if (type == null)
        {
            // Without this check a non-primitive column surfaces as a NullReferenceException below.
            throw Contracts.Except("Input columns to vectorize must be of a primitive type. " +
                "Column '{0}' has type {1}.", firstColumn.Name, firstColumn.Type.ToString());
        }

        for (int i = 1; i < _bindings.vectorToInputMap.Count; i++)
        {
            var nextColumn = inputSchema[_bindings.vectorToInputMap[i]];
            var nextType = nextColumn.Type as PrimitiveDataViewType;
            if (nextType == null)
            {
                throw Contracts.Except("Input columns to vectorize must be of a primitive type. " +
                    "Column '{0}' has type {1}.", nextColumn.Name, nextColumn.Type.ToString());
            }

            if (!nextType.Equals(type))
            {
                throw Contracts.Except("Input data types of the columns to vectorize must " +
                    "all be of the same type. Found {0} and {1}.",
                    type.ToString(),
                    nextType.ToString());
            }
        }

        // The vector column is created with size 0 and takes the first vectorized column's name;
        // it is placed after all pass-through columns.
        var outputColumnType = new VectorDataViewType(type, 0);
        var outputColumnName = firstColumn.Name;
        builder.AddColumn(outputColumnName, outputColumnType);

        _bindings.outputColumn = _bindings.outputToInputMap.Count;
    }

    return builder.ToSchema();
}
private VectorDataViewType[] GetTypesAndMetadata() { var md = Metadata; var types = new VectorDataViewType[Infos.Length]; for (int iinfo = 0; iinfo < Infos.Length; iinfo++) { var type = Infos[iinfo].TypeSrc; // This ensures that our feature count doesn't overflow. Host.Check(type.GetValueCount() < int.MaxValue / 2); if (!(type is VectorDataViewType vectorType)) { types[iinfo] = new VectorDataViewType(NumberDataViewType.Single, 2); }
/// <summary>
/// Creates new model parameters from the histograms of a trained model.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="labelHistogram">The histogram of labels; one entry per label.</param>
/// <param name="featureHistogram">
/// The feature histogram; one entry per label, each either null or of length
/// <paramref name="featureCount"/>.
/// </param>
/// <param name="featureCount">The number of features.</param>
internal NaiveBayesMulticlassModelParameters(IHostEnvironment env, int[] labelHistogram, int[][] featureHistogram, int featureCount)
    : base(env, LoaderSignature)
{
    Host.AssertValue(labelHistogram);
    Host.AssertValue(featureHistogram);
    Host.Assert(labelHistogram.Length == featureHistogram.Length);
    Host.Assert(featureHistogram.All(h => h == null || h.Length == featureCount));

    // Raw histograms and the counts derived from them.
    _labelHistogram = labelHistogram;
    _featureHistogram = featureHistogram;
    _featureCount = featureCount;
    _labelCount = _labelHistogram.Length;
    _totalTrainingCount = _labelHistogram.Sum();

    _absentFeaturesLogProb = CalculateAbsentFeatureLogProbabilities(_labelHistogram, _featureHistogram, _featureCount);

    // Input: one Single per feature. Output: one Single per label.
    _inputType = new VectorDataViewType(NumberDataViewType.Single, _featureCount);
    _outputType = new VectorDataViewType(NumberDataViewType.Single, _labelCount);
}
/// <summary>
/// Wraps the given predictors as <see cref="IValueMapper"/>s, validating each one with
/// <c>IsValid</c> and recording the input type those validations agree on.
/// </summary>
/// <param name="predictors">The predictors to wrap; checked to be non-empty.</param>
/// <remarks>
/// <c>CanSavePfa</c> is true only when every predictor implements <c>ISingleCanSavePfa</c> and
/// reports that it can be saved.
/// </remarks>
internal ImplRaw(TScalarPredictor[] predictors)
{
    Contracts.CheckNonEmpty(predictors, nameof(predictors));

    int predictorCount = predictors.Length;
    Predictors = new IValueMapper[predictorCount];

    VectorDataViewType sharedInput = null;
    for (int p = 0; p < predictorCount; p++)
    {
        var mapper = predictors[p] as IValueMapper;
        bool accepted = IsValid(mapper, ref sharedInput);
        Contracts.Check(accepted, "Predictor doesn't implement the expected interface");
        Predictors[p] = mapper;
    }

    CanSavePfa = Predictors.All(m => (m as ISingleCanSavePfa)?.CanSavePfa == true);

    Contracts.AssertValue(sharedInput);
    InputType = sharedInput;
}