/// <summary>
/// Checks that every input column required by the transformer is present in
/// <paramref name="inputSchema"/> as a vector of the expected item type, then returns the
/// input schema with one vector column added (or replaced) per transformer output.
/// </summary>
/// <param name="inputSchema">Shape of the incoming data; must not be null.</param>
/// <returns>A new <see cref="SchemaShape"/> containing the input columns plus the output columns.</returns>
public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
        {
            Host.CheckValue(inputSchema, nameof(inputSchema));

            // Start from the input columns; outputs with the same name overwrite them below.
            var resultDic = inputSchema.Columns.ToDictionary(x => x.Name);

            for (var i = 0; i < Transformer.Inputs.Length; i++)
            {
                var input = Transformer.Inputs[i];
                if (!inputSchema.TryFindColumn(input, out var col))
                {
                    throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", input);
                }
                // Only fixed-size and variable-size vector columns are accepted as inputs.
                if (!(col.Kind == SchemaShape.Column.VectorKind.VariableVector || col.Kind == SchemaShape.Column.VectorKind.Vector))
                {
                    throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", input, nameof(VectorType), col.GetTypeString());
                }
                var expectedType = TensorFlowUtils.Tf2MlNetType(Transformer.TFInputTypes[i]);
                if (col.ItemType != expectedType)
                {
                    throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", input, expectedType.ToString(), col.ItemType.ToString());
                }
            }
            for (var i = 0; i < Transformer.Outputs.Length; i++)
            {
                resultDic[Transformer.Outputs[i]] = new SchemaShape.Column(Transformer.Outputs[i], SchemaShape.Column.VectorKind.Vector, Transformer.OutputTypes[i].ItemType, false);
            }
            return(new SchemaShape(resultDic.Values));
        }
Exemple #2
0
            /// <summary>
            /// Looks up the graph node named <paramref name="columnName"/> and returns the vector
            /// column type built from its tensor shape together with its TensorFlow data type.
            /// </summary>
            /// <param name="graph">Graph that contains the output node.</param>
            /// <param name="columnName">Name of the output node in <paramref name="graph"/>.</param>
            /// <returns>The vector column type and the TensorFlow data type of the output.</returns>
            private static (ColumnType, TFDataType) GetOutputTypes(TFGraph graph, string columnName)
            {
                Contracts.AssertValue(graph);
                Contracts.AssertNonEmpty(columnName);
                Contracts.AssertValue(graph[columnName]);

                var outputNode  = new TFOutput(graph[columnName]);
                var tensorShape = graph.GetTensorShape(outputNode);

                // When the leading dimension is -1, BatchSize leading dimensions are dropped.
                // NOTE(review): this uses BatchSize as a count of dimensions to skip; presumably
                // BatchSize is 1 - confirm against its declaration.
                var leadingDimsToDrop = tensorShape[0] == -1 ? BatchSize : 0;
                int[] vectorDims = tensorShape.ToIntArray().Skip(leadingDimsToDrop).ToArray();

                var tfType   = outputNode.OutputType;
                var itemType = TensorFlowUtils.Tf2MlNetType(tfType);

                return (new VectorType(itemType, vectorDims), tfType);
            }
            /// <summary>
            /// Creates a mapper for <paramref name="parent"/> over <paramref name="inputSchema"/>.
            /// For every input of the parent transform it resolves the column index, checks that the
            /// column item type matches the type expected by the graph, and checks that the column
            /// size or dimensions match the graph's tensor shape.
            /// </summary>
            /// <param name="env">Host environment used to register this mapper; must not be null.</param>
            /// <param name="parent">The owning transform; must not be null.</param>
            /// <param name="inputSchema">Schema of the input data; must not be null.</param>
            public Mapper(IHostEnvironment env, TensorFlowTransform parent, ISchema inputSchema)
            {
                Contracts.CheckValue(env, nameof(env));
                _host = env.Register(nameof(Mapper));
                _host.CheckValue(inputSchema, nameof(inputSchema));
                _host.CheckValue(parent, nameof(parent));
                _parent          = parent;
                _schema          = inputSchema;
                _inputColIndices = new int[_parent.Inputs.Length];
                _isInputVector   = new bool[_parent.Inputs.Length];
                for (int i = 0; i < _parent.Inputs.Length; i++)
                {
                    if (!inputSchema.TryGetColumnIndex(_parent.Inputs[i], out _inputColIndices[i]))
                    {
                        throw _host.Except($"Column {_parent.Inputs[i]} doesn't exist");
                    }

                    var type         = inputSchema.GetColumnType(_inputColIndices[i]);
                    var expectedType = TensorFlowUtils.Tf2MlNetType(_parent.TFInputTypes[i]);
                    if (type.ItemType != expectedType)
                    {
                        throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", _parent.Inputs[i], expectedType.ToString(), type.ToString());
                    }
                    var originalShape = _parent.Graph.GetTensorShape(new TFOutput(_parent.Graph[_parent.Inputs[i]]));

                    // Materialize once: the dimensions are read by the checks below and by the
                    // error messages, and a lazy sequence would be re-enumerated each time.
                    // NOTE(review): BatchSize is used as the number of leading dimensions to drop;
                    // presumably it is 1 - confirm against its declaration.
                    int[] shape = originalShape.ToIntArray().Skip(originalShape[0] == -1 ? BatchSize : 0).ToArray();
                    _isInputVector[i] = type.IsVector;

                    // NOTE(review): type.AsVector is dereferenced without checking type.IsVector;
                    // confirm that non-vector columns cannot reach this point.
                    if (type.AsVector.DimCount == 1)
                    {
                        // Flat column: only the total element count can be compared.
                        int valCount = shape.Aggregate((x, y) => x * y);
                        if (type.ValueCount != valCount)
                        {
                            // Print the actual dimensions (ToString() on the sequence would print a
                            // type name) and report the column's length, not the expected one.
                            var shapeText = "[" + string.Join(", ", shape) + "]";
                            throw _host.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {shapeText}, but input data is of length {type.ValueCount}.");
                        }
                    }
                    else if (shape.Select((dim, j) => dim != type.AsVector.GetDim(j)).Any(b => b))
                    {
                        var shapeText = "[" + string.Join(", ", shape) + "]";
                        throw _host.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {shapeText}, but input data is {type.AsVector.ToString()}.");
                    }
                }
            }
        /// <summary>
        /// Loads a TensorFlow session from <paramref name="modelBytes"/>, validates that every
        /// requested input and output exists in the graph, and records the TensorFlow types and
        /// shapes of the inputs and the column types of the outputs.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="modelBytes">Serialized model passed to <c>LoadTFSession</c>; must not be null.</param>
        /// <param name="inputs">Names of graph nodes to feed; must not be null, each must be non-whitespace and exist in the graph.</param>
        /// <param name="outputs">Names of graph nodes to fetch; must not be null, each must be non-empty, unique and exist in the graph.</param>
        private TensorFlowTransform(IHostEnvironment env, byte[] modelBytes, string[] inputs, string[] outputs)
        {
            Contracts.CheckValue(env, nameof(env));
            // NOTE(review): nameof(RegistrationName) registers the literal text "RegistrationName",
            // not the member's value - confirm this is intended.
            _host = env.Register(nameof(RegistrationName));
            _host.CheckValue(modelBytes, nameof(modelBytes));
            // Validate the name arrays up front so a null argument is reported by parameter name
            // instead of failing inside the loops below.
            _host.CheckValue(inputs, nameof(inputs));
            _host.CheckValue(outputs, nameof(outputs));
            Session = LoadTFSession(modelBytes);
            foreach (var input in inputs)
            {
                _host.CheckNonWhiteSpace(input, nameof(inputs));
                if (Session.Graph[input] == null)
                {
                    throw _host.ExceptParam(nameof(inputs), $"Input column '{input}' does not exist in the model");
                }
                var tfInput = new TFOutput(Session.Graph[input]);
                if (!TensorFlowUtils.IsTypeSupported(tfInput.OutputType))
                {
                    throw _host.ExceptParam(nameof(modelBytes), $"Input type '{tfInput.OutputType}' of input column '{input}' is not supported in TensorFlow");
                }
            }

            // Tracks output names seen so far in order to reject duplicates.
            var newNames = new HashSet <string>();

            foreach (var output in outputs)
            {
                _host.CheckNonEmpty(output, nameof(outputs));
                if (!newNames.Add(output))
                {
                    throw _host.ExceptParam(nameof(outputs), $"Output column '{output}' specified multiple times");
                }
                if (Session.Graph[output] == null)
                {
                    throw _host.ExceptParam(nameof(outputs), $"Output column '{output}' does not exist in the model");
                }
            }

            Inputs        = inputs;
            TFInputTypes  = new TFDataType[Inputs.Length];
            TFInputShapes = new TFShape[Inputs.Length];
            for (int i = 0; i < Inputs.Length; i++)
            {
                var tfInput = new TFOutput(Graph[Inputs[i]]);
                TFInputTypes[i] = tfInput.OutputType;

                // Store a copy of the graph's shape in which every -1 dimension is replaced by BatchSize.
                var graphShape = Graph.GetTensorShape(tfInput);
                var newShape   = new long[graphShape.NumDimensions];
                for (int j = 0; j < graphShape.NumDimensions; j++)
                {
                    newShape[j] = graphShape[j] == -1 ? BatchSize : graphShape[j];
                }
                TFInputShapes[i] = new TFShape(newShape);
            }

            Outputs       = outputs;
            OutputTypes   = new ColumnType[Outputs.Length];
            TFOutputTypes = new TFDataType[Outputs.Length];
            for (int i = 0; i < Outputs.Length; i++)
            {
                var   tfOutput = new TFOutput(Graph[Outputs[i]]);
                var   shape    = Graph.GetTensorShape(tfOutput);
                // When the leading dimension is -1, BatchSize leading dimensions are dropped.
                // NOTE(review): BatchSize doubles as a dimension count here; presumably it is 1 - confirm.
                int[] dims     = shape.ToIntArray().Skip(shape[0] == -1 ? BatchSize : 0).ToArray();
                var   type     = TensorFlowUtils.Tf2MlNetType(tfOutput.OutputType);
                OutputTypes[i]   = new VectorType(type, dims);
                TFOutputTypes[i] = tfOutput.OutputType;
            }
        }
            /// <summary>
            /// Creates a mapper for <paramref name="parent"/> over <paramref name="inputSchema"/>.
            /// For every input of the parent transform it resolves the column index, checks the item
            /// type against the type expected by the graph, and computes a fully specified tensor
            /// shape (no -1 dimensions) that is stored in <c>_fullySpecifiedShapes</c>.
            /// </summary>
            /// <param name="env">Host environment used to register this mapper; must not be null.</param>
            /// <param name="parent">The owning transform; must not be null.</param>
            /// <param name="inputSchema">Schema of the input data; must not be null.</param>
            public Mapper(IHostEnvironment env, TensorFlowTransform parent, ISchema inputSchema)
            {
                Contracts.CheckValue(env, nameof(env));
                _host = env.Register(nameof(Mapper));
                _host.CheckValue(inputSchema, nameof(inputSchema));
                _host.CheckValue(parent, nameof(parent));
                _parent               = parent;
                _schema               = inputSchema;
                _inputColIndices      = new int[_parent.Inputs.Length];
                _isInputVector        = new bool[_parent.Inputs.Length];
                _fullySpecifiedShapes = new TFShape[_parent.Inputs.Length];
                for (int i = 0; i < _parent.Inputs.Length; i++)
                {
                    if (!inputSchema.TryGetColumnIndex(_parent.Inputs[i], out _inputColIndices[i]))
                    {
                        throw _host.Except($"Column {_parent.Inputs[i]} doesn't exist");
                    }

                    var type = inputSchema.GetColumnType(_inputColIndices[i]);
                    _isInputVector[i] = type.IsVector;
                    var expectedType = TensorFlowUtils.Tf2MlNetType(_parent.TFInputTypes[i]);
                    if (type.ItemType != expectedType)
                    {
                        throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", _parent.Inputs[i], expectedType.ToString(), type.ToString());
                    }
                    var originalShape = _parent.TFInputShapes[i];
                    var shape         = originalShape.ToIntArray();

                    // Column dimensions with a leading 1 prepended, so they line up with a tensor
                    // shape that carries one extra leading dimension.
                    // NOTE(review): type.AsVector is dereferenced without checking type.IsVector;
                    // confirm that non-vector columns cannot reach this point.
                    var colTypeDims = Enumerable.Range(0, type.AsVector.DimCount + 1).Select(d => d == 0 ? 1 : (long)type.AsVector.GetDim(d - 1)).ToArray();
                    if (shape == null)
                    {
                        // No dimensions available from the graph: use the column's own dimensions.
                        _fullySpecifiedShapes[i] = new TFShape(colTypeDims);
                    }
                    else if (type.AsVector.DimCount == 1)
                    {
                        // If the column is one dimension we make sure that the total size of the TF shape matches.
                        // Compute the total size of the known dimensions and count the unknown (-1) ones.
                        // A plain loop is used so that a shape with no positive dimension yields 1
                        // instead of failing on an empty sequence.
                        long knownSize    = 1;
                        int  unknownCount = 0;
                        foreach (var dim in shape)
                        {
                            if (dim > 0)
                            {
                                knownSize *= dim;
                            }
                            else if (dim == -1)
                            {
                                unknownCount++;
                            }
                        }

                        // The column length should be divisible by this, so that the other dimensions can be integral.
                        if (type.ValueCount % knownSize != 0)
                        {
                            throw _host.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is of length {type.ValueCount}.");
                        }

                        // What is left of the column length once the known dimensions are factored out.
                        long remainder = type.ValueCount / knownSize;

                        // If the shape has unknown dimensions, we should be able to create the length of the vector
                        // by plugging in a single value for all of them. E.g., if the shape is [?,?,3], then there
                        // should exist a value d such that d*d*3 is equal to the length of the input column.
                        long fill = 1;
                        if (unknownCount == 0)
                        {
                            // Nothing can absorb extra elements. A remainder of 0 (ValueCount == 0) is
                            // passed through, as the original code did.
                            if (remainder > 1)
                            {
                                throw _host.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is of length {type.ValueCount}.");
                            }
                        }
                        else
                        {
                            // Math.Pow is inexact (the cube root of 64 evaluates just below 4), so round
                            // the candidate and verify it with exact integer arithmetic.
                            fill = (long)Math.Round(Math.Pow(remainder, 1.0 / unknownCount));
                            long check = 1;
                            for (int k = 0; k < unknownCount; k++)
                            {
                                check *= fill;
                            }
                            if (check != remainder)
                            {
                                throw _host.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is of length {type.ValueCount}.");
                            }
                        }

                        // Fill in the unknown dimensions.
                        var l = new long[originalShape.NumDimensions];
                        for (int ishape = 0; ishape < originalShape.NumDimensions; ishape++)
                        {
                            l[ishape] = originalShape[ishape] == -1 ? fill : originalShape[ishape];
                        }
                        _fullySpecifiedShapes[i] = new TFShape(l);
                    }
                    else
                    {
                        // The ranks must agree before comparing dimension by dimension; otherwise the
                        // comparison would either index past colTypeDims or check only a prefix.
                        if (shape.Length != colTypeDims.Length
                            || shape.Select((dim, j) => dim != -1 && dim != colTypeDims[j]).Any(b => b))
                        {
                            throw _host.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is {type.AsVector.ToString()}.");
                        }

                        // Fill in the unknown dimensions.
                        var l = new long[originalShape.NumDimensions];
                        for (int ishape = 0; ishape < originalShape.NumDimensions; ishape++)
                        {
                            l[ishape] = originalShape[ishape] == -1 ? colTypeDims[ishape] : originalShape[ishape];
                        }
                        _fullySpecifiedShapes[i] = new TFShape(l);
                    }
                }
            }