示例#1
0
 /// <summary>
 /// Creates a <see cref="TimeSeriesPredictionFunction{TSrc, TDst}"/> — a prediction function/engine for a time series pipeline.
 /// The engine updates the state of the time series model with observations seen at prediction time and allows checkpointing the model.
 /// </summary>
 /// <typeparam name="TSrc">Class describing the input schema to the model.</typeparam>
 /// <typeparam name="TDst">Class describing the output schema of the prediction.</typeparam>
 /// <param name="transformer">The time series pipeline in the form of an <see cref="ITransformer"/>.</param>
 /// <param name="env">Usually <see cref="MLContext"/>.</param>
 /// <param name="ignoreMissingColumns">Whether to ignore missing columns. Default is false.</param>
 /// <param name="inputSchemaDefinition">Input schema definition. Default is null.</param>
 /// <param name="outputSchemaDefinition">Output schema definition. Default is null.</param>
 /// <p>Example code can be found by searching for <i>TimeSeriesPredictionFunction</i> in <a href='https://github.com/dotnet/machinelearning'>ML.NET.</a></p>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 /// [!code-csharp[MF](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/IidSpikeDetectorTransform.cs)]
 /// [!code-csharp[MF](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/IidChangePointDetectorTransform.cs)]
 /// ]]>
 /// </format>
 /// </example>
 public static TimeSeriesPredictionFunction <TSrc, TDst> CreateTimeSeriesPredictionFunction <TSrc, TDst>(this ITransformer transformer, IHostEnvironment env,
                                                                                                         bool ignoreMissingColumns = false, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
     where TSrc : class
     where TDst : class, new()
 {
     // Validate the environment first, then use it to validate the remaining arguments.
     Contracts.CheckValue(env, nameof(env));
     env.CheckValue(transformer, nameof(transformer));
     env.CheckValueOrNull(inputSchemaDefinition);
     env.CheckValueOrNull(outputSchemaDefinition);

     return new TimeSeriesPredictionFunction <TSrc, TDst>(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition);
 }
示例#2
0
        public void MetadataSupportInDataViewConstruction()
        {
            // Load sample data and derive a schema definition from the example type.
            var data = ReadBreastCancerExamples();
            var autoSchema = SchemaDefinition.Create(typeof(BreastCancerExample));

            var mlContext = new MLContext(0);

            // Build the metadata (annotation) kinds, values and types we will attach to columns.
            var kindFloat = "Testing float as metadata.";
            float valueFloat = 10;
            var coltypeFloat = NumberDataViewType.Single;

            var kindString = "Testing string as metadata.";
            var valueString = "Strings have value.";
            var coltypeString = TextDataViewType.Instance;

            var kindStringArray = "Testing string array as metadata.";
            var valueStringArray = "I really have no idea what these features entail.".Split(' ');
            var coltypeStringArray = new VectorDataViewType(coltypeString, valueStringArray.Length);

            var kindFloatArray = "Testing float array as metadata.";
            var valueFloatArray = new float[] { 1, 17, 7, 19, 25, 0 };
            var coltypeFloatArray = new VectorDataViewType(coltypeFloat, valueFloatArray.Length);

            var kindVBuffer = "Testing VBuffer as metadata.";
            var valueVBuffer = new VBuffer <float>(4, new float[] { 4, 6, 89, 5 });
            var coltypeVBuffer = new VectorDataViewType(coltypeFloat, valueVBuffer.Length);

            // Attach the scalar annotations to the label column and the vector annotations
            // to the feature column.
            var labelColumn = autoSchema[0];
            labelColumn.AddAnnotation(kindFloat, valueFloat, coltypeFloat);
            labelColumn.AddAnnotation(kindString, valueString, coltypeString);

            var featureColumn = autoSchema[1];
            featureColumn.AddAnnotation(kindStringArray, valueStringArray, coltypeStringArray);
            featureColumn.AddAnnotation(kindFloatArray, valueFloatArray, coltypeFloatArray);
            featureColumn.AddAnnotation(kindVBuffer, valueVBuffer, coltypeVBuffer);

            var idv = mlContext.Data.LoadFromEnumerable(data, autoSchema);

            // The label column carries exactly the two scalar annotations, in insertion order.
            Assert.True(idv.Schema[0].Annotations.Schema.Count == 2);
            Assert.True(idv.Schema[0].Annotations.Schema[0].Name == kindFloat);
            Assert.True(idv.Schema[0].Annotations.Schema[0].Type == coltypeFloat);
            Assert.True(idv.Schema[0].Annotations.Schema[1].Name == kindString);
            Assert.True(idv.Schema[0].Annotations.Schema[1].Type == TextDataViewType.Instance);

            // The feature column carries the three vector annotations; looking up a kind that
            // was never added to this column must throw.
            Assert.True(idv.Schema[1].Annotations.Schema.Count == 3);
            Assert.True(idv.Schema[1].Annotations.Schema[0].Name == kindStringArray);
            Assert.True(idv.Schema[1].Annotations.Schema[0].Type is VectorDataViewType vectorType && vectorType.ItemType is TextDataViewType);
            Assert.Throws <ArgumentOutOfRangeException>(() => idv.Schema[1].Annotations.Schema[kindFloat]);

            // Round-trip each annotation value through GetValue and compare to the original.
            float retrievedFloat = 0;
            idv.Schema[0].Annotations.GetValue(kindFloat, ref retrievedFloat);
            Assert.True(Math.Abs(retrievedFloat - valueFloat) < .000001);

            ReadOnlyMemory <char> retrievedReadOnlyMemory = new ReadOnlyMemory <char>();
            idv.Schema[0].Annotations.GetValue(kindString, ref retrievedReadOnlyMemory);
            Assert.True(retrievedReadOnlyMemory.Span.SequenceEqual(valueString.AsMemory().Span));

            VBuffer <ReadOnlyMemory <char> > retrievedReadOnlyMemoryVBuffer = new VBuffer <ReadOnlyMemory <char> >();
            idv.Schema[1].Annotations.GetValue(kindStringArray, ref retrievedReadOnlyMemoryVBuffer);
            Assert.True(retrievedReadOnlyMemoryVBuffer.DenseValues().Select((s, i) => s.ToString() == valueStringArray[i]).All(b => b));

            VBuffer <float> retrievedFloatVBuffer = new VBuffer <float>(1, new float[] { 2 });
            idv.Schema[1].Annotations.GetValue(kindFloatArray, ref retrievedFloatVBuffer);
            VBuffer <float> valueFloatVBuffer = new VBuffer <float>(valueFloatArray.Length, valueFloatArray);
            Assert.True(retrievedFloatVBuffer.Items().SequenceEqual(valueFloatVBuffer.Items()));

            VBuffer <float> retrievedVBuffer = new VBuffer <float>();
            idv.Schema[1].Annotations.GetValue(kindVBuffer, ref retrievedVBuffer);
            Assert.True(retrievedVBuffer.Items().SequenceEqual(valueVBuffer.Items()));

            // Requesting a value under a mismatched type must throw.
            Assert.Throws <InvalidOperationException>(() => idv.Schema[1].Annotations.GetValue(kindFloat, ref retrievedReadOnlyMemoryVBuffer));
        }
        /// <summary>
        /// Creates a filter transform.
        /// </summary>
        /// <param name="env">The host environment.</param>
        /// <param name="source">The data view upon which we construct the transform.</param>
        /// <param name="filterFunc">The function by which we transform source to destination columns
        /// and decide whether to keep the row.</param>
        /// <param name="initStateAction">The function that is called once per cursor to initialize state. Can be null.</param>
        /// <param name="inputSchemaDefinition">The schema definition overrides for <typeparamref name="TSrc"/>.</param>
        /// <param name="outputSchemaDefinition">The schema definition overrides for <typeparamref name="TDst"/>.</param>
        public StatefulFilterTransform(IHostEnvironment env, IDataView source, Func <TSrc, TDst, TState, bool> filterFunc,
                                       Action <TState> initStateAction,
                                       SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
        {
            _host = env.Register(RegistrationName);
            _host.AssertValue(source, "source");
            _host.AssertValue(filterFunc, "filterFunc");
            _host.AssertValueOrNull(initStateAction);
            _host.AssertValueOrNull(inputSchemaDefinition);
            _host.AssertValueOrNull(outputSchemaDefinition);

            _source = source;
            _filterFunc = filterFunc;
            _initStateAction = initStateAction;
            _inputSchemaDefinition = inputSchemaDefinition;

            // A typed cursorable view over the source lets the transform read rows as TSrc.
            _typedSource = TypedCursorable <TSrc> .Create(_host, Source, false, inputSchemaDefinition);

            // Derive the output schema from TDst (with any user overrides) and bind the new
            // columns after the source columns.
            var outputSchema = InternalSchemaDefinition.Create(typeof(TDst), outputSchemaDefinition);
            _addedSchema = outputSchema;
            _bindings = new ColumnBindings(Source.Schema, DataViewConstructionUtils.GetSchemaColumns(outputSchema));
        }
示例#4
0
 /// <summary>
 /// Constructs a prediction function over a time series pipeline.
 /// </summary>
 /// <param name="env">The host environment.</param>
 /// <param name="transformer">The time series pipeline; it is passed through <c>CloneTransformers</c>
 /// before reaching the base constructor — presumably so prediction-time state updates do not
 /// mutate the caller's transformer (verify against <c>CloneTransformers</c>).</param>
 /// <param name="ignoreMissingColumns">Whether to ignore missing columns in the input.</param>
 /// <param name="inputSchemaDefinition">Optional input schema definition overrides.</param>
 /// <param name="outputSchemaDefinition">Optional output schema definition overrides.</param>
 public TimeSeriesPredictionFunction(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns,
                                     SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) :
     base(env, CloneTransformers(transformer), ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition)
 {
 }
示例#5
0
        public void ImportSingleTestCaseTest()
        {
            // Create the first test case in a helper method...
            var testCase1 = BuildFirstTestCase();

            // Create the second test case and import the first test case into it...
            var testCase2 = new TestCase { Name = "Copy First File Test" };

            var createFileStep = new CreateStep { CreationPath = @"File2.xml" };
            createFileStep.DataSource = new FileDataLoader
            {
                FilePath = @"..\..\..\Test\BizUnit.TestSteps.Tests\TestData\PurchaseOrder001.xml"
            };
            testCase2.ExecutionSteps.Add(createFileStep);

            testCase2.ExecutionSteps.Add(new ImportTestCaseStep { TestCase = testCase1 });

            // Create a validating read step...
            var validatingFileReadStep = new FileReadMultipleStep
            {
                DirectoryPath = @".",
                SearchPattern = "File*.xml",
                ExpectedNumberOfFiles = 2
            };

            // Validate each file against the purchase-order schema and check the PONumber value.
            var validation = new XmlValidationStep();
            validation.XmlSchemas.Add(new SchemaDefinition
            {
                XmlSchemaPath = @"..\..\..\Test\BizUnit.TestSteps.Tests\TestData\PurchaseOrder.xsd",
                XmlSchemaNameSpace = "http://SendMail.PurchaseOrder"
            });
            validation.XPathValidations.Add(new XPathDefinition
            {
                Description = "PONumber",
                XPath = "/*[local-name()='PurchaseOrder' and namespace-uri()='http://SendMail.PurchaseOrder']/*[local-name()='PONumber' and namespace-uri()='']",
                Value = "12323"
            });

            validatingFileReadStep.SubSteps.Add(validation);
            testCase2.ExecutionSteps.Add(validatingFileReadStep);

            // Run the second test case...
            var bizUnit = new BizUnit(testCase2);
            bizUnit.RunTest();

            TestCase.SaveToFile(testCase2, "ImportSingleTestCaseTest.xml");
        }
        /// <summary>
        /// Reads a single input message from disk and validates it against the supplied XML schema.
        /// </summary>
        /// <param name="inputMessageFolderPath">Directory containing the input message.</param>
        /// <param name="inputFile">Search pattern for the input file; exactly one match is expected.</param>
        /// <param name="sourceSchema">The XSD to validate the message against.</param>
        // NOTE(review): "Souce" in the method name looks like a typo for "Source", but renaming
        // would break existing callers, so the public name is kept as-is.
        public static void ValidateSouceMessage(string inputMessageFolderPath, string inputFile, SchemaDefinition sourceSchema)
        {
            var fileReadStep = new FileReadMultipleStep
            {
                DeleteFiles = false,
                DirectoryPath = inputMessageFolderPath,
                SearchPattern = inputFile,
                FailOnError = true,
                ExpectedNumberOfFiles = 1
            };

            // Validate the message that was read against the supplied schema.
            var inputValidationStep = new XmlValidationStep();
            inputValidationStep.XmlSchemas.Add(sourceSchema);
            fileReadStep.SubSteps.Add(inputValidationStep);

            var inValTestCase = new TestCase { Name = "Validate Input Message" };
            inValTestCase.ExecutionSteps.Add(fileReadStep);

            var testRunner = new BizUnit.Core.TestRunner(inValTestCase);
            testRunner.Run();
        }
示例#7
0
        /// <summary>
        /// Create a custom mapping of input columns to output columns.
        /// </summary>
        /// <param name="env">The host environment.</param>
        /// <param name="mapAction">The action by which we map source to destination columns.</param>
        /// <param name="contractName">The name of the action (will be saved to the model).</param>
        /// <param name="inputSchemaDefinition">Additional parameters for schema mapping between <typeparamref name="TSrc"/> and input data.</param>
        /// <param name="outputSchemaDefinition">Additional parameters for schema mapping between <typeparamref name="TDst"/> and output data.</param>
        public CustomMappingTransformer(IHostEnvironment env, Action <TSrc, TDst> mapAction, string contractName,
                                        SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
        {
            Contracts.CheckValue(env, nameof(env));
            _host = env.Register(nameof(CustomMappingTransformer <TSrc, TDst>));
            _host.CheckValue(mapAction, nameof(mapAction));
            _host.CheckValueOrNull(contractName);
            _host.CheckValueOrNull(inputSchemaDefinition);
            _host.CheckValueOrNull(outputSchemaDefinition);

            _mapAction = mapAction;
            _inputSchemaDefinition = inputSchemaDefinition;
            _contractName = contractName;

            // Derive the output schema from TDst, honoring the caller's overrides when provided.
            if (outputSchemaDefinition == null)
                _addedSchema = InternalSchemaDefinition.Create(typeof(TDst), SchemaDefinition.Direction.Write);
            else
                _addedSchema = InternalSchemaDefinition.Create(typeof(TDst), outputSchemaDefinition);
        }
示例#8
0
 /// <summary>
 /// Constructs a prediction engine over the given transformer; all work is delegated to the base class.
 /// </summary>
 /// <param name="env">The host environment.</param>
 /// <param name="transformer">The transformer used to produce predictions.</param>
 /// <param name="ignoreMissingColumns">Whether to ignore columns missing from the input.</param>
 /// <param name="inputSchemaDefinition">Optional input schema definition overrides.</param>
 /// <param name="outputSchemaDefinition">Optional output schema definition overrides.</param>
 internal PredictionEngine(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns,
                           SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
     : base(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition)
 {
 }
示例#9
0
 /// <summary>
 /// Create a custom mapping of input columns to output columns. Most likely, you should call this method when you are loading the model:
 /// use <see cref="CustomMapping{TSrc, TDst}(TransformsCatalog, Action{TSrc, TDst}, string, SchemaDefinition, SchemaDefinition)"/> when you are
 /// training the model.
 /// </summary>
 /// <typeparam name="TSrc">The class defining which columns to take from the incoming data.</typeparam>
 /// <typeparam name="TDst">The class defining which new columns are added to the data.</typeparam>
 /// <param name="catalog">The transform catalog.</param>
 /// <param name="mapAction">The mapping action. This must be thread-safe and free from side effects.</param>
 /// <param name="contractName">The contract name, used by ML.NET for loading the model. If <c>null</c> is specified, such a trained model would not be save-able.</param>
 /// <param name="inputSchemaDefinition">Additional parameters for schema mapping between <typeparamref name="TSrc"/> and input data.</param>
 /// <param name="outputSchemaDefinition">Additional parameters for schema mapping between <typeparamref name="TDst"/> and output data.</param>
 public static CustomMappingTransformer <TSrc, TDst> CustomMappingTransformer <TSrc, TDst>(this TransformsCatalog catalog, Action <TSrc, TDst> mapAction, string contractName,
                                                                                           SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
     where TSrc : class, new()
     where TDst : class, new()
 {
     // Delegate to the transformer constructor, resolving the host environment from the catalog.
     return new CustomMappingTransformer <TSrc, TDst>(catalog.GetEnvironment(), mapAction, contractName, inputSchemaDefinition, outputSchemaDefinition);
 }
示例#10
0
 /// <summary>
 /// Create a custom mapping of input columns to output columns.
 /// </summary>
 /// <param name="env">The host environment.</param>
 /// <param name="mapAction">The mapping action. This must be thread-safe and free from side effects.</param>
 /// <param name="contractName">The contract name, used by ML.NET for loading the model. If <c>null</c> is specified, such a trained model would not be save-able.</param>
 /// <param name="inputSchemaDefinition">Additional parameters for schema mapping between <typeparamref name="TSrc"/> and input data.</param>
 /// <param name="outputSchemaDefinition">Additional parameters for schema mapping between <typeparamref name="TDst"/> and output data.</param>
 /// <remarks>
 /// All work is done in the base constructor: <paramref name="env"/> is validated via
 /// <c>Contracts.CheckRef</c> and used to register a host, and the estimator wraps a
 /// <see cref="CustomMappingTransformer{TSrc, TDst}"/> built from the same arguments.
 /// </remarks>
 public CustomMappingEstimator(IHostEnvironment env, Action <TSrc, TDst> mapAction, string contractName,
                               SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
     : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(CustomMappingEstimator <TSrc, TDst>)),
            new CustomMappingTransformer <TSrc, TDst>(env, mapAction, contractName, inputSchemaDefinition, outputSchemaDefinition))
 {
 }
        /// <summary>
        /// End-to-end demo: a Trust Anchor is registered on the ledger, a schema and credential
        /// definition are published, and a credential is issued to and stored by a prover.
        /// </summary>
        public void Demo()
        {
            // Initialization
            // 1. Creating a new local pool ledger configuration that can be used later to connect pool nodes.
            // 2. Open pool ledger and get the pool handle from libindy.
            // 3. Creates a new identity wallet
            // 4. Open identity wallet and get the wallet handle from libindy
            // SEE Initialize() above

            // 5. Generating and storing steward DID and Verkey
            IDid stewardDid = IndyDotNet.Did.Factory.CreateMyDid(_pool, _wallet, new IdentitySeed()
            {
                Seed = "000000000000000000000000Steward1"
            });

            // 6. Generating and storing Trust Anchor DID and Verkey
            IDid trustAnchor = IndyDotNet.Did.Factory.CreateMyDid(_pool, _wallet, null);

            // 7. Build NYM request to add Trust Anchor to the ledger
            INymLedger nymLedger = IndyDotNet.Ledger.Factory.CreateNymLedger();
            BuildRequestResult nymRequest = nymLedger.BuildRequest(stewardDid, trustAnchor, trustAnchor.VerKey, "", NymRoles.TrustAnchor);

            // 8. Sending the nym request to ledger
            SignAndSubmitRequestResponse nymResult = nymLedger.SignAndSubmitRequest(_pool, _wallet, stewardDid, nymRequest);

            // 9. build the schema definition request
            SchemaDefinition schemaDefinition = new SchemaDefinition()
            {
                Name = "name",
                Version = "1.1",
                Id = "id"
            };

            schemaDefinition.AttributeNames.Add("age");
            schemaDefinition.AttributeNames.Add("height");
            schemaDefinition.AttributeNames.Add("sex");
            schemaDefinition.AttributeNames.Add("name");

            ISchemaLedger schemaLedger = IndyDotNet.Ledger.Factory.CreateSchemaLedger();
            BuildSchemaResult buildSchema = schemaLedger.BuildSchemaRequest(stewardDid, schemaDefinition);

            // 10. Sending the SCHEMA request to the ledger
            SignAndSubmitRequestResponse signAndSubmitRequestResponse = schemaLedger.SignAndSubmitRequest(_pool, _wallet, stewardDid, buildSchema);

            // 11. Creating and storing CLAIM DEFINITION using anoncreds as Trust Anchor, for the given Schema
            IIssuerAnonCreds issuer = IndyDotNet.AnonCreds.Factory.GetIssuerAnonCreds(_wallet);
            CredentialDefinitionSchema credentialschema = new CredentialDefinitionSchema()
            {
                SequenceNo = 1,
                Id = "id",
                Name = "name",
                Version = "1.1",
                Tag = "TAG"
            };

            credentialschema.AttributeNames.Add("age");
            credentialschema.AttributeNames.Add("height");
            credentialschema.AttributeNames.Add("sex");
            credentialschema.AttributeNames.Add("name");

            IssuerCredentialDefinition credentialDefinition = issuer.CreateStoreCredentialDef(trustAnchor, credentialschema);

            // 12. Creating Prover wallet and opening it to get the handle
            WalletConfig config = new WalletConfig()
            {
                Id = "ProverIssueCredentialDemoWalletId"
            };

            _proverWallet = IndyDotNet.Wallet.Factory.GetWallet(config, _credentials);
            _proverWallet.Create();
            _proverWallet.Open();

            // 13. Prover is creating Master Secret
            IProverAnonCreds prover = IndyDotNet.AnonCreds.Factory.GetProverAnonCreds(_proverWallet);

            prover.CreateMasterSecret("master_secret");

            // 14. Issuer(Trust Anchor) is creating a Claim Offer for Prover
            IssuerCredentialOffer claimOffer = issuer.CreateCredentialOffer(credentialDefinition.Id);

            // 15. Prover creates Claim Request
            IDid proverDID = IndyDotNet.Did.Factory.CreateMyDid(_pool, _proverWallet, null);

            (ProverCredentialRequest credentialRequest, ProverCredentialRequestMetadata credentialRequestMetadata) = prover.CreateCredentialRequest(proverDID, claimOffer, credentialDefinition, "master_secret");

            Assert.AreEqual(credentialRequest.CredDefId, claimOffer.CredDefId);
            Assert.AreEqual(credentialRequest.ProverDid, proverDID.Did);

            // 16. Issuer(Trust Anchor) creates Claim for Claim Request
            AttributeValuesList attributes = new AttributeValuesList();

            attributes.Add(new AttributeWithValue()
            {
                Name = "age",
                Value = "27",
                CheckValue = "27"
            });
            attributes.Add(new AttributeWithValue()
            {
                Name = "height",
                Value = "175",
                CheckValue = "175"
            });
            attributes.Add(new AttributeWithValue()
            {
                Name = "name",
                Value = "Alex",
                CheckValue = "99262857098057710338306967609588410025648622308394250666849665532448612202874"
            });
            attributes.Add(new AttributeWithValue()
            {
                Name = "sex",
                Value = "male",
                CheckValue = "5944657099558967239210949258394887428692050081607692519917050011144233115103"
            });

            IssuerCredential issuerCredential = issuer.CreateCredential(claimOffer, credentialRequest, attributes);

            // 17. Prover processes and stores Claim
            string proverCredentialId = prover.SaveCredential(issuerCredential, credentialDefinition, credentialRequestMetadata);

            // BUG FIX: the original asserted IsTrue(string.IsNullOrEmpty(...)), which passes only
            // when SaveCredential returns NO id — contradicting its own failure message. The intent
            // (per that message) is that a non-empty credential id must be returned.
            Assert.IsFalse(string.IsNullOrEmpty(proverCredentialId), "SaveCredential did not return proverCredentialId");

            // clean up
            // Close and delete wallet
            // Close pool
            // Delete pool ledger config
            // SEE Cleanup() above
        }
示例#12
0
 /// <summary>
 /// Create a prediction engine for one-time prediction.
 /// </summary>
 /// <typeparam name="TSrc">The class that defines the input data.</typeparam>
 /// <typeparam name="TDst">The class that defines the output data.</typeparam>
 /// <param name="transformer">The transformer to use for prediction.</param>
 /// <param name="env">The environment to use.</param>
 /// <param name="inputSchemaDefinition">Additional settings of the input schema.</param>
 /// <param name="outputSchemaDefinition">Additional settings of the output schema.</param>
 public static PredictionEngine <TSrc, TDst> CreatePredictionEngine <TSrc, TDst>(this ITransformer transformer,
                                                                                 IHostEnvironment env, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
     where TSrc : class
     where TDst : class, new()
 {
     // The hard-coded 'true' is the engine's ignoreMissingColumns argument.
     return new PredictionEngine <TSrc, TDst>(env, transformer, true, inputSchemaDefinition, outputSchemaDefinition);
 }
示例#13
0
        internal PipeEngine(MLContext mlContext, IDataView pipe, bool ignoreMissingColumns, SchemaDefinition schemaDefinition = null)
        {
            // Validate arguments; the schema definition override is optional.
            Contracts.AssertValue(mlContext);
            mlContext.AssertValue(pipe);
            mlContext.AssertValueOrNull(schemaDefinition);

            // Wrap the pipeline in a typed cursorable over TDst and start the row counter at zero.
            _counter = 0;
            _cursorablePipe = mlContext.Data.AsCursorable <TDst>(pipe, ignoreMissingColumns, schemaDefinition);
        }
示例#14
0
        /// <summary>
        /// Convert an <see cref="IDataView"/> into a strongly-typed <see cref="IEnumerable{TRow}"/>.
        /// </summary>
        /// <typeparam name="TRow">The user-defined row type.</typeparam>
        /// <param name="data">The underlying data view.</param>
        /// <param name="reuseRowObject">Whether to return the same object on every row, or allocate a new one per row.</param>
        /// <param name="ignoreMissingColumns">Whether to ignore the case when a requested column is not present in the data view.</param>
        /// <param name="schemaDefinition">Optional user-provided schema definition. If it is not present, the schema is inferred from the definition of T.</param>
        /// <returns>The <see cref="IEnumerable{TRow}"/> that holds the data in <paramref name="data"/>. It can be enumerated multiple times.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[BootstrapSample](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/DataViewEnumerable.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public IEnumerable <TRow> CreateEnumerable <TRow>(IDataView data, bool reuseRowObject,
                                                          bool ignoreMissingColumns = false, SchemaDefinition schemaDefinition = null)
            where TRow : class, new()
        {
            _env.CheckValue(data, nameof(data));
            _env.CheckValueOrNull(schemaDefinition);

            // Build a pipe engine over the data view and hand back its row enumeration.
            var pipeEngine = new PipeEngine <TRow>(_env, data, ignoreMissingColumns, schemaDefinition);
            return pipeEngine.RunPipe(reuseRowObject);
        }
示例#15
0
 /// <summary>
 /// Add a SchemaDefinition that will be built into the domain model.
 /// </summary>
 /// <param name="schemaDefinition">The schema definition to register with the domain model.</param>
 public void AddSchema(SchemaDefinition schemaDefinition)
 {
     // Guard against use before the domain model has been created, then delegate.
     EnsureDomainModelIsNotNull();
     _domainModel.AddSchema(schemaDefinition);
 }
示例#16
0
        internal PipeEngine(IHostEnvironment env, IDataView pipe, bool ignoreMissingColumns, SchemaDefinition schemaDefinition = null)
        {
            // Validate arguments; the schema definition override is optional.
            Contracts.AssertValue(env);
            env.AssertValue(pipe);
            env.AssertValueOrNull(schemaDefinition);

            // Wrap the pipeline in a typed cursorable over TDst and start the row counter at zero.
            _counter = 0;
            _cursorablePipe = env.AsCursorable <TDst>(pipe, ignoreMissingColumns, schemaDefinition);
        }
示例#17
0
        internal static IDomainModelDefinitionsProvider BuildTestExtensionDefinitionsProvider()
        {
            // One aggregate rooted at ExtensionAggregate, containing ExtensionEntity.
            var aggregateDefinitions = new[]
            {
                new AggregateDefinition(
                    new FullName("ExtensionPhysical", "ExtensionAggregate"),
                    new[] { new FullName("ExtensionPhysical", "ExtensionEntity") })
            };

            // Two entities, each with a single key property and a primary identifier.
            var entityDefinitions = new[]
            {
                new EntityDefinition(
                    "ExtensionPhysical",
                    "ExtensionAggregate",
                    new[] { new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true) },
                    new[]
                    {
                        new EntityIdentifierDefinition(
                            "PK",
                            new[] { "KeyProperty1", "KeyProperty2" },
                            isPrimary: true)
                    },
                    true),
                new EntityDefinition(
                    "ExtensionPhysical",
                    "ExtensionEntity",
                    new[] { new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true) },
                    new[]
                    {
                        new EntityIdentifierDefinition(
                            "PK",
                            new[] { "KeyProperty1", "KeyProperty2" },
                            isPrimary: true)
                    },
                    true)
            };

            // A one-to-one extension association from the core entity to the extension entity.
            var associationDefinitions = new[]
            {
                new AssociationDefinition(
                    new FullName("ExtensionPhysical", "FK_ExtensionEntity"),
                    Cardinality.OneToOneExtension,
                    new FullName(EdFiConventions.PhysicalSchemaName, "CoreEntity"),
                    new[] { new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true) },
                    new FullName("ExtensionPhysical", "ExtensionEntity"),
                    new[] { new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true) },
                    isIdentifying: true,
                    isRequired: true)
            };

            var schemaDefinition = new SchemaDefinition("Extension-Logical", "ExtensionPhysical");

            var modelDefinitions = new DomainModelDefinitions(
                schemaDefinition,
                aggregateDefinitions,
                entityDefinitions,
                associationDefinitions);

            // Fake provider that always returns the definitions built above.
            var domainModelDefinitionsProvider = A.Fake <IDomainModelDefinitionsProvider>();

            A.CallTo(() => domainModelDefinitionsProvider.GetDomainModelDefinitions())
                .Returns(modelDefinitions);

            return domainModelDefinitionsProvider;
        }
示例#18
0
        // Computes permutation feature importance (PFI): evaluates a baseline metric, then for
        // each working feature slot shuffles that slot's values across the cached rows,
        // re-evaluates the model, and records deltaFunc(permutedMetrics, baselineMetrics).
        // NOTE(review): the declaration's modifiers/return type fall outside this excerpt;
        // the body returns an immutable array built from a List<TResult>.
        //
        // Parameters:
        //   env                    - host environment for registration, logging and progress.
        //   model                  - prediction transformer whose feature importance is measured.
        //   data                   - dataset to permute and evaluate over.
        //   evaluationFunc         - scores a (transformed) data view into a TMetric.
        //   deltaFunc              - combines permuted and baseline metrics into a delta.
        //   features               - name of the vector-valued features column in `data`.
        //   permutationCount       - shuffles per feature, to build a confidence interval.
        //   useFeatureWeightFilter - when true, slots the predictor weights at zero are skipped.
        //   topExamples            - cap on rows cached for permutation; null = whole dataset.
        GetImportanceMetricsMatrix(
            IHostEnvironment env,
            IPredictionTransformer <IPredictor> model,
            IDataView data,
            Func <IDataView, TMetric> evaluationFunc,
            Func <TMetric, TMetric, TMetric> deltaFunc,
            string features,
            int permutationCount,
            bool useFeatureWeightFilter = false,
            int?topExamples             = null)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register(nameof(PermutationFeatureImportance <TMetric, TResult>));

            host.CheckValue(model, nameof(model));
            host.CheckValue(data, nameof(data));
            host.CheckNonEmpty(features, nameof(features));

            // null means "no cap": fall back to the largest representable array size.
            topExamples = topExamples ?? Utils.ArrayMaxSize;
            host.Check(topExamples > 0, "Provide how many examples to use (positive number) or set to null to use whole dataset.");

            VBuffer <ReadOnlyMemory <char> > slotNames = default;
            var metricsDelta = new List <TResult>();

            using (var ch = host.Start("GetImportanceMetrics"))
            {
                ch.Trace("Scoring and evaluating baseline.");
                var baselineMetrics = evaluationFunc(model.Transform(data));

                // Get slot names.
                var featuresColumn = data.Schema[features];
                int numSlots       = featuresColumn.Type.VectorSize;
                data.Schema.TryGetColumnIndex(features, out int featuresColumnIndex);

                ch.Info("Number of slots: " + numSlots);
                if (data.Schema[featuresColumnIndex].HasSlotNames(numSlots))
                {
                    data.Schema[featuresColumnIndex].Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref slotNames);
                }

                // If slot-name metadata is absent or of the wrong length, use an empty buffer
                // of the right length so GetSlotName below stays safe.
                if (slotNames.Length != numSlots)
                {
                    slotNames = VBufferUtils.CreateEmpty <ReadOnlyMemory <char> >(numSlots);
                }

                VBuffer <float> weights = default;
                var             workingFeatureIndices = Enumerable.Range(0, numSlots).ToList();
                int             zeroWeightsCount      = 0;

                // By default set to the number of all features available.
                var evaluatedFeaturesCount = numSlots;
                if (useFeatureWeightFilter)
                {
                    var predictorWithWeights = model.Model as IPredictorWithFeatureWeights <Single>;
                    if (predictorWithWeights != null)
                    {
                        predictorWithWeights.GetFeatureWeights(ref weights);

                        const int     maxReportedZeroFeatures = 10;
                        StringBuilder msgFilteredOutFeatures  = new StringBuilder("The following features have zero weight and will not be evaluated: \n \t");
                        var           prefix = "";
                        // Rebuild the working set from scratch: the initial 0..numSlots-1 range is
                        // replaced by only the non-zero-weight slots (see the Count==0 check below).
                        foreach (var k in weights.Items(all: true))
                        {
                            if (k.Value == 0)
                            {
                                zeroWeightsCount++;

                                // Print info about first few features we're not going to evaluate.
                                if (zeroWeightsCount <= maxReportedZeroFeatures)
                                {
                                    msgFilteredOutFeatures.Append(prefix);
                                    msgFilteredOutFeatures.Append(GetSlotName(slotNames, k.Key));
                                    prefix = ", ";
                                }
                            }
                            else
                            {
                                // NOTE(review): this appends to the list that already contains
                                // 0..numSlots-1, so with any non-zero weight present the working
                                // set contains duplicates — confirm intended behavior.
                                workingFeatureIndices.Add(k.Key);
                            }
                        }

                        // Old FastTree models has less weights than slots.
                        if (weights.Length < numSlots)
                        {
                            ch.Warning(
                                "Predictor had fewer features than slots. All unknown features will get default 0 weight.");
                            zeroWeightsCount += numSlots - weights.Length;
                            var indexes = weights.GetIndices().ToArray();
                            var values  = weights.GetValues().ToArray();
                            var count   = values.Length;
                            weights = new VBuffer <float>(numSlots, count, values, indexes);
                        }

                        evaluatedFeaturesCount = workingFeatureIndices.Count;
                        ch.Info("Number of zero weights: {0} out of {1}.", zeroWeightsCount, weights.Length);

                        // Print what features have 0 weight
                        if (zeroWeightsCount > 0)
                        {
                            if (zeroWeightsCount > maxReportedZeroFeatures)
                            {
                                msgFilteredOutFeatures.Append(string.Format("... (printing out  {0} features here).\n Use 'Index' column in the report for info on what features are not evaluated.", maxReportedZeroFeatures));
                            }
                            ch.Info(msgFilteredOutFeatures.ToString());
                        }
                    }
                }

                if (workingFeatureIndices.Count == 0 && zeroWeightsCount == 0)
                {
                    // Use all features otherwise.
                    workingFeatureIndices.AddRange(Enumerable.Range(0, numSlots));
                }

                // Nothing to permute: every slot has zero weight, so return an empty result.
                if (zeroWeightsCount == numSlots)
                {
                    ch.Warning("All features have 0 weight thus can not do thorough evaluation");
                    return(metricsDelta.ToImmutableArray());
                }

                // Note: this will not work on the huge dataset.
                var          maxSize = topExamples;
                List <float> initialfeatureValuesList = new List <float>();

                // Cursor through the data to cache slot 0 values for the upcoming permutation.
                var valuesRowCount = 0;
                // REVIEW: Seems like if the labels are NaN, so that all metrics are NaN, this command will be useless.
                // In which case probably erroring out is probably the most useful thing.
                using (var cursor = data.GetRowCursor(col => col == featuresColumnIndex))
                {
                    var featuresGetter = cursor.GetGetter <VBuffer <float> >(featuresColumnIndex);
                    var featuresBuffer = default(VBuffer <float>);

                    // Cache the first working slot's value per row, capped at maxSize rows.
                    while (initialfeatureValuesList.Count < maxSize && cursor.MoveNext())
                    {
                        featuresGetter(ref featuresBuffer);
                        initialfeatureValuesList.Add(featuresBuffer.GetItemOrDefault(workingFeatureIndices[0]));
                    }

                    valuesRowCount = initialfeatureValuesList.Count;
                }

                if (valuesRowCount > 0)
                {
                    ch.Info("Detected {0} examples for evaluation.", valuesRowCount);
                }
                else
                {
                    ch.Warning("Detected no examples for evaluation.");
                    return(metricsDelta.ToImmutableArray());
                }

                float[] featureValuesBuffer = initialfeatureValuesList.ToArray();
                float[] nextValues          = new float[valuesRowCount];

                // Now iterate through all the working slots, do permutation and calc the delta of metrics.
                int processedCnt     = 0;
                int nextFeatureIndex = 0;
                var shuffleRand      = RandomUtils.Create(host.Rand.Next());
                // NOTE(review): progress-channel name looks copy-pasted from SDCA code;
                // consider renaming to something PFI-specific.
                using (var pch = host.StartProgressChannel("SDCA preprocessing with lookup"))
                {
                    pch.SetHeader(new ProgressHeader("processed slots"), e => e.SetProgress(0, processedCnt));
                    foreach (var workingIndx in workingFeatureIndices)
                    {
                        // Index for the feature we will permute next.  Needed to build in advance a buffer for the permutation.
                        if (processedCnt < workingFeatureIndices.Count - 1)
                        {
                            nextFeatureIndex = workingFeatureIndices[processedCnt + 1];
                        }

                        // Used for pre-caching the next feature
                        int nextValuesIndex = 0;

                        SchemaDefinition input = SchemaDefinition.Create(typeof(FeaturesBuffer));
                        Contracts.Assert(input.Count == 1);
                        input[0].ColumnName = features;

                        SchemaDefinition output = SchemaDefinition.Create(typeof(FeaturesBuffer));
                        Contracts.Assert(output.Count == 1);
                        output[0].ColumnName = features;
                        output[0].ColumnType = featuresColumn.Type;

                        // Perform multiple permutations for one feature to build a confidence interval
                        var metricsDeltaForFeature = new TResult();
                        for (int permutationIteration = 0; permutationIteration < permutationCount; permutationIteration++)
                        {
                            Utils.Shuffle <float>(shuffleRand, featureValuesBuffer);

                            // The permuter copies each source row and overwrites the current slot
                            // with the next shuffled value; state.SampleIndex tracks the row position.
                            Action <FeaturesBuffer, FeaturesBuffer, PermuterState> permuter =
                                (src, dst, state) =>
                            {
                                src.Features.CopyTo(ref dst.Features);
                                VBufferUtils.ApplyAt(ref dst.Features, workingIndx,
                                                     (int ii, ref float d) =>
                                                     d = featureValuesBuffer[state.SampleIndex++]);

                                // Is it time to pre-cache the next feature?
                                if (permutationIteration == permutationCount - 1 &&
                                    processedCnt < workingFeatureIndices.Count - 1)
                                {
                                    // Fill out the featureValueBuffer for the next feature while updating the current feature
                                    // This is the reason I need PermuterState in LambdaTransform.CreateMap.
                                    nextValues[nextValuesIndex] = src.Features.GetItemOrDefault(nextFeatureIndex);
                                    if (nextValuesIndex < valuesRowCount - 1)
                                    {
                                        nextValuesIndex++;
                                    }
                                }
                            };

                            IDataView viewPermuted = LambdaTransform.CreateMap(
                                host, data, permuter, null, input, output);
                            // If the cached rows were capped at topExamples, restrict evaluation
                            // to the same number of rows so the shuffled buffer lines up.
                            if (valuesRowCount == topExamples)
                            {
                                viewPermuted = SkipTakeFilter.Create(host, new SkipTakeFilter.TakeArguments()
                                {
                                    Count = valuesRowCount
                                }, viewPermuted);
                            }

                            var metrics = evaluationFunc(model.Transform(viewPermuted));

                            var delta = deltaFunc(metrics, baselineMetrics);
                            metricsDeltaForFeature.Add(delta);
                        }

                        // Add the metrics delta to the list
                        metricsDelta.Add(metricsDeltaForFeature);

                        // Swap values for next iteration of permutation.
                        if (processedCnt < workingFeatureIndices.Count - 1)
                        {
                            Array.Clear(featureValuesBuffer, 0, featureValuesBuffer.Length);
                            nextValues.CopyTo(featureValuesBuffer, 0);
                            Array.Clear(nextValues, 0, nextValues.Length);
                        }
                        processedCnt++;
                    }
                    pch.Checkpoint(processedCnt, processedCnt);
                }
            }

            return(metricsDelta.ToImmutableArray());
        }
示例#19
0
        /// <summary>
        /// Builds a faked <c>IDomainModelDefinitionsProvider</c> whose definitions describe a
        /// small "core" Ed-Fi model: a root entity ("CoreEntity") with one collection item
        /// ("Collection1Item") and one embedded object ("EmbeddedObject1"), wired together by
        /// a one-to-one and a one-to-zero-or-more association and a single aggregate.
        /// </summary>
        internal static IDomainModelDefinitionsProvider BuildCoreModelEntityDefinitionsProvider()
        {
            string physicalSchema = EdFiConventions.PhysicalSchemaName;
            string logicalSchema  = EdFiConventions.LogicalName;

            // Entity definitions: each entry supplies (schema, name, properties, identifiers, flag).
            var entityDefinitions = new[]
            {
                new EntityDefinition(
                    physicalSchema,
                    "CoreEntity",
                    new[]
                    {
                        new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true)
                    },
                    new[]
                    {
                        // NOTE(review): the PK references "KeyProperty2", which is not among
                        // CoreEntity's declared properties — preserved as-is; confirm intent.
                        new EntityIdentifierDefinition(
                            "PK",
                            new[]
                            {
                                "KeyProperty1", "KeyProperty2"
                            },
                            isPrimary: true)
                    },
                    true),
                new EntityDefinition(
                    physicalSchema,
                    "Collection1Item",
                    new[]
                    {
                        new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true),
                        new EntityPropertyDefinition("KeyProperty2", new PropertyType(DbType.Int32), null, true)
                    },
                    new[]
                    {
                        new EntityIdentifierDefinition(
                            "PK",
                            new[]
                            {
                                "KeyProperty1", "KeyProperty2"
                            },
                            isPrimary: true)
                    },
                    true),
                new EntityDefinition(
                    physicalSchema,
                    "EmbeddedObject1",
                    new[]
                    {
                        new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true)
                    },
                    new[]
                    {
                        new EntityIdentifierDefinition(
                            "PK",
                            new[]
                            {
                                "KeyProperty1"
                            },
                            isPrimary: true)
                    },
                    true)
            };

            // Associations from CoreEntity to its embedded object (1:1) and collection (1:0..n).
            var associationDefinitions = new[]
            {
                new AssociationDefinition(
                    new FullName(physicalSchema, "FK_CoreEntityEmbeddedObject"),
                    Cardinality.OneToOne,
                    new FullName(physicalSchema, "CoreEntity"),
                    new[]
                    {
                        new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true)
                    },
                    new FullName(physicalSchema, "EmbeddedObject1"),
                    new[]
                    {
                        new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true)
                    },
                    isIdentifying: true,
                    isRequired: true),
                new AssociationDefinition(
                    new FullName(physicalSchema, "FK_CoreEntityCollection"),
                    Cardinality.OneToZeroOrMore,
                    new FullName(physicalSchema, "CoreEntity"),
                    new[]
                    {
                        new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true)
                    },
                    new FullName(physicalSchema, "Collection1Item"),
                    new[]
                    {
                        new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true)
                    },
                    isIdentifying: true,
                    isRequired: true)
            };

            // A single aggregate rooted at CoreEntity containing its child tables.
            var aggregateDefinitions = new[]
            {
                new AggregateDefinition(
                    new FullName(physicalSchema, "CoreEntity"),
                    new[]
                    {
                        new FullName(physicalSchema, "Collection1Item"), new FullName(physicalSchema, "EmbeddedObject1")
                    })
            };

            var definitions = new DomainModelDefinitions(
                new SchemaDefinition(logicalSchema, physicalSchema),
                aggregateDefinitions,
                entityDefinitions,
                associationDefinitions);

            // Fake the provider so callers receive the canned definitions above.
            var provider = A.Fake <IDomainModelDefinitionsProvider>();

            A.CallTo(() => provider.GetDomainModelDefinitions())
            .Returns(definitions);

            return provider;
        }
        /// <summary>
        /// Validates the source message against its schema and then executes the BizTalk map,
        /// reading the input instance from the source folder and writing the mapped output
        /// to the destination folder.
        /// </summary>
        /// <remarks>
        /// NOTE(review): folder paths are expected to already carry a trailing separator,
        /// since file names are appended by plain string concatenation — confirm with callers.
        /// The <paramref name="destinationSchema"/> and <paramref name="xpathList"/> parameters
        /// are not used here; output validation is performed separately.
        /// </remarks>
        public static void MapTest(string inputMessageFolderPath, string outputMessageFolderPath, TestableMapBase target, string
                                   sourceFile, SchemaDefinition sourceSchema, string destinationFile, SchemaDefinition destinationSchema,
                                   List <XPathDefinition> xpathList)
        {
            // Fail fast if the input instance does not conform to the source schema.
            ValidateSouceMessage(inputMessageFolderPath, sourceFile, sourceSchema);

            var sourcePath      = inputMessageFolderPath + sourceFile;
            var destinationPath = outputMessageFolderPath + destinationFile;

            // Schema validation is handled elsewhere; disable it for the map run itself.
            target.ValidateInput  = false;
            target.ValidateOutput = false;

            target.TestMap(sourcePath, Microsoft.BizTalk.TestTools.Schema.InputInstanceType.Xml, destinationPath,
                           Microsoft.BizTalk.TestTools.Schema.OutputInstanceType.XML);
        }
        /// <summary>
        /// Create a filter transform that is savable iff <paramref name="saveAction"/> and <paramref name="loadFunc"/> are
        /// not null.
        /// </summary>
        /// <param name="env">The host environment</param>
        /// <param name="source">The dataview upon which we construct the transform</param>
        /// <param name="filterFunc">The function by which we transform source to destination columns and decide whether
        /// to keep the row.</param>
        /// <param name="initStateAction">The function that is called once per cursor to initialize state. Can be null.</param>
        /// <param name="saveAction">An action that allows us to save state to the serialization stream. May be
        /// null simultaneously with <paramref name="loadFunc"/>.</param>
        /// <param name="loadFunc">A function that given the serialization stream and a data view, returns
        /// an <see cref="ITransformTemplate"/>. The intent is, this returned object should itself be a
        /// <see cref="CustomMappingTransformer{TSrc,TDst}"/>, but this is not strictly necessary. This delegate should be
        /// a static non-lambda method that this assembly can legally call. May be null simultaneously with
        /// <paramref name="saveAction"/>.</param>
        /// <param name="inputSchemaDefinition">The schema definition overrides for <typeparamref name="TSrc"/></param>
        /// <param name="outputSchemaDefinition">The schema definition overrides for <typeparamref name="TDst"/></param>
        public ExtStatefulFilterTransform(IHostEnvironment env, IDataView source, Func <TSrc, TDst, TState, bool> filterFunc,
                                          Action <TState> initStateAction,
                                          Action <BinaryWriter> saveAction, ExtLambdaTransform.LoadDelegate loadFunc,
                                          SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
            : base(env, RegistrationName, saveAction, loadFunc)
        {
            // Use nameof for the assertion messages so they stay correct under renames
            // (the original passed the literals "source" and "filterFunc").
            Host.AssertValue(source, nameof(source));
            Host.AssertValue(filterFunc, nameof(filterFunc));
            Host.AssertValueOrNull(initStateAction);
            Host.AssertValueOrNull(inputSchemaDefinition);
            Host.AssertValueOrNull(outputSchemaDefinition);

            _source                = source;
            _filterFunc            = filterFunc;
            _initStateAction       = initStateAction;
            _inputSchemaDefinition = inputSchemaDefinition;
            // Typed view over the source rows, used to feed TSrc instances to the filter.
            _typedSource           = TypedCursorable <TSrc> .Create(Host, Source, false, inputSchemaDefinition);

            // Derive the output columns contributed by TDst (with any overrides applied)
            // and bind them after the source schema's columns.
            var outSchema = InternalSchemaDefinition.Create(typeof(TDst), outputSchemaDefinition);

            _addedSchema = outSchema;
            _bindings    = new ColumnBindings(Source.Schema, DataViewConstructionUtils.GetSchemaColumns(outSchema));
        }
        /// <summary>
        /// Validates the mapped output message: reads exactly one file matching
        /// <paramref name="outputFile"/> from the output folder and checks it against the
        /// destination schema plus the supplied XPath expectations, via a BizUnit test case.
        /// </summary>
        public static void ValidateDestinationMessage(string OutoutputMessageFolderPath, string outputFile, SchemaDefinition
                                                      destinationSchema, List <XPathDefinition> xpathList)
        {
            // Step that picks up exactly one output file from the folder (without deleting it).
            var readStep = new FileReadMultipleStep
            {
                DeleteFiles           = false,
                FailOnError           = true,
                DirectoryPath         = OutoutputMessageFolderPath,
                SearchPattern         = outputFile,
                ExpectedNumberOfFiles = 1
            };

            // Sub-step validating the file against the schema and each XPath expectation.
            var validationStep = new XmlValidationStep();

            validationStep.XmlSchemas.Add(destinationSchema);

            foreach (var xpath in xpathList)
            {
                validationStep.XPathValidations.Add(xpath);
            }

            readStep.SubSteps.Add(validationStep);

            var testCase = new TestCase
            {
                Name = "Validate Output Message"
            };

            testCase.ExecutionSteps.Add(readStep);

            new BizUnit.Core.TestRunner(testCase).Run();
        }
        /// <summary>
        /// This is a 'stateful non-savable' version of the map transform: the mapping function is guaranteed to be invoked once per
        /// every row of the data set, in sequence; one user-defined state object will be allocated per cursor and passed to the
        /// map function every time. If <typeparamref name="TSrc"/>, <typeparamref name="TDst"/>, or
        /// <typeparamref name="TState"/> implement the <see cref="IDisposable" /> interface, they will be disposed after use.
        /// </summary>
        /// <typeparam name="TSrc">The type that describes what 'source' columns are consumed from the
        /// input <see cref="IDataView"/>.</typeparam>
        /// <typeparam name="TState">The type of the state object to allocate per cursor.</typeparam>
        /// <typeparam name="TDst">The type that describes what new columns are added by this transform.</typeparam>
        /// <param name="env">The host environment to use.</param>
        /// <param name="source">The input data to apply transformation to.</param>
        /// <param name="mapAction">The function that performs the transformation. The function should transform its <typeparamref name="TSrc"/>
        /// argument into its <typeparamref name="TDst"/> argument and can utilize the per-cursor <typeparamref name="TState"/> state.</param>
        /// <param name="initStateAction">The function that is called once per cursor to initialize state. Can be null.</param>
        /// <param name="inputSchemaDefinition">The optional input schema. If <c>null</c>, the schema is
        /// inferred from the <typeparamref name="TSrc"/> type.</param>
        /// <param name="outputSchemaDefinition">The optional output schema. If <c>null</c>, the schema is
        /// inferred from the <typeparamref name="TDst"/> type.</param>
        public static ITransformTemplate CreateMap <TSrc, TDst, TState>(IHostEnvironment env, IDataView source,
                                                                        Action <TSrc, TDst, TState> mapAction, Action <TState> initStateAction,
                                                                        SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
            where TSrc : class, new()
            where TDst : class, new()
            where TState : class, new()
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(source, nameof(source));
            env.CheckValue(mapAction, nameof(mapAction));
            env.CheckValueOrNull(initStateAction);
            env.CheckValueOrNull(inputSchemaDefinition);
            env.CheckValueOrNull(outputSchemaDefinition);

            // A map is implemented as a filter that applies the mapping and always keeps the row.
            bool MapAndAccept(TSrc src, TDst dst, TState state)
            {
                mapAction(src, dst, state);
                return true;
            }

            return new ExtStatefulFilterTransform <TSrc, TDst, TState>(env, source,
                MapAndAccept, initStateAction, null, null, inputSchemaDefinition, outputSchemaDefinition);
        }
示例#24
0
 /// <summary>
 /// Renders the given schema definition as a CREATE script.
 /// NOTE(review): the boolean argument's meaning is defined by <c>ToCreateScript</c> — confirm.
 /// </summary>
 public string BuildCreateScript(SchemaDefinition schema) => schema.ToCreateScript(true);
        /// <summary>
        /// This creates a filter transform that can 'accept' or 'decline' any row of the data based on the contents of the row
        /// or state of the cursor.
        /// This is a 'stateful non-savable' version of the filter: the filter function is guaranteed to be invoked once per
        /// every row of the data set, in sequence (non-parallelizable); one user-defined state object will be allocated per cursor and passed to the
        /// filter function every time.
        /// If <typeparamref name="TSrc"/> or <typeparamref name="TState"/> implement the <see cref="IDisposable" /> interface, they will be disposed after use.
        /// </summary>
        /// <typeparam name="TSrc">The type that describes what 'source' columns are consumed from the
        /// input <see cref="IDataView"/>.</typeparam>
        /// <typeparam name="TState">The type of the state object to allocate per cursor.</typeparam>
        /// <param name="env">The host environment to use.</param>
        /// <param name="source">The input data to apply transformation to.</param>
        /// <param name="filterFunc">The user-defined function that determines whether to keep the row or discard it. First parameter
        /// is the current row's contents, the second parameter is the cursor-specific state object.</param>
        /// <param name="initStateAction">The function that is called once per cursor to initialize state. Can be null.</param>
        /// <param name="inputSchemaDefinition">The optional input schema. If <c>null</c>, the schema is
        /// inferred from the <typeparamref name="TSrc"/> type.</param>
        /// <returns></returns>
        public static ITransformTemplate CreateFilter <TSrc, TState>(IHostEnvironment env, IDataView source,
                                                                     Func <TSrc, TState, bool> filterFunc, Action <TState> initStateAction, SchemaDefinition inputSchemaDefinition = null)
            where TSrc : class, new()
            where TState : class, new()
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(source, nameof(source));
            env.CheckValue(filterFunc, nameof(filterFunc));
            env.CheckValueOrNull(initStateAction);
            env.CheckValueOrNull(inputSchemaDefinition);

            // Adapt the (src, state) predicate to the filter-transform's (src, dst, state)
            // shape; the destination object is ignored, so TDst is simply object.
            bool AcceptRow(TSrc src, object dst, TState state) => filterFunc(src, state);

            return new ExtStatefulFilterTransform <TSrc, object, TState>(env, source,
                AcceptRow, initStateAction, null, null, inputSchemaDefinition);
        }
示例#26
0
        /// <summary>
        /// Time-series specialization of the prediction engine core: wires the input row through
        /// the mapper while collecting the pipeline's stateful rows, builds a typed reader over
        /// the mapper's output schema, and creates the pinger used to advance time-series state.
        /// </summary>
        /// <param name="env">The host environment.</param>
        /// <param name="inputRow">The strongly-typed input row fed to the mapper.</param>
        /// <param name="mapper">The row-to-row mapper produced from the transformer.</param>
        /// <param name="ignoreMissingColumns">Whether to ignore output columns with no matching <typeparamref name="TDst"/> member.</param>
        /// <param name="inputSchemaDefinition">Input schema overrides (unused here; the input row is already constructed).</param>
        /// <param name="outputSchemaDefinition">Output schema overrides for <typeparamref name="TDst"/>.</param>
        /// <param name="disposer">Receives the action that releases resources held by the mapped rows.</param>
        /// <param name="outputRow">Receives the typed reader over the mapper's output row.</param>
        internal override void PredictionEngineCore(IHostEnvironment env, DataViewConstructionUtils.InputRow <TSrc> inputRow, IRowToRowMapper mapper, bool ignoreMissingColumns,
                                                    SchemaDefinition inputSchemaDefinition, SchemaDefinition outputSchemaDefinition, out Action disposer, out IRowReadableAs <TDst> outputRow)
        {
            List <IStatefulRow> rows = new List <IStatefulRow>();
            // Fix: the original contained a redundant self-assignment
            // (`IRow outputRowLocal = outputRowLocal = GetStatefulRows(...)`).
            IRow outputRowLocal = GetStatefulRows(inputRow, mapper, col => true, rows, out disposer);
            var  cursorable     = TypedCursorable <TDst> .Create(env, new EmptyDataView(env, mapper.OutputSchema), ignoreMissingColumns, outputSchemaDefinition);

            // The pinger updates the collected stateful rows as observations arrive at prediction time.
            _pinger   = CreatePinger(rows);
            outputRow = cursorable.GetRow(outputRowLocal);
        }
示例#27
0
        // Base constructor for prediction engines: builds the typed input row, obtains a
        // row-to-row mapper from the transformer, and delegates engine wiring to the
        // (virtual) PredictionEngineCore.
        // NOTE(review): this intentionally calls a virtual member from the constructor, so
        // derived overrides run before the derived constructor body — initialization order
        // here is load-bearing; do not reorder.
        private protected PredictionEngineBase(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns,
                                               SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
        {
            Contracts.CheckValue(env, nameof(env));
            env.AssertValue(transformer);
            Transformer = transformer;
            // TransformerChecker yields a factory that creates the mapper for a given input schema.
            var makeMapper = TransformerChecker(env, transformer);

            env.AssertValue(makeMapper);
            _inputRow = DataViewConstructionUtils.CreateInputRow <TSrc>(env, inputSchemaDefinition);
            PredictionEngineCore(env, _inputRow, makeMapper(_inputRow.Schema), ignoreMissingColumns, outputSchemaDefinition, out _disposer, out _outputRow);
            OutputSchema = Transformer.GetOutputSchema(_inputRow.Schema);
        }
示例#28
0
 /// <summary>
 /// Create a prediction engine for one-time prediction.
 /// </summary>
 /// <typeparam name="TSrc">The class that defines the input data.</typeparam>
 /// <typeparam name="TDst">The class that defines the output data.</typeparam>
 /// <param name="transformer">The transformer to use for prediction.</param>
 /// <param name="ignoreMissingColumns">Whether to throw an exception if a column exists in
 /// <paramref name="outputSchemaDefinition"/> but the corresponding member doesn't exist in
 /// <typeparamref name="TDst"/>.</param>
 /// <param name="inputSchemaDefinition">Additional settings of the input schema.</param>
 /// <param name="outputSchemaDefinition">Additional settings of the output schema.</param>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 /// [!code-csharp[Save](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/SaveLoadModel.cs)]
 /// ]]>
 /// </format>
 /// </example>
 public PredictionEngine <TSrc, TDst> CreatePredictionEngine <TSrc, TDst>(ITransformer transformer,
                                                                          bool ignoreMissingColumns = true, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
     where TSrc : class
     where TDst : class, new()
     // Thin forwarder: delegates to the extension method using this catalog's environment.
     => transformer.CreatePredictionEngine <TSrc, TDst>(_env, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition);
示例#29
0
        /// <summary>
        /// Default engine wiring: builds a typed reader over the mapper's output schema, maps the
        /// input row through the pipeline, and exposes the result as an <see cref="IRowReadableAs{TDst}"/>.
        /// </summary>
        private protected virtual void PredictionEngineCore(IHostEnvironment env, DataViewConstructionUtils.InputRow <TSrc> inputRow,
                                                            IRowToRowMapper mapper, bool ignoreMissingColumns, SchemaDefinition outputSchemaDefinition, out Action disposer, out IRowReadableAs <TDst> outputRow)
        {
            // An empty view carrying the output schema is enough to construct the typed reader.
            var typedReader = TypedCursorable <TDst> .Create(env, new EmptyDataView(env, mapper.OutputSchema), ignoreMissingColumns, outputSchemaDefinition);

            var mappedRow = mapper.GetRow(inputRow, mapper.OutputSchema);

            // Disposing the engine releases the input row's resources.
            disposer  = inputRow.Dispose;
            outputRow = typedReader.GetRow(mappedRow);
        }
示例#30
0
        // Verifies every table in `schema` exists in the data store with the expected columns
        // and indices. Returns true when all tables validate; otherwise returns false and sets
        // `reason` to the first schema definition that failed.
        // NOTE(review): both #if branches are behaviorally equivalent; the split presumably
        // exists because the non-ISWIN build target lacks LINQ support — confirm.
        public bool DebugTestThatAllTablesValidate(IDataConnector genericData, out SchemaDefinition reason)
        {
            reason = null;
#if (!ISWIN)
            // Non-LINQ variant: explicit loop over the schema definitions.
            foreach (var s in schema)
            {
                if (!genericData.VerifyTableExists(s.Name, s.Columns, s.Indices))
                {
                    reason = s;
                    return false;
                }
            }
#else
            // LINQ variant: pick out the first failing table, if any.
            foreach (var s in schema.Where(s => !genericData.VerifyTableExists(s.Name, s.Columns, s.Indices)))
            {
                reason = s;
                return false;
            }
#endif
            return true;
        }
        // Demonstrates creating an IDataView from an IEnumerable and fixing the
        // size of the Features vector at runtime. When the data model is defined
        // through types, the size is declared via the VectorType annotation; when
        // it is unknown at compile time, the SchemaDefinition can be edited at
        // runtime instead. This matters because most ML.NET trainers require the
        // Features vector to be of known size.
        public static void Example()
        {
            // Entry point for ML.NET operations: exception tracking and logging,
            // catalog of available operations, and source of randomness.
            var mlContext = new MLContext();

            // A small dataset whose vector size is declared on the type itself.
            IEnumerable<DataPointVector> enumerableKnownSize = new DataPointVector[]
            {
                new DataPointVector { Features = new float[] { 1.2f, 3.4f, 4.5f, 3.2f, 7, 5f } },
                new DataPointVector { Features = new float[] { 4.2f, 3.4f, 14.65f, 3.2f, 3, 5f } },
                new DataPointVector { Features = new float[] { 1.6f, 3.5f, 4.5f, 6.2f, 3, 5f } },
            };

            // Load the dataset into an IDataView and inspect the Features column type.
            IDataView knownSizeData = mlContext.Data.LoadFromEnumerable(enumerableKnownSize);
            var featuresType = knownSizeData.Schema["Features"].Type as VectorDataViewType;

            Console.WriteLine($"Is the size of the Features column known: {featuresType.IsKnownSize}.\nSize: {featuresType.Size}");

            // Expected output — IsKnownSize should be True. NOTE(review): the
            // arrays above have 6 elements, so the size presumably prints as 6;
            // confirm against the VectorType annotation on DataPointVector.
            //
            // Is the size of the Features column known: True.
            // Size: 6.

            // When the size is unknown at compile time, it can be set at runtime.
            IEnumerable<DataPoint> enumerableUnknownSize = new DataPoint[]
            {
                new DataPoint { Features = new float[] { 1.2f, 3.4f, 4.5f } },
                new DataPoint { Features = new float[] { 4.2f, 3.4f, 1.6f } },
                new DataPoint { Features = new float[] { 1.6f, 3.5f, 4.5f } },
            };

            // The feature dimension (typically the Count of the features array,
            // known only at runtime).
            int featureDimension = 3;
            var definedSchema = SchemaDefinition.Create(typeof(DataPoint));

            featuresType = definedSchema["Features"].ColumnType as VectorDataViewType;

            Console.WriteLine($"Is the size of the Features column known: {featuresType.IsKnownSize}.\nSize: {featuresType.Size}");

            // Expected output — size unknown before the schema is patched:
            //
            // Is the size of the Features column known: False.
            // Size: 0.

            // Overwrite the column type with a known-size vector of the same item type.
            var vectorItemType = ((VectorDataViewType)definedSchema[0].ColumnType).ItemType;
            definedSchema[0].ColumnType = new VectorDataViewType(vectorItemType, featureDimension);

            // Load the data with the modified schema; Features now has a known size.
            IDataView runtimeSizedData = mlContext.Data.LoadFromEnumerable(enumerableUnknownSize, definedSchema);

            featuresType = runtimeSizedData.Schema["Features"].Type as VectorDataViewType;
            Console.WriteLine($"Is the size of the Features column known: {featuresType.IsKnownSize}.\nSize: {featuresType.Size}");

            // Expected output:
            //
            // Is the size of the Features column known: True.
            // Size: 3.
        }