/// <summary>
/// <see cref="TimeSeriesPredictionFunction{TSrc, TDst}"/> creates a prediction function/engine for a time series pipeline.
/// It updates the state of the time series model with observations seen at prediction phase and allows checkpointing the model.
/// </summary>
/// <typeparam name="TSrc">Class describing input schema to the model.</typeparam>
/// <typeparam name="TDst">Class describing the output schema of the prediction.</typeparam>
/// <param name="transformer">The time series pipeline in the form of a <see cref="ITransformer"/>.</param>
/// <param name="env">Usually <see cref="MLContext"/>.</param>
/// <param name="ignoreMissingColumns">To ignore missing columns. Default is false.</param>
/// <param name="inputSchemaDefinition">Input schema definition. Default is null.</param>
/// <param name="outputSchemaDefinition">Output schema definition. Default is null.</param>
/// <p>Example code can be found by searching for <i>TimeSeriesPredictionFunction</i> in <a href='https://github.com/dotnet/machinelearning'>ML.NET.</a></p>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[MF](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/IidSpikeDetectorTransform.cs)]
/// [!code-csharp[MF](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/IidChangePointDetectorTransform.cs)]
/// ]]>
/// </format>
/// </example>
public static TimeSeriesPredictionFunction<TSrc, TDst> CreateTimeSeriesPredictionFunction<TSrc, TDst>(
    this ITransformer transformer,
    IHostEnvironment env,
    bool ignoreMissingColumns = false,
    SchemaDefinition inputSchemaDefinition = null,
    SchemaDefinition outputSchemaDefinition = null)
    where TSrc : class
    where TDst : class, new()
{
    // Validate the environment first, then use it to validate everything else.
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(transformer, nameof(transformer));
    env.CheckValueOrNull(inputSchemaDefinition);
    env.CheckValueOrNull(outputSchemaDefinition);

    var engine = new TimeSeriesPredictionFunction<TSrc, TDst>(
        env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition);
    return engine;
}
// Verifies that annotations (metadata) attached to a SchemaDefinition survive
// LoadFromEnumerable: they appear in the resulting schema in insertion order,
// can be read back via GetValue, and mismatched lookups throw.
public void MetadataSupportInDataViewConstruction()
{
    var data = ReadBreastCancerExamples();
    var autoSchema = SchemaDefinition.Create(typeof(BreastCancerExample));
    var mlContext = new MLContext(0);

    // Create Metadata: one (kind, value, type) triple per annotation attached below.
    var kindFloat = "Testing float as metadata.";
    float valueFloat = 10;
    var coltypeFloat = NumberDataViewType.Single;

    var kindString = "Testing string as metadata.";
    var valueString = "Strings have value.";
    var coltypeString = TextDataViewType.Instance;

    var kindStringArray = "Testing string array as metadata.";
    var valueStringArray = "I really have no idea what these features entail.".Split(' ');
    var coltypeStringArray = new VectorDataViewType(coltypeString, valueStringArray.Length);

    var kindFloatArray = "Testing float array as metadata.";
    var valueFloatArray = new float[] { 1, 17, 7, 19, 25, 0 };
    var coltypeFloatArray = new VectorDataViewType(coltypeFloat, valueFloatArray.Length);

    var kindVBuffer = "Testing VBuffer as metadata.";
    var valueVBuffer = new VBuffer<float>(4, new float[] { 4, 6, 89, 5 });
    var coltypeVBuffer = new VectorDataViewType(coltypeFloat, valueVBuffer.Length);

    // Add Metadata: scalar annotations on column 0 (label), vector annotations on column 1 (features).
    var labelColumn = autoSchema[0];
    labelColumn.AddAnnotation(kindFloat, valueFloat, coltypeFloat);
    labelColumn.AddAnnotation(kindString, valueString, coltypeString);

    var featureColumn = autoSchema[1];
    featureColumn.AddAnnotation(kindStringArray, valueStringArray, coltypeStringArray);
    featureColumn.AddAnnotation(kindFloatArray, valueFloatArray, coltypeFloatArray);
    featureColumn.AddAnnotation(kindVBuffer, valueVBuffer, coltypeVBuffer);

    var idv = mlContext.Data.LoadFromEnumerable(data, autoSchema);

    // Column 0 carries exactly the two scalar annotations, in insertion order.
    Assert.True(idv.Schema[0].Annotations.Schema.Count == 2);
    Assert.True(idv.Schema[0].Annotations.Schema[0].Name == kindFloat);
    Assert.True(idv.Schema[0].Annotations.Schema[0].Type == coltypeFloat);
    Assert.True(idv.Schema[0].Annotations.Schema[1].Name == kindString);
    Assert.True(idv.Schema[0].Annotations.Schema[1].Type == TextDataViewType.Instance);

    // Column 1 carries the three vector annotations.
    Assert.True(idv.Schema[1].Annotations.Schema.Count == 3);
    Assert.True(idv.Schema[1].Annotations.Schema[0].Name == kindStringArray);
    Assert.True(idv.Schema[1].Annotations.Schema[0].Type is VectorDataViewType vectorType && vectorType.ItemType is TextDataViewType);

    // Indexing the annotation schema by a kind that was not added to this column throws.
    Assert.Throws<ArgumentOutOfRangeException>(() => idv.Schema[1].Annotations.Schema[kindFloat]);

    // Round-trip each annotation value through GetValue.
    float retrievedFloat = 0;
    idv.Schema[0].Annotations.GetValue(kindFloat, ref retrievedFloat);
    Assert.True(Math.Abs(retrievedFloat - valueFloat) < .000001);

    ReadOnlyMemory<char> retrievedReadOnlyMemory = new ReadOnlyMemory<char>();
    idv.Schema[0].Annotations.GetValue(kindString, ref retrievedReadOnlyMemory);
    Assert.True(retrievedReadOnlyMemory.Span.SequenceEqual(valueString.AsMemory().Span));

    VBuffer<ReadOnlyMemory<char>> retrievedReadOnlyMemoryVBuffer = new VBuffer<ReadOnlyMemory<char>>();
    idv.Schema[1].Annotations.GetValue(kindStringArray, ref retrievedReadOnlyMemoryVBuffer);
    Assert.True(retrievedReadOnlyMemoryVBuffer.DenseValues().Select((s, i) => s.ToString() == valueStringArray[i]).All(b => b));

    // A deliberately mis-sized pre-filled buffer is overwritten by GetValue.
    VBuffer<float> retrievedFloatVBuffer = new VBuffer<float>(1, new float[] { 2 });
    idv.Schema[1].Annotations.GetValue(kindFloatArray, ref retrievedFloatVBuffer);
    VBuffer<float> valueFloatVBuffer = new VBuffer<float>(valueFloatArray.Length, valueFloatArray);
    Assert.True(retrievedFloatVBuffer.Items().SequenceEqual(valueFloatVBuffer.Items()));

    VBuffer<float> retrievedVBuffer = new VBuffer<float>();
    idv.Schema[1].Annotations.GetValue(kindVBuffer, ref retrievedVBuffer);
    Assert.True(retrievedVBuffer.Items().SequenceEqual(valueVBuffer.Items()));

    // Requesting a value whose stored type does not match the ref argument's type throws.
    Assert.Throws<InvalidOperationException>(() => idv.Schema[1].Annotations.GetValue(kindFloat, ref retrievedReadOnlyMemoryVBuffer));
}
/// <summary>
/// Create a filter transform.
/// </summary>
/// <param name="env">The host environment</param>
/// <param name="source">The dataview upon which we construct the transform</param>
/// <param name="filterFunc">The function by which we transform source to destination columns and decide whether
/// to keep the row.</param>
/// <param name="initStateAction">The function that is called once per cursor to initialize state. Can be null.</param>
/// <param name="inputSchemaDefinition">The schema definition overrides for <typeparamref name="TSrc"/></param>
/// <param name="outputSchemaDefinition">The schema definition overrides for <typeparamref name="TDst"/></param>
public StatefulFilterTransform(IHostEnvironment env, IDataView source, Func<TSrc, TDst, TState, bool> filterFunc, Action<TState> initStateAction, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
{
    _host = env.Register(RegistrationName);
    // Asserts rather than checks: violations here are internal programming errors,
    // not user-input errors.
    _host.AssertValue(source, "source");
    _host.AssertValue(filterFunc, "filterFunc");
    _host.AssertValueOrNull(initStateAction);
    _host.AssertValueOrNull(inputSchemaDefinition);
    _host.AssertValueOrNull(outputSchemaDefinition);

    // NOTE: _source must be assigned before the Source property is used below.
    _source = source;
    _filterFunc = filterFunc;
    _initStateAction = initStateAction;
    _inputSchemaDefinition = inputSchemaDefinition;

    // Typed cursor over the source rows; the third argument is presumably
    // ignoreMissingColumns: false — confirm against TypedCursorable.Create.
    _typedSource = TypedCursorable<TSrc>.Create(_host, Source, false, inputSchemaDefinition);

    // Columns derived from TDst are appended to the source schema via the bindings.
    var outSchema = InternalSchemaDefinition.Create(typeof(TDst), outputSchemaDefinition);
    _addedSchema = outSchema;
    _bindings = new ColumnBindings(Source.Schema, DataViewConstructionUtils.GetSchemaColumns(outSchema));
}
/// <summary>
/// Creates a time series prediction engine over <paramref name="transformer"/>.
/// The transformer chain is passed through <c>CloneTransformers</c> before reaching the
/// base engine — presumably so state updates made during prediction do not mutate the
/// caller's pipeline; confirm against <c>CloneTransformers</c>.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="transformer">The trained time series pipeline.</param>
/// <param name="ignoreMissingColumns">Whether to ignore columns missing from the input schema.</param>
/// <param name="inputSchemaDefinition">Optional input schema overrides. Default is null.</param>
/// <param name="outputSchemaDefinition">Optional output schema overrides. Default is null.</param>
public TimeSeriesPredictionFunction(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) : base(env, CloneTransformers(transformer), ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition)
{
}
// Composes a test case that imports another test case, runs it, and saves it to disk.
public void ImportSingleTestCaseTest()
{
    // Build the first test case via the helper, then compose a second one around it.
    var firstTestCase = BuildFirstTestCase();

    var copyTestCase = new TestCase { Name = "Copy First File Test" };

    // Step 1: create File2.xml from the sample purchase order.
    var fileLoader = new FileDataLoader
    {
        FilePath = @"..\..\..\Test\BizUnit.TestSteps.Tests\TestData\PurchaseOrder001.xml"
    };
    var createStep = new CreateStep { CreationPath = @"File2.xml" };
    createStep.DataSource = fileLoader;
    copyTestCase.ExecutionSteps.Add(createStep);

    // Step 2: embed the first test case inside the second.
    var importStep = new ImportTestCaseStep { TestCase = firstTestCase };
    copyTestCase.ExecutionSteps.Add(importStep);

    // Step 3: read both files back and validate schema plus the PONumber value.
    var readStep = new FileReadMultipleStep
    {
        DirectoryPath = @".",
        SearchPattern = "File*.xml",
        ExpectedNumberOfFiles = 2
    };

    var xmlValidation = new XmlValidationStep();
    xmlValidation.XmlSchemas.Add(new SchemaDefinition
    {
        XmlSchemaPath = @"..\..\..\Test\BizUnit.TestSteps.Tests\TestData\PurchaseOrder.xsd",
        XmlSchemaNameSpace = "http://SendMail.PurchaseOrder"
    });
    xmlValidation.XPathValidations.Add(new XPathDefinition
    {
        Description = "PONumber",
        XPath = "/*[local-name()='PurchaseOrder' and namespace-uri()='http://SendMail.PurchaseOrder']/*[local-name()='PONumber' and namespace-uri()='']",
        Value = "12323"
    });
    readStep.SubSteps.Add(xmlValidation);
    copyTestCase.ExecutionSteps.Add(readStep);

    // Execute the composed test case, then persist it.
    var runner = new BizUnit(copyTestCase);
    runner.RunTest();

    TestCase.SaveToFile(copyTestCase, "ImportSingleTestCaseTest.xml");
}
// Reads exactly one input file and validates it against the supplied XSD by
// running a single-step BizUnit test case.
// NOTE(review): "Souce" is a typo for "Source", but the name is public API;
// renaming would break existing callers.
public static void ValidateSouceMessage(string inputMessageFolderPath, string inputFile, SchemaDefinition sourceSchema)
{
    // Expect exactly one matching file, fail on any error, keep the file on disk.
    var readStep = new FileReadMultipleStep
    {
        DeleteFiles = false,
        DirectoryPath = inputMessageFolderPath,
        SearchPattern = inputFile,
        FailOnError = true,
        ExpectedNumberOfFiles = 1
    };

    // Validate the file's content against the supplied schema.
    var xmlValidation = new XmlValidationStep();
    xmlValidation.XmlSchemas.Add(sourceSchema);
    readStep.SubSteps.Add(xmlValidation);

    var testCase = new TestCase { Name = "Validate Input Message" };
    testCase.ExecutionSteps.Add(readStep);

    var runner = new BizUnit.Core.TestRunner(testCase);
    runner.Run();
}
/// <summary>
/// Create a custom mapping of input columns to output columns.
/// </summary>
/// <param name="env">The host environment</param>
/// <param name="mapAction">The action by which we map source to destination columns</param>
/// <param name="contractName">The name of the action (will be saved to the model).</param>
/// <param name="inputSchemaDefinition">Additional parameters for schema mapping between <typeparamref name="TSrc"/> and input data.</param>
/// <param name="outputSchemaDefinition">Additional parameters for schema mapping between <typeparamref name="TDst"/> and output data.</param>
public CustomMappingTransformer(IHostEnvironment env, Action<TSrc, TDst> mapAction, string contractName,
    SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
{
    Contracts.CheckValue(env, nameof(env));
    _host = env.Register(nameof(CustomMappingTransformer<TSrc, TDst>));
    _host.CheckValue(mapAction, nameof(mapAction));
    _host.CheckValueOrNull(contractName);
    _host.CheckValueOrNull(inputSchemaDefinition);
    _host.CheckValueOrNull(outputSchemaDefinition);

    _mapAction = mapAction;
    _inputSchemaDefinition = inputSchemaDefinition;

    // Honor the caller's output schema when supplied; otherwise infer it from TDst
    // in the "write" direction.
    var outputSchema = outputSchemaDefinition != null
        ? InternalSchemaDefinition.Create(typeof(TDst), outputSchemaDefinition)
        : InternalSchemaDefinition.Create(typeof(TDst), SchemaDefinition.Direction.Write);

    _contractName = contractName;
    _addedSchema = outputSchema;
}
/// <summary>
/// Creates a prediction engine that wraps <paramref name="transformer"/>; all work is
/// delegated to the base class.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="transformer">The trained transformer to use for prediction.</param>
/// <param name="ignoreMissingColumns">Whether to ignore columns missing from the input schema.</param>
/// <param name="inputSchemaDefinition">Optional input schema overrides. Default is null.</param>
/// <param name="outputSchemaDefinition">Optional output schema overrides. Default is null.</param>
internal PredictionEngine(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) : base(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition)
{
}
/// <summary>
/// Create a custom mapping of input columns to output columns. Most likely, you should call this method when you are loading the model:
/// use <see cref="CustomMapping{TSrc, TDst}(TransformsCatalog, Action{TSrc, TDst}, string, SchemaDefinition, SchemaDefinition)"/> when you are
/// training the model.
/// </summary>
/// <typeparam name="TSrc">The class defining which columns to take from the incoming data.</typeparam>
/// <typeparam name="TDst">The class defining which new columns are added to the data.</typeparam>
/// <param name="catalog">The transform catalog</param>
/// <param name="mapAction">The mapping action. This must be thread-safe and free from side effects.</param>
/// <param name="contractName">The contract name, used by ML.NET for loading the model. If <c>null</c> is specified, such a trained model would not be save-able.</param>
/// <param name="inputSchemaDefinition">Additional parameters for schema mapping between <typeparamref name="TSrc"/> and input data.</param>
/// <param name="outputSchemaDefinition">Additional parameters for schema mapping between <typeparamref name="TDst"/> and output data.</param>
public static CustomMappingTransformer<TSrc, TDst> CustomMappingTransformer<TSrc, TDst>(this TransformsCatalog catalog, Action<TSrc, TDst> mapAction, string contractName,
    SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
    where TSrc : class, new()
    where TDst : class, new()
{
    // Resolve the host environment from the catalog and delegate to the transformer constructor.
    var environment = catalog.GetEnvironment();
    return new CustomMappingTransformer<TSrc, TDst>(environment, mapAction, contractName, inputSchemaDefinition, outputSchemaDefinition);
}
/// <summary>
/// Create a custom mapping of input columns to output columns.
/// </summary>
/// <param name="env">The host environment</param>
/// <param name="mapAction">The mapping action. This must be thread-safe and free from side effects.</param>
/// <param name="contractName">The contract name, used by ML.NET for loading the model. If <c>null</c> is specified, such a trained model would not be save-able.</param>
/// <param name="inputSchemaDefinition">Additional parameters for schema mapping between <typeparamref name="TSrc"/> and input data.</param>
/// <param name="outputSchemaDefinition">Additional parameters for schema mapping between <typeparamref name="TDst"/> and output data.</param>
public CustomMappingEstimator(IHostEnvironment env, Action <TSrc, TDst> mapAction, string contractName, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
    // env is validated via CheckRef before registering the host; all remaining
    // argument validation happens inside the CustomMappingTransformer constructor.
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(CustomMappingEstimator <TSrc, TDst>)), new CustomMappingTransformer <TSrc, TDst>(env, mapAction, contractName, inputSchemaDefinition, outputSchemaDefinition))
{
}
/// <summary>
/// End-to-end Indy demo: a steward onboards a trust anchor, publishes a schema and
/// credential definition, and issues a credential that a prover stores in its wallet.
/// Pool/wallet setup and teardown live in Initialize()/Cleanup().
/// </summary>
public void Demo()
{
    // Initialization
    // 1. Creating a new local pool ledger configuration that can be used later to connect pool nodes.
    // 2. Open pool ledger and get the pool handle from libindy.
    // 3. Creates a new identity wallet
    // 4. Open identity wallet and get the wallet handle from libindy
    // SEE Initialize() above

    // 5. Generating and storing steward DID and Verkey
    IDid stewardDid = IndyDotNet.Did.Factory.CreateMyDid(_pool, _wallet, new IdentitySeed()
    {
        Seed = "000000000000000000000000Steward1"
    });

    // 6. Generating and storing Trust Anchor DID and Verkey
    IDid trustAnchor = IndyDotNet.Did.Factory.CreateMyDid(_pool, _wallet, null);

    // 7. Build NYM request to add Trust Anchor to the ledger
    INymLedger nymLedger = IndyDotNet.Ledger.Factory.CreateNymLedger();
    BuildRequestResult nymRequest = nymLedger.BuildRequest(stewardDid, trustAnchor, trustAnchor.VerKey, "", NymRoles.TrustAnchor);

    // 8. Sending the nym request to ledger
    SignAndSubmitRequestResponse nymResult = nymLedger.SignAndSubmitRequest(_pool, _wallet, stewardDid, nymRequest);

    // 9. build the schema definition request
    SchemaDefinition schemaDefinition = new SchemaDefinition()
    {
        Name = "name",
        Version = "1.1",
        Id = "id"
    };
    schemaDefinition.AttributeNames.Add("age");
    schemaDefinition.AttributeNames.Add("height");
    schemaDefinition.AttributeNames.Add("sex");
    schemaDefinition.AttributeNames.Add("name");

    ISchemaLedger schemaLedger = IndyDotNet.Ledger.Factory.CreateSchemaLedger();
    BuildSchemaResult buildSchema = schemaLedger.BuildSchemaRequest(stewardDid, schemaDefinition);

    // 10. Sending the SCHEMA request to the ledger
    SignAndSubmitRequestResponse signAndSubmitRequestResponse = schemaLedger.SignAndSubmitRequest(_pool, _wallet, stewardDid, buildSchema);

    // 11. Creating and storing CLAIM DEFINITION using anoncreds as Trust Anchor, for the given Schema
    IIssuerAnonCreds issuer = IndyDotNet.AnonCreds.Factory.GetIssuerAnonCreds(_wallet);
    CredentialDefinitionSchema credentialschema = new CredentialDefinitionSchema()
    {
        SequenceNo = 1,
        Id = "id",
        Name = "name",
        Version = "1.1",
        Tag = "TAG"
    };
    credentialschema.AttributeNames.Add("age");
    credentialschema.AttributeNames.Add("height");
    credentialschema.AttributeNames.Add("sex");
    credentialschema.AttributeNames.Add("name");
    IssuerCredentialDefinition credentialDefinition = issuer.CreateStoreCredentialDef(trustAnchor, credentialschema);

    // 12. Creating Prover wallet and opening it to get the handle
    WalletConfig config = new WalletConfig()
    {
        Id = "ProverIssueCredentialDemoWalletId"
    };
    _proverWallet = IndyDotNet.Wallet.Factory.GetWallet(config, _credentials);
    _proverWallet.Create();
    _proverWallet.Open();

    // 13. Prover is creating Master Secret
    IProverAnonCreds prover = IndyDotNet.AnonCreds.Factory.GetProverAnonCreds(_proverWallet);
    prover.CreateMasterSecret("master_secret");

    // 14. Issuer (Trust Anchor) is creating a Claim Offer for Prover
    IssuerCredentialOffer claimOffer = issuer.CreateCredentialOffer(credentialDefinition.Id);

    // 15. Prover creates Claim Request
    IDid proverDID = IndyDotNet.Did.Factory.CreateMyDid(_pool, _proverWallet, null);
    (ProverCredentialRequest credentialRequest, ProverCredentialRequestMetadata credentialRequestMetadata) = prover.CreateCredentialRequest(proverDID, claimOffer, credentialDefinition, "master_secret");
    Assert.AreEqual(credentialRequest.CredDefId, claimOffer.CredDefId);
    Assert.AreEqual(credentialRequest.ProverDid, proverDID.Did);

    // 16. Issuer (Trust Anchor) creates Claim for Claim Request
    AttributeValuesList attributes = new AttributeValuesList();
    attributes.Add(new AttributeWithValue() { Name = "age", Value = "27", CheckValue = "27" });
    attributes.Add(new AttributeWithValue() { Name = "height", Value = "175", CheckValue = "175" });
    attributes.Add(new AttributeWithValue() { Name = "name", Value = "Alex", CheckValue = "99262857098057710338306967609588410025648622308394250666849665532448612202874" });
    attributes.Add(new AttributeWithValue() { Name = "sex", Value = "male", CheckValue = "5944657099558967239210949258394887428692050081607692519917050011144233115103" });
    IssuerCredential issuerCredential = issuer.CreateCredential(claimOffer, credentialRequest, attributes);

    // 17. Prover processes and stores Claim
    string proverCredentialId = prover.SaveCredential(issuerCredential, credentialDefinition, credentialRequestMetadata);

    // BUG FIX: the original asserted IsTrue(IsNullOrEmpty(...)), which passes only when
    // NO id comes back — contradicting its own failure message. The test must require
    // a non-empty credential id.
    Assert.IsFalse(string.IsNullOrEmpty(proverCredentialId), "SaveCredential did not return proverCredentialId");

    // clean up
    // Close and delete wallet
    // Close pool
    // Delete pool ledger config
    // SEE Cleanup() above
}
/// <summary>
/// Create a prediction engine for one-time prediction.
/// </summary>
/// <typeparam name="TSrc">The class that defines the input data.</typeparam>
/// <typeparam name="TDst">The class that defines the output data.</typeparam>
/// <param name="transformer">The transformer to use for prediction.</param>
/// <param name="env">The environment to use.</param>
/// <param name="inputSchemaDefinition">Additional settings of the input schema.</param>
/// <param name="outputSchemaDefinition">Additional settings of the output schema.</param>
/// <param name="ignoreMissingColumns">Whether to ignore columns missing from the input schema.
/// Defaults to <see langword="true"/>, preserving the previously hard-coded behavior;
/// pass <see langword="false"/> to opt into strict column checking.</param>
public static PredictionEngine<TSrc, TDst> CreatePredictionEngine<TSrc, TDst>(this ITransformer transformer, IHostEnvironment env,
    SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null, bool ignoreMissingColumns = true)
    where TSrc : class
    where TDst : class, new()
    => new PredictionEngine<TSrc, TDst>(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition);
/// <summary>
/// Creates a cursoring engine over <paramref name="pipe"/> that produces <c>TDst</c> rows
/// via the context's <c>AsCursorable</c>.
/// </summary>
/// <param name="mlContext">The ML context; used for validation and to build the cursorable view.</param>
/// <param name="pipe">The data view to cursor over.</param>
/// <param name="ignoreMissingColumns">Whether to ignore columns missing from the data view.</param>
/// <param name="schemaDefinition">Optional schema overrides for <c>TDst</c>. Default is null.</param>
internal PipeEngine(MLContext mlContext, IDataView pipe, bool ignoreMissingColumns, SchemaDefinition schemaDefinition = null)
{
    Contracts.AssertValue(mlContext);
    mlContext.AssertValue(pipe);
    mlContext.AssertValueOrNull(schemaDefinition);

    _cursorablePipe = mlContext.Data.AsCursorable<TDst>(pipe, ignoreMissingColumns, schemaDefinition);
    // Counter starts at zero; presumably incremented as rows are produced — confirm against RunPipe.
    _counter = 0;
}
/// <summary>
/// Convert an <see cref="IDataView"/> into a strongly-typed <see cref="IEnumerable{TRow}"/>.
/// </summary>
/// <typeparam name="TRow">The user-defined row type.</typeparam>
/// <param name="data">The underlying data view.</param>
/// <param name="reuseRowObject">Whether to return the same object on every row, or allocate a new one per row.</param>
/// <param name="ignoreMissingColumns">Whether to ignore the case when a requested column is not present in the data view.</param>
/// <param name="schemaDefinition">Optional user-provided schema definition. If it is not present, the schema is inferred from the definition of T.</param>
/// <returns>The <see cref="IEnumerable{TRow}"/> that holds the data in <paramref name="data"/>. It can be enumerated multiple times.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[BootstrapSample](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/DataViewEnumerable.cs)]
/// ]]>
/// </format>
/// </example>
public IEnumerable<TRow> CreateEnumerable<TRow>(IDataView data, bool reuseRowObject, bool ignoreMissingColumns = false, SchemaDefinition schemaDefinition = null)
    where TRow : class, new()
{
    _env.CheckValue(data, nameof(data));
    _env.CheckValueOrNull(schemaDefinition);

    // Delegate the actual cursoring to a PipeEngine built over the data view.
    var pipeEngine = new PipeEngine<TRow>(_env, data, ignoreMissingColumns, schemaDefinition);
    return pipeEngine.RunPipe(reuseRowObject);
}
/// <summary>
/// Add a SchemaDefinition that will be built into the domain model
/// </summary>
/// <param name="schemaDefinition">The schema definition to register with the underlying domain model.</param>
public void AddSchema(SchemaDefinition schemaDefinition)
{
    // Guard: fail fast if the domain model has not been initialized yet.
    EnsureDomainModelIsNotNull();
    _domainModel.AddSchema(schemaDefinition);
}
/// <summary>
/// Creates a cursoring engine over <paramref name="pipe"/> that produces <c>TDst</c> rows.
/// Mirrors the <c>MLContext</c>-based overload but works directly from an
/// <see cref="IHostEnvironment"/>.
/// </summary>
/// <param name="env">The host environment; used for validation and to build the cursorable view.</param>
/// <param name="pipe">The data view to cursor over.</param>
/// <param name="ignoreMissingColumns">Whether to ignore columns missing from the data view.</param>
/// <param name="schemaDefinition">Optional schema overrides for <c>TDst</c>. Default is null.</param>
internal PipeEngine(IHostEnvironment env, IDataView pipe, bool ignoreMissingColumns, SchemaDefinition schemaDefinition = null)
{
    Contracts.AssertValue(env);
    env.AssertValue(pipe);
    env.AssertValueOrNull(schemaDefinition);

    _cursorablePipe = env.AsCursorable<TDst>(pipe, ignoreMissingColumns, schemaDefinition);
    // Counter starts at zero; presumably incremented as rows are produced — confirm against RunPipe.
    _counter = 0;
}
// Builds a fake IDomainModelDefinitionsProvider exposing a minimal extension model:
// one aggregate, two entities, and a one-to-one extension association back to CoreEntity.
internal static IDomainModelDefinitionsProvider BuildTestExtensionDefinitionsProvider()
{
    // Single aggregate rooted at ExtensionAggregate, containing ExtensionEntity.
    var extensionAggregates = new[]
    {
        new AggregateDefinition(
            new FullName("ExtensionPhysical", "ExtensionAggregate"),
            new[] { new FullName("ExtensionPhysical", "ExtensionEntity") })
    };

    // NOTE(review): both primary keys reference "KeyProperty2", which is not among the
    // declared properties — presumably deliberate for this test fixture; confirm.
    var extensionEntities = new[]
    {
        new EntityDefinition(
            "ExtensionPhysical",
            "ExtensionAggregate",
            new[] { new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true) },
            new[]
            {
                new EntityIdentifierDefinition(
                    "PK",
                    new[] { "KeyProperty1", "KeyProperty2" },
                    isPrimary: true)
            },
            true),
        new EntityDefinition(
            "ExtensionPhysical",
            "ExtensionEntity",
            new[] { new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true) },
            new[]
            {
                new EntityIdentifierDefinition(
                    "PK",
                    new[] { "KeyProperty1", "KeyProperty2" },
                    isPrimary: true)
            },
            true)
    };

    // One-to-one extension association from the core entity to the extension entity.
    var extensionAssociations = new[]
    {
        new AssociationDefinition(
            new FullName("ExtensionPhysical", "FK_ExtensionEntity"),
            Cardinality.OneToOneExtension,
            new FullName(EdFiConventions.PhysicalSchemaName, "CoreEntity"),
            new[] { new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true) },
            new FullName("ExtensionPhysical", "ExtensionEntity"),
            new[] { new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true) },
            isIdentifying: true,
            isRequired: true)
    };

    var extensionSchema = new SchemaDefinition("Extension-Logical", "ExtensionPhysical");

    var extensionModelDefinitions = new DomainModelDefinitions(
        extensionSchema,
        extensionAggregates,
        extensionEntities,
        extensionAssociations);

    // Fake provider that simply hands back the definitions above.
    var fakeProvider = A.Fake<IDomainModelDefinitionsProvider>();
    A.CallTo(() => fakeProvider.GetDomainModelDefinitions())
        .Returns(extensionModelDefinitions);

    return fakeProvider;
}
// Computes permutation feature importance: for each working feature slot, permutes that
// slot's cached values across the evaluation examples, re-scores the model, and records
// the per-slot metric delta against the baseline.
// NOTE(review): the declaration appears truncated in this chunk — the access modifiers
// and return type (presumably an ImmutableArray<TResult>, given the returns below) are
// missing; restore them when merging.
GetImportanceMetricsMatrix(
    IHostEnvironment env,
    IPredictionTransformer<IPredictor> model,
    IDataView data,
    Func<IDataView, TMetric> evaluationFunc,
    Func<TMetric, TMetric, TMetric> deltaFunc,
    string features,
    int permutationCount,
    bool useFeatureWeightFilter = false,
    int? topExamples = null)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register(nameof(PermutationFeatureImportance<TMetric, TResult>));
    host.CheckValue(model, nameof(model));
    host.CheckValue(data, nameof(data));
    host.CheckNonEmpty(features, nameof(features));

    topExamples = topExamples ?? Utils.ArrayMaxSize;
    host.Check(topExamples > 0, "Provide how many examples to use (positive number) or set to null to use whole dataset.");

    VBuffer<ReadOnlyMemory<char>> slotNames = default;
    var metricsDelta = new List<TResult>();

    using (var ch = host.Start("GetImportanceMetrics"))
    {
        ch.Trace("Scoring and evaluating baseline.");
        var baselineMetrics = evaluationFunc(model.Transform(data));

        // Get slot names.
        var featuresColumn = data.Schema[features];
        int numSlots = featuresColumn.Type.VectorSize;
        data.Schema.TryGetColumnIndex(features, out int featuresColumnIndex);

        ch.Info("Number of slots: " + numSlots);
        if (data.Schema[featuresColumnIndex].HasSlotNames(numSlots))
        {
            data.Schema[featuresColumnIndex].Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref slotNames);
        }

        if (slotNames.Length != numSlots)
        {
            slotNames = VBufferUtils.CreateEmpty<ReadOnlyMemory<char>>(numSlots);
        }

        VBuffer<float> weights = default;
        var workingFeatureIndices = Enumerable.Range(0, numSlots).ToList();
        int zeroWeightsCount = 0;

        // By default set to the number of all features available.
        var evaluatedFeaturesCount = numSlots;
        if (useFeatureWeightFilter)
        {
            var predictorWithWeights = model.Model as IPredictorWithFeatureWeights<Single>;
            if (predictorWithWeights != null)
            {
                // BUG FIX: the working set is rebuilt below from the non-zero weights,
                // so it must be emptied first. Without this Clear, every non-zero slot
                // was added a second time (duplicate work), zero-weight slots were never
                // actually filtered out, and the "use all features" fallback below could
                // never trigger.
                workingFeatureIndices.Clear();

                predictorWithWeights.GetFeatureWeights(ref weights);

                const int maxReportedZeroFeatures = 10;
                StringBuilder msgFilteredOutFeatures = new StringBuilder("The following features have zero weight and will not be evaluated: \n \t");
                var prefix = "";
                foreach (var k in weights.Items(all: true))
                {
                    if (k.Value == 0)
                    {
                        zeroWeightsCount++;

                        // Print info about first few features we're not going to evaluate.
                        if (zeroWeightsCount <= maxReportedZeroFeatures)
                        {
                            msgFilteredOutFeatures.Append(prefix);
                            msgFilteredOutFeatures.Append(GetSlotName(slotNames, k.Key));
                            prefix = ", ";
                        }
                    }
                    else
                    {
                        workingFeatureIndices.Add(k.Key);
                    }
                }

                // Old FastTree models has less weights than slots.
                if (weights.Length < numSlots)
                {
                    ch.Warning("Predictor had fewer features than slots. All unknown features will get default 0 weight.");
                    zeroWeightsCount += numSlots - weights.Length;
                    var indexes = weights.GetIndices().ToArray();
                    var values = weights.GetValues().ToArray();
                    var count = values.Length;
                    weights = new VBuffer<float>(numSlots, count, values, indexes);
                }

                evaluatedFeaturesCount = workingFeatureIndices.Count;
                ch.Info("Number of zero weights: {0} out of {1}.", zeroWeightsCount, weights.Length);

                // Print what features have 0 weight
                if (zeroWeightsCount > 0)
                {
                    if (zeroWeightsCount > maxReportedZeroFeatures)
                    {
                        msgFilteredOutFeatures.Append(string.Format("... (printing out {0} features here).\n Use 'Index' column in the report for info on what features are not evaluated.", maxReportedZeroFeatures));
                    }
                    ch.Info(msgFilteredOutFeatures.ToString());
                }
            }
        }

        if (workingFeatureIndices.Count == 0 && zeroWeightsCount == 0)
        {
            // Use all features otherwise.
            workingFeatureIndices.AddRange(Enumerable.Range(0, numSlots));
        }

        if (zeroWeightsCount == numSlots)
        {
            ch.Warning("All features have 0 weight thus can not do thorough evaluation");
            return metricsDelta.ToImmutableArray();
        }

        // Note: this will not work on the huge dataset.
        var maxSize = topExamples;
        List<float> initialfeatureValuesList = new List<float>();

        // Cursor through the data to cache slot 0 values for the upcoming permutation.
        var valuesRowCount = 0;
        // REVIEW: Seems like if the labels are NaN, so that all metrics are NaN, this command will be useless.
        // In which case probably erroring out is probably the most useful thing.
        using (var cursor = data.GetRowCursor(col => col == featuresColumnIndex))
        {
            var featuresGetter = cursor.GetGetter<VBuffer<float>>(featuresColumnIndex);
            var featuresBuffer = default(VBuffer<float>);
            while (initialfeatureValuesList.Count < maxSize && cursor.MoveNext())
            {
                featuresGetter(ref featuresBuffer);
                initialfeatureValuesList.Add(featuresBuffer.GetItemOrDefault(workingFeatureIndices[0]));
            }

            valuesRowCount = initialfeatureValuesList.Count;
        }

        if (valuesRowCount > 0)
        {
            ch.Info("Detected {0} examples for evaluation.", valuesRowCount);
        }
        else
        {
            ch.Warning("Detected no examples for evaluation.");
            return metricsDelta.ToImmutableArray();
        }

        float[] featureValuesBuffer = initialfeatureValuesList.ToArray();
        float[] nextValues = new float[valuesRowCount];

        // Now iterate through all the working slots, do permutation and calc the delta of metrics.
        int processedCnt = 0;
        int nextFeatureIndex = 0;
        var shuffleRand = RandomUtils.Create(host.Rand.Next());
        using (var pch = host.StartProgressChannel("SDCA preprocessing with lookup"))
        {
            pch.SetHeader(new ProgressHeader("processed slots"), e => e.SetProgress(0, processedCnt));
            foreach (var workingIndx in workingFeatureIndices)
            {
                // Index for the feature we will permute next. Needed to build in advance a buffer for the permutation.
                if (processedCnt < workingFeatureIndices.Count - 1)
                {
                    nextFeatureIndex = workingFeatureIndices[processedCnt + 1];
                }

                // Used for pre-caching the next feature
                int nextValuesIndex = 0;

                // Single-column input/output schemas over FeaturesBuffer, both named after
                // the features column so the lambda transform maps it in place.
                SchemaDefinition input = SchemaDefinition.Create(typeof(FeaturesBuffer));
                Contracts.Assert(input.Count == 1);
                input[0].ColumnName = features;

                SchemaDefinition output = SchemaDefinition.Create(typeof(FeaturesBuffer));
                Contracts.Assert(output.Count == 1);
                output[0].ColumnName = features;
                output[0].ColumnType = featuresColumn.Type;

                // Perform multiple permutations for one feature to build a confidence interval
                var metricsDeltaForFeature = new TResult();
                for (int permutationIteration = 0; permutationIteration < permutationCount; permutationIteration++)
                {
                    Utils.Shuffle<float>(shuffleRand, featureValuesBuffer);

                    Action<FeaturesBuffer, FeaturesBuffer, PermuterState> permuter =
                        (src, dst, state) =>
                        {
                            src.Features.CopyTo(ref dst.Features);
                            VBufferUtils.ApplyAt(ref dst.Features, workingIndx,
                                (int ii, ref float d) => d = featureValuesBuffer[state.SampleIndex++]);

                            // Is it time to pre-cache the next feature?
                            if (permutationIteration == permutationCount - 1 &&
                                processedCnt < workingFeatureIndices.Count - 1)
                            {
                                // Fill out the featureValueBuffer for the next feature while updating the current feature
                                // This is the reason I need PermuterState in LambdaTransform.CreateMap.
                                nextValues[nextValuesIndex] = src.Features.GetItemOrDefault(nextFeatureIndex);
                                if (nextValuesIndex < valuesRowCount - 1)
                                {
                                    nextValuesIndex++;
                                }
                            }
                        };

                    IDataView viewPermuted = LambdaTransform.CreateMap(
                        host, data, permuter, null, input, output);

                    if (valuesRowCount == topExamples)
                    {
                        viewPermuted = SkipTakeFilter.Create(
                            host, new SkipTakeFilter.TakeArguments() { Count = valuesRowCount }, viewPermuted);
                    }

                    var metrics = evaluationFunc(model.Transform(viewPermuted));

                    var delta = deltaFunc(metrics, baselineMetrics);
                    metricsDeltaForFeature.Add(delta);
                }

                // Add the metrics delta to the list
                metricsDelta.Add(metricsDeltaForFeature);

                // Swap values for next iteration of permutation.
                if (processedCnt < workingFeatureIndices.Count - 1)
                {
                    Array.Clear(featureValuesBuffer, 0, featureValuesBuffer.Length);
                    nextValues.CopyTo(featureValuesBuffer, 0);
                    Array.Clear(nextValues, 0, nextValues.Length);
                }

                processedCnt++;
            }
            pch.Checkpoint(processedCnt, processedCnt);
        }
    }

    return metricsDelta.ToImmutableArray();
}
// Builds a fake IDomainModelDefinitionsProvider exposing a minimal core model:
// CoreEntity with an embedded object (1:1) and a child collection (1:0..N).
internal static IDomainModelDefinitionsProvider BuildCoreModelEntityDefinitionsProvider()
{
    var physicalSchema = EdFiConventions.PhysicalSchemaName;
    var logicalName = EdFiConventions.LogicalName;

    // Core entity plus one collection item and one embedded object.
    var entities = new[]
    {
        new EntityDefinition(
            physicalSchema,
            "CoreEntity",
            new[] { new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true) },
            new[]
            {
                new EntityIdentifierDefinition(
                    "PK",
                    new[] { "KeyProperty1", "KeyProperty2" },
                    isPrimary: true)
            },
            true),
        new EntityDefinition(
            physicalSchema,
            "Collection1Item",
            new[]
            {
                new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true),
                new EntityPropertyDefinition("KeyProperty2", new PropertyType(DbType.Int32), null, true)
            },
            new[]
            {
                new EntityIdentifierDefinition(
                    "PK",
                    new[] { "KeyProperty1", "KeyProperty2" },
                    isPrimary: true)
            },
            true),
        new EntityDefinition(
            physicalSchema,
            "EmbeddedObject1",
            new[] { new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true) },
            new[]
            {
                new EntityIdentifierDefinition(
                    "PK",
                    new[] { "KeyProperty1" },
                    isPrimary: true)
            },
            true)
    };

    // CoreEntity owns EmbeddedObject1 (one-to-one) and Collection1Item (one-to-zero-or-more).
    var associations = new[]
    {
        new AssociationDefinition(
            new FullName(physicalSchema, "FK_CoreEntityEmbeddedObject"),
            Cardinality.OneToOne,
            new FullName(physicalSchema, "CoreEntity"),
            new[] { new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true) },
            new FullName(physicalSchema, "EmbeddedObject1"),
            new[] { new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true) },
            isIdentifying: true,
            isRequired: true),
        new AssociationDefinition(
            new FullName(physicalSchema, "FK_CoreEntityCollection"),
            Cardinality.OneToZeroOrMore,
            new FullName(physicalSchema, "CoreEntity"),
            new[] { new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true) },
            new FullName(physicalSchema, "Collection1Item"),
            new[] { new EntityPropertyDefinition("KeyProperty1", new PropertyType(DbType.Int32), null, true) },
            isIdentifying: true,
            isRequired: true)
    };

    var aggregates = new[]
    {
        new AggregateDefinition(
            new FullName(physicalSchema, "CoreEntity"),
            new[]
            {
                new FullName(physicalSchema, "Collection1Item"),
                new FullName(physicalSchema, "EmbeddedObject1")
            })
    };

    var coreDefinitions = new DomainModelDefinitions(
        new SchemaDefinition(logicalName, physicalSchema),
        aggregates,
        entities,
        associations);

    // Fake provider that simply hands back the definitions above.
    var fakeProvider = A.Fake<IDomainModelDefinitionsProvider>();
    A.CallTo(() => fakeProvider.GetDomainModelDefinitions())
        .Returns(coreDefinitions);

    return fakeProvider;
}
/// <summary>
/// Runs a BizTalk map test: validates the source message against its schema, then executes
/// the map with input/output validation disabled, writing the result to the output folder.
/// </summary>
public static void MapTest(string inputMessageFolderPath, string outputMessageFolderPath, TestableMapBase target, string sourceFile, SchemaDefinition sourceSchema, string destinationFile, SchemaDefinition destinationSchema, List<XPathDefinition> xpathList)
{
    // Ensure the input message conforms to the source schema before mapping.
    ValidateSouceMessage(inputMessageFolderPath, sourceFile, sourceSchema);

    // Paths are built by simple concatenation, so folder paths are expected to
    // already end with a directory separator.
    var sourcePath = inputMessageFolderPath + sourceFile;
    var destinationPath = outputMessageFolderPath + destinationFile;

    // Validation is handled separately; the map run itself skips it.
    target.ValidateInput = false;
    target.ValidateOutput = false;

    target.TestMap(
        sourcePath,
        Microsoft.BizTalk.TestTools.Schema.InputInstanceType.Xml,
        destinationPath,
        Microsoft.BizTalk.TestTools.Schema.OutputInstanceType.XML);
}
/// <summary>
/// Create a filter transform that is savable iff <paramref name="saveAction"/> and
/// <paramref name="loadFunc"/> are not null.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="source">The dataview upon which we construct the transform.</param>
/// <param name="filterFunc">The function by which we transform source to destination columns
/// and decide whether to keep the row.</param>
/// <param name="initStateAction">The function that is called once per cursor to initialize state. Can be null.</param>
/// <param name="saveAction">An action that allows us to save state to the serialization stream.
/// May be null simultaneously with <paramref name="loadFunc"/>.</param>
/// <param name="loadFunc">A function that given the serialization stream and a data view, returns
/// an <see cref="ITransformTemplate"/>. The intent is, this returned object should itself be a
/// <see cref="CustomMappingTransformer{TSrc,TDst}"/>, but this is not strictly necessary. This
/// delegate should be a static non-lambda method that this assembly can legally call.
/// May be null simultaneously with <paramref name="saveAction"/>.</param>
/// <param name="inputSchemaDefinition">The schema definition overrides for <typeparamref name="TSrc"/>.</param>
/// <param name="outputSchemaDefinition">The schema definition overrides for <typeparamref name="TDst"/>.</param>
public ExtStatefulFilterTransform(IHostEnvironment env, IDataView source, Func<TSrc, TDst, TState, bool> filterFunc, Action<TState> initStateAction, Action<BinaryWriter> saveAction, ExtLambdaTransform.LoadDelegate loadFunc, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
    : base(env, RegistrationName, saveAction, loadFunc)
{
    // Use nameof instead of string literals so the names stay correct under refactoring.
    Host.AssertValue(source, nameof(source));
    Host.AssertValue(filterFunc, nameof(filterFunc));
    Host.AssertValueOrNull(initStateAction);
    Host.AssertValueOrNull(inputSchemaDefinition);
    Host.AssertValueOrNull(outputSchemaDefinition);

    _source = source;
    _filterFunc = filterFunc;
    _initStateAction = initStateAction;
    _inputSchemaDefinition = inputSchemaDefinition;

    // Typed cursor over the source, honoring any input schema overrides.
    _typedSource = TypedCursorable<TSrc>.Create(Host, Source, false, inputSchemaDefinition);

    // Columns added by this transform are derived from TDst (plus any overrides).
    var outSchema = InternalSchemaDefinition.Create(typeof(TDst), outputSchemaDefinition);
    _addedSchema = outSchema;
    _bindings = new ColumnBindings(Source.Schema, DataViewConstructionUtils.GetSchemaColumns(outSchema));
}
/// <summary>
/// Reads the single expected output message and validates it against the destination schema
/// and the supplied XPath assertions, using a one-step BizUnit test case.
/// </summary>
public static void ValidateDestinationMessage(string OutoutputMessageFolderPath, string outputFile, SchemaDefinition destinationSchema, List<XPathDefinition> xpathList)
{
    // Schema + XPath validation applied to each file the read step picks up.
    var validationStep = new XmlValidationStep();
    validationStep.XmlSchemas.Add(destinationSchema);

    foreach (var xpath in xpathList)
    {
        validationStep.XPathValidations.Add(xpath);
    }

    // Expect exactly one matching output file; leave it on disk for inspection.
    var readStep = new FileReadMultipleStep
    {
        DeleteFiles = false,
        FailOnError = true,
        DirectoryPath = OutoutputMessageFolderPath,
        SearchPattern = outputFile,
        ExpectedNumberOfFiles = 1
    };
    readStep.SubSteps.Add(validationStep);

    var testCase = new TestCase { Name = "Validate Output Message" };
    testCase.ExecutionSteps.Add(readStep);

    new BizUnit.Core.TestRunner(testCase).Run();
}
/// <summary>
/// This is a 'stateful non-savable' version of the map transform: the mapping function is
/// guaranteed to be invoked once per every row of the data set, in sequence; one user-defined
/// state object will be allocated per cursor and passed to the map function every time. If
/// <typeparamref name="TSrc"/>, <typeparamref name="TDst"/>, or <typeparamref name="TState"/>
/// implement the <see cref="IDisposable"/> interface, they will be disposed after use.
/// </summary>
/// <typeparam name="TSrc">The type that describes what 'source' columns are consumed from the
/// input <see cref="IDataView"/>.</typeparam>
/// <typeparam name="TDst">The type that describes what new columns are added by this transform.</typeparam>
/// <typeparam name="TState">The type of the state object to allocate per cursor.</typeparam>
/// <param name="env">The host environment to use.</param>
/// <param name="source">The input data to apply transformation to.</param>
/// <param name="mapAction">The function that performs the transformation, turning its
/// <typeparamref name="TSrc"/> argument into its <typeparamref name="TDst"/> argument, with
/// access to the per-cursor <typeparamref name="TState"/> state.</param>
/// <param name="initStateAction">The function that is called once per cursor to initialize state. Can be null.</param>
/// <param name="inputSchemaDefinition">The optional input schema. If <c>null</c>, the schema is
/// inferred from the <typeparamref name="TSrc"/> type.</param>
/// <param name="outputSchemaDefinition">The optional output schema. If <c>null</c>, the schema is
/// inferred from the <typeparamref name="TDst"/> type.</param>
public static ITransformTemplate CreateMap<TSrc, TDst, TState>(IHostEnvironment env, IDataView source, Action<TSrc, TDst, TState> mapAction, Action<TState> initStateAction, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
    where TSrc : class, new()
    where TDst : class, new()
    where TState : class, new()
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(source, nameof(source));
    env.CheckValue(mapAction, nameof(mapAction));
    env.CheckValueOrNull(initStateAction);
    env.CheckValueOrNull(inputSchemaDefinition);
    env.CheckValueOrNull(outputSchemaDefinition);

    // Adapt the map action into a filter predicate that always keeps the row.
    Func<TSrc, TDst, TState, bool> mapAsFilter = (input, output, state) =>
    {
        mapAction(input, output, state);
        return true;
    };

    return new ExtStatefulFilterTransform<TSrc, TDst, TState>(
        env, source, mapAsFilter, initStateAction, null, null, inputSchemaDefinition, outputSchemaDefinition);
}
/// <summary>
/// Generates the CREATE script for the given schema.
/// </summary>
/// <param name="schema">The schema to script.</param>
/// <returns>The generated CREATE script text.</returns>
// NOTE(review): the boolean argument's meaning is defined by ToCreateScript, which is not
// visible here — confirm it toggles the intended option before relying on it.
public string BuildCreateScript(SchemaDefinition schema) => schema.ToCreateScript(true);
/// <summary>
/// This creates a filter transform that can 'accept' or 'decline' any row of the data based on
/// the contents of the row or state of the cursor. This is a 'stateful non-savable' version of
/// the filter: the filter function is guaranteed to be invoked once per every row of the data
/// set, in sequence (non-parallelizable); one user-defined state object will be allocated per
/// cursor and passed to the filter function every time. If <typeparamref name="TSrc"/> or
/// <typeparamref name="TState"/> implement the <see cref="IDisposable"/> interface, they will
/// be disposed after use.
/// </summary>
/// <typeparam name="TSrc">The type that describes what 'source' columns are consumed from the
/// input <see cref="IDataView"/>.</typeparam>
/// <typeparam name="TState">The type of the state object to allocate per cursor.</typeparam>
/// <param name="env">The host environment to use.</param>
/// <param name="source">The input data to apply transformation to.</param>
/// <param name="filterFunc">The user-defined function that decides whether to keep the row or
/// discard it. First parameter is the current row's contents, the second parameter is the
/// cursor-specific state object.</param>
/// <param name="initStateAction">The function that is called once per cursor to initialize state. Can be null.</param>
/// <param name="inputSchemaDefinition">The optional input schema. If <c>null</c>, the schema is
/// inferred from the <typeparamref name="TSrc"/> type.</param>
/// <returns>The constructed filter transform.</returns>
public static ITransformTemplate CreateFilter<TSrc, TState>(IHostEnvironment env, IDataView source, Func<TSrc, TState, bool> filterFunc, Action<TState> initStateAction, SchemaDefinition inputSchemaDefinition = null)
    where TSrc : class, new()
    where TState : class, new()
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(source, nameof(source));
    env.CheckValue(filterFunc, nameof(filterFunc));
    env.CheckValueOrNull(initStateAction);
    env.CheckValueOrNull(inputSchemaDefinition);

    // A pure filter adds no columns, so object stands in for the (unused) TDst;
    // the wrapper lambda ignores that argument and defers to the user predicate.
    return new ExtStatefulFilterTransform<TSrc, object, TState>(
        env,
        source,
        (input, unused, state) => filterFunc(input, state),
        initStateAction,
        null,
        null,
        inputSchemaDefinition);
}
/// <summary>
/// Time-series specialization of the prediction-engine core: collects the stateful rows of the
/// mapper chain (so they can be 'pinged' with new observations) and wraps the mapped output row
/// so it can be read back as <typeparamref name="TDst"/>.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="inputRow">The input row fed to the mapper.</param>
/// <param name="mapper">The row-to-row mapper produced from the transformer.</param>
/// <param name="ignoreMissingColumns">Whether to tolerate output columns with no matching <typeparamref name="TDst"/> member.</param>
/// <param name="inputSchemaDefinition">Input schema overrides (unused here; kept for the override signature).</param>
/// <param name="outputSchemaDefinition">Output schema overrides.</param>
/// <param name="disposer">Cleanup action produced while building the stateful row chain.</param>
/// <param name="outputRow">The typed view over the mapper's output row.</param>
internal override void PredictionEngineCore(IHostEnvironment env, DataViewConstructionUtils.InputRow<TSrc> inputRow, IRowToRowMapper mapper, bool ignoreMissingColumns, SchemaDefinition inputSchemaDefinition, SchemaDefinition outputSchemaDefinition, out Action disposer, out IRowReadableAs<TDst> outputRow)
{
    List<IStatefulRow> rows = new List<IStatefulRow>();

    // Fixed: the original redundantly assigned outputRowLocal to itself
    // ("outputRowLocal = outputRowLocal = GetStatefulRows(...)").
    IRow outputRowLocal = GetStatefulRows(inputRow, mapper, col => true, rows, out disposer);

    var cursorable = TypedCursorable<TDst>.Create(env, new EmptyDataView(env, mapper.OutputSchema), ignoreMissingColumns, outputSchemaDefinition);

    // The pinger pushes prediction-time observations into the stateful rows' state.
    _pinger = CreatePinger(rows);
    outputRow = cursorable.GetRow(outputRowLocal);
}
/// <summary>
/// Constructs the engine: builds the typed input row, obtains a row-to-row mapper from the
/// transformer, and wires up the typed output row via <c>PredictionEngineCore</c>.
/// </summary>
private protected PredictionEngineBase(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
{
    Contracts.CheckValue(env, nameof(env));
    env.AssertValue(transformer);
    Transformer = transformer;

    // Factory that turns an input schema into a row-to-row mapper for this transformer.
    var mapperFactory = TransformerChecker(env, transformer);
    env.AssertValue(mapperFactory);

    _inputRow = DataViewConstructionUtils.CreateInputRow<TSrc>(env, inputSchemaDefinition);
    PredictionEngineCore(env, _inputRow, mapperFactory(_inputRow.Schema), ignoreMissingColumns, outputSchemaDefinition, out _disposer, out _outputRow);

    OutputSchema = Transformer.GetOutputSchema(_inputRow.Schema);
}
/// <summary>
/// Create a prediction engine for one-time prediction.
/// </summary>
/// <typeparam name="TSrc">The class that defines the input data.</typeparam>
/// <typeparam name="TDst">The class that defines the output data.</typeparam>
/// <param name="transformer">The transformer to use for prediction.</param>
/// <param name="ignoreMissingColumns">Whether to throw an exception if a column exists in
/// <paramref name="outputSchemaDefinition"/> but the corresponding member doesn't exist in
/// <typeparamref name="TDst"/>.</param>
/// <param name="inputSchemaDefinition">Additional settings of the input schema.</param>
/// <param name="outputSchemaDefinition">Additional settings of the output schema.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[Save](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/SaveLoadModel.cs)]
/// ]]>
/// </format>
/// </example>
public PredictionEngine<TSrc, TDst> CreatePredictionEngine<TSrc, TDst>(ITransformer transformer, bool ignoreMissingColumns = true, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
    where TSrc : class
    where TDst : class, new()
    // Thin convenience wrapper: defers to the extension method, supplying this catalog's environment.
    => transformer.CreatePredictionEngine<TSrc, TDst>(_env, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition);
/// <summary>
/// Base implementation: maps the input row through the pipeline and wraps the result so it can
/// be read back as <typeparamref name="TDst"/>. Overridable for stateful (e.g. time-series) engines.
/// </summary>
private protected virtual void PredictionEngineCore(IHostEnvironment env, DataViewConstructionUtils.InputRow<TSrc> inputRow, IRowToRowMapper mapper, bool ignoreMissingColumns, SchemaDefinition outputSchemaDefinition, out Action disposer, out IRowReadableAs<TDst> outputRow)
{
    // Run the input row through the mapper, requesting all output columns.
    var mappedRow = mapper.GetRow(inputRow, mapper.OutputSchema);

    // An empty data view with the mapper's output schema lets us build a typed reader for TDst.
    var outputCursorable = TypedCursorable<TDst>.Create(env, new EmptyDataView(env, mapper.OutputSchema), ignoreMissingColumns, outputSchemaDefinition);
    outputRow = outputCursorable.GetRow(mappedRow);

    // Caller invokes this to release the input row's resources when the engine is done.
    disposer = inputRow.Dispose;
}
/// <summary>
/// Debug helper: checks that every table described in <c>schema</c> exists in the data store
/// with the expected columns and indices, stopping at the first failure.
/// </summary>
/// <param name="genericData">Connector used to probe the data store for each table.</param>
/// <param name="reason">On failure, the first schema definition that failed verification; otherwise null.</param>
/// <returns>True when all tables verify; false as soon as one fails.</returns>
public bool DebugTestThatAllTablesValidate(IDataConnector genericData, out SchemaDefinition reason)
{
    reason = null;
#if (!ISWIN)
    // Non-ISWIN build: plain foreach variant — presumably this target avoids the LINQ
    // extension methods; TODO confirm why the branches differ only in style.
    foreach (var s in schema)
    {
        if (!genericData.VerifyTableExists(s.Name, s.Columns, s.Indices))
        {
            reason = s;
            return false;
        }
    }
#else
    // ISWIN build: same semantics expressed with LINQ — the loop body only runs for
    // tables that failed verification, so it reports the first failure and returns.
    foreach (var s in schema.Where(s => !genericData.VerifyTableExists(s.Name, s.Columns, s.Indices)))
    {
        reason = s;
        return false;
    }
#endif
    return true;
}
// Shows how to create an IDataView from an IEnumerable and fix the size of the
// Features vector at runtime. When the data model type carries a VectorType
// annotation the size is known at compile time; when it is not known until
// runtime, the SchemaDefinition can be modified directly instead. This matters
// because most ML.NET trainers require the Features vector to be of known size.
public static void Example()
{
    // Entry point for ML.NET operations: exception tracking, logging, catalog
    // of available operations, and source of randomness.
    var mlContext = new MLContext();

    // Small in-memory dataset whose vector size is declared on the type.
    IEnumerable<DataPointVector> knownSizePoints = new DataPointVector[]
    {
        new DataPointVector { Features = new float[] { 1.2f, 3.4f, 4.5f, 3.2f, 7, 5f } },
        new DataPointVector { Features = new float[] { 4.2f, 3.4f, 14.65f, 3.2f, 3, 5f } },
        new DataPointVector { Features = new float[] { 1.6f, 3.5f, 4.5f, 6.2f, 3, 5f } },
    };

    // Load the annotated data and inspect the Features column type.
    IDataView data = mlContext.Data.LoadFromEnumerable(knownSizePoints);
    var featureColumn = data.Schema["Features"].Type as VectorDataViewType;
    Console.WriteLine($"Is the size of the Features column known: " +
        $"{featureColumn.IsKnownSize}.\nSize: {featureColumn.Size}");
    // Preview (size comes from the VectorType annotation on DataPointVector):
    //   Is the size of the Features column known: True.
    //   Size: 5.

    // Now a dataset whose vector size is NOT known at compile time.
    IEnumerable<DataPoint> unknownSizePoints = new DataPoint[]
    {
        new DataPoint { Features = new float[] { 1.2f, 3.4f, 4.5f } },
        new DataPoint { Features = new float[] { 4.2f, 3.4f, 1.6f } },
        new DataPoint { Features = new float[] { 1.6f, 3.5f, 4.5f } },
    };

    // The feature dimension (typically the Count of the features array,
    // discovered only at runtime).
    int featureDimension = 3;

    // Derive a schema from the type; its Features column starts with unknown size.
    var definedSchema = SchemaDefinition.Create(typeof(DataPoint));
    featureColumn = definedSchema["Features"].ColumnType as VectorDataViewType;
    Console.WriteLine($"Is the size of the Features column known: " +
        $"{featureColumn.IsKnownSize}.\nSize: {featureColumn.Size}");
    // Preview:
    //   Is the size of the Features column known: False.
    //   Size: 0.

    // Overwrite the column type with a known-size vector of the same item type.
    var vectorItemType = ((VectorDataViewType)definedSchema[0].ColumnType).ItemType;
    definedSchema[0].ColumnType = new VectorDataViewType(vectorItemType, featureDimension);

    // Re-load the data, this time supplying the modified schema.
    IDataView data2 = mlContext.Data.LoadFromEnumerable(unknownSizePoints, definedSchema);
    featureColumn = data2.Schema["Features"].Type as VectorDataViewType;
    Console.WriteLine($"Is the size of the Features column known: " +
        $"{featureColumn.IsKnownSize}.\nSize: {featureColumn.Size}");
    // Preview:
    //   Is the size of the Features column known: True.
    //   Size: 3.
}