/// <summary>
/// Adds sessions to the dataset from the data stores found under a single path.
/// </summary>
/// <remarks>
/// Convenience overload: forwards to the overload that takes two path arguments,
/// passing <paramref name="path"/> for both of them.
/// </remarks>
/// <param name="dataset">The dataset to add sessions to.</param>
/// <param name="path">The path that contains the data stores.</param>
/// <param name="partitionName">The name of the partition to be added when adding a new session. Default is null.</param>
public static void AddSessionsFromExistingStores(this Dataset dataset, string path, string partitionName = null)
    => dataset.AddSessionsFromExistingStores(path, path, partitionName);
/// <summary>
/// Computes a derived partition for each session in the dataset.
/// </summary>
/// <remarks>
/// For every session a new pipeline is created, the session is opened through a
/// <c>SessionImporter</c>, an output store is created, <paramref name="computeDerived"/>
/// is invoked to wire up the computation, and the pipeline is run. The resulting store is
/// then added to the session as a partition named <paramref name="outputPartitionName"/>.
/// </remarks>
/// <typeparam name="TParameter">The type of parameter passed to the action.</typeparam>
/// <param name="dataset">The dataset over which to derive partitions.</param>
/// <param name="computeDerived">The action to be invoked to derive partitions. It is called once per session with the pipeline, the session importer, the output exporter and <paramref name="parameter"/>.</param>
/// <param name="parameter">The parameter to be passed to the action.</param>
/// <param name="outputPartitionName">The output partition name to be created.</param>
/// <param name="overwrite">Flag indicating whether an existing partition with the same name should be removed and replaced. Default is false.</param>
/// <param name="outputStoreName">The name of the output data store. Default is null, in which case <paramref name="outputPartitionName"/> is used.</param>
/// <param name="outputStorePath">The path of the output data store. Default is null, in which case the store path of the session's first partition is used. When specified, each session writes to a sub-folder named after its zero-based index in the dataset.</param>
/// <param name="replayDescriptor">The replay descriptor to use. Default is null, in which case <c>ReplayDescriptor.ReplayAll</c> is used.</param>
/// <returns>The same dataset instance, with the newly derived partitions added to its sessions.</returns>
/// <exception cref="ArgumentNullException"><paramref name="dataset"/> or <paramref name="computeDerived"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// A session already contains a partition named <paramref name="outputPartitionName"/> and
/// <paramref name="overwrite"/> is false, or a session has no partitions and no
/// <paramref name="outputStorePath"/> was specified.
/// </exception>
public static Dataset CreateDerivedPartition<TParameter>(
    this Dataset dataset,
    Action<Pipeline, SessionImporter, Exporter, TParameter> computeDerived,
    TParameter parameter,
    string outputPartitionName,
    bool overwrite = false,
    string outputStoreName = null,
    string outputStorePath = null,
    ReplayDescriptor replayDescriptor = null)
{
    // Fail fast, before any store is created on disk.
    if (dataset == null)
    {
        throw new ArgumentNullException(nameof(dataset));
    }

    if (computeDerived == null)
    {
        throw new ArgumentNullException(nameof(computeDerived));
    }

    // Resolve the defaults once; they do not vary between sessions.
    var storeName = outputStoreName ?? outputPartitionName;
    var replay = replayDescriptor ?? ReplayDescriptor.ReplayAll;

    int sessionIndex = 0;
    foreach (var session in dataset.Sessions)
    {
        var existingPartition = session.Partitions.FirstOrDefault(p => p.Name == outputPartitionName);
        if (existingPartition != null)
        {
            if (!overwrite)
            {
                // if the overwrite flag is not on, throw
                throw new InvalidOperationException($"Session already contains partition with name {outputPartitionName}");
            }

            // remove the partition first
            session.RemovePartition(existingPartition);
        }

        // the first partition is where we put the data if output is not specified
        // (looked up after the removal above, so a partition being overwritten is never used as input location)
        var inputPartition = session.Partitions.FirstOrDefault();
        if (inputPartition == null && outputStorePath == null)
        {
            throw new InvalidOperationException(
                "Session contains no partitions, so the output store path cannot be inferred; specify outputStorePath.");
        }

        // figure out the output partition path
        var outputPartitionPath = (outputStorePath == null)
            ? inputPartition.StorePath
            : Path.Combine(outputStorePath, $"{sessionIndex}");

        // create and run the pipeline
        using (var pipeline = Pipeline.Create())
        {
            var importer = SessionImporter.Open(pipeline, session);
            var exporter = Store.Create(pipeline, storeName, outputPartitionPath);

            computeDerived(pipeline, importer, exporter, parameter);

            // Stopwatch is monotonic, unlike DateTime.Now which can jump with clock adjustments.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();
            Console.WriteLine($"Computing derived features on {inputPartition?.StorePath} ...");

            pipeline.Run(replay);

            stopwatch.Stop();
            Console.WriteLine($" - Time elapsed: {stopwatch.Elapsed.TotalMinutes:0.00} min.");
        }

        // add the partition, registering it under the same store name the exporter was created with
        // NOTE(review): assumes AddStorePartition takes (storeName, storePath, partitionName) - confirm against Session.
        session.AddStorePartition(storeName, outputPartitionPath, outputPartitionName);

        // increment session index
        sessionIndex++;
    }

    return dataset;
}