/// <summary>
/// Looks up every supported dataset file under <c>DataFolderPath</c> and hands the result to <c>AddDatasets</c>.
/// </summary>
public void AddFolderDelegate()
{
    // One wildcard pattern ("*" + extension) per supported file type.
    var searchPatterns = new List<string>();
    foreach (var fileType in DatasetLoader.SupportedFileTypes)
    {
        searchPatterns.Add("*" + fileType.Extension);
    }

    var searchOption = ShouldSearchSubDirectories;

    // A missing path and a non-existent directory are reported with the same status text.
    if (DataFolderPath == null || !Directory.Exists(DataFolderPath))
    {
        ApplicationStatusMediator.SetStatus("The directory specified does not exist.");
        return;
    }

    var loader = new DatasetLoader();
    AddDatasets(loader.GetValidDatasets(DataFolderPath, searchPatterns, searchOption));
}
/// <summary>
/// Loads the input/result samples, packs them into a feature matrix and a one-hot
/// label matrix, normalizes the features and wraps both in a training dataset.
/// </summary>
/// <param name="size">Not read by this method; the batch size passed on is fixed at 512.</param>
/// <returns>
/// The dataset produced by <c>DatasetLoader.Training</c>, or <c>null</c> when either matrix is <c>null</c>.
/// </returns>
public ITrainingDataset GetTrainingDataset(int size)
{
    LoadData(inputData, resultData);

    float[,] features = new float[inputData.Count, SubHistory.SubHistoryLength];
    float[,] labels = new float[inputData.Count, IMoveEngine.Payoffs];

    for (int row = 0; row < inputData.Count; row++)
    {
        for (int col = 0; col < SubHistory.SubHistoryLength; col++)
        {
            features[row, col] = inputData[row][col];
        }

        // The label matrix starts out zeroed, so setting the single "hot" cell
        // yields the same one-hot row as copying a temporary vector would.
        int hotIndex = Math.Clamp((int)resultData[row], 0, 20);
        labels[row, hotIndex] = 1f;
    }

    Normalize(features);

    (float[,] X, float[,] Y) samples = (features, labels);
    const int batchSize = 512;

    if (samples.X == null || samples.Y == null)
    {
        return null;
    }

    return DatasetLoader.Training(samples, batchSize);
}
/// <summary>
/// Loads the supported dataset files located directly in <paramref name="folderPath"/>
/// (top directory only) and, if they pass <c>CheckDatasets</c>, appends them to the analysis.
/// </summary>
/// <param name="folderPath">Folder to scan for dataset files.</param>
private async Task AddDatasets(string folderPath)
{
    // One wildcard pattern ("*" + extension) per supported file type.
    var searchPatterns = new List<string>();
    foreach (var fileType in DatasetLoader.SupportedFileTypes)
    {
        searchPatterns.Add("*" + fileType.Extension);
    }

    var loader = new DatasetLoader();
    var found = loader.GetValidDatasets(folderPath, searchPatterns, SearchOption.TopDirectoryOnly);

    // A loader error is shown to the user but does not stop processing.
    if (!string.IsNullOrEmpty(loader.ErrorMessage))
    {
        MessageBox.Show(loader.ErrorMessage);
    }

    var compatible = this.CheckDatasets(found);
    if (!compatible)
    {
        MessageBox.Show("Datasets are incompatible.");
        return;
    }

    this.Analysis.MetaData.Datasets.AddRange(found);
    await this.UpdateDatasets();
}
/// <summary>
/// Checks whether <paramref name="datasets"/> can be combined with the datasets already in the analysis.
/// </summary>
/// <param name="datasets">Candidate datasets.</param>
/// <returns>
/// <c>true</c> when the analysis holds no datasets yet; otherwise the result of
/// <c>DatasetLoader.IsValidDatasetCombo</c> for the first existing dataset's type and the candidate types.
/// </returns>
private bool CheckDatasets(IEnumerable<DatasetInformation> datasets)
{
    // Nothing loaded yet: there is no existing type to compare against.
    if (this.Analysis.MetaData.Datasets.Count <= 0)
    {
        return true;
    }

    var referenceType = this.Analysis.MetaData.Datasets[0].DatasetType;
    var candidateTypes = datasets.Select(candidate => candidate.DatasetType);
    return DatasetLoader.IsValidDatasetCombo(referenceType, candidateTypes);
}
/// <summary>
/// Converts a value to the dataset name extracted from its string form.
/// </summary>
/// <param name="value">Source value; its <c>ToString()</c> result is passed to <c>DatasetLoader.ExtractDatasetName</c>.</param>
/// <param name="targetType">Not used.</param>
/// <param name="parameter">Not used.</param>
/// <param name="culture">Not used.</param>
/// <returns><c>false</c> when <paramref name="value"/> is <c>null</c>; otherwise the extracted dataset name.</returns>
public object Convert(object value, Type targetType, object parameter, CultureInfo culture)
{
    return value == null
        ? (object)false
        : DatasetLoader.ExtractDatasetName(value.ToString());
}
/// <summary>
/// Resolves the valid datasets for the given file paths, showing the loader's error message (if any) to the user.
/// </summary>
/// <param name="filePaths">Paths of the candidate dataset files.</param>
/// <returns>The datasets returned by <c>DatasetLoader.GetValidDatasets</c>.</returns>
private List<DatasetInformation> GetAndValidateDatasets(IEnumerable<string> filePaths)
{
    var loader = new DatasetLoader();
    var validDatasets = loader.GetValidDatasets(filePaths);

    // The error is reported, but whatever datasets were resolved are still returned.
    var hasError = !string.IsNullOrEmpty(loader.ErrorMessage);
    if (hasError)
    {
        MessageBox.Show(loader.ErrorMessage);
    }

    return validDatasets;
}
/// <summary>
/// Fetches the archive at <c>DatasetURL</c>, parses the training samples from it and wraps them in a training dataset.
/// </summary>
/// <param name="size">Value forwarded as the second argument of <c>DatasetLoader.Training</c>.</param>
/// <param name="mode">Classification mode forwarded to <c>ParseSamples</c>.</param>
/// <param name="callback">Optional progress callback forwarded to the downloader.</param>
/// <param name="token">Cancellation token forwarded to the downloader.</param>
/// <returns>The training dataset, or <c>null</c> when the downloader returns no archive.</returns>
public static async Task<ITrainingDataset> GetTrainingDatasetAsync(
    int size,
    Cifar100ClassificationMode mode = Cifar100ClassificationMode.Fine,
    [CanBeNull] IProgress<HttpProgress> callback = null,
    CancellationToken token = default)
{
    IReadOnlyDictionary<String, Func<Stream>> archive =
        await DatasetsDownloader.GetArchiveAsync(DatasetURL, callback, token);

    if (archive != null)
    {
        IReadOnlyList<(float[], float[])> samples =
            ParseSamples(archive[TrainingBinFilename], TrainingSamplesInBinFile, mode);
        return DatasetLoader.Training(samples, size);
    }

    return null;
}
/// <summary>
/// Draws the default inspector followed by two buttons that trigger the
/// spherical and cartesian plot spawning methods on the inspected <c>DatasetLoader</c>.
/// </summary>
public override void OnInspectorGUI()
{
    DrawDefaultInspector();

    var loader = (DatasetLoader)target;

    bool sphericalRequested = GUILayout.Button("Load Dataset Spherical");
    if (sphericalRequested)
    {
        loader.SpawnPlotFromPath();
    }

    bool cartesianRequested = GUILayout.Button("Load Dataset Cartesian");
    if (cartesianRequested)
    {
        loader.SpawnCartPlotFromPath();
    }
}
/// <summary>
/// Fetches the test values and test labels files concurrently, parses them and wraps the result in a test dataset.
/// </summary>
/// <param name="progress">Optional progress callback forwarded to <c>DatasetLoader.Test</c>.</param>
/// <param name="token">Cancellation token forwarded to both downloads.</param>
/// <returns>
/// The test dataset, or <c>null</c> when either download yields no stream factory or parsing yields a <c>null</c> matrix.
/// </returns>
public static async Task<ITestDataset> GetTestDatasetAsync(
    [CanBeNull] Action<TrainingProgressEventArgs> progress = null,
    CancellationToken token = default)
{
    var valuesDownload = DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TestSetValuesFilename}", null, token);
    var labelsDownload = DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TestSetLabelsFilename}", null, token);
    Func<Stream>[] streams = await Task.WhenAll(valuesDownload, labelsDownload);

    // Exactly two entries are awaited, so checking both covers the whole array.
    if (streams[0] == null || streams[1] == null)
    {
        return null;
    }

    (float[,] X, float[,] Y) samples = ParseSamples((streams[0], streams[1]), TestSamples);
    if (samples.X == null || samples.Y == null)
    {
        return null;
    }

    return DatasetLoader.Test(samples, progress);
}
/// <summary>
/// Fetches the training values and training labels files concurrently, parses them and wraps the result in a training dataset.
/// </summary>
/// <param name="size">Value forwarded as the second argument of <c>DatasetLoader.Training</c>.</param>
/// <param name="token">Cancellation token forwarded to both downloads.</param>
/// <returns>
/// The training dataset, or <c>null</c> when either download yields no stream factory or parsing yields a <c>null</c> matrix.
/// </returns>
public static async Task<ITrainingDataset> GetTrainingDatasetAsync(int size, CancellationToken token = default)
{
    var valuesDownload = DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TrainingSetValuesFilename}", null, token);
    var labelsDownload = DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TrainingSetLabelsFilename}", null, token);
    Func<Stream>[] streams = await Task.WhenAll(valuesDownload, labelsDownload);

    // Exactly two entries are awaited, so checking both covers the whole array.
    if (streams[0] == null || streams[1] == null)
    {
        return null;
    }

    (float[,] X, float[,] Y) samples = ParseSamples((streams[0], streams[1]), TrainingSamples);
    if (samples.X == null || samples.Y == null)
    {
        return null;
    }

    return DatasetLoader.Training(samples, size);
}
/// <summary>
/// Adds the datasets resolved from the file at <c>SingleFilePath</c>,
/// or reports a status message when that file does not exist.
/// </summary>
private void AddSingleFileDelegate()
{
    if (!File.Exists(SingleFilePath))
    {
        ApplicationStatusMediator.SetStatus("The input file does not exist.");
        return;
    }

    var loader = new DatasetLoader();
    var singleFile = new List<string> { SingleFilePath };
    AddDatasets(loader.GetValidDatasets(singleFile));
}
/// <summary>
/// Reads the iris dataset file, splits it by iris type, merges the per-type
/// training and test parts, and builds the training and test datasets.
/// </summary>
/// <returns>A <c>DatasetNeuralNetworkNet</c> holding the training and test datasets.</returns>
public DatasetNeuralNetworkNet Cargar()
{
    var allSamples = LeerArchivoDataset();

    var setosaSplit = SepararDatasetSegunTipo(allSamples, TipoIris.Setosa);
    var versicolorSplit = SepararDatasetSegunTipo(allSamples, TipoIris.Versicolor);
    var virginicaSplit = SepararDatasetSegunTipo(allSamples, TipoIris.Virginica);

    var trainingSamples = UnirTiposIrisEnDatasetEspecifo(
        new[] { setosaSplit, versicolorSplit, virginicaSplit },
        split => split.Training);
    var testSamples = UnirTiposIrisEnDatasetEspecifo(
        new[] { setosaSplit, versicolorSplit, virginicaSplit },
        split => split.Test);

    // The training dataset is built with a fixed second argument of 50; the test dataset passes no progress callback.
    return new DatasetNeuralNetworkNet(
        CrearDataset(trainingSamples, rows => DatasetLoader.Training(rows, 50)),
        CrearDataset(testSamples, rows => DatasetLoader.Test(rows, null)));
}
/// <summary>
/// Resolves the datasets for the given input files, appends them to <c>Datasets</c>
/// and renumbers the ids of every dataset in the list sequentially from zero.
/// </summary>
/// <param name="inputFiles">Input files to resolve into datasets.</param>
/// <returns>The datasets that were added by this call.</returns>
public List<DatasetInformation> AddInputFiles(List<InputFile> inputFiles)
{
    var loader = new DatasetLoader();
    var newlyAdded = loader.GetValidDatasets(inputFiles, false);
    this.Datasets.AddRange(newlyAdded);

    // Each dataset's id is set to its position in the combined list.
    for (var index = 0; index < Datasets.Count; index++)
    {
        Datasets[index].DatasetId = index;
    }

    return newlyAdded;
}
/// <summary>
/// Registers the mock module, seeds the mock file system with one correct and one
/// incorrect dataset file in both the local and the remote directory, and creates the loader under test.
/// </summary>
public void SetUp()
{
    DependencyResolver.AddModule(new MockModule());

    var localRoot = Path.Combine(_rootDir, _localDir);
    var remoteRoot = Path.Combine(_rootDir, _remoteDir);

    // The well-formed content is read from the "small-test.txt" fixture file.
    var validContent = File.ReadAllText(Path.Combine(_fileDir, "small-test.txt"));

    _mockFileSystem = DependencyResolver.GetService<IFileSystem>() as MockFileSystem;

    var localCorrectPath = Path.Combine(localRoot, "local_correct.txt");
    _mockFileSystem.AddFile(localCorrectPath, new MockFileData(validContent));

    var remoteCorrectPath = Path.Combine(remoteRoot, "remote_correct.txt");
    _mockFileSystem.AddFile(remoteCorrectPath, new MockFileData(validContent));

    var localIncorrectPath = Path.Combine(localRoot, "local_incorrect.txt");
    _mockFileSystem.AddFile(localIncorrectPath, new MockFileData(textContents: "incorrect_data"));

    var remoteIncorrectPath = Path.Combine(remoteRoot, "remote_incorrect.txt");
    _mockFileSystem.AddFile(remoteIncorrectPath, new MockFileData(textContents: "incorrect_data"));

    _rootConfig = new DataRootConfig(localRoot, remoteRoot);
    _datasetLoader = new DatasetLoader(_rootConfig);
}
/// <summary>
/// Resolves the datasets for the given files and, if they pass <c>CheckDatasets</c>, appends them to the analysis.
/// </summary>
/// <param name="files">Paths of the candidate dataset files.</param>
private async Task AddDatasets(IEnumerable<string> files)
{
    var loader = new DatasetLoader();
    var resolved = loader.GetValidDatasets(files);

    // A loader error is shown to the user but does not stop processing.
    if (!string.IsNullOrEmpty(loader.ErrorMessage))
    {
        MessageBox.Show(loader.ErrorMessage);
    }

    var compatible = this.CheckDatasets(resolved);
    if (!compatible)
    {
        MessageBox.Show("Datasets are incompatible.");
        return;
    }

    this.Analysis.MetaData.Datasets.AddRange(resolved);
    await this.UpdateDatasets();
}
/// <summary>
/// Reads the input file at <c>InputFilePath</c> and adds the datasets it lists.
/// A status message is set when the file is missing or cannot be read.
/// </summary>
private void AddInputFileDelegate()
{
    if (!File.Exists(InputFilePath))
    {
        ApplicationStatusMediator.SetStatus("The input file does not exist.");
        return;
    }

    try
    {
        var loader = new DatasetLoader();
        var parsedInput = MultiAlignFileInputReader.ReadInputFile(InputFilePath);
        var listedDatasets = loader.GetValidDatasets(parsedInput.Files, false);
        AddDatasets(listedDatasets);
    }
    catch
    {
        // Any failure while reading or resolving is reported with one generic status message.
        ApplicationStatusMediator.SetStatus("Could not read the input file. Check the file format.");
    }
}
/// <summary>
/// Finds the supported datasets in a directory, runs feature finding on each one,
/// caches the features and stores the dataset list in a newly created database.
/// </summary>
/// <param name="directoryPath">Directory holding the datasets; resolved through <c>GetPath</c>.</param>
/// <param name="databasePath">Location of the database to create; resolved through <c>GetPath</c>.</param>
public void CreateFeatureDatabase(string directoryPath, string databasePath)
{
    var sourceDirectory = GetPath(directoryPath);
    databasePath = GetPath(databasePath);

    // One wildcard pattern ("*" + extension) per supported file type.
    var searchPatterns = new List<string>();
    foreach (var fileType in DatasetLoader.SupportedFileTypes)
    {
        searchPatterns.Add("*" + fileType.Extension);
    }

    // Only the top level of the directory is searched.
    var loader = new DatasetLoader();
    var datasets = loader.GetValidDatasets(sourceDirectory, searchPatterns, SearchOption.TopDirectoryOnly);

    // Option objects handed to feature finding.
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var tolerances = new FeatureTolerances
    {
        Mass = preset.Mass + 6,
        Net = preset.NetTolerance,
        DriftTime = preset.DriftTimeTolerance
    };
    var findingOptions = new LcmsFeatureFindingOptions(tolerances)
    {
        MaximumNetRange = 0.002,
        MaximumScanRange = 50
    };
    var lcmsFilterOptions = new LcmsFeatureFilteringOptions
    {
        FeatureLengthRangeScans = new FilterRange(50, 300)
    };
    var msFilterOptions = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };
    var spectralOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = 0.01,
        IdScore = 1e-9,
        MzBinSize = 0.5,
        MzTolerance = 0.5,
        NetTolerance = 0.1,
        RequiredPeakCount = 32,
        SimilarityCutoff = 0.75,
        TopIonPercent = 0.8
    };

    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    NHibernateUtil.CreateDatabase(databasePath);

    // Providers and cache used to write the features into the database.
    var providers = DataAccessFactory.CreateDataAccessProviders(databasePath, true);
    var featureCache = new FeatureLoader { Providers = providers };

    // Datasets are numbered sequentially from zero in enumeration order.
    var nextDatasetId = 0;
    foreach (var dataset in datasets)
    {
        dataset.DatasetId = nextDatasetId;
        nextDatasetId++;

        var features = FindFeatures(
            dataset,
            findingOptions,
            msFilterOptions,
            lcmsFilterOptions,
            spectralOptions,
            featureFinder);
        featureCache.CacheFeatures(features);
    }

    providers.DatasetCache.AddAll(datasets);
}
/// <summary>
/// Builds LCMS features for the first dataset found in a directory, links peptides to them,
/// and writes one tab-separated row per MS feature that has at least one peptide on one of its MSn spectra.
/// </summary>
/// <param name="directory">Directory searched (top level only) for supported dataset files.</param>
/// <param name="matchPath">Path of the text file to write.</param>
public void TestPeptideBands(string directory, string matchPath)
{
    // One wildcard pattern ("*" + extension) per supported file type.
    var searchPatterns = new List<string>();
    foreach (var fileType in DatasetLoader.SupportedFileTypes)
    {
        searchPatterns.Add("*" + fileType.Extension);
    }

    var loader = new DatasetLoader();
    var datasets = loader.GetValidDatasets(directory, searchPatterns, SearchOption.TopDirectoryOnly);

    // Option objects handed to filtering and feature finding.
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var tolerances = new FeatureTolerances
    {
        Mass = preset.Mass,
        Net = preset.NetTolerance,
        DriftTime = preset.DriftTimeTolerance
    };
    var msFilterOptions = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };
    var findingOptions = new LcmsFeatureFindingOptions(tolerances)
    {
        MaximumNetRange = 0.002,
        MaximumScanRange = 50
    };

    // The first dataset found is used as the baseline.
    var baseline = datasets[0];

    UpdateStatus("Loading baseline features.");
    var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baseline.Features.Path);
    msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var peptideOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = 0.05,
        IdScore = 1e-9,
        MzBinSize = 0.5,
        MzTolerance = 0.5,
        NetTolerance = 0.1,
        RequiredPeakCount = 32,
        SimilarityCutoff = 0.75,
        TopIonPercent = 0.8
    };

    var features = new List<MSFeatureLight>();

    using (var rawProvider = RawLoaderFactory.CreateFileReader(baseline.RawFile.Path))
    {
        rawProvider.AddDataFile(baseline.RawFile.Path, 0);

        UpdateStatus("Creating Baseline LCMS Features.");
        var baselineFeatures = featureFinder.FindFeatures(msFeatures, findingOptions, rawProvider);

        LinkPeptidesToFeatures(baseline.Sequence.Path, baselineFeatures, peptideOptions.Fdr, peptideOptions.IdScore);

        // Flatten to MS features, keep those with MS/MS data, order by m/z.
        foreach (var lcmsFeature in baselineFeatures)
        {
            features.AddRange(lcmsFeature.MsFeatures);
        }

        features = features
            .Where(candidate => candidate.HasMsMs())
            .OrderBy(candidate => candidate.Mz)
            .ToList();

        // Each feature is recorded at most once, as soon as any of its spectra yields a peptide.
        var featuresWithPeptides = new List<MSFeatureLight>();
        foreach (var candidate in features)
        {
            var hasPeptide = false;
            foreach (var spectrum in candidate.MSnSpectra)
            {
                // The body runs at most once: it only detects whether the sequence is non-empty.
                foreach (var ignored in spectrum.Peptides)
                {
                    hasPeptide = true;
                    break;
                }

                if (hasPeptide)
                {
                    break;
                }
            }

            if (hasPeptide)
            {
                featuresWithPeptides.Add(candidate);
            }
        }

        using (var output = File.CreateText(matchPath))
        {
            output.WriteLine("Charge\tpmz\tscan\tNET\t");
            foreach (var match in featuresWithPeptides)
            {
                output.WriteLine("{0}\t{1}\t{2}\t{3}\t", match.ChargeState, match.Mz, match.Scan, match.Net);
            }
        }
    }
}
/// <summary>
/// Finds the supported datasets in a directory and runs the alignment/clustering analysis
/// with the first dataset as the baseline and all remaining datasets as alignees.
/// </summary>
/// <param name="directory">Directory searched (top level only) for supported dataset files.</param>
/// <param name="outputPath">Base path; ".txt" and "-errors.txt" are appended for the match and error files.</param>
/// <param name="alignmentType">Alignment algorithm passed to the aligner factory.</param>
/// <param name="clusterType">Clustering algorithm passed to the cluster factory.</param>
public void TestClustering(
    string directory,
    string outputPath,
    FeatureAlignmentType alignmentType,
    LcmsFeatureClusteringAlgorithmType clusterType)
{
    var matchPath = string.Format("{0}.txt", outputPath);
    var errorPath = string.Format("{0}-errors.txt", outputPath);

    // One wildcard pattern ("*" + extension) per supported file type.
    var searchPatterns = new List<string>();
    foreach (var fileType in DatasetLoader.SupportedFileTypes)
    {
        searchPatterns.Add("*" + fileType.Extension);
    }

    var loader = new DatasetLoader();
    var datasets = loader.GetValidDatasets(directory, searchPatterns, SearchOption.TopDirectoryOnly);

    // Alignment and spectral comparison options.
    var alignmentOptions = new AlignmentOptions();
    var spectralOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = 0.01,
        IdScore = 1e-9,
        MzBinSize = 0.5,
        MzTolerance = 0.5,
        NetTolerance = 0.1,
        RequiredPeakCount = 32,
        SimilarityCutoff = 0.75,
        TopIonPercent = 0.8
    };

    // Feature finding options.
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var tolerances = new FeatureTolerances
    {
        Mass = preset.Mass + 6,
        Net = preset.NetTolerance,
        DriftTime = preset.DriftTimeTolerance
    };
    var findingOptions = new LcmsFeatureFindingOptions(tolerances)
    {
        MaximumNetRange = 0.002,
        MaximumScanRange = 50
    };

    // Algorithm instances, each registered for progress notifications.
    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var aligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType, alignmentOptions.LCMSWarpOptions, spectralOptions);
    var clusterer = ClusterFactory.Create(clusterType);
    clusterer.Parameters = new FeatureClusterParameters<UMCLight>
    {
        Tolerances = tolerances
    };

    RegisterProgressNotifier(aligner);
    RegisterProgressNotifier(featureFinder);
    RegisterProgressNotifier(clusterer);

    var lcmsFilterOptions = new LcmsFeatureFilteringOptions
    {
        FeatureLengthRangeScans = new FilterRange(50, 300)
    };
    var msFilterOptions = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };

    // A single pass: dataset 0 is the baseline, every other dataset is an alignee.
    const int baselineIndex = 0;
    var alignees = datasets.Where((dataset, position) => position != baselineIndex).ToList();

    PerformMultiAlignAnalysis(
        datasets[baselineIndex],
        alignees,
        findingOptions,
        msFilterOptions,
        lcmsFilterOptions,
        spectralOptions,
        featureFinder,
        aligner,
        clusterer,
        matchPath,
        errorPath);
}
/// <summary>
/// Loads the train and test datasets, wires up the rule-creation, mutation, fitness and
/// serialization components, runs the evolution engine and saves the final population.
/// </summary>
/// <param name="settings">Program settings supplying the file paths, weights and limits used by every component.</param>
/// <returns>Always <c>0</c>.</returns>
public static int Run(ProgramSettings settings)
{
    // Cap both worker and completion-port threads at the processor count.
    ThreadPool.SetMaxThreads(Environment.ProcessorCount, Environment.ProcessorCount);

    PrintSettings(settings);

    var (trainDataset, testDataset) = DatasetLoader.LoadDatasets(
        trainDataFilename: settings.TrainDataPath,
        trainLabelsFilename: settings.TrainLabelsPath,
        testDataFilename: settings.TestDataPath,
        testLabelsFilename: settings.TestLabelsPath,
        classificationType: settings.ClassificationType);

    Console.WriteLine();
    PrintTrainDatasetInformation(trainDataset);
    Console.WriteLine();

    // Rule-creation components (all built on the training dataset).
    var hyperRectangleCoverageComputer = new HyperRectangleCoverageComputer(dataset: trainDataset);
    var featureTestIntervalConverter = new FeatureTestDimensionIntervalConverter(trainDataset);
    var ruleAntecedentHyperRectangleConverter = new RuleAntecedentHyperRectangleConverter(featureTestIntervalConverter);

    var seedSelector = new CFSBESeedFinder(
        ruleConverter: ruleAntecedentHyperRectangleConverter,
        coverageComputer: hyperRectangleCoverageComputer);

    var antecedentCreator = new AntecedentCreator(
        ruleAntecedentHyperRectangleConverter: ruleAntecedentHyperRectangleConverter);

    var consequentCreator = settings.ClassificationType switch
    {
        ClassificationType.SingleLabel => (IConsequentCreator)new SingleLabelConsequentCreator(dataset: trainDataset),
        ClassificationType.MultiLabel => (IConsequentCreator)new MultiLabelConsequentCreator(
            dataset: trainDataset,
            threshold: settings.RuleConsequentThreshold),
        _ => throw CommonExceptions.UnknownClassificationType,
    };

    var hyperRectangleIntersector = new HyperRectangleIntersector(trainDataset);
    var nonIntersectingHyperRectangleCreator = new NonIntersectingRectangleCreator(hyperRectangleIntersector);

    // NOTE(review): a setting named "SkipExpensiveSanityChecks" is passed to a parameter named
    // "runExpensiveSanityChecks". This looks inverted; confirm against ProgramSettings before changing.
    var ruleCreator = new CoverageAwareRuleCreator(
        seedSelector: seedSelector,
        boxConverter: ruleAntecedentHyperRectangleConverter,
        boxCreator: nonIntersectingHyperRectangleCreator,
        coverageComputer: hyperRectangleCoverageComputer,
        antecedentCreator: antecedentCreator,
        consequentCreator: consequentCreator,
        hyperRectangleIntersector: hyperRectangleIntersector,
        targetNumberOfInstancesToCover: settings.TargetNumberOfInstancesToCoverDuringRuleCreationg,
        runExpensiveSanityChecks: settings.SkipExpensiveSanityChecks);

    // Mutation components.
    var individualMutationChooser = BiasedOptionChooser<IndividualMutationType>.Create(
        new Dictionary<IndividualMutationType, int>()
        {
            [IndividualMutationType.AddRule] = settings.IndividualMutationAddRuleWeight,
            [IndividualMutationType.ModifyRule] = settings.IndividualMutationModifyRuleWeight,
            [IndividualMutationType.RemoveRule] = settings.IndividualMutationRemoveRuleWeight
        });

    var ruleSwappingIndividualMutator = new RuleSwappingIndividualMutator(
        mutationChooser: individualMutationChooser,
        ruleCreator: ruleCreator);

    var populationMutator = new PopulationMutator(
        individualMutator: ruleSwappingIndividualMutator,
        mutantsPerGeneration: settings.MutantsPerGeneration,
        maximumFailedAttemptsPerGeneration: settings.MaximumFailedMutationAttemptsPerGeneration);

    // Fitness evaluation: the same metric names are parsed once per dataset.
    var trainMetrics = IMetricParser.ParseMetrics(
        dataset: trainDataset,
        metricsNames: settings.MetricNames,
        classificationType: settings.ClassificationType);
    var trainFitnessEvaluator = new FitnessEvaluator(trainMetrics);

    var testMetrics = IMetricParser.ParseMetrics(
        dataset: testDataset,
        metricsNames: settings.MetricNames,
        classificationType: settings.ClassificationType);
    var testFitnessEvaluator = new FitnessEvaluator(testMetrics);

    var fittestIdentifier = IFittestIdentifierParser.Parse(
        name: settings.SelectionAlgorithm,
        fittestCount: settings.PopulationSize);

    var individualCreator = new SingleRuleIndividualCreator(ruleCreator: ruleCreator);
    var initialPopulation = CreateInitialPopulation(
        individualCreator: individualCreator,
        settings: settings);

    // Output and serialization components.
    var modelSerializer = new ModelSerializer();
    var populationFitnessSerializer = new PopulationFitnessSerializer(
        trainFitnessEvaluator: trainFitnessEvaluator,
        testFitnessEvaluator: testFitnessEvaluator);
    var trainPredictionsSerializer = new PredictionsSerializer(dataset: trainDataset);
    var testPredictionsSerializer = new PredictionsSerializer(dataset: testDataset);

    var persistentOutputManager = new PersistentOutputManager(
        outputDirectory: settings.OutputDirectory,
        saveModels: settings.SaveModels,
        saveTrainPredictions: settings.SaveTrainPredictions,
        saveTestPredictions: settings.SaveTestPredictions,
        modelSerializer: modelSerializer,
        populationFitnessSerializer: populationFitnessSerializer,
        trainPredictionsSerializer: trainPredictionsSerializer,
        testPredictionsSerializer: testPredictionsSerializer);

    // Rule consistency checking is currently disabled:
    //var consistencyChecker = new RuleConsistencyChecker(
    //    ruleAntecedentHyperRectangleConverterconverter: ruleAntecedentHyperRectangleConverter,
    //    hyperRectangleIntersector: hyperRectangleIntersector);

    var evolutionEngine = new EvolutionEngine(
        maximumNumberOfGenerations: settings.MaximumGenerations,
        fitnessEvaluator: trainFitnessEvaluator,
        populationMutator: populationMutator,
        fittestIdentifier: fittestIdentifier);

    var lastGenerationSummary = evolutionEngine.Run(initialPopulation);

    // Report a distinct reason for each outcome. Previously both branches printed the
    // "maximum number of generations reached" text, which misreported an early stop.
    var stopReason = lastGenerationSummary.GenerationNumber == settings.MaximumGenerations
        ? "maximum number of generations reached."
        : "evolution ended before the maximum number of generations was reached.";
    Console.WriteLine($"Evolution cycle stopped. Reason: {stopReason}");

    persistentOutputManager.SaveWhatMustBeSaved(population: lastGenerationSummary.Population);

    Console.WriteLine("Done.");
    return 0;
}