/// <summary>
/// The main function to run the program: parses and validates the command line arguments,
/// then hands the resulting options to <see cref="ProcessMetabolites"/>
/// </summary>
/// <param name="args">Passed in arguments to the program</param>
/// <returns>
/// 0 if processing succeeded (or the parser reports CreateParamFileProvided after an unsuccessful parse);
/// -1 if argument parsing or validation failed, an exception occurred, or processing failed
/// </returns>
public static int Main(string[] args)
{
    // Delay (in msec) before exiting, in case the user double clicked this file from within Windows Explorer
    // (or started the program via a shortcut); this gives them a chance to read the messages
    const int exitDelayMsec = 1500;

    var asmName = typeof(Program).GetTypeInfo().Assembly.GetName();

    // Alternatively: System.AppDomain.CurrentDomain.FriendlyName
    var exeName = Path.GetFileName(Assembly.GetExecutingAssembly().Location);

    var version = MetaboliteValidatorOptions.GetAppVersion();

    var parser = new CommandLineParser<MetaboliteValidatorOptions>(asmName.Name, version)
    {
        ProgramInfo = "This program reads metabolites in a .tsv file and pushes new information " + Environment.NewLine +
                      "to the git repository at https://github.com/PNNL-Comp-Mass-Spec/MetabolomicsCCS",

        ContactInfo = "Program written by Ryan Wilson and Matthew Monroe for the Department of Energy (PNNL, Richland, WA) in 2017" +
                      Environment.NewLine + Environment.NewLine +
                      "E-mail: [email protected] or [email protected]" + Environment.NewLine +
                      "Website: https://github.com/PNNL-Comp-Mass-Spec/ or https://panomics.pnnl.gov/ or https://www.pnnl.gov/integrative-omics",

        UsageExamples =
        {
            exeName + " NewMetabolites.tsv",
            exeName + " NewMetabolites.tsv -i",
            exeName + " NewMetabolites.tsv -preview",
            exeName + " NewMetabolites.tsv -user MyUsername -password *Dfw3gf"
        }
    };

    var result = parser.ParseArgs(args);
    var options = result.ParsedResults;

    try
    {
        if (!result.Success)
        {
            if (parser.CreateParamFileProvided)
            {
                return 0;
            }

            Thread.Sleep(exitDelayMsec);
            return -1;
        }

        if (!options.ValidateArgs(out var errorMessage))
        {
            parser.PrintHelp();

            Console.WriteLine();
            ConsoleMsgUtils.ShowWarning("Validation error:");
            ConsoleMsgUtils.ShowWarning(errorMessage);

            Thread.Sleep(exitDelayMsec);
            return -1;
        }

        options.OutputSetOptions();
    }
    catch (Exception e)
    {
        Console.WriteLine();

        // Separate the executable name from the exception message so the two do not run together
        Console.WriteLine($"Error running {exeName}: {e.Message}");
        Console.WriteLine($"See help with {exeName} --help");
        return -1;
    }

    var program = new Program();
    var success = program.ProcessMetabolites(options);

    Console.WriteLine();
    Console.WriteLine(success ? "Processing complete" : "Processing failed");

    Thread.Sleep(exitDelayMsec);

    // Report a failure to the caller via a non-zero exit code
    return success ? 0 : -1;
}
/// <summary>
/// Initialization function that controls the program: reads the local input file, retrieves the
/// master .tsv file from GitHub, optionally validates the new rows, then appends them to the
/// master data and sends the result back to GitHub
/// </summary>
/// <remarks>
/// Unless options.IgnoreErrors is true, each new row is compared against the CAS values already
/// in the master file and checked with CheckRow using the PubChem and KEGG lookups:
/// rows whose CAS value is already present are moved to the duplicate rows table,
/// rows that fail CheckRow are logged to ValidationApi.txt and moved to the warning rows table,
/// and rows that pass CheckRow but have no KEGG match are copied to the missing KEGG table (and kept).
/// The three tables are handed to streamToFile, and the remaining rows are checked with GoodTables.
/// </remarks>
/// <param name="options">Processing options</param>
/// <returns>True on success, false if an error</returns>
private bool ProcessMetabolites(MetaboliteValidatorOptions options)
{
    try
    {
        if (string.IsNullOrWhiteSpace(options.InputFile))
        {
            Console.WriteLine();
            Console.WriteLine("Error, input file not defined");
            return false;
        }

        var inputFile = new FileInfo(options.InputFile);
        if (!inputFile.Exists)
        {
            Console.WriteLine();
            Console.WriteLine("Error, input file not found: " + inputFile.FullName);
            return false;
        }

        // init github api interaction with the repo and owner
        var github = new Github("MetabolomicsCCS", "PNNL-Comp-Mass-Spec", options.Preview);

        if (!string.IsNullOrEmpty(options.Username))
        {
            github.Username = options.Username;

            if (!string.IsNullOrEmpty(options.Password))
            {
                // A password that starts with an asterisk is passed through DecodePassword (without the asterisk)
                if (options.Password.StartsWith("*", StringComparison.Ordinal))
                {
                    github.Password = MetaboliteValidatorOptions.DecodePassword(options.Password.Substring(1));
                }
                else
                {
                    github.Password = options.Password;
                }
            }
        }

        // get main data file from github
        var dataFile = github.GetFile("data/" + MASTER_TSV_FILE);

        // parse the new data to append to current data
        var fileToAppend = new DelimitedFileParser();
        fileToAppend.ParseFile(inputFile.FullName, '\t');

        Console.WriteLine();
        Console.WriteLine("Found {0} records in local file {1}", fileToAppend.Count(), inputFile.Name);

        // Update column names if necessary
        UpdateHeaders(fileToAppend);

        // parse the main data file from github
        var mainFile = new DelimitedFileParser();
        if (dataFile == null)
        {
            // Nothing was retrieved from GitHub; start an empty table that uses the headers of the new data
            mainFile.SetDelimiter('\t');
            mainFile.SetHeaders(fileToAppend.GetHeaders());
        }
        else
        {
            mainFile.ParseString(dataFile, '\t');

            Console.WriteLine();
            Console.WriteLine("Found {0} records in file {1} retrieved from GitHub", mainFile.Count(), MASTER_TSV_FILE);
            Console.WriteLine();
        }

        // Update column names if necessary
        UpdateHeaders(mainFile);

        var duplicateRowCount = 0;

        if (!options.IgnoreErrors)
        {
            // Get ids for Kegg and PubChem
            var keggIds = fileToAppend.GetColumnAt("kegg").Where(x => !string.IsNullOrEmpty(x)).ToList();
            var cidIds = fileToAppend.GetColumnAt("pubchem cid").Where(x => !string.IsNullOrEmpty(x)).ToList();

            // CAS values already in the master file; a hash set keeps the per-row duplicate check cheap
            var mainCasIds = new System.Collections.Generic.HashSet<string>(
                mainFile.GetColumnAt("cas").Where(x => !string.IsNullOrEmpty(x)));

            // generate PubChem and Kegg utils
            var pub = new PubchemUtil(cidIds.ToArray());
            var kegg = new KeggUtil(keggIds.ToArray());

            var dupRows = new DelimitedFileParser();
            dupRows.SetHeaders(fileToAppend.GetHeaders());
            dupRows.SetDelimiter('\t');

            var warningRows = new DelimitedFileParser();
            warningRows.SetHeaders(fileToAppend.GetHeaders());
            warningRows.SetDelimiter('\t');

            var missingKegg = new DelimitedFileParser();
            missingKegg.SetHeaders(fileToAppend.GetHeaders());
            missingKegg.SetDelimiter('\t');

            var dataMap = fileToAppend.GetMap();

            // The using block guarantees the validation log is flushed and closed even if a row throws
            using (var file = new StreamWriter("ValidationApi.txt"))
            {
                // compare fileToAppend to utils
                // Rows are visited from last to first because rows may be removed from fileToAppend inside the loop
                // NOTE(review): presumably GetMap returns the parser's live row list, which is why this runs backwards -- confirm
                for (var i = dataMap.Count - 1; i >= 0; i--)
                {
                    var row = dataMap[i];

                    Compound pubChemCompound = null;
                    CompoundData keggCompound = null;

                    if (!string.IsNullOrEmpty(row["pubchem cid"]))
                    {
                        // An exception raised here (non-numeric CID or a CID missing from the map) is reported by the outer catch block
                        pubChemCompound = pub.PubChemMap[int.Parse(row["pubchem cid"])];
                    }

                    if (!string.IsNullOrEmpty(row["kegg"]) && kegg.CompoundsMap.ContainsKey(row["kegg"]))
                    {
                        keggCompound = kegg.CompoundsMap[row["kegg"]];
                    }

                    if (mainCasIds.Contains(row["cas"]))
                    {
                        // CAS value is already in the master file: treat as a duplicate and do not upload
                        dupRows.Add(row);
                        fileToAppend.Remove(row);
                    }
                    else if (keggCompound == null && CheckRow(row, pubChemCompound, null))
                    {
                        // Row is valid without KEGG data; record it, but keep it in the data to upload
                        missingKegg.Add(row);
                    }
                    else if (!CheckRow(row, pubChemCompound, keggCompound))
                    {
                        // remove from list add to warning file
                        WriteContentToFile(file, row, pubChemCompound, keggCompound, warningRows.Count() + 2);
                        warningRows.Add(row);
                        fileToAppend.Remove(row);
                    }
                }
            }

            duplicateRowCount = dupRows.Count();

            if (fileToAppend.Count() > 0)
            {
                Console.WriteLine("Validating data file with GoodTables");
                var goodTables = new GoodTables(fileToAppend.ToString(true), SchemaUrl);

                if (!goodTables.Response.success)
                {
                    // Dispose the writer here so the report is flushed to disk and the file handle is released
                    using (var goodTablesWriter = new StreamWriter(GOOD_TABLES_WARNING_FILE))
                    {
                        goodTables.OutputResponse(goodTablesWriter);
                    }

                    Console.WriteLine();
                    Console.WriteLine("GoodTables reports errors; see " + GOOD_TABLES_WARNING_FILE);
                    Console.WriteLine("Note that data with N/A in columns that expect a number will be flagged as an error by GoodTables; those errors can be ignored");
                }
            }

            streamToFile(DUPLICATE_ROWS_FILE, dupRows);
            streamToFile(WARNING_ROWS_FILE, warningRows);
            streamToFile(MISSING_KEGG_FILE, missingKegg);

            if (warningRows.Count() > 0)
            {
                Console.WriteLine();
                Console.WriteLine("Warnings were encountered; see file " + WARNING_ROWS_FILE);
            }

            if (missingKegg.Count() > 0)
            {
                Console.WriteLine();
                Console.WriteLine("Warnings were encountered; see file " + MISSING_KEGG_FILE);
            }
        }
        else
        {
            Console.WriteLine();
            Console.WriteLine("Ignoring validation, skipping to file upload.");
        }

        if (fileToAppend.Count() == 0)
        {
            Console.WriteLine();
            Console.WriteLine("No new compounds were found; see {0} for the {1} skipped compounds", DUPLICATE_ROWS_FILE, duplicateRowCount);
        }
        else
        {
            // this will add the new data tsv to the existing tsv downloaded from github
            var success = mainFile.Concat(fileToAppend);
            if (!success)
            {
                // Concatenation of new records failed; do not upload
                return false;
            }

            // NOTE(review): the results of the SendFileAsync calls are neither awaited nor inspected here;
            // if SendFileAsync returns a Task, the uploads may still be running (or may have failed)
            // when this method returns true -- confirm against the Github class

            // This will send the completed tsv back to github
            github.SendFileAsync(mainFile.ToString(true), "data/" + MASTER_TSV_FILE);

            // send Agilent file to github
            github.SendFileAsync(mainFile.PrintAgilent(), "data/metabolitedataAgilent.tsv");
        }

        return true;
    }
    catch (Exception ex)
    {
        Console.WriteLine();
        Console.WriteLine("Error processing data: " + ex.Message);
        Console.WriteLine(StackTraceFormatter.GetExceptionStackTraceMultiLine(ex));
        return false;
    }
}