/// <summary>
        /// Checks whether a row's formula, CAS number and mass agree with the PubChem and KEGG records
        /// </summary>
        /// <remarks>
        /// Masses are compared after truncation to an integer. When <paramref name="pubChem"/> is null the
        /// PubChem formula defaults to an empty string and the PubChem mass to 0, and those defaults are
        /// still compared against the row. The CAS number is only compared when <paramref name="kegg"/> is supplied.
        /// </remarks>
        /// <param name="row">Row data; must contain the keys "formula", "cas" and "mass"</param>
        /// <param name="pubChem">PubChem compound, or null</param>
        /// <param name="kegg">KEGG compound, or null</param>
        /// <returns>True if every compared value matches, otherwise false</returns>
        public bool CheckRow(Dictionary<string, string> row, Compound pubChem, CompoundData kegg)
        {
            var formula     = row["formula"];
            var cas         = row["cas"];
            var nominalMass = (int)double.Parse(row["mass"]);

            // Defaults used when no PubChem record is available
            var pubChemFormula = "";
            var pubChemMass    = 0.0;

            if (pubChem != null)
            {
                pubChemFormula = pubChem.findProp("Molecular Formula").sval;
                pubChemMass    = pubChem.findProp("MonoIsotopic").fval;
            }

            if (kegg == null)
            {
                // Without a KEGG record only the PubChem values can be compared
                return formula == pubChemFormula
                       && nominalMass == (int)pubChemMass;
            }

            var keggFormula = kegg.Formula;
            var keggMass    = kegg.ExactMass;
            var keggCas     = kegg.OtherId("CAS");

            return formula == keggFormula
                   && formula == pubChemFormula
                   && cas == keggCas
                   && nominalMass == (int)keggMass
                   && nominalMass == (int)pubChemMass;
        }
 /// <summary>
 /// Formats a KEGG compound as a single line of right-aligned, fixed-width columns
 /// </summary>
 /// <param name="k">KEGG compound, or null</param>
 /// <returns>
 /// The "KEGG" label, KEGG id, integer-truncated exact mass, formula and CAS id followed by a newline,
 /// or "No Kegg" followed by a newline when <paramref name="k"/> is null
 /// </returns>
 private string printKegg(CompoundData k)
 {
     if (k == null)
     {
         return "No Kegg\n";
     }

     // Column widths: 10, 10, 20, 20, 20
     var labelAndId    = $"{"KEGG",10}{k.KeggId,10}";
     var massColumn    = $"{(int)k.ExactMass,20}";
     var formulaAndCas = $"{k.Formula,20}{k.OtherId("CAS"),20}\n";

     return labelAndId + massColumn + formulaAndCas;
 }
 /// <summary>
 /// Writes one report entry to the given writer: the head, the row, the KEGG line and the PubChem
 /// text (in that order), followed by a newline character
 /// </summary>
 /// <param name="file">Writer that receives the text</param>
 /// <param name="row">Row data passed to printRow</param>
 /// <param name="pubChem">PubChem compound passed to printPubChem</param>
 /// <param name="kegg">KEGG compound passed to printKegg</param>
 /// <param name="rowIndex">Row number passed to printHead</param>
 private void WriteContentToFile(TextWriter file, IReadOnlyDictionary<string, string> row, Compound pubChem, CompoundData kegg, int rowIndex)
 {
     // Each section is produced and written before the next one is built
     var headText = printHead(rowIndex);
     file.Write(headText);

     var rowText = printRow(row);
     file.Write(rowText);

     var keggText = printKegg(kegg);
     file.Write(keggText);

     var pubChemText = printPubChem(pubChem);
     file.Write(pubChemText);

     file.Write("\n");
 }
        /// <summary>
        /// Initialization function that controls the program
        /// </summary>
        /// <remarks>
        /// Reads the local tab-delimited input file, retrieves the master file from GitHub and, unless
        /// <c>options.IgnoreErrors</c> is set, compares every new row against PubChem and KEGG data.
        /// Rows whose CAS value is already in the master file, and rows that fail <see cref="CheckRow"/>,
        /// are removed from the data to append. Remaining rows are concatenated to the master file, which is
        /// then sent back to GitHub.
        /// </remarks>
        /// <param name="options">Processing options</param>
        /// <returns>True on success, false if an error</returns>
        private bool ProcessMetabolites(MetaboliteValidatorOptions options)
        {
            try
            {
                if (string.IsNullOrWhiteSpace(options.InputFile))
                {
                    Console.WriteLine();
                    Console.WriteLine("Error, input file not defined");
                    return false;
                }

                var inputFile = new FileInfo(options.InputFile);

                if (!inputFile.Exists)
                {
                    Console.WriteLine();
                    Console.WriteLine("Error, input file not found: " + inputFile.FullName);
                    return false;
                }

                // init github api interaction with the repo and owner
                var github = new Github("MetabolomicsCCS", "PNNL-Comp-Mass-Spec", options.Preview);

                if (!string.IsNullOrEmpty(options.Username))
                {
                    github.Username = options.Username;

                    if (!string.IsNullOrEmpty(options.Password))
                    {
                        // A leading asterisk marks a password that must be decoded before use
                        if (options.Password.StartsWith("*"))
                        {
                            github.Password = MetaboliteValidatorOptions.DecodePassword(options.Password.Substring(1));
                        }
                        else
                        {
                            github.Password = options.Password;
                        }
                    }
                }

                // get main data file from github
                var dataFile = github.GetFile("data/" + MASTER_TSV_FILE);

                // parse the new data to append to current data
                var fileToAppend = new DelimitedFileParser();
                fileToAppend.ParseFile(inputFile.FullName, '\t');

                Console.WriteLine();
                Console.WriteLine("Found {0} records in local file {1}", fileToAppend.Count(), inputFile.Name);

                // Update column names if necessary
                UpdateHeaders(fileToAppend);

                // parse the main data file from github
                var mainFile = new DelimitedFileParser();
                if (dataFile == null)
                {
                    // No master file was retrieved; start an empty one with the headers of the new data
                    mainFile.SetDelimiter('\t');
                    mainFile.SetHeaders(fileToAppend.GetHeaders());
                }
                else
                {
                    mainFile.ParseString(dataFile, '\t');

                    Console.WriteLine();
                    Console.WriteLine("Found {0} records in file {1} retrieved from GitHub", mainFile.Count(), MASTER_TSV_FILE);
                    Console.WriteLine();
                }

                // Update column names if necessary
                UpdateHeaders(mainFile);

                var duplicateRowCount = 0;

                if (!options.IgnoreErrors)
                {
                    // Get ids for Kegg and PubChem
                    var keggIds = fileToAppend.GetColumnAt("kegg").Where(x => !string.IsNullOrEmpty(x)).ToArray();
                    var cidIds  = fileToAppend.GetColumnAt("pubchem cid").Where(x => !string.IsNullOrEmpty(x)).ToArray();

                    // CAS values already in the master file; a hash set keeps the per-row duplicate check cheap
                    var mainCasIds = new HashSet<string>(mainFile.GetColumnAt("cas").Where(x => !string.IsNullOrEmpty(x)));

                    // generate PubChem and Kegg utils
                    var pub  = new PubchemUtil(cidIds);
                    var kegg = new KeggUtil(keggIds);

                    var dupRows     = CreateEmptyParserLike(fileToAppend);
                    var warningRows = CreateEmptyParserLike(fileToAppend);
                    var missingKegg = CreateEmptyParserLike(fileToAppend);

                    // The using block closes the report even if validating a row throws
                    using (var file = new StreamWriter("ValidationApi.txt"))
                    {
                        var dataMap = fileToAppend.GetMap();

                        // compare fileToAppend to utils
                        // Iterate backwards because rows may be removed from fileToAppend along the way
                        for (var i = dataMap.Count - 1; i >= 0; i--)
                        {
                            var row = dataMap[i];

                            Compound     p = null;
                            CompoundData k = null;

                            var cid = row["pubchem cid"];
                            if (!string.IsNullOrEmpty(cid))
                            {
                                // NOTE(review): unlike the KEGG lookup below, this indexer is not guarded by a
                                // key check; confirm that PubChemMap always holds every requested CID
                                p = pub.PubChemMap[int.Parse(cid)];
                            }

                            var keggId = row["kegg"];
                            if (!string.IsNullOrEmpty(keggId) && kegg.CompoundsMap.ContainsKey(keggId))
                            {
                                k = kegg.CompoundsMap[keggId];
                            }

                            if (mainCasIds.Contains(row["cas"]))
                            {
                                // CAS already present in the master file
                                dupRows.Add(row);
                                fileToAppend.Remove(row);
                                continue;
                            }

                            var rowIsValid = CheckRow(row, p, k);

                            if (rowIsValid)
                            {
                                // Valid rows stay in fileToAppend; those without KEGG data are also reported
                                if (k == null)
                                {
                                    missingKegg.Add(row);
                                }
                                continue;
                            }

                            // remove from list add to warning file
                            WriteContentToFile(file, row, p, k, warningRows.Count() + 2);
                            warningRows.Add(row);
                            fileToAppend.Remove(row);
                        }
                    }

                    duplicateRowCount = dupRows.Count();

                    if (fileToAppend.Count() > 0)
                    {
                        Console.WriteLine("Validating data file with GoodTables");
                        var goodTables = new GoodTables(fileToAppend.ToString(true), SchemaUrl);
                        if (!goodTables.Response.success)
                        {
                            //foreach(var result in goodTables.Response.report.results)
                            //{
                            //    fileToAppend.Remove(result["0"].result_context[0]);
                            //}

                            // NOTE(review): this writer is not closed here; confirm that OutputResponse closes it
                            goodTables.OutputResponse(new StreamWriter(GOOD_TABLES_WARNING_FILE));

                            Console.WriteLine();
                            Console.WriteLine("GoodTables reports errors; see " + GOOD_TABLES_WARNING_FILE);
                            Console.WriteLine("Note that data with N/A in columns that expect a number will be flagged as an error by GoodTables; those errors can be ignored");
                        }
                    }

                    streamToFile(DUPLICATE_ROWS_FILE, dupRows);
                    streamToFile(WARNING_ROWS_FILE, warningRows);
                    streamToFile(MISSING_KEGG_FILE, missingKegg);

                    if (warningRows.Count() > 0)
                    {
                        Console.WriteLine();
                        Console.WriteLine("Warnings were encountered; see file " + WARNING_ROWS_FILE);
                    }

                    if (missingKegg.Count() > 0)
                    {
                        Console.WriteLine();
                        Console.WriteLine("Warnings were encountered; see file " + MISSING_KEGG_FILE);
                    }
                }
                else
                {
                    Console.WriteLine();
                    Console.WriteLine("Ignoring validation, skipping to file upload.");
                }

                if (fileToAppend.Count() == 0)
                {
                    Console.WriteLine();
                    Console.WriteLine("No new compounds were found; see {0} for the {1} skipped compounds", DUPLICATE_ROWS_FILE, duplicateRowCount);
                }
                else
                {
                    // this will add the new data tsv to the existing tsv downloaded from github
                    var success = mainFile.Concat(fileToAppend);

                    if (!success)
                    {
                        // Concatenation of new records failed; do not upload
                        return false;
                    }

                    // Start command line process for GoodTables
                    //
                    // string userDirPath = Environment.GetEnvironmentVariable("GOODTABLES_PATH");
                    // string commandLine = $"schema \"{options.InputFile}\" --schema \"{SchemaUrl}\"";
                    // string GoodTablesPath = $"{userDirPath}\\GoodTables";
                    //CommandLineProcess pro = new CommandLineProcess(GoodTablesPath, commandLine);
                    //// if error display errors and exit
                    //if (pro.Status.Equals(CommandLineProcess.StatusCode.Error))
                    //{
                    //    Console.WriteLine($"GoodTables Validation error\n\n{pro.StandardOut}{pro.StandardError}\nExiting program please check that the data is valid.");
                    //    Console.ReadKey();
                    //    Environment.Exit(1);
                    //}
                    //// if the GoodTables.exe file isn't found display message and exit
                    //else if (pro.Status.Equals(CommandLineProcess.StatusCode.FileNotFound))
                    //{
                    //    Console.WriteLine("File not found. Please make sure you have installed python and GoodTables.\n"
                    //        +"Check that the folder path for GoodTables.exe is added to an environment variable named GOODTABLES_PATH.\n"
                    //        +"Press any key to continue.");
                    //    Console.ReadKey();
                    //    Environment.Exit(1);
                    //}
                    //else
                    //{
                    //    Console.WriteLine($"GoodTables validation\n\n{pro.StandardOut}");
                    //
                    // NOTE(review): the results of the SendFileAsync calls are not awaited or inspected here;
                    // confirm that the uploads complete before the program exits

                    // This will send the completed tsv back to github
                    github.SendFileAsync(mainFile.ToString(true), "data/" + MASTER_TSV_FILE);

                    // send Agilent file to github
                    github.SendFileAsync(mainFile.PrintAgilent(), "data/metabolitedataAgilent.tsv");
                    //}
                }

                return true;
            }
            catch (Exception ex)
            {
                Console.WriteLine();
                Console.WriteLine("Error processing data: " + ex.Message);
                Console.WriteLine(StackTraceFormatter.GetExceptionStackTraceMultiLine(ex));
                return false;
            }
        }

        /// <summary>
        /// Creates an empty tab-delimited parser that uses the headers of the given parser
        /// </summary>
        /// <param name="template">Parser whose headers are copied</param>
        /// <returns>New parser with headers and delimiter set, and no rows added</returns>
        private static DelimitedFileParser CreateEmptyParserLike(DelimitedFileParser template)
        {
            var parser = new DelimitedFileParser();
            parser.SetHeaders(template.GetHeaders());
            parser.SetDelimiter('\t');
            return parser;
        }
        /// <summary>
        /// Parses the text of a KEGG compound record into a <see cref="CompoundData"/> object
        /// </summary>
        /// <remarks>
        /// Keywords are matched case-insensitively at the start of a line: ENTRY, NAME, FORMULA, EXACT_MASS,
        /// MOL_WEIGHT, COMMENT, PATHWAY and DBLINKS. Lines before the first ENTRY line are skipped and
        /// lines with other keywords are ignored. Lines that start with whitespace are treated as
        /// continuations of a NAME, PATHWAY or DBLINKS section. Numeric values are parsed with the
        /// invariant culture, so a period is always the decimal separator.
        /// </remarks>
        /// <param name="page">Record text with lines separated by '\n'; a trailing '\r' on a line is dropped</param>
        /// <returns>The data for the last ENTRY line found, or null if the text has no ENTRY line</returns>
        public CompoundData ReadKeggCompoundStream(string page)
        {
            var lines     = page.Split('\n');
            var space     = new[] { " " };
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            CompoundData entryData = null;

            for (var i = 0; i < lines.Length; i++)
            {
                var      line = GetKeggLine(lines, i);
                string[] tokens;

                if (StartsWithKeggKeyword(line, "entry"))
                {
                    tokens    = line.Split(space, StringSplitOptions.RemoveEmptyEntries);
                    entryData = new CompoundData(tokens[1])
                    {
                        Type = tokens[2]
                    };
                }

                if (entryData == null)
                {
                    continue;
                }

                // Each section below advances to the following line when it is done, so that consecutive
                // sections are handled within the same loop iteration. GetKeggLine returns null once the
                // end of the text is reached, which makes every later check fall through.

                if (StartsWithKeggKeyword(line, "name"))
                {
                    // Keep the whole name, including any embedded spaces, as is done for continuation lines
                    var firstName = line.Remove(0, 4).Trim();
                    if (firstName.Length > 0)
                    {
                        entryData.Names.Add(firstName);
                    }

                    line = GetKeggLine(lines, ++i);
                    while (IsKeggContinuationLine(line))
                    {
                        entryData.Names.Add(line.Trim());
                        line = GetKeggLine(lines, ++i);
                    }
                }

                if (StartsWithKeggKeyword(line, "formula"))
                {
                    tokens            = line.Split(space, StringSplitOptions.RemoveEmptyEntries);
                    entryData.Formula = tokens[1];
                    line              = GetKeggLine(lines, ++i);
                }

                if (StartsWithKeggKeyword(line, "exact_mass"))
                {
                    tokens              = line.Split(space, StringSplitOptions.RemoveEmptyEntries);
                    entryData.ExactMass = double.Parse(tokens[1], invariant);
                    line                = GetKeggLine(lines, ++i);
                }

                if (StartsWithKeggKeyword(line, "mol_weight"))
                {
                    tokens                    = line.Split(space, StringSplitOptions.RemoveEmptyEntries);
                    entryData.MolecularWeight = double.Parse(tokens[1], invariant);
                    line                      = GetKeggLine(lines, ++i);
                }

                if (StartsWithKeggKeyword(line, "comment"))
                {
                    entryData.Comment = line.Remove(0, 7).Trim();
                    line              = GetKeggLine(lines, ++i);
                }

                if (StartsWithKeggKeyword(line, "pathway"))
                {
                    // Dropping the keyword leaves leading whitespace, so the first line of the section
                    // is processed by the same loop as its continuation lines
                    line = line.Remove(0, 7);
                    while (IsKeggContinuationLine(line))
                    {
                        tokens = line.Trim().Split(space, StringSplitOptions.RemoveEmptyEntries);
                        if (tokens.Length > 0)
                        {
                            entryData.Pathways.Add(tokens[0]);
                        }
                        line = GetKeggLine(lines, ++i);
                    }
                }

                if (StartsWithKeggKeyword(line, "dblinks"))
                {
                    // Same keyword-removal approach as for the pathway section
                    line = line.Remove(0, 7);
                    while (IsKeggContinuationLine(line))
                    {
                        // Expected form is "Database: id1 id2 ..."; lines without a value are skipped
                        tokens = line.Trim().Split(new[] { ": " }, StringSplitOptions.RemoveEmptyEntries);
                        if (tokens.Length > 1)
                        {
                            var identifiers = tokens[1].Trim().Split(space, StringSplitOptions.RemoveEmptyEntries);
                            foreach (var identifier in identifiers)
                            {
                                entryData.OtherIds.Add(new KeyValuePair<string, string>(tokens[0], identifier));
                            }
                        }
                        line = GetKeggLine(lines, ++i);
                    }
                }
            }

            return entryData;
        }

        /// <summary>
        /// Returns the line at the given index without a trailing carriage return, or null if the index is past the last line
        /// </summary>
        private static string GetKeggLine(string[] lines, int index)
        {
            return index < lines.Length ? lines[index].TrimEnd('\r') : null;
        }

        /// <summary>
        /// Returns true if the line is non-empty and starts with a whitespace character
        /// </summary>
        private static bool IsKeggContinuationLine(string line)
        {
            return !string.IsNullOrEmpty(line) && char.IsWhiteSpace(line[0]);
        }

        /// <summary>
        /// Returns true if the line is not null and starts with the keyword, ignoring case and culture
        /// </summary>
        private static bool StartsWithKeggKeyword(string line, string keyword)
        {
            return line != null && line.StartsWith(keyword, StringComparison.OrdinalIgnoreCase);
        }