/// <summary>
/// Writes the text form of a parsed delimited file to disk
/// </summary>
/// <param name="fileName">Path of the file to create (an existing file is overwritten)</param>
/// <param name="parsedFile">Parsed data; the text written is the result of parsedFile.ToString(true)</param>
private void streamToFile(string fileName, DelimitedFileParser parsedFile)
        {
            // The using statement guarantees the writer is flushed and the file handle released
            // even if ToString() or Write() throws
            using (var warnFile = new StreamWriter(fileName))
            {
                warnFile.Write(parsedFile.ToString(true));
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Appends the records of another DelimitedFileParser to the end of this instance
        /// </summary>
        /// <remarks>
        /// The append only happens when CompareHeaders accepts the headers of <paramref name="a"/>
        /// against this instance's headers; otherwise both header lists are written to the console
        /// and nothing is modified.
        /// </remarks>
        /// <param name="a">The DelimitedFileParser whose records should be added to this instance</param>
        /// <returns>True if the records were appended, false if the headers did not match</returns>
        public bool Concat(DelimitedFileParser a)
        {
            // Guard clause: refuse to merge data whose columns do not line up with ours
            if (!CompareHeaders(a.GetHeaders(), _headers))
            {
                Console.WriteLine();
                Console.WriteLine("Concatenation of new records failed; header name mismatch");
                Console.WriteLine("New data:      " + string.Join(", ", a.GetHeaders()));
                Console.WriteLine("Existing data: " + string.Join(", ", _headers));

                return false;
            }

            // Append the row-oriented records first ...
            FullMap.AddRange(a.GetMap());

            // ... then extend each per-column list with the matching column from the new data
            foreach (var columnName in _headers)
            {
                var incomingColumn = a.GetColumnAt(columnName);
                ReverseMap[columnName].AddRange(incomingColumn);
            }

            return true;
        }
        /// <summary>
        /// Renames known alternate column headers of the given file
        /// </summary>
        /// <remarks>
        /// Currently the only rename is "cid" (matched case-insensitively) to "PubChem CID".
        /// fileToAppend.UpdateHeaders is only called when at least one header needs renaming.
        /// </remarks>
        /// <param name="fileToAppend">Parsed file whose headers are examined and, if needed, renamed</param>
        private void UpdateHeaders(DelimitedFileParser fileToAppend)
        {
            var currentHeaders = fileToAppend.GetHeaders();

            // Dictionary mapping old header names to new header names
            var headerMapping = new Dictionary<string, string>();

            foreach (var header in currentHeaders)
            {
                // Use an ordinal, case-insensitive comparison rather than ToLower():
                // ToLower() is culture-sensitive (e.g. "CID" does not lower-case to "cid" under tr-TR),
                // and string.Equals also tolerates a null header
                if (string.Equals(header, "cid", StringComparison.OrdinalIgnoreCase))
                {
                    headerMapping.Add(header, "PubChem CID");
                }
            }

            if (headerMapping.Count > 0)
            {
                fileToAppend.UpdateHeaders(headerMapping);
            }
        }
        /// <summary>
        /// Validates the records in a local tab-delimited file and appends the accepted records
        /// to the master TSV file retrieved from GitHub
        /// </summary>
        /// <remarks>
        /// <para>
        /// Unless options.IgnoreErrors is set, each row of the input file is checked:
        /// rows whose "cas" value already exists in the master file are moved to DUPLICATE_ROWS_FILE,
        /// rows that fail CheckRow are moved to WARNING_ROWS_FILE (details go to ValidationApi.txt),
        /// and rows that pass CheckRow but have no matching KEGG entry are listed in MISSING_KEGG_FILE
        /// while remaining part of the data to upload.
        /// </para>
        /// <para>
        /// True is also returned when no new rows remain after validation (nothing is uploaded in that case).
        /// Any exception is reported on the console and results in a return value of false.
        /// </para>
        /// </remarks>
        /// <param name="options">Processing options</param>
        /// <returns>True on success, false if an error</returns>
        private bool ProcessMetabolites(MetaboliteValidatorOptions options)
        {
            try
            {
                if (string.IsNullOrWhiteSpace(options.InputFile))
                {
                    Console.WriteLine();
                    Console.WriteLine("Error, input file not defined");
                    return false;
                }

                var inputFile = new FileInfo(options.InputFile);

                if (!inputFile.Exists)
                {
                    Console.WriteLine();
                    Console.WriteLine("Error, input file not found: " + inputFile.FullName);
                    return false;
                }

                // init github api interaction with the repo and owner
                var github = new Github("MetabolomicsCCS", "PNNL-Comp-Mass-Spec", options.Preview);

                if (!string.IsNullOrEmpty(options.Username))
                {
                    github.Username = options.Username;

                    if (!string.IsNullOrEmpty(options.Password))
                    {
                        // A leading asterisk marks an encoded password; ordinal comparison avoids
                        // culture-specific matching rules when looking for the marker character
                        if (options.Password.StartsWith("*", StringComparison.Ordinal))
                        {
                            github.Password = MetaboliteValidatorOptions.DecodePassword(options.Password.Substring(1));
                        }
                        else
                        {
                            github.Password = options.Password;
                        }
                    }
                }

                // get main data file from github
                var dataFile = github.GetFile("data/" + MASTER_TSV_FILE);

                // parse the new data to append to current data
                var fileToAppend = new DelimitedFileParser();
                fileToAppend.ParseFile(inputFile.FullName, '\t');

                Console.WriteLine();
                Console.WriteLine("Found {0} records in local file {1}", fileToAppend.Count(), inputFile.Name);

                // Update column names if necessary
                UpdateHeaders(fileToAppend);

                // parse the main data file from github
                var mainFile = new DelimitedFileParser();
                if (dataFile == null)
                {
                    // No master file was retrieved; start an empty one that uses the headers of the new data
                    mainFile.SetDelimiter('\t');
                    mainFile.SetHeaders(fileToAppend.GetHeaders());
                }
                else
                {
                    mainFile.ParseString(dataFile, '\t');

                    Console.WriteLine();
                    Console.WriteLine("Found {0} records in file {1} retrieved from GitHub", mainFile.Count(), MASTER_TSV_FILE);
                    Console.WriteLine();
                }

                // Update column names if necessary
                UpdateHeaders(mainFile);

                var duplicateRowCount = 0;

                if (!options.IgnoreErrors)
                {
                    // Get ids for Kegg and PubChem
                    var keggIds = fileToAppend.GetColumnAt("kegg").Where(x => !string.IsNullOrEmpty(x)).ToList();
                    var cidIds  = fileToAppend.GetColumnAt("pubchem cid").Where(x => !string.IsNullOrEmpty(x)).ToList();

                    // CAS values already present in the master file; a hash set keeps the
                    // per-row duplicate check below from scanning the whole list each time
                    var mainCasIds = new HashSet<string>(mainFile.GetColumnAt("cas").Where(x => !string.IsNullOrEmpty(x)));

                    // generate PubChem and Kegg utils
                    var pub  = new PubchemUtil(cidIds.ToArray());
                    var kegg = new KeggUtil(keggIds.ToArray());

                    var dupRows = new DelimitedFileParser();
                    dupRows.SetHeaders(fileToAppend.GetHeaders());
                    dupRows.SetDelimiter('\t');

                    var warningRows = new DelimitedFileParser();
                    warningRows.SetHeaders(fileToAppend.GetHeaders());
                    warningRows.SetDelimiter('\t');

                    var missingKegg = new DelimitedFileParser();
                    missingKegg.SetHeaders(fileToAppend.GetHeaders());
                    missingKegg.SetDelimiter('\t');

                    var dataMap = fileToAppend.GetMap();

                    // The using statement closes the validation log even if a row causes an exception
                    using (var file = new StreamWriter("ValidationApi.txt"))
                    {
                        // compare fileToAppend to utils
                        // Iterate from the end because rows are removed from fileToAppend while looping
                        for (var i = dataMap.Count - 1; i >= 0; i--)
                        {
                            var row = dataMap[i];

                            Compound     p = null;
                            CompoundData k = null;
                            if (!string.IsNullOrEmpty(row["pubchem cid"]))
                            {
                                p = pub.PubChemMap[int.Parse(row["pubchem cid"])];
                            }
                            if (!string.IsNullOrEmpty(row["kegg"]) && kegg.CompoundsMap.ContainsKey(row["kegg"]))
                            {
                                k = kegg.CompoundsMap[row["kegg"]];
                            }

                            if (mainCasIds.Contains(row["cas"]))
                            {
                                // Already in the master file: record as a duplicate and do not upload
                                dupRows.Add(row);
                                fileToAppend.Remove(row);
                            }
                            else
                            {
                                if (k == null && CheckRow(row, p, null))
                                {
                                    // Valid apart from the missing KEGG entry: report it, but keep the row
                                    missingKegg.Add(row);
                                }
                                else if (!CheckRow(row, p, k))
                                {
                                    // remove from list add to warning file
                                    WriteContentToFile(file, row, p, k, warningRows.Count() + 2);
                                    warningRows.Add(row);
                                    fileToAppend.Remove(row);
                                }
                            }
                        }
                    }

                    duplicateRowCount = dupRows.Count();

                    if (fileToAppend.Count() > 0)
                    {
                        Console.WriteLine("Validating data file with GoodTables");
                        var goodTables = new GoodTables(fileToAppend.ToString(true), SchemaUrl);
                        if (!goodTables.Response.success)
                        {
                            //foreach(var result in goodTables.Response.report.results)
                            //{
                            //    fileToAppend.Remove(result["0"].result_context[0]);
                            //}

                            // Dispose the writer here so the report is flushed and the handle released;
                            // StreamWriter.Dispose is safe to call more than once, so this also holds
                            // if OutputResponse closes the writer itself
                            using (var goodTablesWriter = new StreamWriter(GOOD_TABLES_WARNING_FILE))
                            {
                                goodTables.OutputResponse(goodTablesWriter);
                            }

                            Console.WriteLine();
                            Console.WriteLine("GoodTables reports errors; see " + GOOD_TABLES_WARNING_FILE);
                            Console.WriteLine("Note that data with N/A in columns that expect a number will be flagged as an error by GoodTables; those errors can be ignored");
                        }
                    }

                    streamToFile(DUPLICATE_ROWS_FILE, dupRows);
                    streamToFile(WARNING_ROWS_FILE, warningRows);
                    streamToFile(MISSING_KEGG_FILE, missingKegg);

                    if (warningRows.Count() > 0)
                    {
                        Console.WriteLine();
                        Console.WriteLine("Warnings were encountered; see file " + WARNING_ROWS_FILE);
                    }

                    if (missingKegg.Count() > 0)
                    {
                        Console.WriteLine();
                        Console.WriteLine("Warnings were encountered; see file " + MISSING_KEGG_FILE);
                    }
                }
                else
                {
                    Console.WriteLine();
                    Console.WriteLine("Ignoring validation, skipping to file upload.");
                }

                if (fileToAppend.Count() == 0)
                {
                    Console.WriteLine();
                    Console.WriteLine("No new compounds were found; see {0} for the {1} skipped compounds", DUPLICATE_ROWS_FILE, duplicateRowCount);
                }
                else
                {
                    // this will add the new data tsv to the existing tsv downloaded from github
                    var success = mainFile.Concat(fileToAppend);

                    if (!success)
                    {
                        // Concatenation of new records failed; do not upload
                        return false;
                    }

                    // Start command line process for GoodTables
                    //
                    // string userDirPath = Environment.GetEnvironmentVariable("GOODTABLES_PATH");
                    // string commandLine = $"schema \"{options.InputFile}\" --schema \"{SchemaUrl}\"";
                    // string GoodTablesPath = $"{userDirPath}\\GoodTables";
                    //CommandLineProcess pro = new CommandLineProcess(GoodTablesPath, commandLine);
                    //// if error display errors and exit
                    //if (pro.Status.Equals(CommandLineProcess.StatusCode.Error))
                    //{
                    //    Console.WriteLine($"GoodTables Validation error\n\n{pro.StandardOut}{pro.StandardError}\nExiting program please check that the data is valid.");
                    //    Console.ReadKey();
                    //    Environment.Exit(1);
                    //}
                    //// if the GoodTables.exe file isn't found display message and exit
                    //else if (pro.Status.Equals(CommandLineProcess.StatusCode.FileNotFound))
                    //{
                    //    Console.WriteLine("File not found. Please make sure you have installed python and GoodTables.\n"
                    //        +"Check that the folder path for GoodTables.exe is added to an environment variable named GOODTABLES_PATH.\n"
                    //        +"Press any key to continue.");
                    //    Console.ReadKey();
                    //    Environment.Exit(1);
                    //}
                    //else
                    //{
                    //    Console.WriteLine($"GoodTables validation\n\n{pro.StandardOut}");
                    //
                    // This will send the completed tsv back to github
                    // NOTE(review): the result of SendFileAsync is neither awaited nor inspected here;
                    // confirm that the upload is guaranteed to finish before the program exits
                    github.SendFileAsync(mainFile.ToString(true), "data/" + MASTER_TSV_FILE);

                    // send Agilent file to github
                    github.SendFileAsync(mainFile.PrintAgilent(), "data/metabolitedataAgilent.tsv");
                    //}
                }

                return true;
            }
            catch (Exception ex)
            {
                Console.WriteLine();
                Console.WriteLine("Error processing data: " + ex.Message);
                Console.WriteLine(StackTraceFormatter.GetExceptionStackTraceMultiLine(ex));
                return false;
            }
        }
 /// <summary>
 /// Stores the supplied PubChem lookup and parsed file, then immediately runs Validate()
 /// </summary>
 /// <param name="pubchem">Property objects keyed by an integer id (presumably the PubChem CID; confirm against the caller)</param>
 /// <param name="parser">Parsed delimited file kept for use by this instance</param>
 public ValidatePubchem(Dictionary<int, Property> pubchem, DelimitedFileParser parser)
 {
     // Both fields are assigned before Validate() is called
     _parser = parser;
     _pubchem = pubchem;

     Validate();
 }