Example #1
0
        /// <summary>
        /// Reads a tab-separated upload report obtained from the GDC and adds one
        /// <c>SeqFileInfo</c> entry to <c>Program.SeqDataFiles</c> for every row whose
        /// third column is "submitted_unaligned_reads".
        /// </summary>
        /// <param name="fileName">Path of the upload report file.</param>
        /// <returns>
        /// <c>true</c> when the whole file was read; <c>false</c> when the file does not
        /// exist or an exception was raised while processing it.
        /// </returns>
        public static bool ProcessGDCUploadReport(string fileName)
        {
            if (!File.Exists(fileName))
            {
                Console.WriteLine("File not found, Upload Report file from GDC: " + fileName);
                return(false);
            }

            // Columns 0, 1, 2 and 4 are read below, so a usable row needs at least five fields.
            const int minColumns = 5;

            int counter = 0;

            try
            {
                // The using block disposes (and therefore closes) the reader; no explicit Close() is needed.
                using (StreamReader file = new StreamReader(fileName))
                {
                    string line;
                    while ((line = file.ReadLine()) != null)
                    {
                        string[] parts = line.Split('\t');

                        // Skip blank/short rows instead of letting an out-of-range index
                        // abort the processing of the whole report.
                        if (parts.Length < minColumns)
                        {
                            continue;
                        }

                        if (parts[2] != "submitted_unaligned_reads")
                        {
                            continue;
                        }

                        counter++;
                        SeqFileInfo newDataFile = new SeqFileInfo
                        {
                            Id             = parts[0],
                            Related_case   = parts[1],
                            EType          = parts[2],
                            Submitter_id   = parts[4],
                            ReadyForUpload = false
                        };

                        // File name and size come from the metadata dictionary, keyed by submitter id.
                        // When there is no match the entry is still added, without file name/size.
                        if (GDCmetadata.SURdictionary.TryGetValue(parts[4], out SUR sur))
                        {
                            newDataFile.DataFileName = sur.file_name;
                            newDataFile.DataFileSize = sur.file_size;
                        }

                        // Entries are keyed by their 1-based position among the accepted rows.
                        Program.SeqDataFiles.Add(counter, newDataFile);
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine("Exception while processing upload report from the gdc: " + fileName);
                Console.WriteLine("Counter = " + counter.ToString());
                Console.WriteLine("Exception: " + ex.Message);
                return(false);
            }
            return(true);
        }
Example #2
0
        /// <summary>
        /// Looks for the data file of every entry in <c>Program.SeqDataFiles</c> below
        /// <paramref name="basePath"/> and marks the entries whose file exists as ready for upload.
        /// </summary>
        /// <remarks>
        /// The sub-folder searched is "uBam" when the file name contains "bam", "fastq" when it
        /// contains "fastq", and <paramref name="basePath"/> itself otherwise.
        /// </remarks>
        /// <param name="basePath">Root folder that contains the delivery sub-folders.</param>
        /// <returns>The number of entries whose data file was not found.</returns>
        public static int GoFindDataFiles(string basePath)
        {
            // since we cannot modify a Dictionary item while iterating over the dictionary,
            // copy the keys to a List and iterate over that instead
            List <int> listOfKeys = new List <int>(Program.SeqDataFiles.Keys);

            int numFilesNotFound = 0;

            foreach (int key in listOfKeys)
            {
                SeqFileInfo dataFile     = Program.SeqDataFiles[key];
                string      dataFileName = dataFile.DataFileName;

                // An entry without a file name cannot be located on disk; count it as not
                // found rather than failing on a null reference.
                if (string.IsNullOrEmpty(dataFileName))
                {
                    numFilesNotFound++;
                    continue;
                }

                string deliveryFolderName = "";

                if (dataFileName.Contains("bam"))
                {
                    deliveryFolderName = "uBam";
                }
                else if (dataFileName.Contains("fastq"))
                {
                    deliveryFolderName = "fastq";
                }

                // The run id (first 35 chars of the submitter_id) is not currently part of the
                // path, so it is no longer extracted here: the unused Substring call threw for
                // submitter ids shorter than 35 characters.
                string fileLocation = Path.Combine(basePath, deliveryFolderName);

                if (File.Exists(Path.Combine(fileLocation, dataFileName)))
                {
                    dataFile.DataFileLocation = fileLocation;
                    dataFile.ReadyForUpload   = true;
                    Program.SeqDataFiles[key] = dataFile; // write the updated entry back
                }
                else
                {
                    numFilesNotFound++;
                }
            }

            return(numFilesNotFound);
        }
Example #3
0
        /// <summary>
        /// Uploads one sequence data file by running the data transfer tool
        /// (<c>DataTransferTool</c>) and writes progress and outcome to the log file
        /// registered in <c>LogFileSet</c> for the current task.
        /// </summary>
        /// <remarks>
        /// A failed upload is put back on <c>SeqDataFilesQueue</c> until <c>NumRetries</c>
        /// attempts have been made, unless the tool's output contains one of two known
        /// unrecoverable messages. In <c>TestMode</c> the tool is not started and a
        /// successful run is simulated.
        /// </remarks>
        /// <param name="workId">Key of the work item in <c>SeqDataFiles</c>.</param>
        /// <param name="remainingItems">Number of work items still pending; only written to the log.</param>
        /// <returns>
        /// <c>false</c> when the log file or the work item cannot be looked up; otherwise
        /// <c>true</c>, whether or not the upload itself succeeded.
        /// </returns>
        public static bool UploadSequenceData(int workId, int remainingItems)
        {
            // Task.CurrentId is null when this is not running inside a Task; casting it
            // blindly to int would throw instead of reporting the problem.
            int? taskId = Task.CurrentId;

            if (taskId == null || !LogFileSet.TryGetValue(taskId.Value, out string logFile))
            {
                Console.WriteLine($"Unable to get logfile name from LogFileSet on TaskId {workId}");
                return(false);
            }

            if (!SeqDataFiles.TryGetValue(workId, out SeqFileInfo seqDataFile))
            {
                File.AppendAllText(logFile, ($"Unable to get SeqFileInfo object out of SeqDataFiles {workId}" + Environment.NewLine));
                return(false);
            }

            string        startTime = DateTime.Now.ToString("g");
            StringBuilder sb        = new StringBuilder();

            // gdc-client reverts to a "simple mode" when the file size is less than 1GB, however
            // it does not exit cleanly in this mode. So force all xfers to be multi-part by setting
            // the upload-part-size to a value smaller than the file size.
            string uploadPartSize  = "";
            long   defaultPartSize = 1000000000;

            if (seqDataFile.DataFileSize < defaultPartSize)
            {
                long newPartSize = (long)(seqDataFile.DataFileSize * 0.8);
                uploadPartSize = " --upload-part-size " + newPartSize.ToString();
            }

            string cmdLineArgs = ("upload -t " + GDCTokenFile + " " + seqDataFile.Id + uploadPartSize);

            // Log what is about to be run.
            sb.Append("Begin:" + "\t");
            sb.Append(startTime + "\t");
            sb.Append(seqDataFile.Id + "\t");
            sb.Append(seqDataFile.Submitter_id);
            sb.Append(Environment.NewLine);

            sb.Append("uploading ");
            sb.Append(seqDataFile.Id);
            sb.Append(" on thread ");
            sb.Append(taskId.ToString());
            sb.Append(" with ");
            sb.Append(remainingItems.ToString());
            sb.Append(" work items remaining.");
            sb.Append(Environment.NewLine);
            sb.Append("WorkingDirectory = ");
            sb.Append(seqDataFile.DataFileLocation);

            sb.Append(Environment.NewLine);
            sb.Append(seqDataFile.DataFileName);
            sb.Append("\t");
            sb.Append(seqDataFile.DataFileSize);
            sb.Append("\t");
            sb.Append("partsize: " + uploadPartSize);
            sb.Append(Environment.NewLine);
            sb.Append("cmd = " + DataTransferTool + " " + cmdLineArgs);

            File.AppendAllText(logFile, sb.ToString());
            sb.Clear();

            string stdOut = "";
            string stdErr = "";

            if (TestMode)
            {
                Console.WriteLine(DataTransferTool + " " + cmdLineArgs + "; filename: " + seqDataFile.DataFileName);
                // fake the output of a gdc-client run indicating upload finished successfully
                stdOut = "Multipart upload finished for file " + seqDataFile.Id + Environment.NewLine;
            }
            else
            {
                using (var proc = new Process())
                {
                    ProcessStartInfo procStartInfo = new ProcessStartInfo();
                    procStartInfo.FileName  = DataTransferTool;
                    procStartInfo.Arguments = cmdLineArgs;

                    // the gdc-client DTT requires that it be executed from within the directory where the data file resides
                    // actually, with newer 1.4 version you can set --path path and it seems to work
                    procStartInfo.WorkingDirectory = seqDataFile.DataFileLocation;

                    procStartInfo.CreateNoWindow         = true;
                    procStartInfo.UseShellExecute        = false;
                    procStartInfo.RedirectStandardOutput = true;
                    procStartInfo.RedirectStandardInput  = true;
                    procStartInfo.RedirectStandardError  = true;

                    proc.StartInfo = procStartInfo;
                    proc.Start();

                    // Drain stderr asynchronously while stdout is read synchronously. Reading the
                    // two redirected streams one after the other can deadlock: the child blocks
                    // once its stderr pipe is full while we are still waiting for stdout to end.
                    Task <string> stdErrTask = proc.StandardError.ReadToEndAsync();
                    stdOut = proc.StandardOutput.ReadToEnd();
                    stdErr = stdErrTask.GetAwaiter().GetResult();

                    proc.WaitForExit();
                }
            }

            string endTime = DateTime.Now.ToString("g");

            // two common error messages to look for:
            string knownErrorMessage1 = "File in validated state, initiate_multipart not allowed";  // file already exists at GDC
            string knownErrorMessage2 = "File with id " + seqDataFile.Id + " not found";            // local file not found, gdc xfer tool likely not executed from within directory that contains the file

            bool uploadSucceeded = stdOut.IndexOf("Multipart upload finished for file " + seqDataFile.Id) != -1;

            bool   keepWorking = true;
            string logDateTime = DateTime.Now.ToString("g");

            if (!uploadSucceeded)
            {
                sb.Append(Environment.NewLine);
                string failBaseText = "***" + "\t" + logDateTime
                                      + "\t" + "File-NOT-UPLOADED:"
                                      + "\t" + seqDataFile.Id
                                      + "\t" + seqDataFile.Submitter_id
                                      + "\t";

                if (stdOut.IndexOf(knownErrorMessage1) != -1)
                {
                    sb.Append(failBaseText + "Fail: File already at GDC");
                    keepWorking = false;
                }
                else if (stdOut.IndexOf(knownErrorMessage2) != -1)
                {
                    sb.Append(failBaseText + "Fail: Local file not found");
                    keepWorking = false;
                }
                else if (seqDataFile.UploadAttempts >= NumRetries)
                {
                    // >= rather than == so an attempt counter that somehow overshot the limit
                    // is still reported instead of being dropped without a log entry.
                    sb.Append(failBaseText + "Fail: Reached Max Retries");
                    keepWorking = false;
                }

                if ((seqDataFile.UploadAttempts < NumRetries) && keepWorking)
                {
                    // Record the new attempt count before the item goes back on the queue.
                    seqDataFile.UploadAttempts++;
                    SeqDataFiles[workId] = seqDataFile;

                    SeqDataFilesQueue.Enqueue(workId);
                    Thread.Sleep(200);

                    sb.Append("---");
                    sb.Append("\t" + logDateTime);
                    sb.Append("\t" + "Re-queuing");
                    sb.Append("\t" + seqDataFile.Id);
                    sb.Append("\t" + seqDataFile.Submitter_id);
                    sb.Append("\t" + "Re-queued: ");
                    sb.Append(seqDataFile.UploadAttempts.ToString());
                    sb.Append(" of ");
                    sb.Append(NumRetries.ToString());
                    sb.Append(Environment.NewLine);
                    Console.WriteLine($"Re-queued item {workId}");
                }
            }

            sb.Append(Environment.NewLine);
            sb.Append(stdOut);

            // Keep whatever the tool wrote to stderr; it was previously captured and discarded.
            if (!string.IsNullOrEmpty(stdErr))
            {
                sb.Append("StdErr: " + stdErr + Environment.NewLine);
            }

            sb.Append("End: " + endTime + Environment.NewLine + Environment.NewLine);
            File.AppendAllText(logFile, sb.ToString());
            sb.Clear();

            return(true);
        }