/// <summary>
/// Reads a tab-delimited upload report from the GDC and registers every
/// "submitted_unaligned_reads" row in Program.SeqDataFiles, keyed by a
/// 1-based running counter.
/// </summary>
/// <param name="fileName">Path of the upload report file to read.</param>
/// <returns>
/// true when the whole file was read; false when the file does not exist or
/// an exception occurred while reading or registering a row.
/// </returns>
public static bool ProcessGDCUploadReport(string fileName)
{
    if (!File.Exists(fileName))
    {
        Console.WriteLine("File not found, Upload Report file from GDC: " + fileName);
        return false;
    }

    // Columns read below: 0 = id, 1 = related case, 2 = entity type, 4 = submitter id.
    // A row must therefore have at least five columns; shorter rows (blank lines,
    // truncated rows) are skipped instead of aborting the whole import with an
    // IndexOutOfRangeException.
    const int MinColumns = 5;

    int counter = 0;
    try
    {
        using (StreamReader file = new StreamReader(fileName))
        {
            string line;
            while ((line = file.ReadLine()) != null)
            {
                string[] parts = line.Split('\t');
                if (parts.Length < MinColumns || parts[2] != "submitted_unaligned_reads")
                {
                    continue;
                }

                counter++;
                SeqFileInfo newDataFile = new SeqFileInfo
                {
                    Id = parts[0],
                    Related_case = parts[1],
                    EType = parts[2],
                    Submitter_id = parts[4],
                    ReadyForUpload = false
                };

                // File name and size come from the metadata dictionary, looked up by
                // submitter id; when there is no entry they are left at their defaults.
                if (GDCmetadata.SURdictionary.TryGetValue(parts[4], out SUR tempSUR))
                {
                    newDataFile.DataFileName = tempSUR.file_name;
                    newDataFile.DataFileSize = tempSUR.file_size;
                }

                Program.SeqDataFiles.Add(counter, newDataFile);
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception while processing upload report from the gdc: " + fileName);
        Console.WriteLine("Counter = " + counter.ToString());
        // Surface the actual cause; previously it was silently discarded.
        Console.WriteLine("Exception: " + ex.Message);
        return false;
    }

    return true;
}
/// <summary>
/// Looks for the data file of every entry in Program.SeqDataFiles underneath
/// <paramref name="basePath"/>. Entries whose file exists get their
/// DataFileLocation set and are flagged ReadyForUpload.
/// </summary>
/// <param name="basePath">Root folder that contains the "uBam" and "fastq" delivery folders.</param>
/// <returns>The number of entries whose data file could not be found.</returns>
public static int GoFindDataFiles(string basePath)
{
    // Entries are written back into the dictionary inside the loop, which is not
    // allowed while enumerating it, so iterate over a snapshot of the keys.
    List<int> keys = new List<int>(Program.SeqDataFiles.Keys);

    int numFilesNotFound = 0;
    foreach (int key in keys)
    {
        SeqFileInfo dataFile = Program.SeqDataFiles[key];
        string dataFileName = dataFile.DataFileName;

        // The file name is only populated when metadata was found for the entry.
        // Without a name there is nothing to look for, so count it as not found
        // rather than failing with a NullReferenceException.
        if (string.IsNullOrEmpty(dataFileName))
        {
            numFilesNotFound++;
            continue;
        }

        // The delivery sub-folder is chosen from the file name; names containing
        // neither marker are looked up directly in basePath.
        string deliveryFolderName = "";
        if (dataFileName.IndexOf("bam", StringComparison.Ordinal) != -1)
        {
            deliveryFolderName = "uBam";
        }
        else if (dataFileName.IndexOf("fastq", StringComparison.Ordinal) != -1)
        {
            deliveryFolderName = "fastq";
        }

        // NOTE: the run id (first 35 chars of the submitter_id) is not currently part
        // of the path, so it is no longer computed here; the old unused Substring call
        // threw for submitter ids shorter than 35 characters.
        string fileLocation = Path.Combine(basePath, deliveryFolderName);

        if (File.Exists(Path.Combine(fileLocation, dataFileName)))
        {
            dataFile.DataFileLocation = fileLocation;
            dataFile.ReadyForUpload = true;
            Program.SeqDataFiles[key] = dataFile;
        }
        else
        {
            numFilesNotFound++;
        }
    }

    return numFilesNotFound;
}
/// <summary>
/// Uploads one sequence data file with the configured data transfer tool
/// (or fakes a successful run when TestMode is set), appends the outcome to
/// the log file assigned to the current task, and re-queues the work item
/// when the upload failed and retries remain.
/// </summary>
/// <param name="workId">Key of the work item in SeqDataFiles.</param>
/// <param name="remainingItems">Number of work items still pending; used for logging only.</param>
/// <returns>
/// false when the log file or the work item cannot be resolved; true otherwise,
/// including when the upload itself failed (the failure is logged and possibly re-queued).
/// </returns>
public static bool UploadSequenceData(int workId, int remainingItems)
{
    // Task.CurrentId is null when not running inside a task; treat that like a
    // missing log file instead of throwing on the cast.
    int? taskId = Task.CurrentId;
    string logFile = null;
    if (!taskId.HasValue || !LogFileSet.TryGetValue(taskId.Value, out logFile))
    {
        Console.WriteLine($"Unable to get logfile name from LogFileSet on TaskId {workId}");
        return false;
    }

    if (!SeqDataFiles.TryGetValue(workId, out SeqFileInfo seqDataFile))
    {
        File.AppendAllText(logFile, ($"Unable to get SeqFileInfo object out of SeqDataFiles {workId}" + Environment.NewLine));
        return false;
    }

    string startTime = DateTime.Now.ToString("g");
    StringBuilder sb = new StringBuilder();

    // gdc-client reverts to a "simple mode" when the file size is less than 1GB, however
    // it does not exit cleanly in this mode. So force all xfers to be multi-part by setting
    // the upload-part-size to a value smaller than the file size.
    string uploadPartSize = "";
    long defaultPartSize = 1000000000;
    if (seqDataFile.DataFileSize < defaultPartSize)
    {
        long newPartSize = (long)(seqDataFile.DataFileSize * 0.8);
        // Empty or one-byte files would otherwise yield a part size of 0.
        if (newPartSize > 0)
        {
            uploadPartSize = " --upload-part-size " + newPartSize.ToString();
        }
    }

    string cmdLineArgs = "upload -t " + GDCTokenFile + " " + seqDataFile.Id + uploadPartSize;

    sb.Append("Begin:" + "\t");
    sb.Append(startTime + "\t");
    sb.Append(seqDataFile.Id + "\t");
    sb.Append(seqDataFile.Submitter_id);
    sb.Append(Environment.NewLine);
    sb.Append("uploading ");
    sb.Append(seqDataFile.Id);
    sb.Append(" on thread ");
    sb.Append(taskId.Value.ToString());
    sb.Append(" with ");
    sb.Append(remainingItems.ToString());
    sb.Append(" work items remaining.");
    sb.Append(Environment.NewLine);
    sb.Append("WorkingDirectory = ");
    sb.Append(seqDataFile.DataFileLocation);
    sb.Append(Environment.NewLine);
    sb.Append(seqDataFile.DataFileName);
    sb.Append("\t");
    sb.Append(seqDataFile.DataFileSize);
    sb.Append("\t");
    sb.Append("partsize: " + uploadPartSize);
    sb.Append(Environment.NewLine);
    sb.Append("cmd = " + DataTransferTool + " " + cmdLineArgs);
    File.AppendAllText(logFile, sb.ToString());
    sb.Clear();

    string stdOut = "";
    string stdErr = "";
    if (TestMode)
    {
        Console.WriteLine(DataTransferTool + " " + cmdLineArgs + "; filename: " + seqDataFile.DataFileName);
        // fake the output of a gdc-client run indicating upload finished successfully
        stdOut = "Multipart upload finished for file " + seqDataFile.Id + Environment.NewLine;
    }
    else
    {
        using (var proc = new Process())
        {
            proc.StartInfo = new ProcessStartInfo
            {
                FileName = DataTransferTool,
                Arguments = cmdLineArgs,
                // the gdc-client DTT requires that it be executed from within the directory where the data file resides
                // actually, with newer 1.4 version you can set --path path and it seems to work
                WorkingDirectory = seqDataFile.DataFileLocation,
                CreateNoWindow = true,
                UseShellExecute = false,
                RedirectStandardOutput = true,
                RedirectStandardInput = true,
                RedirectStandardError = true
            };
            proc.Start();

            // Drain stderr asynchronously while stdout is read synchronously. Reading
            // both redirected streams to the end one after the other can deadlock:
            // the child blocks on a full stderr pipe while we wait for stdout to close.
            Task<string> stdErrTask = proc.StandardError.ReadToEndAsync();
            stdOut = proc.StandardOutput.ReadToEnd();
            stdErr = stdErrTask.GetAwaiter().GetResult();
            proc.WaitForExit();
        }
    }
    string endTime = DateTime.Now.ToString("g");

    // two common error messages to look for:
    string knownErrorMessage1 = "File in validated state, initiate_multipart not allowed"; // file already exists at GDC
    string knownErrorMessage2 = "File with id " + seqDataFile.Id + " not found"; // local file not found, gdc xfer tool likely not executed from within directory that contains the file

    bool uploadSucceeded =
        stdOut.IndexOf("Multipart upload finished for file " + seqDataFile.Id, StringComparison.Ordinal) != -1;

    string logDateTime = DateTime.Now.ToString("g");
    if (!uploadSucceeded)
    {
        sb.Append(Environment.NewLine);
        string failBaseText = "***" + "\t" + logDateTime + "\t" + "File-NOT-UPLOADED:" + "\t" + seqDataFile.Id + "\t" + seqDataFile.Submitter_id + "\t";

        // The known messages are looked for on both streams; which stream the tool
        // writes them to is not established here, and stderr used to be ignored.
        bool alreadyAtGdc =
            stdOut.IndexOf(knownErrorMessage1, StringComparison.Ordinal) != -1 ||
            stdErr.IndexOf(knownErrorMessage1, StringComparison.Ordinal) != -1;
        bool localFileMissing =
            stdOut.IndexOf(knownErrorMessage2, StringComparison.Ordinal) != -1 ||
            stdErr.IndexOf(knownErrorMessage2, StringComparison.Ordinal) != -1;

        bool keepWorking = true;
        if (alreadyAtGdc)
        {
            sb.Append(failBaseText + "Fail: File already at GDC");
            keepWorking = false;
        }
        else if (localFileMissing)
        {
            sb.Append(failBaseText + "Fail: Local file not found");
            keepWorking = false;
        }
        else if (seqDataFile.UploadAttempts >= NumRetries)
        {
            // ">=" so an item that somehow exceeded the limit still gets a logged reason.
            sb.Append(failBaseText + "Fail: Reached Max Retries");
            keepWorking = false;
        }

        if (keepWorking)
        {
            // Retries remain: record the attempt and put the item back on the queue.
            seqDataFile.UploadAttempts++;
            SeqDataFiles[workId] = seqDataFile;
            SeqDataFilesQueue.Enqueue(workId);
            Thread.Sleep(200);
            sb.Append("---");
            sb.Append("\t" + logDateTime);
            sb.Append("\t" + "Re-queuing");
            sb.Append("\t" + seqDataFile.Id);
            sb.Append("\t" + seqDataFile.Submitter_id);
            sb.Append("\t" + "Re-queued: ");
            sb.Append(seqDataFile.UploadAttempts.ToString());
            sb.Append(" of ");
            sb.Append(NumRetries.ToString());
            sb.Append(Environment.NewLine);
            Console.WriteLine($"Re-queued item {workId}");
        }
    }

    sb.Append(Environment.NewLine);
    sb.Append(stdOut);
    if (!string.IsNullOrEmpty(stdErr))
    {
        // Keep the tool's error output in the log; it was previously captured and dropped.
        sb.Append(Environment.NewLine);
        sb.Append("StdErr:" + Environment.NewLine);
        sb.Append(stdErr);
        sb.Append(Environment.NewLine);
    }
    sb.Append("End: " + endTime + Environment.NewLine + Environment.NewLine);
    File.AppendAllText(logFile, sb.ToString());
    sb.Clear();
    return true;
}