/// <summary>
/// Reads three records (a full one, one ending in an empty field, and a short one)
/// with <see cref="MissingFieldAction.ReplaceByNull"/> and checks what gets copied
/// into a reused five-element buffer after each read.
/// </summary>
public void LastFieldEmptyFollowedByMissingFieldsOnNextRecord()
{
    const string Input = "a,b,c,d,e" + "\na,b,c,d," + "\na,b,";

    string[] expectedFull = { "a", "b", "c", "d", "e" };
    string[] expectedTrailingEmpty = { "a", "b", "c", "d", "" };
    string[] expectedShort = { "a", "b", "", null, null };

    using (CsvReader reader = new CsvReader(new StringReader(Input), false))
    {
        reader.MissingFieldAction = MissingFieldAction.ReplaceByNull;

        // The same buffer is reused for every record.
        string[] fields = new string[5];

        Assert.IsTrue(reader.ReadNextRecord());
        reader.CopyCurrentRecordTo(fields);
        Assert.AreEqual(expectedFull, fields);

        Assert.IsTrue(reader.ReadNextRecord());
        reader.CopyCurrentRecordTo(fields);
        Assert.AreEqual(expectedTrailingEmpty, fields);

        Assert.IsTrue(reader.ReadNextRecord());
        reader.CopyCurrentRecordTo(fields);
        Assert.AreEqual(expectedShort, fields);

        // Nothing is left after the third record.
        Assert.IsFalse(reader.ReadNextRecord());
    }
}
/// <summary>
/// Reads three records through a reader configured with a header-less layout and
/// <see cref="MissingFieldAction.ReplaceByNull"/>, and checks the values copied into a
/// reused five-element buffer after each read.
/// </summary>
public void LastFieldEmptyFollowedByMissingFieldsOnNextRecord()
{
    const string Input = "a,b,c,d,e" + "\na,b,c,d," + "\na,b,";

    // Expected buffer content after each successive record.
    string[][] expectedRecords =
    {
        new string[] { "a", "b", "c", "d", "e" },
        new string[] { "a", "b", "c", "d", "" },
        new string[] { "a", "b", "", null, null },
    };

    using (var reader = new CsvReader(
        new StringReader(Input),
        CsvReader.DefaultBufferSize,
        new CsvLayout(hasHeaders: false),
        new CsvBehaviour(missingFieldAction: MissingFieldAction.ReplaceByNull)))
    {
        string[] fields = new string[5];

        foreach (string[] expected in expectedRecords)
        {
            Assert.IsTrue(reader.ReadNextRecord());
            reader.CopyCurrentRecordTo(fields);
            CollectionAssert.AreEqual(expected, fields);
        }

        // Nothing is left after the third record.
        Assert.IsFalse(reader.ReadNextRecord());
    }
}
/// <summary>
/// Reads three records with <see cref="MissingFieldAction.ReplaceByNull"/> set on the
/// reader and compares, element by element, the values copied into a reused
/// five-element buffer after each read.
/// </summary>
public void LastFieldEmptyFollowedByMissingFieldsOnNextRecord()
{
    const string Input = "a,b,c,d,e" + "\na,b,c,d," + "\na,b,";

    // Expected buffer content after each successive record.
    string[][] expectedRecords =
    {
        new string[] { "a", "b", "c", "d", "e" },
        new string[] { "a", "b", "c", "d", "" },
        new string[] { "a", "b", "", null, null },
    };

    using (CsvReader reader = new CsvReader(new StringReader(Input), false))
    {
        reader.MissingFieldAction = MissingFieldAction.ReplaceByNull;
        string[] fields = new string[5];

        foreach (string[] expected in expectedRecords)
        {
            Assert.IsTrue(reader.ReadNextRecord());
            reader.CopyCurrentRecordTo(fields);
            CollectionAssert.AreEqual(expected, fields);
        }

        // Nothing is left after the third record.
        Assert.IsFalse(reader.ReadNextRecord());
    }
}
/// <summary>
/// Copies the current record of <paramref name="csv"/> into a six-element buffer and
/// hands it to the field-level <c>CheckSampleData1</c> overload, starting at index 0.
/// </summary>
/// <param name="recordIndex">Index of the record being checked; forwarded unchanged.</param>
/// <param name="csv">Reader positioned on the record to check.</param>
public static void CheckSampleData1(long recordIndex, CsvReader csv)
{
    var buffer = new string[6];
    csv.CopyCurrentRecordTo(buffer);

    var hasHeaders = csv.HasHeaders;
    CheckSampleData1(hasHeaders, recordIndex, buffer, 0);
}
/// <summary>
/// Calls <c>CopyCurrentRecordTo</c> with start index 1 on a one-element array, without
/// reading a record first.
/// </summary>
/// <remarks>
/// The body asserts nothing itself; presumably an expected-exception attribute outside
/// this view supplies the check (verify against the declaration).
/// </remarks>
public void ArgumentTestCopyCurrentRecordTo3()
{
    using (var reader = new CsvReader(new StringReader(CsvReaderSampleData.SampleData1), false))
    {
        var target = new string[1];
        reader.CopyCurrentRecordTo(target, 1);
    }
}
/// <summary>
/// Builds a tree from the CSV, adding one entry per record.
/// </summary>
/// <remarks>
/// For each record, the non-empty values of all columns except the last two are joined
/// with '.' to form the path; the header of the last non-empty such column becomes the
/// department property. The display name combines the last path segment with column 6,
/// and column 7 is used as the description.
/// </remarks>
/// <param name="csv">Reader to consume until no record is left.</param>
/// <returns>The populated tree.</returns>
protected override SimpleTree<string> ParseCsv(CsvReader csv)
{
    var tree = new SimpleTree<string>();

    while (csv.ReadNextRecord())
    {
        var fields = new string[Headers.Length];
        csv.CopyCurrentRecordTo(fields);

        string unit = string.Empty;
        var pathBuilder = new StringBuilder();
        for (int column = 0; column < Headers.Length - 2; column++)
        {
            if (string.IsNullOrEmpty(fields[column]))
            {
                continue;
            }

            pathBuilder.Append(fields[column]);
            pathBuilder.Append('.');
            unit = Headers[column];
        }

        string path = pathBuilder.ToString();

        // Last segment of the path, ignoring the trailing separator.
        string trimmedPath = path.TrimEnd('.');
        string lastSegment = trimmedPath.Substring(trimmedPath.LastIndexOf('.') + 1);

        string name = string.Format("{0} - {1}", lastSegment, fields[6]);
        string desc = fields[7];
        AddToTree(tree, path, name, desc, new Property(DEPARTMENT_PROP_NAME, unit));
    }

    return tree;
}
/// <summary>
/// Moves the enumerator to the next CSV record.
/// </summary>
/// <returns>
/// <see langword="true"/> when a record was read and copied into the current buffer;
/// <see langword="false"/> when the reader has no more records.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The reader's record index no longer matches the index this enumerator last saw.
/// </exception>
public bool MoveNext()
{
    // The reader must not have been advanced behind the enumerator's back.
    if (_reader._currentRecordIndex != _currentRecordIndex)
    {
        throw new InvalidOperationException(ExceptionMessage.EnumerationVersionCheckFailed);
    }

    bool advanced = _reader.ReadNextRecord();
    if (!advanced)
    {
        _current = null;
        _currentRecordIndex = _reader._currentRecordIndex;
        return false;
    }

    // The buffer is allocated on first use and reused afterwards.
    _current = _current ?? new string[_reader._fieldCount];
    _reader.CopyCurrentRecordTo(_current);
    _currentRecordIndex = _reader._currentRecordIndex;
    return true;
}
/// <summary>
/// Checks that copying the current record before any record has been read throws
/// <see cref="InvalidOperationException"/>.
/// </summary>
public void CopyCurrentRecordToTest1()
{
    var input = new StringReader(CsvReaderSampleData.SampleData1);
    using (var reader = new CsvReader(input, false))
    {
        Assert.Throws<InvalidOperationException>(() =>
        {
            var target = new string[CsvReaderSampleData.SampleData1RecordCount];
            reader.CopyCurrentRecordTo(target);
        });
    }
}
/// <summary>
/// Calls <c>CopyCurrentRecordTo</c> on a freshly created reader, before any record has
/// been read.
/// </summary>
/// <remarks>
/// The body asserts nothing itself; presumably an expected-exception attribute outside
/// this view supplies the check (verify against the declaration).
/// </remarks>
public void CopyCurrentRecordToTest1()
{
    var input = new StringReader(CsvReaderSampleData.SampleData1);
    using (var reader = new CsvReader(input, false))
    {
        var target = new string[CsvReaderSampleData.SampleData1RecordCount];
        reader.CopyCurrentRecordTo(target);
    }
}
/// <summary>
/// Reads one record, then copies it at start index 1 into an array of
/// <c>SampleData1RecordCount</c> elements.
/// </summary>
/// <remarks>
/// The body asserts nothing itself; presumably an expected-exception attribute outside
/// this view supplies the check (verify against the declaration).
/// </remarks>
public void ArgumentTestCopyCurrentRecordTo5()
{
    var input = new StringReader(CsvReaderSampleData.SampleData1);
    using (var reader = new CsvReader(input, false))
    {
        reader.ReadNextRecord();

        var target = new string[CsvReaderSampleData.SampleData1RecordCount];
        reader.CopyCurrentRecordTo(target, 1);
    }
}
/// <summary>
/// Reads one record, then copies it at start index 0 into an array that is one element
/// shorter than <c>SampleData1RecordCount</c>.
/// </summary>
/// <remarks>
/// The body asserts nothing itself; presumably an expected-exception attribute outside
/// this view supplies the check (verify against the declaration).
/// </remarks>
public void ArgumentTestCopyCurrentRecordTo4()
{
    var input = new StringReader(CsvReaderSampleData.SampleData1);
    using (var reader = new CsvReader(input, false))
    {
        reader.ReadNextRecord();

        var target = new string[CsvReaderSampleData.SampleData1RecordCount - 1];
        reader.CopyCurrentRecordTo(target, 0);
    }
}
/// <summary>
/// Reads every record of the CSV file at <paramref name="filePath"/> and validates the
/// file's extension, location, delimiter and header.
/// </summary>
/// <param name="passHeader">Header names the file is expected to have; compared via <c>IsHeaderSame</c>.</param>
/// <param name="in_delimeter">Delimiter the caller expects; compared with the delimiter reported by the reader.</param>
/// <param name="filePath">Path of the CSV file. Must contain ".csv" and the configured <c>actualPath</c>.</param>
/// <returns>
/// The value of <c>numberOfRecord</c> after reading on success; otherwise the message of
/// the <c>CensusAnalyserException</c> or <c>CSVException</c> raised by a failed check.
/// </returns>
/// <exception cref="Exception">
/// Any other failure (including a null <paramref name="filePath"/>), rethrown with the
/// same message and the original exception attached as <see cref="Exception.InnerException"/>.
/// </exception>
public object ReadRecords(string[] passHeader = null, char in_delimeter = ',', string filePath = null)
{
    try
    {
        if (!filePath.Contains(".csv"))
        {
            throw new CensusAnalyserException(CensusAnalyserException.ExceptionType.INVALID_EXTENSION_OF_FILE, "Invalid Extension of file");
        }
        else if (!filePath.Contains(actualPath))
        {
            throw new CensusAnalyserException(CensusAnalyserException.ExceptionType.FILE_NOT_FOUND, "Invalid file");
        }

        string[] headers;
        List<string[]> record = new List<string[]>();

        // Dispose the reader (and the underlying stream) as soon as the file has been
        // consumed, including when a read fails part-way through.
        using (CsvReader csvRecords = new CsvReader(new StreamReader(filePath), true))
        {
            int fieldCount = csvRecords.FieldCount;
            headers = csvRecords.GetFieldHeaders();
            delimeter = csvRecords.Delimiter;

            while (csvRecords.ReadNextRecord())
            {
                string[] tempRecord = new string[fieldCount];
                csvRecords.CopyCurrentRecordTo(tempRecord);
                record.Add(tempRecord);
                numberOfRecord++;
            }
        }

        if (numberOfRecord == 0)
        {
            throw new CSVException(CSVException.ExceptionType.FILE_IS_EMPTY, "File has no data");
        }

        if (!in_delimeter.Equals(delimeter))
        {
            throw new CensusAnalyserException(CensusAnalyserException.ExceptionType.INCORRECT_DELIMETER, "Incorrect Delimeter");
        }
        else if (!IsHeaderSame(passHeader, headers))
        {
            throw new CensusAnalyserException(CensusAnalyserException.ExceptionType.INVALID_HEADER_ERROR, "Invalid Header");
        }

        return numberOfRecord;
    }
    catch (CensusAnalyserException file_not_found)
    {
        return file_not_found.Message;
    }
    catch (CSVException emptyFileException)
    {
        return emptyFileException.Message;
    }
    catch (Exception exception)
    {
        // Same exception type and message as before, but keep the original exception
        // so its type and stack trace are not lost.
        throw new Exception(exception.Message, exception);
    }
}
/// <summary>
/// Checks that copying at start index 1 into a one-element array throws
/// <see cref="ArgumentOutOfRangeException"/>.
/// </summary>
public void CopyCurrentRecordTo_ArrayBeyondBounds_ThrowsArgumentOutOfRangeException()
{
    Assert.Throws<ArgumentOutOfRangeException>(() =>
    {
        var input = new StringReader(CsvReaderSampleData.SampleData1);
        using (var reader = new CsvReader(input, false))
        {
            var target = new string[1];
            reader.CopyCurrentRecordTo(target, 1);
        }
    });
}
/// <summary>
/// Checks that passing a null array to <c>CopyCurrentRecordTo</c> throws
/// <see cref="ArgumentNullException"/>.
/// </summary>
public void CopyCurrentRecordTo_Null_ThrowsArgumentNullException()
{
    Assert.Throws<ArgumentNullException>(() =>
    {
        var input = new StringReader(CsvReaderSampleData.SampleData1);
        using (var reader = new CsvReader(input, false))
        {
            reader.CopyCurrentRecordTo(null);
        }
    });
}
/// <summary>
/// Checks that copying at start index 1 into a one-element array throws
/// <see cref="ArgumentOutOfRangeException"/>.
/// </summary>
public void ArgumentTestCopyCurrentRecordTo3()
{
    Assert.Throws<ArgumentOutOfRangeException>(() =>
    {
        var input = new StringReader(CsvReaderSampleData.SampleData1);
        using (CsvReader reader = new CsvReader(input, false))
        {
            string[] target = new string[1];
            reader.CopyCurrentRecordTo(target, 1);
        }
    });
}
/// <summary>
/// Checks that, after reading one record, copying it into an array one element shorter
/// than <c>SampleData1RecordCount</c> throws <see cref="ArgumentException"/>.
/// </summary>
public void CopyCurrentRecordTo_ArrayTooSmall_ThrowsArgumentException()
{
    Assert.Throws<ArgumentException>(() =>
    {
        var input = new StringReader(CsvReaderSampleData.SampleData1);
        using (var reader = new CsvReader(input, false))
        {
            reader.ReadNextRecord();

            var target = new string[CsvReaderSampleData.SampleData1RecordCount - 1];
            reader.CopyCurrentRecordTo(target, 0);
        }
    });
}
/// <summary>
/// Checks that, after reading one record, copying it at start index 1 into an array of
/// <c>SampleData1RecordCount</c> elements throws <see cref="ArgumentException"/>.
/// </summary>
/// <remarks>
/// Note that the assertion expects <see cref="ArgumentException"/>, although the method
/// name mentions ArgumentOutOfRangeException.
/// </remarks>
public void CopyCurrentRecordTo_NotEnoughSlotsAfterIndex_ThrowsArgumentOutOfRangeException()
{
    Assert.Throws<ArgumentException>(() =>
    {
        var input = new StringReader(CsvReaderSampleData.SampleData1);
        using (var reader = new CsvReader(input, false))
        {
            reader.ReadNextRecord();

            var target = new string[CsvReaderSampleData.SampleData1RecordCount];
            reader.CopyCurrentRecordTo(target, 1);
        }
    });
}
/// <summary>
/// Checks that, after reading one record, copying it at start index 1 into an array of
/// <c>SampleData1RecordCount</c> elements throws <see cref="ArgumentException"/>.
/// </summary>
public void ArgumentTestCopyCurrentRecordTo5()
{
    Assert.Throws<ArgumentException>(() =>
    {
        var input = new StringReader(CsvReaderSampleData.SampleData1);
        using (CsvReader reader = new CsvReader(input, false))
        {
            reader.ReadNextRecord();

            string[] target = new string[CsvReaderSampleData.SampleData1RecordCount];
            reader.CopyCurrentRecordTo(target, 1);
        }
    });
}
/// <summary>
/// Reads the next record from the underlying reader.
/// </summary>
/// <returns>
/// A <c>CsvRecord</c> pairing the stored headers with a fresh copy of the record's
/// values, or <see langword="null"/> when no record is left.
/// </returns>
public CsvRecord Read()
{
    bool hasRecord = csvReader.ReadNextRecord();
    if (hasRecord == false)
    {
        return null;
    }

    // A new array per record, so returned records do not share storage.
    string[] values = new string[csvReader.FieldCount];
    csvReader.CopyCurrentRecordTo(values);

    return new CsvRecord(headers, values);
}
/// <summary>
/// Parses <paramref name="data"/> with the given trimming options and checks that every
/// record read equals <paramref name="expected"/>.
/// </summary>
/// <param name="data">CSV text to parse (no header row).</param>
/// <param name="trimmingOptions">Trimming options passed to the reader.</param>
/// <param name="expected">Field values each record is expected to contain.</param>
public void TrimFieldValuesTest(string data, ValueTrimmingOptions trimmingOptions, params string[] expected)
{
    var input = new StringReader(data);

    using (CsvReader reader = new CsvReader(
        input,
        false,
        CsvReader.DefaultDelimiter,
        CsvReader.DefaultQuote,
        CsvReader.DefaultEscape,
        CsvReader.DefaultComment,
        trimmingOptions))
    {
        for (bool more = reader.ReadNextRecord(); more; more = reader.ReadNextRecord())
        {
            string[] actual = new string[reader.FieldCount];
            reader.CopyCurrentRecordTo(actual);

            Assert.AreEqual(expected, actual);
        }
    }
}
/// <summary>
/// Reloads <paramref name="LineList"/> (and optionally <paramref name="headers"/>) from a
/// CSV file.
/// </summary>
/// <param name="my_filename">Path of the CSV file.</param>
/// <param name="is_headers">Whether the first row holds column headers.</param>
/// <param name="is_unicode">When true the file is read as UTF-8, otherwise as ISO-8859-1.</param>
/// <param name="char_separator">Field delimiter.</param>
/// <param name="LineList">Cleared first, then filled with one array per record.</param>
/// <param name="headers">Set to the file's headers, or to null when <paramref name="is_headers"/> is false.</param>
/// <returns>"success", or the text of the exception raised while parsing.</returns>
private string CSV_ReloadData(string my_filename, bool is_headers, bool is_unicode, char char_separator, ref List<string[]> LineList, ref string[] headers)
{
    LineList.Clear();

    // uses CSVreader Library from http://www.codeproject.com/KB/database/CsvReader.aspx under MIT License
    Encoding encoding = is_unicode ? Encoding.UTF8 : Encoding.GetEncoding("ISO-8859-1");

    // Opened outside the try block: a failure to open the file propagates to the caller.
    var fileReader = new System.IO.StreamReader(my_filename, encoding);

    try
    {
        using (var csv = new CsvReader(fileReader, is_headers, char_separator))
        {
            int columnCount = csv.FieldCount;
            headers = is_headers ? csv.GetFieldHeaders() : null;

            while (csv.ReadNextRecord())
            {
                var row = new string[columnCount];
                csv.CopyCurrentRecordTo(row);
                LineList.Add(row);
            }
        }

        return "success";
    }
    catch (Exception e)
    {
        return e.ToString();
    }
    finally
    {
        fileReader.Close();
    }
}
/// <summary>
/// Reads four all-quoted records, the third of which has only one field, copying each
/// one into a two-element buffer.
/// </summary>
/// <remarks>
/// The body asserts nothing itself; presumably an expected-exception attribute outside
/// this view supplies the check (verify against the declaration).
/// </remarks>
public void MissingFieldAllQuotedFields_Issue_12()
{
    string input = "\"A\",\"B\"\n" + "\"1\",\"2\"\n" + "\"3\"\n" + "\"5\",\"6\"";
    var fields = new string[2];

    using (var reader = new CsvReader(new StringReader(input), false))
    {
        for (bool more = reader.ReadNextRecord(); more; more = reader.ReadNextRecord())
        {
            reader.CopyCurrentRecordTo(fields);
        }
    }
}
/// <summary>
/// Reads a tab-delimited file and builds one <typeparamref name="T"/> per record.
/// </summary>
/// <typeparam name="T">Item type; created with its parameterless constructor and populated via <c>Fill</c>.</typeparam>
/// <param name="fileName">Path of the file to read.</param>
/// <param name="hasHeaders">Whether the first row holds column headers.</param>
/// <returns>The items, in file order.</returns>
public static IList<T> ParseArray<T>(string fileName, bool hasHeaders = false) where T : IFromCsv, new()
{
    List<T> items = new List<T>();

    using (CsvReader csv = new CsvReader(new StreamReader(fileName), hasHeaders, '\t'))
    {
        while (csv.ReadNextRecord())
        {
            // A fresh array per record, so each item receives its own values.
            string[] fields = new string[csv.FieldCount];
            csv.CopyCurrentRecordTo(fields);

            T item = new T();
            item.Fill(fields);
            items.Add(item);
        }
    }

    return items;
}
/// <summary>
/// Reads every record of the hard-coded users.csv file and prints each field to the
/// console as " header = value ".
/// </summary>
/// <exception cref="FileNotFoundException">
/// The file does not exist. The message is the missing file name and the original
/// exception is attached as the inner exception.
/// </exception>
/// <exception cref="Exception">
/// Any other failure, rethrown with the same message and the original exception
/// attached as the inner exception.
/// </exception>
public static void ReadData()
{
    try
    {
        // The verbatim (@) string keeps the backslashes of the path literal.
        using (CsvReader csv = new CsvReader(new StreamReader(@"C:\Users\Admin\Documents\Visual Studio 2017\Projects\OpenCSV\OpenCSV\users.csv"), true))
        {
            // Number of fields per record.
            int field_Count = csv.FieldCount;

            // Column headers.
            string[] headers = csv.GetFieldHeaders();

            // Read all records into memory.
            List<string[]> records = new List<string[]>();
            while (csv.ReadNextRecord())
            {
                // A fresh array per record, so stored records do not share storage.
                string[] temp_record = new string[field_Count];
                csv.CopyCurrentRecordTo(temp_record);
                records.Add(temp_record);
            }

            // Print every field together with its header.
            foreach (string[] print_record in records)
            {
                for (int j = 0; j < field_Count; j++)
                {
                    Console.Write($" {headers[j]} = {print_record[j]} ");
                }
            }
        }
    }
    catch (FileNotFoundException file_not_found)
    {
        // Same exception type and message as before; the original is kept as the
        // inner exception so its stack trace is not lost.
        throw new FileNotFoundException(file_not_found.FileName, file_not_found);
    }
    catch (Exception exception)
    {
        throw new Exception(exception.Message, exception);
    }
}
/// <summary>
/// Reads the hard-coded StateCensusData.csv file, prints its field count and every
/// record (fields separated by a space), then calls <c>CsvDataToFile.WriteData</c>.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
/// <exception cref="Exception">
/// The file does not exist. The message is the missing file name and the original
/// <see cref="FileNotFoundException"/> is attached as the inner exception.
/// </exception>
static void Main(string[] args)
{
    try
    {
        // Dispose the reader (and the file stream) once the file has been consumed.
        using (CsvReader csvReader = new CsvReader(new StreamReader(File.OpenRead(@"C:\Users\Saksham\source\repos\OpenCSV\StateCensusData.csv")), true))
        {
            // Number of fields per record.
            int fieldsCount = csvReader.FieldCount;
            Console.WriteLine(fieldsCount);

            // Column headers.
            string[] headers = csvReader.GetFieldHeaders();

            // Read all rows into memory.
            List<string[]> records = new List<string[]>();
            while (csvReader.ReadNextRecord())
            {
                string[] recordsInARow = new string[fieldsCount];
                csvReader.CopyCurrentRecordTo(recordsInARow);
                records.Add(recordsInARow);
            }

            // Print the records.
            foreach (string[] rec in records)
            {
                Console.WriteLine(String.Join(" ", rec));
            }
        }
    }
    catch (FileNotFoundException file_not_found)
    {
        // Same exception type and message as before; the original is kept as the
        // inner exception so its stack trace is not lost.
        throw new Exception(file_not_found.FileName, file_not_found);
    }

    CsvDataToFile.WriteData();
}
/// <summary>
/// Checks that reading all-quoted records, one of which has a single field instead of
/// two, throws <c>MissingFieldCsvException</c>.
/// </summary>
public void MissingFieldAllQuotedFields_Issue_12()
{
    string input = "\"A\",\"B\"\n" + "\"1\",\"2\"\n" + "\"3\"\n" + "\"5\",\"6\"";
    string[] fields = new string[2];

    Assert.Throws<MissingFieldCsvException>(() =>
    {
        using (CsvReader reader = new CsvReader(new StringReader(input), false))
        {
            for (bool more = reader.ReadNextRecord(); more; more = reader.ReadNextRecord())
            {
                reader.CopyCurrentRecordTo(fields);
            }
        }
    });
}
/// <summary>
/// Parses CSV text into a list of <typeparamref name="T"/>, one per data record.
/// </summary>
/// <remarks>
/// The first row is treated as the header row and the record after it is skipped.
/// Each remaining record is handed to <c>ParseData</c> on a new instance created with
/// <see cref="Activator.CreateInstance(Type)"/>.
/// </remarks>
/// <typeparam name="T">Row type derived from <c>CSVBaseData</c>.</typeparam>
/// <param name="csvText">The CSV content.</param>
/// <returns>The parsed rows, in input order.</returns>
public static List<T> ParseCSV<T>(string csvText) where T : CSVBaseData
{
    // Dispose the reader once parsing is complete (or fails).
    using (CsvReader csv = new CsvReader(new StringReader(csvText), true))
    {
        int fieldCount = csv.FieldCount;
        string[] headers = csv.GetFieldHeaders();

        csv.ReadNextRecord(); // skip desc

        List<T> result = new List<T>();
        while (csv.ReadNextRecord())
        {
            long index = csv.CurrentRecordIndex;

            // A fresh array per record, so each object receives its own values.
            string[] values = new string[fieldCount];
            csv.CopyCurrentRecordTo(values);

            T obj = (T)Activator.CreateInstance(typeof(T));
            obj.ParseData(index, fieldCount, headers, values);
            result.Add(obj);
        }

        return result;
    }
}
/// <summary>
/// Reads every record of the hard-coded users.csv file and prints each field to the
/// console as " header = value ".
/// </summary>
/// <exception cref="Exception">
/// The file does not exist. The message is the missing file name and the original
/// <see cref="FileNotFoundException"/> is attached as the inner exception.
/// </exception>
public static void ReadDataFromCsv()
{
    try
    {
        using (CsvReader csv = new CsvReader(new StreamReader("C:/Users/intel/source/repos/open_CSV_File/open_CSV_File/users.csv"), true))
        {
            // Number of fields per record.
            int field_Count = csv.FieldCount;

            // Column headers.
            string[] headers = csv.GetFieldHeaders();

            // Read all records into memory.
            List<string[]> records = new List<string[]>();
            while (csv.ReadNextRecord())
            {
                // A fresh array per record, so stored records do not share storage.
                string[] temp_record = new string[field_Count];
                csv.CopyCurrentRecordTo(temp_record);
                records.Add(temp_record);
            }

            // Print every field together with its header.
            foreach (string[] print_record in records)
            {
                for (int j = 0; j < field_Count; j++)
                {
                    Console.Write($" {headers[j]} = {print_record[j]} ");
                }
            }
        }
    }
    catch (FileNotFoundException file_not_found)
    {
        // Same exception type and message as before; the original is kept as the
        // inner exception so its stack trace is not lost.
        throw new Exception(file_not_found.FileName, file_not_found);
    }
}
/// <summary>
/// Validates the header line of a CSV file and counts its data records.
/// </summary>
/// <param name="filePath">Path of the CSV file.</param>
/// <param name="header">Text the first line of the file must contain.</param>
/// <param name="delimeter">Delimiter the first line of the file must contain.</param>
/// <returns>The number of records read after the header row (boxed <see cref="int"/>).</returns>
/// <exception cref="CensusAnalyserException">
/// The file is empty, its first line does not contain <paramref name="header"/>, or its
/// first line does not contain <paramref name="delimeter"/>.
/// </exception>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
public object StateLoadData(string filePath, string header, string delimeter = ",")
{
    try
    {
        // Only the first line is needed for validation; an empty file yields null
        // instead of an index-out-of-range failure.
        string headerLine = File.ReadLines(filePath).FirstOrDefault();

        if (headerLine == null || !headerLine.Contains(header))
        {
            throw new CensusAnalyserException(CensusException.Wrong_Header + "");
        }

        // Check the delimiter the caller asked for, inside the header line.
        if (!headerLine.Contains(delimeter))
        {
            throw new CensusAnalyserException(CensusException.Wrong_Delimiter + "");
        }

        int numberOfRecord = 0;
        using (CsvReader csvRecords = new CsvReader(new StreamReader(filePath), true))
        {
            string[] buffer = new string[csvRecords.FieldCount];

            // The reader must be advanced before the current record can be copied.
            while (csvRecords.ReadNextRecord())
            {
                // Copy kept from the original implementation; presumably it surfaces
                // malformed rows as parse errors. TODO confirm it is still wanted.
                csvRecords.CopyCurrentRecordTo(buffer);
                numberOfRecord++;
            }
        }

        return numberOfRecord;
    }
    catch (FileNotFoundException)
    {
        throw new FileNotFoundException(CensusException.Wrong_File_Path + "");
    }
}
/// <summary>
/// Calls <c>CopyCurrentRecordTo</c> with start index 1 on a one-element array, without
/// reading a record first.
/// </summary>
/// <remarks>
/// The body asserts nothing itself; presumably an expected-exception attribute outside
/// this view supplies the check (verify against the declaration).
/// </remarks>
public void CopyCurrentRecordTo_ArrayBeyondBounds_ThrowsArgumentOutOfRangeException()
{
    var input = new StringReader(CsvReaderSampleData.SampleData1);
    using (var reader = new CsvReader(input, false))
    {
        var target = new string[1];
        reader.CopyCurrentRecordTo(target, 1);
    }
}
/// <summary>
/// Reads one record, then copies it at start index 0 into an array that is one element
/// shorter than <c>SampleData1RecordCount</c>.
/// </summary>
/// <remarks>
/// The body asserts nothing itself; presumably an expected-exception attribute outside
/// this view supplies the check (verify against the declaration).
/// </remarks>
public void CopyCurrentRecordTo_ArrayTooSmall_ThrowsArgumentException()
{
    var input = new StringReader(CsvReaderSampleData.SampleData1);
    using (var reader = new CsvReader(input, false))
    {
        reader.ReadNextRecord();

        var target = new string[CsvReaderSampleData.SampleData1RecordCount - 1];
        reader.CopyCurrentRecordTo(target, 0);
    }
}
/// <summary>
/// Reads one record, then copies it at start index 1 into an array of
/// <c>SampleData1RecordCount</c> elements.
/// </summary>
/// <remarks>
/// The body asserts nothing itself; presumably an expected-exception attribute outside
/// this view supplies the check (verify against the declaration).
/// </remarks>
public void CopyCurrentRecordTo_NotEnoughSlotsAfterIndex_ThrowsArgumentOutOfRangeException()
{
    var input = new StringReader(CsvReaderSampleData.SampleData1);
    using (var reader = new CsvReader(input, false))
    {
        reader.ReadNextRecord();

        var target = new string[CsvReaderSampleData.SampleData1RecordCount];
        reader.CopyCurrentRecordTo(target, 1);
    }
}
/// <summary>Characters that force a CSV field to be enclosed in double quotes.</summary>
private static readonly char[] CsvCharsRequiringQuotes = { ',', '"', '\r', '\n' };

/// <summary>
/// Writes one "&lt;input name&gt;_quant.csv" file per input file into
/// <c>OutputDirectory</c>.
/// </summary>
/// <remarks>
/// Each output row repeats the input record and appends, in order: the precursor purity
/// in percent (only when <c>CalculatePurity</c> is set), then for every tag in
/// <c>UsedTags</c> the raw, denormalized, purity-corrected and purity-corrected
/// normalized intensities, and finally the number of channels with a positive raw
/// intensity.
/// </remarks>
/// <param name="quantFiles">Files to write; each is indexed by the "Filename/id" column to find the PSM of a row.</param>
private void WriteOutputFiles(IEnumerable<QuantFile> quantFiles)
{
    OnUpdateLog("Writing output files...");

    // Header templates for the four intensity column groups, in output order.
    string[] intensityHeaderFormats =
    {
        ",{0} ({1} NL)",   // Raw Intensities
        ",{0} ({1} dNL)",  // Denormalized Intensities
        ",{0} ({1} PC)",   // Purity Corrected Intensities
        ",{0} ({1} PCN)",  // Purity Corrected Normalized Intensities
    };

    foreach (QuantFile file in quantFiles)
    {
        string filePath = file.FilePath;
        string fileName = Path.GetFileNameWithoutExtension(filePath);
        string outputFilePath = Path.Combine(OutputDirectory, fileName + "_quant.csv");
        OnUpdateLog("Writing " + outputFilePath + "...");

        using (CsvReader reader = new CsvReader(new StreamReader(filePath), true))
        using (StreamWriter writer = new StreamWriter(outputFilePath))
        {
            StringBuilder sb = new StringBuilder();

            string[] headerColumns = reader.GetFieldHeaders();
            int headerCount = headerColumns.Length;
            sb.Append(string.Join(",", headerColumns));

            if (CalculatePurity)
            {
                sb.Append(",Precursor Purity (%)");
            }

            foreach (string format in intensityHeaderFormats)
            {
                foreach (TagInformation tag in UsedTags.Values)
                {
                    sb.AppendFormat(format, tag.SampleName, tag.TagName);
                }
            }

            // Number of Channels Detected
            sb.Append(",Channels Detected");
            writer.WriteLine(sb.ToString());

            while (reader.ReadNextRecord())
            {
                sb.Clear();

                string[] inputData = new string[headerCount];
                reader.CopyCurrentRecordTo(inputData);

                // Re-emit the input record, quoting any field that needs it.
                sb.Append(string.Join(",", inputData.Select(EscapeCsvField)));

                string fileId = reader["Filename/id"];
                PSM psm = file[fileId];
                List<QuantPeak> peaks = (from TagInformation tag in UsedTags.Values select psm[tag]).ToList();

                // Print out precursor purity if requested
                if (CalculatePurity)
                {
                    sb.Append(',');
                    sb.Append(psm.Purity * 100);
                }

                // Raw Intensities
                foreach (QuantPeak peak in peaks)
                {
                    sb.Append(',');
                    sb.Append(peak.RawIntensity);
                }

                // Denormalized Intensities
                foreach (QuantPeak peak in peaks)
                {
                    sb.Append(',');
                    sb.Append(peak.DeNormalizedIntensity);
                }

                // Purity Corrected Intensities
                foreach (QuantPeak peak in peaks)
                {
                    sb.Append(',');
                    sb.Append(peak.PurityCorrectedIntensity);
                }

                // Purity Corrected Normalized Intensities
                foreach (QuantPeak peak in peaks)
                {
                    sb.Append(',');
                    sb.Append(peak.PurityCorrectedNormalizedIntensity);
                }

                // Number of Channels Detected (positive raw intensity)
                int channelsDetected = peaks.Count(p => p.RawIntensity > 0);
                sb.Append(',');
                sb.Append(channelsDetected);

                writer.WriteLine(sb.ToString());
            }
        }
    }
}

/// <summary>
/// Returns <paramref name="field"/> ready to be written as one CSV field: enclosed in
/// double quotes, with embedded quotes doubled, when it contains a comma, a double
/// quote or a line break; unchanged otherwise. A null field becomes an empty string.
/// </summary>
private static string EscapeCsvField(string field)
{
    if (string.IsNullOrEmpty(field))
    {
        return string.Empty;
    }

    if (field.IndexOfAny(CsvCharsRequiringQuotes) < 0)
    {
        return field;
    }

    return "\"" + field.Replace("\"", "\"\"") + "\"";
}
/// <summary>
/// Copies the current row of the wrapped reader into <paramref name="array"/>.
/// </summary>
/// <param name="array">Destination array; passed straight to <c>CopyCurrentRecordTo</c>.</param>
public void CopyCurrentRowTo(string[] array) => _csv.CopyCurrentRecordTo(array);
/// <summary>
/// Groups the localized hits into proteins and writes two CSV reports,
/// "&lt;csv name&gt;_all.csv" and "&lt;csv name&gt;_localized.csv", into
/// <paramref name="outputDirectory"/>.
/// </summary>
/// <remarks>
/// Side effects visible in this method: it assigns <c>headerInfo</c>,
/// <c>FirstQuantColumn</c> and <c>LastQuantColumn</c>, and stores each matched input row
/// on the hit's <c>omssapsm</c> member.
/// </remarks>
/// <param name="hits">Localized hits; keyed by <c>PSM.Filename</c>, so a repeated file name makes <c>Dictionary.Add</c> throw.</param>
/// <param name="csvFile">Input CSV file (with header row) whose rows are copied to the reports.</param>
/// <param name="outputDirectory">Directory the two report files are written to.</param>
/// <param name="breakProteinsApart">When true, a hit's protein group is split on '|' and the hit is added to every resulting protein.</param>
/// <returns>All proteins that received at least one hit.</returns>
private List<Protein> CompileResults(List<LocalizedHit> hits, string csvFile, string outputDirectory, bool breakProteinsApart = false)
{
    // Lookup from PSM file name to its hit, used when walking the CSV rows below.
    Dictionary<string, LocalizedHit> hitsdict = new Dictionary<string, LocalizedHit>();

    // Group all the localized Hits into proteins
    Dictionary<string, Protein> proteins = new Dictionary<string, Protein>();
    foreach (LocalizedHit hit in hits)
    {
        hitsdict.Add(hit.PSM.Filename, hit);
        string defline = hit.PSM.Defline;
        if (breakProteinsApart)
        {
            // One protein per '|'-separated group name.
            string[] groups = hit.PSM.ProteinGroup.Split('|');
            foreach (string group in groups)
            {
                Protein prot;
                if (!proteins.TryGetValue(group, out prot))
                {
                    prot = new Protein(group, defline);
                    proteins.Add(group, prot);
                }
                prot.AddHit(hit);
            }
        }
        else
        {
            // The whole protein group string is the key.
            Protein prot;
            if (!proteins.TryGetValue(hit.PSM.ProteinGroup, out prot))
            {
                prot = new Protein(hit.PSM.ProteinGroup, defline);
                proteins.Add(hit.PSM.ProteinGroup, prot);
            }
            prot.AddHit(hit);
        }
    }

    // writer receives every reported row; localizedWriter only the localized ones.
    using (StreamWriter writer = new StreamWriter(Path.Combine(outputDirectory, Path.GetFileNameWithoutExtension(csvFile) + "_all.csv")), localizedWriter = new StreamWriter(Path.Combine(outputDirectory, Path.GetFileNameWithoutExtension(csvFile) + "_localized.csv")))
    {
        using (CsvReader reader = new CsvReader(new StreamReader(csvFile), true))
        {
            LocalizedHit hit = null;
            headerInfo = reader.GetFieldHeaders();

            // Record the first header ending in "NL)" and the column just before
            // "Channels Detected".
            bool tqFound = false;
            for (int i = 0; i < reader.FieldCount; i++)
            {
                if (headerInfo[i].EndsWith("NL)"))
                {
                    if (!tqFound)
                    {
                        FirstQuantColumn = i;
                        tqFound = true;
                    }
                }
                if(headerInfo[i] == "Channels Detected") LastQuantColumn = i-1;
            }

            // The input header followed by the localization columns appended per row.
            string header = string.Join(",", headerInfo) + ",# Isoforms,# of Considered Fragments,Localized?,Delta Score,Best Isoform,Spectral Matches,% TIC,Second Best Isoform,Second Spectral Matches,Second % TIC";
            writer.WriteLine(header);
            localizedWriter.WriteLine(header);

            while (reader.ReadNextRecord())
            {
                // Rows without modifications are skipped.
                string mods = reader["Mods"];
                if (string.IsNullOrEmpty(mods)) continue;
                List<Modification> variableMods = OmssaModification.ParseModificationLine(mods).Select(item => item.Item1).OfType<Modification>().ToList();

                // Only keep things with quantified Modifications
                if (!variableMods.Any(mod => QuantifiedModifications.Contains(mod))) continue;

                // Rows that have no matching hit are skipped.
                string filename = reader["Filename/id"];
                if(!hitsdict.TryGetValue(filename, out hit)) continue;

                string[] data = new string[reader.FieldCount];
                reader.CopyCurrentRecordTo(data);
                hit.omssapsm = data;

                // Re-emit the input row; fields containing a comma are wrapped in quotes.
                // NOTE(review): embedded double quotes are not escaped here.
                StringBuilder sb = new StringBuilder();
                foreach (string datum in data)
                {
                    if (datum.Contains(','))
                    {
                        sb.Append("\"");
                        sb.Append(datum);
                        sb.Append("\"");
                    }
                    else sb.Append(datum);
                    sb.Append(',');
                }

                // Localization columns, in the order announced by the header.
                sb.Append(hit.PSM.Isoforms);
                sb.Append(',');
                sb.Append(hit.LocalizedIsoform.Fragments.Count);
                sb.Append(',');
                sb.Append(hit.IsLocalized);
                sb.Append(',');
                sb.Append(hit.MatchDifference);
                sb.Append(',');
                sb.Append(hit.LocalizedIsoform.SequenceWithModifications);
                sb.Append(',');
                sb.Append(hit.LocalizedIsoform.SpectralMatch.Matches);
                sb.Append(',');
                sb.Append(hit.LocalizedIsoform.SpectralMatch.PercentTIC);

                // The second-best isoform columns are only written when more than one
                // isoform exists; otherwise the row is three columns shorter.
                if (hit.PSM.Isoforms > 1)
                {
                    sb.Append(',');
                    sb.Append(hit.SecondBestPeptideIsoform.SequenceWithModifications);
                    sb.Append(',');
                    sb.Append(hit.SecondBestPeptideIsoform.SpectralMatch.Matches);
                    sb.Append(',');
                    sb.Append(hit.SecondBestPeptideIsoform.SpectralMatch.PercentTIC);
                }

                if(hit.IsLocalized) localizedWriter.WriteLine(sb.ToString());
                writer.WriteLine(sb.ToString());
            }
        }
    }
    return proteins.Values.ToList();
}
/// <summary>
/// Handles the Import button: reads the selected CSV file into <c>bulkentries</c>,
/// applies the bulk entry rules stored for the selected account, then closes the
/// dialog with <c>DialogResult.OK</c>.
/// </summary>
/// <remarks>
/// Column names come from the second cell of the rows of <c>tblFields</c>: row 0 = date,
/// row 1 = account code, row 2 = reference, row 3 = description, row 4 = aging code,
/// row 5 = amount. A cell equal to "&lt;NOT MAPPED&gt;" means the value is not read from
/// the file and a default is used instead.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnImport_Click(object sender, EventArgs e)
{
    // Nothing is imported while the form has validation errors.
    if (!ValidationProvider.Validate()) { return; }
    try
    {
        bulkentries.Clear();

        // The first character of the separator drop-down is the delimiter.
        // NOTE(review): the meaning of the trailing `true` argument is not visible here
        // - confirm against the CsvReader constructor in use.
        using (CsvReader csv = new CsvReader(new StreamReader((String)txtFile.EditValue), true, ddlSeperator.Text[0], CsvReader.DefaultQuote, CsvReader.DefaultEscape, CsvReader.DefaultComment, true))
        {
            while (csv.ReadNextRecord())
            {
                try
                {
                    // Non-archived account whose CodeSub equals the mapped account column
                    // (or "" when that column is not mapped).
                    DB.VW_Account Account = DataContext.ReadonlyContext.VW_Account .Where(n => n.Archived.Equals(false) && n.CodeSub.Equals(tblFields.Rows[1][1].ToString() != "<NOT MAPPED>" ? ParseString(csv[tblFields.Rows[1][1].ToString()]) : "")).FirstOrDefault();

                    // First aging record (lowest Id) whose Code contains the mapped aging column.
                    DB.GLX_Aging aging = DataContext.EntityAccountingContext.GLX_Aging .Where(n => n.Code.Contains(tblFields.Rows[4][1].ToString() != "<NOT MAPPED>" ? ParseString(csv[tblFields.Rows[4][1].ToString()]) : "")).OrderBy(o => o.Id) .FirstOrDefault();

                    //TODO : Need to make this so that you can choose account on Payment/Receipt screens
                    // Rows without a matching account are remembered and skipped.
                    if (Account == null) { skippedAccounts.Add(tblFields.Rows[1][1].ToString() != "<NOT MAPPED>" ? ParseString(csv[tblFields.Rows[1][1].ToString()]) : ""); continue; }

                    bulkentries.Add(new BulkPaymentEntry()
                    {
                        //PeriodId = DB.SYS_PeriodGetCurrentPeriod().Id,
                        PeriodId = Convert.ToInt64(ddlPeriod.EditValue),
                        Date = tblFields.Rows[0][1].ToString() != "<NOT MAPPED>" ? ParseDate(csv[tblFields.Rows[0][1].ToString()]) : DateTime.Now,
                        AccountId = Account != null ? (Int64?)Account.Id : null,
                        Reference = tblFields.Rows[2][1].ToString() != "<NOT MAPPED>" ? ParseString(csv[tblFields.Rows[2][1].ToString()]) : "",
                        Description = tblFields.Rows[3][1].ToString() != "<NOT MAPPED>" ? ParseString(csv[tblFields.Rows[3][1].ToString()]) : "",
                        // Aging id 1 is used when no aging record matched.
                        AgingId = aging != null ? aging.Id : (Int16)1,
                        Settlement = 0,
                        // The sign is flipped when "reverse amounts" is ticked.
                        Amount = tblFields.Rows[5][1].ToString() != "<NOT MAPPED>" ? ParseDecimal(csv[tblFields.Rows[5][1].ToString()]) * (chkReverseAmounts.Checked ? -1 : 1) : 0
                    });
                }
                catch (Exception)
                {
                    // Any failure on a row aborts the whole import: show the offending
                    // row to the user and leave without closing the dialog.
                    String [] rowdata = new string[csv.FieldCount];
                    csv.CopyCurrentRecordTo(rowdata);
                    CDS.Client.Desktop.Essential.BaseAlert.ShowAlert("Parsing Error", String.Format("There was a problem reading a value from the row and the import will not continue. The row data provided: \n{0}.", String.Join(ddlSeperator.Text, rowdata)), Essential.BaseAlert.Buttons.Ok, Essential.BaseAlert.Icons.Warning);
                    return;
                }
            }
        }

        CriteriaToExpressionConverter converter = new CriteriaToExpressionConverter();
        DevExpress.XtraFilterEditor.FilterEditorControl filRule = new DevExpress.XtraFilterEditor.FilterEditorControl();

        // Apply each rule stored for the selected account to the imported entries.
        foreach (DB.GLX_BulkEntryRule rule in DataContext.EntityAccountingContext.GLX_BulkEntryRule.Where(n => n.EntityId == (Int64)ddlAccount.EditValue))
        {
            filRule.FilterString = rule.EntryRule;
            filRule.ApplyFilter();
            //http://www.devexpress.com/Support/Center/p/Q357031.aspx
            // Materialized with ToList() so entries can be removed from bulkentries below.
            var qry = ((IQueryable <BulkPaymentEntry>)bulkentries.AsQueryable().AppendWhere(converter, filRule.FilterCriteria)).ToList();
            if (rule.EntryAction == "N")
            {
                // Do not import
                foreach (BulkPaymentEntry row in qry) { bulkentries.Remove(row); }
            }
            else if (rule.EntryAction == "Y")
            {
                // Import
                foreach (BulkPaymentEntry row in qry) { row.AccountId = rule.EntityContraId; }
            }
        }

        // Get taxes
        // NOTE(review): the list is loaded but not used in this method.
        List <DB.GLX_Tax> taxes = DataContext.EntityAccountingContext.GLX_Tax.ToList();

        // Populate periods
        this.DialogResult = System.Windows.Forms.DialogResult.OK;
        this.Close();
    }
    catch (Exception ex)
    {
        // ex is passed by reference, so the handler may replace it before it is thrown.
        // NOTE(review): `throw ex` resets the stack trace - confirm this is intended.
        if (CDS.Shared.Exception.UserInterfaceExceptionHandler.HandleException(ref ex)) { throw ex; }
    }
}
/// <summary>
/// Loads a CSV file into a dictionary of <c>UScensusDataDAO</c> objects, optionally
/// sorting it and serializing it to JSON.
/// </summary>
/// <param name="Path">Path of the CSV file (with a header row).</param>
/// <param name="classDAOname">Must equal "UScensusDataDAO" for any data to be loaded.</param>
/// <param name="jsonForm">0 to also return the values serialized as JSON.</param>
/// <param name="sort">0 to sort the records with <c>SortTheList</c>.</param>
/// <param name="columnNumber">Forwarded to <c>SortTheList</c>.</param>
/// <param name="stringIsCharOrInt">Forwarded to <c>SortTheList</c>.</param>
/// <returns>
/// <c>0</c> when <paramref name="classDAOname"/> is not "UScensusDataDAO"; otherwise the
/// tuple (records, record count, headers), with the JSON text appended as a fourth
/// element when <paramref name="jsonForm"/> is 0. Key 0 of the dictionary holds the
/// object built from the header row.
/// </returns>
/// <exception cref="StateCensusException">The file has no data records.</exception>
/// <exception cref="Exception">
/// Any other failure, rethrown with the same message and the original exception
/// attached as the inner exception.
/// </exception>
public dynamic ReadData(string Path, string classDAOname, int jsonForm, int sort, int columnNumber, int stringIsCharOrInt)
{
    int numberOfRecord = 0;
    try
    {
        using StreamReader streamread = new StreamReader(Path);
        using CsvReader csv = new CsvReader(streamread, true);

        int fieldCount = csv.FieldCount;
        dynamic headers = csv.GetFieldHeaders();

        if (classDAOname.Equals("UScensusDataDAO"))
        {
            Dictionary<int, UScensusDataDAO> record = new Dictionary<int, UScensusDataDAO>();

            // Entry 0 is built from the header row; data records start at key 1.
            record.Add(numberOfRecord, new UScensusDataDAO(headers));

            while (csv.ReadNextRecord())
            {
                numberOfRecord++;

                // A fresh array per record, so each object receives its own values.
                string[] temp = new string[fieldCount];
                csv.CopyCurrentRecordTo(temp);
                record.Add(numberOfRecord, new UScensusDataDAO(temp));
            }

            if (numberOfRecord == 0)
            {
                throw new StateCensusException(StateCensusException.ExceptionType.FILE_HAS_NO_DATA, "file has not any data");
            }

            // 0 means "yes" for both the sort and the jsonForm switch.
            if (sort == 0)
            {
                record = SortTheList(record, columnNumber, fieldCount, stringIsCharOrInt);
            }

            if (jsonForm == 0)
            {
                var jsonFormdata = JsonSerializer.Serialize(record.Values);
                return (record, numberOfRecord, headers, jsonFormdata);
            }

            return (record, numberOfRecord, headers);
        }

        return 0;
    }
    catch (StateCensusException e)
    {
        throw new StateCensusException(StateCensusException.ExceptionType.FILE_HAS_NO_DATA, e.Message);
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);

        // Same exception type and message as before; the original is kept as the
        // inner exception so its type and stack trace are not lost.
        throw new Exception(e.Message, e);
    }
}
/// <summary>
/// Calls <c>CopyCurrentRecordTo</c> with a null destination array.
/// </summary>
/// <remarks>
/// The body asserts nothing itself; presumably an expected-exception attribute outside
/// this view supplies the check (verify against the declaration).
/// </remarks>
public void CopyCurrentRecordTo_Null_ThrowsArgumentNullException()
{
    var input = new StringReader(CsvReaderSampleData.SampleData1);
    using (var reader = new CsvReader(input, false))
    {
        reader.CopyCurrentRecordTo(null);
    }
}
/// <summary>
/// Writes the FDR summary CSV and, for every input file, the per-file scans / PSM / peptide CSV outputs,
/// while also appending each line to the combined scans.csv, psms.csv and decoy_psms.csv files.
/// </summary>
/// <param name="csvFiles">Input files whose CSV records are re-read, annotated and written out.</param>
/// <param name="isBatched">
/// When true, the combined peptides.csv / decoy_peptides.csv files and a "Batched Results" summary row
/// are written as well.
/// </param>
/// <remarks>
/// All long-lived writers are closed in a finally block, so an exception part-way through no longer
/// leaves output files open with unflushed data.
/// </remarks>
private void WriteFiles(IEnumerable<InputFile> csvFiles, bool isBatched = false)
{
    Log("Writing output files...");

    const string headerLine = "Spectrum number,Filename/id,Peptide,E-value,Mass,gi,Accession,Start,Stop,Defline,Mods,Charge,Theo Mass,P-value,NIST score,Precursor Isolation m/z (Th),Precursor Theoretical m/z (Th),Precursor Isotope Selected, Adjusted Precursor m/z (Th),Precursor Mass Error (ppm),Adjusted Precursor Mass Error (ppm)";

    // Number of leading input columns copied through to the output (up to and including "NIST score").
    const int inputColumnCount = 15;

    // Every writer that outlives a single input file is registered here right after creation,
    // so the finally block can close whatever was successfully opened.
    List<StreamWriter> openWriters = new List<StreamWriter>();

    try
    {
        // NOTE(review): "hh" is the 12-hour clock, so AM/PM runs at the same time share a name - confirm intended.
        string outputSummaryFile = Path.Combine(_outputFolder, string.Format("FDR summary_{0:yyyyMMddhhmmss}.csv", DateTime.Now));
        StreamWriter summaryWriter = new StreamWriter(outputSummaryFile);
        openWriters.Add(summaryWriter);

        Dictionary<PSM, Peptide> overallBestPsms = null;
        StreamWriter batchTargetUniqueWriter = null, batchDecoyUniqueWriter = null;
        bool firstHeader = true;
        int batchTotalPsms = 0;
        int batchTotalPeptides = 0;
        int batchTotalDecoyPsms = 0;
        int batchTotalDecoyPeptides = 0;

        StreamWriter batchScansWriter = new StreamWriter(Path.Combine(_outputFolder, "scans.csv"));
        openWriters.Add(batchScansWriter);
        StreamWriter batchDecoyWriter = new StreamWriter(Path.Combine(_outputFolder, "decoy_psms.csv"));
        openWriters.Add(batchDecoyWriter);
        StreamWriter batchTargetWriter = new StreamWriter(Path.Combine(_outputFolder, "psms.csv"));
        openWriters.Add(batchTargetWriter);

        if (isBatched)
        {
            batchTargetUniqueWriter = new StreamWriter(Path.Combine(_outputFolder, "peptides.csv"));
            openWriters.Add(batchTargetUniqueWriter);
            batchDecoyUniqueWriter = new StreamWriter(Path.Combine(_outputFolder, "decoy_peptides.csv"));
            openWriters.Add(batchDecoyUniqueWriter);
            overallBestPsms = _allPeptides.ToDictionary(p => p.BestMatch);
        }

        summaryWriter.WriteLine("CSV File,Raw File,Total MS Spectra,Total MS/MS Spectra,Average MS/MS Inj Time (ms),Max MS/MS Inj Time (ms),Average # of MS/MS per Cycle, Max # of MS/MS per Cycle,Total Scored Spectra,Total PSMs,Systematic Precursor Mass Error (ppm),Maximum Precursor Mass Error (ppm),E-Value Threshold,PSMs,Decoy PSMs,PSM FDR (%),Peptides,Decoy Peptides,Peptide FDR (%)");

        StringBuilder summaryStringBuilder = new StringBuilder();

        // These totals are only consumed by the disabled "Average Results" row further down.
        int totalPsms = 0;
        double totalError = 0;
        double totalMaximalError = 0;
        int totalPeptides = 0;
        int totalDecoyPeptides = 0;
        double totalThreshold = 0;
        int totalDecoyPsms = 0;
        int totalMS = 0;
        int totalInitialPsms = 0;
        int totalMSMS = 0;

        foreach (InputFile csvFile in csvFiles)
        {
            summaryStringBuilder.Clear();

            string baseName = Path.GetFileNameWithoutExtension(csvFile.FilePath);
            string outputTargetFile = Path.Combine(_outputPsmFolder, baseName + "_psms.csv");
            string outputDecoyFile = Path.Combine(_outputPsmFolder, baseName + "_decoy_psms.csv");
            string outputScansFile = Path.Combine(_outputScansFolder, baseName + "_scans.csv");
            string outputTargetUniqueFile = Path.Combine(_outputPeptideFolder, baseName + "_peptides.csv");
            string outputDecoyUniqueFile = Path.Combine(_outputPeptideFolder, baseName + "_decoy_peptides.csv");

            Log("Writing output files for " + Path.GetFileNameWithoutExtension(csvFile.Name) + " in " + _outputFolder + "...");

            // ---- Build this file's row of the summary CSV ----
            summaryStringBuilder.Append(csvFile.FilePath);
            summaryStringBuilder.Append(',');
            summaryStringBuilder.Append(csvFile.RawFilePath);
            summaryStringBuilder.Append(',');
            summaryStringBuilder.Append(csvFile.TotalMSscans);
            totalMS += csvFile.TotalMSscans;
            summaryStringBuilder.Append(',');
            summaryStringBuilder.Append(csvFile.TotalMSMSscans);
            totalMSMS += csvFile.TotalMSMSscans;
            summaryStringBuilder.Append(',');
            summaryStringBuilder.Append(csvFile.AverageMSMSInjectionTime.ToString("F5"));
            summaryStringBuilder.Append(',');
            summaryStringBuilder.Append(csvFile.MaxMSMSInjectionTime);
            summaryStringBuilder.Append(',');
            summaryStringBuilder.Append(csvFile.AverageMSMSSCansBetweenMS.ToString("F5"));
            summaryStringBuilder.Append(',');
            summaryStringBuilder.Append(csvFile.MaxMSMSScansBetweenMS);
            summaryStringBuilder.Append(',');
            summaryStringBuilder.Append(csvFile.TotalScans);
            summaryStringBuilder.Append(',');
            summaryStringBuilder.Append(csvFile.PsmCount);
            totalInitialPsms += csvFile.PsmCount;
            summaryStringBuilder.Append(',');
            summaryStringBuilder.Append(csvFile.SystematicPrecursorMassError.ToString("F5"));
            totalError += csvFile.SystematicPrecursorMassError;
            summaryStringBuilder.Append(',');
            summaryStringBuilder.Append(csvFile.PrecursorMassToleranceThreshold.ToString("F5"));
            totalMaximalError += csvFile.PrecursorMassToleranceThreshold;
            summaryStringBuilder.Append(',');
            summaryStringBuilder.Append(csvFile.ScoreThreshold);
            totalThreshold += csvFile.ScoreThreshold;
            summaryStringBuilder.Append(',');

            int totalPSMs = csvFile.FdrFilteredPSMs.Count;
            int targetPSMs = csvFile.FdrFilteredPSMs.Count(p => !p.IsDecoy);
            int decoyPSMs = totalPSMs - targetPSMs;
            summaryStringBuilder.Append(targetPSMs);
            summaryStringBuilder.Append(',');
            summaryStringBuilder.Append(decoyPSMs);
            summaryStringBuilder.Append(',');
            // Floating-point division: zero targets yields NaN/Infinity rather than throwing.
            summaryStringBuilder.Append(100*decoyPSMs/(double) targetPSMs);
            summaryStringBuilder.Append(',');

            int total = csvFile.FdrFilteredPeptides.Count;
            int targets = csvFile.FdrFilteredPeptides.Count(p => !p.IsDecoy);
            int decoys = total - targets;
            summaryStringBuilder.Append(targets);
            summaryStringBuilder.Append(',');
            summaryStringBuilder.Append(decoys);
            summaryStringBuilder.Append(',');
            summaryStringBuilder.Append(100*decoys/(double) targets);

            // ---- Re-read the input CSV and write the per-file and combined outputs ----
            using (StreamWriter targetWriter = new StreamWriter(outputTargetFile),
                                decoyWriter = new StreamWriter(outputDecoyFile),
                                scansWriter = new StreamWriter(outputScansFile),
                                targetUniqueWriter = new StreamWriter(outputTargetUniqueFile),
                                decoyUniqueWriter = new StreamWriter(outputDecoyUniqueFile))
            {
                // PSMs are looked up by "file name + peptide sequence".
                Dictionary<string, PSM> allPsms = csvFile.PeptideSpectralMatches.ToDictionary(p => p.FileName + p.Peptide.Sequence);
                HashSet<PSM> fdrPSMs = new HashSet<PSM>(csvFile.FdrFilteredPSMs);
                Dictionary<PSM, Peptide> fdrPeptides = csvFile.FdrFilteredPeptides.ToDictionary(p => p.BestMatch);
                HashSet<int> scansProcessed = new HashSet<int>();
                StringBuilder sb = new StringBuilder();

                using (CsvReader reader = new CsvReader(new StreamReader(csvFile.FilePath), true))
                {
                    string[] headers = reader.GetFieldHeaders();
                    int headerCount = headers.Length;
                    int modsColumnIndex = 10; //reader.GetFieldIndex("Mods");
                    int chargeColumnIndex = 11;
                    string[] data = new string[headerCount];

                    decoyWriter.WriteLine(headerLine);
                    targetWriter.WriteLine(headerLine);
                    scansWriter.WriteLine(headerLine);
                    targetUniqueWriter.WriteLine(headerLine);
                    decoyUniqueWriter.WriteLine(headerLine);

                    // The combined files get their header once, when the first input file is processed.
                    if (firstHeader)
                    {
                        batchScansWriter.WriteLine(headerLine);
                        batchDecoyWriter.WriteLine(headerLine);
                        batchTargetWriter.WriteLine(headerLine);
                        if (isBatched)
                        {
                            batchDecoyUniqueWriter.WriteLine(headerLine);
                            batchTargetUniqueWriter.WriteLine(headerLine);
                        }
                        firstHeader = false;
                    }

                    while (reader.ReadNextRecord())
                    {
                        PSM psm;
                        int spectralNumber = int.Parse(reader["Spectrum number"]);

                        // Only the first usable record of each spectrum is written.
                        if (scansProcessed.Contains(spectralNumber))
                        {
                            continue;
                        }

                        string fileName = reader["Filename/id"];
                        string sequence = reader["Peptide"].ToUpper();
                        if (!allPsms.TryGetValue(fileName + sequence, out psm))
                        {
                            continue;
                        }

                        bool isNegative = psm.Charge < 0;
                        scansProcessed.Add(spectralNumber);
                        sb.Clear();
                        reader.CopyCurrentRecordTo(data);

                        for (int i = 0; i < inputColumnCount; i++)
                        {
                            string datum = data[i];
                            if (_includeFixedMods && i == modsColumnIndex)
                            {
                                datum = OmssaModification.WriteModificationString(psm.Peptide);
                            }

                            // Replace the charge if negative
                            if (isNegative && i == chargeColumnIndex)
                            {
                                sb.Append(psm.Charge);
                                sb.Append(',');
                                continue;
                            }

                            // A field holding a quote or a comma must be enclosed in quotes, with embedded
                            // quotes doubled; doubling without enclosing would corrupt the value on re-read.
                            bool hasQuote = datum.Contains('"');
                            bool needsQuoting = hasQuote || datum.Contains(',');
                            if (hasQuote)
                            {
                                datum = datum.Replace("\"", "\"\"");
                            }

                            if (needsQuoting)
                            {
                                sb.Append('"');
                                sb.Append(datum);
                                sb.Append('"');
                            }
                            else
                            {
                                sb.Append(datum);
                            }
                            sb.Append(',');
                        }

                        // Six computed precursor columns follow the copied input columns.
                        sb.Append(psm.IsolationMz);
                        sb.Append(',');
                        sb.Append(Mass.MzFromMass(psm.MonoisotopicMass, psm.Charge));
                        sb.Append(',');
                        sb.Append(psm.IsotopeSelected);
                        sb.Append(',');
                        sb.Append(Mass.MzFromMass(psm.AdjustedIsolationMass, psm.Charge));
                        sb.Append(',');
                        sb.Append(psm.PrecursorMassError);
                        sb.Append(',');
                        sb.Append(psm.CorrectedPrecursorMassError);

                        string line = sb.ToString();
                        scansWriter.WriteLine(line);
                        batchScansWriter.WriteLine(line);

                        // Passes FDR, write out
                        if (!fdrPSMs.Contains(psm))
                        {
                            continue;
                        }

                        if (psm.IsDecoy)
                        {
                            totalDecoyPsms++;
                            batchTotalDecoyPsms++;
                            decoyWriter.WriteLine(line);
                            batchDecoyWriter.WriteLine(line);
                        }
                        else
                        {
                            totalPsms++;
                            batchTotalPsms++;
                            targetWriter.WriteLine(line);
                            batchTargetWriter.WriteLine(line);
                        }

                        Peptide pep;

                        // Is this the best unique psm?
                        if (fdrPeptides.TryGetValue(psm, out pep))
                        {
                            if (pep.IsDecoy)
                            {
                                totalDecoyPeptides++;
                                decoyUniqueWriter.WriteLine(line);
                            }
                            else
                            {
                                totalPeptides++;
                                targetUniqueWriter.WriteLine(line);
                            }
                        }

                        if (isBatched && overallBestPsms.TryGetValue(psm, out pep))
                        {
                            if (pep.IsDecoy)
                            {
                                batchTotalDecoyPeptides++;
                                batchDecoyUniqueWriter.WriteLine(line);
                            }
                            else
                            {
                                batchTotalPeptides++;
                                batchTargetUniqueWriter.WriteLine(line);
                            }
                        }
                    }
                }
            }

            summaryWriter.WriteLine(summaryStringBuilder.ToString());
        }

        summaryWriter.WriteLine();

        //int count = csvFiles.Count;
        //totalPsms /= count;
        //totalDecoyPsms /= count;
        //double totalPsmFdr = 100*totalDecoyPsms/(double) totalPsms;
        //totalPeptides /= count;
        //totalDecoyPeptides /= count;
        //double totalPeptideFdr = 100*totalDecoyPeptides/(double) totalPeptides;
        //summaryWriter.WriteLine("Average Results,,{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10},{11},{12},{13},{14}", totalMS / count, totalMSMS / count, "", "", totalScans / count, totalInitialPsms / count, totalError / count, totalMaximalError / count, totalThreshold / count, totalPsms, totalDecoyPsms, "", totalPeptides, totalDecoyPeptides, "");

        if (isBatched)
        {
            double psmFDR = 100*batchTotalDecoyPsms/(double) batchTotalPsms;
            double peptideFDR = 100 *batchTotalDecoyPeptides/(double) batchTotalPeptides;
            summaryWriter.WriteLine("Batched Results,,,,,,,,,,,,,{0},{1},{2},{3},{4},{5}", batchTotalPsms, batchTotalDecoyPsms, psmFDR, batchTotalPeptides, batchTotalDecoyPeptides, peptideFDR);
            Log(string.Format("{0:N0} peptides ({1:N0} decoys FDR = {2:F4}) in total [Batched]", batchTotalPeptides, batchTotalDecoyPeptides, peptideFDR));
        }
    }
    finally
    {
        // Runs on success and on failure so no output file is left open or unflushed.
        foreach (StreamWriter writer in openWriters)
        {
            writer.Close();
        }
    }
}
/// <summary>
/// Reads three records where the second ends in an empty field and the third is short, with missing
/// fields replaced by null, and checks the copied contents of each record and the end of input.
/// </summary>
public void LastFieldEmptyFollowedByMissingFieldsOnNextRecord()
{
    const string Data =
        "a,b,c,d,e"
        + "\na,b,c,d,"
        + "\na,b,";

    // One expected row per input record, in reading order.
    string[][] expectedRecords =
    {
        new string[] { "a", "b", "c", "d", "e" },
        new string[] { "a", "b", "c", "d", "" },
        new string[] { "a", "b", "", null, null },
    };

    var layout = new CsvLayout(hasHeaders:false);
    var behaviour = new CsvBehaviour(missingFieldAction:MissingFieldAction.ReplaceByNull);

    using (var csv = new CsvReader(new StringReader(Data), CsvReader.DefaultBufferSize, layout, behaviour))
    {
        // The same buffer is reused for every record, as a caller typically would.
        var record = new string[5];

        foreach (string[] expected in expectedRecords)
        {
            Assert.IsTrue(csv.ReadNextRecord());
            csv.CopyCurrentRecordTo(record);
            CollectionAssert.AreEqual(expected, record);
        }

        // No fourth record exists.
        Assert.IsFalse(csv.ReadNextRecord());
    }
}
public void Phosphinate() { StreamWriter log = null; IXRawfile2 raw = null; StreamReader csv = null; StreamWriter non_phospho_output = null; StreamWriter localized_phospho_output = null; StreamWriter unlocalized_phospho_output = null; StreamWriter motifX = null; try { onStarting(new EventArgs()); onUpdateProgress(new ProgressEventArgs(0)); StringBuilder fixed_modifications_sb = new StringBuilder(); foreach (Modification modification in fixedModifications) { fixed_modifications_sb.Append(modification.Name + ", "); } if (fixed_modifications_sb.Length > 0) { fixed_modifications_sb = fixed_modifications_sb.Remove(fixed_modifications_sb.Length - 2, 2); } string fixed_modifications = fixed_modifications_sb.ToString(); if (!Directory.Exists(outputFolder)) { Directory.CreateDirectory(outputFolder); } log = new StreamWriter(Path.Combine(outputFolder, "Phosphinator_log.txt")); log.AutoFlush = true; log.WriteLine("Phosphinator PARAMETERS"); log.WriteLine("Fixed Modifications: " + fixed_modifications); log.WriteLine("Fragment Intensity Threshold: " + intensityThreshold.ToString() + " (" + intensityThresholdType.ToString() + ')'); log.WriteLine("Fragment m/z Tolerance (Th): " + mzTolerance.ToString()); log.WriteLine("Ambiguity Score Threshold: " + ambiguityScoreThreshold.ToString()); log.WriteLine("Eliminate Precursor Interference: " + eliminatePrecursorInterference.ToString()); if (eliminatePrecursorInterference) { log.WriteLine("Precursor Interference Threshold: " + precursorInterferenceThreshold.ToString()); } if (motifXOutput) { log.WriteLine("Motif-X Fasta Protein Database Filepath: " + motifXFastaProteinDatabaseFilepath); log.WriteLine("Motif-X Window Size: " + motifXWindowSize.ToString()); } log.WriteLine(); ProteinSiteCounter identified_sites_by_protein = new ProteinSiteCounter(); ProteinSiteCounter localized_sites_by_protein = new ProteinSiteCounter(); ProteinSiteCounter unlocalized_sites_by_protein = new ProteinSiteCounter(); Dictionary<string, 
Dictionary<KeyValuePair<int, string>, List<string>>> localized = new Dictionary<string, Dictionary<KeyValuePair<int, string>, List<string>>>(); Dictionary<string, Dictionary<KeyValuePair<int, string>, List<string>>> unlocalized = new Dictionary<string, Dictionary<KeyValuePair<int, string>, List<string>>>(); non_phospho_output = new StreamWriter(Path.Combine(outputFolder, "non_phospho.csv")); localized_phospho_output = new StreamWriter(Path.Combine(outputFolder, "localized_phospho.csv")); unlocalized_phospho_output = new StreamWriter(Path.Combine(outputFolder, "unlocalized_phospho.csv")); ProteinDictionary proteins = null; Dictionary<string, int> motifs = null; if (motifXOutput) { proteins = new ProteinDictionary(motifXFastaProteinDatabaseFilepath); motifs = new Dictionary<string, int>(); motifX = new StreamWriter(Path.Combine(outputFolder, "motif-x.txt")); } raw = (IXRawfile2) new MSFileReader_XRawfile(); string header_line = null; string[] headers = null; bool quant = false; foreach (string csv_filepath in csvFilepaths) { onStartingFile(new FilepathEventArgs(csv_filepath)); csv = new StreamReader(csv_filepath); using (CsvReader reader = new CsvReader(csv, true)) { headers = reader.GetFieldHeaders(); header_line = string.Join(",", headers); quant = headers.Contains("Channels Detected"); string[] lineData = new string[headers.Length]; //header_line = csv.ReadLine(); //quant = header_line.Contains("TQ_"); non_phospho_output.WriteLine(header_line); localized_phospho_output.WriteLine(header_line + ", Number of Theoretical Fragments, Identified Phosphoisoform, Identified Phosphoisoform Number of Matching Fragments, Best Phosphoisoforms, Best Phosphoisoform, Best Phosphoisoform Number of Matching Fragments, Second-Best Phosphoisoform, Second-Best Phosphoisoform Number of Matching Fragments, Identified Phosphoisoform Correct?, Preliminary Localization of All Phosphorylations?, Peptide Phosphorylation Sites, Probability of Spurious Fragment Match, Number of Theoretical 
Site-Determining Fragment Ions, Number of Matching Site-Determining Fragment Ions, Matching Site-Determining Fragment Ions, Probability Values, Ambiguity Scores, Phosphorylation Sites Localized?, All Phosphorylation Sites Localized?"); unlocalized_phospho_output.WriteLine(header_line + ", Number of Theoretical Fragments, Identified Phosphoisoform, Identified Phosphoisoform Number of Matching Fragments, Best Phosphoisoforms, Best Phosphoisoform, Best Phosphoisoform Number of Matching Fragments, Second-Best Phosphoisoform, Second-Best Phosphoisoform Number of Matching Fragments, Identified Phosphoisoform Correct?, Preliminary Localization of All Phosphorylations?, Peptide Phosphorylation Sites, Probability of Spurious Fragment Match, Number of Theoretical Site-Determining Fragment Ions, Number of Matching Site-Determining Fragment Ions, Matching Site-Determining Fragment Ions, Probability Values, Ambiguity Scores, Phosphorylation Sites Localized?, All Phosphorylation Sites Localized?"); while (reader.ReadNextRecord()) { //string line = csv.ReadLine(); //string[] fields = Regex.Split(line, // @",(?!(?<=(?:^|,)\s*\x22(?:[^\x22]|\x22\x22|\\\x22)*,)(?:[^\x22]|\x22\x22|\\\x22)*\x22\s*(?:,|$))"); // // crazy regex to parse CSV with internal double quotes from http://regexlib.com/REDetails.aspx?regexp_id=621 string sequence = reader["Peptide"]; string dynamic_modifications = reader["Mods"]; if (!dynamic_modifications.Contains("phosphorylation")) { //non_phospho_output.WriteLine(line); } else { Peptide identified_phosphopeptide = new Peptide(sequence, fixedModifications, dynamic_modifications); int start_residue = int.Parse(reader["Start"]); int stop_residue = int.Parse(reader["Stop"]); string protein_description = reader["Defline"].Trim('"'); StringBuilder sb = new StringBuilder(); reader.CopyCurrentRecordTo(lineData); foreach (string datum in lineData) { if (datum.Contains(',')) sb.Append("\"" + datum + "\""); else sb.Append(datum); sb.Append(','); } sb.Remove(sb.Length - 
1, 1); string line = sb.ToString(); if (!identified_sites_by_protein.ContainsKey(protein_description)) { identified_sites_by_protein.Add(protein_description, new Dictionary<string, int>()); } foreach (KeyValuePair<int, string> kvp in identified_phosphopeptide.DynamicModifications) { if (kvp.Value.Contains("phosphorylation")) { string site = sequence[kvp.Key - 1] + (start_residue + kvp.Key).ToString(); if (!identified_sites_by_protein[protein_description].ContainsKey(site)) { identified_sites_by_protein[protein_description].Add(site, 0); } identified_sites_by_protein[protein_description][site]++; } } int scan_number = int.Parse(reader["Spectrum number"]); string filenameID = reader["Filename/id"]; FragmentType[] fragment_types = null; if (filenameID.Contains(".ETD.") || filenameID.Contains(".ECD.")) { fragment_types = new FragmentType[] {FragmentType.c, FragmentType.zdot}; } else { fragment_types = new FragmentType[] {FragmentType.b, FragmentType.y}; } string raw_filename = filenameID.Substring(0, filenameID.IndexOf('.')) + ".raw"; int charge = int.Parse(reader["Charge"]); string current_raw_filename = null; raw.GetFileName(ref current_raw_filename); if (current_raw_filename == null || !raw_filename.Equals(Path.GetFileName(current_raw_filename), StringComparison.InvariantCultureIgnoreCase)) { raw.Close(); string[] raw_filepaths = null; if (!string.IsNullOrEmpty(rawFolder) && Directory.Exists(rawFolder)) { raw_filepaths = Directory.GetFiles(rawFolder, raw_filename, SearchOption.AllDirectories); } else { raw_filepaths = Directory.GetFiles(Path.GetDirectoryName(csv_filepath), raw_filename, SearchOption.AllDirectories); } if (raw_filepaths.Length == 0) { throw new FileNotFoundException("No corresponding .raw file found for " + csv_filepath); } if (raw_filepaths.Length > 1) { throw new Exception("Multiple corresponding .raw files found for " + csv_filepath); } raw.Open(raw_filepaths[0]); raw.SetCurrentController(0, 1); } string scan_filter = null; 
raw.GetFilterForScanNum(scan_number, ref scan_filter); string low_mz_scan_filter = scan_filter.Substring(scan_filter.IndexOf('[') + 1); double low_mz = double.Parse(low_mz_scan_filter.Substring(0, low_mz_scan_filter.IndexOf('-'))); string high_mz_scan_filter = scan_filter.Substring(scan_filter.LastIndexOf('-') + 1); double high_mz = double.Parse(high_mz_scan_filter.Substring(0, high_mz_scan_filter.IndexOf(']'))); double[,] spectrum = null; if (scan_filter.Contains("FTMS")) { object labels_obj = null; object flags_obj = null; raw.GetLabelData(ref labels_obj, ref flags_obj, ref scan_number); spectrum = (double[,]) labels_obj; } else { double centroid_width = double.NaN; object spectrum_obj = null; object flags = null; int size = -1; raw.GetMassListFromScanNum(ref scan_number, null, 0, -1, 0, 1, ref centroid_width, ref spectrum_obj, ref flags, ref size); spectrum = (double[,]) spectrum_obj; } double base_peak_mz = double.NaN; double base_peak_intensity = double.NaN; for (int i = spectrum.GetLowerBound(1); i <= spectrum.GetUpperBound(1); i++) { if (double.IsNaN(base_peak_mz) || spectrum[(int) RawLabelDataColumn.Intensity, i] > base_peak_intensity) { base_peak_mz = spectrum[(int) RawLabelDataColumn.MZ, i]; base_peak_intensity = spectrum[(int) RawLabelDataColumn.Intensity, i]; } } double intensity_threshold = intensityThreshold; if (intensityThresholdType == IntensityThresholdType.Relative) { intensity_threshold = (intensityThreshold/100.0)*base_peak_intensity; } double[] parameters = new double[4]; if (!scan_filter.Contains("FTMS") && intensityThresholdType == IntensityThresholdType.SignalToNoiseRatio) { List<double> relative_intensities = new List<double>(); for (int i = spectrum.GetLowerBound(1); i <= spectrum.GetUpperBound(1); i++) { relative_intensities.Add(spectrum[(int) RawLabelDataColumn.Intensity, i]/ base_peak_intensity); } double bin_width = 0.001; int bins = 101; double[][] relative_intensity_histogram = new double[2][]; relative_intensity_histogram[0] = new 
double[bins]; relative_intensity_histogram[1] = new double[bins]; for (int i = relative_intensity_histogram[0].GetLowerBound(0); i <= relative_intensity_histogram[0].GetUpperBound(0); i++) { relative_intensity_histogram[0][i] = i*bin_width; } foreach (double relative_intensity in relative_intensities) { int bin_number = (int) Math.Floor(relative_intensity/bin_width); if (bin_number < bins) { relative_intensity_histogram[1][bin_number]++; } } parameters[0] = 0.0; parameters[1] = 100.0; parameters[2] = 0.0; parameters[3] = 0.001; double[] weights = new double[relative_intensity_histogram[1].Length]; for (int i = weights.GetLowerBound(0); i <= weights.GetUpperBound(0); i++) { weights[i] = 1.0; } LMA lma = new LMA(new GaussianFunctionWithPartials(), parameters, relative_intensity_histogram, weights, new DotNetMatrix.GeneralMatrix(4, 4), 0.001, 5000); lma.Fit(); } List<Peptide> peptides = GetAlternativePhosphoisoformPeptides( identified_phosphopeptide, fixedModifications); List<PhosphopeptideStatistics> all_phosphopeptide_stats = new List<PhosphopeptideStatistics>(peptides.Count); PhosphopeptideStatistics identified_phosphoisoform = null; List<double> ms2_mz_peaks = new List<double>(spectrum.GetLength(1)); for (int i = spectrum.GetLowerBound(1); i <= spectrum.GetUpperBound(1); i++) { double signal_to_noise = scan_filter.Contains("FTMS") ? 
(spectrum[(int) RawLabelDataColumn.Intensity, i] - spectrum[(int) RawLabelDataColumn.NoiseBaseline, i])/ spectrum[(int) RawLabelDataColumn.NoiseLevel, i] : ((spectrum[(int) RawLabelDataColumn.Intensity, i]/base_peak_intensity) - parameters[2])/parameters[3]; if ((intensityThresholdType == IntensityThresholdType.SignalToNoiseRatio && signal_to_noise >= intensity_threshold) || (intensityThresholdType != IntensityThresholdType.SignalToNoiseRatio && spectrum[(int) RawLabelDataColumn.Intensity, i] >= intensity_threshold)) { ms2_mz_peaks.Add(spectrum[(int) RawLabelDataColumn.MZ, i]); } } double mz_range = high_mz - low_mz; Dictionary<double, bool> searched_fragment_mzs = new Dictionary<double, bool>(); foreach (Peptide peptide in peptides) { PhosphopeptideStatistics phosphopeptide_stats = new PhosphopeptideStatistics(peptide); if (peptide.Sequence == identified_phosphopeptide.Sequence) { identified_phosphoisoform = phosphopeptide_stats; } FragmentDictionary fragments = peptide.CalculateFragments(fragment_types); foreach (KeyValuePair<string, Fragment> kvp in fragments) { phosphopeptide_stats.Fragments.Add(kvp.Key, new Dictionary<int, bool>()); for (int fragment_charge = 1; fragment_charge <= (charge >= 3 ? 
2 : 1); fragment_charge++) { if (fragment_charge > 1 && fragment_charge > (double) kvp.Value.Number/peptide.Sequence.Length*charge) { break; } double mz = MZFromMassAndCharge(kvp.Value.Mass, fragment_charge); if (mz < low_mz || mz > high_mz) { continue; } if (!searched_fragment_mzs.ContainsKey(mz)) { bool found = false; foreach (double ms2_mz_peak in ms2_mz_peaks) { if (Math.Abs(ms2_mz_peak - mz) <= mzTolerance) { found = true; break; } else if (ms2_mz_peak > mz + mzTolerance) { break; } } searched_fragment_mzs.Add(mz, found); } phosphopeptide_stats.Fragments[kvp.Key].Add(fragment_charge, searched_fragment_mzs[mz]); } } all_phosphopeptide_stats.Add(phosphopeptide_stats); } all_phosphopeptide_stats.Sort(ComparePhosphopeptidesByDescendingMatchingFragments); PhosphopeptideStatistics best_phosphoisoform = all_phosphopeptide_stats[0]; PhosphopeptideStatistics second_best_phosphoisoform = all_phosphopeptide_stats.Count > 1 ? all_phosphopeptide_stats[1] : null; List<string> best_sequences = new List<string>(); foreach (PhosphopeptideStatistics phosphopeptide_stats in all_phosphopeptide_stats) { if (phosphopeptide_stats.NumberOfMatchingFragments == best_phosphoisoform.NumberOfMatchingFragments) { best_sequences.Add(phosphopeptide_stats.Peptide.Sequence); } else { break; } } bool preliminary_localization = second_best_phosphoisoform == null || best_phosphoisoform.NumberOfMatchingFragments > second_best_phosphoisoform.NumberOfMatchingFragments; bool all_sites_localized = preliminary_localization; Dictionary<string, bool> peptide_sites = new Dictionary<string, bool>(); Dictionary<string, bool> protein_sites = new Dictionary<string, bool>(); string best_sequence = best_phosphoisoform.Peptide.Sequence; for (int i = 0; i < best_sequence.Length; i++) { if (char.IsLower(best_sequence[i])) { if (preliminary_localization && second_best_phosphoisoform == null) { peptide_sites.Add(best_sequence[i] + (i + 1).ToString(), true); protein_sites.Add(best_sequence[i] + (start_residue + 
i).ToString(), true); if (!localized_sites_by_protein.ContainsKey(protein_description)) { localized_sites_by_protein.Add(protein_description, new Dictionary<string, int>()); } string site = best_sequence[i] + (start_residue + i).ToString(); if (!localized_sites_by_protein[protein_description].ContainsKey(site)) { localized_sites_by_protein[protein_description].Add(site, 0); } localized_sites_by_protein[protein_description][site]++; if (motifXOutput) { ExtractMotifs(motifs, proteins, protein_description, best_sequence, start_residue, i); } } else { peptide_sites.Add(best_sequence[i] + (i + 1).ToString(), false); protein_sites.Add(best_sequence[i] + (start_residue + i).ToString(), false); } } } double probability_of_success = double.NaN; List<string> theoretical_site_determining_fragment_ions = new List<string>(); List<string> matching_site_determining_fragment_ions = new List<string>(); List<string> left_site_determining_fragments = new List<string>(); List<string> right_site_determining_fragments = new List<string>(); List<string> site_determining_fragments = new List<string>(); List<string> p_values = new List<string>(); List<string> a_scores = new List<string>(); List<string> sites_localized = new List<string>(); if (preliminary_localization && second_best_phosphoisoform != null) { probability_of_success = (ms2_mz_peaks.Count*2*mzTolerance)/mz_range; for (int i = 0; i < best_sequence.Length; i++) { if (char.IsLower(best_sequence[i])) { int first_phosphorylatable_residue = i - 1; while (first_phosphorylatable_residue >= 0) { if (best_sequence[first_phosphorylatable_residue] == 'S' || best_sequence[first_phosphorylatable_residue] == 'T' || best_sequence[first_phosphorylatable_residue] == 'Y') { break; } else { first_phosphorylatable_residue--; } } int? num_left_theoretical_site_determining_fragment_ions = null; int? 
num_left_matching_site_determining_fragment_ions = null; double left_p_value = double.NaN; double left_a_score = double.NaN; if (first_phosphorylatable_residue >= 0) { num_left_theoretical_site_determining_fragment_ions = 0; num_left_matching_site_determining_fragment_ions = 0; for (int j = first_phosphorylatable_residue + 1; j <= i; j++) { string n_terminal_fragment = fragment_types[0].ToString() + j.ToString(); if (best_phosphoisoform.Fragments.ContainsKey(n_terminal_fragment)) { foreach ( KeyValuePair<int, bool> kvp in best_phosphoisoform.Fragments[n_terminal_fragment]) { num_left_theoretical_site_determining_fragment_ions++; if (kvp.Value) { num_left_matching_site_determining_fragment_ions++; string n_terminal_fragment_string = n_terminal_fragment + "(+" + kvp.Key.ToString() + ')'; if ( !left_site_determining_fragments.Contains( n_terminal_fragment_string)) { left_site_determining_fragments.Add( n_terminal_fragment_string); } } } } string c_terminal_fragment = fragment_types[1].ToString() + (best_sequence.Length - j).ToString(); if (best_phosphoisoform.Fragments.ContainsKey(c_terminal_fragment)) { foreach ( KeyValuePair<int, bool> kvp in best_phosphoisoform.Fragments[c_terminal_fragment]) { num_left_theoretical_site_determining_fragment_ions++; if (kvp.Value) { num_left_matching_site_determining_fragment_ions++; string c_terminal_fragment_string = c_terminal_fragment + "(+" + kvp.Key.ToString() + ')'; if ( !left_site_determining_fragments.Contains( c_terminal_fragment_string)) { left_site_determining_fragments.Add( c_terminal_fragment_string); } } } } } left_p_value = alglib.binomialdistr.binomialcdistribution( num_left_matching_site_determining_fragment_ions.Value - 1, num_left_theoretical_site_determining_fragment_ions.Value, probability_of_success); left_a_score = -10*Math.Log10(left_p_value); } int last_phosphorylatable_residue = i + 1; while (last_phosphorylatable_residue < best_sequence.Length) { if (best_sequence[last_phosphorylatable_residue] == 'S' || 
best_sequence[last_phosphorylatable_residue] == 'T' || best_sequence[last_phosphorylatable_residue] == 'Y') { break; } else { last_phosphorylatable_residue++; } } int? num_right_theoretical_site_determining_fragment_ions = null; int? num_right_matching_site_determining_fragment_ions = null; double right_p_value = double.NaN; double right_a_score = double.NaN; if (last_phosphorylatable_residue < best_sequence.Length) { num_right_theoretical_site_determining_fragment_ions = 0; num_right_matching_site_determining_fragment_ions = 0; for (int j = last_phosphorylatable_residue; j > i; j--) { string n_terminal_fragment = fragment_types[0].ToString() + j.ToString(); if (best_phosphoisoform.Fragments.ContainsKey(n_terminal_fragment)) { foreach ( KeyValuePair<int, bool> kvp in best_phosphoisoform.Fragments[n_terminal_fragment]) { num_right_theoretical_site_determining_fragment_ions++; if (kvp.Value) { num_right_matching_site_determining_fragment_ions++; string n_terminal_fragment_string = n_terminal_fragment + "(+" + kvp.Key.ToString() + ')'; if ( !right_site_determining_fragments.Contains( n_terminal_fragment_string)) { right_site_determining_fragments.Add( n_terminal_fragment_string); } } } } string c_terminal_fragment = fragment_types[1].ToString() + (best_sequence.Length - j).ToString(); if (best_phosphoisoform.Fragments.ContainsKey(c_terminal_fragment)) { foreach ( KeyValuePair<int, bool> kvp in best_phosphoisoform.Fragments[c_terminal_fragment]) { num_right_theoretical_site_determining_fragment_ions++; if (kvp.Value) { num_right_matching_site_determining_fragment_ions++; string c_terminal_fragment_string = c_terminal_fragment + "(+" + kvp.Key.ToString() + ')'; if ( !right_site_determining_fragments.Contains( c_terminal_fragment_string)) { right_site_determining_fragments.Add( c_terminal_fragment_string); } } } } } right_p_value = alglib.binomialdistr.binomialcdistribution( num_right_matching_site_determining_fragment_ions.Value - 1, 
num_right_theoretical_site_determining_fragment_ions.Value, probability_of_success); right_a_score = -10*Math.Log10(right_p_value); } theoretical_site_determining_fragment_ions.Add( (num_left_theoretical_site_determining_fragment_ions.HasValue ? num_left_theoretical_site_determining_fragment_ions.ToString() : "n/a") + " | " + (num_right_theoretical_site_determining_fragment_ions.HasValue ? num_right_theoretical_site_determining_fragment_ions.ToString() : "n/a")); matching_site_determining_fragment_ions.Add( (num_left_matching_site_determining_fragment_ions.HasValue ? num_left_matching_site_determining_fragment_ions.ToString() : "n/a") + " | " + (num_right_matching_site_determining_fragment_ions.HasValue ? num_right_matching_site_determining_fragment_ions.ToString() : "n/a")); site_determining_fragments.Add((left_site_determining_fragments.Count > 0 ? string.Join(",", left_site_determining_fragments.ToArray()) : "n/a") + " | " + (right_site_determining_fragments.Count > 0 ? string.Join(",", right_site_determining_fragments .ToArray()) : "n/a")); p_values.Add((double.IsNaN(left_p_value) ? "n/a" : left_p_value.ToString()) + " | " + (double.IsNaN(right_p_value) ? "n/a" : right_p_value.ToString())); a_scores.Add((double.IsNaN(left_a_score) ? "n/a" : left_a_score.ToString()) + " | " + (double.IsNaN(right_a_score) ? 
"n/a" : right_a_score.ToString())); bool site_localized = (double.IsNaN(left_a_score) || left_a_score >= ambiguityScoreThreshold) && (double.IsNaN(right_a_score) || right_a_score >= ambiguityScoreThreshold); sites_localized.Add(site_localized.ToString().ToUpper()); if (site_localized) { peptide_sites[best_sequence[i] + (i + 1).ToString()] = true; protein_sites[best_sequence[i] + (start_residue + i).ToString()] = true; if (!localized_sites_by_protein.ContainsKey(protein_description)) { localized_sites_by_protein.Add(protein_description, new Dictionary<string, int>()); } string site = best_sequence[i] + (start_residue + i).ToString(); if (!localized_sites_by_protein[protein_description].ContainsKey(site)) { localized_sites_by_protein[protein_description].Add(site, 0); } localized_sites_by_protein[protein_description][site]++; if (motifXOutput) { ExtractMotifs(motifs, proteins, protein_description, best_sequence, start_residue, i); } } if (!site_localized) { all_sites_localized = false; } } } } int phosphorylations = 0; foreach ( string dynamic_modification in best_phosphoisoform.Peptide.DynamicModifications.Values) { if (dynamic_modification.Contains("phosphorylation")) { phosphorylations++; } } string isoform = null; if (all_sites_localized) { foreach (KeyValuePair<string, bool> kvp in protein_sites) { isoform += kvp.Key + ','; } isoform = isoform.Substring(0, isoform.Length - 1); KeyValuePair<int, string> isoform_kvp = new KeyValuePair<int, string>(phosphorylations, isoform); if (!localized.ContainsKey(protein_description)) { localized.Add(protein_description, new Dictionary<KeyValuePair<int, string>, List<string>>()); } if (!localized[protein_description].ContainsKey(isoform_kvp)) { localized[protein_description].Add(isoform_kvp, new List<string>()); } localized[protein_description][isoform_kvp].Add(line); } else { if (preliminary_localization) { foreach (KeyValuePair<string, bool> kvp in protein_sites) { isoform += kvp.Key; if (!kvp.Value) { isoform += '?'; } 
isoform += ','; } } else { for (int i = 0; i < best_sequence.Length; i++) { bool phospho = false; for (int j = 0; j < all_phosphopeptide_stats.Count; j++) { if (all_phosphopeptide_stats[j].NumberOfMatchingFragments < best_phosphoisoform.NumberOfMatchingFragments) { break; } if (char.IsLower(all_phosphopeptide_stats[j].Peptide.Sequence[i])) { phospho = true; } } if (phospho) { isoform += char.ToLower(best_sequence[i]) + (start_residue + i).ToString() + "?,"; } } } isoform = isoform.Substring(0, isoform.Length - 1); KeyValuePair<int, string> isoform_kvp = new KeyValuePair<int, string>(phosphorylations, isoform); if (!unlocalized.ContainsKey(protein_description)) { unlocalized.Add(protein_description, new Dictionary<KeyValuePair<int, string>, List<string>>()); } if (!unlocalized[protein_description].ContainsKey(isoform_kvp)) { unlocalized[protein_description].Add(isoform_kvp, new List<string>()); } unlocalized[protein_description][isoform_kvp].Add(line); } StreamWriter output = all_sites_localized ? 
localized_phospho_output : unlocalized_phospho_output; output.Write(line + ','); output.Write(identified_phosphoisoform.NumberOfTotalFragments.ToString() + ','); output.Write(identified_phosphoisoform.Peptide.Sequence + ','); output.Write(identified_phosphoisoform.NumberOfMatchingFragments.ToString() + ','); for (int s = 0; s < best_sequences.Count; s++) { output.Write(best_sequences[s]); if (s < best_sequences.Count - 1) { output.Write('/'); } } output.Write(','); output.Write(best_phosphoisoform.Peptide.Sequence + ','); output.Write(best_phosphoisoform.NumberOfMatchingFragments.ToString() + ','); if (second_best_phosphoisoform != null) { output.Write(second_best_phosphoisoform.Peptide.Sequence + ','); output.Write(second_best_phosphoisoform.NumberOfMatchingFragments.ToString() + ','); } else { output.Write("n/a,n/a,"); } output.Write( (identified_phosphoisoform.NumberOfMatchingFragments == best_phosphoisoform.NumberOfMatchingFragments).ToString() + ','); output.Write(preliminary_localization.ToString() + ','); string[] peptide_sites_array = new string[peptide_sites.Count]; peptide_sites.Keys.CopyTo(peptide_sites_array, 0); string peptide_sites_array_string = string.Join("; ", peptide_sites_array); AppendFieldToCsv(peptide_sites_array_string, output); output.Write((!double.IsNaN(probability_of_success) ? 
probability_of_success.ToString() : string.Empty) + ','); string theoretical_site_determining_fragment_ions_string = string.Join("; ", theoretical_site_determining_fragment_ions.ToArray()); AppendFieldToCsv(theoretical_site_determining_fragment_ions_string, output); string matching_site_determining_fragment_ions_string = string.Join("; ", matching_site_determining_fragment_ions.ToArray()); AppendFieldToCsv(matching_site_determining_fragment_ions_string, output); string site_determining_fragments_string = string.Join("; ", site_determining_fragments.ToArray()); AppendFieldToCsv(site_determining_fragments_string, output); string p_values_string = string.Join("; ", p_values.ToArray()); AppendFieldToCsv(p_values_string, output); string a_scores_string = string.Join("; ", a_scores.ToArray()); AppendFieldToCsv(a_scores_string, output); string sites_localized_string = string.Join("; ", sites_localized.ToArray()); AppendFieldToCsv(sites_localized_string, output); output.Write(all_sites_localized.ToString().ToUpper()); output.WriteLine(); } double progress = (double) csv.BaseStream.Position/csv.BaseStream.Length; onUpdateProgress(new ProgressEventArgs((int) Math.Round(progress*100.0))); } } csv.Close(); onFinishedFile(new EventArgs()); } raw.Close(); non_phospho_output.Close(); localized_phospho_output.Close(); unlocalized_phospho_output.Close(); log.WriteLine("Identified Phosphoproteins: " + identified_sites_by_protein.Proteins.ToString()); log.WriteLine("Identified Phosphosites: " + identified_sites_by_protein.Sites.ToString()); log.WriteLine(); log.WriteLine("Localized Phosphoproteins: " + localized_sites_by_protein.Proteins.ToString()); log.WriteLine("Localized Phosphosites: " + localized_sites_by_protein.Sites.ToString()); log.WriteLine(); int localized_phosphoisoforms = 0; foreach (KeyValuePair<string, Dictionary<KeyValuePair<int, string>, List<string>>> kvp in localized) { foreach (KeyValuePair<KeyValuePair<int, string>, List<string>> kvp2 in kvp.Value) { 
localized_phosphoisoforms++; } } int unlocalized_phosphoisoforms = 0; foreach (KeyValuePair<string, Dictionary<KeyValuePair<int, string>, List<string>>> kvp in unlocalized) { foreach (KeyValuePair<KeyValuePair<int, string>, List<string>> kvp2 in kvp.Value) { unlocalized_phosphoisoforms++; } } log.WriteLine("Localized Phosphoisoforms: " + localized_phosphoisoforms.ToString()); log.WriteLine("Unlocalized Phosphoisoforms: " + unlocalized_phosphoisoforms.ToString()); log.Close(); using (StreamWriter protein_sites = new StreamWriter(Path.Combine(outputFolder, "localized_protein_phosphosites.csv"))) { protein_sites.WriteLine("Protein Description, Number of Localized Phosphosites"); protein_sites.WriteLine(", Localized Phosphosite"); foreach (KeyValuePair<string, Dictionary<string, int>> kvp in localized_sites_by_protein) { protein_sites.WriteLine((kvp.Key.Contains(",") ? '"' + kvp.Key + '"' : kvp.Key) + ',' + kvp.Value.Count.ToString()); foreach (KeyValuePair<string, int> kvp2 in kvp.Value) { protein_sites.WriteLine(',' + kvp2.Key); } } } using (StreamWriter full_localized_output = new StreamWriter(Path.Combine(outputFolder, "full_localized_phosphoisoforms.csv"))) { //int interference_index = -1; int first_quant_index = -1; int last_quant_index = -1; if (!quant) { full_localized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites, PSMs Identified, Peptides Identified"); } else { full_localized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites, PSMs Identified, PSMs Quantified, Peptides Identified, Peptides Quantified,"); for (int i = 0; i < headers.Length; i++) { if (headers[i].EndsWith("NL)")) { if (first_quant_index < 0) { first_quant_index = i; } } if (first_quant_index >= 0) full_localized_output.Write(' ' + headers[i] + ','); if (headers[i].Equals("Channels Detected")) { last_quant_index = i; } } full_localized_output.Write(" Phosphoisoform Quantified?"); } full_localized_output.WriteLine(); 
full_localized_output.WriteLine(", " + header_line); using (StreamWriter reduced_localized_output = new StreamWriter(Path.Combine(outputFolder, "reduced_localized_phosphoisoforms.csv"))) { if (!quant) { reduced_localized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites,Peptides , PSMs Identified, Peptides Identified"); } else { reduced_localized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites,Peptides , PSMs Identified, PSMs Quantified, Peptides Identified, Peptides Quantified,"); for (int i = first_quant_index; i <= last_quant_index; i++) { reduced_localized_output.Write(' ' + headers[i] + ','); } reduced_localized_output.Write(" Phosphoisoform Quantified?"); } reduced_localized_output.WriteLine(); foreach (KeyValuePair<string, Dictionary<KeyValuePair<int, string>, List<string>>> kvp in localized) { foreach (KeyValuePair<KeyValuePair<int, string>, List<string>> kvp2 in kvp.Value) { full_localized_output.Write((kvp.Key.Contains(",") ? '"' + kvp.Key + '"' : kvp.Key) + ','); reduced_localized_output.Write((kvp.Key.Contains(",") ? '"' + kvp.Key + '"' : kvp.Key) + ','); full_localized_output.Write((kvp2.Key.Value.Contains(",") ? '"' + kvp2.Key.Value + '"' : kvp2.Key.Value) + ','); reduced_localized_output.Write((kvp2.Key.Value.Contains(",") ? 
'"' + kvp2.Key.Value + '"' : kvp2.Key.Value) + ','); full_localized_output.Write(kvp2.Key.Key.ToString() + ','); reduced_localized_output.Write(kvp2.Key.Key.ToString() + ','); double[] isoform_quantitation = new double[last_quant_index - first_quant_index + 1]; isoform_quantitation = Array.ConvertAll<double, double>(isoform_quantitation, SET_DOUBLE_VALUE_TO_NAN); int spectra_identified = 0; int spectra_quantified = 0; Dictionary<string, int> unique_peptides_identified = new Dictionary<string, int>(); Dictionary<string, int> unique_peptides_quantified = new Dictionary<string, int>(); StringBuilder peptides = new StringBuilder(); foreach (string line in kvp2.Value) { string[] fields = Regex.Split(line, @",(?!(?<=(?:^|,)\s*\x22(?:[^\x22]|\x22\x22|\\\x22)*,)(?:[^\x22]|\x22\x22|\\\x22)*\x22\s*(?:,|$))"); // crazy regex to parse CSV with internal double quotes from http://regexlib.com/REDetails.aspx?regexp_id=621 spectra_identified++; string peptide_sequence = fields[2]; peptides.Append(peptide_sequence + " "); if (!unique_peptides_identified.ContainsKey(peptide_sequence)) { unique_peptides_identified.Add(peptide_sequence, 0); } unique_peptides_identified[peptide_sequence]++; if (quant) { spectra_quantified++; if (double.IsNaN(isoform_quantitation[0])) { isoform_quantitation = Array.ConvertAll<double, double>(isoform_quantitation, SET_DOUBLE_VALUE_TO_ZERO); } if (!unique_peptides_quantified.ContainsKey(peptide_sequence)) { unique_peptides_quantified.Add(peptide_sequence, 0); } unique_peptides_quantified[peptide_sequence]++; for (int i = first_quant_index; i <= last_quant_index; i++) { double val = 0; double.TryParse(fields[i], out val); isoform_quantitation[i - first_quant_index] += val; } } } full_localized_output.Write(peptides.ToString() + ','); reduced_localized_output.Write(peptides.ToString() + ','); full_localized_output.Write(spectra_identified.ToString() + ','); reduced_localized_output.Write(spectra_identified.ToString() + ','); if (quant) { 
full_localized_output.Write(spectra_quantified.ToString() + ','); reduced_localized_output.Write(spectra_quantified.ToString() + ','); } full_localized_output.Write(unique_peptides_identified.Count.ToString() + ','); reduced_localized_output.Write(unique_peptides_identified.Count.ToString() + ','); if (quant) { full_localized_output.Write(unique_peptides_quantified.Count.ToString() + ','); reduced_localized_output.Write(unique_peptides_quantified.Count.ToString() + ','); for (int i = isoform_quantitation.GetLowerBound(0); i <= isoform_quantitation.GetUpperBound(0); i++) { full_localized_output.Write(isoform_quantitation[i].ToString() + ','); reduced_localized_output.Write(isoform_quantitation[i].ToString() + ','); } full_localized_output.Write((spectra_quantified > 0).ToString()); reduced_localized_output.Write((spectra_quantified > 0).ToString()); } full_localized_output.WriteLine(); reduced_localized_output.WriteLine(); foreach (string line in kvp2.Value) { full_localized_output.WriteLine(',' + line); } } } } } using (StreamWriter full_unlocalized_output = new StreamWriter(Path.Combine(outputFolder, "full_unlocalized_phosphoisoforms.csv"))) { int first_quant_index = -1; int last_quant_index = -1; if (!quant) { full_unlocalized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites,Peptides, PSMs Identified, Peptides Identified"); } else { full_unlocalized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites,Peptides, PSMs Identified, PSMs Quantified, Peptides Identified, Peptides Quantified,"); for (int i = 0; i < headers.Length; i++) { if (headers[i].EndsWith("NL)")) { if (first_quant_index < 0) { first_quant_index = i; } } if (first_quant_index >= 0) full_unlocalized_output.Write(' ' + headers[i] + ','); if (headers[i].Equals("Channels Detected")) { last_quant_index = i; } } full_unlocalized_output.Write(" Phosphoisoform Quantified?"); } full_unlocalized_output.WriteLine(); full_unlocalized_output.WriteLine(", " + 
header_line); using (StreamWriter reduced_unlocalized_output = new StreamWriter(Path.Combine(outputFolder, "reduced_unlocalized_phosphoisoforms.csv"))) { if (!quant) { reduced_unlocalized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites,Peptides, PSMs Identified, Peptides Identified"); } else { reduced_unlocalized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites,Peptides, PSMs Identified, PSMs Quantified, Peptides Identified, Peptides Quantified,"); for (int i = first_quant_index; i <= last_quant_index; i++) { reduced_unlocalized_output.Write(' ' + headers[i] + ','); } reduced_unlocalized_output.Write(" Phosphoisoform Quantified?"); } reduced_unlocalized_output.WriteLine(); foreach (KeyValuePair<string, Dictionary<KeyValuePair<int, string>, List<string>>> kvp in unlocalized) { foreach (KeyValuePair<KeyValuePair<int, string>, List<string>> kvp2 in kvp.Value) { full_unlocalized_output.Write((kvp.Key.Contains(",") ? '"' + kvp.Key + '"' : kvp.Key) + ','); reduced_unlocalized_output.Write((kvp.Key.Contains(",") ? '"' + kvp.Key + '"' : kvp.Key) + ','); full_unlocalized_output.Write((kvp2.Key.Value.Contains(",") ? '"' + kvp2.Key.Value + '"' : kvp2.Key.Value) + ','); reduced_unlocalized_output.Write((kvp2.Key.Value.Contains(",") ? 
'"' + kvp2.Key.Value + '"' : kvp2.Key.Value) + ','); full_unlocalized_output.Write(kvp2.Key.Key.ToString() + ','); reduced_unlocalized_output.Write(kvp2.Key.Key.ToString() + ','); double[] isoform_quantitation = new double[last_quant_index - first_quant_index + 1]; isoform_quantitation = Array.ConvertAll<double, double>(isoform_quantitation, SET_DOUBLE_VALUE_TO_NAN); int spectra_identified = 0; int spectra_quantified = 0; Dictionary<string, int> unique_peptides_identified = new Dictionary<string, int>(); Dictionary<string, int> unique_peptides_quantified = new Dictionary<string, int>(); StringBuilder peptides = new StringBuilder(); foreach (string line in kvp2.Value) { string[] fields = Regex.Split(line, @",(?!(?<=(?:^|,)\s*\x22(?:[^\x22]|\x22\x22|\\\x22)*,)(?:[^\x22]|\x22\x22|\\\x22)*\x22\s*(?:,|$))"); // crazy regex to parse CSV with internal double quotes from http://regexlib.com/REDetails.aspx?regexp_id=621 spectra_identified++; string peptide_sequence = fields[2]; peptides.Append(peptide_sequence + " "); if (!unique_peptides_identified.ContainsKey(peptide_sequence)) { unique_peptides_identified.Add(peptide_sequence, 0); } unique_peptides_identified[peptide_sequence]++; if (quant) { spectra_quantified++; if (double.IsNaN(isoform_quantitation[0])) { isoform_quantitation = Array.ConvertAll<double, double>(isoform_quantitation, SET_DOUBLE_VALUE_TO_ZERO); } if (!unique_peptides_quantified.ContainsKey(peptide_sequence)) { unique_peptides_quantified.Add(peptide_sequence, 0); } unique_peptides_quantified[peptide_sequence]++; for (int i = first_quant_index; i <= last_quant_index; i++) { double val = 0; double.TryParse(fields[i], out val); isoform_quantitation[i - first_quant_index] += val; } } } full_unlocalized_output.Write(peptides.ToString() + ','); reduced_unlocalized_output.Write(peptides.ToString() + ','); full_unlocalized_output.Write(spectra_identified.ToString() + ','); reduced_unlocalized_output.Write(spectra_identified.ToString() + ','); if (quant) { 
full_unlocalized_output.Write(spectra_quantified.ToString() + ','); reduced_unlocalized_output.Write(spectra_quantified.ToString() + ','); } full_unlocalized_output.Write(unique_peptides_identified.Count.ToString() + ','); reduced_unlocalized_output.Write(unique_peptides_identified.Count.ToString() + ','); if (quant) { full_unlocalized_output.Write(unique_peptides_quantified.Count.ToString() + ','); reduced_unlocalized_output.Write(unique_peptides_quantified.Count.ToString() + ','); for (int i = isoform_quantitation.GetLowerBound(0); i <= isoform_quantitation.GetUpperBound(0); i++) { full_unlocalized_output.Write(isoform_quantitation[i].ToString() + ','); reduced_unlocalized_output.Write(isoform_quantitation[i].ToString() + ','); } full_unlocalized_output.Write((spectra_quantified > 0).ToString()); reduced_unlocalized_output.Write((spectra_quantified > 0).ToString()); } full_unlocalized_output.WriteLine(); reduced_unlocalized_output.WriteLine(); foreach (string line in kvp2.Value) { full_unlocalized_output.WriteLine(',' + line); } } } } } if (motifXOutput) { foreach (string motif in motifs.Keys) { motifX.WriteLine(motif); } motifX.Close(); using (StreamWriter motif_fasta = new StreamWriter(Path.Combine(outputFolder, "motif-x.fasta"))) { foreach (KeyValuePair<string, string> kvp in proteins) { if (!kvp.Key.Contains("DECOY") && !kvp.Key.Contains("REVERSED")) { motif_fasta.WriteLine('>' + kvp.Key); motif_fasta.WriteLine(kvp.Value); } } } } onFinished(new EventArgs()); //} //catch(Exception ex) //{ // onThrowException(new ExceptionEventArgs(ex)); //} //finally //{ if (log != null) { log.Close(); } if (raw != null) { raw.Close(); } if (csv != null) { csv.Close(); } if (non_phospho_output != null) { non_phospho_output.Close(); } if (localized_phospho_output != null) { localized_phospho_output.Close(); } if (unlocalized_phospho_output != null) { unlocalized_phospho_output.Close(); } if (motifX != null) { motifX.Close(); } } catch (Exception e) { MessageBox.Show(e.Message 
+ " " + e.StackTrace); } }