/// <summary>
/// Dumps the curves computed for one mixed precursor into a CSV file located in the
/// "IndividualNoSpike" sub-folder of <c>dbOptions.OutputFolder</c>.
/// </summary>
/// <remarks>
/// The file holds a title row, a "Total" row with the area of every curve, and then one row per
/// point of the mixed precursor's elution curve with the interpolated intensity of each peptide.
/// Nothing is written to disk until <c>WriteToFile</c> is called at the end.
/// </remarks>
/// <param name="ratios">Elution curve attributed to each peptide; one output column per entry.</param>
/// <param name="mixedSample">Sample whose <c>sSDF</c> file name prefixes the output file name.</param>
/// <param name="mixedPrecursor">Supplies the time points, the total intensities and the retention time used in the file name.</param>
/// <param name="keyMz">Precursor m/z, used in the output file name.</param>
/// <param name="dbOptions">Options providing the output folder.</param>
private static void ExportMixedSampleResult(Dictionary<Peptide, MaxFlowElutionCurve> ratios, Sample mixedSample, MixedPrecursor mixedPrecursor, double keyMz, DBOptions dbOptions)
{
    // Export results in a file
    string csvPath = dbOptions.OutputFolder + @"IndividualNoSpike\" + vsCSV.GetFileName_NoExtension(mixedSample.sSDF)
                     + "_" + keyMz + "MZ_" + mixedPrecursor.Queries[0].spectrum.RetentionTimeInMin + "min.csv";
    vsCSVWriter csv = new vsCSVWriter(csvPath);

    // Title row: one column per peptide.
    System.Text.StringBuilder row = new System.Text.StringBuilder("Scan time,Total Area");
    foreach (Peptide peptide in ratios.Keys)
    {
        row.Append(",").Append(peptide.Sequence);
    }
    csv.AddLine(row.ToString());

    // Area row. Values are enumerated in the same order as Keys, so columns stay aligned.
    row.Length = 0;
    row.Append("Total,").Append(mixedPrecursor.eCurveIntensityCount.Area);
    foreach (MaxFlowElutionCurve curve in ratios.Values)
    {
        row.Append(",").Append(curve.eCurvePerMs.Area);
    }
    csv.AddLine(row.ToString());

    // One row per point of the mixed elution curve.
    for (int point = 0; point < mixedPrecursor.eCurveIntensityCount.intensityCount.Count; point++)
    {
        row.Length = 0;
        // The stored time is divided by 60000 before being written.
        row.Append(mixedPrecursor.eCurveIntensityCount.time[point] / (1000.0 * 60.0));
        row.Append(",").Append(mixedPrecursor.eCurveIntensityCount.intensityCount[point]);
        foreach (MaxFlowElutionCurve curve in ratios.Values)
        {
            row.Append(",").Append(curve.eCurvePerMs.InterpolateIntensity(mixedPrecursor.eCurveIntensityCount.time[point]));
        }
        csv.AddLine(row.ToString());
    }

    csv.WriteToFile();
}
/// <summary>
/// Writes one CSV file per (precursor m/z, sample) pair into the "IndividualNoSpike" sub-folder
/// of <c>dbOptions.OutputFolder</c>.
/// </summary>
/// <remarks>
/// Each file has one row per query of the characterized precursor: retention time, precursor
/// intensity, precursor intensity per millisecond, then one column per fragment listed in
/// <c>AllFragments</c>. A fragment column holds the observed intensity of the matching product
/// of the query's first PSM, or 0 when none matches.
/// </remarks>
/// <param name="characterizedPeptides">Characterized precursors indexed by m/z, then by sample.</param>
/// <param name="dbOptions">Options providing the output folder.</param>
private static void ExportSpikedSampleResult(Dictionary<double, Dictionary<Sample, CharacterizedPrecursor>> characterizedPeptides, DBOptions dbOptions)
{
    foreach (KeyValuePair<double, Dictionary<Sample, CharacterizedPrecursor>> byMz in characterizedPeptides)
    {
        foreach (KeyValuePair<Sample, CharacterizedPrecursor> bySample in byMz.Value)
        {
            CharacterizedPrecursor spiked = bySample.Value;
            vsCSVWriter csv = new vsCSVWriter(dbOptions.OutputFolder + @"IndividualNoSpike\" + vsCSV.GetFileName_NoExtension(bySample.Key.sSDF) + "_" + byMz.Key + "MZ.csv");

            System.Text.StringBuilder row = new System.Text.StringBuilder("Scan time,Precursor Intensity,Intensity Per Millisecond");
            foreach (ProductMatch column in spiked.AllFragments)
            {
                row.Append(",").Append(column.Fragment.Name).Append(column.fragmentPos).Append("^").Append(column.charge);
            }
            csv.AddLine(row.ToString());

            foreach (Query query in spiked.Queries)
            {
                row.Length = 0;
                row.Append(query.spectrum.RetentionTimeInMin);
                row.Append(",").Append(query.spectrum.PrecursorIntensity);
                row.Append(",").Append(query.spectrum.PrecursorIntensityPerMilliSecond);

                foreach (ProductMatch column in spiked.AllFragments)
                {
                    // No early exit on purpose: when several products match, the last one wins.
                    double observed = 0.0;
                    foreach (ProductMatch candidate in query.psms[0].AllProductMatches)
                    {
                        if (candidate.charge == column.charge && candidate.Fragment == column.Fragment && candidate.fragmentPos == column.fragmentPos)
                        {
                            observed = candidate.obsIntensity;
                        }
                    }
                    row.Append(",").Append(observed);
                }
                csv.AddLine(row.ToString());
            }

            csv.WriteToFile();
        }
    }
}
/// <summary>
/// Writes the given peptide matches to a CSV file, one row per match.
/// </summary>
/// <remarks>
/// Columns: base sequence, full sequence (header "Variable Modification"), probability score,
/// decoy flag and precursor mass error.
/// </remarks>
/// <param name="filename">Path of the CSV file to write.</param>
/// <param name="peptides">Peptide matches to export.</param>
public static void Export(string filename, List<PeptideMatch> peptides)
{
    vsCSVWriter csv = new vsCSVWriter(filename);
    csv.AddLine("Sequence,Variable Modification,Score,Decoy,Precursor Mass Error");

    for (int index = 0; index < peptides.Count; index++)
    {
        PeptideMatch current = peptides[index];
        string row = string.Concat(new object[]
        {
            current.peptide.BaseSequence, ",",
            current.peptide.Sequence, ",",
            current.ProbabilityScore(), ",",
            current.peptide.Decoy, ",",
            current.GetPrecursorMassError()
        });
        csv.AddLine(row);
    }

    csv.WriteToFile();
}
/// <summary>
/// Writes the given protein groups to a CSV file: the <c>ProteinGroupMatch.Header</c> row
/// followed by the <c>ToString()</c> of every group.
/// </summary>
/// <param name="filename">Path of the CSV file to write.</param>
/// <param name="proteins">Protein groups to export.</param>
public static void Export(string filename, List<ProteinGroupMatch> proteins)
{
    vsCSVWriter csv = new vsCSVWriter(filename);
    csv.AddLine(ProteinGroupMatch.Header);

    for (int index = 0; index < proteins.Count; index++)
    {
        string row = proteins[index].ToString();
        csv.AddLine(row);
    }

    csv.WriteToFile();
}
/// <summary>
/// Loads a fixed list of raw files and writes, for each of them, the number of MS1 scans, the
/// number of spectra and a histogram of track charge states to "output.csv".
/// </summary>
/// <remarks>
/// Input and output paths are hard-coded. A track's charge is the charge of the last isotope
/// returned by <c>Queries.GetIsotopes</c> that has a positive charge; it stays 0 when none has.
/// The histogram has one bucket per charge from 0 to 14, and the header is generated from the
/// same range so labels and data cannot drift apart. Previously the header was labelled 1..14
/// while the data held charges 0..13, which shifted every column by one.
/// </remarks>
/// <returns>Always <c>true</c>.</returns>
public static bool Run()
{
    // Highest charge state reported; bucket 0 counts tracks for which no charge was found.
    const int MaxReportedCharge = 14;

    string outputPath = @"C:\_IRIC\DATA\Test\testMhc\Stats\";
    vsCSVWriter writer = new vsCSVWriter(outputPath + "output.csv");

    string header = "File,# MS1s,# MSMS";
    for (int c = 0; c <= MaxReportedCharge; c++)
    {
        header += "," + c + " Charge";
    }
    writer.AddLine(header);

    DBOptions options = MhcSample.CreateOptions(outputPath);
    string[] files = new string[]
    {
        @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL.raw",
        @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS60_MSMS15.raw",
        @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS60_MSMS30.raw",
        @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS60_MSMS60.raw",
        @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS120_MSMS15.raw",
        @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS120_MSMS30.raw",
        @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS120_MSMS60.raw"
    };

    foreach (string file in files)
    {
        pwiz.CLI.msdata.MSDataFile msFile = new pwiz.CLI.msdata.MSDataFile(file);
        Spectra spectra = Spectra.Load(msFile, options, file);
        spectra.Sort(ProductSpectrum.AscendingPrecursorMassComparison);

        // Tracks already visited, either directly or as an isotope of another track.
        Dictionary<Track, Precursor> DicOfComputedTracks = new Dictionary<Track, Precursor>();
        int[] charges = new int[MaxReportedCharge + 1];

        foreach (Track track in spectra.tracks)
        {
            if (DicOfComputedTracks.ContainsKey(track))
            {
                continue;
            }
            DicOfComputedTracks.Add(track, null);

            int charge = 0;
            foreach (Precursor precursor in Queries.GetIsotopes(track, options, spectra.tracks, null))
            {
                if (precursor.Charge > 0)
                {
                    charge = precursor.Charge;
                }
                if (!DicOfComputedTracks.ContainsKey(precursor.Track))
                {
                    DicOfComputedTracks.Add(precursor.Track, precursor);
                }
            }

            if (charge < charges.Length)
            {
                charges[charge]++;
            }
            else
            {
                // Used to throw IndexOutOfRangeException; report it and keep processing.
                System.Console.WriteLine("Charge " + charge + " exceeds the reported maximum of " + MaxReportedCharge + " and was not counted (" + file + ")");
            }
        }

        string line = file + "," + spectra.MS1s.Count + "," + spectra.Count;
        for (int i = 0; i < charges.Length; i++)
        {
            line += "," + charges[i];
        }
        writer.AddLine(line);
    }

    writer.WriteToFile();
    return true;
}
/// <summary>
/// Writes every track of this collection to a CSV file: the <c>Track.TITLE</c> row followed
/// by the <c>ToString()</c> of each track.
/// </summary>
/// <param name="filename">Path of the CSV file to write.</param>
public void Export(string filename)
{
    vsCSVWriter csv = new vsCSVWriter(filename);
    csv.AddLine(Track.TITLE);

    foreach (Track current in this)
    {
        string row = current.ToString();
        csv.AddLine(row);
    }

    csv.WriteToFile();
}
/// <summary>
/// Writes every product spectrum of this collection to a CSV file: the
/// <c>ProductSpectrum.TITLE</c> row followed by the <c>ToString()</c> of each spectrum.
/// </summary>
/// <param name="filename">Path of the CSV file to write.</param>
public void ExportMSMS(string filename)
{
    vsCSVWriter csv = new vsCSVWriter(filename);
    csv.AddLine(ProductSpectrum.TITLE);

    foreach (ProductSpectrum current in this)
    {
        string row = current.ToString();
        csv.AddLine(row);
    }

    csv.WriteToFile();
}
/// <summary>
/// Reads a FASTA file and writes a copy named "&lt;name&gt;_Shuffled.fasta" in the same folder
/// in which every sequence is replaced by the result of <c>AminoAcidTools.Shuffle</c>.
/// </summary>
/// <remarks>
/// Header lines (starting with "&gt;") are copied unchanged. The sequence lines that follow a
/// header are concatenated and written as a single shuffled line. Any exception is reported on
/// the console and otherwise swallowed, so this method never throws.
/// </remarks>
/// <param name="fastaFile">Path of the FASTA file to read.</param>
public static void ShuffleSequences(string fastaFile)
{
    try
    {
        FileStream fs;
        try
        {
            fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.Read);
        }
        catch (System.Exception)
        {
            // The file may be open for writing elsewhere; retry with a more permissive share mode.
            fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
        }

        vsCSVWriter wrShuffled;
        // The using starts right after the open, so the handle is released even if the writer
        // constructor or the reader throws.
        using (fs)
        {
            wrShuffled = new vsCSVWriter(vsCSV.GetFolder(fastaFile) + vsCSV.GetFileName_NoExtension(fastaFile) + "_Shuffled.fasta");
            using (StreamReader sr = new StreamReader(fs))
            {
                // Accumulates the (possibly multi-line) sequence of the current entry.
                System.Text.StringBuilder aaSeq = new System.Text.StringBuilder();
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    if (line.StartsWith(">"))
                    {
                        // A new header closes the previous entry: flush its shuffled sequence first.
                        if (aaSeq.Length > 0)
                        {
                            wrShuffled.AddLine(Proteomics.Utilities.Tools.AminoAcidTools.Shuffle(aaSeq.ToString()));
                        }
                        wrShuffled.AddLine(line);
                        aaSeq.Length = 0;
                    }
                    else
                    {
                        aaSeq.Append(line);
                    }
                }

                // Flush the last entry, which has no following header.
                if (aaSeq.Length > 0)
                {
                    wrShuffled.AddLine(Proteomics.Utilities.Tools.AminoAcidTools.Shuffle(aaSeq.ToString()));
                }
            }
        }

        wrShuffled.WriteToFile();
    }
    catch (System.Exception ex)
    {
        Console.WriteLine(ex.Message);
        Console.WriteLine(ex.StackTrace);
    }
}
/// <summary>
/// Writes the given peptide-spectrum matches to a CSV file, one row per match.
/// </summary>
/// <remarks>
/// Columns: track m/z, spectrum retention time, precursor charge, base sequence, full sequence
/// (header "Modifications"), precursor/product/intensity scores, probability score (header
/// "Final Score"), precursor m/z error, decoy flag and protein score.
/// </remarks>
/// <param name="filename">Path of the CSV file to write.</param>
/// <param name="psms">Matches to export.</param>
public static void Export(string filename, List<PeptideSpectrumMatch> psms)
{
    vsCSVWriter csv = new vsCSVWriter(filename);
    csv.AddLine("Mz,Rt,Charge,Sequence,Modifications,Precursor Score,Product Score,Intensity Score,Final Score,Precursor Mass Error,Decoy?,Protein Score");

    for (int index = 0; index < psms.Count; index++)
    {
        PeptideSpectrumMatch current = psms[index];
        string row = string.Concat(new object[]
        {
            current.Query.precursor.Track.MZ, ",",
            current.Query.spectrum.RetentionTimeInMin, ",",
            current.Query.precursor.Charge, ",",
            current.Peptide.BaseSequence, ",",
            current.Peptide.Sequence, ",",
            current.PrecursorScore, ",",
            current.ProductScore, ",",
            current.IntensityScore, ",",
            current.ProbabilityScore(), ",",
            current.PrecursorMzError, ",",
            current.Decoy, ",",
            current.ProteinScore
        });
        csv.AddLine(row);
    }

    csv.WriteToFile();
}
/// <summary>
/// Copies a Mascot CSV report to <paramref name="csvFileOut"/>, appending to each line the FASTA
/// header text of the protein named in the line's third column.
/// </summary>
/// <remarks>
/// A FASTA header is matched when the text between "&gt;" and the first space equals the third
/// CSV column. The appended text is everything that follows that identifier in the header line.
/// Lines with fewer than three columns are copied unchanged; lines whose protein is not found
/// get an empty extra column. Any exception is reported on the console and swallowed, in which
/// case the output file is not written.
/// </remarks>
/// <param name="csvMascotFile">Mascot report to read.</param>
/// <param name="fastaFile">FASTA file providing the protein headers.</param>
/// <param name="csvFileOut">Path of the CSV file to write.</param>
public static void AppendProteinDescriptionToMascotReport(string csvMascotFile, string fastaFile, string csvFileOut)
{
    vsCSV csvMascot = new vsCSV(csvMascotFile);
    vsCSVWriter writer = new vsCSVWriter(csvFileOut);
    try
    {
        // Collect the protein identifiers used by the report; descriptions are filled in below.
        Dictionary<string, string> DicOfProt = new Dictionary<string, string>();
        foreach (string line in csvMascot.LINES_LIST)
        {
            string[] splits = line.Split(',');
            if (splits.Length > 2 && !DicOfProt.ContainsKey(splits[2]))
            {
                DicOfProt.Add(splits[2], "");
            }
        }

        FileStream fs;
        try
        {
            fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.Read);
        }
        catch (System.Exception)
        {
            // The file may be open for writing elsewhere; retry with a more permissive share mode.
            fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
        }

        // The stream is opened right before its using blocks so it cannot leak on an exception.
        using (fs)
        using (StreamReader sr = new StreamReader(fs))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                if (!line.StartsWith(">"))
                {
                    continue;
                }

                string[] split = line.Substring(1).Split(' ');
                if (DicOfProt.ContainsKey(split[0]))
                {
                    // Skip ">" plus the identifier; keep the rest of the header line.
                    DicOfProt[split[0]] = line.Substring(split[0].Length + 1);
                }
            }
        }

        foreach (string line in csvMascot.LINES_LIST)
        {
            string[] splits = line.Split(',');
            string lineToWrite = line;
            string description;
            if (splits.Length > 2 && DicOfProt.TryGetValue(splits[2], out description))
            {
                lineToWrite += "," + description;
            }
            writer.AddLine(lineToWrite);
        }
        writer.WriteToFile();
    }
    catch (System.Exception ex)
    {
        Console.WriteLine(ex.Message);
        Console.WriteLine(ex.StackTrace);
    }
}
/// <summary>
/// Writes a CSV table with one row per PSM and one column per (fragment position, charge,
/// fragment class) combination, holding the summed observed intensity of the matching products.
/// </summary>
/// <remarks>
/// Positions run from 1 to <c>peptide.Length</c>, charges from 1 to <paramref name="psmCharge"/>,
/// and fragment classes come from <c>dbOptions.fragments</c>. The first column is the retention
/// time of the PSM's spectrum.
/// </remarks>
/// <param name="psms">Matches to export, one row each.</param>
/// <param name="peptide">Peptide whose length defines the fragment positions.</param>
/// <param name="psmCharge">Highest fragment charge to report.</param>
/// <param name="fileName">Path of the CSV file to write.</param>
public void ExportFragmentIntensitiesForAllPSM(List<PeptideSpectrumMatch> psms, Peptide peptide, int psmCharge, string fileName)
{
    vsCSVWriter csv = new vsCSVWriter(fileName);

    System.Text.StringBuilder row = new System.Text.StringBuilder("Retention Time");
    for (int position = 1; position <= peptide.Length; position++)
    {
        for (int z = 1; z <= psmCharge; z++)
        {
            foreach (FragmentClass fragmentClass in dbOptions.fragments)
            {
                row.Append(",").Append(position).Append(fragmentClass.Name).Append(" ^").Append(z);
            }
        }
    }
    csv.AddLine(row.ToString());

    foreach (PeptideSpectrumMatch current in psms)
    {
        row.Length = 0;
        row.Append(current.Query.spectrum.RetentionTimeInMin.ToString());

        for (int position = 1; position <= peptide.Length; position++)
        {
            for (int z = 1; z <= psmCharge; z++)
            {
                foreach (FragmentClass fragmentClass in dbOptions.fragments)
                {
                    double total = 0.0;
                    foreach (ProductMatch product in current.AllProductMatches)
                    {
                        if (fragmentClass == product.Fragment && product.fragmentPos == position && product.charge == z)
                        {
                            total += product.obsIntensity;
                        }
                    }
                    row.Append(",").Append(total);
                }
            }
        }
        csv.AddLine(row.ToString());
    }

    csv.WriteToFile();
}
/// <summary>
/// Writes the given queries to a CSV file, one row per query.
/// </summary>
/// <remarks>
/// Every row starts with the spectrum precursor m/z, the track retention time and the precursor
/// charge. The peptide and score columns are filled only when the precursor's
/// <c>OptimizedBestPsm()</c> is not null; otherwise the row ends after the charge column.
/// </remarks>
/// <param name="filename">Path of the CSV file to write.</param>
/// <param name="queries">Queries to export.</param>
public static void Export(string filename, IEnumerable<Query> queries)
{
    vsCSVWriter csv = new vsCSVWriter(filename);
    csv.AddLine("Spectrum Precursor Mz,Rt,Charge,BaseSequence,Sequence,Precursor Score,Product Score,Intensity Score,Final Score,Precursor Mass Error,Decoy?,Protein Score");

    foreach (Query current in queries)
    {
        string row = string.Concat(new object[]
        {
            current.spectrum.PrecursorMZ, ",",
            current.precursor.Track.RT, ",",
            current.precursor.Charge, ","
        });

        PeptideSpectrumMatch best = current.precursor.OptimizedBestPsm();
        if (best != null)
        {
            row += string.Concat(new object[]
            {
                best.Peptide.BaseSequence, ",",
                best.Peptide.Sequence, ",",
                best.PrecursorScore, ",",
                best.ProductScore, ",",
                best.IntensityScore, ",",
                current.ScoreFct(best.Peptide), ",",
                best.PrecursorMzError, ",",
                best.Decoy, ",",
                best.ProteinScore
            });
        }

        csv.AddLine(row);
    }

    csv.WriteToFile();
}
/// <summary>
/// Splits a FASTA file into "&lt;name&gt;_ForwardOnly.fasta" and "&lt;name&gt;_ReverseOnly.fasta",
/// both written next to the input file.
/// </summary>
/// <remarks>
/// An entry goes to the reverse file when its header starts with "&gt;REVERSE_", and to the
/// forward file otherwise. Lines that precede the first header are dropped. Any exception is
/// reported on the console and swallowed, in which case no output file is written.
/// </remarks>
/// <param name="fastaFile">Path of the FASTA file to split.</param>
public static void SeparateForwardAndReverse(string fastaFile)
{
    try
    {
        FileStream fs;
        try
        {
            fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.Read);
        }
        catch (System.Exception)
        {
            // The file may be open for writing elsewhere; retry with a more permissive share mode.
            fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
        }

        vsCSVWriter wrForward;
        vsCSVWriter wrReverse;
        // The using starts right after the open, so the handle is released even if a writer
        // constructor or the reader throws.
        using (fs)
        {
            wrForward = new vsCSVWriter(vsCSV.GetFolder(fastaFile) + vsCSV.GetFileName_NoExtension(fastaFile) + "_ForwardOnly.fasta");
            wrReverse = new vsCSVWriter(vsCSV.GetFolder(fastaFile) + vsCSV.GetFileName_NoExtension(fastaFile) + "_ReverseOnly.fasta");
            using (StreamReader sr = new StreamReader(fs))
            {
                // Kind of the entry currently being read; it stays in effect for the sequence lines.
                ProteinIdType idType = ProteinIdType.Unknown;
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    if (line.StartsWith(">"))
                    {
                        idType = line.StartsWith(">REVERSE_") ? ProteinIdType.Reverse : ProteinIdType.Forward;
                    }

                    switch (idType)
                    {
                        case ProteinIdType.Forward:
                            wrForward.AddLine(line);
                            break;
                        case ProteinIdType.Reverse:
                            wrReverse.AddLine(line);
                            break;
                        case ProteinIdType.Unknown:
                            // No header seen yet: nothing to attribute this line to.
                            break;
                    }
                }
            }
        }

        wrForward.WriteToFile();
        wrReverse.WriteToFile();
    }
    catch (System.Exception ex)
    {
        Console.WriteLine(ex.Message);
        Console.WriteLine(ex.StackTrace);
    }
}
/// <summary>
/// Writes the given precursors to a CSV file, one row per precursor.
/// </summary>
/// <remarks>
/// Every row starts with the precursor index, track retention time, track m/z, charge, most
/// intense charge and mass. The peptide and score columns are filled only when
/// <c>OptimizedBestPsm()</c> is not null; otherwise the row ends after the mass column.
/// </remarks>
/// <param name="filename">Path of the CSV file to write.</param>
/// <param name="precursors">Precursors to export.</param>
public static void Export(string filename, List<Precursor> precursors)
{
    vsCSVWriter csv = new vsCSVWriter(filename);
    csv.AddLine("Index.Mz,Rt,Precursor Mz,Charge,Most Intense Charge,Precursor Mass,Peptide Mass,Sequence,Modified Sequence,Precursor Score,Product Score,Intensity Score,Final Score,Precursor Mass Error,Decoy?,Protein Score");

    for (int index = 0; index < precursors.Count; index++)
    {
        Precursor current = precursors[index];
        string row = string.Concat(new object[]
        {
            current.INDEX, ",",
            current.Track.RT, ",",
            current.Track.MZ, ",",
            current.Charge, ",",
            current.GetMostIntenseCharge(), ",",
            current.Mass, ","
        });

        PeptideSpectrumMatch best = current.OptimizedBestPsm();
        if (best != null)
        {
            row += string.Concat(new object[]
            {
                best.Peptide.MonoisotopicMass, ",",
                best.Peptide.BaseSequence, ",",
                best.Peptide.Sequence, ",",
                best.PrecursorScore, ",",
                best.ProductScore, ",",
                best.IntensityScore, ",",
                current.ProbabilityScore(best.Peptide), ",",
                best.PrecursorMzError, ",",
                best.Decoy, ",",
                best.ProteinScore
            });
        }

        csv.AddLine(row);
    }

    csv.WriteToFile();
}
/// <summary>
/// Lists every spectrum of a raw file in a CSV file: 1-based scan number, retention time in
/// minutes and MS level.
/// </summary>
/// <remarks>
/// The retention time is the scan start time of the first scan of each spectrum. The raw file
/// is disposed as soon as all spectra are read, so its handle is released even when reading
/// fails part-way through.
/// </remarks>
/// <param name="rawFileName">Path of the raw file to read through ProteoWizard.</param>
/// <param name="csvOutFileName">Path of the CSV file to write.</param>
public static void ToCSV(string rawFileName, string csvOutFileName)
{
    vsCSVWriter csvWriter = new vsCSVWriter(csvOutFileName);
    csvWriter.AddLine("Scan Number,Retention Time (min),Ms Level");

    using (pwiz.CLI.msdata.MSDataFile msFile = new pwiz.CLI.msdata.MSDataFile(rawFileName))
    {
        int num_spectra = msFile.run.spectrumList.size();
        for (int i = 0; i < num_spectra; i++)
        {
            // Spectrum (second argument false: binary data is not requested)
            pwiz.CLI.msdata.Spectrum mySpec = msFile.run.spectrumList.spectrum(i, false);

            double retention_time = mySpec.scanList.scans[0].cvParam(pwiz.CLI.cv.CVID.MS_scan_start_time).timeInSeconds() / 60.0;
            csvWriter.AddLine((i + 1) + "," + retention_time + "," + mySpec.cvParam(pwiz.CLI.cv.CVID.MS_ms_level).value);
        }
    }

    csvWriter.WriteToFile();
}
/// <summary>
/// Writes a table of variables over time to a CSV file.
/// </summary>
/// <remarks>
/// Row 1 is "Date" followed by every key of <paramref name="DicOfVar"/>. Row 2 holds the
/// matching values of <paramref name="DicOfCorr"/>, with the first column left empty. Each
/// following row is a date of <paramref name="DicOfTime"/> and, per variable name, the value
/// of the last variable of that date with the same name: <c>value</c> for "price",
/// <c>normValue</c> for any other name, and an empty cell when none matches.
/// </remarks>
/// <param name="fileName">Path of the CSV file to write.</param>
/// <param name="DicOfTime">Variables recorded at each date; one row per entry.</param>
/// <param name="DicOfVar">Only its keys are used: they define the columns.</param>
/// <param name="DicOfCorr">Value written under each column name; must contain every key of <paramref name="DicOfVar"/>.</param>
public static void ExportAllVariables(string fileName, Dictionary<DateTime, List<Variable>> DicOfTime, Dictionary<string, List<Variable>> DicOfVar, Dictionary<string, double> DicOfCorr)
{
    vsCSVWriter csv = new vsCSVWriter(fileName);

    System.Text.StringBuilder header = new System.Text.StringBuilder("Date");
    System.Text.StringBuilder correlations = new System.Text.StringBuilder();
    foreach (string columnName in DicOfVar.Keys)
    {
        header.Append(",").Append(columnName);
        correlations.Append(",").Append(DicOfCorr[columnName]);
    }
    csv.AddLine(header.ToString());
    csv.AddLine(correlations.ToString());

    foreach (KeyValuePair<DateTime, List<Variable>> snapshot in DicOfTime)
    {
        System.Text.StringBuilder row = new System.Text.StringBuilder(snapshot.Key.ToString());
        foreach (string columnName in DicOfVar.Keys)
        {
            // Stays an empty cell unless a variable with this name exists; the last one wins.
            string cell = ",";
            foreach (Variable candidate in snapshot.Value)
            {
                if (candidate.name.CompareTo(columnName) != 0)
                {
                    continue;
                }

                if (candidate.name.CompareTo("price") == 0)
                {
                    cell = "," + candidate.value;
                }
                else
                {
                    cell = "," + candidate.normValue;
                }
            }
            row.Append(cell);
        }
        csv.AddLine(row.ToString());
    }

    csv.WriteToFile();
}
/// <summary>
/// Merges a fixed list of Mascot CSV reports into one comparison table,
/// "outputCompare.csv", with one row per distinct key found in any report.
/// </summary>
/// <remarks>
/// The key is column 1 plus column 33 of a report line. For each report the row carries
/// columns 13, 14, 18 and 26 of the last line with that key, or four empty cells when the
/// report has no such line. Lines too short to contain column 33 are ignored. Input and output
/// paths are hard-coded.
/// </remarks>
/// <returns>Always <c>true</c>.</returns>
public static bool MascotCompare()
{
    // Highest column index read below; shorter lines cannot be processed.
    const int LastColumnUsed = 33;

    List<string> listMascotFiles = new List<string>();
    listMascotFiles.Add(@"N:\Thibault\Frederic Lamoliatte\Olivier\no MSed.csv");
    listMascotFiles.Add(@"N:\Thibault\Frederic Lamoliatte\Olivier\MSed1.csv");
    listMascotFiles.Add(@"N:\Thibault\Frederic Lamoliatte\Olivier\MSed2.csv");

    // key -> one slot per report, null when the report does not contain the key
    Dictionary<string, string[]> dicOfPep = new Dictionary<string, string[]>();
    for (int i = 0; i < listMascotFiles.Count; i++)
    {
        vsCSV csv = new vsCSV(listMascotFiles[i]);
        foreach (string line in csv.LINES_LIST)
        {
            string[] splits = line.Split(vsCSV._Generic_Separator);
            // The guard used to be "> 26" although column 33 is read, which crashed on
            // lines having between 27 and 33 fields.
            if (splits.Length <= LastColumnUsed)
            {
                continue;
            }

            string key = splits[1] + "," + splits[33]; // raw+scan+seq+mod
            string[] perFile;
            if (!dicOfPep.TryGetValue(key, out perFile))
            {
                // Sized from the file list instead of a hard-coded 3.
                perFile = new string[listMascotFiles.Count];
                dicOfPep.Add(key, perFile);
            }
            perFile[i] = "," + splits[13] + "," + splits[14] + "," + splits[18] + "," + splits[26];
        }
    }

    vsCSVWriter writer = new vsCSVWriter(@"C:\_IRIC\DATA\Sumo\outputCompare.csv");
    foreach (KeyValuePair<string, string[]> entry in dicOfPep)
    {
        string str = entry.Key;
        for (int i = 0; i < listMascotFiles.Count; i++)
        {
            // Keep the columns aligned when a report has no line for this key.
            str += entry.Value[i] != null ? entry.Value[i] : ",,,,";
        }
        writer.AddLine(str);
    }
    writer.WriteToFile();
    return true;
}
/// <summary>
/// For every FASTA header line that contains the second column of a UbiPred CSV line, writes
/// that CSV line followed by a 13-character excerpt of the header.
/// </summary>
/// <remarks>
/// The excerpt starts at offset <c>(second column length + 1)</c> in the header line.
/// NOTE(review): this presumably assumes headers of the form "&gt;name" followed by a
/// 13-character identifier; confirm against the input files.
/// CSV lines with fewer than two columns are skipped, and the excerpt is truncated when the
/// header is too short. Either case used to throw and abort the export with no output.
/// Any other exception is reported on the console and swallowed, and no file is written.
/// </remarks>
/// <param name="fastaFile">FASTA file whose header lines are scanned.</param>
/// <param name="csvUbiFile">UbiPred CSV file to annotate.</param>
/// <param name="csvFileOut">Path of the CSV file to write.</param>
public static void AddIPIToUbiPredFile(string fastaFile, string csvUbiFile, string csvFileOut)
{
    // Number of header characters copied after the matched name.
    const int ExcerptLength = 13;

    vsCSV csv = new vsCSV(csvUbiFile);
    vsCSVWriter writer = new vsCSVWriter(csvFileOut);
    try
    {
        // Split the CSV once instead of once per FASTA header. Key = full line, Value = column 1.
        List<KeyValuePair<string, string>> ubiEntries = new List<KeyValuePair<string, string>>();
        foreach (string csvLine in csv.LINES_LIST)
        {
            string[] splits = csvLine.Split(vsCSV._Generic_Separator);
            if (splits.Length > 1)
            {
                ubiEntries.Add(new KeyValuePair<string, string>(csvLine, splits[1]));
            }
        }

        FileStream fs;
        try
        {
            fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.Read);
        }
        catch (System.Exception)
        {
            // The file may be open for writing elsewhere; retry with a more permissive share mode.
            fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
        }

        // The stream is opened right before its using blocks so it cannot leak on an exception.
        using (fs)
        using (StreamReader sr = new StreamReader(fs))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                if (!line.StartsWith(">"))
                {
                    continue;
                }

                foreach (KeyValuePair<string, string> entry in ubiEntries)
                {
                    string name = entry.Value;
                    if (!line.Contains(name))
                    {
                        continue;
                    }

                    // Clamp the excerpt to the header length instead of throwing.
                    int start = System.Math.Min(name.Length + 1, line.Length);
                    int length = System.Math.Min(ExcerptLength, line.Length - start);
                    writer.AddLine(entry.Key + "," + line.Substring(start, length));
                }
            }
        }

        writer.WriteToFile();
    }
    catch (System.Exception ex)
    {
        Console.WriteLine(ex.Message);
        Console.WriteLine(ex.StackTrace);
    }
}
/// <summary>
/// Copies a Mascot CSV report to <paramref name="csvFileOut"/>, appending to each line columns
/// 2 and 3 of every UbiPred line that falls inside the peptide described by the Mascot line.
/// </summary>
/// <remarks>
/// A UbiPred line matches when its column 4 contains column 3 of the Mascot line and its
/// column 2 (an integer) lies between Mascot columns 16 and 17, both inclusive. Mascot lines
/// starting with "Search", or with fewer than 18 columns, are copied unchanged. UbiPred lines
/// with fewer than five columns or a non-numeric column 2 are ignored. A Mascot line whose
/// columns 16 or 17 are not integers is reported on the console and copied unchanged.
/// </remarks>
/// <param name="csvUbiFile">UbiPred CSV file.</param>
/// <param name="csvMascotFile">Mascot report to annotate.</param>
/// <param name="csvFileOut">Path of the CSV file to write.</param>
public static void AppendUbiPredToMascotReport(string csvUbiFile, string csvMascotFile, string csvFileOut)
{
    vsCSV csvUbi = new vsCSV(csvUbiFile);
    vsCSV csvMascot = new vsCSV(csvMascotFile);
    vsCSVWriter writer = new vsCSVWriter(csvFileOut);
    try
    {
        // Parse the UbiPred file once. Malformed lines (for instance a header row) are dropped
        // here instead of throwing in the middle of every Mascot line.
        List<string[]> ubiRows = new List<string[]>();
        List<int> ubiIndexes = new List<int>();
        foreach (string lineUbi in csvUbi.LINES_LIST)
        {
            string[] splits = lineUbi.Split(vsCSV._Generic_Separator);
            int indexUbi;
            if (splits.Length > 4 && int.TryParse(splits[2], out indexUbi))
            {
                ubiRows.Add(splits);
                ubiIndexes.Add(indexUbi);
            }
        }

        foreach (string lineMascot in csvMascot.LINES_LIST)
        {
            string strToAppend = "";
            try
            {
                string[] mSplits = lineMascot.Split(vsCSV._Generic_Separator);
                // Column 17 is read below, so at least 18 columns are needed (the guard used
                // to be ">= 17", one short).
                if (mSplits.Length > 17 && !lineMascot.StartsWith("Search"))
                {
                    int indexStart = int.Parse(mSplits[16]);
                    int indexStop = int.Parse(mSplits[17]);
                    for (int i = 0; i < ubiRows.Count; i++)
                    {
                        string[] splits = ubiRows[i];
                        if (splits[4].Contains(mSplits[3]) && ubiIndexes[i] >= indexStart && ubiIndexes[i] <= indexStop)
                        {
                            strToAppend += "," + splits[2] + "," + splits[3];
                        }
                    }
                }
            }
            catch (System.Exception ex)
            {
                // Best effort: report the problem and still copy the Mascot line.
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
            }
            writer.AddLine(lineMascot + strToAppend);
        }
        writer.WriteToFile();
    }
    catch (System.Exception ex)
    {
        Console.WriteLine(ex.Message);
        Console.WriteLine(ex.StackTrace);
    }
}
/// <summary>
/// Writes a CSV table of observed fragment intensities summed over all given PSMs, with one
/// row per fragment position (1 to <c>peptide.Length</c>).
/// </summary>
/// <remarks>
/// Columns come in two groups, each ordered by charge (1 to <paramref name="psmCharge"/>) and
/// then by fragment class. The first group uses every class of <c>dbOptions.fragments</c>. The
/// second uses only the classes for which <c>ComputeFragments</c> yields at least one product
/// for <paramref name="peptide"/>.
/// </remarks>
/// <param name="psms">Matches whose product intensities are summed.</param>
/// <param name="peptide">Peptide defining the positions and the second column group.</param>
/// <param name="psmCharge">Highest fragment charge to report.</param>
/// <param name="fileName">Path of the CSV file to write.</param>
public void ExportFragmentIntensities(List<PeptideSpectrumMatch> psms, Peptide peptide, int psmCharge, string fileName)
{
    vsCSVWriter csv = new vsCSVWriter(fileName);

    // Column group 1: every configured class. Column group 2: classes the peptide can produce.
    List<FragmentClass> everyClass = new List<FragmentClass>();
    List<FragmentClass> producedClasses = new List<FragmentClass>();
    foreach (FragmentClass candidate in dbOptions.fragments)
    {
        everyClass.Add(candidate);
        foreach (ProductMatch theoretical in dbOptions.fragments.ComputeFragments(peptide, psmCharge, dbOptions))
        {
            if (candidate == theoretical.Fragment)
            {
                producedClasses.Add(candidate);
                break;
            }
        }
    }
    List<FragmentClass>[] columnGroups = new List<FragmentClass>[] { everyClass, producedClasses };

    System.Text.StringBuilder row = new System.Text.StringBuilder("Cumulated Product Intensities");
    foreach (List<FragmentClass> group in columnGroups)
    {
        for (int z = 1; z <= psmCharge; z++)
        {
            foreach (FragmentClass fragmentClass in group)
            {
                row.Append(",").Append(fragmentClass.Name).Append(" ^").Append(z);
            }
        }
    }
    csv.AddLine(row.ToString());

    for (int position = 1; position <= peptide.Length; position++)
    {
        row.Length = 0;
        row.Append(position.ToString());

        foreach (List<FragmentClass> group in columnGroups)
        {
            for (int z = 1; z <= psmCharge; z++)
            {
                foreach (FragmentClass fragmentClass in group)
                {
                    double total = 0.0;
                    foreach (PeptideSpectrumMatch current in psms)
                    {
                        foreach (ProductMatch product in current.AllProductMatches)
                        {
                            if (fragmentClass == product.Fragment && product.fragmentPos == position && product.charge == z)
                            {
                                total += product.obsIntensity;
                            }
                        }
                    }
                    row.Append(",").Append(total);
                }
            }
        }
        csv.AddLine(row.ToString());
    }

    csv.WriteToFile();
}
/// <summary>
/// Annotates every peptide of a clustering CSV file with the database entries, in any reading
/// frame and direction, whose translated sequence contains the peptide.
/// </summary>
/// <remarks>
/// All paths are hard-coded. The first CSV line is copied as is. Every other line gets one
/// extra column listing the matching "description | Reading Frame n | Forward/Backward" keys,
/// each followed by ";". "Zut" is printed on the console for peptides without any match.
/// The peptide sequence is read from column 4; an earlier variant read column 13 of
/// "Identifications.csv" instead.
/// </remarks>
public static void YangLiuPeptidesWithAllProteins()
{
    vsCSV csvPeptides = new vsCSV(@"G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\DEC18_2012\DMatton\Clustering_186716\Cluster_Intensity_peptides_NormP.csv");
    vsCSVWriter writer = new vsCSVWriter(@"G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\DEC18_2012\DMatton\Clustering_186716\ProteinsPerPeptidesFromDatabases_AllReadingFrames.csv");

    NucleicAcid.InitHash();

    Dictionary<string, List<string>> protein1AAs = TranslateFastaInAllReadingFrames(@"G:\Thibault\Olivier\Databases\DMatton\Matton_Illumina_Anthesis_WithReverse.fasta");
    Dictionary<string, List<string>> protein2AAs = TranslateFastaInAllReadingFrames(@"G:\Thibault\Olivier\Databases\DMatton\mattond_20110418_WithReverse_EditedJuly2013.fasta");
    Dictionary<string, List<string>>[] databases = new Dictionary<string, List<string>>[] { protein1AAs, protein2AAs };

    writer.AddLine(csvPeptides.LINES_LIST[0]);
    for (int i = 1; i < csvPeptides.LINES_LIST.Count; i++)
    {
        string[] splits = csvPeptides.LINES_LIST[i].Split(vsCSV._Generic_Separator);
        string seq = splits[4];

        StringBuilder sb = new StringBuilder();
        foreach (Dictionary<string, List<string>> database in databases)
        {
            foreach (KeyValuePair<string, List<string>> entry in database)
            {
                foreach (string protSeq in entry.Value)
                {
                    if (protSeq.Contains(seq))
                    {
                        // One hit is enough to list this entry.
                        sb.Append(entry.Key + ";");
                        break;
                    }
                }
            }
        }

        if (sb.Length == 0)
        {
            Console.WriteLine("Zut");
        }
        writer.AddLine(csvPeptides.LINES_LIST[i] + "," + sb.ToString().Trim());
    }
    writer.WriteToFile();
}

/// <summary>
/// Reads every entry of a FASTA file and converts its sequence with
/// <c>NucleicAcid.ConvertNA3ToAAs</c> for shifts 0, 1 and 2, in both directions.
/// </summary>
/// <param name="fastaPath">Path of the FASTA file to read.</param>
/// <returns>
/// Converted sequences keyed by "description | Reading Frame n | Forward" or "... | Backward".
/// </returns>
private static Dictionary<string, List<string>> TranslateFastaInAllReadingFrames(string fastaPath)
{
    Dictionary<string, List<string>> translations = new Dictionary<string, List<string>>();

    // The stream used to be left open; dispose it once all entries are read.
    using (FileStream fasta = new FileStream(fastaPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        List<Protein> proteins = new List<Protein>(ProteinFastaReader.ReadProteins(fasta, false));
        foreach (Protein prot in proteins)
        {
            for (int shift = 0; shift < 3; shift++)
            {
                translations.Add(prot.Description + " | Reading Frame " + shift + " | Forward", NucleicAcid.ConvertNA3ToAAs(prot.BaseSequence, shift, false));
                translations.Add(prot.Description + " | Reading Frame " + shift + " | Backward", NucleicAcid.ConvertNA3ToAAs(prot.BaseSequence, shift, true));
            }
        }
    }

    return translations;
}
/// <summary>
/// Writes fragment statistics of the best PSM of every matched precursor, restricted to
/// targets or to decoys, into "FragmentStats_Targets.csv" or "FragmentStats_Decoy.csv" in
/// <c>dbOptions.OutputFolder</c>.
/// </summary>
/// <remarks>
/// A first section has one line per fragment class: number of matched products, summed
/// observed intensity and the number of matches per fragment position. It is followed by two
/// identical sections that repeat the count and the intensity using the fragment class's
/// <c>ToString()</c>. Precursors whose <c>OptimizedBestPsm()</c> is null are skipped.
/// </remarks>
/// <param name="target"><c>true</c> to report target PSMs, <c>false</c> for the others.</param>
public void WriteFragmentation(bool target)
{
    vsCSVWriter writer = new vsCSVWriter(dbOptions.OutputFolder + "FragmentStats_" + (target ? "Targets" : "Decoy") + ".csv");
    writer.AddLine(" === Fragmentation of " + (target ? "Targets" : "Decoys") + " ===");

    foreach (FragmentClass fragment in dbOptions.fragments)
    {
        double cumulIntensity = 0;
        int nbFrag = 0;
        // fragment position -> number of matched products at that position
        Dictionary<int, int> positions = new Dictionary<int, int>();

        foreach (Precursor precursor in matchedPrecursors)
        {
            PeptideSpectrumMatch psm = precursor.OptimizedBestPsm();
            // A precursor may have no best PSM; skip it instead of dereferencing null.
            if (psm != null && psm.Target == target)
            {
                foreach (ProductMatch match in psm.AllProductMatches)
                {
                    if (fragment == match.Fragment)
                    {
                        nbFrag++;
                        int seen;
                        positions.TryGetValue(match.fragmentPos, out seen); // seen is 0 when absent
                        positions[match.fragmentPos] = seen + 1;
                        cumulIntensity += match.obsIntensity;
                    }
                }
            }
        }

        string strPos = "";
        if (positions.Count > 0)
        {
            foreach (int key in positions.Keys)
            {
                strPos += "|" + key + ":" + positions[key];
            }
        }
        else
        {
            // Placeholder so that Substring(1) below yields an empty list.
            strPos += ",";
        }
        writer.AddLine(" " + fragment.Name + ", Number of fragments = , " + nbFrag + ", Intensity = ," + cumulIntensity + ", fragment matched [" + strPos.Substring(1) + "]");
    }

    // The summary section is written twice. The original code had two identical loops here,
    // left over from iterating FragmentDictionary.Fragments and FragmentDictionary.AAFragments.
    for (int pass = 0; pass < 2; pass++)
    {
        foreach (FragmentClass fragment in dbOptions.fragments)
        {
            double cumulIntensity = 0;
            int nbFrag = 0;
            foreach (Precursor precursor in matchedPrecursors)
            {
                PeptideSpectrumMatch psm = precursor.OptimizedBestPsm();
                if (psm != null && psm.Target == target)
                {
                    foreach (ProductMatch match in psm.AllProductMatches)
                    {
                        if (fragment == match.Fragment)
                        {
                            nbFrag++;
                            cumulIntensity += match.obsIntensity;
                        }
                    }
                }
            }
            writer.AddLine(" " + fragment + ", Number of fragments = ," + nbFrag + ", Intensity = ," + cumulIntensity);
        }
    }

    writer.WriteToFile();
}
/// <summary>
/// Writes the fragment tables of one PSM to
/// "&lt;sequence&gt;_&lt;sample file&gt;_&lt;track RT&gt;.csv" in <c>dbOptions.OutputFolder</c>.
/// </summary>
/// <remarks>
/// Four tables are written one after the other, each with a title row and one row per fragment
/// position (1 to the peptide length): theoretical m/z, observed intensities, observed m/z and
/// mass error. Columns come in two groups, each ordered by charge (1 to the precursor charge)
/// and then by fragment class. The first group uses every class of <c>dbOptions.fragments</c>.
/// The second uses only the classes for which <c>ComputeFragments</c> yields at least one
/// product for this peptide. A cell is left empty when no product matches. Otherwise it holds
/// the value of the first matching product.
/// </remarks>
/// <param name="psm">Peptide-spectrum match to export.</param>
public void ExportFragments(PeptideSpectrumMatch psm)
{
    vsCSVWriter csv = new vsCSVWriter(dbOptions.OutputFolder + psm.Peptide.Sequence + "_" + vsCSV.GetFileName_NoExtension(psm.Query.sample.sSDF) + "_" + psm.Query.precursor.Track.RT + ".csv");

    // Column group 1: every configured class. Column group 2: classes the peptide can produce.
    List<FragmentClass> everyClass = new List<FragmentClass>();
    List<FragmentClass> producedClasses = new List<FragmentClass>();
    foreach (FragmentClass candidate in dbOptions.fragments)
    {
        everyClass.Add(candidate);
        foreach (ProductMatch theoretical in dbOptions.fragments.ComputeFragments(psm.Peptide, psm.Query.precursor.Charge, dbOptions))
        {
            if (candidate == theoretical.Fragment)
            {
                producedClasses.Add(candidate);
                break;
            }
        }
    }
    List<FragmentClass>[] columnGroups = new List<FragmentClass>[] { everyClass, producedClasses };

    // All four tables share the same column captions.
    System.Text.StringBuilder captions = new System.Text.StringBuilder();
    foreach (List<FragmentClass> group in columnGroups)
    {
        for (int z = 1; z <= psm.Query.precursor.Charge; z++)
        {
            foreach (FragmentClass fragmentClass in group)
            {
                captions.Append(",").Append(fragmentClass.Name).Append(" ^").Append(z);
            }
        }
    }
    string columnCaptions = captions.ToString();

    // Table 1: theoretical m/z. ComputeFragments is re-enumerated for every cell and each
    // product is consumed immediately, without storing it.
    csv.AddLine("Theoretical Fragments" + columnCaptions);
    for (int position = 1; position <= psm.Peptide.Length; position++)
    {
        System.Text.StringBuilder row = new System.Text.StringBuilder(position.ToString());
        foreach (List<FragmentClass> group in columnGroups)
        {
            for (int z = 1; z <= psm.Query.precursor.Charge; z++)
            {
                foreach (FragmentClass fragmentClass in group)
                {
                    row.Append(",");
                    foreach (ProductMatch theoretical in dbOptions.fragments.ComputeFragments(psm.Peptide, psm.Query.precursor.Charge, dbOptions))
                    {
                        if (fragmentClass == theoretical.Fragment && theoretical.fragmentPos == position && theoretical.charge == z)
                        {
                            row.Append(theoretical.theoMz);
                            break;
                        }
                    }
                }
            }
        }
        csv.AddLine(row.ToString());
    }

    // Tables 2 to 4: one value of the observed products per table.
    string[] observedTitles = new string[] { "Observed Fragments Intensities", "Observed Fragments Mz", "Error on Fragments" };
    for (int table = 0; table < observedTitles.Length; table++)
    {
        csv.AddLine(observedTitles[table] + columnCaptions);
        for (int position = 1; position <= psm.Peptide.Length; position++)
        {
            System.Text.StringBuilder row = new System.Text.StringBuilder(position.ToString());
            foreach (List<FragmentClass> group in columnGroups)
            {
                for (int z = 1; z <= psm.Query.precursor.Charge; z++)
                {
                    foreach (FragmentClass fragmentClass in group)
                    {
                        row.Append(",");
                        foreach (ProductMatch observed in psm.AllProductMatches)
                        {
                            if (fragmentClass == observed.Fragment && observed.fragmentPos == position && observed.charge == z)
                            {
                                switch (table)
                                {
                                    case 0:
                                        row.Append(observed.obsIntensity);
                                        break;
                                    case 1:
                                        row.Append(observed.obsMz);
                                        break;
                                    default:
                                        row.Append(observed.mass_diff);
                                        break;
                                }
                                break; // only the first matching product is reported
                            }
                        }
                    }
                }
            }
            csv.AddLine(row.ToString());
        }
    }

    csv.WriteToFile();
}
/// <summary>
/// Copies a Mascot CSV report to <paramref name="outputCSV"/>, replacing the "Pep Elution Time"
/// column of every content line with a retention time looked up in the "*_RetentionTimes.csv"
/// files of <paramref name="folder"/>.
/// </summary>
/// <remarks>
/// Lines are copied unchanged until the "Scan Number", "FileName" and "Pep Elution Time"
/// columns have all been located. Every line after that point is treated as content. For a
/// content line, the dash-separated parts of the scan number cell are parsed as integers and
/// summed, and the sum is used as a line index into the first retention time file whose path
/// contains the file name of the "FileName" cell (without extension). Column 1 of that line
/// becomes the new elution time; it is left empty when no file matches.
/// </remarks>
/// <param name="mascotReportCSVFile">Mascot report to read.</param>
/// <param name="folder">Folder containing the "*_RetentionTimes.csv" files.</param>
/// <param name="outputCSV">Path of the CSV file to write.</param>
public static void FromFolderWithRetentionTimeCSV(string mascotReportCSVFile, string folder, string outputCSV)
{
    string[] rtPaths = Directory.GetFiles(folder, "*_RetentionTimes.csv");
    List<vsCSV> rtTables = new List<vsCSV>();
    for (int f = 0; f < rtPaths.Length; f++)
    {
        rtTables.Add(new vsCSV(rtPaths[f]));
    }

    vsCSVWriter output = new vsCSVWriter(outputCSV);
    vsCSV report = new vsCSV(mascotReportCSVFile);

    int scanColumn = -1;
    int fileColumn = -1;
    int rtColumn = -1;
    bool headerSeen = false;

    for (int rowIndex = 0; rowIndex < report.LINES_LIST.Count; rowIndex++)
    {
        string text = report.LINES_LIST[rowIndex];
        string[] cells = text.Split(vsCSV._Generic_Separator);

        // Column positions are (re)discovered on any line mentioning the column titles.
        if (text.Contains("Scan Number"))
        {
            scanColumn = vsCSV.GetColumnIndex(cells, "Scan Number");
        }
        if (text.Contains("FileName"))
        {
            fileColumn = vsCSV.GetColumnIndex(cells, "FileName");
        }
        if (text.Contains("Pep Elution Time"))
        {
            rtColumn = vsCSV.GetColumnIndex(cells, "Pep Elution Time");
        }

        if (headerSeen)
        {
            int scanSum = 0;
            foreach (string part in cells[scanColumn].Split('-'))
            {
                scanSum += int.Parse(part);
            }

            string rawName = vsCSV.GetFileName_NoExtension(cells[fileColumn]);
            string retentionTime = "";
            for (int f = 0; f < rtPaths.Length; f++)
            {
                if (!rtPaths[f].Contains(rawName))
                {
                    continue;
                }
                retentionTime = rtTables[f].LINES_LIST[scanSum].Split(vsCSV._Generic_Separator)[1];
                break;
            }

            cells[rtColumn] = retentionTime;
            text = vsCSV.Concatenate(cells, ",");
        }

        // Evaluated after the content step so that the header line itself is copied untouched.
        bool scanResolved = scanColumn >= 0 && scanColumn < cells.Length;
        bool fileResolved = fileColumn >= 0 && fileColumn < cells.Length;
        bool rtResolved = rtColumn >= 0 && rtColumn < cells.Length;
        if (scanResolved && fileResolved && rtResolved)
        {
            headerSeen = true;
        }

        output.AddLine(text);
    }

    output.WriteToFile();
}
/// <summary>
/// Provides deconvoluted elution curves of mixed spectra from the provided raw files using the provided synthetic raw file
/// Exports in CSV files and stores everything in class objects
/// </summary>
/// <remarks>
/// Stops early (after writing a message to <paramref name="conSol"/>) when either the spiked or
/// the mixed identification step returns no result. Otherwise writes "Results.csv" and
/// "Results_SiteOccupancy.csv" under <c>OutputFolder</c>, plus the per-sample exports done by
/// ExportSpikedSampleResult and ExportMixedSampleResult.
/// </remarks>
/// <param name="spikedRaws">Raw files of the spiked samples; one Sample is created per entry.</param>
/// <param name="mixedRaws">Raw files of the mixed samples; one Sample is created per entry.</param>
/// <param name="fastaFile">Fasta file used to build the options and to read the proteome.</param>
/// <param name="folderToOutputTo">Output folder handed to CreateOptions.</param>
/// <param name="conSol">Console used for progress and error messages.</param>
public void Solve(string[] spikedRaws, string[] mixedRaws, string fastaFile, string folderToOutputTo, IConSol conSol)
{
    dbOptions = CreateOptions(fastaFile, folderToOutputTo, precTolPpm, prodTolDa, conSol);

    SpikedSamples = new Samples(dbOptions);
    for (int i = 0; i < spikedRaws.Length; i++)
    {
        SpikedSamples.Add(new Sample(i + 1, 1, 1, spikedRaws[i], spikedRaws[i], 0, ""));
    }

    //Precompute Spiked peptide identifications
    SpikedResult = Ace.Start(dbOptions, SpikedSamples, false, false);
    if (SpikedResult == null)
    {
        // Same guard as for the mixed samples below: without identifications nothing can be characterized.
        conSol.WriteLine("OOPS! No queries could be extracted from the list of spiked spectrum files...");
        return;
    }
    SpikedResult.ExportPSMs(1, dbOptions.OutputFolder + "Identifications" + System.IO.Path.DirectorySeparatorChar + "SpikedSamplesPSMs.csv");

    MixedSamples = new Samples(dbOptions);
    for (int i = 0; i < mixedRaws.Length; i++)
    {
        MixedSamples.Add(new Sample(i + 1, 1, 1, mixedRaws[i], mixedRaws[i], 0, ""));
    }

    //Precompute Mixed peptide identifications
    mixedResult = Ace.Start(dbOptions, MixedSamples, false, false);
    if (mixedResult == null)
    {
        conSol.WriteLine("OOPS! No queries could be extracted from the list of mixed spectrum files...");
        return;
    }

    mixedResult.ExportPSMs(1, dbOptions.OutputFolder + "Identifications" + System.IO.Path.DirectorySeparatorChar + "MixedSamplesPSMs.csv");
    conSol.WriteLine("Computing gradient descents...");

    //Compute all usable spiked peptides
    characterizedPeptides = CharacterizedPrecursor.GetSpikedPrecursors(SpikedSamples, SpikedResult, dbOptions, nbMinFragments, nbMaxFragments);
    ExportSpikedSampleResult(characterizedPeptides, dbOptions);

    // Header rows of Results.csv: one column per characterized precursor, after two leading columns.
    vsCSVWriter writerCumul = new vsCSVWriter(OutputFolder + "Results.csv");
    string titleCombined = "Mixed Sample,Precursor";
    string curveStr = "Polynomial Curve,";
    string spikedIntensityStr = "Area under the curve,";
    foreach (double precursor in characterizedPeptides.Keys)
    {
        foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
        {
            titleCombined += "," + charPrec.Peptide.Sequence + " Charge " + charPrec.Charge;
            if (charPrec.eCurveIntensityCount.Coefficients != null && charPrec.eCurveIntensityCount.Coefficients.Length == 3)
            {
                // "ax^2 + bx + c": the separator before the constant term used to be missing.
                curveStr += "," + charPrec.eCurveIntensityCount.Coefficients[0] + "x^2 + "
                                + charPrec.eCurveIntensityCount.Coefficients[1] + "x + "
                                + charPrec.eCurveIntensityCount.Coefficients[2];
            }
            else
            {
                curveStr += ",NA";
            }
            spikedIntensityStr += "," + charPrec.eCurveIntensityCount.Area;
        }
    }
    writerCumul.AddLine(titleCombined);
    writerCumul.AddLine(curveStr);
    writerCumul.AddLine(spikedIntensityStr);

    mixedPrecursors = new Dictionary<Sample, List<MixedPrecursor>>();
    foreach (Sample mixedSample in MixedSamples)
    {
        mixedPrecursors.Add(mixedSample, MixedPrecursor.GetMixedPrecursors(mixedSample, mixedResult, dbOptions, characterizedPeptides));
    }

    //Get the list of precursors to characterize
    foreach (Sample mixedSample in MixedSamples)
    {
        foreach (double keyMz in characterizedPeptides.Keys)
        {
            List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>> listOfRatios = new List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>>();
            foreach (MixedPrecursor mPrec in mixedPrecursors[mixedSample])
            {
                if (mPrec.MZ == keyMz)
                {
                    // Compute Max Flow for this precursor
                    Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve> ratios = GetRatios(characterizedPeptides, mPrec, dbOptions, nbMinFragments, nbMaxFragments);
                    listOfRatios.Add(ratios);
                    ExportMixedSampleResult(ratios, mixedSample, mPrec, keyMz, dbOptions);
                }
            }

            // One result row per (sample, m/z); rows whose areas are all zero are not written.
            bool isEmpty = true;
            string resultStr = vsCSV.GetFileName(mixedSample.sSDF) + "," + keyMz;
            foreach (double precursor in characterizedPeptides.Keys)
            {
                foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
                {
                    double cumulArea = 0.0;
                    foreach (Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve> ratios in listOfRatios)
                    {
                        if (ratios.ContainsKey(charPrec))
                        {
                            cumulArea += ratios[charPrec].eCurvePerMs.Area;
                        }
                    }
                    resultStr += "," + cumulArea;
                    if (cumulArea > 0)
                    {
                        isEmpty = false;
                    }
                }
            }
            if (!isEmpty)
            {
                writerCumul.AddLine(resultStr);
            }
        }
    }
    writerCumul.WriteToFile();

    //List Modifications (the dictionary is only used as a set of distinct modifications)
    Dictionary<Modification, double> dicOfIntensityPerMod = new Dictionary<Modification, double>();
    foreach (Sample sample in mixedPrecursors.Keys)
    {
        foreach (MixedPrecursor mP in mixedPrecursors[sample])
        {
            foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
            {
                if (cP.Peptide.VariableModifications != null)
                {
                    foreach (Modification mod in cP.Peptide.VariableModifications.Values)
                    {
                        if (!dicOfIntensityPerMod.ContainsKey(mod))
                        {
                            dicOfIntensityPerMod.Add(mod, 0.0);
                        }
                    }
                }
            }
        }
    }

    //Compute site occupancy for identical sequences (real positional isomers)
    vsCSVWriter writerSitesOccupancy = new vsCSVWriter(OutputFolder + "Results_SiteOccupancy.csv");
    List<Protein> AllProteins = Ace.ReadProteomeFromFasta(fastaFile, false, dbOptions);
    foreach (Protein protein in AllProteins)
    {
        string newTitleProtein = protein.Description.Replace(',', ' ') + "," + protein.Sequence;
        for (int i = 0; i < protein.Sequence.Length; i++)
        {
            newTitleProtein += "," + protein[i].ToString();
        }
        writerSitesOccupancy.AddLine(newTitleProtein);

        // Coverage: per sample and residue, summed area of every peptide spanning that residue.
        // (The title line is already written; it is no longer re-extended here, which was dead work.)
        foreach (Sample mixedSample in mixedPrecursors.Keys)
        {
            string coverage = "Coverage," + mixedSample.Name;
            for (int i = 0; i < protein.Sequence.Length; i++)
            {
                double cumulSite = 0.0;
                foreach (MixedPrecursor mP in mixedPrecursors[mixedSample])
                {
                    foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
                    {
                        if (i + 1 >= cP.Peptide.StartResidueNumber && i + 1 <= cP.Peptide.EndResidueNumber)
                        {
                            cumulSite += mP.PeptideRatios[cP].eCurvePerMs.Area;
                        }
                    }
                }
                coverage += "," + cumulSite;
            }
            writerSitesOccupancy.AddLine(coverage);
        }

        // Per modification: one line per sample, one cell per residue.
        foreach (Modification mod in dicOfIntensityPerMod.Keys)
        {
            Dictionary<Sample, string> dicOfLines = new Dictionary<Sample, string>();
            for (int i = 0; i < protein.Sequence.Length; i++)
            {
                foreach (Sample mixedSample in mixedPrecursors.Keys)
                {
                    double cumulModArea = 0.0;
                    foreach (MixedPrecursor mP in mixedPrecursors[mixedSample])
                    {
                        foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
                        {
                            if (i + 1 >= cP.Peptide.StartResidueNumber && i + 1 <= cP.Peptide.EndResidueNumber && cP.Peptide.VariableModifications != null)
                            {
                                foreach (int pos in cP.Peptide.VariableModifications.Keys)
                                {
                                    // NOTE(review): the "- 2" offset maps the modification position to a protein residue; confirm the indexing convention of VariableModifications.
                                    if (cP.Peptide.StartResidueNumber + pos - 2 == i + 1 && cP.Peptide.VariableModifications[pos] == mod)
                                    {
                                        cumulModArea += mP.PeptideRatios[cP].eCurvePerMs.Area;
                                    }
                                }
                            }
                        }
                    }
                    if (!dicOfLines.ContainsKey(mixedSample))
                    {
                        dicOfLines.Add(mixedSample, mod.Description + "," + mixedSample.Name + "," + cumulModArea);
                    }
                    else
                    {
                        dicOfLines[mixedSample] += "," + cumulModArea;
                    }
                }
            }
            foreach (string line in dicOfLines.Values)
            {
                writerSitesOccupancy.AddLine(line);
            }
        }
    }
    writerSitesOccupancy.WriteToFile();
}
/// <summary>
/// Counts how often each non-header line of a fasta file occurs (optionally also counting the
/// reversed form of every line) and writes a histogram to <paramref name="csvFileOut"/>:
/// one "occurrence count,number of distinct sequences" line per count.
/// </summary>
/// <remarks>
/// Counts 1 to 40 are always listed, even when zero. Any exception is caught and its message and
/// stack trace are written to the console; in that case no output file is written.
/// </remarks>
/// <param name="fastaFile">Fasta file to read; lines starting with ">" are ignored.</param>
/// <param name="addReverse">When true, the reversed text of each sequence line is counted as well.</param>
/// <param name="csvFileOut">Path of the CSV histogram to write.</param>
public static void ComputeSequenceFROverlap(string fastaFile, bool addReverse, string csvFileOut)
{
    try
    {
        // Sequences are bucketed by their first character to keep each dictionary smaller.
        Dictionary<char, Dictionary<string, long>> DicOfSeq = new Dictionary<char, Dictionary<string, long>>(30);
        for (int i = 0; i < 26; i++)
        {
            DicOfSeq.Add((char)('A' + i), new Dictionary<string, long>());
        }

        FileStream fs;
        try
        {
            fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.Read);
        }
        catch (System.Exception)
        {
            // Retry with a more permissive sharing mode when the file is open for writing elsewhere.
            fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
        }

        // Disposing the reader also closes the underlying stream, even when reading fails.
        using (StreamReader sr = new StreamReader(fs))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                // Blank lines have no first character to bucket on; headers are not sequences.
                if (line.Length == 0 || line.StartsWith(">"))
                {
                    continue;
                }

                AddSequenceOccurrence(DicOfSeq, line);
                if (addReverse)
                {
                    AddSequenceOccurrence(DicOfSeq, Reverse(line));
                }
            }
        }

        Dictionary<long, long> DicOfNb = new Dictionary<long, long>();
        for (long i = 1; i <= 40; i++)
        {
            DicOfNb.Add(i, 0);
        }
        foreach (Dictionary<string, long> bucket in DicOfSeq.Values)
        {
            foreach (long val in bucket.Values)
            {
                long nbSequences;
                DicOfNb.TryGetValue(val, out nbSequences);
                DicOfNb[val] = nbSequences + 1;
            }
        }

        vsCSVWriter writer = new vsCSVWriter(csvFileOut);
        foreach (long key in DicOfNb.Keys)
        {
            writer.AddLine(key + "," + DicOfNb[key]);
        }
        writer.WriteToFile();
    }
    catch (System.Exception ex)
    {
        Console.WriteLine(ex.Message);
        Console.WriteLine(ex.StackTrace);
    }
}

/// <summary>
/// Increments the occurrence count of a non-empty sequence, in the bucket of its own first character.
/// </summary>
private static void AddSequenceOccurrence(Dictionary<char, Dictionary<string, long>> buckets, string sequence)
{
    // Bucketing on the sequence's own first character (not the forward line's) is what lets a
    // reversed sequence meet an identical forward sequence. Buckets for characters outside
    // 'A'-'Z' are created on demand instead of failing the whole run.
    Dictionary<string, long> bucket;
    if (!buckets.TryGetValue(sequence[0], out bucket))
    {
        bucket = new Dictionary<string, long>();
        buckets.Add(sequence[0], bucket);
    }

    long count;
    bucket.TryGetValue(sequence, out count);
    bucket[sequence] = count + 1;
}
/// <summary>
/// Combines a tab-separated conservation matrix with a list of annotated sites and writes three
/// CSV reports to hard-coded paths under C:\_IRIC\DATA\Sumo\.
/// </summary>
/// <remarks>
/// Matrix columns used: [1] protein identifier, [2] position, [3] amino acid, [5] numeric value.
/// Lines with fewer than six columns, or whose value does not parse, contribute no value.
/// Outputs:
/// - outputL.csv: annotated lines joined with the matching matrix line (lysines only);
/// - outputConservation.csv: mean value per amino acid, for the annotated lysines ("SpecialK"),
///   and the mean of a random sample of lysine values of the same size ("Echantillonnage K");
/// - outputConservationAll_b.csv: all collected values, one column per category.
/// </remarks>
/// <returns>Always true.</returns>
public static bool LysineConservation()
{
    List<string> zincIDs = GetZincProteinsENSP();
    Dictionary<string, List<int>> dicOfSites = ReadZNF(@"C:\_IRIC\DATA\Sumo\ZNF.csv");
    string csvMatrix = @"C:\_IRIC\DATA\Sumo\matrix_human.tsv";
    string csvToAnnotate = @"C:\_IRIC\DATA\Sumo\liste des sites SUMO.csv";
    string output = @"C:\_IRIC\DATA\Sumo\outputL.csv";
    string outputConservation = @"C:\_IRIC\DATA\Sumo\outputConservation.csv";
    string outputAll = @"C:\_IRIC\DATA\Sumo\outputConservationAll_b.csv";

    vsCSV csvM = new vsCSV(csvMatrix);
    vsCSV csvA = new vsCSV(csvToAnnotate);

    // Annotated sites, keyed by columns 0 and 5 concatenated; the value is the line index in csvA.
    Dictionary<string, int> dicOfAnnotates = new Dictionary<string, int>();
    for (int i = 1; i < csvA.LINES_LIST.Count; i++)
    {
        string[] items = csvA.LINES_LIST[i].Split(vsCSV._Generic_Separator);
        dicOfAnnotates.Add(items[0] + items[5], i);
    }

    vsCSVWriter writer = new vsCSVWriter(output);
    writer.AddLine(csvA.getFirstLine());

    // One value list per letter A-Z plus three special categories; also builds the header line.
    Dictionary<string, List<double>> dicOfAllAA = new Dictionary<string, List<double>>();
    vsCSVWriter writerConsAll = new vsCSVWriter(outputAll);
    StringBuilder sb = new StringBuilder();
    for (char letter = 'A'; letter <= 'Z'; letter++)
    {
        dicOfAllAA.Add(letter.ToString(), new List<double>());
        sb.Append(letter + ",");
    }
    sb.Append("SpecialK,ZincEveryWhere,ZincSumo");
    dicOfAllAA.Add("SpecialK", new List<double>());
    dicOfAllAA.Add("ZincEveryWhere", new List<double>());
    dicOfAllAA.Add("ZincSumo", new List<double>());
    writerConsAll.AddLine(sb.ToString());

    writer.AddLine("AminoAcid,Some Number");

    Dictionary<string, double> dicOfAA = new Dictionary<string, double>();
    Dictionary<string, int> dicOfAANumber = new Dictionary<string, int>();
    int nbK = 0;
    List<double> echantillonageNoZero = new List<double>();

    for (int j = 0; j < csvM.LINES_LIST.Count; j++)
    {
        string[] splitsJ = csvM.LINES_LIST[j].Split('\t');
        if (splitsJ.Length < 6)
        {
            // Blank or truncated line: columns 3 and 5 do not exist.
            continue;
        }

        string aa = splitsJ[3];
        if (!dicOfAA.ContainsKey(aa))
        {
            dicOfAANumber.Add(aa, 0);
            dicOfAA.Add(aa, 0);
        }

        // Parsed once; both the per-amino-acid and the lysine statistics need a numeric value.
        double value;
        if (!double.TryParse(splitsJ[5], out value))
        {
            continue;
        }

        dicOfAA[aa] += value;
        dicOfAANumber[aa]++;
        dicOfAllAA[aa].Add(value);

        if ("K".CompareTo(aa) != 0)
        {
            continue;
        }

        bool found = false;
        foreach (string id in zincIDs)
        {
            if (id.CompareTo(splitsJ[1]) == 0)
            {
                found = true;
                break;
            }
        }
        if (found)
        {
            dicOfAllAA["ZincEveryWhere"].Add(value);
        }
        if (dicOfSites.ContainsKey(splitsJ[1] + "|" + splitsJ[2]))
        {
            dicOfAllAA["ZincSumo"].Add(value);
        }

        echantillonageNoZero.Add(value);

        int annotatedLine;
        if (dicOfAnnotates.TryGetValue(splitsJ[1] + splitsJ[2], out annotatedLine))
        {
            if (!dicOfAA.ContainsKey("SpecialK"))
            {
                dicOfAANumber.Add("SpecialK", 0);
                dicOfAA.Add("SpecialK", 0);
            }
            dicOfAA["SpecialK"] += value;
            dicOfAANumber["SpecialK"]++;
            dicOfAllAA["SpecialK"].Add(value);
            nbK++;
            writer.AddLine(csvA.LINES_LIST[annotatedLine] + "," + csvM.LINES_LIST[j].Replace('\t', ','));
        }
    }
    writer.WriteToFile();

    vsCSVWriter writerCons = new vsCSVWriter(outputConservation);
    foreach (string key in dicOfAA.Keys)
    {
        writerCons.AddLine(key + "," + dicOfAA[key] / (double)dicOfAANumber[key]);
    }

    // Random sample (with replacement) of nbK lysine values. Random.Next(count) covers every
    // index; the former Floor(NextDouble() * (Count - 1)) could never pick the last element.
    double meanNoZero = 0;
    Random r = new Random();
    for (int i = 0; i < nbK; i++)
    {
        int index = r.Next(echantillonageNoZero.Count);
        meanNoZero += echantillonageNoZero[index];
    }
    writerCons.AddLine("Echantillonnage K," + meanNoZero / (double)nbK);
    writerCons.WriteToFile();

    // Dump the value lists column-wise; shorter lists are padded with empty cells.
    // The loop ends with one all-empty row, once every list is exhausted.
    int lineIndex = 0;
    bool keepGoing = true;
    while (keepGoing)
    {
        StringBuilder sb2 = new StringBuilder();
        keepGoing = false;
        foreach (string key in dicOfAllAA.Keys)
        {
            if (lineIndex < dicOfAllAA[key].Count)
            {
                sb2.Append(dicOfAllAA[key][lineIndex] + ",");
                keepGoing = true;
            }
            else
            {
                sb2.Append(",");
            }
        }
        writerConsAll.AddLine(sb2.ToString());
        lineIndex++;
    }
    writerConsAll.WriteToFile();
    return (true);
}
// Proteomics.Utilities.Fasta.FastaRead.AppendProteinDescriptionToMascotReport(@"C:\Users\caronlio\Downloads\filtered peptides.csv",
//                                                                            @"C:\_IRIC\DATA\Tariq\peptideDb-minOcc60_WithReverse.fasta",
//                                                                            @"C:\Users\caronlio\Downloads\filtered peptides_WithProteinDescriptions.csv");

/// <summary>
/// Copies a Mascot report to <paramref name="csvFileOut"/>, appending to each line the
/// (position, column 5) pairs of the hCKSAAP file entries that fall inside the peptide range.
/// </summary>
/// <remarks>
/// For each Mascot line with at least 18 columns that does not start with "Search", columns 16
/// and 17 are parsed as the inclusive start/stop positions. The hCKSAAP file is scanned for the
/// first section whose ">IPI:" header contains column 3 of the Mascot line; within that section,
/// tab-separated rows with more than 6 fields whose first field is an integer inside the range
/// are appended. Errors on a single line are logged to the console and the line is still written.
/// </remarks>
/// <param name="txtHCKSAAPFile">hCKSAAP prediction file (">"-delimited sections, tab-separated rows).</param>
/// <param name="csvMascotFile">Mascot report to annotate.</param>
/// <param name="csvFileOut">Path of the annotated CSV to write.</param>
public static void AppendhCKSAAPToMascotReport(string txtHCKSAAPFile, string csvMascotFile, string csvFileOut)
{
    vsCSV csvUbi = new vsCSV(txtHCKSAAPFile);
    vsCSV csvMascot = new vsCSV(csvMascotFile);
    vsCSVWriter writer = new vsCSVWriter(csvFileOut);
    try
    {
        foreach (string lineMascot in csvMascot.LINES_LIST)
        {
            string strToAppend = "";
            try
            {
                string[] mSplits = lineMascot.Split(vsCSV._Generic_Separator);
                // Index 17 is read below, so 18 columns are required (the former ">= 17" let
                // 17-column rows through and they failed with an out-of-range exception).
                if (mSplits.Length >= 18 && !lineMascot.StartsWith("Search"))
                {
                    int indexStart = int.Parse(mSplits[16]);
                    int indexStop = int.Parse(mSplits[17]);
                    bool inIPI = false;
                    foreach (string lineUbi in csvUbi.LINES_LIST)
                    {
                        if (lineUbi.StartsWith(">"))
                        {
                            if (inIPI)
                            {
                                // Next section header reached: the matching section is finished.
                                break;
                            }
                            inIPI = lineUbi.StartsWith(">IPI:") && lineUbi.Contains(mSplits[3]);
                        }

                        if (inIPI)
                        {
                            string[] splits = lineUbi.Split('\t');
                            int indexPos = -1;
                            if (splits.Length > 6 && int.TryParse(splits[0], out indexPos))
                            {
                                if (indexPos >= indexStart && indexPos <= indexStop)
                                {
                                    strToAppend += "," + splits[0] + "," + splits[5];
                                }
                            }
                        }
                    }
                }
            }
            catch (System.Exception ex)
            {
                // Best effort: a malformed line is reported but still copied to the output.
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
            }
            writer.AddLine(lineMascot + strToAppend);
        }
        writer.WriteToFile();
    }
    catch (System.Exception ex)
    {
        Console.WriteLine(ex.Message);
        Console.WriteLine(ex.StackTrace);
    }
}
/// <summary>
/// Extract isomer ratios from a given spectrum (transformed into capacity vector)
/// </summary>
/// <remarks>
/// When any isomer has no normalized fragments for <paramref name="nbProductsToKeep"/>, nothing
/// is solved: an empty dictionary is returned, <paramref name="percentError"/> is 1.0 and
/// <paramref name="underFlow"/> is 0. In that case the optional fragment file is not written.
/// </remarks>
/// <param name="ratiosToFit">Characterized isomers to fit; enumerated several times.</param>
/// <param name="nbProductsToKeep">Key selecting which normalized fragment set of each isomer is used.</param>
/// <param name="capacity">Observed peaks; intensities within tolerance of a fragment m/z are summed.</param>
/// <param name="tolerance">Mass tolerance (units and value) used to match peaks to fragments.</param>
/// <param name="PrecursorIntensityInCTrap">Intensity used to scale unit spectra and to derive the solver step size.</param>
/// <param name="PrecursorIntensity">Not used by this method.</param>
/// <param name="underFlow">Underflow reported by the max-flow style solver.</param>
/// <param name="percentError">Underflow divided by the summed intensity of the mixed spectrum.</param>
/// <param name="ConSole">Console passed through to the solver.</param>
/// <param name="fileOut">Optional CSV path for a fragment-level export of the fit.</param>
/// <returns>The solved result of each isomer, in the enumeration order of <paramref name="ratiosToFit"/>.</returns>
public static Dictionary<CharacterizedPrecursor, SolvedResult> SolveFromSpectrum(IEnumerable<CharacterizedPrecursor> ratiosToFit, int nbProductsToKeep,
                                    IEnumerable<MsMsPeak> capacity, MassTolerance tolerance,
                                    double PrecursorIntensityInCTrap, double PrecursorIntensity,
                                    out double underFlow, out double percentError, IConSol ConSole, string fileOut = null)
{
    bool keepGoing = true;
    Dictionary<double, double> mixedSpectrum = new Dictionary<double, double>();
    List<Dictionary<double, double>> unitSpectrum = new List<Dictionary<double, double>>();
    foreach (CharacterizedPrecursor isomer in ratiosToFit)
    {
        // Check before indexing: the fragment set used to be read first, so a missing entry threw
        // KeyNotFoundException instead of reaching the "cannot solve" path below.
        if (!isomer.NormalizedFragments.ContainsKey(nbProductsToKeep))
        {
            keepGoing = false;
            continue;
        }
        Dictionary<double, double> isomerFragments = isomer.NormalizedFragments[nbProductsToKeep];

        // Mixed spectrum: summed observed intensity around each fragment m/z (each m/z only once).
        foreach (double key in isomerFragments.Keys)
        {
            if (!mixedSpectrum.ContainsKey(key))
            {
                double cumulIntensity = 0.0;
                foreach (MsMsPeak peak in capacity)
                {
                    if (Math.Abs(Utilities.Numerics.CalculateMassError(peak.MZ, key, tolerance.Units)) <= tolerance.Value)
                    {
                        cumulIntensity += peak.Intensity;
                    }
                }
                mixedSpectrum.Add(key, cumulIntensity);// / PrecursorIntensityInCTrap);
            }
        }

        // Unit spectrum: normalized fragments, rescaled when a normalizor exists for this fragment count.
        if (isomer.FragmentNormalizor.ContainsKey(nbProductsToKeep))
        {
            double scale = isomer.FragmentNormalizor[nbProductsToKeep].InterpolateIntensity(PrecursorIntensityInCTrap);
            Dictionary<double, double> dic = new Dictionary<double, double>();
            foreach (double key in isomerFragments.Keys)
            {
                dic.Add(key, isomerFragments[key] * scale);
            }
            unitSpectrum.Add(dic);
        }
        else
        {
            unitSpectrum.Add(isomerFragments);
        }
    }

    vsCSVWriter writerFrag = null;
    if (!string.IsNullOrEmpty(fileOut))
    {
        writerFrag = new vsCSVWriter(fileOut);
        string line = "Fragments:";
        foreach (double key in mixedSpectrum.Keys)
        {
            line += "," + key;
        }
        writerFrag.AddLine(line);
        line = "Mixed:";
        foreach (double val in mixedSpectrum.Values)
        {
            line += "," + val;
        }
        writerFrag.AddLine(line);
    }

    if (!keepGoing)
    {
        percentError = 1.0;
        underFlow = 0;
        return (new Dictionary<CharacterizedPrecursor, SolvedResult>());
    }

    //This nbProduct seems relevant, try to use isomer to get ratios for this spectrum
    List<double> solution = new List<double>();
    double stepSize = PrecursorIntensityInCTrap / 1000.0;
    if (stepSize < 1)
    {
        stepSize = 1;
    }
    double tmpUnderflow = 0;
    Utilities.Methods.GradientDescent.SolveMaxFlowStyle(unitSpectrum, mixedSpectrum, out solution, out tmpUnderflow, ConSole, stepSize);
    //Utilities.Methods.GradientDescent.SolveFromGradientDescent(unitSpectrum, mixedSpectrum, PrecursorIntensityInCTrap, out solution, out tmpUnderflow, ConSole);

    double sumOfIntensities = 0;
    foreach (double val in mixedSpectrum.Values)
    {
        sumOfIntensities += val;
    }
    underFlow = tmpUnderflow;

    List<SolvedResult> result = GetResultList(solution, underFlow, sumOfIntensities);
    Dictionary<CharacterizedPrecursor, SolvedResult> resultPerSample = new Dictionary<CharacterizedPrecursor, SolvedResult>();
    int i = 0;
    foreach (CharacterizedPrecursor key in ratiosToFit)
    {
        resultPerSample.Add(key, result[i]);
        i++;
    }

    if (writerFrag != null)
    {
        foreach (CharacterizedPrecursor cPrec in ratiosToFit)
        {
            Dictionary<double, double> cPrecFragments = cPrec.NormalizedFragments[nbProductsToKeep];
            string line = cPrec.Peptide.Sequence;
            foreach (double key in mixedSpectrum.Keys)
            {
                // mixedSpectrum holds the union of all isomers' fragments; a fragment this isomer
                // does not produce contributes 0 instead of throwing.
                double fragIntensity;
                if (!cPrecFragments.TryGetValue(key, out fragIntensity))
                {
                    fragIntensity = 0.0;
                }
                line += "," + fragIntensity * resultPerSample[cPrec].NbFitTimes;
            }
            writerFrag.AddLine(line);
        }
        writerFrag.WriteToFile();
    }

    percentError = (underFlow / sumOfIntensities);
    return (resultPerSample);
}
/// <summary>
/// Reads a hard-coded CSV of timed variables, fills in missing values, normalizes the variables,
/// correlates each one with the "price" variable and writes a correlation-weighted prediction per
/// time point to C:\_IRIC\predictions.csv, followed by a full export to C:\_IRIC\assignOut.csv.
/// </summary>
/// <remarks>
/// Each prediction row holds only the prediction value (sum over all non-price variables of
/// correlation times normalized value, multiplied by 100000), although the header names three columns.
/// </remarks>
/// <param name="console">Not used by this method; messages go to <see cref="Console"/>.</param>
public static void Launch(IConSol console)
{
    vsCSV input = new vsCSV(@"C:\Users\caronlio\Downloads\Via.Science.Pre.Interview.Assignment.Data.2013.10.18.csv");
    Dictionary<DateTime, List<Variable>> byTime = new Dictionary<DateTime, List<Variable>>();
    Dictionary<string, List<Variable>> byName = new Dictionary<string, List<Variable>>();

    //Data sorted based on date (the first line is skipped)
    for (int row = 1; row < input.LINES_LIST.Count; row++)
    {
        Variable parsed = new Variable(input.LINES_LIST[row]);

        List<Variable> sameTime;
        if (!byTime.TryGetValue(parsed.time, out sameTime))
        {
            sameTime = new List<Variable>();
            byTime.Add(parsed.time, sameTime);
        }
        sameTime.Add(parsed);

        List<Variable> sameName;
        if (!byName.TryGetValue(parsed.name, out sameName))
        {
            sameName = new List<Variable>();
            byName.Add(parsed.name, sameName);
        }
        sameName.Add(parsed);
    }

    foreach (string variableName in byName.Keys)
    {
        InterpolateMissingValues(variableName, byTime, byName);
    }

    //Rebuild the per-name index from the per-time index
    byName.Clear();
    foreach (List<Variable> atTime in byTime.Values)
    {
        foreach (Variable item in atTime)
        {
            List<Variable> sameName;
            if (!byName.TryGetValue(item.name, out sameName))
            {
                sameName = new List<Variable>();
                byName.Add(item.name, sameName);
            }
            sameName.Add(item);
        }
    }

    //Compute Normalized values
    NormalizeVariables(byName);

    //Foreach variable, compare correlation with the "price" variable
    List<double> priceSeries = GetArrayofNormed("price", byTime, byName);
    Dictionary<string, double> correlations = new Dictionary<string, double>();
    foreach (string variableName in byName.Keys)
    {
        List<double> series = GetArrayofNormed(variableName, byTime, byName);
        double pearson = MathNet.Numerics.Statistics.Correlation.Pearson(priceSeries, series);
        bool isPrice = variableName.CompareTo("price") == 0;
        if (isPrice)
        {
            Console.WriteLine("test");
        }
        correlations.Add(variableName, pearson);
    }

    //Prediction
    vsCSVWriter predictions = new vsCSVWriter(@"C:\_IRIC\predictions.csv");
    predictions.AddLine("Time,Price,Prediction");
    foreach (KeyValuePair<DateTime, List<Variable>> timePoint in byTime)
    {
        double prediction = 0;
        foreach (KeyValuePair<string, double> weight in correlations)
        {
            if (weight.Key.CompareTo("price") == 0)
            {
                // The price itself is the target, not a predictor.
                continue;
            }
            foreach (Variable observed in timePoint.Value)
            {
                if (observed.name.CompareTo(weight.Key) == 0)
                {
                    prediction += weight.Value * observed.normValue;
                }
            }
        }
        prediction = prediction * 100000;
        predictions.AddLine(prediction.ToString());
    }
    predictions.WriteToFile();

    //Export a csv of the variables, ordered by date
    ExportAllVariables(@"C:\_IRIC\assignOut.csv", byTime, byName, correlations);
    Console.WriteLine("Done!");
}