/// <summary>
/// Collects the native IDs of every charge-3 spectrum found in an idpXML workspace.
/// </summary>
/// <param name="idpXML">Path of the idpXML file passed to <c>Package.loadWorkspace</c>.</param>
/// <param name="mzML">
/// Path of the companion spectrum file. It is still opened, so an unreadable path keeps
/// failing as before, but none of its spectra are read by this method.
/// </param>
/// <returns>
/// The native IDs in workspace iteration order. Spectra whose native ID is null or empty are skipped;
/// duplicates are not removed.
/// </returns>
public static List<string> IDList(string idpXML, string mzML)
{
    List<string> nativeIds = new List<string>();

    // The data file used to be opened and then abandoned; dispose it deterministically instead.
    // NOTE(review): assumes MSDataFile implements IDisposable (true for the pwiz CLI bindings) - confirm.
    using (MSDataFile msData = new MSDataFile(mzML))
    {
        IDPicker.Workspace workspace = new IDPicker.Workspace();
        Package.loadWorkspace(ref workspace, idpXML);

        foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
        {
            foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
            {
                foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)
                {
                    // Only triply charged precursors are of interest here.
                    if (!sItr.Value.id.charge.Equals(3))
                    {
                        continue;
                    }

                    string nativeId = sItr.Value.nativeID;
                    if (!string.IsNullOrEmpty(nativeId))
                    {
                        nativeIds.Add(nativeId);
                    }
                }
            }
        }
    }

    return nativeIds;
}
/// <summary>
/// Loads an idpXML workspace and records the distinct peptide sequences and spectrum IDs it contains.
/// </summary>
/// <param name="path">
/// Path of the idpXML file. Its file name without extension is used as the prefix of every
/// spectrum ID ("name.index").
/// </param>
/// <remarks>
/// Fills the <c>peptideslist</c> / <c>spectralist</c> members, de-duplicates both through
/// <c>Package.removeDuplicate</c>, and stores the resulting counts in <c>peptides</c> / <c>spectra</c>.
/// </remarks>
public SpectraPeptides(string path)
{
    string sourceName = Path.GetFileNameWithoutExtension(path);

    IDPicker.Workspace workspace = new IDPicker.Workspace();
    Package.loadWorkspace(ref workspace, path);

    foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
    {
        foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
        {
            foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)
            {
                // results[1] / peptides.Min: the entry the rest of this file consistently treats
                // as "the" identification of a spectrum.
                IDPicker.ResultInstance ri = sItr.Value.results[1];
                IDPicker.VariantInfo vi = ri.info.peptides.Min;

                peptideslist.Add(vi.peptide.sequence);
                spectralist.Add(sourceName + "." + sItr.Value.id.index.ToString());
            }
        }
    }

    peptideslist = Package.removeDuplicate(peptideslist);
    spectralist = Package.removeDuplicate(spectralist);
    peptides = peptideslist.Count;
    spectra = spectralist.Count;
}
///<summary>
///Original routine for grabbing the required ions. For every identified spectrum whose precursor
///charge equals <paramref name="z"/> and whose peptide sequence is listed in
///<paramref name="pepList"/>, the theoretical b/y fragments are matched against the TIC-filtered
///peaks and one comma-separated row per peptide bond is appended to <paramref name="output"/>.
///</summary>
///<param name="idpXMLFile">idpXML file holding the identifications.</param>
///<param name="mzMLFile">Spectrum file the identifications refer to.</param>
///<param name="TicCutoffPercentage">Fraction of the spectrum's total ion current used to derive the peak-intensity threshold.</param>
///<param name="z">Precursor charge to process. Rows are only produced for z == 3, because the ambiguity label needs the charge-3 fragments.</param>
///<param name="pepList">Peptide sequences to keep.</param>
///<param name="output">List that receives the rows; the same instance is returned.</param>
///<returns><paramref name="output"/>, extended with rows of the form
///nativeID, sequence, basePeak, TIC, b intensities (charge 1..z), y intensities (charge 1..z),
///length, bond, residue counts, ambiguityLabel.</returns>
///<exception cref="InvalidOperationException">The spectrum cannot be located by native ID or by stored index.</exception>
public static List<string> idpReader_original(string idpXMLFile, string mzMLFile, double TicCutoffPercentage, int z, List<string> pepList, List<string> output)
{
    IDPicker.Workspace workspace = new IDPicker.Workspace();
    Package.loadWorkspace(ref workspace, idpXMLFile);

    // The data file used to be left open; dispose it once all spectra have been read.
    // NOTE(review): assumes MSDataFile implements IDisposable (true for the pwiz CLI bindings) - confirm.
    using (MSDataFile msData = new MSDataFile(mzMLFile))
    {
        SpectrumList sl = msData.run.spectrumList;

        foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
        {
            foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
            {
                foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)
                {
                    IDPicker.ResultInstance ri = sItr.Value.results[1];
                    IDPicker.VariantInfo vi = ri.info.peptides.Min;

                    if (!sItr.Value.id.charge.Equals(z))
                    {
                        continue;
                    }

                    //make sure that the peptide is what we want
                    string pepSequence = vi.peptide.sequence;
                    if (!pepList.Contains(pepSequence))
                    {
                        continue;
                    }

                    int len = pepSequence.Length;

                    // Look up the index with nativeID; a missing ID goes straight to the
                    // fallback instead of handing null to the lookup.
                    string nativeId = string.IsNullOrEmpty(sItr.Value.nativeID) ? null : sItr.Value.nativeID;
                    int spectrumCount = sl.size();
                    int spectrumIndex = nativeId != null ? sl.find(nativeId) : spectrumCount;

                    // Trust the local index, if the nativeID lookup fails
                    if (spectrumIndex >= spectrumCount)
                    {
                        spectrumIndex = sItr.Value.id.index;
                    }

                    // Bail if the local index is outside the spectrum list
                    if (spectrumIndex < 0 || spectrumIndex >= spectrumCount)
                    {
                        throw new InvalidOperationException("Can't find spectrum associated with the index.");
                    }

                    Spectrum spec1 = sl.spectrum(spectrumIndex, true);
                    MZIntensityPairList peaks = new MZIntensityPairList();
                    spec1.getMZIntensityPairs(ref peaks);

                    //get base peak and TIC
                    double basePeakValue = 0;
                    double TICValue = 0;
                    foreach (CVParam CVP in spec1.cvParams)
                    {
                        if (CVP.name == "base peak intensity")
                        {
                            basePeakValue = CVP.value;
                        }
                        if (CVP.name == "total ion current")
                        {
                            TICValue = CVP.value;
                        }
                    }
                    string basePeak = basePeakValue.ToString();
                    string TIC = TICValue.ToString();

                    //very important. Surendra put them here
                    //to change those with modifications {} into a format
                    //that fragment method will accept.
                    string interpretation = vi.ToSimpleString();
                    Peptide peptide = new Peptide(interpretation, ModificationParsing.ModificationParsing_Auto, ModificationDelimiter.ModificationDelimiter_Brackets);
                    Fragmentation fragmentation = peptide.fragmentation(true, true);

                    Set<Peak> peakList = originalReaderSelectPeaks(peaks, TICValue, TicCutoffPercentage);

                    //rowDic contains row information of each peptide bond
                    Dictionary<int, string> rowDic = new Dictionary<int, string>();
                    //intensityDic contains intensity information of fragment ions for each peptide bond
                    Dictionary<int, List<double>> intensityDic = new Dictionary<int, List<double>>();
                    //every matched intensity of this peptide, over all bonds
                    List<double> completeIntensityList = new List<double>();

                    for (int k = 1; k < len; k++)
                    {
                        List<double> intensityList = new List<double>();

                        // N-terminal (b) and C-terminal (y) halves around bond k.
                        string bion = pepSequence.Substring(0, k);
                        string yion = pepSequence.Substring(k, len - k);

                        int NR = Package.parseAAResidues(bion, 'R');
                        int NK = Package.parseAAResidues(bion, 'K');
                        int NH = Package.parseAAResidues(bion, 'H');
                        int NL = k;
                        int CR = Package.parseAAResidues(yion, 'R');
                        int CK = Package.parseAAResidues(yion, 'K');
                        int CH = Package.parseAAResidues(yion, 'H');
                        int CL = len - k;
                        int R = NR - CR;
                        int K = NK - CK;
                        int H = NH - CH;
                        int L = NL - CL;
                        int NBasicAA = NR + NK + NH;
                        int CBasicAA = CR + CK + CH;
                        string AA = NBasicAA + "," + CBasicAA + "," + NR + "," + NK + "," + NH + "," + NL + "," + CR + "," + CK + "," + CH + "," + CL + "," + R + "," + K + "," + H + "," + L;

                        // Index i holds the value for fragment charge i; slot 0 is unused.
                        double[] bIntensity = new double[z + 1];
                        double[] yIntensity = new double[z + 1];
                        string bIonIntensity = "";
                        string yIonIntensity = "";

                        //to judge if the sum of intensities are 0
                        //so to exclude the case with all "0s"
                        double sumIntensity = 0;

                        for (int i = 1; i <= z; i++)
                        {
                            double bMz = fragmentation.b(k, i);
                            double yMz = fragmentation.y(len - k, i);

                            //change for Q-star purposes: 70 ppm matching window.
                            Peak bmatched = Package.findClose(peakList, bMz, 70 * bMz * Math.Pow(10, -6));
                            Peak ymatched = Package.findClose(peakList, yMz, 70 * yMz * Math.Pow(10, -6));

                            if (bmatched != null)
                            {
                                bIntensity[i] = bmatched.rankOrIntensity;
                                intensityList.Add(bmatched.rankOrIntensity);
                                completeIntensityList.Add(bmatched.rankOrIntensity);
                            }
                            if (ymatched != null)
                            {
                                yIntensity[i] = ymatched.rankOrIntensity;
                                intensityList.Add(ymatched.rankOrIntensity);
                                completeIntensityList.Add(ymatched.rankOrIntensity);
                            }

                            sumIntensity = sumIntensity + bIntensity[i] + yIntensity[i];

                            //record b/y ion intensity information into a string
                            bIonIntensity = bIonIntensity + "," + bIntensity[i];
                            yIonIntensity = yIonIntensity + "," + yIntensity[i];
                        }
                        intensityDic.Add(k, intensityList);

                        //the charge label is only defined for triply charged precursors,
                        //and bonds without any matched fragment are dropped
                        if (z == 3 && sumIntensity != 0)
                        {
                            string ambiguityLabel = originalReaderAmbiguityLabel(bIntensity, yIntensity);
                            string finalString = nativeId + "," + pepSequence + "," + basePeak + "," + TIC + bIonIntensity + yIonIntensity + "," + len + "," + k + "," + AA + "," + ambiguityLabel;
                            rowDic.Add(k, finalString);
                        }
                    }//end for each peptide bond

                    //now we have rowDic and intensityDic for each pep bond and
                    //completeIntensityList for the whole peptide; rows sharing an intensity
                    //reported by Package.findCommon are removed as duplicate matches
                    List<double> duplicateList = Package.findCommon(completeIntensityList);
                    foreach (KeyValuePair<int, string> row in rowDic)
                    {
                        List<double> bondIntensities = intensityDic[row.Key];
                        bool unique = true;
                        foreach (double inten in duplicateList)
                        {
                            if (bondIntensities.Contains(inten))
                            {
                                unique = false;
                                Console.WriteLine("kick");
                                break;
                            }
                        }
                        if (unique)
                        {
                            output.Add(row.Value);
                        }
                    }
                }//end foreach spectrum
            }
        }
    }//end using

    return output;
}

// Keeps the peaks whose intensity reaches the threshold derived from the TIC cutoff.
// The threshold is the last (smallest) intensity, walking from the most intense peak down, for
// which the running intensity sum is still below cutoffFraction * ticValue. It stays 0 - i.e.
// every peak is kept - when even the first peak reaches the cutoff.
private static Set<Peak> originalReaderSelectPeaks(MZIntensityPairList peaks, double ticValue, double cutoffFraction)
{
    double[] intenArray = new double[peaks.Count];
    int indexPeaks = 0;
    foreach (MZIntensityPair mzIntensity in peaks)
    {
        intenArray[indexPeaks] = mzIntensity.intensity;
        indexPeaks++;
    }

    // Descending order.
    Array.Sort(intenArray);
    Array.Reverse(intenArray);

    double intenThreshhold = 0;
    double currTIC = 0;
    double cutOffTIC = cutoffFraction * ticValue;
    foreach (double inten in intenArray)
    {
        currTIC = currTIC + inten;
        if (currTIC < cutOffTIC)
        {
            intenThreshhold = inten;
        }
        else
        {
            break;
        }
    }

    Set<Peak> peakList = new Set<Peak>();
    foreach (MZIntensityPair mzIntensity in peaks)
    {
        if (mzIntensity.intensity >= intenThreshhold)
        {
            peakList.Add(new Peak(mzIntensity.mz, mzIntensity.intensity));
        }
    }
    return peakList;
}

// Derives the ambiguity label of one peptide bond from its matched charge 1..3 intensities.
//-3: (0/+3) y3 only
//-2: (0/+3, +1/+2) y3, b1y2
//-1: (+1/+2): b1y2
// 0: (+1/+2, +2/+1): b1y2, b2y1
// 1: (+2/+1): b2y1
// 2: (+2/+1, +3/0): b2y1, b3
// 3: (+3/0): b3 only
// Any other combination (e.g. non-adjacent groups) yields "error".
private static string originalReaderAmbiguityLabel(double[] bIntensity, double[] yIntensity)
{
    double b3 = bIntensity[3];
    double y3 = yIntensity[3];
    double b1y2 = bIntensity[1] + yIntensity[2];
    double b2y1 = bIntensity[2] + yIntensity[1];

    if (y3 != 0 && (b1y2 + b2y1 + b3) == 0)
    {
        return "-3";
    }
    if (y3 != 0 && b1y2 != 0 && (b2y1 + b3) == 0)
    {
        return "-2";
    }
    if (b1y2 != 0 && (y3 + b2y1 + b3) == 0)
    {
        return "-1";
    }
    if (b1y2 != 0 && b2y1 != 0 && (y3 + b3) == 0)
    {
        return "0";
    }
    if (b2y1 != 0 && (y3 + b1y2 + b3) == 0)
    {
        return "1";
    }
    if (b2y1 != 0 && b3 != 0 && (y3 + b1y2) == 0)
    {
        return "2";
    }
    if (b3 != 0 && (y3 + b1y2 + b2y1) == 0)
    {
        return "3";
    }
    return "error";
}
/// <summary>
/// Maps each spectrum/charge identification of an idpXML file to the TotalScore reported for it
/// in the matching qonversion text file.
/// </summary>
/// <param name="idpxml">idpXML file; its file name without extension prefixes every key.</param>
/// <param name="qonversion">
/// Qonversion text file. Only lines containing "(" are parsed; they are split on runs of spaces and
/// columns 2 (index), 3 (charge), 4 (decoy state) and 8 (total score) are read.
/// </param>
/// <param name="list">Receives every key that is added to the returned dictionary, in file order.</param>
/// <returns>
/// Dictionary keyed by "name.index,charge,peptide,decoyState" with the total score as value.
/// </returns>
/// <remarks>
/// When the idpXML holds the same "name.index,charge" more than once, the first peptide wins.
/// A key that occurs twice in the qonversion file makes <c>Dictionary.Add</c> throw, as before.
/// </remarks>
public static Dictionary<string, double> SpecCharge2Score(string idpxml, string qonversion, List<string> list)
{
    string name = Path.GetFileNameWithoutExtension(idpxml);

    // "name.index,charge" -> peptide sequence
    Dictionary<string, string> dic_spectrum = new Dictionary<string, string>();

    IDPicker.Workspace workspace = new IDPicker.Workspace();
    Package.loadWorkspace(ref workspace, idpxml);

    foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
    {
        foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
        {
            foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)
            {
                IDPicker.ResultInstance ri = sItr.Value.results[1];
                IDPicker.VariantInfo vi = ri.info.peptides.Min;
                string pepSequence = vi.peptide.sequence;
                string index = sItr.Value.id.index.ToString();
                int z = sItr.Value.id.charge;
                string id = name + "." + index + "," + z.ToString();

                // Repeated ids (spectra with an ambiguous charge state) keep their first peptide.
                if (!dic_spectrum.ContainsKey(id))
                {
                    dic_spectrum.Add(id, pepSequence);
                }
            }
        }
    }

    //then read qonversion.txt: index, charge, DecoyState, TotalScore
    Dictionary<string, double> finaldic = new Dictionary<string, double>();
    int unmatchedPep = 0;

    // Built once instead of once per line.
    Regex columnSeparator = new Regex(" +");

    // The reader was never closed before; 'using' releases the file handle even on a parse error.
    using (TextReader file = new StreamReader(qonversion))
    {
        string t;
        while ((t = file.ReadLine()) != null)
        {
            if (!t.Contains("("))
            {
                continue;
            }

            string[] str = columnSeparator.Split(t);
            string Index = str[2];
            double TotalScore = Convert.ToDouble(str[8]);
            string charge = str[3];
            string DecoyState = str[4];
            string id = name + "." + Index + "," + charge;

            string pep;
            if (dic_spectrum.TryGetValue(id, out pep))
            {
                string key = id + "," + pep + "," + DecoyState;
                finaldic.Add(key, TotalScore);
                list.Add(key);
            }
            else
            {
                unmatchedPep++;
            }
        }
    }

    Console.WriteLine("--one set xml-qonversion done with unmatched (unexpected): " + unmatchedPep);
    return finaldic;
}
/// <summary>
/// Given an assembly.xml file, grabs the distinct peptide sequences of the spectra with the
/// requested precursor charge.
/// </summary>
/// <param name="xml">Path of the idpXML/assembly file passed to <c>Package.loadWorkspace</c>.</param>
/// <param name="z">Precursor charge to keep; z == 0 outputs all the peptides.</param>
/// <returns>The selected peptide sequences after <c>Package.removeDuplicate</c>.</returns>
/// <remarks>
/// This method contained an unresolved merge conflict. It is resolved in favour of the variant
/// that matches the documented contract (z == 0 returns every peptide). The other branch
/// restricted z == 0 to charges 2, 3 and 4.
/// NOTE(review): confirm this is the intended side of the merge.
/// </remarks>
public static List<string> PepSecurity(string xml, int z)
{
    List<string> peptideList = new List<string>();

    IDPicker.Workspace workspace = new IDPicker.Workspace();
    Package.loadWorkspace(ref workspace, xml);

    foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
    {
        foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
        {
            foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)
            {
                IDPicker.ResultInstance ri = sItr.Value.results[1];
                IDPicker.VariantInfo vi = ri.info.peptides.Min;
                string pepSequence = vi.peptide.sequence;

                if (z == 0 || sItr.Value.id.charge.Equals(z))
                {
                    peptideList.Add(pepSequence);
                }
            }
        }
    }

    return Package.removeDuplicate(peptideList);
}
/// <summary>
/// Joins the identifications of an idpXML file with their qonversion scores and RNA-Seq support.
/// </summary>
/// <param name="idpxml">idpXML file (peptides that passed the FDR cutoff); its file name without extension prefixes every key.</param>
/// <param name="qonversion">
/// idpQonvert text file (all peptides, no FDR cutoff). Only lines containing "(" are parsed; they are
/// split on runs of spaces and columns 1 (native ID), 2 (index), 3 (charge), 4 (decoy state),
/// 8 (total score) and 9 (FDR) are read.
/// </param>
/// <param name="dic_RNASeq">Peptide sequence -> RNA-Seq support value.</param>
/// <returns>
/// Dictionary keyed by "name.index.charge". Every identification starts as
/// "peptide,NTerminusIsSpecific,CTerminusIsSpecific"; entries that are found in the qonversion file and
/// whose peptide is in <paramref name="dic_RNASeq"/> are rewritten to
/// "nativeID,&lt;previous value&gt;,charge,decoyState,totalScore,FDR,rnaSeq".
/// </returns>
/// <remarks>
/// Original design notes: the idpQonvert file holds all peptides from the pepXML without any FDR
/// cutoff, while the idpXML holds only those that passed it. Restricting the rows to FDR-cutoff
/// peptides might generate a limited pool; an FDR 0.1 cutoff should generate enough "0"s in the
/// RNASeq_support column. Different cutoff values might have different merits. The 0.1 cutoff uses
/// the same idpQonvert file. File path: X:\wangd5\idpXML_FDR1.00\myrimatch\FDR0.1
/// </remarks>
public static Dictionary<string, string> spectra2RNASeq(string idpxml, string qonversion, Dictionary<string, string> dic_RNASeq)
{
    string name = Path.GetFileNameWithoutExtension(idpxml);

    // id -> peptide sequence, used to look up the RNA-Seq support
    Dictionary<string, string> dic_peptide = new Dictionary<string, string>();
    //dic_spectra is what we want to return
    Dictionary<string, string> dic_spectra = new Dictionary<string, string>();

    IDPicker.Workspace workspace = new IDPicker.Workspace();
    Package.loadWorkspace(ref workspace, idpxml);

    int repeatedIndex = 0;
    foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
    {
        foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
        {
            foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)
            {
                IDPicker.ResultInstance ri = sItr.Value.results[1];
                IDPicker.VariantInfo vi = ri.info.peptides.Min;
                string pepSequence = vi.peptide.sequence;
                string index = sItr.Value.id.index.ToString();
                int z = sItr.Value.id.charge;
                string id = name + "." + index + "." + z.ToString();

                if (dic_peptide.ContainsKey(id))
                {
                    repeatedIndex++;
                    continue;
                }

                bool Nspecificity = vi.peptide.NTerminusIsSpecific;
                bool Cspecificity = vi.peptide.CTerminusIsSpecific;
                dic_peptide.Add(id, pepSequence);
                dic_spectra.Add(id, pepSequence + "," + Nspecificity.ToString() + "," + Cspecificity.ToString());
            }
        }
    }
    Console.WriteLine("repeated index (comes from spectrum with ambiguous charge state) is: " + repeatedIndex);

    //then read qonversion.txt
    //get index, charges, TotalScore, DecoyState
    //add pep and RNASeq_support information
    int unmatchedPep = 0;

    // Built once instead of once per line.
    Regex columnSeparator = new Regex(" +");

    // The reader was never closed before; 'using' releases the file handle even on a parse error.
    using (TextReader file = new StreamReader(qonversion))
    {
        string t;
        while ((t = file.ReadLine()) != null)
        {
            if (!t.Contains("("))
            {
                continue;
            }

            string[] str = columnSeparator.Split(t);
            string NativeID = str[1];
            string Index = str[2];
            string charge = str[3];
            string DecoyState = str[4];
            string TotalScore = str[8];
            string FDR = str[9];
            string id = name + "." + Index + "." + charge;

            string pep;
            if (!dic_peptide.TryGetValue(id, out pep))
            {
                // Not an identification of this idpXML; not counted as unmatched.
                continue;
            }

            string RNASeq;
            if (dic_RNASeq.TryGetValue(pep, out RNASeq))
            {
                dic_spectra[id] = NativeID + "," + dic_spectra[id] + "," + charge + "," + DecoyState + "," + TotalScore + "," + FDR + "," + RNASeq;
            }
            else
            {
                unmatchedPep++;
            }
        }
    }

    Console.WriteLine("unmatched peptide (peptide that was not found in the csv file) is: " + unmatchedPep);
    return dic_spectra;
}
/// <summary>
/// For every identified spectrum with precursor charge <paramref name="z"/>, predicts which singly and
/// doubly charged b/y fragments each peptide bond produces, checks the predictions against the
/// TIC-filtered, charge-assigned peaks, and writes one row per bond to
/// "&lt;idpXML dir&gt;/&lt;idpXML name&gt;_&lt;z&gt;.csv" with the header "nativeID,pepSequence,bond,b1,b2,y1,y2".
/// </summary>
/// <param name="idpXMLFile">idpXML file holding the identifications; also determines the CSV location.</param>
/// <param name="mzMLFile">Spectrum file the identifications refer to.</param>
/// <param name="TicCutoffPercentage">Fraction of the spectrum's total ion current used to derive the peak-intensity threshold.</param>
/// <param name="z">Precursor charge to process.</param>
/// <param name="model">
/// 0 = naive model (every fragment charge below z), 1 = binary logistic regression basophile model,
/// 2 = Surendra's ordinal model. Any other value writes the header only.
/// </param>
/// <remarks>
/// Each of the b1/b2/y1/y2 columns is 0 (not predicted), 1 (predicted, no peak matched),
/// 2 (peak matched, assigned charge disagrees) or 3 (peak matched, charge agrees).
/// </remarks>
/// <exception cref="InvalidOperationException">The spectrum cannot be located by native ID or by stored index.</exception>
public static void idpReader(string idpXMLFile, string mzMLFile, double TicCutoffPercentage, int z, int model)
{
    //get the path and filename of output csv file:
    string fileName = Path.GetFileNameWithoutExtension(idpXMLFile);
    string filePath = Path.GetDirectoryName(idpXMLFile);
    string csvFile = Path.Combine(filePath, fileName) + "_" + z.ToString() + ".csv";

    IDPicker.Workspace workspace = new IDPicker.Workspace();
    Package.loadWorkspace(ref workspace, idpXMLFile);

    using (StreamWriter file = new StreamWriter(csvFile))
    {
        file.WriteLine("nativeID,pepSequence,bond,b1,b2,y1,y2");

        // The data file used to be left open; dispose it once all spectra have been read.
        // NOTE(review): assumes MSDataFile implements IDisposable (true for the pwiz CLI bindings) - confirm.
        using (MSDataFile msData = new MSDataFile(mzMLFile))
        {
            SpectrumList sl = msData.run.spectrumList;

            foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
            {
                foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
                {
                    foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)
                    {
                        IDPicker.ResultInstance ri = sItr.Value.results[1];
                        IDPicker.VariantInfo vi = ri.info.peptides.Min;

                        if (!sItr.Value.id.charge.Equals(z))
                        {
                            continue;
                        }

                        string pepSequence = vi.peptide.sequence;
                        int len = pepSequence.Length;

                        // Look up the index with nativeID; a missing ID goes straight to the
                        // fallback instead of handing null to the lookup.
                        string nativeId = string.IsNullOrEmpty(sItr.Value.nativeID) ? null : sItr.Value.nativeID;
                        int spectrumCount = sl.size();
                        int spectrumIndex = nativeId != null ? sl.find(nativeId) : spectrumCount;

                        // Trust the local index, if the nativeID lookup fails
                        if (spectrumIndex >= spectrumCount)
                        {
                            spectrumIndex = sItr.Value.id.index;
                        }

                        // Bail if the local index is outside the spectrum list
                        if (spectrumIndex < 0 || spectrumIndex >= spectrumCount)
                        {
                            throw new InvalidOperationException("Can't find spectrum associated with the index.");
                        }

                        Spectrum spec1 = sl.spectrum(spectrumIndex, true);
                        MZIntensityPairList peaks = new MZIntensityPairList();
                        spec1.getMZIntensityPairs(ref peaks);

                        //get the TIC
                        double TICValue = 0;
                        foreach (CVParam CVP in spec1.cvParams)
                        {
                            if (CVP.name == "total ion current")
                            {
                                TICValue = CVP.value;
                            }
                        }

                        //very important. Surendra put them here
                        //to change those with modifications {} into a format
                        //that fragment method will accept.
                        string interpretation = vi.ToSimpleString();
                        Peptide peptide = new Peptide(interpretation, ModificationParsing.ModificationParsing_Auto, ModificationDelimiter.ModificationDelimiter_Brackets);
                        Fragmentation fragmentation = peptide.fragmentation(true, true);

                        Set<Peak> peakList = idpReaderSelectPeaks(peaks, TICValue, TicCutoffPercentage);

                        Console.WriteLine("nativeID: =============" + nativeId);

                        //update peaklist for charge states.
                        peakList = Package.chargeAssignment(peakList);

                        for (int k = 1; k < len; k++)
                        {
                            // N-terminal (b) and C-terminal (y) halves around bond k.
                            string bion = pepSequence.Substring(0, k);
                            string yion = pepSequence.Substring(k, len - k);

                            int NR = Package.parseAAResidues(bion, 'R');
                            int NK = Package.parseAAResidues(bion, 'K');
                            int NH = Package.parseAAResidues(bion, 'H');
                            int NL = k;
                            int CR = Package.parseAAResidues(yion, 'R');
                            int CK = Package.parseAAResidues(yion, 'K');
                            int CH = Package.parseAAResidues(yion, 'H');
                            int CL = len - k;

                            int b1 = 0, b2 = 0, y1 = 0, y2 = 0;

                            if (model == 0) //naive model
                            {
                                // At least three slots so that [1] and [2] exist even for z <= 1;
                                // the arrays used to be sized z + 1 and overflowed in that case.
                                int[] bFragmentCharge = new int[Math.Max(z + 1, 3)];
                                int[] yFragmentCharge = new int[Math.Max(z + 1, 3)];

                                // Every fragment charge below the precursor charge is predicted.
                                for (int i = 1; i < z; i++)
                                {
                                    bFragmentCharge[i] = idpReaderClassifyMatch(peakList, fragmentation.b(k, i), i);
                                    yFragmentCharge[i] = idpReaderClassifyMatch(peakList, fragmentation.y(len - k, i), i);
                                }

                                b1 = bFragmentCharge[1];
                                b2 = bFragmentCharge[2];
                                y1 = yFragmentCharge[1];
                                y2 = yFragmentCharge[2];
                            }
                            else if (model == 1) //my binary logistic regression basophile model
                            {
                                double y1_logit = 0.1098112 * NR + 0.2085831 * NK + 0.1512109 * NH + 0.0460839 * NL - 0.3872417 * CR - 0.3684911 * CK - 0.1634741 * CH - 0.1693931 * CL + 1.2632997;
                                double y2_logit = -0.6345364 * NR - 0.3365917 * NK - 0.4577882 * NH - 0.1492703 * NL + 0.7738133 * CR + 0.6036758 * CK + 0.5942542 * CH + 0.0701467 * CL + 0.0806280;
                                double b1_logit = 0.0801432 * NR - 0.1088081 * NK - 0.1338220 * NH - 0.1413059 * NL - 0.3157957 * CR - 0.2708274 * CK - 0.3703136 * CH + 0.0157418 * CL + 1.2124699;
                                double b2_logit = 0.8606449 * NR + 0.2763119 * NK + 0.4969152 * NH + 0.0685712 * NL - 1.3346995 * CR - 1.0977316 * CK - 1.0973677 * CH - 0.2028884 * CL + 1.9355980;

                                // NOTE(review): b2 uses a threshold of 0 while the other three use -0.5;
                                // kept as found - confirm this asymmetry is intended.
                                if (b1_logit > -0.5)
                                {
                                    b1 = idpReaderClassifyMatch(peakList, fragmentation.b(k, 1), 1);
                                }
                                if (b2_logit > 0)
                                {
                                    b2 = idpReaderClassifyMatch(peakList, fragmentation.b(k, 2), 2);
                                }
                                if (y1_logit > -0.5)
                                {
                                    y1 = idpReaderClassifyMatch(peakList, fragmentation.y(len - k, 1), 1);
                                }
                                if (y2_logit > -0.5)
                                {
                                    y2 = idpReaderClassifyMatch(peakList, fragmentation.y(len - k, 2), 2);
                                }
                            }
                            else if (model == 2) //Surendra's ordinal model
                            {
                                double logit = NR * 0.9862 + NH * 0.8772 + NK * 0.7064 + NL * 0.4133 - CR * 1.1688 - CH * 0.3948 - CK * 0.6710 - CL * 0.4859;

                                if (logit < -2.2502)
                                {
                                    //charge Label = "1", generate b+, y++
                                    b1 = idpReaderClassifyMatch(peakList, fragmentation.b(k, 1), 1);
                                    y2 = idpReaderClassifyMatch(peakList, fragmentation.y(len - k, 2), 2);
                                }
                                else if (logit < 0.7872)
                                {
                                    //ambiLabel = "2", generate b+, y+, b++, y++
                                    b1 = idpReaderClassifyMatch(peakList, fragmentation.b(k, 1), 1);
                                    y1 = idpReaderClassifyMatch(peakList, fragmentation.y(len - k, 1), 1);
                                    b2 = idpReaderClassifyMatch(peakList, fragmentation.b(k, 2), 2);
                                    y2 = idpReaderClassifyMatch(peakList, fragmentation.y(len - k, 2), 2);
                                }
                                else
                                {
                                    //ambiLabel = "3", generate b++, y+
                                    b2 = idpReaderClassifyMatch(peakList, fragmentation.b(k, 2), 2);
                                    y1 = idpReaderClassifyMatch(peakList, fragmentation.y(len - k, 1), 1);
                                }
                            }
                            else
                            {
                                // Unknown model: no row is written for this bond.
                                continue;
                            }

                            file.WriteLine(nativeId + "," + pepSequence + "," + k + "," + b1 + "," + b2 + "," + y1 + "," + y2);
                        }//end for each peptide bond
                    }//end foreach spectrum
                }
            }
        }//end using MSDataFile
    }//end using StreamWriter
}

// Keeps the peaks whose intensity reaches the threshold derived from the TIC cutoff.
// The threshold is the last (smallest) intensity, walking from the most intense peak down, for
// which the running intensity sum is still below cutoffFraction * ticValue. It stays 0 - i.e.
// every peak is kept - when even the first peak reaches the cutoff.
// Peaks are created with fragment charge 0 (unknown); the caller assigns charges afterwards.
private static Set<Peak> idpReaderSelectPeaks(MZIntensityPairList peaks, double ticValue, double cutoffFraction)
{
    double[] intenArray = new double[peaks.Count];
    int indexPeaks = 0;
    foreach (MZIntensityPair mzIntensity in peaks)
    {
        intenArray[indexPeaks] = mzIntensity.intensity;
        indexPeaks++;
    }

    // Descending order.
    Array.Sort(intenArray);
    Array.Reverse(intenArray);

    double intenThreshhold = 0;
    double currTIC = 0;
    double cutOffTIC = cutoffFraction * ticValue;
    foreach (double inten in intenArray)
    {
        currTIC = currTIC + inten;
        if (currTIC < cutOffTIC)
        {
            intenThreshhold = inten;
        }
        else
        {
            break;
        }
    }

    Set<Peak> peakList = new Set<Peak>();
    foreach (MZIntensityPair mzIntensity in peaks)
    {
        if (mzIntensity.intensity >= intenThreshhold)
        {
            peakList.Add(new Peak(mzIntensity.mz, mzIntensity.intensity, 0));
        }
    }
    return peakList;
}

// Scores one predicted fragment against the peak list using a 30 ppm window (ORBI-ORBI setting):
// 1 = predicted but no peak matched, 2 = peak matched but its assigned charge differs,
// 3 = peak matched and the charge agrees.
private static int idpReaderClassifyMatch(Set<Peak> peakList, double mz, int expectedCharge)
{
    Peak matched = Package.findClose(peakList, mz, mz * 30 * Math.Pow(10, -6));
    if (matched == null)
    {
        return 1;
    }
    return matched.fragmentCharge == expectedCharge ? 3 : 2;
}
/// <summary>
/// Entry point. Joins the MyriMatch PSM scores read from an assembled idpXML with the
/// per-peptide decoy state (merge.csv) and RNA-Seq flag (fdr1.0.csv), then writes one
/// CSV row per PSM whose peptide is present in both lookup tables.
/// </summary>
/// <param name="args">Command-line arguments; unused, every path is hard-coded below.</param>
static void Main(string[] args)
{
    //Console.WriteLine("started");
    //string naive = "Z:\\home\\dwang\\fragmentation\\UPS\\naive\\klc_031308p_cptac_study6_6_QC1.idpXML";
    //string baso = "Z:\\home\\dwang\\fragmentation\\UPS\\basophilenew\\klc_031308p_cptac_study6_6_QC1.idpXML";
    //for (int z=0; z<=4; z++)
    //{
    //    List<string> pep_naive = Package.PepSecurity(naive, z);
    //    List<string> pep_baso = Package.PepSecurity(baso, z);
    //    List<string> common = Package.findCommon(pep_baso, pep_naive);
    //    Console.WriteLine("z==: " + z);
    //    Console.WriteLine("pep in naive: " + pep_naive.Count);
    //    Console.WriteLine("pep in baso: " + pep_baso.Count);
    //    Console.WriteLine("common: " + common.Count);
    //}

    ///////////////////////////////////////////////////////////
    //start myrimatch
    ///////////////////////////////////////////////////////////

    // peptide sequence -> decoy state; the first occurrence of a peptide wins.
    // NOTE(review): columns 2 and 3 are presumably "pep" and "decoystate", matching the
    // merge.csv header written by the disabled code at the bottom of this method - confirm.
    Dictionary<string, string> peptideDic = new Dictionary<string, string>();
    using (TextReader file_temp = new StreamReader("X:\\wangd5\\idpXML_FDR1.00\\score evaluation\\merge.csv"))
    {
        DataTable table_temp = CSV.CsvParser.Parse(file_temp, true);
        foreach (DataRow dr in table_temp.Rows)
        {
            string pep = dr[2].ToString();
            string decoy = dr[3].ToString();
            if (!peptideDic.ContainsKey(pep))
                peptideDic.Add(pep, decoy);
        }
    }

    string xml = "X:\\wangd5\\idpXML_FDR1.00\\myrimatch\\Assemble_MM.xml";

    // "<peptide>.<running index>" -> "mvh,mzfidelity,charge". The running index keeps
    // every PSM distinct even when several spectra identify the same peptide.
    Dictionary<string, string> PSM = new Dictionary<string, string>();
    Console.WriteLine("preparing reading idpXML");
    int index = 0;
    IDPicker.Workspace workspace = new IDPicker.Workspace();
    Package.loadWorkspace(ref workspace, xml);
    foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
    {
        foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
        {
            foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)
            {
                IDPicker.ResultInstance ri = sItr.Value.results[1];
                IDPicker.VariantInfo vi = ri.info.peptides.Min;
                string pepSequence = vi.peptide.sequence;

                // NOTE(review): assumes exactly two search scores, ordered mvh then
                // mzFidelity; CopyTo will throw if there are more than two - confirm.
                var scores = ri.searchScores.Values;
                float[] scoreArr = new float[2];
                scores.CopyTo(scoreArr, 0);
                float mvh = scoreArr[0];
                float mzfidelity = scoreArr[1];

                string z = sItr.Value.id.charge.ToString();
                string key = pepSequence + "." + index;
                // NOTE(review): floats are formatted with the current culture; a culture
                // using ',' as decimal separator would corrupt the CSV columns.
                PSM.Add(key, mvh + "," + mzfidelity + "," + z);
                index++;
            }
        }
    }

    Console.WriteLine("preparing reading RNA-seq");
    // peptide sequence -> RNA-Seq flag (column 4 of fdr1.0.csv).
    Dictionary<string, string> dic_RNASeq = new Dictionary<string, string>();
    using (TextReader file_csv = new StreamReader("X:\\wangd5\\idpXML_FDR1.00\\score evaluation\\fdr1.0.csv"))
    {
        DataTable table = CSV.CsvParser.Parse(file_csv, true);
        foreach (DataRow dr in table.Rows)
        {
            string pep = dr[0].ToString();
            string RNASeq = dr[4].ToString();
            // Keep the first occurrence instead of throwing on a repeated peptide,
            // consistent with how peptideDic is filled above.
            if (!dic_RNASeq.ContainsKey(pep))
                dic_RNASeq.Add(pep, RNASeq);
        }
    }

    // Join: keep only PSMs whose peptide has both a decoy state and an RNA-Seq flag.
    List<string> finalList = new List<string>();
    int unmatched = 0;
    foreach (KeyValuePair<string, string> psm in PSM)
    {
        // Strip the ".<index>" suffix appended when the key was built.
        string pep = psm.Key.Split('.')[0];
        string rna;
        string decoy;
        if (dic_RNASeq.TryGetValue(pep, out rna) && peptideDic.TryGetValue(pep, out decoy))
            finalList.Add(pep + "," + psm.Value + "," + decoy + "," + rna);
        else
            unmatched++;
    }
    Console.WriteLine("information: unmatched number is: " + unmatched);

    Console.WriteLine("preparing writing files...");
    string output = "X:\\wangd5\\idpXML_FDR1.00\\score evaluation\\myrimatchscorecombination.csv";
    using (TextWriter file = new StreamWriter(output))
    {
        // Six columns, matching the rows built above (the decoy-state column was
        // previously missing from the header, mislabelling the last column).
        file.WriteLine("pep,mvh,mz,z,decoystate,rna");
        foreach (string ss in finalList)
        {
            file.WriteLine(ss);
        }
    }

    /////////////////////////////////////////////////////////////////////////
    //for 3 search engines (disabled analysis, kept for reference)
    ////////////////////////////////////////////////////////////////////////
    /*
    //===========================================================
    string test_mm = "X:\\wangd5\\SW480\\MM\\FDR0.05\\mm.xml";
    string test_sq = "X:\\wangd5\\SW480\\SQ\\FDR0.05\\sq.xml";
    string test_xt = "X:\\wangd5\\SW480\\XT\\FDR0.05\\xt.xml";
    string test_p = "X:\\wangd5\\SW480\\evaluation\\p.csv";
    //string test_mm = "Z:\\home\\dwang\\fragmentation\\RNA-Seq\\RKO\\Assemble_MM.xml";
    //string test_sq = "Z:\\home\\dwang\\fragmentation\\RNA-Seq\\RKO\\Assemble_SQ.xml";
    //string test_xt = "Z:\\home\\dwang\\fragmentation\\RNA-Seq\\RKO\\Assemble_XT.xml";

    //get peptides in p
    List<string> p = new List<string>();
    TextReader file_p = new StreamReader(test_p);
    DataTable dt = CSV.CsvParser.Parse(file_p, true);
    foreach (DataRow dr in dt.Rows)
    {
        string pep = dr[2].ToString();
        p.Add(pep);
    }

    List<string> m = Package.PepSecurity(test_mm, 0);
    List<string> x = Package.PepSecurity(test_xt, 0);
    List<string> s = Package.PepSecurity(test_sq, 0);
    m = Package.removeDuplicate(m);
    x = Package.removeDuplicate(x);
    s = Package.removeDuplicate(s);
    p = Package.removeDuplicate(p);
    List<string> pm = Package.findCommon(p, m);
    List<string> mx = Package.findCommon(m, x);
    List<string> ms = Package.findCommon(m, s);
    List<string> xs = Package.findCommon(x, s);
    List<string> mxs = Package.findCommon(mx, s);

    ///////////////////////////////////////////////////////////////////////////////////////
    //read the fdr1.0.csv, to get all the peptide sequences that identified by 3 engines
    //no matter how low the score is
    //also, the file contains the RNASeq information
    Dictionary<string, string> dic_RNASeq = new Dictionary<string, string>();
    TextReader file_csv = new StreamReader("X:\\wangd5\\SW480\\evaluation\\PepRNA_FDR1.csv");
    //TextReader file_csv = new StreamReader("X:\\wangd5\\idpXML_FDR1.00\\score evaluation\\fdr1.0.csvn");
    //TextReader file_csv = new StreamReader("X:\\wangd5\\idpXML_FDR1.00\\score evaluation\\fdr1.0.csv");
    DataTable table = CSV.CsvParser.Parse(file_csv, true);
    foreach (DataRow dr in table.Rows)
    {
        string pep = dr[0].ToString();
        string RNASeq = dr[1].ToString();
        dic_RNASeq.Add(pep, RNASeq);
    }

    int m_n = 0; int x_n = 0; int s_n = 0; int p_n = 0;
    int ms_n = 0; int mx_n = 0; int xs_n = 0; int mxs_n = 0; int pm_n = 0;
    foreach (string ss in m)
    {
        if (dic_RNASeq.ContainsKey(ss)) { if (dic_RNASeq[ss] == "1") m_n++; }
        else Console.WriteLine("crap");
    }
    foreach (string ss in x) { if (dic_RNASeq.ContainsKey(ss)) { if (dic_RNASeq[ss] == "1") x_n++; } }
    foreach (string ss in s) { if (dic_RNASeq.ContainsKey(ss)) { if (dic_RNASeq[ss] == "1") s_n++; } }
    foreach (string ss in p) { if (dic_RNASeq.ContainsKey(ss)) { if (dic_RNASeq[ss] == "1") p_n++; } }
    foreach (string ss in ms) { if (dic_RNASeq.ContainsKey(ss)) { if (dic_RNASeq[ss] == "1") ms_n++; } }
    foreach (string ss in mx) { if (dic_RNASeq.ContainsKey(ss)) { if (dic_RNASeq[ss] == "1") mx_n++; } }
    foreach (string ss in xs) { if (dic_RNASeq.ContainsKey(ss)) { if (dic_RNASeq[ss] == "1") xs_n++; } }
    foreach (string ss in mxs) { if (dic_RNASeq.ContainsKey(ss)) { if (dic_RNASeq[ss] == "1") mxs_n++; } }
    foreach (string ss in pm) { if (dic_RNASeq.ContainsKey(ss)) { if (dic_RNASeq[ss] == "1") pm_n++; } }

    Console.WriteLine("m: " + m.Count + "=" + m_n);
    Console.WriteLine("x: " + x.Count + "=" + x_n);
    Console.WriteLine("s: " + s.Count + "=" + s_n);
    Console.WriteLine("p: " + p.Count + "=" + p_n);
    Console.WriteLine("mx: " + mx.Count + "=" + mx_n);
    Console.WriteLine("ms: " + ms.Count + "=" + ms_n);
    Console.WriteLine("xs: " + xs.Count + "=" + xs_n);
    Console.WriteLine("mxs: " + mxs.Count + "=" + mxs_n);
    Console.WriteLine("pm: " + pm.Count + "=" + pm_n);

    string path_mm = "X:\\wangd5\\SW480\\MM\\FDR1.00\\mam_012808n_SW480_200ug_";
    string path_sq = "X:\\wangd5\\SW480\\SQ\\FDR1.00\\mam_012808n_SW480_200ug_";
    string path_xt = "X:\\wangd5\\SW480\\XT\\FDR1.00\\mam_012808n_SW480_200ug_";
    List<string> keyslist = new List<string>();
    Dictionary<string, double> dic_mm = Package.RKO(path_mm, keyslist);
    Dictionary<string, double> dic_sq = Package.RKO(path_sq, keyslist);
    Dictionary<string, double> dic_xt = Package.RKO(path_xt, keyslist);
    keyslist = Package.removeDuplicate(keyslist);

    //merge the three dictionaries into one.
    Dictionary<string, string> dic_merge = new Dictionary<string,string>();
    int misses = 0;
    foreach (string key in keyslist)
    {
        string mm = "";
        string sq = "";
        string xt = "";
        string[] str = key.Split(',');
        string pep = str[2];
        if (dic_RNASeq.ContainsKey(pep))
        {
            string RNA = dic_RNASeq[pep];
            if (dic_mm.ContainsKey(key)) { mm = dic_mm[key].ToString(); } else mm = "0";
            if (dic_xt.ContainsKey(key)) { xt = dic_xt[key].ToString(); } else xt = "0";
            if (dic_sq.ContainsKey(key)) { sq = dic_sq[key].ToString(); } else sq = "0";
            dic_merge.Add(key, mm + "," + xt + "," + sq + "," + RNA);
        }
        else misses++;
    }
    Console.WriteLine("the number of pep-rna misses is: " + misses);

    //write into file
    string output = "X:\\wangd5\\SW480\\evaluation\\merge.csv";
    TextWriter file = new StreamWriter(output);
    file.WriteLine("spectrum,charge,pep,decoystate,mm,xt,sq,rna");
    foreach (string key in dic_merge.Keys)
    {
        file.WriteLine(key + "," + dic_merge[key]);
    }
    */
}