Esempio n. 1
        // Loads the IDPicker workspace from idpXML, opens the mzML file, walks every
        // group -> source -> spectrum in the workspace and returns a list of strings.
        // NOTE(review): this listing looks damaged -- block braces are missing and several
        // initializers are empty ("=;"), so it does not compile as shown. In the visible
        // lines nothing is ever added to `list`, so it is returned empty; the code that
        // fills it is presumably lost -- confirm against the original file.
        public static List<string> IDList(string idpXML, string mzML)
            List<string> list = new List<string>();
            // Open the mzML file; the expression that should supply its spectrum list is missing.
            MSDataFile foo = new MSDataFile(mzML);
            SpectrumList sl =;

            IDPicker.Workspace workspace = new IDPicker.Workspace();
            Package.loadWorkspace(ref workspace, idpXML);

            foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
                foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
                    foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)
                        // NOTE(review): results[1] is presumably the top-ranked result -- confirm.
                        IDPicker.ResultInstance ri = sItr.Value.results[1];
                        IDPicker.VariantInfo vi =;
                        // The charge test that should initialize boolCharge is missing.
                        bool boolCharge =;
                        if (boolCharge)
                            string rawPepSequence = vi.ToString();
                            string interpretation = vi.ToSimpleString();
                            // Look up the index with nativeID
                            // (idOrIndex stays null when nativeID is null or empty)
                            object idOrIndex = null;
                            if (sItr.Value.nativeID != null && sItr.Value.nativeID.Length > 0)
                                idOrIndex = sItr.Value.nativeID;
                        }//end if (boolcharge)
                    }//end foreach
            return list;
Esempio n. 2
 // Constructor: loads the IDPicker workspace at `path`, walks every
 // group -> source -> spectrum, then de-duplicates `peptideslist` and `spectralist`
 // and stores their sizes in `peptides` and `spectra`.
 // NOTE(review): this listing looks damaged -- block braces are missing and some
 // initializers are empty ("=;"). `peptideslist`, `spectralist`, `peptides` and
 // `spectra` are not declared in the visible code (presumably class members), and
 // the visible loop never adds anything to the two lists -- confirm against the
 // original file.
 public SpectraPeptides(string path)
     // File name without extension is used as the prefix of each spectrum id.
     string name = Path.GetFileNameWithoutExtension(path);
     IDPicker.Workspace workspace = new IDPicker.Workspace();
     Package.loadWorkspace(ref workspace, path);
     foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
         foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
             foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)
                 // NOTE(review): results[1] is presumably the top-ranked result -- confirm.
                 IDPicker.ResultInstance ri = sItr.Value.results[1];
                 IDPicker.VariantInfo vi =;
                 string rawPepSequence = vi.ToString();
                 string pepSequence = vi.peptide.sequence;
                 string index =;
                 // Spectrum id has the form "<file name>.<index>".
                 string spectraID = name + "." + index;
     // Remove duplicates, then record how many distinct peptides and spectra remain.
     peptideslist = Package.removeDuplicate(peptideslist);
     spectralist = Package.removeDuplicate(spectralist);
     peptides = peptideslist.Count;
     spectra = spectralist.Count;
Esempio n. 3
        ///original code for grabbing the required ions
        ///get pepSequence,source,scanID, write into a csv file. 
        ///have tons of information: theoretical ion intensity, pep info, chargeLabel...
        ///
        ///For every spectrum in the idpXML workspace whose peptide sequence is in pepList,
        ///the matching spectrum is looked up in the mzML file, its peaks are filtered by a
        ///TIC-based intensity threshold, and one row string is built per peptide bond with
        ///the matched b/y ion intensities and a charge "ambiguity label".
        ///
        ///NOTE(review): this listing looks damaged -- block braces are missing and several
        ///expressions are empty ("=;", "( ==", "new Peak(,", "+ + "), so it does not compile
        ///as shown. In the visible lines nothing is written to a csv file or added to
        ///`output`; the parameter is returned as received. Confirm against the original file.
        public static List<string> idpReader_original(string idpXMLFile, string mzMLFile, double TicCutoffPercentage, int z, List<string> pepList, List<string> output)
            //get the path and filename of output csv file:
            //(fileName and filePath are not used again in the visible code)
            string fileName = Path.GetFileNameWithoutExtension(idpXMLFile);
            string filePath = Path.GetDirectoryName(idpXMLFile);
            IDPicker.Workspace workspace = new IDPicker.Workspace();
            Package.loadWorkspace(ref workspace, idpXMLFile);

            MSDataFile foo = new MSDataFile(mzMLFile);
            SpectrumList sl =;

            foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
                foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
                    foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)

                        // NOTE(review): results[1] is presumably the top-ranked result -- confirm.
                        IDPicker.ResultInstance ri = sItr.Value.results[1];
                        IDPicker.VariantInfo vi =;

                        // `ss` is not used again in the visible code; its middle operand is missing.
                        string ss = vi.ToString() + "," + + "," + sItr.Value.nativeID;
                        // The charge test that should initialize boolCharge is missing.
                        bool boolCharge =;
                        if (boolCharge)
                            string rawPepSequence = vi.ToString();
                            string pepSequence = vi.peptide.sequence;
                            int len = pepSequence.Length;

                            //make sure that the peptide is what we want
                            if (pepList.Contains(pepSequence))
                                // Look up the index with nativeID
                                // (idOrIndex stays null when nativeID is null or empty, and is
                                // then passed to sl.find as a null string)
                                object idOrIndex = null;
                                if (sItr.Value.nativeID != null && sItr.Value.nativeID.Length > 0)
                                    idOrIndex = sItr.Value.nativeID;
                                int spectrumIndex = sl.find(idOrIndex as string);
                                // Trust the local index, if the nativeID lookup fails
                                // (a result >= sl.size() is treated as "not found")
                                if (spectrumIndex >= sl.size())
                                    spectrumIndex =;
                                // Bail if the local index is larger than the spectrum list size
                                if (spectrumIndex >= sl.size())
                                    throw new Exception("Can't find spectrum associated with the index.");

                                //get base peak and TIC and converted to string
                                Spectrum spec1 = sl.spectrum(spectrumIndex, true);
                                MZIntensityPairList peaks = new MZIntensityPairList();
                                spec1.getMZIntensityPairs(ref peaks);
                                Set<Peak> peakList = new Set<Peak>();

                                //get base peak and TIC
                                //(scan the spectrum's cvParams; the left-hand side of both
                                //comparisons below is missing in this listing)
                                double basePeakValue = 0;
                                double TICValue = 0;
                                CVParamList list = spec1.cvParams;
                                foreach (CVParam CVP in list)
                                    if ( == "base peak intensity")
                                        basePeakValue = CVP.value;
                                    if ( == "total ion current")
                                        TICValue = CVP.value;
                                string basePeak = basePeakValue.ToString();
                                string TIC = TICValue.ToString();

                                //very important. Surendra put them here
                                //to change those with modifications {} into a format
                                //that fragment method will accept.
                                string interpretation = vi.ToSimpleString();

                                Peptide peptide = new Peptide(interpretation, ModificationParsing.ModificationParsing_Auto, ModificationDelimiter.ModificationDelimiter_Brackets);

                                Fragmentation fragmentation = peptide.fragmentation(true, true);
                                //prepare the qualified peaklist
                                double intenThreshhold = 0;
                                // totalIntenClass is not used again in the visible code.
                                int totalIntenClass = 0;

                                double[] intenArray = new double[peaks.Count];
                                //used during the foreach loop
                                int indexPeaks = 0;

                                //get all the peaks no matter how small they are
                                //then calculate the threshhold TIC
                                //test here
                                //NOTE(review): indexPeaks is never advanced in the visible loop
                                //body, so every intensity would land in intenArray[0]; an
                                //increment is presumably lost -- confirm.
                                foreach (MZIntensityPair mzIntensity in peaks)
                                    //Peak p = new Peak(, mzIntensity.intensity);
                                    intenArray[indexPeaks] = mzIntensity.intensity;
                                //NOTE(review): no sort is visible before this reverse; presumably
                                //an ascending sort preceded it so the array ends up in
                                //descending intensity order -- confirm.
                                Array.Reverse(intenArray, 0, peaks.Count);

                                //if currTIC>=cutoff, then break
                                //(intenThreshhold ends up as the last intensity added while the
                                //running sum was still below TicCutoffPercentage * TICValue)
                                double currTIC = 0;
                                double cutOffTIC = TicCutoffPercentage * TICValue;
                                foreach (double inten in intenArray)
                                    currTIC = currTIC + inten;
                                    if (currTIC < cutOffTIC)
                                        intenThreshhold = inten;
                                    else break;

                                //then based on that, generate a new peaklist that contains only ABC peaks
                                //then calculate the intensity classes
                                //NOTE(review): the Peak is constructed (first argument missing) but
                                //is not visibly inserted into peakList -- confirm.
                                foreach (MZIntensityPair mzIntensity in peaks)
                                    if (mzIntensity.intensity >= intenThreshhold)
                                        Peak p = new Peak(, mzIntensity.intensity);

                                //rowDic contains row information of each peptide bond
                                Dictionary<int, string> rowDic = new Dictionary<int, string>();
                                //intensityDic contains intensity information of fragment ions for each peptide bond
                                Dictionary<int, List<double>> intensityDic = new Dictionary<int, List<double>>();
                                //duplicateList contains the common intensities
                                List<double> duplicateList = new List<double>();

                                //call the method
                                //(one iteration per peptide bond k: the first k residues form the
                                //b-side fragment, the remaining len - k residues the y-side fragment)
                                List<double> completeIntensityList = new List<double>();
                                for (int k = 1; k < len; k++)
                                    List<double> intensityList = new List<double>();

                                    string bion = pepSequence.Substring(0, k);
                                    string yion = pepSequence.Substring(k, len - k);

                                    //N* = counts/length on the b (N-terminal) side, C* = on the
                                    //y (C-terminal) side; R, K, H, L are the N-minus-C differences
                                    int NR = Package.parseAAResidues(bion, 'R');
                                    int NK = Package.parseAAResidues(bion, 'K');
                                    int NH = Package.parseAAResidues(bion, 'H');
                                    int NL = k;
                                    int CR = Package.parseAAResidues(yion, 'R');
                                    int CK = Package.parseAAResidues(yion, 'K');
                                    int CH = Package.parseAAResidues(yion, 'H');
                                    int CL = len - k;
                                    int R = NR - CR;
                                    int K = NK - CK;
                                    int H = NH - CH;
                                    int L = NL - CL;
                                    int pepBond = k;
                                    int NBasicAA = NR + NK + NH;
                                    int CBasicAA = CR + CK + CH;
                                    string AA = NBasicAA + "," + CBasicAA + "," + NR + "," + NK + "," + NH + "," + NL + "," + CR + "," + CK + "," + CH + "," + CL + "," + R + "," + K + "," + H + "," + L;

                                    //arrays are indexed by fragment charge 1..z; slot 0 is unused
                                    double[] bCharge = new double[z + 1];
                                    double[] yCharge = new double[z + 1];
                                    double[] bIntensity = new double[z + 1];
                                    double[] yIntensity = new double[z + 1];
                                    string bIonIntensity = "";
                                    string yIonIntensity = "";
                                    //to judge if the sum of intensities are 0
                                    //so to exclude the case with all "0s"
                                    double sumIntensity = 0;

                                    //return the b ion charge 1 Intensity, if matched
                                    for (int i = 1; i <= z; i++)
                                        //values returned by fragmentation.b / fragmentation.y
                                        //(presumably theoretical m/z at charge i -- confirm)
                                        bCharge[i] = fragmentation.b(k, i);
                                        yCharge[i] = fragmentation.y(len - k, i);
                                        //change for Q-star purposes.
                                        //(tolerance passed to findClose is 70e-6 times the value, i.e. 70 ppm)
                                        Peak bmatched = Package.findClose(peakList, bCharge[i], 70 * bCharge[i] * Math.Pow(10, -6));
                                        Peak ymatched = Package.findClose(peakList, yCharge[i], 70 * yCharge[i] * Math.Pow(10, -6));
                                        if (bmatched != null)
                                            bIntensity[i] = bmatched.rankOrIntensity;
                                        else bIntensity[i] = 0;
                                        if (ymatched != null)
                                            yIntensity[i] = ymatched.rankOrIntensity;
                                        else yIntensity[i] = 0;

                                        sumIntensity = sumIntensity + bIntensity[i] + yIntensity[i];

                                        //record b/y ion intensity information into a string
                                        bIonIntensity = bIonIntensity + "," + bIntensity[i];
                                        yIonIntensity = yIonIntensity + "," + yIntensity[i];


                                    //NOTE(review): intensityList (and completeIntensityList) are never
                                    //filled in the visible code -- confirm.
                                    intensityDic.Add(pepBond, intensityList);
                                    //to determine charge label, need to split by precursor charge
                                    //first need to make a metric to determine if all intensities are "0"

                                    //only z == 3 is handled in the visible code
                                    if (z == 3)
                                        if (sumIntensity != 0)
                                            //set the ambiguity label as follows:
                                            //-3: (0/+3) y3 only
                                            //-2: (0/+3, +1/+2) y3, b1y2
                                            //-1: (+1/+2): b1y2
                                            //0: (+1/+2, +2/+1): b1y2, b2y1
                                            //1: (+2/+1): b2y1
                                            //2: (+2/+1, +3/0): b2y1, b3
                                            //3: (+3/0): b3 only

                                            double b1 = bIntensity[1];
                                            double b2 = bIntensity[2];
                                            double y1 = yIntensity[1];
                                            double y2 = yIntensity[2];
                                            double b3 = bIntensity[3];
                                            double y3 = yIntensity[3];
                                            double b1y2 = b1 + y2;
                                            double b2y1 = b2 + y1;
                                            string ambiguityLabel = "";
                                            //first part: set the intensity group: y3, b1y2, b2y1, b3
                                            //if one group was found, set the label
                                            //if two were found, but adjacent to each other, then ambiguity label is set
                                            //any other combination is labelled "error"
                                            if (y3 != 0 && (b1y2 + b2y1 + b3) == 0) ambiguityLabel = "-3";
                                            else if (y3 != 0 && b1y2 != 0 && (b2y1 + b3) == 0) ambiguityLabel = "-2";
                                            else if (b1y2 != 0 && (y3 + b2y1 + b3) == 0) ambiguityLabel = "-1";
                                            else if (b1y2 != 0 && b2y1 != 0 && (y3 + b3) == 0) ambiguityLabel = "0";
                                            else if (b2y1 != 0 && (y3 + b1y2 + b3) == 0) ambiguityLabel = "1";
                                            else if (b2y1 != 0 && b3 != 0 && (y3 + b1y2) == 0) ambiguityLabel = "2";
                                            else if (b3 != 0 && (y3 + b1y2 + b2y1) == 0) ambiguityLabel = "3";
                                            else ambiguityLabel = "error";

                                            string finalString = idOrIndex + "," + pepSequence + "," + basePeak + "," + TIC + bIonIntensity + yIonIntensity + "," + len + "," + pepBond + "," + AA + "," + ambiguityLabel;
                                            rowDic.Add(pepBond, finalString);
                                }//end for each peptide bond

                                //now we have: rowDic, intensityDic for each pep bond
                                //and we have: and completeIntensityList for each peptide
                                //the purpose of this is to remove such rows with duplicate matches
                                duplicateList = Package.findCommon(completeIntensityList);
                                foreach (int bond in rowDic.Keys)
                                    bool unique = true;
                                    foreach (double inten in duplicateList)
                                        if (intensityDic[bond].Contains(inten))
                                            unique = false;
                                    //NOTE(review): the body of this `if` is missing in the listing;
                                    //presumably it emitted rowDic[bond] (to `output` or the csv) -- confirm.
                                    if (unique)
                            }//end of if peplist contains pepsequence
                        }//end if z==3
                    }//end foreach peptide
            return output;
Esempio n. 4
        //SpecCharge2Score takes each spectrum-peptide pair and maps it to its score.
        //Returns a dictionary keyed by "<idpxml file name>.<index>,<charge>,<peptide>,<DecoyState>"
        //whose value is the TotalScore parsed from the qonversion file.
        //NOTE(review): this listing looks damaged -- block braces are missing and several
        //initializers are empty ("=;"), so it does not compile as shown. The `list`
        //parameter is not used in the visible code -- confirm against the original file.
        public static Dictionary<string, double> SpecCharge2Score(string idpxml, string qonversion, List<string> list)
            ///given qonversion and idpxml files, find the scores, DecoyState, making a dictionary.
            ///make it peptide-centric.

            //get peptide sequence
            string name = Path.GetFileNameWithoutExtension(idpxml);

            //maps "<name>.<index>,<charge>" to the peptide sequence
            Dictionary<string, string> dic_spectrum = new Dictionary<string, string>();

            IDPicker.Workspace workspace = new IDPicker.Workspace();
            Package.loadWorkspace(ref workspace, idpxml);
            //counts ids that were already present in dic_spectrum
            int repeatedIndex = 0;
            foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
                foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
                    foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)
                        // NOTE(review): results[1] is presumably the top-ranked result -- confirm.
                        IDPicker.ResultInstance ri = sItr.Value.results[1];
                        IDPicker.VariantInfo vi =;
                        string rawPepSequence = vi.ToString();
                        string pepSequence = vi.peptide.sequence;
                        string index =;
                        int z =;
                        string id = name + "." + index + "," + z.ToString();
                        //keep the first peptide seen for an id; later repeats are only counted
                        if (!dic_spectrum.ContainsKey(id))
                            dic_spectrum.Add(id, pepSequence);
                        else repeatedIndex++;

            //Console.WriteLine("repeated index (comes from spectrum with ambiguous charge state) is: " + repeatedIndex);

            //then read qonversion.txt
            //get index, TotalScore, DecoyState
            //add pep and RNASeq_support information

            //finaldic is what we want to return
            Dictionary<string, double> finaldic = new Dictionary<string, double>();
            string t;
            int unmatchedPep = 0;
            //NOTE(review): the reader is never closed or disposed in the visible code.
            TextReader file = new StreamReader(qonversion);
            while ((t = file.ReadLine()) != null)
                //only lines containing "(" are parsed; they are split on runs of spaces
                if (t.Contains("("))
                    Regex r = new Regex(" +");
                    string[] str = r.Split(t);
                    //columns used: [2] index, [3] charge, [4] decoy state, [8] total score
                    string Index = str[2];
                    double TotalScore = Convert.ToDouble(str[8]);
                    string charge = str[3];
                    string DecoyState = str[4];
                    string id = name + "." + Index + "," + charge;
                    //NOTE(review): Add throws if the same key occurs twice in the file.
                    if (dic_spectrum.ContainsKey(id))
                        string pep = dic_spectrum[id];
                        string key = id + "," + pep + "," + DecoyState;
                        finaldic.Add(key, TotalScore);
                    //NOTE(review): with the braces missing it is unclear which `if` this
                    //`else` pairs with; presumably the ContainsKey check above -- confirm.
                    else unmatchedPep++;

            //Console.WriteLine("unmatched peptide (peptide that was not found in the csv file) is: " + unmatchedPep);
            Console.WriteLine("--one set xml-qonversion done with unmatched (unexpected): " + unmatchedPep);
            return finaldic;
Esempio n. 5
        //given an assembly.xml file, grab peptide ids based on charge state.
        //z=0, output all the peptides
        //Returns the collected peptide sequences after Package.removeDuplicate.
        //NOTE(review): this listing looks damaged -- block braces are missing and several
        //expressions are empty ("=;", "( || ||", "if ( peptideList.Add"), so it does not
        //compile as shown. It also contains unresolved merge-conflict markers (see below).
        public static List<string> PepSecurity(string xml, int z)
            List<string> peptideList = new List<string>();
            IDPicker.Workspace workspace = new IDPicker.Workspace();
            Package.loadWorkspace(ref workspace, xml);
            foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
                foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
                    foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)
                        // NOTE(review): results[1] is presumably the top-ranked result -- confirm.
                        IDPicker.ResultInstance ri = sItr.Value.results[1];
                        IDPicker.VariantInfo vi =;
                        string rawPepSequence = vi.ToString();
                        string pepSequence = vi.peptide.sequence;
                        // NOTE(review): unresolved merge conflict. The "<<<<<<< HEAD" and
                        // ">>>>>>> 80fc9e4..." markers below were committed as-is and no
                        // "=======" separator is visible, so the boundary between the two
                        // sides cannot be determined from this listing. One side filters the
                        // z == 0 case with a three-way "||" condition (operands missing); the
                        // other adds every peptide when z == 0. Resolve against the repository
                        // history before relying on this method.
            <<<<<<< HEAD
                        if (z == 0)
                            if ( || ||

                        //if (z == 0) peptideList.Add(pepSequence);
                        //if (z == 0)
                        //    if ( || ||
                        //    {
                        //        peptideList.Add(pepSequence);
                        //    }
                        if (z == 0) peptideList.Add(pepSequence);
            >>>>>>> 80fc9e47b4dafd28bcc96f478ae26543036b9ff6
                            // The condition of this `if` is missing; presumably it compares
                            // the spectrum's charge with z -- confirm.
                            if ( peptideList.Add(pepSequence);
            List<string> uniPepList = Package.removeDuplicate(peptideList);

            return uniPepList;
Esempio n. 6
        //Builds a dictionary keyed by "<idpxml file name>.<index>.<charge>". Each value starts
        //as "<peptide>,<N-terminus specific>,<C-terminus specific>" and, when the id is found in
        //the qonversion file and the peptide is in dic_RNASeq, is rewritten with the NativeID in
        //front and charge, DecoyState, TotalScore, FDR and the RNASeq value appended.
        //NOTE(review): this listing looks damaged -- block braces are missing and several
        //initializers are empty ("=;"), so it does not compile as shown. Confirm against the
        //original file.
        public static Dictionary<string, string> spectra2RNASeq(string idpxml, string qonversion, Dictionary<string, string> dic_RNASeq)
            ///now the problem is:
            ///read an idpQonvert file, read in all the peptides from pepxml without any fdr cutoff
            ///read an idpxml file, read in all peptides that passed the fdr cutoff
            ///now I restrict my rows to fdr-cutoff peptides, which might generate a limited pool
            ///next is to consider fdr 0.1 cutoff, it will generate enough "0"s in the RNASeq_support column
            ///different cutoff values here might have different merits. we'll find out which is the best
            ///next to check 1. whether 0.1 cutoff has the same idpqonvert file or not; Yes, the same idpqonvert file
            ///2. generate new idpxml with fdr cutoff=0.1 done.
            ///file path: X:\wangd5\idpXML_FDR1.00\myrimatch\FDR0.1

            //then read idpxml file
            //get peptide sequence
            string name = Path.GetFileNameWithoutExtension(idpxml);

            //dic_peptide maps each id to the bare peptide sequence (used for the RNASeq lookup)
            Dictionary<string, string> dic_peptide = new Dictionary<string, string>();
            //dic_spectra is what we want to return
            Dictionary<string, string> dic_spectra = new Dictionary<string, string>();
            IDPicker.Workspace workspace = new IDPicker.Workspace();
            Package.loadWorkspace(ref workspace, idpxml);
            //counts ids that were already present in dic_peptide
            int repeatedIndex = 0;
            foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
                foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
                    foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)
                        // NOTE(review): results[1] is presumably the top-ranked result -- confirm.
                        IDPicker.ResultInstance ri = sItr.Value.results[1];
                        IDPicker.VariantInfo vi =;
                        string rawPepSequence = vi.ToString();
                        string pepSequence = vi.peptide.sequence;
                        string index =;
                        int z =;
                        string id = name + "." + index + "." + z.ToString() ;
                        //keep the first entry seen for an id; later repeats are only counted
                        if (!dic_peptide.ContainsKey(id))
                            bool Nspecificity = vi.peptide.NTerminusIsSpecific;
                            bool Cspecitificy = vi.peptide.CTerminusIsSpecific;

                            string value = pepSequence + "," + Nspecificity.ToString() + "," + Cspecitificy.ToString();
                            dic_peptide.Add(id, pepSequence);
                            dic_spectra.Add(id, value);
                        else repeatedIndex++;

            Console.WriteLine("repeated index (comes from spectrum with ambiguous charge state) is: " + repeatedIndex);

            //then read qonversion.txt
            //get index, charges , TotalScore, DecoyState
            //add pep and RNASeq_support information
            string t;
            int unmatchedPep = 0;
            //NOTE(review): the reader is never closed or disposed in the visible code.
            TextReader file = new StreamReader(qonversion);
            while ((t = file.ReadLine()) != null)
                //only lines containing "(" are parsed; they are split on runs of spaces
                if (t.Contains("("))
                    Regex r = new Regex(" +");
                    string[] str = r.Split(t);
                    //columns used: [1] native id, [2] index, [3] charge, [4] decoy state,
                    //[8] total score, [9] FDR
                    string NativeID = str[1];
                    string Index = str[2];
                    string charge = str[3];
                    string DecoyState = str[4];
                    string TotalScore = str[8];
                    string FDR = str[9];
                    string id = name + "." + Index + "." + charge;
                    if (dic_peptide.ContainsKey(id))
                        string pep = dic_peptide[id];
                        if (dic_RNASeq.ContainsKey(pep))
                            string RNASeq = dic_RNASeq[pep];
                            //entries that are not matched here keep their initial
                            //"<peptide>,<N specific>,<C specific>" value
                            dic_spectra[id] = NativeID + "," + dic_spectra[id] + "," + charge + "," + DecoyState + "," + TotalScore + "," + FDR + "," + RNASeq;
                        //NOTE(review): with the braces missing it is unclear which `if` this
                        //`else` pairs with; presumably the dic_RNASeq check above -- confirm.
                        else unmatchedPep++;

            Console.WriteLine("unmatched peptide (peptide that was not found in the csv file) is: " + unmatchedPep);
            return dic_spectra;
Esempio n. 7
        //get pepSequence,source,scanID, write into a csv file.
        //have tons of information: theretical ion intensity, pep info, chargeLabel...
        public static void idpReader(string idpXMLFile, string mzMLFile, double TicCutoffPercentage, int z, int model)
            //get the path and filename of output csv file:
            string fileName = Path.GetFileNameWithoutExtension(idpXMLFile);
            string filePath = Path.GetDirectoryName(idpXMLFile);
            string csvFile = Path.Combine(filePath, fileName) + "_" + z.ToString() + ".csv";

            IDPicker.Workspace workspace = new IDPicker.Workspace();
            Package.loadWorkspace(ref workspace, idpXMLFile);

            using (StreamWriter file = new StreamWriter(csvFile))
                //TODO idOrIndex + "," + pepSequence + "," + pepBond + "," + AA + "," + bIons + "," + yIons;

                MSDataFile foo = new MSDataFile(mzMLFile);
                SpectrumList sl =;

                foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
                    foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
                        foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)

                            IDPicker.ResultInstance ri = sItr.Value.results[1];
                            IDPicker.VariantInfo vi =;

                            string ss = vi.ToString() + "," + + "," + sItr.Value.nativeID;
                            bool boolCharge =;
                            if (boolCharge)
                                string rawPepSequence = vi.ToString();
                                string pepSequence = vi.peptide.sequence;
                                int len = pepSequence.Length;

                                // Look up the index with nativeID
                                object idOrIndex = null;
                                if( sItr.Value.nativeID != null && sItr.Value.nativeID.Length > 0 )
                                    idOrIndex = sItr.Value.nativeID;
                                int spectrumIndex = sl.find(idOrIndex as string);
                                // Trust the local index, if the nativeID lookup fails
                                if( spectrumIndex >= sl.size() )
                                    spectrumIndex =;
                                // Bail of the loca index is larger than the spectrum list size
                                if( spectrumIndex >= sl.size() )
                                    throw new Exception( "Can't find spectrum associated with the index." );

                                //get base peak and TIC and converted to string
                                Spectrum spec1 = sl.spectrum(spectrumIndex, true);
                                MZIntensityPairList peaks = new MZIntensityPairList();
                                spec1.getMZIntensityPairs(ref peaks);
                                Set<Peak> peakList = new Set<Peak>();

                                //get base peak and TIC
                                double basePeakValue = 0;
                                double TICValue = 0;
                                CVParamList list = spec1.cvParams;
                                foreach (CVParam CVP in list)
                                    if ( == "base peak intensity")
                                        basePeakValue = CVP.value;
                                    if ( == "total ion current")
                                        TICValue = CVP.value;
                                string basePeak = basePeakValue.ToString();
                                string TIC = TICValue.ToString();

                                //very important. Surendra put them here
                                //to change those with modifications {} into a format
                                //that fragment method will accept.
                                string interpretation = vi.ToSimpleString();

                                Peptide peptide = new Peptide(interpretation, ModificationParsing.ModificationParsing_Auto, ModificationDelimiter.ModificationDelimiter_Brackets);

                                Fragmentation fragmentation = peptide.fragmentation(true, true);
                                //prepare the qualified peaklist
                                double intenThreshhold = 0;
                                int totalIntenClass = 0;

                                double[] intenArray = new double[peaks.Count];
                                //used during the foreach loop
                                int indexPeaks = 0;

                                //get all the peaks no matter how small they are
                                //then calculate the threshhold TIC
                                //test here
                                foreach (MZIntensityPair mzIntensity in peaks)
                                    //Peak p = new Peak(, mzIntensity.intensity);
                                    intenArray[indexPeaks] = mzIntensity.intensity;
                                Array.Reverse(intenArray, 0, peaks.Count);

                                //if currTIC>=cutoff, then break
                                double currTIC = 0;
                                double cutOffTIC = TicCutoffPercentage * TICValue;
                                foreach (double inten in intenArray)
                                    currTIC = currTIC + inten;
                                    if (currTIC < cutOffTIC)
                                        intenThreshhold = inten;
                                    else break;

                                //then based on that, generate a new peaklist that contains only ABC peaks
                                //then calculate the intensity classes
                                foreach (MZIntensityPair mzIntensity in peaks)
                                    if (mzIntensity.intensity >= intenThreshhold)
                                        //note 0 here. This is to tell people that the orbiorbi fragment charge is unknown.
                                        //the peaklist will be updated later.
                                        Peak p = new Peak(, mzIntensity.intensity,0);
                                Console.WriteLine("nativeID: =============" + idOrIndex);
                                //update peaklist for charge states.
                                peakList = Package.chargeAssignment(peakList);
                                        //int ones = 0;
                                        //int twos = 0;
                                        //foreach (var peak in peakList)
                                        //    //Console.WriteLine(peak.fragmentCharge);
                                        //    if (peak.fragmentCharge == 1) ones++;
                                        //    else if (peak.fragmentCharge == 2) twos++;

                                        //Console.WriteLine("charge 1: " + ones);
                                        //Console.WriteLine("charge 2: " + twos);

                                //rowDic contains row information of each peptide bond
                                Dictionary<int, string> rowDic = new Dictionary<int, string>();
                                //intensityDic contains intensity information of fragment ions for each peptide bond
                                Dictionary<int, List<double>> intensityDic = new Dictionary<int, List<double>>();
                                //commonList contains the common intensities
                                List<double> duplicateList = new List<double>();

                                //call the method
                                List<double> completeIntensityList = new List<double>();
                                for (int k = 1; k < len; k++)
                                    List<double> intensityList = new List<double>();

                                    string bion = pepSequence.Substring(0, k);
                                    string yion = pepSequence.Substring(k, len - k);

                                    int NR = Package.parseAAResidues(bion, 'R');
                                    int NK = Package.parseAAResidues(bion, 'K');
                                    int NH = Package.parseAAResidues(bion, 'H');
                                    int NL = k;
                                    int CR = Package.parseAAResidues(yion, 'R');
                                    int CK = Package.parseAAResidues(yion, 'K');
                                    int CH = Package.parseAAResidues(yion, 'H');
                                    int CL = len - k;
                                    int pepBond = k;
                                    int NBasicAA = NR + NK + NH;
                                    int CBasicAA = CR + CK + CH;
                                    string AA = NR + "," + NK + "," + NH + "," + NL + "," + CR + "," + CK + "," + CH + "," + CL;

                                    double[] bCharge = new double[z + 1];
                                    double[] yCharge = new double[z + 1];
                                    //double[] bIntensity = new double[z + 1];
                                    //double[] yIntensity = new double[z + 1];
                                    //add variable for "real" charges

                                    if (model == 0) //naive model
                                        int[] bFragmentCharge = new int[z + 1];
                                        int[] yFragmentCharge = new int[z + 1];
                                        //return the b ion charge 1 Intensity, if matched
                                        for (int i = 1; i < z; i++)
                                            bCharge[i] = fragmentation.b(k, i);
                                            yCharge[i] = fragmentation.y(len - k, i);
                                            //change for ORBI-ORBI purposes.
                                            Peak bmatched = Package.findClose(peakList, bCharge[i], bCharge[i] * 30 * Math.Pow(10, -6));
                                            Peak ymatched = Package.findClose(peakList, yCharge[i], yCharge[i] * 30 * Math.Pow(10, -6));
                                            if (bmatched != null)
                                                //bIntensity[i] = bmatched.rankOrIntensity;
                                                int fragmentCharge = bmatched.fragmentCharge;
                                                if (fragmentCharge == i)
                                                    bFragmentCharge[i] = 3;
                                                else bFragmentCharge[i] = 2;
                                            else bFragmentCharge[i] = 1;
                                            //else bIntensity[i] = 0;
                                            if (ymatched != null)
                                                //yIntensity[i] = ymatched.rankOrIntensity;
                                                //yFragmentCharge[i] = ymatched.fragmentCharge;
                                                int fragmentCharge = ymatched.fragmentCharge;
                                                if (fragmentCharge == i)
                                                    yFragmentCharge[i] = 3;
                                                else yFragmentCharge[i] = 2;
                                            else yFragmentCharge[i] = 1;
                                            //else yIntensity[i] = 0;
                                        string finalString = idOrIndex + "," + pepSequence + "," + pepBond + "," + bFragmentCharge[1] + "," + bFragmentCharge[2] + "," + yFragmentCharge[1] + "," + yFragmentCharge[2];
                                    else if (model == 1) //my binary logistic regression basophile model
                                        int b1=0, b2=0, y1=0, y2 = 0;
                                        double y1_logit = 0.1098112 * NR + 0.2085831 * NK + 0.1512109 * NH + 0.0460839 * NL
                                                        - 0.3872417 * CR - 0.3684911 * CK - 0.1634741 * CH - 0.1693931 * CL + 1.2632997;
                                        double y2_logit =-0.6345364 * NR - 0.3365917 * NK - 0.4577882 * NH - 0.1492703 * NL
                                                        + 0.7738133 * CR + 0.6036758 * CK + 0.5942542 * CH + 0.0701467 * CL + 0.0806280;
                                        double b1_logit = 0.0801432 * NR - 0.1088081 * NK - 0.1338220 * NH - 0.1413059 * NL
                                                        - 0.3157957 * CR - 0.2708274 * CK - 0.3703136 * CH + 0.0157418 * CL + 1.2124699;
                                        double b2_logit = 0.8606449 * NR + 0.2763119 * NK + 0.4969152 * NH + 0.0685712 * NL
                                                        - 1.3346995 * CR - 1.0977316 * CK - 1.0973677 * CH - 0.2028884 * CL + 1.9355980;
                                        if (b1_logit > -0.5)
                                            double mz_b1 = fragmentation.b(k, 1);
                                            Peak matched = Package.findClose(peakList, mz_b1, mz_b1 * 30 * Math.Pow(10, -6));
                                            if (matched != null)
                                                if (matched.fragmentCharge == 1) b1 = 3;
                                                else b1 = 2;
                                            else b1 = 1;
                                        else b1 = 0;
                                        if (b2_logit > 0)
                                            double mz_b2 = fragmentation.b(k, 2);
                                            Peak matched = Package.findClose(peakList, mz_b2, mz_b2 * 30 * Math.Pow(10, -6));
                                            if (matched != null)
                                                if (matched.fragmentCharge == 2) b2 = 3;
                                                else b2 = 2;
                                            else b2 = 1;
                                        else b2 = 0;
                                        if (y1_logit > -0.5)
                                            double mz_y1 = fragmentation.y(len - k, 1);
                                            Peak matched = Package.findClose(peakList, mz_y1, mz_y1 * 30 * Math.Pow(10, -6));
                                            if (matched != null)
                                                if (matched.fragmentCharge == 1) y1 = 3;
                                                else y1 = 2;
                                            else y1 = 1;
                                        else y1 = 0;
                                        if (y2_logit > -0.5)
                                            double mz_y2 = fragmentation.y(len - k, 2);
                                            Peak matched = Package.findClose(peakList, mz_y2, mz_y2 * 30 * Math.Pow(10, -6));
                                            if (matched != null)
                                                if (matched.fragmentCharge == 2) y2 = 3;
                                                else y2 = 2;
                                            else y2 = 1;
                                        else y2 = 0;
                                        string finalString = idOrIndex + "," + pepSequence + "," + pepBond + "," + b1 + "," + b2 + "," + y1 + "," + y2;

                                    else if (model == 2) //Surendra's ordinal model
                                        int b1 = 0, b2 = 0, y1 = 0, y2 = 0;
                                        double logit = NR * 0.9862 + NH * 0.8772 + NK * 0.7064 + NL * 0.4133
                                                     - CR * 1.1688 - CH * 0.3948 - CK * 0.6710 - CL * 0.4859;
                                        //charge Label = "1", generate b+,y++
                                        if (logit < -2.2502)
                                            double mz_b1 = fragmentation.b(k, 1);
                                            Peak b1matched = Package.findClose(peakList, mz_b1, mz_b1 * 30 * Math.Pow(10, -6));
                                            if (b1matched != null)
                                                if (b1matched.fragmentCharge == 1) b1 = 3;  //matched, charge agree
                                                else b1 = 2; //matched, charge does not agree
                                            else b1 = 1; //unmatched, but predicted.

                                            double mz_y2 = fragmentation.y(len - k, 2);
                                            Peak y2matched = Package.findClose(peakList, mz_y2, mz_y2 * 30 * Math.Pow(10, -6));
                                            if (y2matched != null)
                                                if (y2matched.fragmentCharge == 2) y2 = 3;  //matched, charge agree
                                                else y2 = 2; //matched, charge does not agree
                                            else y2 = 1; //unmatched, but predicted.
                                        //ambiLabel = "2", generate b+, y+, b++, y++
                                        else if (logit < 0.7872)
                                            double mz_b1 = fragmentation.b(k, 1);
                                            Peak b1matched = Package.findClose(peakList, mz_b1, mz_b1 * 30 * Math.Pow(10, -6));
                                            if (b1matched != null)
                                                if (b1matched.fragmentCharge == 1) b1 = 3;  //matched, charge agree
                                                else b1 = 2; //matched, charge does not agree
                                            else b1 = 1; //unmatched, but predicted.

                                            double mz_y1 = fragmentation.y(len - k, 1);
                                            Peak y1matched = Package.findClose(peakList, mz_y1, mz_y1 * 30 * Math.Pow(10, -6));
                                            if (y1matched != null)
                                                if (y1matched.fragmentCharge == 1) y1 = 3;  //matched, charge agree
                                                else y1 = 2; //matched, charge does not agree
                                            else y1 = 1; //unmatched, but predicted.

                                            double mz_b2 = fragmentation.b(k, 2);
                                            Peak b2matched = Package.findClose(peakList, mz_b2, mz_b2 * 30 * Math.Pow(10, -6));
                                            if (b2matched != null)
                                                if (b2matched.fragmentCharge == 2) b2 = 3;  //matched, charge agree
                                                else b2 = 2; //matched, charge does not agree
                                            else b2 = 1; //unmatched, but predicted.

                                            double mz_y2 = fragmentation.y(len - k, 2);
                                            Peak y2matched = Package.findClose(peakList, mz_y2, mz_y2 * 30 * Math.Pow(10, -6));
                                            if (y2matched != null)
                                                if (y2matched.fragmentCharge == 2) y2 = 3;  //matched, charge agree
                                                else y2 = 2; //matched, charge does not agree
                                            else y2 = 1; //unmatched, but predicted.
                                        //ambiLabel = "3", generate b++,y+
                                            double mz_b2 = fragmentation.b(k, 2);
                                            Peak b2matched = Package.findClose(peakList, mz_b2, mz_b2 * 30 * Math.Pow(10, -6));
                                            if (b2matched != null)
                                                if (b2matched.fragmentCharge == 2) b2 = 3;  //matched, charge agree
                                                else b2 = 2; //matched, charge does not agree
                                            else b2 = 1; //unmatched, but predicted.

                                            double mz_y1 = fragmentation.y(len - k, 1);
                                            Peak y1matched = Package.findClose(peakList, mz_y1, mz_y1 * 30 * Math.Pow(10, -6));
                                            if (y1matched != null)
                                                if (y1matched.fragmentCharge == 1) y1 = 3;  //matched, charge agree
                                                else y1 = 2; //matched, charge does not agree
                                            else y1 = 1; //unmatched, but predicted.
                                        string finalString = idOrIndex + "," + pepSequence + "," + pepBond + "," + b1 + "," + b2 + "," + y1 + "," + y2;

                                    //intensityDic.Add(pepBond, intensityList);

                                    //rowDic.Add(pepBond, finalString);
                                }//end for each peptide bond
                            }//end if z==3
                        }//end foreach peptide
            }//end using
Esempio n. 8
        /// <summary>
        /// Console entry point. Joins peptide identifications loaded from idpXML files with
        /// CSV lookup tables (decoy flag, RNA-Seq flag), prints summary counts to the console
        /// and writes the combined rows to CSV files. Every input and output path is a
        /// hard-coded literal.
        /// </summary>
        /// <param name="args">Command-line arguments; not read anywhere in the visible code.</param>
        /// <remarks>
        /// NOTE(review): this listing has no braces and several statements are visibly
        /// incomplete (assignments with no right-hand side, a foreach with no body).
        /// The locals dic_RNASeq, file_csv, table, output and file are each declared twice;
        /// presumably one of the two sections was commented out or separately scoped in the
        /// original file - confirm against the real source before relying on either section.
        /// </remarks>
        static void Main(string[] args)
            //string naive = "Z:\\home\\dwang\\fragmentation\\UPS\\naive\\klc_031308p_cptac_study6_6_QC1.idpXML";
            //string baso = "Z:\\home\\dwang\\fragmentation\\UPS\\basophilenew\\klc_031308p_cptac_study6_6_QC1.idpXML";
            //for (int z=0; z<=4; z++)
            //    List<string> pep_naive = Package.PepSecurity(naive, z);
            //    List<string> pep_baso = Package.PepSecurity(baso, z);
            //    List<string> common = Package.findCommon(pep_baso, pep_naive);

            //    Console.WriteLine("z==: " + z);
            //    Console.WriteLine("pep in naive: " + pep_naive.Count);
            //    Console.WriteLine("pep in baso: " + pep_baso.Count);
            //    Console.WriteLine("common: " + common.Count);


            //start myrimatch
            // Section 1: build peptide -> decoy lookup from columns 2 and 3 of merge.csv.
            // Only the first row seen for a given peptide is kept (guarded by ContainsKey).
            // NOTE(review): no Close/Dispose of file_temp is visible in this listing.
            Dictionary<string, string> peptideDic = new Dictionary<string, string>();
            TextReader file_temp = new StreamReader("X:\\wangd5\\idpXML_FDR1.00\\score evaluation\\merge.csv");
            DataTable table_temp = CSV.CsvParser.Parse(file_temp, true);
            foreach (DataRow dr in table_temp.Rows)
                string pep = dr[2].ToString();
                string decoy = dr[3].ToString();
                if (!peptideDic.ContainsKey(pep))
                    peptideDic.Add(pep, decoy);

            // Load the idpXML workspace and collect one "mvh,mzfidelity,z" string per spectrum,
            // keyed by "<peptide sequence>.<index>".
            string xml = "X:\\wangd5\\idpXML_FDR1.00\\myrimatch\\Assemble_MM.xml";
            Dictionary<string,string> PSM = new Dictionary<string,string>();
            Console.WriteLine("preparing reading idpXML");
            // NOTE(review): index is initialised here but no increment is visible in this
            // listing; if it really stays 0, a repeated peptide sequence would make PSM.Add
            // throw on the duplicate key - confirm against the original source.
            int index = 0;
            IDPicker.Workspace workspace = new IDPicker.Workspace();
            Package.loadWorkspace(ref workspace, xml);
            foreach (IDPicker.SourceGroupList.MapPair groupItr in workspace.groups)
                foreach (IDPicker.SourceInfo source in groupItr.Value.getSources(true))
                    foreach (IDPicker.SpectrumList.MapPair sItr in source.spectra)
                        // presumably results[1] is the top-ranked result - TODO confirm
                        IDPicker.ResultInstance ri = sItr.Value.results[1];
                        // NOTE(review): the right-hand side of this assignment is missing from the listing.
                        IDPicker.VariantInfo vi =;
                        string pepSequence = vi.peptide.sequence;

                        // Copies the search scores into a fixed two-element array; assumes there
                        // are at most two scores and that they arrive in the order
                        // (mvh, mzfidelity) - TODO confirm.
                        var scores = ri.searchScores.Values;
                        float[] scoreArr = new float[2];
                        scores.CopyTo(scoreArr, 0);
                        float mvh = scoreArr[0];
                        float mzfidelity = scoreArr[1];

                        // NOTE(review): the right-hand side of this assignment is missing from the listing.
                        string z =;

                        string key = pepSequence + "." + index;

                        PSM.Add(key, mvh + "," + mzfidelity + "," + z);


            // Build peptide -> RNA-Seq lookup from columns 0 and 4 of fdr1.0.csv.
            // Unlike peptideDic above, this Add is unguarded: Dictionary.Add throws
            // ArgumentException if the same peptide appears in more than one row.
            Console.WriteLine("preparing reading RNA-seq");
            Dictionary<string, string> dic_RNASeq = new Dictionary<string, string>();
            TextReader file_csv = new StreamReader("X:\\wangd5\\idpXML_FDR1.00\\score evaluation\\fdr1.0.csv");
            DataTable table = CSV.CsvParser.Parse(file_csv, true);
            foreach (DataRow dr in table.Rows)
                string pep = dr[0].ToString();
                string RNASeq = dr[4].ToString();
                dic_RNASeq.Add(pep, RNASeq);

            // Join: for every PSM whose peptide (the key text before the first '.') is present
            // in both lookups, emit "pep,<PSM scores>,<decoy>,<rna>"; otherwise count it as unmatched.
            List<string> finalList = new List<string>();
            int unmatched = 0;
            foreach (string key in PSM.Keys)
                string pep = key.Split('.')[0];
                if (dic_RNASeq.ContainsKey(pep) && peptideDic.ContainsKey(pep))
                    string rna = dic_RNASeq[pep];
                    finalList.Add(pep + "," + PSM[key] + "," + peptideDic[pep] + "," + rna);
                else unmatched++;

            Console.WriteLine("information: unmatched number is: " + unmatched);

            Console.WriteLine("preparing writing files...");
            string output = "X:\\wangd5\\idpXML_FDR1.00\\score evaluation\\myrimatchscorecombination.csv";
            TextWriter file = new StreamWriter(output);
            // NOTE(review): the body of this loop is missing from the listing; presumably each
            // entry of finalList is written to file - confirm. No Close/Flush of file is visible either.
            foreach (string ss in finalList)

            //for 3 search engines
            // Section 2: compare the peptide sets reported by three search results
            // (test_mm, test_sq, test_xt) and a fourth peptide list read from test_p.
            string test_mm = "X:\\wangd5\\SW480\\MM\\FDR0.05\\mm.xml";
            string test_sq = "X:\\wangd5\\SW480\\SQ\\FDR0.05\\sq.xml";
            string test_xt = "X:\\wangd5\\SW480\\XT\\FDR0.05\\xt.xml";
            string test_p = "X:\\wangd5\\SW480\\evaluation\\p.csv";

            //string test_mm = "Z:\\home\\dwang\\fragmentation\\RNA-Seq\\RKO\\Assemble_MM.xml";
            //string test_sq = "Z:\\home\\dwang\\fragmentation\\RNA-Seq\\RKO\\Assemble_SQ.xml";
            //string test_xt = "Z:\\home\\dwang\\fragmentation\\RNA-Seq\\RKO\\Assemble_XT.xml";

            //get peptides in p
            // NOTE(review): the loop reads column 2 into pep, but no statement adding pep to p
            // is visible in this listing - presumably it was lost; confirm.
            List<string> p = new List<string>();
            TextReader file_p = new StreamReader(test_p);
            DataTable dt = CSV.CsvParser.Parse(file_p, true);
            foreach (DataRow dr in dt.Rows)
                string pep = dr[2].ToString();

            // Peptide lists per search result; the meaning of the second argument (0) is
            // defined by Package.PepSecurity, which is not visible here.
            List<string> m = Package.PepSecurity(test_mm, 0);
            List<string> x = Package.PepSecurity(test_xt, 0);
            List<string> s = Package.PepSecurity(test_sq, 0);

            m = Package.removeDuplicate(m);
            x = Package.removeDuplicate(x);
            s = Package.removeDuplicate(s);
            p = Package.removeDuplicate(p);

            // Pairwise and three-way overlaps between the de-duplicated lists.
            List<string> pm = Package.findCommon(p, m);

            List<string> mx = Package.findCommon(m, x);
            List<string> ms = Package.findCommon(m, s);
            List<string> xs = Package.findCommon(x, s);
            List<string> mxs = Package.findCommon(mx, s);


            //read the fdr1.0.csv, to get all the peptide sequences that identified by 3 engines
            //no matter how low the score is
            //also, the file contains the RNASeq information
            // Here the lookup is built from columns 0 and 1 of PepRNA_FDR1.csv. The Add is
            // unguarded, so a repeated peptide row would throw ArgumentException.
            Dictionary<string, string> dic_RNASeq = new Dictionary<string, string>();
            TextReader file_csv = new StreamReader("X:\\wangd5\\SW480\\evaluation\\PepRNA_FDR1.csv");
            //TextReader file_csv = new StreamReader("X:\\wangd5\\idpXML_FDR1.00\\score evaluation\\fdr1.0.csvn");
            //TextReader file_csv = new StreamReader("X:\\wangd5\\idpXML_FDR1.00\\score evaluation\\fdr1.0.csv");
            DataTable table = CSV.CsvParser.Parse(file_csv, true);
            foreach (DataRow dr in table.Rows)
                string pep = dr[0].ToString();
                string RNASeq = dr[1].ToString();
                dic_RNASeq.Add(pep, RNASeq);

            // Per-set counters: number of peptides in each list whose RNA-Seq value is "1".
            int m_n = 0;
            int x_n = 0;
            int s_n = 0;
            int p_n = 0;
            int ms_n = 0;
            int mx_n = 0;
            int xs_n = 0;
            int mxs_n = 0;
            int pm_n = 0;

            // NOTE(review): with the braces gone it is unclear which if the else below belongs
            // to (peptide missing from dic_RNASeq vs. value not "1") - confirm.
            foreach (string ss in m)
                if (dic_RNASeq.ContainsKey(ss))
                    if (dic_RNASeq[ss] == "1") m_n++;
                else Console.WriteLine("crap");

            foreach (string ss in x)
                if (dic_RNASeq.ContainsKey(ss))
                    if (dic_RNASeq[ss] == "1") x_n++;


            foreach (string ss in s)
                if (dic_RNASeq.ContainsKey(ss))
                    if (dic_RNASeq[ss] == "1") s_n++;


            foreach (string ss in p)
                if (dic_RNASeq.ContainsKey(ss))
                    if (dic_RNASeq[ss] == "1") p_n++;


            foreach (string ss in ms)
                if (dic_RNASeq.ContainsKey(ss))
                    if (dic_RNASeq[ss] == "1") ms_n++;

            foreach (string ss in mx)
                if (dic_RNASeq.ContainsKey(ss))
                    if (dic_RNASeq[ss] == "1") mx_n++;

            foreach (string ss in xs)
                if (dic_RNASeq.ContainsKey(ss))
                    if (dic_RNASeq[ss] == "1") xs_n++;

            foreach (string ss in mxs)
                if (dic_RNASeq.ContainsKey(ss))
                    if (dic_RNASeq[ss] == "1") mxs_n++;


            foreach (string ss in pm)
                if (dic_RNASeq.ContainsKey(ss))
                    if (dic_RNASeq[ss] == "1") pm_n++;


            // Report format: "<set>: <list size>=<count with RNA-Seq value "1">".
            Console.WriteLine("m: " + m.Count + "=" + m_n);
            Console.WriteLine("x: " + x.Count + "=" + x_n);
            Console.WriteLine("s: " + s.Count + "=" + s_n);
            Console.WriteLine("p: " + p.Count + "=" + p_n);

            Console.WriteLine("mx: " + mx.Count + "=" + mx_n);
            Console.WriteLine("ms: " + ms.Count + "=" + ms_n);
            Console.WriteLine("xs: " + xs.Count + "=" + xs_n);

            Console.WriteLine("mxs: " + mxs.Count + "=" + mxs_n);

            Console.WriteLine("pm: " + pm.Count + "=" + pm_n);

            // Section 3: per-engine score dictionaries. These strings are passed to Package.RKO
            // as path arguments; how RKO expands them is not visible here.
            string path_mm = "X:\\wangd5\\SW480\\MM\\FDR1.00\\mam_012808n_SW480_200ug_";
            string path_sq = "X:\\wangd5\\SW480\\SQ\\FDR1.00\\mam_012808n_SW480_200ug_";
            string path_xt = "X:\\wangd5\\SW480\\XT\\FDR1.00\\mam_012808n_SW480_200ug_";

            // The same keyslist instance is handed to all three calls and de-duplicated
            // afterwards; presumably RKO appends every key it sees to it - TODO confirm.
            List<string> keyslist = new List<string>();
            Dictionary<string, double> dic_mm = Package.RKO(path_mm, keyslist);
            Dictionary<string, double> dic_sq = Package.RKO(path_sq, keyslist);
            Dictionary<string, double> dic_xt = Package.RKO(path_xt, keyslist);

            keyslist = Package.removeDuplicate(keyslist);

            //merge the three dictionaries into one.
            // Each key is comma-separated with the peptide as its third field (str[2]).
            // Keys whose peptide has no RNA-Seq entry are counted in misses and skipped.
            // A score missing from an engine's dictionary is written as "0".
            // Value column order is mm, xt, sq, RNA (note: xt precedes sq).
            Dictionary<string, string> dic_merge = new Dictionary<string,string>();
            int misses = 0;
            foreach (string key in keyslist)
                string mm = "";
                string sq = "";
                string xt = "";
                string[] str = key.Split(',');
                string pep = str[2];
                if (dic_RNASeq.ContainsKey(pep))
                    string RNA = dic_RNASeq[pep];

                    if (dic_mm.ContainsKey(key))
                        mm = dic_mm[key].ToString();
                    else mm = "0";
                    if (dic_xt.ContainsKey(key))
                        xt = dic_xt[key].ToString();
                    else xt = "0";
                    if (dic_sq.ContainsKey(key))
                        sq = dic_sq[key].ToString();
                    else sq = "0";
                    dic_merge.Add(key, mm + "," + xt + "," + sq + "," + RNA);
                else misses++;


            Console.WriteLine("the number of pep-rna misses is: " + misses);
            //write into file
            // One line per merged key: "<key>,<mm>,<xt>,<sq>,<RNA>".
            // NOTE(review): no Close/Flush/Dispose of file is visible in this listing; without
            // it buffered output may not reach disk - confirm the original closes the writer.
            string output = "X:\\wangd5\\SW480\\evaluation\\merge.csv";
            TextWriter file = new StreamWriter(output);
            foreach (string key in dic_merge.Keys)
                file.WriteLine(key + "," + dic_merge[key]);