예제 #1
0
 /// <summary>
 /// Builds the intersection of the time ranges of two instances.
 /// </summary>
 /// <param name="shortInstance1">First instance; the feature type and info of its first feature detail are copied into the result.</param>
 /// <param name="shortInstance2">Second instance; the feature type and info of its first feature detail are copied into the result.</param>
 /// <returns>
 /// <c>null</c> when one instance starts after the other one ends. Otherwise a new
 /// instance that runs from the later of the two starts to the earlier of the two ends
 /// and holds two feature details, one copied from each input (in argument order).
 /// </returns>
 public static ResultInstance OverlapShortInstances(this ResultInstance shortInstance1, ResultInstance shortInstance2)
 {
     // Either ordering of disjoint ranges means there is nothing to intersect.
     if (shortInstance1.start > shortInstance2.end)
     {
         return null;
     }
     if (shortInstance2.start > shortInstance1.end)
     {
         return null;
     }

     // Only the first feature detail of each input is carried over.
     var detailFromFirst = new FeatureDetails
     {
         featureType = shortInstance1.featureDetails[0].featureType,
         info = shortInstance1.featureDetails[0].info
     };
     var detailFromSecond = new FeatureDetails
     {
         featureType = shortInstance2.featureDetails[0].featureType,
         info = shortInstance2.featureDetails[0].info
     };

     // The overlap begins at the later start and stops at the earlier end.
     var laterStart = new[] { shortInstance1.start, shortInstance2.start }.Max();
     var earlierEnd = new[] { shortInstance1.end, shortInstance2.end }.Min();

     return new ResultInstance
     {
         featureDetails = new List<FeatureDetails> { detailFromFirst, detailFromSecond },
         start = laterStart,
         end = earlierEnd
     };
 }
예제 #2
0
 /// <summary>
 /// Builds the union of the time ranges of two instances.
 /// </summary>
 /// <param name="shortInstance1">First instance.</param>
 /// <param name="shortInstance2">Second instance.</param>
 /// <returns>
 /// A list holding both inputs unchanged (in argument order) when one instance starts
 /// after the other one ends. Otherwise a list with a single new instance that runs from
 /// the earlier of the two starts to the later of the two ends; only <c>start</c> and
 /// <c>end</c> are set on that merged instance.
 /// </returns>
 public static List <ResultInstance> UnionShortInstances(this ResultInstance shortInstance1, ResultInstance shortInstance2)
 {
     var union = new List<ResultInstance>();

     bool disjoint = shortInstance1.start > shortInstance2.end || shortInstance2.start > shortInstance1.end;
     if (disjoint)
     {
         // Nothing to merge: hand both instances back as they are.
         union.Add(shortInstance1);
         union.Add(shortInstance2);
         return union;
     }

     // The merged range spans from the earliest start to the latest end.
     var earliestStart = new[] { shortInstance1.start, shortInstance2.start }.Min();
     var latestEnd = new[] { shortInstance1.end, shortInstance2.end }.Max();

     union.Add(new ResultInstance
     {
         start = earliestStart,
         end = latestEnd
     });
     return union;
 }
예제 #3
0
        /// <summary>
        /// Adds identification labels to the ScanRanker metrics file of every input file.
        /// Runs idpQonvert once over all pepXML files, reads the identified spectra from each
        /// resulting idpDB file, and writes a new metrics file with the extra columns
        /// Label, CumsumLabel, Peptide and Protein appended.
        /// </summary>
        /// <remarks>
        /// The method stops early (after switching the button to "Close") when an idpDB file is
        /// missing or when none of the spectra in a metrics file were identified.
        /// </remarks>
        /// <exception cref="Exception">
        /// Thrown when idpQonvert fails or when the labelled metrics file cannot be written.
        /// The message is that of the underlying error, which is attached as the inner exception.
        /// </exception>
        public void AddSpectraLabel()
        {
            // Collect the pepXML files of all inputs so idpQonvert runs once and reads the database once.
            idpCfg.PepXMLFiles = "";
            foreach (FileInfo file in inFileList)
            {
                string fileBaseName = Path.GetFileNameWithoutExtension(file.Name);
                string pepXMLFile   = idpCfg.PepXMLFileDir + "\\" + fileBaseName + ".pepXML"; // pepXML file has to have the same basename
                idpCfg.PepXMLFiles += "\"" + pepXMLFile + "\"" + " ";                         // each file enclosed by "..." and separated by a space

                // Remove a stale idpDB file (same basename as the spectra file) before idpQonvert runs.
                string idpDBFilename = fileBaseName + ".idpDB";
                if (File.Exists(idpDBFilename))
                {
                    File.Delete(idpDBFilename);
                    Workspace.SetText("\r\nRemoved existing idpDB file: " + idpDBFilename + "\n");
                }
            }

            Workspace.SetText("\r\nRunning idpQonvert for peptide validation\r\n");
            try
            {
                runIdpQonvert(idpCfg, outDir);
            }
            catch (Exception exc)
            {
                Workspace.SetText("\r\nError in running idpQonvert\r\n");
                Workspace.ChangeButtonTo("Close");
                // Same exception type and message as before, but keep the original error and its stack trace.
                throw new Exception(exc.Message, exc);
            }

            // For each input: read spectra->peptides->proteins from the idpDB file,
            // then add labels, peptides and proteins to the metrics file.
            foreach (FileInfo file in inFileList)
            {
                string fileBaseName  = Path.GetFileNameWithoutExtension(file.Name);
                string idpDBFilename = fileBaseName + ".idpDB";  // idpDB file has the same basename as the spectra file

                if (!File.Exists(idpDBFilename))
                {
                    Workspace.SetText("\r\nError: Cannot find idpDB file: " + idpDBFilename + " in output directory!");
                    Workspace.SetText("\r\nPlease check IDPicker configurations and the database file");
                    Workspace.ChangeButtonTo("Close");
                    return;
                }

                // parse idpDB file
                Workspace.SetText("\r\nReading idpDB file: " + idpDBFilename);
                Map <string, ResultInstance> resultsMap = readIdentifiedSpectra(idpDBFilename);

                // The idpDB file is only an intermediate product; remove it once it has been read.
                if (File.Exists(idpDBFilename))
                {
                    File.Delete(idpDBFilename);
                }
                Workspace.SetText("\r\nRemoved idpDB file: " + idpDBFilename + "\n");

                // open metrics file, if identified, add label, write to a new file
                string metricsFileName = fileBaseName + metricsFileSuffix + ".txt";
                string outFileName     = fileBaseName + metricsFileSuffix + outFileSuffix + ".txt";

                Workspace.SetText("\r\nWriting labbled metrics file: " + outFileName);

                List <string> unidentifiedSpectra = new List <string>();
                int           numSpectra          = 0;
                int           cumsum              = 0; // running count of identified spectra

                try
                {
                    if (File.Exists(outFileName))
                    {
                        File.Delete(outFileName);
                    }

                    using (TextReader r = File.OpenText(metricsFileName))
                    using (TextWriter w = File.CreateText(outFileName))
                    {
                        w.WriteLine(r.ReadLine());                                                                         // read and write the 1st header line
                        w.WriteLine(r.ReadLine());                                                                         // read and write the 2nd header line
                        string header = r.ReadLine();                                                                      // read the 3rd header line
                        w.WriteLine(header + "\t" + "Label" + "\t" + "CumsumLabel" + "\t" + "Peptide" + "\t" + "Protein"); // write the 3rd header line

                        string line;
                        while ((line = r.ReadLine()) != null)
                        {
                            numSpectra++;
                            string[] items        = line.Split('\t');
                            string   scanNativeID = items[1]; // second tab-separated column is used as the nativeID key

                            if (resultsMap.Contains(scanNativeID))
                            {
                                cumsum += 1;
                                ResultInstance ri = resultsMap[scanNativeID];

                                // Modifications, when present, are appended to the peptide as {mods}.
                                string peptideColumn = ri.Peptide.ToString();
                                if (!ri.Mods.Equals(string.Empty))
                                {
                                    peptideColumn += "{" + ri.Mods.ToString() + "}";
                                }
                                w.WriteLine(line + "\t1\t" + cumsum + "\t" + peptideColumn + "\t" + ri.Protein.ToString());
                            }
                            else
                            {
                                w.WriteLine(line + "\t0\t" + cumsum);
                                unidentifiedSpectra.Add(scanNativeID);
                            }
                        }
                    }
                }
                catch (Exception exc)
                {
                    Workspace.SetText("\r\nError in creating a file with spectra labels, please check the ScanRanker metrics file\r\n");
                    Workspace.ChangeButtonTo("Close");
                    // Same exception type and message as before, but keep the original error and its stack trace.
                    throw new Exception(exc.Message, exc);
                }
                Workspace.SetText("\r\nFinished adding spectral labels for file: " + metricsFileName + " \r\n\r\n");

                // Every spectrum ended up unidentified: report it and stop.
                if (unidentifiedSpectra.Count == numSpectra)
                {
                    Workspace.SetText("\r\nNo spectra were identified. This might because the spectrumNativeID attribute is missing in pepXML files");
                    Workspace.ChangeButtonTo("Close");
                    return;
                }

                // write out unidentified high quality spectra
                if (writeOutUnidentifiedSpectra)
                {
                    writeUnidentifiedSpectra(file.Name, outFileSuffix, unidentifiedSpectra, recoveryCutoff, recoveryOutFormat);
                }

                Workspace.SetText("\r\n" + file.Name
                                  + ":\r\n\tTotal number of spectra: " + numSpectra.ToString()
                                  + "\r\n\tNumber of identified spectra: " + cumsum.ToString()
                                  + "\r\n\tNumber of unidentified spectra in output: " + (Convert.ToInt32(unidentifiedSpectra.Count * recoveryCutoff)).ToString()
                                  + "\r\n");
            } // end of foreach file


            Workspace.SetText("\r\nFinished adding spectral labels!");
            Workspace.ChangeButtonTo("Close");
        }// end of addSpectraLabel()

        /// <summary>
        /// Reads the rank-1 peptide-spectrum matches whose Q value is below
        /// <c>idpCfg.MaxFDR</c> from an idpDB (SQLite) file.
        /// </summary>
        /// <param name="idpDBFilename">Path of the idpDB file to query.</param>
        /// <returns>
        /// A map from spectrum NativeID to a result holding the modifications, peptide
        /// sequence and protein accessions returned by the query.
        /// </returns>
        private Map <string, ResultInstance> readIdentifiedSpectra(string idpDBFilename)
        {
            // The FDR threshold is bound as a parameter instead of being concatenated into the
            // SQL text, so the query does not depend on the current culture's decimal separator.
            const string querySQL = @"SELECT s.NativeID,  
                                    REPLACE(IFNULL(GROUP_CONCAT(DISTINCT pm.Offset || '=' || mod.MonoMassDelta),''),',',';') as Mods,
                                    IFNULL(IFNULL(SUBSTR(pd.Sequence, pi.Offset+1, pi.Length),
                                        (SELECT DecoySequence FROM Peptide p WHERE p.Id = pi.Peptide)),
                                        (SELECT SUBSTR(pd.Sequence, pi.Offset+1, pi.Length)
                                        FROM PeptideInstance pi 
                                        JOIN Protein pro ON pi.Protein = pro.Id AND pro.IsDecoy = 0
                                        LEFT JOIN ProteinData pd ON pi.Protein=pd.Id
                                        WHERE psm.Peptide = pi.Peptide
                                        UNION
                                        SELECT p.DecoySequence
                                        FROM Peptide p
                                        JOIN PeptideInstance pi ON p.Id = pi.Peptide
                                        JOIN Protein pro ON pi.Protein = pro.Id AND pro.IsDecoy = 1
                                        WHERE psm.Peptide = pi.Peptide AND p.DecoySequence is not null)) as Peptide,
                                    GROUP_CONCAT(pro.Accession) as Protein
                                    FROM PeptideSpectrumMatch psm 
                                    JOIN Spectrum s ON s.Id = psm.Spectrum
                                    JOIN SpectrumSource source ON s.Source = source.Id
                                    JOIN PeptideInstance pi ON psm.Peptide = pi.Peptide
                                    JOIN Protein pro ON pi.Protein = pro.Id
                                    LEFT JOIN ProteinData pd ON pi.Protein=pd.Id
                                    LEFT JOIN PeptideModification pm ON psm.Id = pm.PeptideSpectrumMatch
                                    LEFT JOIN Modification mod ON pm.Modification = mod.Id
                                    Where psm.Rank = 1 and psm.Qvalue < @maxFDR
                                    GROUP BY psm.Id";

            Map <string, ResultInstance> resultsMap = new Map <string, ResultInstance>();

            // Connect to database
            string dbConnectionString = "Data Source=" + idpDBFilename;
            using (SQLiteConnection sqliteCon = new SQLiteConnection(dbConnectionString))
            {
                Workspace.SetText("\r\nExtracting identified spectra, peptides and proteins from " + idpDBFilename);
                using (SQLiteCommand queryCommand = new SQLiteCommand(querySQL, sqliteCon))
                {
                    queryCommand.Parameters.AddWithValue("@maxFDR", idpCfg.MaxFDR);
                    sqliteCon.Open();
                    using (SQLiteDataReader dataReader = queryCommand.ExecuteReader())
                    {
                        // Iterate every record: columns are NativeID, Mods, Peptide, Protein.
                        while (dataReader.Read())
                        {
                            string nativeID = dataReader.GetString(0);
                            string mods     = dataReader.GetString(1);
                            string peptide  = dataReader.GetString(2);
                            Console.WriteLine(nativeID + " " + mods + " " + peptide); // diagnostic trace of each record

                            ResultInstance ri = new ResultInstance();
                            ri.Mods    = mods;
                            ri.Peptide = peptide;
                            ri.Protein = dataReader.GetString(3);
                            resultsMap.Add(nativeID, ri);
                        }
                    }
                }
            }

            return resultsMap;
        }