/// <summary>
/// Computes the intersection of two instances' [start, end] ranges.
/// </summary>
/// <param name="shortInstance1">First instance; its first feature detail entry is copied into the result.</param>
/// <param name="shortInstance2">Second instance; its first feature detail entry is copied into the result.</param>
/// <returns>
/// <c>null</c> when one range starts after the other one ends; otherwise a new
/// instance spanning the later start to the earlier end and carrying a copy of
/// the first feature detail (type and info) of each input, in argument order.
/// </returns>
public static ResultInstance OverlapShortInstances(this ResultInstance shortInstance1, ResultInstance shortInstance2)
{
    // Ranges that merely touch at an end point still count as overlapping.
    bool disjoint = shortInstance1.start > shortInstance2.end
                    || shortInstance2.start > shortInstance1.end;
    if (disjoint)
    {
        return null;
    }

    // Only the first feature detail of each instance is used.
    var firstFeature = shortInstance1.featureDetails[0];
    var secondFeature = shortInstance2.featureDetails[0];

    var combinedDetails = new List<FeatureDetails>();
    combinedDetails.Add(new FeatureDetails
    {
        featureType = firstFeature.featureType,
        info = firstFeature.info
    });
    combinedDetails.Add(new FeatureDetails
    {
        featureType = secondFeature.featureType,
        info = secondFeature.info
    });

    var overlap = new ResultInstance();
    overlap.featureDetails = combinedDetails;
    overlap.start = new[] { shortInstance1.start, shortInstance2.start }.Max();
    overlap.end = new[] { shortInstance1.end, shortInstance2.end }.Min();
    return overlap;
}
/// <summary>
/// Computes the union of two instances' [start, end] ranges.
/// </summary>
/// <param name="shortInstance1">First instance.</param>
/// <param name="shortInstance2">Second instance.</param>
/// <returns>
/// A list holding both original instances (in argument order) when one range
/// starts after the other one ends; otherwise a list holding a single new
/// instance that spans the earlier start to the later end.
/// </returns>
public static List<ResultInstance> UnionShortInstances(this ResultInstance shortInstance1, ResultInstance shortInstance2)
{
    var union = new List<ResultInstance>();

    if (shortInstance1.start > shortInstance2.end || shortInstance2.start > shortInstance1.end)
    {
        // No overlap: nothing to merge, hand back the inputs untouched.
        union.Add(shortInstance1);
        union.Add(shortInstance2);
        return union;
    }

    // Only the range is set on the merged instance; feature details are not carried over.
    var merged = new ResultInstance();
    merged.start = new[] { shortInstance1.start, shortInstance2.start }.Min();
    merged.end = new[] { shortInstance1.end, shortInstance2.end }.Max();
    union.Add(merged);
    return union;
}
/// <summary>
/// Adds identification labels to the ScanRanker metrics file of every input file.
/// Calls runIdpQonvert once for all pepXML files, then, for each input file, reads
/// the identified spectra (native ID, modifications, peptide, proteins) from the
/// matching idpDB file and writes a copy of the metrics file with the columns
/// Label, CumsumLabel, Peptide and Protein appended.
/// </summary>
/// <remarks>
/// The pepXML, idpDB and metrics files are located by the base name of the
/// spectra file. Each idpDB file is deleted after it has been read. Processing
/// stops (the method returns) at the first file whose idpDB file is missing or
/// for which no spectrum was identified.
/// </remarks>
/// <exception cref="Exception">
/// runIdpQonvert failed or the labeled metrics file could not be written. The
/// message is that of the original exception, which is available as
/// <see cref="Exception.InnerException"/>.
/// </exception>
public void AddSpectraLabel()
{
    // Collect all pepXML files so idpQonvert runs once and reads the database only once.
    idpCfg.PepXMLFiles = "";
    foreach (FileInfo file in inFileList)
    {
        string fileBaseName = Path.GetFileNameWithoutExtension(file.Name);

        // The pepXML file has to have the same base name as the spectra file.
        string pepXMLFile = idpCfg.PepXMLFileDir + "\\" + fileBaseName + ".pepXML";

        // Each file is enclosed in "..." and followed by a space.
        idpCfg.PepXMLFiles += "\"" + pepXMLFile + "\"" + " ";

        // Remove a stale idpDB file before idpQonvert is run.
        string idpDBFilename = fileBaseName + ".idpDB";
        if (File.Exists(idpDBFilename))
        {
            File.Delete(idpDBFilename);
            Workspace.SetText("\r\nRemoved existing idpDB file: " + idpDBFilename + "\n");
        }
    }

    Workspace.SetText("\r\nRunning idpQonvert for peptide validation\r\n");
    try
    {
        runIdpQonvert(idpCfg, outDir);
    }
    catch (Exception exc)
    {
        Workspace.SetText("\r\nError in running idpQonvert\r\n");
        Workspace.ChangeButtonTo("Close");
        // Same exception type and message as before, but keep the cause attached.
        throw new Exception(exc.Message, exc);
    }

    // Read spectra -> peptides -> proteins from each idpDB file (IDPicker 3, via
    // System.Data.SQLite) and add labels, peptides and proteins to the metrics file.
    foreach (FileInfo file in inFileList)
    {
        string fileBaseName = Path.GetFileNameWithoutExtension(file.Name);
        string idpDBFilename = fileBaseName + ".idpDB";
        if (!File.Exists(idpDBFilename))
        {
            Workspace.SetText("\r\nError: Cannot find idpDB file: " + idpDBFilename + " in output directory!");
            Workspace.SetText("\r\nPlease check IDPicker configurations and the database file");
            Workspace.ChangeButtonTo("Close");
            return;
        }

        // Parse the idpDB file: native ID -> identification result.
        Workspace.SetText("\r\nReading idpDB file: " + idpDBFilename);
        Map<string, ResultInstance> resultsMap = new Map<string, ResultInstance>();

        string dbConnectionString = "Data Source=" + idpDBFilename;
        using (SQLiteConnection sqliteCon = new SQLiteConnection(dbConnectionString))
        {
            // The threshold is embedded in the SQL text, so it must be formatted with
            // the invariant culture: a comma decimal separator would break the query.
            string maxFdrLiteral = Convert.ToString(idpCfg.MaxFDR, System.Globalization.CultureInfo.InvariantCulture);

            // Rank-1 matches below the FDR threshold, one row per peptide-spectrum match.
            string querySQL = @"SELECT s.NativeID,
                    REPLACE(IFNULL(GROUP_CONCAT(DISTINCT pm.Offset || '=' || mod.MonoMassDelta),''),',',';') as Mods,
                    IFNULL(IFNULL(SUBSTR(pd.Sequence, pi.Offset+1, pi.Length),
                                  (SELECT DecoySequence FROM Peptide p WHERE p.Id = pi.Peptide)),
                           (SELECT SUBSTR(pd.Sequence, pi.Offset+1, pi.Length)
                            FROM PeptideInstance pi
                            JOIN Protein pro ON pi.Protein = pro.Id AND pro.IsDecoy = 0
                            LEFT JOIN ProteinData pd ON pi.Protein=pd.Id
                            WHERE psm.Peptide = pi.Peptide
                            UNION
                            SELECT p.DecoySequence
                            FROM Peptide p
                            JOIN PeptideInstance pi ON p.Id = pi.Peptide
                            JOIN Protein pro ON pi.Protein = pro.Id AND pro.IsDecoy = 1
                            WHERE psm.Peptide = pi.Peptide AND p.DecoySequence is not null)) as Peptide,
                    GROUP_CONCAT(pro.Accession) as Protein
                FROM PeptideSpectrumMatch psm
                JOIN Spectrum s ON s.Id = psm.Spectrum
                JOIN SpectrumSource source ON s.Source = source.Id
                JOIN PeptideInstance pi ON psm.Peptide = pi.Peptide
                JOIN Protein pro ON pi.Protein = pro.Id
                LEFT JOIN ProteinData pd ON pi.Protein=pd.Id
                LEFT JOIN PeptideModification pm ON psm.Id = pm.PeptideSpectrumMatch
                LEFT JOIN Modification mod ON pm.Modification = mod.Id
                Where psm.Rank = 1 and psm.Qvalue < " + maxFdrLiteral + " GROUP BY psm.Id";

            Workspace.SetText("\r\nExtracting identified spectra, peptides and proteins from " + idpDBFilename);
            using (SQLiteCommand queryCommand = new SQLiteCommand(querySQL, sqliteCon))
            {
                sqliteCon.Open();
                using (SQLiteDataReader dataReader = queryCommand.ExecuteReader())
                {
                    while (dataReader.Read())
                    {
                        // Read every column once; the first three are also echoed to the console.
                        string nativeID = dataReader.GetString(0);
                        string mods = dataReader.GetString(1);
                        string peptide = dataReader.GetString(2);
                        Console.WriteLine(nativeID + " " + mods + " " + peptide);

                        ResultInstance ri = new ResultInstance();
                        ri.Mods = mods;
                        ri.Peptide = peptide;
                        ri.Protein = dataReader.GetString(3);
                        resultsMap.Add(nativeID, ri);
                    }
                }
            }
        }

        // The idpDB file is only an intermediate product; remove it once it has been read.
        if (File.Exists(idpDBFilename))
        {
            File.Delete(idpDBFilename);
        }
        Workspace.SetText("\r\nRemoved idpDB file: " + idpDBFilename + "\n");

        // Open the metrics file and write a labeled copy of it.
        string metricsFileName = fileBaseName + metricsFileSuffix + ".txt";
        string outFileName = fileBaseName + metricsFileSuffix + outFileSuffix + ".txt";
        Workspace.SetText("\r\nWriting labbled metrics file: " + outFileName);

        List<string> unidentifiedSpectra = new List<string>();
        int numSpectra = 0;
        int cumsum = 0; // running count of identified spectra, written as CumsumLabel
        try
        {
            if (File.Exists(outFileName))
            {
                File.Delete(outFileName);
            }

            using (TextReader r = File.OpenText(metricsFileName))
            using (TextWriter w = File.CreateText(outFileName))
            {
                w.WriteLine(r.ReadLine()); // copy the 1st header line
                w.WriteLine(r.ReadLine()); // copy the 2nd header line
                string header = r.ReadLine(); // the 3rd header line holds the column names
                w.WriteLine(header + "\t" + "Label" + "\t" + "CumsumLabel" + "\t" + "Peptide" + "\t" + "Protein");

                string line;
                while ((line = r.ReadLine()) != null)
                {
                    numSpectra++;
                    string[] items = line.Split('\t');
                    string scanNativeID = items[1]; // second column is the native ID

                    if (resultsMap.Contains(scanNativeID))
                    {
                        cumsum += 1;
                        ResultInstance ri = resultsMap[scanNativeID];

                        // Modifications, when present, are appended to the peptide as {mods}.
                        string peptideColumn;
                        if (ri.Mods.Equals(string.Empty))
                        {
                            peptideColumn = ri.Peptide.ToString();
                        }
                        else
                        {
                            peptideColumn = ri.Peptide.ToString() + "{" + ri.Mods.ToString() + "}";
                        }

                        w.WriteLine(line + "\t1\t" + cumsum + "\t" + peptideColumn + "\t" + ri.Protein.ToString());
                    }
                    else
                    {
                        // Unidentified rows get label 0 and no peptide/protein columns.
                        w.WriteLine(line + "\t0\t" + cumsum);
                        unidentifiedSpectra.Add(scanNativeID);
                    }
                }
            }
        }
        catch (Exception exc)
        {
            Workspace.SetText("\r\nError in creating a file with spectra labels, please check the ScanRanker metrics file\r\n");
            Workspace.ChangeButtonTo("Close");
            // Same exception type and message as before, but keep the cause attached.
            throw new Exception(exc.Message, exc);
        }
        Workspace.SetText("\r\nFinished adding spectral labels for file: " + metricsFileName + " \r\n\r\n");

        // Nothing matched at all: most likely the native IDs could not be matched.
        if (unidentifiedSpectra.Count == numSpectra)
        {
            Workspace.SetText("\r\nNo spectra were identified. This might because the spectrumNativeID attribute is missing in pepXML files");
            Workspace.ChangeButtonTo("Close");
            return;
        }

        // Optionally write out the unidentified spectra.
        if (writeOutUnidentifiedSpectra)
        {
            writeUnidentifiedSpectra(file.Name, outFileSuffix, unidentifiedSpectra, recoveryCutoff, recoveryOutFormat);
        }

        Workspace.SetText("\r\n" + file.Name + ":\r\n\tTotal number of spectra: " + numSpectra.ToString()
                          + "\r\n\tNumber of identified spectra: " + cumsum.ToString()
                          + "\r\n\tNumber of unidentified spectra in output: "
                          + (Convert.ToInt32(unidentifiedSpectra.Count * recoveryCutoff)).ToString() + "\r\n");
    } // end of foreach file

    Workspace.SetText("\r\nFinished adding spectral labels!");
    Workspace.ChangeButtonTo("Close");
} // end of AddSpectraLabel()