예제 #1
0
 /// <summary>
 /// Adds a protein to <c>myProteins</c> unless an entry with the same ID is already present.
 /// </summary>
 /// <remarks>
 /// IDs that start with "IPI" are normalized by removing everything from the first '.' onward.
 /// The duplicate check is performed on the normalized ID, so two IDs that differ only in that
 /// suffix (e.g. "IPI00123.1" and "IPI00123.2") produce a single entry.
 /// </remarks>
 /// <param name="ID">Protein identifier</param>
 /// <param name="Description">Protein description stored alongside the ID</param>
 /// <exception cref="Exception">
 /// Thrown with the message "Failed parsing result report" when the protein cannot be added
 /// (for example when <paramref name="ID"/> is null); the original error is the InnerException.
 /// </exception>
 private void AddProtein(string ID, string Description)
 {
     try
     {
         if (ID.StartsWith("IPI", StringComparison.Ordinal))
         {
             // Remove the decimal suffix at the end (everything from the first '.')
             ID = Regex.Replace(ID, @"\..*", "");
         }

         // Compare against the normalized ID; checking the raw ID would let the same
         // protein be stored more than once under an identical normalized ID
         if (myProteins.Exists((a) => a.ID.Equals(ID)))
         {
             return;
         }

         ProteinInfo pt = new ProteinInfo();
         pt.ID          = ID;
         pt.Description = Description;
         myProteins.Add(pt);
     }
     catch (Exception ex)
     {
         // Keep the message callers already see, but preserve the root cause
         throw new Exception("Failed parsing result report", ex);
     }
 }
예제 #2
0
        /// <summary>
        /// Load the Sequence to Protein mapping using the specified PHRP result file
        /// </summary>
        /// <remarks>
        /// The file is read as tab-delimited text. Only the first non-empty line is examined as a possible header:
        /// if its first field matches the Unique_Seq_ID column name, the column order is taken from that line;
        /// otherwise the default column order is used and the line is treated as data.
        /// Empty lines, lines with fewer than three fields, lines whose first field is not an integer,
        /// and lines with an empty protein name are skipped.
        /// When MaxProteinsPerSeqID is non-zero, at most that many proteins are stored for each sequence ID.
        /// </remarks>
        /// <param name="filePath">Path to the sequence to protein map file</param>
        /// <param name="seqToProteinMap">Sequence to protein map; proteins are added to it, keyed on sequence ID</param>
        /// <returns>True if successful; errors are reported by throwing an exception, not by returning false</returns>
        /// <exception cref="Exception">
        /// Thrown when the file cannot be read or parsed; the original exception is available as InnerException
        /// </exception>
        private bool LoadSeqToProteinMapping(string filePath, IDictionary<int, List<ProteinInfo>> seqToProteinMap)
        {
            var headerLineParsed = false;

            try
            {
                // Initialize the column mapping
                // Using a case-insensitive comparer
                var columnHeaders = new SortedDictionary<string, int>(StringComparer.OrdinalIgnoreCase)
                {
                    { SEQ_PROT_MAP_COLUMN_Unique_Seq_ID, 0 },
                    { SEQ_PROT_MAP_COLUMN_Cleavage_State, 1 },
                    { SEQ_PROT_MAP_COLUMN_Terminus_State, 2 },
                    { SEQ_PROT_MAP_COLUMN_Protein_Name, 3 },
                    { SEQ_PROT_MAP_COLUMN_Protein_EValue, 4 },
                    { SEQ_PROT_MAP_COLUMN_Protein_Intensity, 5 }
                };

                // Read the data from the sequence to protein map file
                // FileShare.ReadWrite allows the file to be read while another process has it open for writing
                using var reader = new StreamReader(new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite));

                while (!reader.EndOfStream)
                {
                    var lineIn = reader.ReadLine();

                    if (string.IsNullOrEmpty(lineIn))
                    {
                        continue;
                    }

                    var splitLine = lineIn.Split('\t');

                    if (!headerLineParsed)
                    {
                        // Only the first non-empty line can be a header line
                        headerLineParsed = true;

                        if (string.Equals(splitLine[0], SEQ_PROT_MAP_COLUMN_Unique_Seq_ID, StringComparison.OrdinalIgnoreCase))
                        {
                            // Parse the header line to confirm the column ordering, then move on to the data lines
                            ReaderFactory.ParseColumnHeaders(splitLine, columnHeaders);
                            continue;
                        }
                    }

                    if (splitLine.Length < 3)
                    {
                        continue;
                    }

                    if (!int.TryParse(splitLine[0], out var seqID))
                    {
                        continue;
                    }

                    var proteinName = ReaderFactory.LookupColumnValue(splitLine, SEQ_PROT_MAP_COLUMN_Protein_Name, columnHeaders, string.Empty);

                    if (string.IsNullOrEmpty(proteinName))
                    {
                        continue;
                    }

                    var cleavageState = (PeptideCleavageStateCalculator.PeptideCleavageState)ReaderFactory.LookupColumnValue(splitLine, SEQ_PROT_MAP_COLUMN_Cleavage_State, columnHeaders, 0);
                    var terminusState = (PeptideCleavageStateCalculator.PeptideTerminusState)ReaderFactory.LookupColumnValue(splitLine, SEQ_PROT_MAP_COLUMN_Terminus_State, columnHeaders, 0);

                    var proteinInfo = new ProteinInfo(proteinName, seqID, cleavageState, terminusState);

                    if (seqToProteinMap.TryGetValue(seqID, out var proteins))
                    {
                        // Sequence already exists in seqToProteinMap; add the new protein info
                        // A MaxProteinsPerSeqID of 0 means no limit
                        if (MaxProteinsPerSeqID == 0 || proteins.Count < MaxProteinsPerSeqID)
                        {
                            proteins.Add(proteinInfo);
                        }
                    }
                    else
                    {
                        // New Sequence ID
                        proteins = new List<ProteinInfo> {
                            proteinInfo
                        };
                        seqToProteinMap.Add(seqID, proteins);
                    }
                }
            }
            catch (Exception ex)
            {
                // Pass the caught exception as the inner exception so its type and stack trace are not lost
                throw new Exception("Exception loading Seq to Protein Mapping from " + Path.GetFileName(filePath) + ": " + ex.Message, ex);
            }

            return true;
        }
예제 #3
0
    /// <summary>
    /// Builds the protein report for a task from its pipe-delimited Result.txt file and writes
    /// two outputs next to it: an XML report (Result.txt.xml) and a tab-delimited table (Result.txt.xls).
    /// </summary>
    /// <remarks>
    /// Only lines whose first field is "Y" are used. Fields are read by position:
    /// 1 = scan number, 2 = peptide ID, 5 = similarity score, 6 = delta mass, 7 = spectrum,
    /// 8 = peptide sequence, 9 = modification type, 10 = final score, 11 = mz file,
    /// 12 = protein ID (IPI), 13 = protein name, 14 = protein length.
    /// If Result.txt does not exist, the outputs are still written but contain no proteins.
    /// Errors while reading Result.txt (for example a "Y" line with too few fields) propagate to the caller;
    /// errors while writing the outputs are logged through DBInterface.LogEvent and not rethrown.
    /// </remarks>
    /// <param name="TaskID">Task identifier; appended to the configured "UploadPath" value to locate the task folder, and written as the TaskID attribute</param>
    /// <param name="FileName">Written as the mzFile attribute of the report root element</param>
    /// <param name="searchModule">Written as the LibModule attribute of the report root element</param>
    /// <param name="sc">Written as the SearchFilter attribute of the report root element</param>
    /// <param name="distinctPeptides">Passed to IsPassFilter together with each protein's scan/peptide list to decide whether the protein is reported</param>
    /// <returns>Always true</returns>
    public bool CreateReport(string TaskID, string FileName, string searchModule, string sc, int distinctPeptides)
    {
        Dictionary<string, ProteinInfo> Proteins = new Dictionary<string, ProteinInfo>();
        string strReportFile = WebConfigurationManager.ConnectionStrings["UploadPath"].ConnectionString + TaskID + "\\Result.txt";

        if (File.Exists(strReportFile))
        {
            // using blocks guarantee the file handle is released even when a line fails to parse
            using (FileStream fs = File.Open(strReportFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            using (StreamReader sr = new StreamReader(fs))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // Fields are separated by '|'; the first field is "Y" for lines that carry a match
                    string[] tokens = line.Split('|');
                    if (tokens[0] != "Y")
                    {
                        continue;
                    }

                    string peptideID = tokens[2];
                    string strIPI    = tokens[12];

                    ProteinInfo PI;
                    if (!Proteins.TryGetValue(strIPI, out PI))
                    {
                        // First time this protein is seen: create its entry (the IPI doubles as the COPa ID)
                        string strProteinName = tokens[13];
                        string strOrganism;
                        if (this.lbSearchingModule.Text.ToLower().Contains("human"))
                        {
                            // The literal had been corrupted to "H**o sapiens (Human)"
                            strOrganism = "Homo sapiens (Human)";
                        }
                        else
                        {
                            strOrganism = "Mus musculus (Mouse)";
                        }
                        // NOTE(review): int.Parse/double.Parse use the current culture here - confirm Result.txt is produced with the same culture
                        int length = int.Parse(tokens[14]);
                        PI = new ProteinInfo(strIPI, strIPI, strProteinName, strOrganism, length);
                    }

                    ScanPeptide spinfo = new ScanPeptide();
                    spinfo.ScanNO          = tokens[1];
                    spinfo.PeptideID       = peptideID;
                    spinfo.SimilarityScore = tokens[5];
                    spinfo.DetaM           = double.Parse(tokens[6]);
                    spinfo.SpectrumSeq     = tokens[7];
                    spinfo.PeptideSequence = tokens[8];
                    spinfo.ModifiedType    = tokens[9];
                    spinfo.FinalScore      = tokens[10];
                    spinfo.mzFile          = tokens[11];
                    PI.AddScanPeptide(spinfo);

                    // Store (or store back) the entry so new and existing proteins share one code path
                    Proteins[strIPI] = PI;
                }
            }
        }

        // Write the protein view info to the XML report and the tab-delimited table
        XmlWriterSettings settings = new XmlWriterSettings();
        settings.Indent      = true;
        settings.IndentChars = "    ";

        string XSLFile = strReportFile + ".xls";
        strReportFile += ".xml";

        // Table columns: mzFile, Scan, PeptideSequence, ModifiedType, Protein Access Numbers, Protein Name, Species,
        // Protein COPaID, Peptide COPaID, Spectrum COPaID, Similarity Score, DetaM/Z, Final Score
        string ProteinLine = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}\t{12}";

        try
        {
            using (StreamWriter sw = new StreamWriter(XSLFile))
            {
                sw.WriteLine(ProteinLine, "mzFile", "Scan", "PeptideSequence", "ModifiedType", "Protein Access Numbers", "Protein Name", "Species", "Protein COPaID", "Peptide COPaID", "Spectrum COPaID", "Similarity Score", "DetaM/Z", "Final Score");
                using (XmlWriter writer = XmlWriter.Create(strReportFile, settings))
                {
                    writer.WriteStartElement("COPaReport");
                    writer.WriteAttributeString("TaskID", TaskID);
                    writer.WriteAttributeString("mzFile", FileName);
                    writer.WriteAttributeString("LibModule", searchModule);
                    writer.WriteAttributeString("SearchFilter", sc);
                    // IDProteins counts every protein read, including those later rejected by IsPassFilter
                    writer.WriteAttributeString("IDProteins", Proteins.Count.ToString());

                    foreach (KeyValuePair<string, ProteinInfo> kvp in Proteins)
                    {
                        ArrayList splists = kvp.Value.ScanPeptides;
                        if (!IsPassFilter(splists, distinctPeptides))
                        {
                            continue;
                        }

                        writer.WriteStartElement("Proteins");
                        writer.WriteAttributeString("IPI", kvp.Key);
                        writer.WriteAttributeString("COPaID", kvp.Value.COPaID);
                        writer.WriteAttributeString("ProteinName", kvp.Value.ProteinName);
                        writer.WriteAttributeString("Organism", kvp.Value.Organism);
                        writer.WriteAttributeString("SpectraCount", splists.Count.ToString());
                        // Spectra count divided by protein length
                        writer.WriteAttributeString("NormalizCount", ((float)(splists.Count) / kvp.Value.ProteinLength).ToString());

                        foreach (ScanPeptide sp in splists)
                        {
                            writer.WriteStartElement("Scan-Peptide");
                            writer.WriteAttributeString("mzFile", sp.mzFile);
                            writer.WriteAttributeString("Scan", sp.ScanNO);
                            writer.WriteAttributeString("PeptideSequence", sp.PeptideSequence);
                            if (sp.ModifiedType != "")
                            {
                                writer.WriteAttributeString("ModifiedType", sp.ModifiedType);
                            }
                            writer.WriteAttributeString("Peptide", sp.PeptideID);
                            writer.WriteAttributeString("SimilarityScore", sp.SimilarityScore);
                            writer.WriteAttributeString("DetaMZ", sp.DetaM.ToString());
                            writer.WriteAttributeString("Spectrum", sp.SpectrumSeq);
                            writer.WriteAttributeString("FinalyScore", sp.FinalScore);
                            writer.WriteEndElement();

                            // Same record as a row of the tab-delimited table
                            sw.WriteLine(ProteinLine, sp.mzFile, sp.ScanNO, sp.PeptideSequence, sp.ModifiedType, kvp.Key, kvp.Value.ProteinName, kvp.Value.Organism, kvp.Value.COPaID, sp.PeptideID, sp.SpectrumSeq, sp.SimilarityScore, sp.DetaM, sp.FinalScore);
                        }

                        writer.WriteEndElement();
                    }

                    writer.WriteEndElement();
                    // Both writers are flushed and closed when their using blocks end
                }
            }
        }
        catch (Exception ex)
        {
            // Output failures are logged only; the method still returns true
            DBInterface.LogEvent(ex.ToString(), System.Diagnostics.EventLogEntryType.Error);
        }

        return true;
    }
예제 #4
0
    /// <summary>
    /// Builds the protein report for a task from its pipe-delimited Result.txt file and writes
    /// two outputs next to it: an XML report (Result.txt.xml) and a tab-delimited table (Result.txt.xls).
    /// </summary>
    /// <remarks>
    /// Only lines whose first field is "Y" are used. Fields are read by position:
    /// 1 = scan number, 2 = peptide ID, 5 = similarity score, 6 = delta mass, 7 = spectrum,
    /// 8 = peptide sequence, 9 = modification type, 10 = final score, 11 = mz file,
    /// 12 = protein ID (IPI), 13 = protein name, 14 = protein length.
    /// If Result.txt does not exist, the outputs are still written but contain no proteins.
    /// Errors while reading Result.txt (for example a "Y" line with too few fields) propagate to the caller;
    /// errors while writing the outputs are logged through DBInterface.LogEvent and not rethrown.
    /// </remarks>
    /// <param name="TaskID">Task identifier; appended to the configured "UploadPath" value to locate the task folder, and written as the TaskID attribute</param>
    /// <param name="FileName">Written as the mzFile attribute of the report root element</param>
    /// <param name="searchModule">Written as the LibModule attribute of the report root element</param>
    /// <param name="sc">Written as the SearchFilter attribute of the report root element</param>
    /// <param name="distinctPeptides">Passed to IsPassFilter together with each protein's scan/peptide list to decide whether the protein is reported</param>
    /// <returns>Always true</returns>
    public bool CreateReport(string TaskID, string FileName, string searchModule, string sc, int distinctPeptides)
    {
        Dictionary<string, ProteinInfo> Proteins = new Dictionary<string, ProteinInfo>();
        string strReportFile = WebConfigurationManager.ConnectionStrings["UploadPath"].ConnectionString + TaskID + "\\Result.txt";
        if (File.Exists(strReportFile))
        {
            // using blocks guarantee the file handle is released even when a line fails to parse
            using (FileStream fs = File.Open(strReportFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            using (StreamReader sr = new StreamReader(fs))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // Fields are separated by '|'; the first field is "Y" for lines that carry a match
                    string[] tokens = line.Split('|');
                    if (tokens[0] != "Y")
                        continue;

                    string peptideID = tokens[2];
                    string strIPI = tokens[12];

                    ProteinInfo PI;
                    if (!Proteins.TryGetValue(strIPI, out PI))
                    {
                        // First time this protein is seen: create its entry (the IPI doubles as the COPa ID)
                        string strProteinName = tokens[13];
                        string strOrganism;
                        if (this.lbSearchingModule.Text.ToLower().Contains("human"))
                            strOrganism = "Homo sapiens (Human)"; // the literal had been corrupted to "H**o sapiens (Human)"
                        else
                            strOrganism = "Mus musculus (Mouse)";
                        // NOTE(review): int.Parse/double.Parse use the current culture here - confirm Result.txt is produced with the same culture
                        int length = int.Parse(tokens[14]);
                        PI = new ProteinInfo(strIPI, strIPI, strProteinName, strOrganism, length);
                    }

                    ScanPeptide spinfo = new ScanPeptide();
                    spinfo.ScanNO = tokens[1];
                    spinfo.PeptideID = peptideID;
                    spinfo.SimilarityScore = tokens[5];
                    spinfo.DetaM = double.Parse(tokens[6]);
                    spinfo.SpectrumSeq = tokens[7];
                    spinfo.PeptideSequence = tokens[8];
                    spinfo.ModifiedType = tokens[9];
                    spinfo.FinalScore = tokens[10];
                    spinfo.mzFile = tokens[11];
                    PI.AddScanPeptide(spinfo);

                    // Store (or store back) the entry so new and existing proteins share one code path
                    Proteins[strIPI] = PI;
                }
            }
        }

        // Write the protein view info to the XML report and the tab-delimited table
        XmlWriterSettings settings = new XmlWriterSettings();
        settings.Indent = true;
        settings.IndentChars = "    ";
        string XSLFile = strReportFile + ".xls";
        strReportFile += ".xml";
        // Table columns: mzFile, Scan, PeptideSequence, ModifiedType, Protein Access Numbers, Protein Name, Species,
        // Protein COPaID, Peptide COPaID, Spectrum COPaID, Similarity Score, DetaM/Z, Final Score
        string ProteinLine = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}\t{12}";
        try
        {
            using (StreamWriter sw = new StreamWriter(XSLFile))
            {
                sw.WriteLine(ProteinLine, "mzFile", "Scan", "PeptideSequence", "ModifiedType", "Protein Access Numbers", "Protein Name", "Species", "Protein COPaID", "Peptide COPaID", "Spectrum COPaID", "Similarity Score", "DetaM/Z", "Final Score");
                using (XmlWriter writer = XmlWriter.Create(strReportFile, settings))
                {
                    writer.WriteStartElement("COPaReport");
                    writer.WriteAttributeString("TaskID", TaskID);
                    writer.WriteAttributeString("mzFile", FileName);
                    writer.WriteAttributeString("LibModule", searchModule);
                    writer.WriteAttributeString("SearchFilter", sc);
                    // IDProteins counts every protein read, including those later rejected by IsPassFilter
                    writer.WriteAttributeString("IDProteins", Proteins.Count.ToString());
                    foreach (KeyValuePair<string, ProteinInfo> kvp in Proteins)
                    {
                        ArrayList splists = kvp.Value.ScanPeptides;
                        if (!IsPassFilter(splists, distinctPeptides))
                            continue;

                        writer.WriteStartElement("Proteins");
                        writer.WriteAttributeString("IPI", kvp.Key);
                        writer.WriteAttributeString("COPaID", kvp.Value.COPaID);
                        writer.WriteAttributeString("ProteinName", kvp.Value.ProteinName);
                        writer.WriteAttributeString("Organism", kvp.Value.Organism);
                        writer.WriteAttributeString("SpectraCount", splists.Count.ToString());
                        // Spectra count divided by protein length
                        writer.WriteAttributeString("NormalizCount", ((float)(splists.Count) / kvp.Value.ProteinLength).ToString());
                        foreach (ScanPeptide sp in splists)
                        {
                            writer.WriteStartElement("Scan-Peptide");
                            writer.WriteAttributeString("mzFile", sp.mzFile);
                            writer.WriteAttributeString("Scan", sp.ScanNO);
                            writer.WriteAttributeString("PeptideSequence", sp.PeptideSequence);
                            if (sp.ModifiedType != "")
                            {
                                writer.WriteAttributeString("ModifiedType", sp.ModifiedType);
                            }
                            writer.WriteAttributeString("Peptide", sp.PeptideID);
                            writer.WriteAttributeString("SimilarityScore", sp.SimilarityScore);
                            writer.WriteAttributeString("DetaMZ", sp.DetaM.ToString());
                            writer.WriteAttributeString("Spectrum", sp.SpectrumSeq);
                            writer.WriteAttributeString("FinalyScore", sp.FinalScore);
                            writer.WriteEndElement();
                            // Same record as a row of the tab-delimited table
                            sw.WriteLine(ProteinLine, sp.mzFile, sp.ScanNO, sp.PeptideSequence, sp.ModifiedType, kvp.Key, kvp.Value.ProteinName, kvp.Value.Organism, kvp.Value.COPaID, sp.PeptideID, sp.SpectrumSeq, sp.SimilarityScore, sp.DetaM, sp.FinalScore);
                        }
                        writer.WriteEndElement();
                    }
                    writer.WriteEndElement();
                    // Both writers are flushed and closed when their using blocks end
                }
            }
        }
        catch (Exception ex)
        {
            // Output failures are logged only; the method still returns true
            DBInterface.LogEvent(ex.ToString(), System.Diagnostics.EventLogEntryType.Error);
        }

        return true;
    }