Beispiel #1
0
 /// <summary>
 /// Reads a MultiAlign (MLN) CSV file and registers one <c>MultiAlignRecord</c> per data row
 /// through <c>AddRecord</c>.
 /// </summary>
 /// <param name="filename">Path of the CSV file to read.</param>
 /// <param name="_numdatasets">Number of datasets passed to <c>AllocateNumberDatasets</c> for every record.</param>
 public void ReadInMLNFile(string filename, int _numdatasets)
 {
     // Note : sep 26, 2011 - This will need to be updated when the new MLN format comes out
     CSVFileHandler CsvFileHandler2 = new CSVFileHandler(filename, CSVFileHandler.READ_ONLY);
     CsvFileHandler2.openFile();
     try
     {
         // The first line is read and discarded (presumably the header row - TODO confirm).
         CsvFileHandler2.readLine();

         String[] Attributes2;
         while ((Attributes2 = CsvFileHandler2.readLine()) != null)
         {
             MultiAlignRecord mrecord = new MultiAlignRecord();
             mrecord.ID = int.Parse(Attributes2[0]);
             mrecord.Size = int.Parse(Attributes2[1]);
             mrecord.Mass = double.Parse(Attributes2[2]);
             mrecord.NET = double.Parse(Attributes2[3]);
             mrecord.AllocateNumberDatasets(_numdatasets);

             // After the four cluster columns, each group of four columns describes one UMC:
             // ScanRep, Abundance, ScanStart, ScanEnd.
             for (int i = 4; i < Attributes2.Length; i += 4)
             {
                 UMCRecord u = new UMCRecord();
                 if (Attributes2[i] != "")
                 {
                     u.ScanRep = int.Parse(Attributes2[i]);
                     u.Abundance = double.Parse(Attributes2[i + 1]);
                     u.ScanStart = int.Parse(Attributes2[i + 2]);
                     u.ScanEnd = int.Parse(Attributes2[i + 3]);
                 }
                 // A default-constructed record is still added for an empty group so that
                 // the list keeps one entry per column group.
                 mrecord._AssociatedUMCRecords.Add(u);
             }
             AddRecord(mrecord);
         }
     }
     finally
     {
         // Release the file handle even when a row fails to parse.
         CsvFileHandler2.closeFile();
     }
 }
Beispiel #2
0
        /*public void WriteOutMapToCsv(ref Classes.MapRecord _glycoMap, string filename)
        {
            CSVFileHandler outfile = new CSVFileHandler(filename, CSVFileHandler.WRITE_ONLY);
            outfile.openFile();
            int numdatasets = _glycoMap._AssociatedDatasetNames.Count;

            string[] header = { "ClusterID", "Mass", "NET" };
            for (int i = 0; i < numdatasets; i++)
            {
                string[] ar2;
                ar2 = new string[header.Length + 9];
                header.CopyTo(ar2, 0);
                ar2.SetValue("DatasetID" + "." + i.ToString(), header.Length);
                ar2.SetValue("HCDScore" + "." + i.ToString(), header.Length + 1);
                ar2.SetValue("CIDScore" + "." + i.ToString(), header.Length + 2);
                ar2.SetValue("ETDScore" + "." + i.ToString(), header.Length + 3);
                ar2.SetValue("Protein" + "." + i.ToString(), header.Length + 4);
                ar2.SetValue("Peptide" + "." + i.ToString(), header.Length + 5);
                ar2.SetValue("Site" + "." + i.ToString(), header.Length + 6);
                ar2.SetValue("Glycan" + "." + i.ToString(), header.Length + 7);
                ar2.SetValue("Abundance" + "." + i.ToString(), header.Length + 8);
                header = ar2;
            }
            outfile.writeLine(header);

            _glycoMap._AllMLNRecords.ForEach(delegate(Classes.MultiAlignRecord m)
            {
                string[] outline = { Convert.ToString(m.ID), Convert.ToString(m.Mass), Convert.ToString(m.NET) };

                m._AssociatedUMCRecords.Sort(delegate(Classes.UMCRecord u1, Classes.UMCRecord u2) { return u1.DatasetID.CompareTo(u2.DatasetID); });

                int count_index = 0;
                for (int i = 0; i < numdatasets; i++)
                {
                    string[] ar3 = new string[outline.Length + 9];
                    outline.CopyTo(ar3, 0);
                    Classes.UMCRecord u = m._AssociatedUMCRecords[count_index];
                    if (u.DatasetID == i)
                    {
                        ar3.SetValue(Convert.ToString(u.DatasetID), outline.Length);
                        ar3.SetValue(Convert.ToString(u.HCDScore), outline.Length + 1);
                        ar3.SetValue(Convert.ToString(u.CIDScore), outline.Length + 2);
                        ar3.SetValue(Convert.ToString(u.ETDScore), outline.Length + 3);
                        ar3.SetValue(u.ProteinName, outline.Length + 4);
                        ar3.SetValue(u.PeptideSeq, outline.Length + 5);
                        ar3.SetValue(u.NGlycoSite, outline.Length + 6);
                        ar3.SetValue(u.GlycanComposition, outline.Length + 7);
                        ar3.SetValue(Convert.ToString(u.Abundance), outline.Length + 8);
                        count_index++;
                        if (count_index >= m._AssociatedUMCRecords.Count)
                            count_index--; // To keep it within index.
                    }
                    else
                    {
                        ar3.SetValue(Convert.ToString(i), outline.Length);
                        ar3.SetValue(Convert.ToString(1), outline.Length + 1);
                        ar3.SetValue(Convert.ToString(0), outline.Length + 2);
                        ar3.SetValue(Convert.ToString(0), outline.Length + 3);
                        ar3.SetValue("", outline.Length + 4);
                        ar3.SetValue("", outline.Length + 5);
                        ar3.SetValue("", outline.Length + 6);
                        ar3.SetValue("", outline.Length + 7);
                        ar3.SetValue(Convert.ToString(0), outline.Length + 8);

                    }
                    outline = ar3;
                }
                outfile.writeLine(outline);

            });

            outfile.closeFile();
        }*/
        /// <summary>
        /// Writes the identified clusters of a map to a CSV file: 15 fixed cluster columns
        /// followed by five columns (DatasetID, HCDScore, CIDScore, ETDScore, Abundance) per dataset.
        /// Only records whose <c>BestMatchProteinName</c> is not the empty string are written.
        /// </summary>
        /// <param name="_glycoMap">Map whose <c>_AllMLNRecords</c> are written. Each record's
        /// <c>_AssociatedUMCRecords</c> list is sorted in place by <c>DatasetID</c>.</param>
        /// <param name="filename">Path of the CSV file to create.</param>
        public void WriteOutMapToCSV(ref Classes.MapRecord _glycoMap, string filename)
        {
            const int columnsPerDataset = 5;

            CSVFileHandler outfile = new CSVFileHandler(filename, CSVFileHandler.WRITE_ONLY);
            outfile.openFile();
            try
            {
                int numdatasets = _glycoMap._AssociatedDatasetNames.Count;

                string[] fixedHeader = { "ClusterID", "Mass", "NET", "Protein", "Peptide", "Site", "Glycan", "PeptideMass", "GlycanMass",
                                  "TypeID", "TRUE_FALSE", "RepHCDScore", "RepCIDScore","RepETDScore", "RepCIDSeqScore" };

                // Allocate the full row once instead of re-copying the array for every dataset.
                string[] header = new string[fixedHeader.Length + columnsPerDataset * numdatasets];
                fixedHeader.CopyTo(header, 0);
                for (int i = 0; i < numdatasets; i++)
                {
                    int offset = fixedHeader.Length + columnsPerDataset * i;
                    string suffix = "." + i.ToString();
                    header[offset] = "DatasetID" + suffix;
                    header[offset + 1] = "HCDScore" + suffix;
                    header[offset + 2] = "CIDScore" + suffix;
                    header[offset + 3] = "ETDScore" + suffix;
                    header[offset + 4] = "Abundance" + suffix;
                }
                outfile.writeLine(header);

                foreach (Classes.MultiAlignRecord m in _glycoMap._AllMLNRecords)
                {
                    // Clusters without an identification are not exported.
                    if (m.BestMatchProteinName == "")
                        continue;

                    string true_false = m.BestMatchFalseHit ? "FALSE" : "TRUE";

                    string[] fixedColumns = { Convert.ToString(m.ID), Convert.ToString(m.Mass), Convert.ToString(m.NET), m.BestMatchProteinName,
                                       m.BestMatchPeptideSeq, m.BestMatchNGlycoSite, m.BestMatchGlycanComposition,
                                       m.BestMatchPeptideMass.ToString(), m.BestMatchGlycanMass.ToString(),
                                       m.IDLabel.ToString(), true_false, m.RepHCDScore.ToString(), m.RepCIDScore.ToString(),
                                       m.RepETDScore.ToString(), m.RepCIDSequencingScore.ToString() };

                    string[] outline = new string[fixedColumns.Length + columnsPerDataset * numdatasets];
                    fixedColumns.CopyTo(outline, 0);

                    m._AssociatedUMCRecords.Sort(delegate(Classes.UMCRecord u1, Classes.UMCRecord u2) { return u1.DatasetID.CompareTo(u2.DatasetID); });

                    // Walk the sorted UMC records in step with the dataset index. A dataset
                    // with no matching record gets a placeholder group.
                    int count_index = 0;
                    for (int i = 0; i < numdatasets; i++)
                    {
                        int offset = fixedColumns.Length + columnsPerDataset * i;

                        // The bounds check also covers a record with no UMC records at all,
                        // which previously caused an out-of-range exception.
                        if (count_index < m._AssociatedUMCRecords.Count
                            && m._AssociatedUMCRecords[count_index].DatasetID == i)
                        {
                            Classes.UMCRecord u = m._AssociatedUMCRecords[count_index];
                            outline[offset] = Convert.ToString(u.DatasetID);
                            outline[offset + 1] = Convert.ToString(u.UMCRepHCDScore);
                            outline[offset + 2] = Convert.ToString(u.UMCRepCIDScore);
                            outline[offset + 3] = Convert.ToString(u.UMCRepETDScore);
                            outline[offset + 4] = Convert.ToString(u.Abundance);
                            count_index++;
                        }
                        else
                        {
                            // Placeholder values are kept exactly as the file format had them
                            // (HCD score 1, all other values 0).
                            outline[offset] = Convert.ToString(i);
                            outline[offset + 1] = Convert.ToString(1);
                            outline[offset + 2] = Convert.ToString(0);
                            outline[offset + 3] = Convert.ToString(0);
                            outline[offset + 4] = Convert.ToString(0);
                        }
                    }
                    outfile.writeLine(outline);
                }
            }
            finally
            {
                // Close the file even if writing a row fails.
                outfile.closeFile();
            }
        }
Beispiel #3
0
        /// <summary>
        /// Writes one CSV row per identified cluster containing the cluster-level columns
        /// plus the parent m/z and parent scan time of the best match.
        /// Only records whose <c>BestMatchProteinName</c> is not the empty string are written.
        /// </summary>
        /// <param name="_glycoMap">Map whose <c>_AllMLNRecords</c> are written.</param>
        /// <param name="filename">Path of the CSV file to create.</param>
        public void WriteOutPrecursorInfoToFileV2(ref Classes.MapRecord _glycoMap, string filename)
        {
            CSVFileHandler outfile = new CSVFileHandler(filename, CSVFileHandler.WRITE_ONLY);
            outfile.openFile();
            try
            {
                string[] header = { "ClusterID", "Mass", "NET", "Protein", "Peptide", "Site", "Glycan", "PeptideMass", "GlycanMass",
                                  "TypeID", "TRUE_FALSE", "RepHCDScore", "RepCIDScore","RepETDScore", "RepCIDSeqScore","ParentMZ", "ParentScanTime"};
                outfile.writeLine(header);

                foreach (Classes.MultiAlignRecord m in _glycoMap._AllMLNRecords)
                {
                    // Clusters without an identification are not exported.
                    if (m.BestMatchProteinName == "")
                        continue;

                    string true_false = m.BestMatchFalseHit ? "FALSE" : "TRUE";

                    string[] outline = { Convert.ToString(m.ID), Convert.ToString(m.Mass), Convert.ToString(m.NET), m.BestMatchProteinName,
                                       m.BestMatchPeptideSeq, m.BestMatchNGlycoSite, m.BestMatchGlycanComposition,
                                       m.BestMatchPeptideMass.ToString(), m.BestMatchGlycanMass.ToString(),
                                       m.IDLabel.ToString(), true_false, m.RepHCDScore.ToString(), m.RepCIDScore.ToString(),
                                       m.RepETDScore.ToString(), m.RepCIDSequencingScore.ToString(),
                                       m.BestMatchParentMz.ToString(), m.BestMatchParentScanTime.ToString() };
                    outfile.writeLine(outline);
                }
            }
            finally
            {
                // Close the file even if writing a row fails.
                outfile.closeFile();
            }
        }
Beispiel #4
0
        /// <summary>
        /// Reads the newer (V2) MLN CSV format and registers one <c>MultiAlignRecord</c> per
        /// data row through <c>AddRecord</c>. Also widens <c>_MAP_MIN_MASS</c> /
        /// <c>_MAP_MAX_MASS</c> to cover every cluster mass that is read.
        /// </summary>
        /// <param name="filename">Path of the CSV file to read.</param>
        /// <param name="_numdatasets">Expected number of datasets. The number actually allocated is
        /// derived from the column count of each row, so this value does not affect the result;
        /// it is kept for interface compatibility.</param>
        public void ReanInMLNV2File(string filename, int _numdatasets)
        {
            const int firstUmcColumn = 5;
            const int columnsPerUmc = 8;

            CSVFileHandler CsvFileHandler2 = new CSVFileHandler(filename, CSVFileHandler.READ_ONLY);
            CsvFileHandler2.openFile();
            try
            {
                // The first line is read and discarded (presumably the header row - TODO confirm).
                CsvFileHandler2.readLine();

                String[] Attributes2;
                while ((Attributes2 = CsvFileHandler2.readLine()) != null)
                {
                    MultiAlignRecord mrecord = new MultiAlignRecord();
                    mrecord.ID = int.Parse(Attributes2[0]);
                    mrecord.Mass = double.Parse(Attributes2[1]);
                    mrecord.NET = double.Parse(Attributes2[2]);

                    if (mrecord.Mass < _MAP_MIN_MASS)
                        _MAP_MIN_MASS = mrecord.Mass;
                    if (mrecord.Mass > _MAP_MAX_MASS)
                        _MAP_MAX_MASS = mrecord.Mass;

                    // The earlier if/else chose between this value and _numdatasets, and only
                    // picked _numdatasets when the two were equal, so this is always the result.
                    int numDatasetsToAllocate = (Attributes2.Length - firstUmcColumn) / columnsPerUmc;
                    mrecord.AllocateNumberDatasets(numDatasetsToAllocate);

                    // Each group of eight columns describes one UMC. The column at offset 3
                    // within a group is not read here.
                    for (int i = firstUmcColumn; i < Attributes2.Length; i += columnsPerUmc)
                    {
                        if (Attributes2[i] != "")
                        {
                            UMCRecord u = new UMCRecord();
                            u.ID = int.Parse(Attributes2[i]);
                            u.DatasetID = int.Parse(Attributes2[i + 1]);
                            u.MW = double.Parse(Attributes2[i + 2]);
                            u.Abundance = double.Parse(Attributes2[i + 4]);
                            u.ScanRep = int.Parse(Attributes2[i + 5]);
                            u.ScanStart = int.Parse(Attributes2[i + 6]);
                            u.ScanEnd = int.Parse(Attributes2[i + 7]);
                            mrecord._AssociatedUMCRecords.Add(u);
                        }
                    }
                    AddRecord(mrecord);
                }
            }
            finally
            {
                // Release the file handle even when a row fails to parse.
                CsvFileHandler2.closeFile();
            }
        }
Beispiel #5
0
        /// <summary>
        /// Writes every entry of the in-memory glycopeptide list (<c>_glycopeptides</c>) to a CSV
        /// file: one header row followed by one row of twelve columns per glycopeptide.
        /// The column order matches what <c>LoadGlycopeptidesFromFile</c> reads.
        /// </summary>
        /// <param name="filename">Path of the CSV file to create.</param>
        public void WriteOutGlycopeptidesToFile(string filename)
        {
            CSVFileHandler outfile = new CSVFileHandler(filename, CSVFileHandler.WRITE_ONLY);
            outfile.openFile();
            try
            {
                string[] header = { "GlycoPeptideMonoMass", "GlycoPeptideAvgMass", "Protein", "Peptide", "DECOY_Peptide",
                                      "Site", "PeptideMonoMass", "PeptideAvgMass", "Glycan", "DECOY_Glycan", "GlycanMonoMass", "GlycanAvgMass" };
                outfile.writeLine(header);

                int numglycopeptides = _glycopeptides.Count;
                for (int i = 0; i < numglycopeptides; i++)
                {
                    string[] outline = {
                        Convert.ToString(_glycopeptides[i].GP_Mono_Mass),
                        Convert.ToString(_glycopeptides[i].GP_Average_Mass),
                        Convert.ToString(_glycopeptides[i].Sequence.proteinName),
                        Convert.ToString(_glycopeptides[i].Sequence.sequence),
                        Convert.ToString(_glycopeptides[i].Sequence.is_decoy),
                        Convert.ToString(_glycopeptides[i].Sequence.nGlycoSite),
                        Convert.ToString(_glycopeptides[i].SequenceMonoMass),
                        Convert.ToString(_glycopeptides[i].SequenceAverageMass),
                        Convert.ToString(_glycopeptides[i].Glycan.composition),
                        Convert.ToString(_glycopeptides[i].Glycan.is_decoy),
                        Convert.ToString(_glycopeptides[i].GlycanMonoMass),
                        Convert.ToString(_glycopeptides[i].GlycanAverageMass)
                    };

                    outfile.writeLine(outline);
                }
            }
            finally
            {
                // Close the file even if writing a row fails.
                outfile.closeFile();
            }
        }
Beispiel #6
0
        /// <summary>
        /// Reads glycopeptide records from a CSV file and keeps those whose monoisotopic
        /// glycopeptide mass lies within [<paramref name="min_mass"/>, <paramref name="max_mass"/>].
        /// </summary>
        /// <param name="infile">Path of the CSV file to read.</param>
        /// <param name="min_mass">Inclusive lower bound on the glycopeptide monoisotopic mass.</param>
        /// <param name="max_mass">Inclusive upper bound on the glycopeptide monoisotopic mass.</param>
        /// <param name="create_hash">When true, <c>_dGlycopeptides</c> is replaced with a new dictionary
        /// keyed by the floor of the monoisotopic mass and <c>_glycopeptides</c> is left untouched;
        /// when false, <c>_glycopeptides</c> is replaced with a new flat list instead.</param>
        public void LoadGlycopeptidesFromFile(string infile, double min_mass, double max_mass, bool create_hash)
        {
            CSVFileHandler CsvFileHandler2 = new CSVFileHandler(infile, CSVFileHandler.READ_ONLY);
            CsvFileHandler2.openFile();
            try
            {
                // The first line is read and discarded (presumably the header row - TODO confirm).
                CsvFileHandler2.readLine();

                if (create_hash)
                    _dGlycopeptides = new Dictionary<int, List<GlycopeptideRecord>>();
                else
                    _glycopeptides = new List<GlycopeptideRecord>();

                String[] Attributes2;
                while ((Attributes2 = CsvFileHandler2.readLine()) != null)
                {
                    double gpmass = double.Parse(Attributes2[0]);
                    if (gpmass < min_mass || gpmass > max_mass)
                        continue;

                    GlycopeptideRecord gp = new GlycopeptideRecord();
                    gp.GP_Mono_Mass = gpmass;
                    gp.GP_Average_Mass = double.Parse(Attributes2[1]);
                    gp.Sequence.proteinName = Attributes2[2];
                    gp.Sequence.sequence = Attributes2[3];
                    gp.Sequence.is_decoy = bool.Parse(Attributes2[4]);
                    gp.Sequence.nGlycoSite = Attributes2[5];
                    gp.SequenceMonoMass = double.Parse(Attributes2[6]);
                    gp.SequenceAverageMass = double.Parse(Attributes2[7]);
                    gp.Glycan.SetMonosaccharideCompostion(Attributes2[8]);
                    gp.Glycan.composition = Attributes2[8];
                    gp.Glycan.is_decoy = bool.Parse(Attributes2[9]);
                    gp.GlycanMonoMass = double.Parse(Attributes2[10]);
                    gp.GlycanAverageMass = double.Parse(Attributes2[11]);

                    // A glycopeptide is a decoy if either its peptide or its glycan is a decoy.
                    gp.IsDecoy = gp.Sequence.is_decoy || gp.Glycan.is_decoy;

                    if (create_hash)
                    {
                        // Bucket by integer mass; a single lookup fetches or creates the bucket.
                        int key_mass = (int)Math.Floor(gp.GP_Mono_Mass);
                        List<GlycopeptideRecord> bucket;
                        if (!_dGlycopeptides.TryGetValue(key_mass, out bucket))
                        {
                            bucket = new List<GlycopeptideRecord>();
                            _dGlycopeptides.Add(key_mass, bucket);
                        }
                        bucket.Add(gp);
                    }
                    else
                    {
                        _glycopeptides.Add(gp);
                    }
                }
            }
            finally
            {
                // Release the file handle even when a row fails to parse.
                CsvFileHandler2.closeFile();
            }
        }
Beispiel #7
0
        /// <summary>
        /// Runs glycan sequencing on the CID spectrum of every cluster-representative
        /// fragmentation event with a positive ETD score. For the first Y1 charge state that
        /// yields at least one structure, the top structure's score and IUPAC string are
        /// assigned to the event and the returned structures are written to a text file.
        /// </summary>
        /// <param name="_glycoMap">Map whose <c>_AllMLNRecords</c> / <c>_ClusterRepFragEvents</c> are processed.</param>
        /// <param name="_params">Not used by this method.</param>
        /// <param name="sequencingFolder">Output folder; when null or empty, <c>c:\sequencing\</c> is used.</param>
        public void SequenceCIDPeaks(ref Classes.MapRecord _glycoMap, ref Classes.Params _params, string sequencingFolder)
        {
            if (string.IsNullOrEmpty(sequencingFolder))
                sequencingFolder = @"c:\sequencing\";

            for (int i = 0; i < _glycoMap._AllMLNRecords.Count; i++)
            {
                MultiAlignRecord m = _glycoMap._AllMLNRecords[i];

                for (int k = 0; k < m._ClusterRepFragEvents.Count; k++)
                {
                    // NOTE(review): assumes FragEvents is a reference type so the assignments
                    // below are visible through the map - TODO confirm.
                    Classes.FragEvents e = m._ClusterRepFragEvents[k];

                    if (e.ETDScore <= 0)
                        continue;

                    // A positive average mass selects average masses throughout; otherwise
                    // monoisotopic masses are used for both the peptide and the HexNAc.
                    bool useAverageMass = e.TransformResult.mdbl_average_mw > 0;

                    COL.MassLib.MSScan _msScan = new COL.MassLib.MSScan(e.CIDMzs, e.CIDIntensities, Convert.ToSingle(e.TransformResult.mdbl_mz),
                        Convert.ToSingle(e.TransformResult.mdbl_mono_mw), Convert.ToSingle(e.TransformResult.mdbl_average_mw), Convert.ToInt32(e.TransformResult.mshort_cs));

                    float PeptideMass;
                    float GlcNAcMass;
                    if (useAverageMass)
                    {
                        PeptideMass = Convert.ToSingle(e.GP_Record.SequenceAverageMass);
                        GlcNAcMass = COL.GlycoLib.GlycanMass.GetGlycanAVGMass(COL.GlycoLib.Glycan.Type.HexNAc);
                    }
                    else
                    {
                        PeptideMass = Convert.ToSingle(e.GP_Record.SequenceMonoMass);
                        GlcNAcMass = COL.GlycoLib.GlycanMass.GetGlycanMass(COL.GlycoLib.Glycan.Type.HexNAc);
                    }

                    // Try Y1 charge states from (precursor charge - 1) down to 1 and stop at
                    // the first one for which sequencing returns a structure.
                    short y1cs = e.TransformResult.mshort_cs;
                    y1cs--;
                    while (y1cs > 0)
                    {
                        float y1Mz = ((PeptideMass + GlcNAcMass) + (float)_utils._CC_MASS * y1cs) / y1cs;
                        COL.GlycoSequence.GlycanSequencing _Gs = new COL.GlycoSequence.GlycanSequencing(_msScan, y1Mz, y1cs, e.GP_Record.Glycan.numHex,
                            e.GP_Record.Glycan.numHexNAc, e.GP_Record.Glycan.numDeHex, e.GP_Record.Glycan.numNeuAc, 0, sequencingFolder, true, 0.8f, 60);

                        _Gs.NumbersOfPeaksForSequencing = 140;
                        _Gs.CreatePrecursotMZ = true;
                        _Gs.RewardForCompleteStructure = 3;
                        _Gs.UseAVGMass = useAverageMass;

                        int structure_count = _Gs.StartSequencing();
                        if (structure_count > 0)
                        {
                            List<COL.GlycoLib.GlycanStructure> topstructures = _Gs.GetTopRankScoreStructre(1);
                            e.CIDSequencingScore = topstructures[0].Score;
                            e.GP_Record.GlycanSequence = topstructures[0].IUPACString;

                            // Path.Combine adds the directory separator when the folder lacks one;
                            // plain concatenation produced a wrong file name in that case.
                            string opfile = System.IO.Path.Combine(sequencingFolder, m.ID.ToString() + "_" + e.CIDScan.ToString() + ".txt");
                            Utils.CSVFileHandler cidString = new CSVFileHandler(opfile, CSVFileHandler.WRITE_ONLY);
                            cidString.openFile();
                            try
                            {
                                for (int zz = 0; zz < topstructures.Count; zz++)
                                {
                                    cidString.writeLine(topstructures[zz].IUPACString);
                                }
                            }
                            finally
                            {
                                // Close the file even if a write fails.
                                cidString.closeFile();
                            }
                            break;
                        }
                        y1cs--;
                    }
                }
            }
        }