/// <summary>
/// Reads a MultiAlign (MLN) CSV file and adds one <c>MultiAlignRecord</c> per data row
/// via <c>AddRecord</c>.
/// </summary>
/// <remarks>
/// The first line is read and discarded. Every following row is parsed as
/// ID, Size, Mass, NET, followed by groups of four columns
/// (ScanRep, Abundance, ScanStart, ScanEnd). A group whose first column is empty still
/// contributes a default-constructed <c>UMCRecord</c>.
/// Note (Sep 26, 2011): this will need to be updated when the new MLN format comes out.
/// </remarks>
/// <param name="filename">Path of the MLN CSV file to read.</param>
/// <param name="_numdatasets">Value passed to <c>AllocateNumberDatasets</c> for every record.</param>
public void ReadInMLNFile(string filename, int _numdatasets)
{
    CSVFileHandler reader = new CSVFileHandler(filename, CSVFileHandler.READ_ONLY);
    reader.openFile();
    try
    {
        // The first line is not parsed.
        reader.readLine();

        String[] fields;
        while ((fields = reader.readLine()) != null)
        {
            MultiAlignRecord mrecord = new MultiAlignRecord();
            mrecord.ID = int.Parse(fields[0]);
            mrecord.Size = int.Parse(fields[1]);
            mrecord.Mass = double.Parse(fields[2]);
            mrecord.NET = double.Parse(fields[3]);
            mrecord.AllocateNumberDatasets(_numdatasets);

            // Columns 4.. come in groups of four.
            for (int i = 4; i < fields.Length; i += 4)
            {
                UMCRecord u = new UMCRecord();
                if (fields[i] != "")
                {
                    u.ScanRep = int.Parse(fields[i]);
                    u.Abundance = double.Parse(fields[i + 1]);
                    u.ScanStart = int.Parse(fields[i + 2]);
                    u.ScanEnd = int.Parse(fields[i + 3]);
                }
                // An empty group is still added as a default record.
                mrecord._AssociatedUMCRecords.Add(u);
            }

            AddRecord(mrecord);
        }
    }
    finally
    {
        // Release the file even when a row fails to parse.
        reader.closeFile();
    }
}
/* Legacy per-dataset writer, kept commented out for reference.
public void WriteOutMapToCsv(ref Classes.MapRecord _glycoMap, string filename)
{
    CSVFileHandler outfile = new CSVFileHandler(filename, CSVFileHandler.WRITE_ONLY);
    outfile.openFile();
    int numdatasets = _glycoMap._AssociatedDatasetNames.Count;
    string[] header = { "ClusterID", "Mass", "NET" };
    for (int i = 0; i < numdatasets; i++)
    {
        string[] ar2;
        ar2 = new string[header.Length + 9];
        header.CopyTo(ar2, 0);
        ar2.SetValue("DatasetID" + "." + i.ToString(), header.Length);
        ar2.SetValue("HCDScore" + "." + i.ToString(), header.Length + 1);
        ar2.SetValue("CIDScore" + "." + i.ToString(), header.Length + 2);
        ar2.SetValue("ETDScore" + "." + i.ToString(), header.Length + 3);
        ar2.SetValue("Protein" + "." + i.ToString(), header.Length + 4);
        ar2.SetValue("Peptide" + "." + i.ToString(), header.Length + 5);
        ar2.SetValue("Site" + "." + i.ToString(), header.Length + 6);
        ar2.SetValue("Glycan" + "." + i.ToString(), header.Length + 7);
        ar2.SetValue("Abundance" + "." + i.ToString(), header.Length + 8);
        header = ar2;
    }
    outfile.writeLine(header);
    _glycoMap._AllMLNRecords.ForEach(delegate(Classes.MultiAlignRecord m)
    {
        string[] outline = { Convert.ToString(m.ID), Convert.ToString(m.Mass), Convert.ToString(m.NET) };
        m._AssociatedUMCRecords.Sort(delegate(Classes.UMCRecord u1, Classes.UMCRecord u2) { return u1.DatasetID.CompareTo(u2.DatasetID); });
        int count_index = 0;
        for (int i = 0; i < numdatasets; i++)
        {
            string[] ar3 = new string[outline.Length + 9];
            outline.CopyTo(ar3, 0);
            Classes.UMCRecord u = m._AssociatedUMCRecords[count_index];
            if (u.DatasetID == i)
            {
                ar3.SetValue(Convert.ToString(u.DatasetID), outline.Length);
                ar3.SetValue(Convert.ToString(u.HCDScore), outline.Length + 1);
                ar3.SetValue(Convert.ToString(u.CIDScore), outline.Length + 2);
                ar3.SetValue(Convert.ToString(u.ETDScore), outline.Length + 3);
                ar3.SetValue(u.ProteinName, outline.Length + 4);
                ar3.SetValue(u.PeptideSeq, outline.Length + 5);
                ar3.SetValue(u.NGlycoSite, outline.Length + 6);
                ar3.SetValue(u.GlycanComposition, outline.Length + 7);
                ar3.SetValue(Convert.ToString(u.Abundance), outline.Length + 8);
                count_index++;
                if (count_index >= m._AssociatedUMCRecords.Count)
                    count_index--; // To keep it within index.
            }
            else
            {
                ar3.SetValue(Convert.ToString(i), outline.Length);
                ar3.SetValue(Convert.ToString(1), outline.Length + 1);
                ar3.SetValue(Convert.ToString(0), outline.Length + 2);
                ar3.SetValue(Convert.ToString(0), outline.Length + 3);
                ar3.SetValue("", outline.Length + 4);
                ar3.SetValue("", outline.Length + 5);
                ar3.SetValue("", outline.Length + 6);
                ar3.SetValue("", outline.Length + 7);
                ar3.SetValue(Convert.ToString(0), outline.Length + 8);
            }
            outline = ar3;
        }
        outfile.writeLine(outline);
    });
    outfile.closeFile();
}*/

/// <summary>
/// Writes the glyco map to a CSV file: one row per cluster that has a non-empty
/// best-match protein name, followed by five columns per dataset.
/// </summary>
/// <remarks>
/// The fixed columns are the cluster ID, mass, NET, the best-match identification fields
/// and the representative scores. For every dataset index from 0 up to the number of
/// associated dataset names, the columns DatasetID, HCDScore, CIDScore, ETDScore and
/// Abundance are appended. When a cluster has no UMC record for a dataset, the placeholder
/// values (dataset index, 1, 0, 0, 0) are written instead.
/// Each cluster's <c>_AssociatedUMCRecords</c> list is sorted in place by <c>DatasetID</c>
/// as a side effect.
/// </remarks>
/// <param name="_glycoMap">Map whose clusters are written; the reference itself is not reassigned.</param>
/// <param name="filename">Path of the CSV file to create.</param>
public void WriteOutMapToCSV(ref Classes.MapRecord _glycoMap, string filename)
{
    CSVFileHandler outfile = new CSVFileHandler(filename, CSVFileHandler.WRITE_ONLY);
    outfile.openFile();
    try
    {
        int numdatasets = _glycoMap._AssociatedDatasetNames.Count;

        // Build the header in a list so each dataset appends five cells instead of
        // re-copying the whole array.
        List<string> header = new List<string>(new string[]
        {
            "ClusterID", "Mass", "NET", "Protein", "Peptide", "Site", "Glycan",
            "PeptideMass", "GlycanMass", "TypeID", "TRUE_FALSE",
            "RepHCDScore", "RepCIDScore", "RepETDScore", "RepCIDSeqScore"
        });
        for (int i = 0; i < numdatasets; i++)
        {
            string suffix = "." + i.ToString();
            header.Add("DatasetID" + suffix);
            header.Add("HCDScore" + suffix);
            header.Add("CIDScore" + suffix);
            header.Add("ETDScore" + suffix);
            header.Add("Abundance" + suffix);
        }
        outfile.writeLine(header.ToArray());

        foreach (Classes.MultiAlignRecord m in _glycoMap._AllMLNRecords)
        {
            // Only clusters with a best-match protein are exported.
            if (m.BestMatchProteinName == "")
            {
                continue;
            }

            string true_false = m.BestMatchFalseHit ? "FALSE" : "TRUE";

            List<string> row = new List<string>(new string[]
            {
                Convert.ToString(m.ID), Convert.ToString(m.Mass), Convert.ToString(m.NET),
                m.BestMatchProteinName, m.BestMatchPeptideSeq, m.BestMatchNGlycoSite,
                m.BestMatchGlycanComposition,
                m.BestMatchPeptideMass.ToString(), m.BestMatchGlycanMass.ToString(),
                m.IDLabel.ToString(), true_false,
                m.RepHCDScore.ToString(), m.RepCIDScore.ToString(),
                m.RepETDScore.ToString(), m.RepCIDSequencingScore.ToString()
            });

            // Sorting by dataset lets a single forward pass line records up with columns.
            m._AssociatedUMCRecords.Sort(delegate(Classes.UMCRecord u1, Classes.UMCRecord u2)
            {
                return u1.DatasetID.CompareTo(u2.DatasetID);
            });

            int next = 0;
            for (int i = 0; i < numdatasets; i++)
            {
                // Step over records whose dataset was already handled (duplicates) or is
                // below the current index; otherwise the cursor would stall on them and
                // every later dataset would be written as missing.
                while (next < m._AssociatedUMCRecords.Count
                       && m._AssociatedUMCRecords[next].DatasetID < i)
                {
                    next++;
                }

                if (next < m._AssociatedUMCRecords.Count
                    && m._AssociatedUMCRecords[next].DatasetID == i)
                {
                    Classes.UMCRecord u = m._AssociatedUMCRecords[next];
                    row.Add(Convert.ToString(u.DatasetID));
                    row.Add(Convert.ToString(u.UMCRepHCDScore));
                    row.Add(Convert.ToString(u.UMCRepCIDScore));
                    row.Add(Convert.ToString(u.UMCRepETDScore));
                    row.Add(Convert.ToString(u.Abundance));
                    next++;
                }
                else
                {
                    // No record for this dataset (this also covers a cluster with no
                    // records at all): write the placeholder values.
                    row.Add(Convert.ToString(i));
                    row.Add(Convert.ToString(1));
                    row.Add(Convert.ToString(0));
                    row.Add(Convert.ToString(0));
                    row.Add(Convert.ToString(0));
                }
            }

            outfile.writeLine(row.ToArray());
        }
    }
    finally
    {
        // Close the output even if a record fails to serialize.
        outfile.closeFile();
    }
}
/// <summary>
/// Writes one CSV row per cluster that has a non-empty best-match protein name, containing
/// the cluster's identification fields, representative scores and the best-match parent
/// m/z and parent scan time.
/// </summary>
/// <param name="_glycoMap">Map whose clusters are written; the reference itself is not reassigned.</param>
/// <param name="filename">Path of the CSV file to create.</param>
public void WriteOutPrecursorInfoToFileV2(ref Classes.MapRecord _glycoMap, string filename)
{
    CSVFileHandler outfile = new CSVFileHandler(filename, CSVFileHandler.WRITE_ONLY);
    outfile.openFile();
    try
    {
        string[] header =
        {
            "ClusterID", "Mass", "NET", "Protein", "Peptide", "Site", "Glycan",
            "PeptideMass", "GlycanMass", "TypeID", "TRUE_FALSE",
            "RepHCDScore", "RepCIDScore", "RepETDScore", "RepCIDSeqScore",
            "ParentMZ", "ParentScanTime"
        };
        outfile.writeLine(header);

        foreach (Classes.MultiAlignRecord m in _glycoMap._AllMLNRecords)
        {
            // Only clusters with a best-match protein are exported.
            if (m.BestMatchProteinName == "")
            {
                continue;
            }

            string true_false = m.BestMatchFalseHit ? "FALSE" : "TRUE";

            string[] outline =
            {
                Convert.ToString(m.ID), Convert.ToString(m.Mass), Convert.ToString(m.NET),
                m.BestMatchProteinName, m.BestMatchPeptideSeq, m.BestMatchNGlycoSite,
                m.BestMatchGlycanComposition,
                m.BestMatchPeptideMass.ToString(), m.BestMatchGlycanMass.ToString(),
                m.IDLabel.ToString(), true_false,
                m.RepHCDScore.ToString(), m.RepCIDScore.ToString(),
                m.RepETDScore.ToString(), m.RepCIDSequencingScore.ToString(),
                m.BestMatchParentMz.ToString(), m.BestMatchParentScanTime.ToString()
            };
            outfile.writeLine(outline);
        }
    }
    finally
    {
        // Close the output even if a record fails to serialize.
        outfile.closeFile();
    }
}
/// <summary>
/// Reads the new (V2) MLN CSV format and adds one <c>MultiAlignRecord</c> per data row
/// via <c>AddRecord</c>. (The method name keeps its historical spelling so existing
/// callers continue to compile.)
/// </summary>
/// <remarks>
/// The first line is read and discarded. Every following row is parsed as ID, Mass, NET in
/// columns 0-2; per-dataset groups of eight columns start at column 5. Within a group the
/// offsets 0, 1, 2, 4, 5, 6 and 7 are read as UMC ID, DatasetID, MW, Abundance, ScanRep,
/// ScanStart and ScanEnd; offset 3 is not read. Groups whose first column is empty are
/// skipped. <c>_MAP_MIN_MASS</c> and <c>_MAP_MAX_MASS</c> are widened to include every
/// cluster mass that is read.
/// </remarks>
/// <param name="filename">Path of the MLN V2 CSV file to read.</param>
/// <param name="_numdatasets">
/// Has no effect on the result: the count passed to <c>AllocateNumberDatasets</c> is always
/// derived from the number of columns in the row.
/// </param>
public void ReanInMLNV2File(string filename, int _numdatasets)
{
    CSVFileHandler reader = new CSVFileHandler(filename, CSVFileHandler.READ_ONLY);
    reader.openFile();
    try
    {
        // The first line is not parsed.
        reader.readLine();

        String[] fields;
        while ((fields = reader.readLine()) != null)
        {
            MultiAlignRecord mrecord = new MultiAlignRecord();
            mrecord.ID = int.Parse(fields[0]);
            mrecord.Mass = double.Parse(fields[1]);
            mrecord.NET = double.Parse(fields[2]);

            // Track the mass range covered by the map.
            if (mrecord.Mass < _MAP_MIN_MASS)
            {
                _MAP_MIN_MASS = mrecord.Mass;
            }
            if (mrecord.Mass > _MAP_MAX_MASS)
            {
                _MAP_MAX_MASS = mrecord.Mass;
            }

            // The previous if/else chose between this value and _numdatasets, but only
            // picked _numdatasets when the two were equal, so the row-derived count was
            // always what got allocated.
            mrecord.AllocateNumberDatasets((fields.Length - 5) / 8);

            for (int i = 5; i < fields.Length; i += 8)
            {
                if (fields[i] == "")
                {
                    continue;
                }

                UMCRecord u = new UMCRecord();
                u.ID = int.Parse(fields[i]);
                u.DatasetID = int.Parse(fields[i + 1]);
                u.MW = double.Parse(fields[i + 2]);
                // Offset i + 3 is intentionally not read.
                u.Abundance = double.Parse(fields[i + 4]);
                u.ScanRep = int.Parse(fields[i + 5]);
                u.ScanStart = int.Parse(fields[i + 6]);
                u.ScanEnd = int.Parse(fields[i + 7]);
                mrecord._AssociatedUMCRecords.Add(u);
            }

            AddRecord(mrecord);
        }
    }
    finally
    {
        // Release the file even when a row fails to parse.
        reader.closeFile();
    }
}
/// <summary>
/// Writes every entry of the in-memory glycopeptide list (<c>_glycopeptides</c>) to a CSV
/// file: a header line followed by one row per glycopeptide. The column order matches what
/// <c>LoadGlycopeptidesFromFile</c> reads back.
/// </summary>
/// <param name="filename">Path of the CSV file to create.</param>
public void WriteOutGlycopeptidesToFile(string filename)
{
    CSVFileHandler outfile = new CSVFileHandler(filename, CSVFileHandler.WRITE_ONLY);
    outfile.openFile();
    try
    {
        string[] header =
        {
            "GlycoPeptideMonoMass", "GlycoPeptideAvgMass", "Protein", "Peptide",
            "DECOY_Peptide", "Site", "PeptideMonoMass", "PeptideAvgMass",
            "Glycan", "DECOY_Glycan", "GlycanMonoMass", "GlycanAvgMass"
        };
        outfile.writeLine(header);

        foreach (GlycopeptideRecord gp in _glycopeptides)
        {
            string[] outline =
            {
                Convert.ToString(gp.GP_Mono_Mass),
                Convert.ToString(gp.GP_Average_Mass),
                Convert.ToString(gp.Sequence.proteinName),
                Convert.ToString(gp.Sequence.sequence),
                Convert.ToString(gp.Sequence.is_decoy),
                Convert.ToString(gp.Sequence.nGlycoSite),
                Convert.ToString(gp.SequenceMonoMass),
                Convert.ToString(gp.SequenceAverageMass),
                Convert.ToString(gp.Glycan.composition),
                Convert.ToString(gp.Glycan.is_decoy),
                Convert.ToString(gp.GlycanMonoMass),
                Convert.ToString(gp.GlycanAverageMass)
            };
            outfile.writeLine(outline);
        }
    }
    finally
    {
        // Close the output even if a record fails to serialize.
        outfile.closeFile();
    }
}
/// <summary>
/// Reads glycopeptide records from a CSV file, keeping only those whose glycopeptide
/// monoisotopic mass lies within [<paramref name="min_mass"/>, <paramref name="max_mass"/>].
/// </summary>
/// <remarks>
/// The first line is read and discarded. When <paramref name="create_hash"/> is true,
/// <c>_dGlycopeptides</c> is replaced by a new dictionary and <c>_glycopeptides</c> is left
/// untouched; otherwise <c>_glycopeptides</c> is replaced by a new list and
/// <c>_dGlycopeptides</c> is left untouched.
/// </remarks>
/// <param name="infile">Path of the glycopeptide CSV file to read.</param>
/// <param name="min_mass">Inclusive lower bound on the glycopeptide monoisotopic mass.</param>
/// <param name="max_mass">Inclusive upper bound on the glycopeptide monoisotopic mass.</param>
/// <param name="create_hash">
/// True to store records in <c>_dGlycopeptides</c>, keyed by the floor of the monoisotopic
/// mass; false to append them to the flat <c>_glycopeptides</c> list.
/// </param>
public void LoadGlycopeptidesFromFile(string infile, double min_mass, double max_mass, bool create_hash)
{
    CSVFileHandler reader = new CSVFileHandler(infile, CSVFileHandler.READ_ONLY);
    reader.openFile();
    try
    {
        // The first line is not parsed.
        reader.readLine();

        if (create_hash)
        {
            _dGlycopeptides = new Dictionary<int, List<GlycopeptideRecord>>();
        }
        else
        {
            _glycopeptides = new List<GlycopeptideRecord>();
        }

        String[] fields;
        while ((fields = reader.readLine()) != null)
        {
            double gpmass = double.Parse(fields[0]);
            if (gpmass < min_mass || gpmass > max_mass)
            {
                // Outside the requested mass window: skip without parsing the rest.
                continue;
            }

            GlycopeptideRecord gp = new GlycopeptideRecord();
            gp.GP_Mono_Mass = gpmass;
            gp.GP_Average_Mass = double.Parse(fields[1]);
            gp.Sequence.proteinName = fields[2];
            gp.Sequence.sequence = fields[3];
            gp.Sequence.is_decoy = bool.Parse(fields[4]);
            gp.Sequence.nGlycoSite = fields[5];
            gp.SequenceMonoMass = double.Parse(fields[6]);
            gp.SequenceAverageMass = double.Parse(fields[7]);
            gp.Glycan.SetMonosaccharideCompostion(fields[8]);
            gp.Glycan.composition = fields[8];
            gp.Glycan.is_decoy = bool.Parse(fields[9]);
            gp.GlycanMonoMass = double.Parse(fields[10]);
            gp.GlycanAverageMass = double.Parse(fields[11]);
            // The glycopeptide is a decoy when either its peptide or its glycan is.
            gp.IsDecoy = gp.Sequence.is_decoy | gp.Glycan.is_decoy;

            if (create_hash)
            {
                // Bucket by integer mass; a single lookup finds or creates the bucket.
                int key_mass = (int)Math.Floor(gp.GP_Mono_Mass);
                List<GlycopeptideRecord> bucket;
                if (!_dGlycopeptides.TryGetValue(key_mass, out bucket))
                {
                    bucket = new List<GlycopeptideRecord>();
                    _dGlycopeptides.Add(key_mass, bucket);
                }
                bucket.Add(gp);
            }
            else
            {
                _glycopeptides.Add(gp);
            }
        }
    }
    finally
    {
        // Release the file even when a row fails to parse.
        reader.closeFile();
    }
}
/// <summary>
/// Runs glycan sequencing on the CID spectrum of every cluster-representative
/// fragmentation event that has an ETD score greater than zero.
/// </summary>
/// <remarks>
/// For each qualifying event, charge states from (precursor charge - 1) down to 1 are tried
/// for the m/z computed from peptide mass + HexNAc mass (named "y1" in the code). The first
/// charge state for which sequencing returns at least one structure wins: the top
/// structure's score and IUPAC string are stored on the event, the IUPAC string is written
/// to "&lt;clusterID&gt;_&lt;CIDScan&gt;.txt" inside <paramref name="sequencingFolder"/>,
/// and no lower charge states are tried.
/// </remarks>
/// <param name="_glycoMap">Map whose clusters are processed; the reference itself is not reassigned.</param>
/// <param name="_params">Not used by this method.</param>
/// <param name="sequencingFolder">
/// Folder passed to the sequencer and used for the output files. A null or empty value
/// falls back to <c>c:\sequencing\</c>.
/// </param>
public void SequenceCIDPeaks(ref Classes.MapRecord _glycoMap, ref Classes.Params _params, string sequencingFolder)
{
    if (string.IsNullOrEmpty(sequencingFolder))
    {
        sequencingFolder = @"c:\sequencing\";
    }

    for (int i = 0; i < _glycoMap._AllMLNRecords.Count; i++)
    {
        MultiAlignRecord m = _glycoMap._AllMLNRecords[i];

        // Only the cluster-representative events are sequenced. An earlier variant looped
        // over m._AssociatedUMCRecords[j]._AssociatedFragEvents[k] instead.
        for (int k = 0; k < m._ClusterRepFragEvents.Count; k++)
        {
            Classes.FragEvents e = m._ClusterRepFragEvents[k];
            if (!(e.ETDScore > 0))
            {
                continue;
            }

            float PeptideMass = 0;
            float GlcNAcMass = 0;
            COL.MassLib.MSScan _msScan = new COL.MassLib.MSScan(
                e.CIDMzs,
                e.CIDIntensities,
                Convert.ToSingle(e.TransformResult.mdbl_mz),
                Convert.ToSingle(e.TransformResult.mdbl_mono_mw),
                Convert.ToSingle(e.TransformResult.mdbl_average_mw),
                Convert.ToInt32(e.TransformResult.mshort_cs));

            bool useAverageMass = e.TransformResult.mdbl_average_mw > 0;
            if (useAverageMass)
            {
                // This means proper deisotoping has occurred, so use average composition.
                PeptideMass = Convert.ToSingle(e.GP_Record.SequenceAverageMass);
                GlcNAcMass = COL.GlycoLib.GlycanMass.GetGlycanAVGMass(COL.GlycoLib.Glycan.Type.HexNAc);
            }
            else
            {
                // CS has been assigned, in which case both y1 and precursor should be just mono.
                PeptideMass = Convert.ToSingle(e.GP_Record.SequenceMonoMass);
                GlcNAcMass = COL.GlycoLib.GlycanMass.GetGlycanMass(COL.GlycoLib.Glycan.Type.HexNAc);
            }

            // Start one charge below the precursor and work downwards.
            short y1cs = e.TransformResult.mshort_cs;
            y1cs--;
            while (y1cs > 0)
            {
                float y1Mz = ((PeptideMass + GlcNAcMass) + (float)_utils._CC_MASS * y1cs) / y1cs;

                COL.GlycoSequence.GlycanSequencing _Gs = new COL.GlycoSequence.GlycanSequencing(
                    _msScan, y1Mz, y1cs,
                    e.GP_Record.Glycan.numHex,
                    e.GP_Record.Glycan.numHexNAc,
                    e.GP_Record.Glycan.numDeHex,
                    e.GP_Record.Glycan.numNeuAc,
                    0, sequencingFolder, true, 0.8f, 60);
                _Gs.NumbersOfPeaksForSequencing = 140;
                _Gs.CreatePrecursotMZ = true;
                _Gs.RewardForCompleteStructure = 3;
                _Gs.UseAVGMass = useAverageMass;

                int structure_count = _Gs.StartSequencing();
                if (structure_count > 0)
                {
                    List<COL.GlycoLib.GlycanStructure> topstructures = _Gs.GetTopRankScoreStructre(1);
                    e.CIDSequencingScore = topstructures[0].Score;
                    e.GP_Record.GlycanSequence = topstructures[0].IUPACString;

                    // Print out the sequences. Path.Combine inserts the directory separator
                    // when the folder was given without a trailing one.
                    string opfile = System.IO.Path.Combine(
                        sequencingFolder,
                        m.ID.ToString() + "_" + e.CIDScan.ToString() + ".txt");
                    Utils.CSVFileHandler cidString = new CSVFileHandler(opfile, CSVFileHandler.WRITE_ONLY);
                    cidString.openFile();
                    try
                    {
                        for (int zz = 0; zz < topstructures.Count; zz++)
                        {
                            cidString.writeLine(topstructures[zz].IUPACString);
                            /*COL.GlycoLib.GlycansDrawer _gdraw = new COL.GlycoLib.GlycansDrawer(topstructures[zz].IUPACString, false);
                            System.Drawing.Image img1 = _gdraw.GetImage();*/
                        }
                    }
                    finally
                    {
                        cidString.closeFile();
                    }

                    // A structure was found at this charge state; do not try lower ones.
                    break;
                }

                y1cs--;
            }
        }
    }
}