/// <summary>
/// Reads a Sequest result file, gathers the spectra of the first protein of every
/// group accepted by <c>acceptFunc</c>, and writes them to a peptide text file.
/// </summary>
/// <param name="filename">Path of the result file to read.</param>
/// <returns>A one-element array containing the path of the written peptide file.</returns>
public override IEnumerable<string> Process(string filename)
{
    var resultFormat = new SequestResultTextFormat();

    Progress.SetMessage("Reading from " + filename + "...");
    IIdentifiedResult identified = resultFormat.ReadFromFile(filename);

    // A set is used so a spectrum reachable from several accepted groups is kept once.
    var uniqueSpectra = new HashSet<IIdentifiedSpectrum>();
    foreach (IIdentifiedProteinGroup proteinGroup in identified)
    {
        if (!acceptFunc(proteinGroup))
        {
            continue;
        }

        uniqueSpectra.UnionWith(proteinGroup[0].GetSpectra());
    }

    // Sorted with the element type's default ordering.
    var orderedSpectra = new List<IIdentifiedSpectrum>(uniqueSpectra);
    orderedSpectra.Sort();

    string outputFile = MyConvert.Format("{0}.{1}.peptides", filename, uniqueStr);
    Progress.SetMessage("Writing to " + outputFile + "...");

    // The output reuses the peptide header of the file that was just read.
    var peptideFormat = new SequestPeptideTextFormat(resultFormat.PeptideFormat.GetHeader());
    peptideFormat.WriteToFile(outputFile, orderedSpectra);

    Progress.SetMessage("Finished");
    return new[] { outputFile };
}
/// <summary>
/// Creates the viewer: initializes the designer components, resets the cached state,
/// inserts the save/export/view buttons, and registers the two list-view column
/// components.
/// </summary>
public AbstractQuantificationSummaryViewerUI()
{
    InitializeComponent();

    // No result is loaded yet.
    mr = null;
    pepProMap = null;

    ScoreDecimal = 0;
    DiffDecimal = 3;

    // Insertion order matters: btnView is inserted at index 2 after btnExport was inserted there.
    InsertButton(1, btnSave);
    InsertButton(2, btnExport);
    InsertButton(2, btnView);

    // Save and export start out disabled.
    btnSave.Enabled = false;
    btnExport.Enabled = false;

    lvcProteins = new ListViewColumnField(lvProteins, "LvProteins");
    AddComponent(lvcProteins);

    lvcPeptides = new ListViewColumnField(lvPeptides, "LvPeptides");
    AddComponent(lvcPeptides);

    ExportScanHeaders = new List<string>();

    bFirstLoad = true;

    lvProteins.DoubleBuffered(true);
    lvPeptides.DoubleBuffered(true);
}
/// <summary>
/// Invokes the single-item <c>Calculate</c> overload for every element enumerated
/// from <paramref name="mr"/>.
/// </summary>
/// <param name="mr">The identified result whose elements are processed in order.</param>
/// <param name="validFunc">Spectrum predicate forwarded unchanged to each call.</param>
public void Calculate(IIdentifiedResult mr, Func<IIdentifiedSpectrum, bool> validFunc)
{
    foreach (var proteinGroup in mr)
    {
        Calculate(proteinGroup, validFunc);
    }
}
/// <summary>
/// Runs the base processing, points the protein scan at the summary file and detail
/// directory, and recalculates every group when no group's first protein has a valid
/// ratio.
/// </summary>
/// <param name="mr">The identified result being loaded.</param>
protected override void ProcessIdentifiedResult(IIdentifiedResult mr)
{
    base.ProcessIdentifiedResult(mr);

    proteinScan.SummaryFilename = summaryFilename;
    proteinScan.DefaultDetailDirectory = GetDetailDirectoryName();

    // Look for the first group whose leading protein already carries a valid ratio.
    bool hasValidRatio = false;
    foreach (var proteinGroup in mr)
    {
        if (option.IsProteinRatioValid(proteinGroup[0]))
        {
            hasValidRatio = true;
            break;
        }
    }

    if (hasValidRatio)
    {
        return;
    }

    // Nothing valid was found: recalculate every group, accepting all spectra.
    foreach (var proteinGroup in mr)
    {
        calc.Calculate(proteinGroup, m => true);
    }

    format.InitializeByResult(mr);
}
/// <summary>
/// Reads a Mascot result file, detaches every peptide whose matched sequence is
/// rejected by <c>validator</c>, filters the result to peptides that still have a
/// spectrum, and writes it to <c>fileName + ".Labeled"</c>.
/// </summary>
/// <param name="fileName">Path of the result file to read.</param>
/// <returns>A one-element array containing the path of the written file.</returns>
public override IEnumerable<string> Process(string fileName)
{
    var textFormat = new MascotResultTextFormat();
    IIdentifiedResult identified = textFormat.ReadFromFile(fileName);

    List<IIdentifiedSpectrum> allSpectra = identified.GetSpectra();
    foreach (IIdentifiedSpectrum spectrum in allSpectra)
    {
        // Walk backwards so removing an entry does not shift the indices still to be visited.
        for (int index = spectrum.Peptides.Count - 1; index >= 0; index--)
        {
            IIdentifiedPeptide candidate = spectrum.Peptides[index];
            string matchedSequence = PeptideUtils.GetMatchedSequence(candidate.Sequence);
            if (validator.Validate(matchedSequence))
            {
                continue;
            }

            spectrum.RemovePeptideAt(index);

            // A null spectrum marks the peptide for the filter below.
            candidate.Spectrum = null;
        }
    }

    identified.Filter(m => m.Spectrum != null);

    string outputFile = fileName + ".Labeled";
    textFormat.WriteToFile(outputFile, identified);

    return new[] { outputFile };
}
/// <summary>
/// Invokes the per-group <c>Calculate</c> overload for every protein group in
/// <paramref name="mr"/>.
/// </summary>
/// <param name="mr">The identified result whose groups are processed in order.</param>
/// <param name="validFunc">Spectrum predicate forwarded unchanged to each call.</param>
public void Calculate(IIdentifiedResult mr, Func<IIdentifiedSpectrum, bool> validFunc)
{
    foreach (IIdentifiedProteinGroup proteinGroup in mr)
    {
        Calculate(proteinGroup, validFunc);
    }
}
/// <summary>
/// Form-closed handler: clears the result tree, drops the reference to the loaded
/// proteins, and forces a garbage collection.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void ITraqQuantificationSummaryViewerUI_FormClosed(object sender, FormClosedEventArgs e)
{
    tvResult.Nodes.Clear();
    proteins = null;

    GC.Collect();

    // GC.WaitForFullGCComplete() only reports the status of a notification registered with
    // GC.RegisterForFullGCNotification; no such registration is visible here, so it did not
    // wait for the collection above. Waiting for pending finalizers is the call that blocks
    // until the objects released by that collection have been finalized.
    GC.WaitForPendingFinalizers();
}
/// <summary>
/// Builds the O18 viewer options and ratio calculator from the summary file name,
/// runs the base processing, configures the protein scan, and recalculates the whole
/// result when no group's first protein has a valid ratio.
/// </summary>
/// <param name="mr">The identified result being loaded.</param>
protected override void ProcessIdentifiedResult(IIdentifiedResult mr)
{
    // Options and calculator are created before the base call.
    option = new O18QuantificationSummaryViewerOptions(summaryFilename);
    calc = option.GetProteinRatioCalculator();
    calc.SummaryFileDirectory = Path.GetDirectoryName(summaryFilename);
    calc.DetailDirectory = GetDetailDirectoryName();

    base.ProcessIdentifiedResult(mr);

    proteinScan.SummaryFilename = summaryFilename;
    proteinScan.DefaultDetailDirectory = GetDetailDirectoryName();

    // Look for the first group whose leading protein already carries a valid ratio.
    bool hasValidRatio = false;
    foreach (var proteinGroup in mr)
    {
        if (option.IsProteinRatioValid(proteinGroup[0]))
        {
            hasValidRatio = true;
            break;
        }
    }

    if (hasValidRatio)
    {
        return;
    }

    // Nothing valid was found: recalculate the whole result, accepting all spectra.
    calc.Calculate(mr, m => true);
    format.InitializeByResult(mr);
}
/// <summary>
/// Re-initializes the graph pane and builds one <c>LSPADItem</c> per distinct enabled
/// peptide with a valid ratio, taken from the enabled groups whose first protein has a
/// valid ratio, then computes p-values for those items. The graph control is refreshed
/// even if an exception is thrown.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Carries the summary option and the identified result to display.</param>
public override void Update(object sender, UpdateQuantificationItemEventArgs e)
{
    IQuantificationSummaryOption summaryOption = e.Option as IQuantificationSummaryOption;
    IIdentifiedResult identified = e.Item as IIdentifiedResult;

    string xTitle = MyConvert.Format("(Log({0}) + Log({1})) / 2", summaryOption.Func.ReferenceKey, summaryOption.Func.SampleKey);
    string yTitle = MyConvert.Format("Log(Ratio)");

    panel.InitGraphPane(title, xTitle, yTitle, true, 0.0);

    try
    {
        var distinctSpectra = new HashSet<IIdentifiedSpectrum>();

        var usableGroups = identified.Where(g => g[0].IsEnabled(true) && summaryOption.IsProteinRatioValid(g[0]));
        foreach (var proteinGroup in usableGroups)
        {
            var usablePeptides = proteinGroup.GetPeptides().Where(p => p.IsEnabled(true) && summaryOption.IsPeptideRatioValid(p));
            distinctSpectra.UnionWith(usablePeptides);
        }

        var items = new List<LSPADItem>();
        foreach (var spectrum in distinctSpectra)
        {
            double logReference = Math.Log(summaryOption.Func.GetReferenceIntensity(spectrum));
            double logSample = Math.Log(summaryOption.Func.GetSampleIntensity(spectrum));

            // X value: mean of the two log intensities.
            double meanLogIntensity = (logReference + logSample) / 2;
            double logRatio = Math.Log(summaryOption.GetPeptideRatio(spectrum));

            var item = new LSPADItem();
            item.LogRatio = logRatio;
            item.Intensity = meanLogIntensity;
            item.Tag = spectrum;
            items.Add(item);
        }

        LSPADItem.CalculatePValue(items);

        // Drawing calls are currently disabled in the original source:
        //this.panel.DrawProbabilityRange(maxX, ratios);
        //this.panel.AddPoints(pplSelected, SelectedColor, "Current Peptide");
        //this.panel.AddPoints(pplGroup, GroupColor, "Current Protein");
        //this.panel.AddPoints(pplOutlier, OutlierColor, "Outlier");
        //this.panel.AddPoints(pplNormal, NormalColor, "Other");
    }
    finally
    {
        ZedGraphicExtension.UpdateGraph(this.zgcGraph);
    }
}
/// <summary>
/// Loads the protein file, attaches iTRAQ data to its spectra, and removes every group
/// whose first protein has no peptide left with an isobaric item. A user cancellation
/// is reported on the progress display; any other failure is also shown in a message
/// box.
/// </summary>
private void LoadData()
{
    try
    {
        Progress.SetMessage("Reading proteins from " + proteinFile + " ...");
        format.Progress = Progress;
        proteins = format.ReadFromFile(proteinFile);

        List<IIdentifiedSpectrum> allSpectra = proteins.GetSpectra();

        Progress.SetMessage("Reading itraqs from " + itrapFile + " ...");
        ITraqItemUtils.LoadITraq(allSpectra, itrapFile, true, Progress);

        // Iterate from the end so removing a group does not disturb the indices still to visit.
        for (int index = proteins.Count - 1; index >= 0; index--)
        {
            var proteinGroup = proteins[index];

            // Only the first protein of the group is pruned and inspected.
            proteinGroup[0].Peptides.RemoveAll(m => m.Spectrum.FindIsobaricItem() == null);

            if (proteinGroup[0].Peptides.Count != 0)
            {
                continue;
            }

            proteins.Remove(proteinGroup);
        }
    }
    catch (UserTerminatedException)
    {
        Progress.SetMessage("User terminated.");
    }
    catch (Exception ex)
    {
        Progress.SetMessage("Error : {0}", ex.Message);
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Reads a result document and distributes its entries round-robin over
/// <c>splitCount</c> output documents: entry <c>j</c> goes to part
/// <c>j % splitCount</c>. Each part is written to a file named by
/// <c>FileUtils.ChangeExtension</c> with "(part number)" followed by the original
/// extension.
/// </summary>
/// <param name="filename">Path of the document to split.</param>
/// <returns>The paths of the written part files, in part order.</returns>
public override IEnumerable<string> Process(string filename)
{
    string[] files = new string[splitCount];
    var dtaSelectFormat = new MascotResultDtaselectFormat();

    Progress.SetMessage("Reading document ...");
    IIdentifiedResult mr = dtaSelectFormat.ReadFromFile(filename);

    Progress.SetRange(0, splitCount);
    for (int part = 0; part < splitCount; part++)
    {
        string newExtension = (part + 1).ToString() + new FileInfo(filename).Extension;
        files[part] = FileUtils.ChangeExtension(filename, newExtension);

        // Striding by splitCount from 'part' visits exactly the indices with j % splitCount == part.
        var partResult = new MascotResult();
        for (int j = part; j < mr.Count; j += splitCount)
        {
            partResult.Add(mr[j]);
        }

        Progress.SetMessage("Writing document " + files[part] + " ...");
        dtaSelectFormat.WriteToFile(files[part], partResult);
        Progress.SetPosition(part + 1);
    }

    Progress.End();

    return new List<string>(files);
}
/// <summary>
/// Builds a correlation item per protein group. The first protein of each group is
/// parsed into a protein-level item; its peptides are grouped by matched sequence,
/// each sequence group is parsed into a peptide-level item, and that item's Pearson
/// correlation against the protein-level values is stored. Peptide items are finally
/// sorted by descending correlation.
/// </summary>
/// <param name="ir">The identified result to analyse.</param>
/// <returns>The populated correlation result.</returns>
private ResultCorrelationItem BuildResult(IIdentifiedResult ir)
{
    var correlations = new ResultCorrelationItem();
    correlations.ClassificationTitles = ClassificationSet.Keys.ToArray();

    foreach (var proteinGroup in ir)
    {
        var leadProtein = proteinGroup[0];

        var proteinItem = new ProteinCorrelationItem();
        correlations.Add(proteinItem);

        proteinItem.Index = proteinGroup.Index;
        proteinItem.Protein = ParseItem(leadProtein.Peptides, correlations.ClassificationTitles);
        proteinItem.Protein.Name = leadProtein.Name;

        var bySequence = leadProtein.Peptides.GroupBy(m => PeptideUtils.GetMatchedSequence(m.Sequence));
        foreach (var sequenceGroup in bySequence)
        {
            var peptideItem = ParseItem(sequenceGroup.ToList(), correlations.ClassificationTitles);
            proteinItem.Peptides.Add(peptideItem);

            peptideItem.Name = sequenceGroup.Key;
            peptideItem.Correlation = Correlation.Pearson(peptideItem.Values, proteinItem.Protein.Values);
        }
    }

    // Comparing right-to-left yields descending correlation order.
    correlations.ForEach(item => item.Peptides.Sort((left, right) => right.Correlation.CompareTo(left.Correlation)));

    return correlations;
}
/// <summary>
/// Creates a text format that shares the peptide format of <c>format</c> and is then
/// initialized from <paramref name="ir"/>.
/// </summary>
/// <param name="ir">The identified result used to initialize the new format.</param>
/// <returns>The initialized format.</returns>
protected override MascotResultTextFormat GetFormat(IIdentifiedResult ir)
{
    var textFormat = new MascotResultTextFormat
    {
        PeptideFormat = format.PeptideFormat
    };

    textFormat.InitializeByResult(ir);

    return textFormat;
}
/// <summary>
/// Rebuilds <c>ProteinFormat</c> from a header made by merging
/// <paramref name="oldProteinHeader"/> with the annotation keys found on the result's
/// proteins (tab-delimited).
/// </summary>
/// <param name="identifiedResult">Result supplying the proteins.</param>
/// <param name="oldProteinHeader">Header to extend with the annotation keys.</param>
private void InitializeProteinFormat(IIdentifiedResult identifiedResult, string oldProteinHeader)
{
    var allProteins = identifiedResult.GetProteins();

    List<string> annotationKeys = AnnotationUtils.GetAnnotationKeys(allProteins);
    string mergedHeader = StringUtils.GetMergedHeader(oldProteinHeader, annotationKeys, '\t');

    ProteinFormat = new LineFormat<IIdentifiedProtein>(
        IdentifiedProteinPropertyConverterFactory.GetInstance(),
        mergedHeader,
        GetEngineName(),
        allProteins);
}
/// <summary>
/// Rebuilds <c>PeptideFormat</c> from a header made by merging
/// <paramref name="oldPeptideHeader"/> with the annotation keys found on the result's
/// spectra (tab-delimited).
/// </summary>
/// <param name="identifiedResult">Result supplying the spectra.</param>
/// <param name="oldPeptideHeader">Header to extend with the annotation keys.</param>
private void InitializePeptideFormat(IIdentifiedResult identifiedResult, string oldPeptideHeader)
{
    var allSpectra = identifiedResult.GetSpectra();

    List<string> annotationKeys = AnnotationUtils.GetAnnotationKeys(allSpectra);
    string mergedHeader = StringUtils.GetMergedHeader(oldPeptideHeader, annotationKeys, '\t');

    PeptideFormat = new LineFormat<IIdentifiedSpectrum>(
        IdentifiedSpectrumPropertyConverterFactory.GetInstance(),
        mergedHeader,
        GetEngineName(),
        allSpectra);
}
/// <summary>
/// Runs <c>DoCalculate</c> on every group, writes the non-null entries to
/// "rlm_file.csv" in the detail directory, processes the
/// "MultiplePairQuantification.r" template with that list as input, and stores the
/// parsed regression result of each protein named in the output on every protein of
/// the matching group.
/// </summary>
/// <param name="mr">The identified result whose groups are quantified.</param>
/// <param name="validFunc">Spectrum predicate forwarded to <c>DoCalculate</c>.</param>
public void Calculate(IIdentifiedResult mr, Func<IIdentifiedSpectrum, bool> validFunc)
{
    var pending = new List<WaitingEntry>();
    foreach (var proteinGroup in mr)
    {
        var waiting = DoCalculate(proteinGroup, validFunc, false);
        if (waiting != null)
        {
            pending.Add(waiting);
        }
    }

    if (pending.Count <= 0)
    {
        return;
    }

    // List file handed to the R template: one quoted "protein,intensity file" row per entry.
    var listFile = (this.DetailDirectory + "/rlm_file.csv").Replace("\\", "/");
    using (var writer = new StreamWriter(listFile))
    {
        writer.WriteLine("Protein,IntensityFile");
        foreach (var item in pending)
        {
            writer.WriteLine("\"{0}\",\"{1}\"", item.Group[0].Name, item.IntensityFile);
        }
    }

    var linearFile = new FileInfo(this.DetailDirectory + "/rlm.linear").FullName.Replace("\\", "/");

    var rOptions = new RTemplateProcessorOptions();
    rOptions.InputFile = listFile;
    rOptions.OutputFile = linearFile;
    rOptions.RTemplate = FileUtils.GetTemplateDir() + "/MultiplePairQuantification.r";
    new RTemplateProcessor(rOptions).Process();

    // The first output line is skipped; the protein name is the text between the first pair of quotes.
    var byProteinName = File.ReadAllLines(linearFile)
        .Skip(1)
        .Select(line => line.Split('\t'))
        .Select(parts => new
        {
            ProteinName = parts[0].StringAfter("\"").StringBefore("\""),
            LinearRegressionResult = ParseLinearRegressionRatioResult(parts, 2)
        })
        .ToDictionary(m => m.ProteinName);

    foreach (var proteinGroup in mr)
    {
        if (!byProteinName.ContainsKey(proteinGroup[0].Name))
        {
            continue;
        }

        var regression = byProteinName[proteinGroup[0].Name].LinearRegressionResult;
        foreach (IIdentifiedProtein protein in proteinGroup)
        {
            this.intensityFunc.SaveToAnnotation(protein, regression);
        }
    }
}
/// <summary>
/// Computes a log-scaled NSAF value per protein group and dataset. For each dataset,
/// a group's count is the number of its peptides whose experiment name belongs to the
/// dataset; the count divided by the first protein's sequence length is summed over
/// all groups, and each group's value becomes <c>Math.Log(value / sum)</c>.
/// </summary>
/// <param name="proteins">Protein groups to quantify; must be non-null and non-empty.</param>
/// <param name="datasets">Maps a dataset key to the experiment names belonging to it.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="proteins"/> is null or empty.
/// </exception>
/// <exception cref="Exception">
/// The first protein of some group has no sequence.
/// </exception>
public void Calculate(IIdentifiedResult proteins, Dictionary<string, List<string>> datasets)
{
    if (proteins == null || proteins.Count == 0)
    {
        // Use the (paramName, message) overload: the single-string constructor treats its
        // argument as the parameter name, which reported the whole message as the name.
        throw new ArgumentNullException("proteins", "Argument proteins cannot be null or empty in NSAFProteinLabelfreeQuantificationCalculator.Calculate");
    }

    if (proteins.Any(m => m[0].Sequence == null))
    {
        throw new Exception("NSAF Quantification Calculator needs protein sequence information.");
    }

    // Every protein of a group shares one result object holding a value slot per dataset.
    foreach (var g in proteins)
    {
        var lr = new LabelfreeResult();
        foreach (var key in datasets.Keys)
        {
            lr[key] = new LabelfreeValue();
        }

        foreach (var p in g)
        {
            p.SetLabelfreeResult(lr);
        }
    }

    // Count substituted for groups without any spectrum so the logarithm stays defined.
    double zeroCount = 0.16;

    foreach (var key in datasets.Keys)
    {
        // Set lookup instead of scanning the experiment list once per peptide.
        var exps = new HashSet<string>(datasets[key]);

        double allnsaf = 0.0;
        foreach (var g in proteins)
        {
            var lr = g[0].GetLabelfreeResult();
            var lv = lr[key];

            lv.Count = g.GetPeptides().Count(m => exps.Contains(m.Query.FileScan.Experimental));
            var sc = (double)(lv.Count);

            lv.Value = sc / g[0].Sequence.Length;
            allnsaf += lv.Value;
        }

        foreach (var g in proteins)
        {
            var lr = g[0].GetLabelfreeResult();
            var lv = lr[key];

            var nsaf = lv.Value;
            if (nsaf == 0)
            {
                nsaf = zeroCount / g[0].Sequence.Length;
            }

            lv.Value = Math.Log(nsaf / allnsaf);
        }
    }
}
/// <summary>
/// Returns the uniform peptide header, with the "RetentionTime" column removed when
/// no peptide in the result has a retention time different from 0.0.
/// </summary>
/// <param name="finalResult">The result whose peptides are inspected.</param>
/// <returns>The peptide header to use.</returns>
private static string GetPeptideHeader(IIdentifiedResult finalResult)
{
    bool anyRetentionTime = finalResult.Any(
        proteinGroup => proteinGroup.Any(
            protein => protein.Peptides.Any(
                peptide => peptide.Spectrum.Query.FileScan.RetentionTime != 0.0)));

    if (anyRetentionTime)
    {
        return UniformHeader.PEPTIDE_HEADER;
    }

    return UniformHeader.PEPTIDE_HEADER.Replace("\tRetentionTime", "");
}
/// <summary>
/// Re-initializes the protein format and then the peptide format from
/// <paramref name="identifiedResult"/>, starting from each format's current header or
/// from the default header when the format is not set yet.
/// </summary>
/// <param name="identifiedResult">The result used to initialize both formats.</param>
public void InitializeByResult(IIdentifiedResult identifiedResult)
{
    string proteinHeader;
    if (ProteinFormat == null)
    {
        proteinHeader = GetDefaultProteinHeader();
    }
    else
    {
        proteinHeader = ProteinFormat.GetHeader();
    }
    InitializeProteinFormat(identifiedResult, proteinHeader);

    // The peptide header is read only after the protein format has been rebuilt.
    string peptideHeader;
    if (PeptideFormat == null)
    {
        peptideHeader = GetDefaultPeptideHeader();
    }
    else
    {
        peptideHeader = PeptideFormat.GetHeader();
    }
    InitializePeptideFormat(identifiedResult, peptideHeader);
}
/// <summary>
/// Returns the groups accepted by <c>ValidGroup</c>, or the result itself when no
/// <c>ValidGroup</c> predicate is set. The filtered sequence is evaluated lazily.
/// </summary>
/// <param name="identifiedResult">The result whose groups are filtered.</param>
/// <returns>The accepted protein groups.</returns>
protected IEnumerable<IIdentifiedProteinGroup> GetValidGroups(IIdentifiedResult identifiedResult)
{
    if (ValidGroup != null)
    {
        return identifiedResult.Where(g => ValidGroup(g));
    }

    return identifiedResult;
}
/// <summary>
/// Reads an identified result from a protein file and its companion peptide and
/// peptide-to-group files. Proteins are grouped by <c>GroupIndex</c>, peptides are
/// linked to the groups, and sequences are filled from
/// <c>fileName + ".fasta"</c> when that file exists.
/// </summary>
/// <param name="fileName">Path of the protein file.</param>
/// <returns>The assembled identified result.</returns>
/// <exception cref="FileNotFoundException">
/// The protein, peptide or peptide-to-group file does not exist.
/// </exception>
public override IIdentifiedResult ReadFromFile(string fileName)
{
    if (!File.Exists(fileName))
    {
        throw new FileNotFoundException("Protein file not exist : " + fileName);
    }

    string peptideFile = GetPeptideFileName(fileName);
    if (!File.Exists(peptideFile))
    {
        throw new FileNotFoundException("Peptide file not exist : " + peptideFile);
    }

    string linkFile = GetLinkFileName(fileName);
    if (!File.Exists(linkFile))
    {
        throw new FileNotFoundException("Peptide2group file not exist : " + linkFile);
    }

    // Each reader's format is adopted as this instance's format.
    var peptideReader = new PeptideTextReader(GetEngineName());
    List<IIdentifiedSpectrum> spectra = peptideReader.ReadFromFile(peptideFile);
    this.PeptideFormat = peptideReader.PeptideFormat;

    var proteinReader = new ProteinTextReader(GetEngineName());
    List<IIdentifiedProtein> proteins = proteinReader.ReadFromFile(fileName);
    this.ProteinFormat = proteinReader.ProteinFormat;

    var spectrumById = spectra.ToDictionary(m => m.Id);
    var proteinsByGroup = proteins.GroupBy(m => m.GroupIndex);

    IIdentifiedResult identified = Allocate();
    foreach (var members in proteinsByGroup)
    {
        var proteinGroup = new IdentifiedProteinGroup();
        foreach (var member in members)
        {
            proteinGroup.Add(member);
        }

        identified.Add(proteinGroup);
    }

    var groupByIndex = identified.ToDictionary(m => m.Index);
    new Peptide2GroupTextReader().LinkPeptideToGroup(linkFile, spectrumById, groupByIndex);

    // The fasta companion file is optional.
    string fastaFile = fileName + ".fasta";
    if (File.Exists(fastaFile))
    {
        IdentifiedResultUtils.FillSequenceFromFasta(fastaFile, identified, null);
    }

    return identified;
}
/// <summary>
/// Creates the text format for <paramref name="finalResult"/>. The peptide header
/// comes from <c>GetPeptideHeader</c>; the protein header is the uniform protein
/// header, with a "Decoy" column appended when filtering by FDR is enabled.
/// </summary>
/// <param name="finalResult">The result the peptide header is derived from.</param>
/// <param name="progress">Progress callback assigned to the returned format.</param>
/// <returns>The configured file format.</returns>
public IFileFormat<IIdentifiedResult> GetIdetifiedResultFormat(IIdentifiedResult finalResult, IProgressCallback progress)
{
    var peptideHeader = GetPeptideHeader(finalResult);

    var proteinHeader = UniformHeader.PROTEIN_HEADER;
    if (this.FalseDiscoveryRate.FilterByFdr)
    {
        proteinHeader = UniformHeader.PROTEIN_HEADER + "\tDecoy";
    }

    var textFormat = new MascotResultTextFormat(proteinHeader, peptideHeader);
    textFormat.Progress = progress;

    return textFormat;
}
/// <summary>
/// Rewrites peptide sequences so that each modification listed in a spectrum's
/// <c>Modifications</c> string is marked by a character inserted into the sequence.
/// The first pass collects the distinct modification names (the text inside
/// parentheses); the sorted names are assigned characters from
/// <c>MODIFICATION_CHAR</c> in order; the second pass inserts the assigned character
/// at the position given before each parenthesis.
/// </summary>
/// <param name="ir">The identified result whose spectra are updated.</param>
protected virtual void RefineModifications(IIdentifiedResult ir)
{
    var spectra = ir.GetSpectra();
    var separators = new char[] { ' ', '\t' };

    // Pass 1: collect every modification name found between parentheses.
    var namePattern = new Regex(@"\((.+)\)");
    var modificationNames = new HashSet<string>();
    foreach (var spectrum in spectra)
    {
        if (spectrum.Modifications == null)
        {
            continue;
        }

        foreach (var token in spectrum.Modifications.Split(separators))
        {
            // A token without a match contributes the empty string, which is dropped below.
            modificationNames.Add(namePattern.Match(token).Groups[1].Value);
        }
    }

    var orderedNames = modificationNames.Where(m => m != "").OrderBy(m => m).ToList();

    // The n-th name (in sorted order) gets the n-th character of MODIFICATION_CHAR.
    var symbolByName = new Dictionary<string, string>();
    foreach (var name in orderedNames)
    {
        symbolByName[name] = MODIFICATION_CHAR[symbolByName.Count].ToString();
    }

    // Pass 2: insert the symbol of every "position(name)" token into the sequence.
    var sitePattern = new Regex(@"(\d+)\((.+)\)");
    foreach (var spectrum in spectra)
    {
        if (spectrum.Modifications == null)
        {
            continue;
        }

        string[] tokens = spectrum.Modifications.Split(separators);

        // Tokens are handled last-to-first; presumably positions are listed in ascending
        // order so that earlier insert positions stay valid - confirm against the data.
        for (int i = tokens.Length - 1; i >= 0; i--)
        {
            if (tokens[i] == "")
            {
                continue;
            }

            var site = sitePattern.Match(tokens[i]);
            var position = Convert.ToInt32(site.Groups[1].Value);
            var name = site.Groups[2].Value;

            spectrum.Peptide.Sequence = spectrum.Sequence.Insert(position, symbolByName[name]);
        }
    }
}
/// <summary>
/// Ensures both line formats exist: a missing protein format and a missing peptide
/// format are each initialized from <paramref name="identifiedResult"/> using the
/// corresponding default header. Formats that are already set are left untouched.
/// </summary>
/// <param name="identifiedResult">The result used to initialize a missing format.</param>
protected void CheckFormat(IIdentifiedResult identifiedResult)
{
    if (ProteinFormat == null)
    {
        string defaultProteinHeader = GetDefaultProteinHeader();
        InitializeProteinFormat(identifiedResult, defaultProteinHeader);
    }

    if (PeptideFormat == null)
    {
        string defaultPeptideHeader = GetDefaultPeptideHeader();
        InitializePeptideFormat(identifiedResult, defaultPeptideHeader);
    }
}
/// <summary>
/// Writes a tab-delimited summary of the valid groups of <paramref name="mr"/>: total
/// protein and group counts, the peptide and protein FDR when they are not -1, and a
/// table that bins groups by the unique peptide count of their first protein.
/// </summary>
/// <param name="sw">Writer that receives the summary.</param>
/// <param name="mr">The identified result to summarize.</param>
public void WriteSummary(StreamWriter sw, IIdentifiedResult mr)
{
    // Materialize once: GetValidGroups can return a deferred query, which would otherwise
    // re-run its predicate for the sum, the count and the binning loop.
    List<IIdentifiedProteinGroup> groups = GetValidGroups(mr).ToList();

    int totalProteinCount = groups.Sum(g => g.Count);
    int totalGroupCount = groups.Count;

    sw.WriteLine("Category\tValue");
    sw.WriteLine("Total protein\t{0}", totalProteinCount);
    sw.WriteLine("Total protein group\t{0}", totalGroupCount);

    // -1 marks an FDR that was not set.
    if (mr.PeptideFDR != -1)
    {
        sw.WriteLine("Peptide FDR\t{0}", mr.PeptideFDR);
    }

    if (mr.ProteinFDR != -1)
    {
        sw.WriteLine("Protein FDR\t{0}", mr.ProteinFDR);
    }

    if (totalGroupCount <= 0)
    {
        return;
    }

    sw.WriteLine();
    sw.WriteLine("UniPepCount\tProteinGroupCount\tPercent\tProteinCount\tPercent");

    // unique peptide count -> (number of groups, number of proteins)
    var bin = new Dictionary<int, Pair<int, int>>();
    foreach (IIdentifiedProteinGroup group in groups)
    {
        var unique = group[0].Peptides.GetUniquePeptideCount(m => IsValidSpectrum(m.Spectrum));

        Pair<int, int> counts;
        if (!bin.TryGetValue(unique, out counts))
        {
            counts = new Pair<int, int>(0, 0);
            bin[unique] = counts;
        }

        counts.First = counts.First + 1;
        counts.Second = counts.Second + group.Count;
    }

    var uniques = new List<int>(bin.Keys);
    uniques.Sort();

    foreach (int unique in uniques)
    {
        Pair<int, int> counts = bin[unique];
        sw.WriteLine("{0}\t{1}\t{2:0.00}%\t{3}\t{4:0.00}%",
            unique,
            counts.First,
            (counts.First * 100.0) / totalGroupCount,
            counts.Second,
            (counts.Second * 100.0) / totalProteinCount);
    }
}
/// <summary>
/// Reads the source file named in <c>option</c> and builds one calculation item per
/// protein group, keyed by the group and carrying the distinct peptides of its first
/// protein.
/// </summary>
protected override void ParseToCalculationItems()
{
    IIdentifiedResult identified = format.ReadFromFile(option.SourceFileName);

    calculationItems = identified
        .Select(proteinGroup => new CalculationItem()
        {
            Key = proteinGroup,
            Peptides = proteinGroup[0].GetDistinctPeptides()
        })
        .ToList();
}
/// <summary>
/// Reads a Mascot result file, applies <c>KeepDistinctPeptideOnly</c> to it, and
/// writes the result to <c>fileName + ".distinct"</c>.
/// </summary>
/// <param name="fileName">Path of the result file to read.</param>
/// <returns>A one-element array containing the path of the written file.</returns>
public override IEnumerable<string> Process(string fileName)
{
    var textFormat = new MascotResultTextFormat();

    IIdentifiedResult identified = textFormat.ReadFromFile(fileName);
    KeepDistinctPeptideOnly(identified);

    string outputFile = fileName + ".distinct";
    textFormat.WriteToFile(outputFile, identified);

    return new[] { outputFile };
}
/// <summary>
/// Groups the spectra of <paramref name="mr"/> by the full path of their raw file,
/// built from the configured raw directory, the spectrum's experiment name, and the
/// configured raw extension.
/// </summary>
/// <param name="mr">The identified result supplying the spectra.</param>
/// <returns>A map from raw file path to the spectra belonging to that file.</returns>
private Dictionary<string, List<IIdentifiedSpectrum>> GetFilePeptideMap(IIdentifiedResult mr)
{
    var spectraByFile = new Dictionary<string, List<IIdentifiedSpectrum>>();

    foreach (IIdentifiedSpectrum spectrum in mr.GetSpectra())
    {
        string rawFile = new FileInfo(options.RawDirectory + "/" + spectrum.Query.FileScan.Experimental + options.RawExtension).FullName;

        List<IIdentifiedSpectrum> bucket;
        if (!spectraByFile.TryGetValue(rawFile, out bucket))
        {
            bucket = new List<IIdentifiedSpectrum>();
            spectraByFile[rawFile] = bucket;
        }

        bucket.Add(spectrum);
    }

    return spectraByFile;
}
/// <summary>
/// Reads a Mascot result file, passes every protein group to <c>processor</c>, and
/// writes the result to <c>fileName + ".Unduplicated"</c>.
/// </summary>
/// <param name="fileName">Path of the result file to read.</param>
/// <returns>A one-element array containing the path of the written file.</returns>
public override IEnumerable<string> Process(string fileName)
{
    var textFormat = new MascotResultTextFormat();
    IIdentifiedResult identified = textFormat.ReadFromFile(fileName);

    foreach (IIdentifiedProteinGroup proteinGroup in identified)
    {
        processor.Process(proteinGroup);
    }

    string outputFile = fileName + ".Unduplicated";
    textFormat.WriteToFile(outputFile, identified);

    return new[] { outputFile };
}
/// <summary>
/// Loads the non-redundant result file when it exists and indexes its spectra by pure
/// peptide sequence, each list sorted by charge and then by first scan; otherwise the
/// loaded result is cleared. The protein list is then refreshed and the first protein
/// and first peptide entries are selected when available.
/// </summary>
/// <param name="returnInfo">Unused by this implementation.</param>
protected override void ShowReturnInfo(IEnumerable<string> returnInfo)
{
    if (!File.Exists(noredundantFile.FullName))
    {
        ir = null;
    }
    else
    {
        ir = new MascotResultTextFormat().ReadFromFile(noredundantFile.FullName);

        pepMap = new Dictionary<string, List<IIdentifiedSpectrum>>();
        foreach (var spectrum in ir.GetSpectra())
        {
            string pureSequence = spectrum.Peptide.PureSequence;

            List<IIdentifiedSpectrum> sameSequence;
            if (!pepMap.TryGetValue(pureSequence, out sameSequence))
            {
                sameSequence = new List<IIdentifiedSpectrum>();
                pepMap[pureSequence] = sameSequence;
            }

            sameSequence.Add(spectrum);
        }

        foreach (var sameSequence in pepMap.Values)
        {
            // Order by charge, breaking ties by first scan number.
            sameSequence.Sort((left, right) =>
            {
                var byCharge = left.Charge.CompareTo(right.Charge);
                if (byCharge != 0)
                {
                    return byCharge;
                }

                return left.Query.FileScan.FirstScan.CompareTo(right.Query.FileScan.FirstScan);
            });
        }
    }

    UpdateProteins();

    if (lbProteins.Items.Count > 0)
    {
        lbProteins.SelectedIndex = 0;
    }

    if (lbPeptides.Items.Count > 0)
    {
        lbPeptides.SelectedIndex = 0;
    }
}