/// <summary>
/// Checks that <c>IdentifiedResult.Filter</c> drops the peptides rejected by the
/// predicate: two successive charge filters are applied and the remaining
/// group/spectrum counts are verified after each one.
/// </summary>
public void TestFilter()
{
    var pro1 = new IdentifiedProtein("P1");
    pro1.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum() { Charge = 1 }));
    pro1.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum() { Charge = 2 }));

    var pro2 = new IdentifiedProtein("P2");
    pro2.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum() { Charge = 3 }));

    var g1 = new IdentifiedProteinGroup();
    g1.Add(pro1);

    var g2 = new IdentifiedProteinGroup();
    g2.Add(pro2);

    IdentifiedResult ir = new IdentifiedResult();
    ir.Add(g1);
    ir.Add(g2);

    // Baseline: two groups carrying three spectra in total.
    Assert.AreEqual(2, ir.Count);
    Assert.AreEqual(3, ir.GetSpectra().Count);

    // Keep charge > 1: both groups are still expected, with two spectra left.
    ir.Filter(m => m.Spectrum.Query.Charge > 1);
    Assert.AreEqual(2, ir.Count);
    Assert.AreEqual(2, ir.GetSpectra().Count);
    // The result of All() used to be discarded, so this check never ran.
    Assert.IsTrue(ir.GetSpectra().All(m => m.Charge > 1));

    // Keep charge > 2: only the group holding P2 is expected to remain.
    ir.Filter(m => m.Spectrum.Query.Charge > 2);
    Assert.AreEqual(1, ir.Count);
    Assert.AreEqual(1, ir.GetSpectra().Count);
    Assert.IsTrue(ir.GetSpectra().All(m => m.Charge > 2));
    Assert.AreEqual("P2", ir[0][0].Name);
}
/// <summary>
/// Builds three single-protein groups that share peptides with each other and
/// checks the outcome of <c>DistinctResultDistiller.KeepDistinctPeptideOnly</c>:
/// two groups are expected to remain, each with exactly one peptide.
/// </summary>
public void TestKeepDistinctPeptideOnly()
{
    // Four spectra, each with one freshly created peptide.
    var specA = new IdentifiedSpectrum();
    var pepA = specA.NewPeptide();

    var specB = new IdentifiedSpectrum();
    var pepB = specB.NewPeptide();

    var specC = new IdentifiedSpectrum();
    var pepC = specC.NewPeptide();

    var specD = new IdentifiedSpectrum();
    var pepD = specD.NewPeptide();

    // The first protein only owns peptides that also appear in the other two.
    var sharedOnlyProtein = new IdentifiedProtein();
    sharedOnlyProtein.Peptides.Add(pepA);
    sharedOnlyProtein.Peptides.Add(pepB);

    // The second protein shares pepA and owns pepC alone.
    var proteinWithC = new IdentifiedProtein();
    proteinWithC.Peptides.Add(pepA);
    proteinWithC.Peptides.Add(pepC);

    // The third protein shares pepB and owns pepD alone.
    var proteinWithD = new IdentifiedProtein();
    proteinWithD.Peptides.Add(pepB);
    proteinWithD.Peptides.Add(pepD);

    var sharedOnlyGroup = new IdentifiedProteinGroup();
    sharedOnlyGroup.Add(sharedOnlyProtein);

    var groupWithC = new IdentifiedProteinGroup();
    groupWithC.Add(proteinWithC);

    var groupWithD = new IdentifiedProteinGroup();
    groupWithD.Add(proteinWithD);

    IIdentifiedResult ir = new IdentifiedResult();
    ir.Add(sharedOnlyGroup);
    ir.Add(groupWithC);
    ir.Add(groupWithD);

    new DistinctResultDistiller().KeepDistinctPeptideOnly(ir);

    // Expected: the shared-only group is gone, the other two keep their order.
    Assert.AreEqual(2, ir.Count);
    Assert.AreEqual(groupWithC, ir[0]);
    Assert.AreEqual(groupWithD, ir[1]);

    Assert.AreEqual(1, ir[0].GetPeptides().Count);
    Assert.AreEqual(specC, ir[0].GetPeptides()[0]);

    Assert.AreEqual(1, ir[1].GetPeptides().Count);
    Assert.AreEqual(specD, ir[1].GetPeptides()[0]);
}
/// <summary>
/// Reads the next protein group from <paramref name="filein"/>: a run of
/// consecutive protein lines followed by the peptide lines that belong to them.
/// </summary>
/// <param name="filein">Reader positioned somewhere at or before the next group.</param>
/// <param name="peptideMap">
/// Spectra already seen, keyed by "LongFileName-Rank". A peptide line whose key is
/// already present reuses the stored spectrum instead of the newly parsed one;
/// otherwise the newly parsed spectrum is stored.
/// </param>
/// <param name="lastLine">
/// Look-ahead line carried between calls. On return it holds the line that ended
/// the peptide section (a protein line, a blank line, or null at end of stream).
/// </param>
/// <returns>The group that was read, or null when no further protein line exists.</returns>
private IIdentifiedProteinGroup ReadNextProteinGroup(StreamReader filein, Dictionary<string, IIdentifiedSpectrum> peptideMap, ref string lastLine)
{
    // Skip forward until the look-ahead line is a protein line or the stream ends.
    while (!IsProteinLine(lastLine) && (lastLine = filein.ReadLine()) != null)
    {
    }

    if (lastLine == null)
    {
        return null;
    }

    IIdentifiedProteinGroup result = new IdentifiedProteinGroup();

    // Every consecutive protein line becomes a member of the group.
    // The null guard covers a stream that ends right after a protein line.
    while (lastLine != null && IsProteinLine(lastLine))
    {
        IIdentifiedProtein protein = new IdentifiedProtein();
        this.proteinConverter.SetProperty(protein, lastLine);
        result.Add(protein);

        lastLine = filein.ReadLine();
    }

    var peptides = new List<IIdentifiedSpectrum>();

    // Without the null guard a truncated file would hand a null line to the
    // peptide converter.
    while (lastLine != null && !IsProteinLine(lastLine))
    {
        IIdentifiedSpectrum mphit = new IdentifiedSpectrum();
        this.peptideConverter.SetProperty(mphit, lastLine);

        string id = mphit.Query.FileScan.LongFileName + "-" + mphit.Rank;

        // Single lookup: reuse the stored spectrum if present, otherwise register this one.
        IIdentifiedSpectrum known;
        if (peptideMap.TryGetValue(id, out known))
        {
            mphit = known;
        }
        else
        {
            peptideMap[id] = mphit;
        }

        peptides.Add(mphit);

        lastLine = filein.ReadLine();

        // End of stream or a blank line terminates the peptide section.
        if (lastLine == null || lastLine.Trim().Length == 0)
        {
            break;
        }
    }

    peptides.Sort();
    result.AddIdentifiedSpectra(peptides);

    return result;
}
/// <summary>
/// Expects a contamination filter built with the pattern "KERATIN" to accept a
/// group whose only protein is described as "P1 Keratin".
/// </summary>
public void Run()
{
    var filter = new IdentifiedProteinGroupContaminationDescriptionFilter("KERATIN");

    var group = new IdentifiedProteinGroup();
    var keratinProtein = new IdentifiedProtein("P1") { Description = "P1 Keratin" };
    group.Add(keratinProtein);

    // Pattern and description differ in letter case; acceptance is still expected.
    bool accepted = filter.Accept(group);
    Assert.IsTrue(accepted);
}
/// <summary>
/// Exercises <c>ProteinNameProcessor.Process</c> on a two-protein group with
/// patterns that match both names, neither name, and exactly one name.
/// </summary>
public void TestProcess()
{
    IIdentifiedProteinGroup group = new IdentifiedProteinGroup();
    group.Add(new IdentifiedProtein("BBBCCC"));
    group.Add(new IdentifiedProtein("AAABBB"));

    IIdentifiedProteinGroup finalGroup;

    // Both names contain the pattern: both proteins are kept.
    var matchBoth = new ProteinNameProcessor(new[] { "BBB" });
    finalGroup = matchBoth.Process(group);
    Assert.AreEqual(2, finalGroup.Count);

    // Neither name contains the pattern: both proteins are kept.
    var matchNone = new ProteinNameProcessor(new[] { "DDD" });
    finalGroup = matchNone.Process(group);
    Assert.AreEqual(2, finalGroup.Count);

    // Only one name contains the pattern: only that protein is kept.
    var matchOne = new ProteinNameProcessor(new[] { "AAA" });
    finalGroup = matchOne.Process(group);
    Assert.AreEqual(1, finalGroup.Count);
    Assert.AreEqual("AAABBB", finalGroup[0].Name);
}
/// <summary>
/// Groups proteins whose spectra are value-equal into the same protein group.
/// </summary>
/// <remarks>
/// The input list is modified: each protein's peptides are sorted, the list itself
/// is sorted, and every protein merged into an earlier protein's group is removed
/// from the list.
/// </remarks>
/// <param name="proteins">Proteins to group; mutated as described above.</param>
/// <returns>One group per protein left in the list, in list order.</returns>
public static List<IIdentifiedProteinGroup> BuildRedundantProteinGroups(List<IIdentifiedProtein> proteins)
{
    foreach (IIdentifiedProtein protein in proteins)
    {
        protein.SortPeptides();
    }

    proteins.Sort();

    var groups = new List<IIdentifiedProteinGroup>();

    // The list shrinks while merging, so its Count is re-read on every pass.
    int anchorIndex = 0;
    while (anchorIndex < proteins.Count)
    {
        IIdentifiedProtein anchor = proteins[anchorIndex];

        var group = new IdentifiedProteinGroup();
        groups.Add(group);
        group.Add(anchor);

        int candidateIndex = anchorIndex + 1;
        while (candidateIndex < proteins.Count)
        {
            IIdentifiedProtein candidate = proteins[candidateIndex];

            bool sameCounts = anchor.UniquePeptideCount == candidate.UniquePeptideCount
                              && anchor.Peptides.Count == candidate.Peptides.Count;
            if (!sameCounts)
            {
                // Stop scanning for this anchor at the first protein whose counts differ.
                break;
            }

            if (!CollectionUtils.ValueEquals(anchor.GetSpectra(), candidate.GetSpectra()))
            {
                candidateIndex++;
                continue;
            }

            // Same spectra: merge into the anchor's group. The index is not advanced
            // because the removal shifts the next protein into this slot.
            group.Add(candidate);
            proteins.RemoveAt(candidateIndex);
        }

        anchorIndex++;
    }

    return groups;
}
/// <summary>
/// Runs <c>ProteinNameProcessor.Process</c> against a group of two proteins using
/// a pattern found in both names, in neither name, and in exactly one name.
/// </summary>
public void TestProcess()
{
    IIdentifiedProteinGroup group = new IdentifiedProteinGroup();
    group.Add(new IdentifiedProtein("BBBCCC"));
    group.Add(new IdentifiedProtein("AAABBB"));

    // Pattern present in both names: both proteins are retained.
    IIdentifiedProteinGroup finalGroup = new ProteinNameProcessor(new[] { "BBB" }).Process(group);
    Assert.AreEqual(2, finalGroup.Count);

    // Pattern present in neither name: both proteins are retained.
    finalGroup = new ProteinNameProcessor(new[] { "DDD" }).Process(group);
    Assert.AreEqual(2, finalGroup.Count);

    // Pattern present in one name only: just that protein is retained.
    finalGroup = new ProteinNameProcessor(new[] { "AAA" }).Process(group);
    Assert.AreEqual(1, finalGroup.Count);
    Assert.AreEqual("AAABBB", finalGroup[0].Name);
}
/// <summary>
/// Filters by peptide sequence when two proteins in one group hold different
/// peptides built on the same spectrum.
/// </summary>
public void TestFilter2()
{
    var sharedSpectrum = new IdentifiedSpectrum();
    sharedSpectrum.Query.FileScan.LongFileName = "ABDCDD.12.123.2.dat";

    var proteinA = new IdentifiedProtein("P1");
    proteinA.Peptides.Add(new IdentifiedPeptide(sharedSpectrum) { Sequence = "AAAAAAA" });

    var proteinB = new IdentifiedProtein("P2");
    proteinB.Peptides.Add(new IdentifiedPeptide(sharedSpectrum) { Sequence = "BBBBBBB" });

    var group = new IdentifiedProteinGroup();
    group.Add(proteinA);
    group.Add(proteinB);

    var ir = new IdentifiedResult();
    ir.Add(group);

    // Baseline: one group, two proteins, a single shared spectrum.
    Assert.AreEqual(1, ir.Count);
    Assert.AreEqual(2, ir[0].Count);
    Assert.AreEqual(1, ir.GetSpectra().Count);

    // Only the 'A' peptide passes: P1 is expected to be the sole protein left.
    ir.Filter(m => m.Sequence.Contains('A'));
    Assert.AreEqual(1, ir.Count);
    Assert.AreEqual(1, ir[0].Count);
    Assert.AreEqual(1, ir.GetSpectra().Count);
    Assert.AreSame(proteinA, ir[0][0]);

    // No remaining sequence contains 'C': the result is expected to be empty.
    ir.Filter(m => m.Sequence.Contains('C'));
    Assert.AreEqual(0, ir.Count);
}
/// <summary>
/// Checks that <c>AnnotationUtils.GetAnnotationKeys</c> returns both annotation
/// keys when each of two spectra in a result carries a different key.
/// </summary>
public void TestGetAnnotationKeys()
{
    const string firstKey = "TEST1";
    const string secondKey = "TEST2";

    var firstSpectrum = new IdentifiedSpectrum();
    firstSpectrum.Annotations.Add(firstKey, null);
    firstSpectrum.Query.FileScan.Experimental = "EXP1";
    // The instance is discarded on purpose; presumably the constructor attaches
    // the peptide to the spectrum, since firstSpectrum.Peptide is used below.
    new IdentifiedPeptide(firstSpectrum);

    var secondSpectrum = new IdentifiedSpectrum();
    secondSpectrum.Annotations.Add(secondKey, null);
    secondSpectrum.Query.FileScan.Experimental = "EXP2";
    new IdentifiedPeptide(secondSpectrum);

    firstSpectrum.Peptide.Sequence = "SEQ1";
    secondSpectrum.Peptide.Sequence = "SEQ2";

    var protein = new IdentifiedProtein();
    protein.Peptides.Add(firstSpectrum.Peptide);
    protein.Peptides.Add(secondSpectrum.Peptide);

    var group = new IdentifiedProteinGroup();
    group.Add(protein);

    var mascotResult = new MascotResult();
    mascotResult.Add(group);

    List<string> annotationKeys = AnnotationUtils.GetAnnotationKeys(mascotResult.GetSpectra());

    Assert.AreEqual(2, annotationKeys.Count);
    Assert.IsTrue(annotationKeys.Contains(firstKey));
    Assert.IsTrue(annotationKeys.Contains(secondKey));
}
/// <summary>
/// Redraws the graph for the item carried by <paramref name="e"/>: for every
/// dataset/sample combination the log peptide ratios are plotted at one numeric
/// x position (normal points in black, outliers in green) together with the log
/// protein ratio in red.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">
/// Carries the item to plot (an <c>IIdentifiedProteinGroup</c> or an
/// <c>IEnumerable&lt;IIdentifiedSpectrum&gt;</c>) and the statistic option.
/// </param>
/// <exception cref="ArgumentException">
/// <c>e.Item</c> is of neither supported type, or <c>e.Option</c> is not an
/// <c>ITraqProteinStatisticOption</c> when it is needed for plotting.
/// </exception>
public void Update(object sender, UpdateQuantificationItemEventArgs e)
{
    ITraqProteinStatisticOption option = e.Option as ITraqProteinStatisticOption;

    IIdentifiedProteinGroup protein;
    var spectra = e.Item as IEnumerable<IIdentifiedSpectrum>;
    if (spectra != null)
    {
        // Wrap a bare spectrum list in a one-protein group so both inputs share one code path.
        protein = new IdentifiedProteinGroup();
        protein.Add(new IdentifiedProtein());
        protein[0].Peptides.AddRange(from s in spectra select s.Peptide);
    }
    else
    {
        protein = e.Item as IIdentifiedProteinGroup;
    }

    if (protein == null)
    {
        throw new ArgumentException("e.Item should be IIdentifiedProteinGroup or IEnumerable<IIdentifiedSpectrum>");
    }

    // The first peptide with a valid isobaric item determines the plex type used below.
    var validItem = protein[0].Peptides.FirstOrDefault(m =>
    {
        var item = m.Spectrum.FindIsobaricItem();
        return null != item && item.Valid;
    });

    if (null == validItem)
    {
        zgcGraph.ClearData(true);
        return;
    }

    // Checked only here, right before first use, so the "nothing to plot" path
    // above keeps working with any option. Replaces an opaque NullReferenceException.
    if (option == null)
    {
        throw new ArgumentException("e.Option should be ITraqProteinStatisticOption");
    }

    var masterPane = zgcGraph.InitMasterPanel(g, 1, title, this.pl);
    var panel = masterPane[0];

    var samples = option.GetSamples(validItem.Spectrum.FindIsobaricItem().PlexType);
    var dsNames = option.DatasetMap.Keys.OrderBy(m => m).ToList();
    var ratioCalc = option.GetRatioCalculator();

    xlabels.Clear();

    double index = 0.0;

    // Legend labels are blanked after their first use, presumably so each series
    // shows up in the legend only once.
    string outlierStr = "Outlier";
    string proteinStr = "Protein Ratio";

    foreach (var dsName in dsNames)
    {
        var expNames = new HashSet<string>(option.DatasetMap[dsName]);

        foreach (var sample in samples)
        {
            // One x position per dataset/sample pair, even when it has no ratios.
            index += 1.0;

            ratioCalc.GetSample = sample.GetValue;
            ratioCalc.DatasetName = dsName;
            ratioCalc.ChannelName = sample.ChannelRatioName;
            ratioCalc.Filter = m => expNames.Contains(m.Query.FileScan.Experimental);

            var ratios = ratioCalc.Calculate(protein);

            xlabels.Add(dsName + ":" + sample.Name);

            if (ratios.Count > 0)
            {
                var ratio = protein[0].FindITraqChannelItem(dsName, sample.ChannelRatioName).Ratio;

                PointPairList pplNormal = new PointPairList();
                PointPairList pplOutlier = new PointPairList();
                PointPairList pplProteinRatio = new PointPairList();

                foreach (var r in ratios)
                {
                    var point = new PointPair(index, Math.Log(r.Ratio));
                    if (r.IsOutlier)
                    {
                        pplOutlier.Add(point);
                    }
                    else
                    {
                        pplNormal.Add(point);
                    }
                }

                pplProteinRatio.Add(new PointPair(index, Math.Log(ratio)));

                panel.AddPoints(pplProteinRatio, Color.Red, proteinStr);

                if (pplOutlier.Count > 0)
                {
                    panel.AddPoints(pplOutlier, Color.Green, outlierStr);
                    outlierStr = string.Empty;
                }

                panel.AddPoints(pplNormal, Color.Black);

                proteinStr = string.Empty;
            }
        }
    }

    // NOTE(review): the handler is subscribed on every call; confirm InitMasterPanel
    // returns a fresh pane each time, otherwise handlers accumulate.
    panel.XAxis.ScaleFormatEvent += new Axis.ScaleFormatHandler(XAxis_ScaleFormatEvent);
    panel.XAxis.Scale.Min = 0.0;
    panel.XAxis.Scale.Max = index + 1.0;
    panel.XAxis.Scale.FontSpec.Angle = 90;
    panel.YAxis.Title.Text = "log(Ratio)";

    ZedGraphicExtension.UpdateGraph(zgcGraph);
}
/// <summary>
/// Redraws the graph for the item carried by <paramref name="e"/> on a text x
/// axis: one category per dataset/sample combination, each holding the log
/// protein ratio (red), outlier peptide ratios (green) and normal peptide ratios
/// (black).
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">
/// Carries the item to plot (an <c>IIdentifiedProteinGroup</c> or an
/// <c>IEnumerable&lt;IIdentifiedSpectrum&gt;</c>) and the statistic option.
/// </param>
/// <exception cref="ArgumentException">
/// <c>e.Item</c> is of neither supported type, or <c>e.Option</c> is not an
/// <c>ITraqProteinStatisticOption</c> when it is needed for plotting.
/// </exception>
public void Update(object sender, UpdateQuantificationItemEventArgs e)
{
    ITraqProteinStatisticOption option = e.Option as ITraqProteinStatisticOption;

    IIdentifiedProteinGroup protein;
    var spectra = e.Item as IEnumerable<IIdentifiedSpectrum>;
    if (spectra != null)
    {
        // Wrap a bare spectrum list in a one-protein group so both inputs share one code path.
        protein = new IdentifiedProteinGroup();
        protein.Add(new IdentifiedProtein());
        protein[0].Peptides.AddRange(from s in spectra select s.Peptide);
    }
    else
    {
        protein = e.Item as IIdentifiedProteinGroup;
    }

    if (protein == null)
    {
        throw new ArgumentException("e.Item should be IIdentifiedProteinGroup or IEnumerable<IIdentifiedSpectrum>");
    }

    // The first peptide with a valid isobaric item determines the plex type used below.
    var validItem = protein[0].Peptides.FirstOrDefault(m =>
    {
        var item = m.Spectrum.FindIsobaricItem();
        return null != item && item.Valid;
    });

    if (null == validItem)
    {
        zgcGraph.ClearData(true);
        return;
    }

    // Checked only here, right before first use, so the "nothing to plot" path
    // above keeps working with any option. Replaces an opaque NullReferenceException.
    if (option == null)
    {
        throw new ArgumentException("e.Option should be ITraqProteinStatisticOption");
    }

    var masterPane = zgcGraph.InitMasterPanel(g, 1, title, this.pl);
    var panel = masterPane[0];

    var samples = option.GetSamples(validItem.Spectrum.FindIsobaricItem().PlexType);
    var dsNames = option.DatasetMap.Keys.OrderBy(m => m).ToList();
    var ratioCalc = option.GetRatioCalculator();

    List<string> xlabels = new List<string>();
    List<PointPairList> outliers = new List<PointPairList>();
    List<PointPairList> normals = new List<PointPairList>();
    PointPairList proteins = new PointPairList();

    // Loop over the datasets.
    foreach (var dsName in dsNames)
    {
        var expNames = new HashSet<string>(option.DatasetMap[dsName]);

        // Loop over the samples.
        foreach (var sample in samples)
        {
            ratioCalc.GetSample = sample.GetValue;
            ratioCalc.DatasetName = dsName;
            ratioCalc.ChannelName = sample.ChannelRatioName;
            ratioCalc.Filter = m => expNames.Contains(m.Query.FileScan.Experimental);

            var ratios = ratioCalc.Calculate(protein);

            // Add the category name for this dataset/sample pair.
            xlabels.Add(dsName + ":" + sample.Name);

            // Each category has three kinds of data: outlier, normal and protein ratio.
            // The outlier/normal lists are added even when empty so that every list
            // has one entry per category.
            var outlier = new PointPairList();
            outliers.Add(outlier);
            var normal = new PointPairList();
            normals.Add(normal);

            if (ratios.Count > 0)
            {
                var ratio = protein[0].FindITraqChannelItem(dsName, sample.ChannelRatioName).Ratio;
                proteins.Add(new PointPair() { Y = Math.Log(ratio) });

                foreach (var r in ratios)
                {
                    var point = new PointPair() { Y = Math.Log(r.Ratio) };
                    if (r.IsOutlier)
                    {
                        outlier.Add(point);
                    }
                    else
                    {
                        normal.Add(point);
                    }
                }
            }
            else
            {
                // A category without ratios gets a "missing" protein point.
                proteins.Add(new PointPair() { Y = PointPair.Missing });
            }
        }
    }

    panel.AddPoints(proteins, Color.Red, "Ratio");
    AddOrdinalPoints(outliers, panel, Color.Green, "Outlier");
    AddOrdinalPoints(normals, panel, Color.Black, "");

    panel.XAxis.Type = AxisType.Text;
    panel.XAxis.Scale.FontSpec.Angle = 90;
    panel.XAxis.Scale.TextLabels = xlabels.ToArray();
    panel.YAxis.Title.Text = "log(Ratio)";

    ZedGraphicExtension.UpdateGraph(zgcGraph);
}
/// <summary>
/// Two spectra with different annotation keys are placed in one result;
/// <c>AnnotationUtils.GetAnnotationKeys</c> is expected to report both keys.
/// </summary>
public void TestGetAnnotationKeys()
{
    string keyOne = "TEST1";
    string keyTwo = "TEST2";

    var spectrumOne = new IdentifiedSpectrum();
    spectrumOne.Annotations.Add(keyOne, null);
    spectrumOne.Query.FileScan.Experimental = "EXP1";
    // Result intentionally unused; presumably the constructor links the new
    // peptide to the spectrum, since spectrumOne.Peptide is read afterwards.
    new IdentifiedPeptide(spectrumOne);

    var spectrumTwo = new IdentifiedSpectrum();
    spectrumTwo.Annotations.Add(keyTwo, null);
    spectrumTwo.Query.FileScan.Experimental = "EXP2";
    new IdentifiedPeptide(spectrumTwo);

    spectrumOne.Peptide.Sequence = "SEQ1";
    spectrumTwo.Peptide.Sequence = "SEQ2";

    var owner = new IdentifiedProtein();
    owner.Peptides.Add(spectrumOne.Peptide);
    owner.Peptides.Add(spectrumTwo.Peptide);

    var proteinGroup = new IdentifiedProteinGroup();
    proteinGroup.Add(owner);

    var result = new MascotResult();
    result.Add(proteinGroup);

    List<string> annotationKeys = AnnotationUtils.GetAnnotationKeys(result.GetSpectra());

    Assert.AreEqual(2, annotationKeys.Count);
    Assert.IsTrue(annotationKeys.Contains(keyOne));
    Assert.IsTrue(annotationKeys.Contains(keyTwo));
}
/// <summary>
/// Parses the text of a Mascot result into a <see cref="MascotResult"/>.
/// </summary>
/// <remarks>
/// Steps, in order: read the p-value/score pair; read the peak isotopic type and
/// peak tolerance when available; build the peptide filter; create one protein
/// group per protein regex match; attach the "Proteins matching the same set"
/// proteins found in the text after each match; drop groups whose first protein
/// has no peptides; then refine modifications, merge peptides and initialise the
/// unique peptide counts.
/// Resets <c>this.modifications</c> and replaces <c>this.currentPeptideFilter</c>.
/// </remarks>
/// <param name="fileContent">Complete text of the result file.</param>
/// <returns>The parsed result.</returns>
public MascotResult ParseContent(String fileContent)
{
    var result = new MascotResult();

    this.modifications = new Dictionary<string, char>();

    Pair<int, double> pValueScore = ParsePValueScore(fileContent);
    result.PValueScore = pValueScore.First;
    result.PValue = pValueScore.Second;

    // Start/end position of every protein match, together with the group built from it.
    var offsets = new List<Offset>();

    try
    {
        result.PeakIsotopicType = ParsePeakIsotopicType(fileContent);
    }
    catch (ArgumentException)
    {
        // Deliberately ignored: the value is left unset when the parser rejects
        // the content with an ArgumentException.
    }

    try
    {
        result.PeakTolerance = ParsePeakTolerance(fileContent);
    }
    catch (ArgumentException)
    {
        // Deliberately ignored, as above.
    }

    var filters = new List<IFilter<IIdentifiedSpectrum>>();
    if (this.filterByDefaultScoreAndPvalue)
    {
        filters.Add(new IdentifiedSpectrumScoreFilter(pValueScore.First));
        filters.Add(new IdentifiedSpectrumExpectValueFilter(pValueScore.Second));
    }

    filters.Add(new IdentifiedSpectrumRankFilter(1));

    if (null != this.defaultPeptideFilter)
    {
        filters.Add(this.defaultPeptideFilter);
    }

    this.currentPeptideFilter = new AndFilter<IIdentifiedSpectrum>(filters);

    Match proteinMatch = GetProteinRegex().Match(fileContent);
    while (proteinMatch.Success)
    {
        IdentifiedProtein protein = ParseProtein(proteinMatch.Groups[1].Value);

        var group = new IdentifiedProteinGroup();
        group.Add(protein);
        result.Add(group);

        offsets.Add(new Offset(proteinMatch.Index, proteinMatch.Index + proteinMatch.Length, group));

        proteinMatch = proteinMatch.NextMatch();
    }

    // The marker is fixed ASCII text, so search ordinally rather than with the
    // culture-sensitive default of IndexOf(string).
    int endIndex = fileContent.IndexOf("Peptide matches not assigned to protein hits", StringComparison.Ordinal);
    if (-1 == endIndex)
    {
        endIndex = fileContent.Length - 1;
    }

    for (int i = 0; i < offsets.Count; i++)
    {
        // Text between the end of this protein match and the start of the next one
        // (or the end marker / end of content for the last match).
        int start = offsets[i].End;
        int end = i == offsets.Count - 1 ? endIndex : offsets[i + 1].Start;
        String redundant = fileContent.Substring(start, end - start + 1);

        if (!redundant.Contains("Proteins matching the same set"))
        {
            continue;
        }

        // Proteins listed as matching the same set receive the peptides of the
        // group's first protein and join that group.
        List<IdentifiedProtein> sameMatchProteins = ParseSameMatchProteins(redundant);
        foreach (IdentifiedProtein mp in sameMatchProteins)
        {
            mp.Peptides.AddRange(offsets[i].Mpg[0].Peptides);
            offsets[i].Mpg.Add(mp);
        }
    }

    // Iterate backwards so removals do not shift the indices still to be visited.
    for (int i = result.Count - 1; i >= 0; i--)
    {
        if (0 == result[i][0].Peptides.Count)
        {
            result.RemoveAt(i);
        }
    }

    RefineModification(result);

    MergePeptides(result);

    result.InitUniquePeptideCount();

    return result;
}