public void TestBuild()
        {
            // Read the identified spectra from the peptide fixture that lives relative to the test directory.
            string peptideFile = TestContext.CurrentContext.TestDirectory + "/../../../data/TestBuilder.peptides";
            var peptideFormat = new SequestPeptideTextFormat();
            List<IIdentifiedSpectrum> loadedSpectra = peptideFormat.ReadFromFile(peptideFile);
            Assert.AreEqual(4, loadedSpectra.Count);

            IAccessNumberParser accessParser = AccessNumberParserFactory.FindOrCreateParser(@"(IPI\d+)", "IPI");

            // Spectra -> proteins.
            var proteinBuilder = new IdentifiedProteinBuilder();
            List<IIdentifiedProtein> builtProteins = proteinBuilder.Build(loadedSpectra);
            Assert.AreEqual(4, builtProteins.Count);

            // Proteins -> protein groups.
            var groupBuilder = new IdentifiedProteinGroupBuilder();
            List<IIdentifiedProteinGroup> builtGroups = groupBuilder.Build(builtProteins);
            Assert.AreEqual(2, builtGroups.Count);

            // The first group holds a single protein.
            IIdentifiedProteinGroup firstGroup = builtGroups[0];
            Assert.AreEqual(1, firstGroup.Count);
            Assert.AreEqual("IPI:IPI00784154.1|SW", firstGroup[0].Name);

            // The second group holds the two REVERSED_* entries, in this order.
            IIdentifiedProteinGroup secondGroup = builtGroups[1];
            Assert.AreEqual(2, secondGroup.Count);
            Assert.AreEqual("REVERSED_00000001", secondGroup[0].Name);
            Assert.AreEqual("REVERSED_00000002", secondGroup[1].Name);

            // Nothing is asserted on the final result; building it just has to complete without throwing.
            var resultBuilder = new IdentifiedResultBuilder(accessParser, "");
            IIdentifiedResult builtResult = resultBuilder.Build(builtGroups);
        }
        // Example #2
        public void TestBuild()
        {
            // Six peptides with distinct single-letter sequences, each backed by its own spectrum.
            var pep1 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("A", 1, 1, 1, ".dta")))
            {
                Sequence = "A"
            };
            var pep2 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("B", 1, 1, 1, ".dta")))
            {
                Sequence = "B"
            };
            var pep3 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("C", 1, 1, 1, ".dta")))
            {
                Sequence = "C"
            };
            var pep4 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("D", 1, 1, 1, ".dta")))
            {
                Sequence = "D"
            };
            var pep5 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("E", 1, 1, 1, ".dta")))
            {
                Sequence = "E"
            };
            var pep6 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("F", 1, 1, 1, ".dta")))
            {
                Sequence = "F"
            };

            // Expected to survive as the first group.
            var protein1 = new IdentifiedProtein()
            {
                Peptides = new IIdentifiedPeptide[] { pep1, pep3, pep5, pep6 }.ToList()
            };

            // Expected to survive as the second group.
            var protein2 = new IdentifiedProtein()
            {
                Peptides = new IIdentifiedPeptide[] { pep2, pep3, pep4 }.ToList()
            };

            // Should be removed from the final result: all of its peptides are already covered by
            // protein1 and protein2 together, even though neither of them contains both peptides.
            var protein3 = new IdentifiedProtein()
            {
                Peptides = new IIdentifiedPeptide[] { pep1, pep2 }.ToList()
            };

            // Should be removed from the final result: all of its peptides are already included in protein1.
            var protein4 = new IdentifiedProtein()
            {
                Peptides = new IIdentifiedPeptide[] { pep1, pep5 }.ToList()
            };

            // protein4 was previously constructed but never handed to the builder, so the
            // "subset of a single protein" case described above was not actually tested.
            var actual = new IdentifiedProteinGroupBuilder().Build(new IIdentifiedProtein[] { protein1, protein2, protein3, protein4 }.ToList());

            Assert.AreEqual(2, actual.Count);
            Assert.AreSame(protein1, actual[0][0]);
            Assert.AreSame(protein2, actual[1][0]);
        }
        /// <summary>
        /// Runs the summary build for one parameter file: obtains the identified spectra, applies the
        /// optional contamination and one-hit-wonder filters, optionally writes the <c>.peptides</c> file,
        /// then builds proteins, protein groups and the final result and writes the <c>.noredundant</c> file.
        /// </summary>
        /// <param name="parameterFile">
        /// File handed to the spectrum builder; the output file names are derived from it by changing the extension.
        /// </param>
        /// <param name="result">
        /// Receives the peptides file path (when written), whatever <c>PrecursorOffsetCalculator.Process</c>
        /// returns, and the noredundant file path.
        /// </param>
        /// <param name="conf">
        /// Build options supplying builders, filters and file formats. Its <c>SavePeptidesFile</c> flag is set
        /// to <c>false</c> by this method when the peptides are read from <c>options.PeptideFile</c>.
        /// </param>
        private void RunCurrentParameter(string parameterFile, List<string> result, BuildSummaryOptions conf)
        {
            // The returned parser is not used anywhere in this method; the call itself is kept in case it
            // has side effects. NOTE(review): confirm whether it can be dropped entirely.
            conf.Database.GetAccessNumberParser();

            IIdentifiedProteinBuilder proteinBuilder = new IdentifiedProteinBuilder();
            IIdentifiedProteinGroupBuilder groupBuilder = new IdentifiedProteinGroupBuilder()
            {
                Progress = this.Progress
            };

            IdentifiedSpectrumBuilderResult isbr;

            List<IIdentifiedSpectrum> finalPeptides;

            if (string.IsNullOrEmpty(options.PeptideFile))
            {
                // No peptide file configured: build the spectrum list from the configuration.
                IIdentifiedSpectrumBuilder spectrumBuilder = conf.GetSpectrumBuilder();

                // Single 'as' cast instead of an 'is' test followed by a second cast.
                var progressAware = spectrumBuilder as IProgress;
                if (progressAware != null)
                {
                    progressAware.Progress = this.Progress;
                }

                isbr          = spectrumBuilder.Build(parameterFile);
                finalPeptides = isbr.Spectra;
            }
            else
            {
                // A peptide file was supplied: read it instead of rebuilding, and do not write it again.
                Progress.SetMessage("Reading peptides from {0} ...", options.PeptideFile);
                finalPeptides         = new MascotPeptideTextFormat().ReadFromFile(options.PeptideFile);
                conf.SavePeptidesFile = false;
                isbr = null; // no builder result, so no peptide FDR is copied to the final result below
            }

            CalculateIsoelectricPoint(finalPeptides);

            // If contamination is to be removed by protein description, it has to be removed at the
            // peptide level first (skipped here when the FDR level is Protein).
            if (conf.Database.HasContaminationDescriptionFilter() && (conf.FalseDiscoveryRate.FdrLevel != FalseDiscoveryRateLevel.Protein))
            {
                Progress.SetMessage("Removing contamination by description ...");
                var notConGroupFilter = conf.Database.GetNotContaminationDescriptionFilter(Progress);

                var tempResultBuilder = new IdentifiedResultBuilder(null, null);

                // Rebuild proteins/groups from the surviving spectra until a pass removes nothing.
                while (true)
                {
                    List<IIdentifiedProtein>      proteins = proteinBuilder.Build(finalPeptides);
                    List<IIdentifiedProteinGroup> groups   = groupBuilder.Build(proteins);
                    IIdentifiedResult tmpResult            = tempResultBuilder.Build(groups);

                    // Collect the spectra of the first protein of every accepted (non-contamination) group.
                    HashSet<IIdentifiedSpectrum> notConSpectra = new HashSet<IIdentifiedSpectrum>();
                    foreach (var group in tmpResult)
                    {
                        if (notConGroupFilter.Accept(group))
                        {
                            notConSpectra.UnionWith(group[0].GetSpectra());
                        }
                    }

                    // Same count as the input means this pass dropped nothing: stop iterating.
                    if (notConSpectra.Count == finalPeptides.Count)
                    {
                        break;
                    }
                    finalPeptides = notConSpectra.ToList();
                }
            }

            if (conf.FalseDiscoveryRate.FilterOneHitWonder && conf.FalseDiscoveryRate.MinOneHitWonderPeptideCount > 1)
            {
                Progress.SetMessage("Filtering single wonders ...");
                var proteinFilter = new IdentifiedProteinSingleWonderPeptideCountFilter(conf.FalseDiscoveryRate.MinOneHitWonderPeptideCount);
                List<IIdentifiedProtein> proteins = proteinBuilder.Build(finalPeptides);
                int oldProteinCount = proteins.Count;
                proteins.RemoveAll(l => !proteinFilter.Accept(l));

                // Only rebuild the spectrum list when the filter actually rejected a protein.
                if (oldProteinCount != proteins.Count)
                {
                    HashSet<IIdentifiedSpectrum> newspectra = new HashSet<IIdentifiedSpectrum>();
                    foreach (var protein in proteins)
                    {
                        newspectra.UnionWith(protein.GetSpectra());
                    }
                    finalPeptides = newspectra.ToList();
                }
            }

            //if (conf.SavePeptidesFile && !(conf.FalseDiscoveryRate.FilterOneHitWonder && conf.FalseDiscoveryRate.MinOneHitWonderPeptideCount > 1))
            if (conf.SavePeptidesFile)
            {
                if (conf.Database.RemovePeptideFromDecoyDB)
                {
                    // Flag decoy spectra, then drop them. RemoveAll replaces the hand-written
                    // reverse RemoveAt loop and matches the RemoveAll usage above.
                    DecoyPeptideBuilder.AssignDecoy(finalPeptides, conf.GetDecoySpectrumFilter());
                    finalPeptides.RemoveAll(l => l.FromDecoy);
                }

                finalPeptides.Sort();

                // Save the peptides file.
                IFileFormat<List<IIdentifiedSpectrum>> peptideFormat = conf.GetIdentifiedSpectrumFormat();
                string peptideFile = FileUtils.ChangeExtension(parameterFile, ".peptides");
                Progress.SetMessage("Writing peptides file...");
                peptideFormat.WriteToFile(peptideFile, finalPeptides);
                result.Add(peptideFile);

                if (!conf.FalseDiscoveryRate.FilterByFdr && conf.Database.DecoyPatternDefined)
                {
                    WriteFdrFile(parameterFile, conf, finalPeptides);
                }

                Progress.SetMessage("Calculating precursor offset...");
                result.AddRange(new PrecursorOffsetCalculator(finalPeptides).Process(peptideFile));
            }

            Progress.SetMessage("Building protein...");
            // Build the protein list.
            List<IIdentifiedProtein> finalProteins = proteinBuilder.Build(finalPeptides);

            Progress.SetMessage("Building protein group...");
            // Build the protein group list.
            List<IIdentifiedProteinGroup> finalGroups = groupBuilder.Build(finalProteins);

            if (conf.Database.HasContaminationDescriptionFilter())
            {
                // Drop every group the non-contamination filter rejects.
                var notConGroupFilter = conf.Database.GetNotContaminationDescriptionFilter(Progress);
                finalGroups.RemoveAll(g => !notConGroupFilter.Accept(g));
            }

            // Build the final identification result.
            var resultBuilder = conf.GetIdentifiedResultBuilder();

            resultBuilder.Progress = Progress;
            IIdentifiedResult finalResult = resultBuilder.Build(finalGroups);

            finalResult.BuildGroupIndex();

            if (conf.FalseDiscoveryRate.FilterByFdr)
            {
                // Mark each group, and every protein in it, as decoy or not.
                var decoyGroupFilter = conf.GetDecoyGroupFilter();
                foreach (var group in finalResult)
                {
                    group.FromDecoy = decoyGroupFilter.Accept(group);
                    foreach (var protein in group)
                    {
                        protein.FromDecoy = group.FromDecoy;
                    }
                }

                // Protein FDR is calculated from the decoy and non-decoy group counts.
                finalResult.ProteinFDR = conf.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator().Calculate(finalResult.Count(l => l[0].FromDecoy), finalResult.Count(l => !l[0].FromDecoy));
            }

            CalculateIsoelectricPoint(finalResult.GetProteins());
            if (isbr != null)
            {
                finalResult.PeptideFDR = isbr.PeptideFDR;
            }

            // Save the non-redundant protein list file.

            var resultFormat = conf.GetIdetifiedResultFormat(finalResult, this.Progress);

            string noredundantFile = FileUtils.ChangeExtension(parameterFile, ".noredundant");

            Progress.SetMessage("Writing noredundant file...");
            resultFormat.WriteToFile(noredundantFile, finalResult);
            result.Add(noredundantFile);

            Progress.SetMessage("Finished!");
        }