示例#1
0
        /// <summary>
        /// Checks that <c>IdentifiedPeptideUtils.GetUniquePeptideCount</c> reports a single
        /// unique peptide for sequences that differ only in where I and L occur, both when
        /// the peptides share one spectrum and when a further peptide comes from a
        /// different spectrum.
        /// </summary>
        public void TestUniqueCount()
        {
            var sharedSpectrum = new IdentifiedSpectrum();

            // Two peptides attached to the same spectrum; sequences differ only by I/L swaps.
            var peptides = new List<IIdentifiedPeptide>
            {
                new IdentifiedPeptide(sharedSpectrum) { Sequence = "ILLLAR" },
                new IdentifiedPeptide(sharedSpectrum) { Sequence = "LILIAR" }
            };

            Assert.AreEqual(1, IdentifiedPeptideUtils.GetUniquePeptideCount(peptides));

            // A third I/L variant on its own spectrum must not raise the unique count.
            peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum()) { Sequence = "LIIIAR" });

            Assert.AreEqual(1, IdentifiedPeptideUtils.GetUniquePeptideCount(peptides));
        }
示例#2
0
        /// <summary>
        /// Round-trips one tab-separated protein row through the converter built from the
        /// "noredundant" style header: parses the row into a protein, checks each parsed
        /// field, then checks that the protein is written back as the same text.
        /// </summary>
        public void TestNoredundant()
        {
            const int expectedPeptideCount = 84;

            string columnHeader = "	Reference	PepCount	UniquePepCount	CoverPercent	MW	PI	IdentifiedName";
            IPropertyConverter<IIdentifiedProtein> proteinConverter =
                IdentifiedProteinPropertyConverterFactory.GetInstance().GetConverters(columnHeader, '\t');

            // The composite converter is expected to report the header it was built from.
            Assert.AreEqual(columnHeader, proteinConverter.Name);

            string row = "\tIPI:IPI00784154.1|SWISS-PROT:P10809|TREMBL:B2R5M6;Q53QD5;Q53SE2;Q96RI4;Q9UCR6|ENSEMBL:ENSP00000340019;ENSP00000373620|REFSEQ:NP_002147;NP_955472|H-INV:HIT000031088 Tax_Id=9606 Gene_Symbol=HSPD1 60 kDa heat shock protein, mitochondrial	84	19	43.46%	61054.43	5.70	IPI:IPI00784154.1|SWISS-PROT:P10809|TREMBL:B2R5M6;Q53QD5;Q53SE2;Q96RI4;Q9UCR6|ENSEMBL:ENSP00000340019;ENSP00000373620|REFSEQ:NP_002147;NP_955472|H-INV:HIT000031088 Tax_Id=9606 Gene_Symbol=HSPD1 60 kDa heat shock protein, mitochondrial";
            IIdentifiedProtein parsed = new IdentifiedProtein();

            proteinConverter.SetProperty(parsed, row);

            // The reference column is split into a name and a description.
            Assert.AreEqual("IPI:IPI00784154.1|SWISS-PROT:P10809|TREMBL:B2R5M6;Q53QD5;Q53SE2;Q96RI4;Q9UCR6|ENSEMBL:ENSP00000340019;ENSP00000373620|REFSEQ:NP_002147;NP_955472|H-INV:HIT000031088", parsed.Name);
            Assert.AreEqual("Tax_Id=9606 Gene_Symbol=HSPD1 60 kDa heat shock protein, mitochondrial", parsed.Description);
            Assert.AreEqual(19, parsed.UniquePeptideCount);
            Assert.AreEqual(43.46, parsed.Coverage);
            Assert.AreEqual(61054.43, parsed.MolecularWeight);
            Assert.AreEqual(5.7, parsed.IsoelectricPoint);

            // Attach as many peptides as the PepCount column states before writing the row back.
            int remaining = expectedPeptideCount;
            while (remaining > 0)
            {
                parsed.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum()));
                remaining--;
            }

            Assert.AreEqual(row, proteinConverter.GetProperty(parsed));
        }
示例#3
0
        /// <summary>
        /// Round-trips one tab-separated protein row through the converter built from the
        /// DTASelect style header: parses the row into a protein, checks each parsed field,
        /// then checks that the protein is written back as the same text.
        /// </summary>
        public void TestDtaselect()
        {
            const int expectedPeptideCount = 495;

            string columnHeader = "Locus	Sequence Count	Spectrum Count	Sequence Coverage	Length	MolWt	pI	Validation Status	Descriptive Name";
            IPropertyConverter<IIdentifiedProtein> proteinConverter =
                IdentifiedProteinPropertyConverterFactory.GetInstance().GetConverters(columnHeader, '\t');

            // The composite converter is expected to report the header it was built from.
            Assert.AreEqual(columnHeader, proteinConverter.Name);

            string row = "YDR050C	495	495	81.10%	249	26795.41	5.74	U	YDR050C TPI1 SGDID:S000002457, Chr IV from 556470-555724, reverse complement, Verified ORF, \"Triose phosphate isomerase, abundant glycolytic enzyme; mRNA half-life is regulated by iron availability; transcription is controlled by activators Reb1p, Gcr1p, and Rap1p through binding sites in the 5' non-coding region\"";
            IIdentifiedProtein parsed = new IdentifiedProtein();

            proteinConverter.SetProperty(parsed, row);

            // The locus becomes the name; the expected description omits the repeated locus prefix.
            Assert.AreEqual("YDR050C", parsed.Name);
            Assert.AreEqual("TPI1 SGDID:S000002457, Chr IV from 556470-555724, reverse complement, Verified ORF, \"Triose phosphate isomerase, abundant glycolytic enzyme; mRNA half-life is regulated by iron availability; transcription is controlled by activators Reb1p, Gcr1p, and Rap1p through binding sites in the 5' non-coding region\"", parsed.Description);
            Assert.AreEqual(495, parsed.UniquePeptideCount);
            Assert.AreEqual(81.1, parsed.Coverage);
            Assert.AreEqual(26795.41, parsed.MolecularWeight);
            Assert.AreEqual(5.74, parsed.IsoelectricPoint);

            // Attach as many peptides as the Spectrum Count column states before writing the row back.
            int remaining = expectedPeptideCount;
            while (remaining > 0)
            {
                parsed.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum()));
                remaining--;
            }

            Assert.AreEqual(row, proteinConverter.GetProperty(parsed));
        }
示例#4
0
        /// <summary>
        /// Builds one protein per distinct protein accession referenced by the peptides of
        /// the given spectra and attaches the matching peptides to it.
        /// </summary>
        /// <typeparam name="T">Spectrum type; must implement <c>IIdentifiedSpectrumBase</c>.</typeparam>
        /// <param name="spectra">Spectra whose peptides are distributed to proteins.</param>
        /// <returns>
        /// A new list holding the created proteins. Each protein's <c>Name</c> is the accession
        /// string, and it receives at most one peptide per spectrum.
        /// </returns>
        public List <IIdentifiedProtein> Build <T>(IEnumerable <T> spectra) where T : IIdentifiedSpectrumBase
        {
            var proteinsByAccession = new Dictionary<string, IIdentifiedProtein>();

            // Accessions that already received a peptide from the spectrum currently being processed.
            var seenForSpectrum = new HashSet<string>();

            foreach (var spectrum in spectra)
            {
                seenForSpectrum.Clear();
                foreach (var peptide in spectrum.Peptides)
                {
                    foreach (string ac in peptide.Proteins)
                    {
                        // If several peptides of this spectrum map to the same protein, only the
                        // first one is attached. HashSet.Add returns false for an existing entry,
                        // so membership test and insertion are a single lookup.
                        if (!seenForSpectrum.Add(ac))
                        {
                            continue;
                        }

                        IIdentifiedProtein pro;
                        if (!proteinsByAccession.TryGetValue(ac, out pro))
                        {
                            pro      = new IdentifiedProtein();
                            pro.Name = ac;
                            proteinsByAccession[ac] = pro;
                        }

                        pro.Peptides.Add(peptide);
                    }
                }
            }

            return(new List <IIdentifiedProtein>(proteinsByAccession.Values));
        }
示例#5
0
        /// <summary>
        /// Checks that <c>IdentifiedProteinGroupBuilder.Build</c> keeps only the proteins that
        /// are needed to explain all peptides: proteins whose peptides are already covered by
        /// other proteins are expected to be dropped from the result.
        /// </summary>
        public void TestBuild()
        {
            var pep1 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("A", 1, 1, 1, ".dta"))) { Sequence = "A" };
            var pep2 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("B", 1, 1, 1, ".dta"))) { Sequence = "B" };
            var pep3 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("C", 1, 1, 1, ".dta"))) { Sequence = "C" };
            var pep4 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("D", 1, 1, 1, ".dta"))) { Sequence = "D" };
            var pep5 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("E", 1, 1, 1, ".dta"))) { Sequence = "E" };
            var pep6 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("F", 1, 1, 1, ".dta"))) { Sequence = "F" };

            var protein1 = new IdentifiedProtein()
            {
                Peptides = new List<IIdentifiedPeptide> { pep1, pep3, pep5, pep6 }
            };

            var protein2 = new IdentifiedProtein()
            {
                Peptides = new List<IIdentifiedPeptide> { pep2, pep3, pep4 }
            };

            // Should be removed from the final result: every peptide is already contained in
            // protein1 or protein2, even though no single protein contains both peptides.
            var protein3 = new IdentifiedProtein()
            {
                Peptides = new List<IIdentifiedPeptide> { pep1, pep2 }
            };

            // Should be removed from the final result: every peptide is already contained in protein1.
            var protein4 = new IdentifiedProtein()
            {
                Peptides = new List<IIdentifiedPeptide> { pep1, pep5 }
            };

            // protein4 was previously constructed but never handed to the builder, so the
            // subset case it describes was not exercised at all.
            var actual = new IdentifiedProteinGroupBuilder().Build(
                new List<IIdentifiedProtein> { protein1, protein2, protein3, protein4 });

            Assert.AreEqual(2, actual.Count);
            Assert.AreSame(protein1, actual[0][0]);
            Assert.AreSame(protein2, actual[1][0]);
        }
        /// <summary>
        /// Builds a result in which every distinct peptide key becomes its own protein inside
        /// its own single-protein group.
        /// </summary>
        /// <remarks>
        /// Spectra with exactly one peptide are grouped by the key of their <c>Peptide</c>.
        /// A spectrum with several peptides contributes exactly one of them: the peptide whose
        /// key currently has the most members (the first such peptide on ties). Spectra
        /// without peptides contribute nothing. Groups are added in sorted key order.
        /// </remarks>
        /// <param name="spectra">Spectra to distribute.</param>
        /// <param name="func">Maps a peptide to its grouping key; the key becomes the protein name.</param>
        /// <returns>The populated result, after <c>BuildGroupIndex</c> has been called on it.</returns>
        public static IIdentifiedResult DoBuildGroupByPeptide(List <IIdentifiedSpectrum> spectra, Func <IIdentifiedPeptide, string> func)
        {
            IdentifiedResult result = new IdentifiedResult();

            var peptidesByKey = new Dictionary<string, List<IIdentifiedPeptide>>();

            // Unambiguous spectra first: they establish the member counts used below.
            var singlePeptides = spectra.Where(s => s.Peptides.Count == 1).GroupBy(s => func(s.Peptide));
            foreach (var sameKey in singlePeptides)
            {
                peptidesByKey[sameKey.Key] = sameKey.Select(s => s.Peptide).ToList();
            }

            var multiplePeptides = spectra.Where(s => s.Peptides.Count > 1).ToList();
            foreach (var spectrum in multiplePeptides)
            {
                IIdentifiedPeptide best = null;
                string bestKey          = null;
                int    bestCount        = -1;

                // Pick the candidate whose key is already the most populated. The strict
                // comparison keeps the earliest candidate on ties, and the key is computed
                // only once per candidate.
                foreach (IIdentifiedPeptide candidate in spectrum.Peptides)
                {
                    string candidateKey = func(candidate);

                    List<IIdentifiedPeptide> existing;
                    int count = peptidesByKey.TryGetValue(candidateKey, out existing) ? existing.Count : 0;

                    if (count > bestCount)
                    {
                        best      = candidate;
                        bestKey   = candidateKey;
                        bestCount = count;
                    }
                }

                List<IIdentifiedPeptide> target;
                if (!peptidesByKey.TryGetValue(bestKey, out target))
                {
                    target = new List<IIdentifiedPeptide>();
                    peptidesByKey[bestKey] = target;
                }
                target.Add(best);
            }

            var keys = new List <string>(peptidesByKey.Keys);

            keys.Sort();

            foreach (var key in keys)
            {
                List<IIdentifiedPeptide> members = peptidesByKey[key];

                IdentifiedProtein protein = new IdentifiedProtein(key);
                protein.Peptides.AddRange(members);
                protein.UniquePeptideCount = 1;

                // The description lists the proteins of the first member, joined by '/'.
                protein.Description = members[0].Proteins.Merge('/');

                IdentifiedProteinGroup group = new IdentifiedProteinGroup();
                group.Add(protein);

                result.Add(group);
            }

            result.BuildGroupIndex();
            //result.Sort();

            return(result);
        }
示例#7
0
        /// <summary>
        /// Filters a result whose single group holds two proteins that share one spectrum:
        /// a filter matching only the first protein's peptide must leave that protein alone
        /// in the group, and a filter matching nothing must empty the result.
        /// </summary>
        public void TestFilter2()
        {
            var sharedSpectrum = new IdentifiedSpectrum();
            sharedSpectrum.Query.FileScan.LongFileName = "ABDCDD.12.123.2.dat";

            var first = new IdentifiedProtein("P1");
            first.Peptides.Add(new IdentifiedPeptide(sharedSpectrum) { Sequence = "AAAAAAA" });

            var second = new IdentifiedProtein("P2");
            second.Peptides.Add(new IdentifiedPeptide(sharedSpectrum) { Sequence = "BBBBBBB" });

            var group = new IdentifiedProteinGroup();
            group.Add(first);
            group.Add(second);

            var identified = new IdentifiedResult();
            identified.Add(group);

            // Before filtering: one group, two proteins, one shared spectrum.
            Assert.AreEqual(1, identified.Count);
            Assert.AreEqual(2, identified[0].Count);
            Assert.AreEqual(1, identified.GetSpectra().Count);

            // Only the peptide of P1 contains 'A'.
            identified.Filter(m => m.Sequence.Contains('A'));

            Assert.AreEqual(1, identified.Count);
            Assert.AreEqual(1, identified[0].Count);
            Assert.AreEqual(1, identified.GetSpectra().Count);
            Assert.AreSame(first, identified[0][0]);

            // No remaining peptide contains 'C'.
            identified.Filter(m => m.Sequence.Contains('C'));

            Assert.AreEqual(0, identified.Count);
        }
示例#8
0
        /// <summary>
        /// Filters a two-group result by spectrum charge and checks, after each filter, the
        /// number of remaining groups and spectra and that every remaining spectrum
        /// satisfies the charge threshold.
        /// </summary>
        public void TestFilter()
        {
            var pro1 = new IdentifiedProtein("P1");

            pro1.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum() { Charge = 1 }));
            pro1.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum() { Charge = 2 }));

            var pro2 = new IdentifiedProtein("P2");

            pro2.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum() { Charge = 3 }));

            var g1 = new IdentifiedProteinGroup();
            g1.Add(pro1);

            var g2 = new IdentifiedProteinGroup();
            g2.Add(pro2);

            IdentifiedResult ir = new IdentifiedResult();
            ir.Add(g1);
            ir.Add(g2);

            Assert.AreEqual(2, ir.Count);
            Assert.AreEqual(3, ir.GetSpectra().Count);

            // Drops the charge-1 spectrum; both groups still have at least one peptide.
            ir.Filter(m => m.Spectrum.Query.Charge > 1);

            Assert.AreEqual(2, ir.Count);
            Assert.AreEqual(2, ir.GetSpectra().Count);
            // The result of All() used to be discarded, so this check verified nothing.
            Assert.IsTrue(ir.GetSpectra().All(m => m.Charge > 1));

            // Drops the charge-2 spectrum as well, which empties the first group.
            ir.Filter(m => m.Spectrum.Query.Charge > 2);

            Assert.AreEqual(1, ir.Count);
            Assert.AreEqual(1, ir.GetSpectra().Count);
            Assert.IsTrue(ir.GetSpectra().All(m => m.Charge > 2));

            Assert.AreEqual("P2", ir[0][0].Name);
        }
        /// <summary>
        /// Adds three peptides built on two spectra to one protein and checks that all three
        /// peptides are kept while spectra and distinct peptides are each counted as two.
        /// </summary>
        public void TestDistinctPeptides()
        {
            var target = new IdentifiedProtein();

            var repeatedSpectrum = new IdentifiedSpectrum();
            var otherSpectrum    = new IdentifiedSpectrum();

            // Two peptides share the first spectrum; the third has a spectrum of its own.
            var owners = new[] { repeatedSpectrum, repeatedSpectrum, otherSpectrum };
            foreach (var owner in owners)
            {
                target.Peptides.Add(new IdentifiedPeptide(owner));
            }

            Assert.AreEqual(3, target.Peptides.Count);
            Assert.AreEqual(2, target.GetSpectra().Count);
            Assert.AreEqual(2, target.GetDistinctPeptides().Count());
        }
        /// <summary>
        /// Adds peptides to a 30-residue protein one at a time and checks the coverage
        /// percentage reported after each <c>CalculateCoverage</c> call (tolerance 0.01).
        /// </summary>
        public void TestCalculateCoverage()
        {
            var target = new IdentifiedProtein();

            // 30 amino acids in total.
            target.Sequence = "ABCDEDFDEFDSESLKJFDJLSLGFGDDGD";

            // Expected values match counting only the residues between the dots:
            // "CDEDF" -> 5 of 30.
            var first = new IdentifiedPeptide(new IdentifiedSpectrum()) { Sequence = "B.CDEDF.D" };
            target.Peptides.Add(first);
            target.CalculateCoverage();
            Assert.AreEqual(16.67, target.Coverage, 0.01);

            // "DSESL" adds 5 more -> 10 of 30.
            var second = new IdentifiedPeptide(new IdentifiedSpectrum()) { Sequence = "F.DSESL.K" };
            target.Peptides.Add(second);
            target.CalculateCoverage();
            Assert.AreEqual(33.33, target.Coverage, 0.01);

            // "SLGF" adds 4 more -> 14 of 30.
            var third = new IdentifiedPeptide(new IdentifiedSpectrum()) { Sequence = "L.SLGF.G" };
            target.Peptides.Add(third);
            target.CalculateCoverage();
            Assert.AreEqual(46.67, target.Coverage, 0.01);

            // "SLGFG" overlaps the previous peptide and adds a single residue -> 15 of 30.
            var fourth = new IdentifiedPeptide(new IdentifiedSpectrum()) { Sequence = "L.SLGFG.D" };
            target.Peptides.Add(fourth);
            target.CalculateCoverage();
            Assert.AreEqual(50.00, target.Coverage, 0.01);
        }
示例#11
0
        /// <summary>
        /// Reads one protein from the XML reader, consuming nodes until the closing
        /// <c>proteinTag</c> element or the end of the input.
        /// </summary>
        /// <remarks>
        /// Peptide elements are parsed via <c>ReadIndividualPeptide</c> and attached to the
        /// protein. The text of a "reference" element is stored in <c>Reference</c>; the text
        /// of any other element is collected by element name and handed to
        /// <c>AssignProteinValue</c> when the closing protein tag is reached. If the input
        /// ends before that tag, the collected values are not assigned.
        /// </remarks>
        /// <param name="reader">Reader positioned inside a protein element.</param>
        /// <returns>The protein read so far.</returns>
        private IdentifiedProtein ReadIndividualProtein(XmlTextReader reader)
        {
            var protein        = new IdentifiedProtein();
            var collectedTexts = new Dictionary<string, string>();
            string currentElement = "";

            while (reader.Read())
            {
                XmlNodeType kind = reader.NodeType;

                if (kind == XmlNodeType.Element)
                {
                    if (reader.Name.Equals(peptideTag))
                    {
                        protein.Peptides.Add(ReadIndividualPeptide(reader, protein.Name).Peptide);
                    }
                    else
                    {
                        // Remember which element the next text node belongs to.
                        currentElement = reader.Name;
                    }
                    continue;
                }

                if (kind == XmlNodeType.Text)
                {
                    if (currentElement.Equals("reference"))
                    {
                        protein.Reference = reader.Value;
                    }
                    else
                    {
                        collectedTexts.Add(currentElement, reader.Value);
                    }
                    continue;
                }

                if (kind == XmlNodeType.EndElement && reader.Name.Equals(proteinTag))
                {
                    // End of this protein: apply the collected element values and stop reading.
                    AssignProteinValue(protein, collectedTexts);
                    break;
                }
            }

            return(protein);
        }