/// <summary>
/// Builds an identified result in which every distinct key produced by
/// <paramref name="func"/> becomes one single-protein group that holds the
/// peptides assigned to that key.
/// </summary>
/// <param name="spectra">Spectra whose peptides are grouped. Spectra without any peptide are ignored.</param>
/// <param name="func">Maps a peptide to the key (used as protein name) it is grouped under.</param>
/// <returns>The result containing one group per key, added in sorted key order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="spectra"/> is null.</exception>
public static IIdentifiedResult DoBuildGroupByPeptide(List <IIdentifiedSpectrum> spectra, Func <IIdentifiedPeptide, string> func)
        {
            if (spectra == null)
            {
                throw new ArgumentNullException("spectra");
            }

            var peptidesByKey    = new Dictionary <string, List <IIdentifiedPeptide> >();
            var ambiguousSpectra = new List <IIdentifiedSpectrum>();

            // Pass 1: spectra with exactly one peptide are assigned directly to the
            // bucket of their key; spectra with several peptides are resolved afterwards,
            // once the buckets of all unambiguous spectra are known.
            foreach (var spectrum in spectra)
            {
                int peptideCount = spectrum.Peptides.Count;
                if (peptideCount > 1)
                {
                    ambiguousSpectra.Add(spectrum);
                }
                else if (peptideCount == 1)
                {
                    IIdentifiedPeptide peptide = spectrum.Peptide;
                    string             key     = func(peptide);

                    List <IIdentifiedPeptide> bucket;
                    if (!peptidesByKey.TryGetValue(key, out bucket))
                    {
                        bucket             = new List <IIdentifiedPeptide>();
                        peptidesByKey[key] = bucket;
                    }
                    bucket.Add(peptide);
                }
            }

            // Pass 2: for each ambiguous spectrum pick the peptide whose key currently
            // has the largest bucket. The strict '>' keeps the first peptide on ties,
            // matching a stable descending sort followed by First(). Buckets grow while
            // this loop runs, so earlier assignments influence later ones.
            foreach (var spectrum in ambiguousSpectra)
            {
                IIdentifiedPeptide        bestPeptide = null;
                string                    bestKey     = null;
                List <IIdentifiedPeptide> bestBucket  = null;
                int                       bestCount   = -1;

                foreach (IIdentifiedPeptide candidate in spectrum.Peptides)
                {
                    string key = func(candidate);

                    List <IIdentifiedPeptide> bucket;
                    int count = peptidesByKey.TryGetValue(key, out bucket) ? bucket.Count : 0;

                    if (count > bestCount)
                    {
                        bestPeptide = candidate;
                        bestKey     = key;
                        bestBucket  = bucket;
                        bestCount   = count;
                    }
                }

                // A null bucket means the winning key has not been seen before.
                if (bestBucket == null)
                {
                    bestBucket             = new List <IIdentifiedPeptide>();
                    peptidesByKey[bestKey] = bestBucket;
                }
                bestBucket.Add(bestPeptide);
            }

            var keys = new List <string>(peptidesByKey.Keys);

            keys.Sort();

            IdentifiedResult result = new IdentifiedResult();

            foreach (var key in keys)
            {
                List <IIdentifiedPeptide> peptides = peptidesByKey[key];

                IdentifiedProtein protein = new IdentifiedProtein(key);
                protein.Peptides.AddRange(peptides);
                protein.UniquePeptideCount = 1;
                // The description is taken from the protein list of the first peptide in the bucket.
                protein.Description        = peptides[0].Proteins.Merge('/');

                IdentifiedProteinGroup group = new IdentifiedProteinGroup();
                group.Add(protein);

                result.Add(group);
            }

            result.BuildGroupIndex();
            //result.Sort();

            return(result);
        }
        /// <summary>
        /// Creates a new <c>IdentifiedProteinGroup</c> that contains a clone of every
        /// protein in this group and carries over this group's <c>Index</c> and <c>Enabled</c> values.
        /// </summary>
        /// <returns>The copied group.</returns>
        public object Clone()
        {
            var copy = new IdentifiedProteinGroup();

            // Clone each member protein rather than sharing the instances.
            foreach (var protein in this)
            {
                copy.Add((IIdentifiedProtein)protein.Clone());
            }

            copy.Index   = this.Index;
            copy.Enabled = this.Enabled;

            return copy;
        }
        /// <summary>
        /// Reads an identified result from a protein file and its companion peptide and
        /// peptide-to-group link files, whose names are derived from <paramref name="fileName"/>.
        /// If a file named <paramref name="fileName"/> + ".fasta" exists, it is passed to
        /// <c>IdentifiedResultUtils.FillSequenceFromFasta</c> together with the result.
        /// </summary>
        /// <param name="fileName">Path of the protein file.</param>
        /// <returns>The result with one group per distinct protein group index.</returns>
        /// <exception cref="FileNotFoundException">
        /// The protein file, the peptide file or the link file does not exist.
        /// </exception>
        public override IIdentifiedResult ReadFromFile(string fileName)
        {
            // All three input files must exist before anything is parsed.
            bool proteinFileMissing = !File.Exists(fileName);
            if (proteinFileMissing)
            {
                throw new FileNotFoundException("Protein file not exist : " + fileName);
            }

            string peptideFilename    = GetPeptideFileName(fileName);
            bool   peptideFileMissing = !File.Exists(peptideFilename);
            if (peptideFileMissing)
            {
                throw new FileNotFoundException("Peptide file not exist : " + peptideFilename);
            }

            string linkFileName    = GetLinkFileName(fileName);
            bool   linkFileMissing = !File.Exists(linkFileName);
            if (linkFileMissing)
            {
                throw new FileNotFoundException("Peptide2group file not exist : " + linkFileName);
            }

            // Read the spectra and remember the format the reader ended up with.
            PeptideTextReader          peptideReader = new PeptideTextReader(GetEngineName());
            List <IIdentifiedSpectrum> spectra       = peptideReader.ReadFromFile(peptideFilename);
            this.PeptideFormat = peptideReader.PeptideFormat;

            // Same for the proteins.
            ProteinTextReader         proteinReader = new ProteinTextReader(GetEngineName());
            List <IIdentifiedProtein> proteins      = proteinReader.ReadFromFile(fileName);
            this.ProteinFormat = proteinReader.ProteinFormat;

            var spectraById     = spectra.ToDictionary(s => s.Id);
            var proteinsByGroup = proteins.GroupBy(p => p.GroupIndex);

            IIdentifiedResult result = Allocate();

            // One protein group per distinct GroupIndex, in order of first appearance.
            foreach (var members in proteinsByGroup)
            {
                var group = new IdentifiedProteinGroup();
                foreach (var member in members)
                {
                    group.Add(member);
                }
                result.Add(group);
            }

            // Attach the spectra to the groups named in the link file.
            var linkReader    = new Peptide2GroupTextReader();
            var groupsByIndex = result.ToDictionary(g => g.Index);
            linkReader.LinkPeptideToGroup(linkFileName, spectraById, groupsByIndex);

            string fastaFile = fileName + ".fasta";
            if (File.Exists(fastaFile))
            {
                IdentifiedResultUtils.FillSequenceFromFasta(fastaFile, result, null);
            }

            return result;
        }
        // Example #4
        // 0
        /// <summary>
        /// Checks that a contamination description filter created with "KERATIN" accepts
        /// a group whose protein description contains "Keratin" in mixed case.
        /// </summary>
        public void Run()
        {
            var keratinFilter = new IdentifiedProteinGroupContaminationDescriptionFilter("KERATIN");
            var candidate     = new IdentifiedProteinGroup();

            var keratinProtein = new IdentifiedProtein("P1");
            keratinProtein.Description = "P1 Keratin";
            candidate.Add(keratinProtein);

            bool accepted = keratinFilter.Accept(candidate);

            Assert.IsTrue(accepted);
        }
        // Example #5
        // 0
        /// <summary>
        /// Exercises <c>IdentifiedResult.Filter</c> on a single group whose two proteins
        /// each hold one peptide built on the same spectrum: filtering by sequence is
        /// expected to drop the non-matching protein first, then the whole group.
        /// </summary>
        public void TestFilter2()
        {
            // One spectrum shared by both peptides.
            var sharedSpectrum = new IdentifiedSpectrum();
            sharedSpectrum.Query.FileScan.LongFileName = "ABDCDD.12.123.2.dat";

            var proteinA = new IdentifiedProtein("P1");
            var peptideA = new IdentifiedPeptide(sharedSpectrum);
            peptideA.Sequence = "AAAAAAA";
            proteinA.Peptides.Add(peptideA);

            var proteinB = new IdentifiedProtein("P2");
            var peptideB = new IdentifiedPeptide(sharedSpectrum);
            peptideB.Sequence = "BBBBBBB";
            proteinB.Peptides.Add(peptideB);

            var group = new IdentifiedProteinGroup();
            group.Add(proteinA);
            group.Add(proteinB);

            var identified = new IdentifiedResult();
            identified.Add(group);

            // Before filtering: one group, two proteins, one spectrum.
            Assert.AreEqual(1, identified.Count);
            Assert.AreEqual(2, identified[0].Count);
            Assert.AreEqual(1, identified.GetSpectra().Count);

            // Keeping only sequences with 'A' leaves P1 alone in the group.
            identified.Filter(peptide => peptide.Sequence.Contains('A'));

            Assert.AreEqual(1, identified.Count);
            Assert.AreEqual(1, identified[0].Count);
            Assert.AreEqual(1, identified.GetSpectra().Count);
            Assert.AreSame(proteinA, identified[0][0]);

            // No remaining sequence contains 'C', so nothing is left.
            identified.Filter(peptide => peptide.Sequence.Contains('C'));

            Assert.AreEqual(0, identified.Count);
        }
        // Example #6
        // 0
        /// <summary>
        /// Exercises <c>IdentifiedResult.Filter</c> with charge-based predicates on two
        /// groups (P1 with charges 1 and 2, P2 with charge 3) and checks the remaining
        /// group and spectrum counts after each filter step.
        /// </summary>
        public void TestFilter()
        {
            var pro1 = new IdentifiedProtein("P1");

            pro1.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum()
            {
                Charge = 1
            }));
            pro1.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum()
            {
                Charge = 2
            }));

            var pro2 = new IdentifiedProtein("P2");

            pro2.Peptides.Add(new IdentifiedPeptide(new IdentifiedSpectrum()
            {
                Charge = 3
            }));

            var g1 = new IdentifiedProteinGroup();

            g1.Add(pro1);

            var g2 = new IdentifiedProteinGroup();

            g2.Add(pro2);

            IdentifiedResult ir = new IdentifiedResult();

            ir.Add(g1);
            ir.Add(g2);

            Assert.AreEqual(2, ir.Count);
            Assert.AreEqual(3, ir.GetSpectra().Count);

            ir.Filter(m => { return(m.Spectrum.Query.Charge > 1); });

            Assert.AreEqual(2, ir.Count);
            Assert.AreEqual(2, ir.GetSpectra().Count);
            // The result of All(...) must be asserted; a bare call checks nothing.
            Assert.IsTrue(ir.GetSpectra().All(m => { return(m.Charge > 1); }));

            ir.Filter(m => { return(m.Spectrum.Query.Charge > 2); });
            Assert.AreEqual(1, ir.Count);
            Assert.AreEqual(1, ir.GetSpectra().Count);
            Assert.IsTrue(ir.GetSpectra().All(m => { return(m.Charge > 2); }));

            Assert.AreEqual("P2", ir[0][0].Name);
        }
 /// <summary>
 /// Compares this group with another <c>IdentifiedProteinGroup</c> by forwarding to
 /// the <c>CompareTo</c> overload that takes an <c>IIdentifiedProteinGroup</c>.
 /// </summary>
 /// <param name="other">The group to compare with; passed on unchanged (null stays null).</param>
 /// <returns>Whatever the interface-typed overload returns.</returns>
 public int CompareTo(IdentifiedProteinGroup other)
 {
     // Typing the argument as the interface selects the other overload.
     IIdentifiedProteinGroup otherAsInterface = other;

     return CompareTo(otherAsInterface);
 }
        /// <summary>
        /// Builds protein groups from individual proteins. Each protein starts in its own
        /// group; groups with identical spectrum sets are merged, groups whose peptides all
        /// have <c>GroupCount == 1</c> are set aside as distinct, and among the remaining
        /// groups those whose spectra are fully contained in a different-sized earlier group
        /// are removed. Progress messages and positions are reported through <c>Progress</c>.
        /// </summary>
        /// <param name="proteins">Proteins to group; one initial group is created per entry.</param>
        /// <returns>The distinct groups followed by the remaining groups, each sorted by protein name.</returns>
        public List <IIdentifiedProteinGroup> Build(List <IIdentifiedProtein> proteins)
        {
            var result = new List <IIdentifiedProteinGroup>();

            Progress.SetMessage("Initializing protein group/spectra map ...");
            // Maps every initial group to the set of spectra of its protein. Entries are
            // never removed, so groups merged away later still have a (stale) entry.
            var groupMap = new Dictionary <IIdentifiedProteinGroup, HashSet <IIdentifiedSpectrum> >();

            foreach (IIdentifiedProtein protein in proteins)
            {
                IIdentifiedProteinGroup group = new IdentifiedProteinGroup();
                group.Add(protein);

                var spectraSet = new HashSet <IIdentifiedSpectrum>(protein.GetSpectra());
                groupMap[group] = spectraSet;

                result.Add(group);
            }

            Progress.SetMessage("Sorting protein groups ...");
            // Negated comparisons: descending by PeptideCount, then by UniquePeptideCount,
            // both taken from the first protein of each group.
            result.Sort((m1, m2) =>
            {
                int ret = -m1[0].PeptideCount.CompareTo(m2[0].PeptideCount);
                if (ret == 0)
                {
                    ret = -m1[0].UniquePeptideCount.CompareTo(m2[0].UniquePeptideCount);
                }
                return(ret);
            });

            Progress.SetMessage("Merging proteins with same peptide-spectrum matches ...");

            Progress.SetRange(0, result.Count);
            // First, merge all groups with identical content.
            // Iterating from the end keeps the indices below i valid after RemoveAt(i).
            for (int i = result.Count - 1; i > 0; i--)
            {
                Progress.SetPosition(result.Count - i);
                HashSet <IIdentifiedSpectrum> iSpectra = groupMap[result[i]];
                for (int j = i - 1; j >= 0; j--)
                {
                    // Only earlier groups with the same counts are candidates; the list is
                    // sorted by these counts, so the scan stops at the first mismatch.
                    if (result[j][0].PeptideCount == result[i][0].PeptideCount &&
                        result[j][0].UniquePeptideCount == result[i][0].UniquePeptideCount)
                    {
                        HashSet <IIdentifiedSpectrum> jSpectra = groupMap[result[j]];

                        if (jSpectra.SetEquals(iSpectra))
                        {
                            // If the contents are identical, merge the two groups.
                            foreach (IIdentifiedProtein protein in result[i])
                            {
                                result[j].Add(protein);
                            }

                            // Remove group i.
                            result.RemoveAt(i);
                            break;
                        }
                    }
                    else
                    {
                        break;
                    }
                }
            }

            Progress.SetMessage("Initializing peptide group count ...");
            // Presumably sets GroupCount on the peptides (it is read just below) — defined elsewhere; confirm.
            InitializePeptideGroupCount(result);

            Progress.SetMessage("Extracting distinct protein groups ...");
            var temp = result;

            result = new List <IIdentifiedProteinGroup>();
            // Move every group whose peptides all have GroupCount == 1 from temp into result.
            // NOTE(review): the loop condition is i > 0, so temp[0] is never examined and
            // always stays in temp — confirm whether i >= 0 was intended.
            for (int i = temp.Count - 1; i > 0; i--)
            {
                if (temp[i].GetPeptides().All(m => m.GroupCount == 1))
                {
                    result.Add(temp[i]);
                    temp.RemoveAt(i);
                }
            }
            Progress.SetMessage("There are {0} distinct and {1} undistinct protein groups. ", result.Count, temp.Count);

            Progress.SetMessage("Removing redundant protein groups from undistinct protein groups...");

            var oldcount = temp.Count;

            Progress.SetRange(0, oldcount);
            // Remove groups that are contained in another group.
            for (int i = temp.Count - 1; i > 0; i--)
            {
                Progress.SetPosition(oldcount - i);
                HashSet <IIdentifiedSpectrum> iSpectra = groupMap[temp[i]];
                for (int j = i - 1; j >= 0; j--)
                {
                    HashSet <IIdentifiedSpectrum> jSpectra = groupMap[temp[j]];
                    // Sets of equal size are skipped; only a different-sized earlier set
                    // can cause group i to be removed.
                    if (jSpectra.Count == iSpectra.Count)
                    {
                        continue;
                    }

                    // Every spectrum of group i also occurs in group j.
                    if (iSpectra.All(l => jSpectra.Contains(l)))
                    {
                        // Remove group i.
                        temp.RemoveAt(i);
                        break;
                    }
                }
            }

            // Defined elsewhere; its effect on temp is not visible from this method.
            RemoveUndistinctProteinGroups(temp);

            result.AddRange(temp);

            Progress.SetMessage("Sorting proteins in group ...");
            result.ForEach(m => m.SortByProteinName());

            Progress.SetMessage("Building protein groups done.");
            return(result);
        }
        // Example #9
        // 0
        /// <summary>
        /// Reads the next protein group from <paramref name="filein"/>: a run of protein
        /// lines followed by the spectrum lines that belong to them.
        /// </summary>
        /// <param name="filein">Reader positioned somewhere before or at the next protein line.</param>
        /// <param name="peptideMap">
        /// Spectra seen so far, keyed by "file-rank-engine-tag". A spectrum whose key is already
        /// present is replaced by the stored instance; otherwise the parsed one is stored.
        /// </param>
        /// <param name="lastLine">
        /// The most recently read line on entry; on return, the line at which reading stopped
        /// (null at end of file).
        /// </param>
        /// <returns>The group that was read, or null when no further protein line exists.</returns>
        /// <exception cref="UserTerminatedException">Cancellation was requested while reading spectrum lines.</exception>
        private IIdentifiedProteinGroup ReadNextProteinGroup(StreamReader filein, Dictionary <string, IIdentifiedSpectrum> peptideMap, ref string lastLine)
        {
            Progress.SetPosition(filein.BaseStream.Position);

            // Advance to the next protein line, giving up at end of file.
            while (!IdentifiedResultUtils.IsProteinLine(lastLine))
            {
                lastLine = filein.ReadLine();
                if (lastLine == null)
                {
                    break;
                }
            }

            if (lastLine == null)
            {
                return null;
            }

            IIdentifiedProteinGroup group = new IdentifiedProteinGroup();

            // Consume the consecutive protein lines that make up this group.
            for (; IdentifiedResultUtils.IsProteinLine(lastLine); lastLine = filein.ReadLine())
            {
                IIdentifiedProtein protein = ProteinFormat.ParseString(lastLine);
                group.Add(protein);

                protein.GroupIndex = IdentifiedResultUtils.GetGroupIndex(lastLine);
            }

            var hits = new List <IIdentifiedSpectrum>();

            // Consume spectrum lines until the next protein line, a blank line or end of file.
            while (!IdentifiedResultUtils.IsProteinLine(lastLine))
            {
                if (Progress.IsCancellationPending())
                {
                    throw new UserTerminatedException();
                }

                IIdentifiedSpectrum parsed = PeptideFormat.ParseString(lastLine);

                string id = string.Format("{0}-{1}-{2}-{3}", parsed.Query.FileScan.LongFileName, parsed.Rank, parsed.Engine, parsed.Tag);

                // Reuse the instance already registered under this id so that groups share spectra.
                IIdentifiedSpectrum known;
                if (peptideMap.TryGetValue(id, out known))
                {
                    hits.Add(known);
                }
                else
                {
                    peptideMap[id] = parsed;
                    hits.Add(parsed);
                }

                lastLine = filein.ReadLine();

                bool endOfGroup = lastLine == null || lastLine.Trim().Length == 0;
                if (endOfGroup)
                {
                    break;
                }
            }

            // Spectra are attached only after all lines of the group were parsed.
            hits.ForEach(hit => group.AddIdentifiedSpectrum(hit));

            return group;
        }