Пример #1
0
        // Checks that N->D and Q->E single substitutions are reported as a mutation while the
        // fifth argument is false and are rejected once it is true, for both IsMutationOne2
        // and IsMutationOneIL2.
        public void TestIsMutationOneIgnoreDeamidated()
        {
            // Source peptides carrying N or Q at the third position.
            const string asnSeq = "ABNDEIR";
            const string glnSeq = "ABQDEIR";

            // Targets with the third residue replaced by D or E.
            const string aspSeq = "ABDDEIR";
            const string gluSeq = "ABEDEIR";

            // Same targets with the I near the C-terminus written as L (used with the IL variant).
            const string aspSeqIL = "ABDDELR";
            const string gluSeqIL = "ABEDELR";

            // One variable is shared by every call because the helpers take it by ref.
            int mutationSite = -1;

            // N->D, fifth flag off, then on.
            bool asnAccepted = MutationUtils.IsMutationOne2(asnSeq, aspSeq, ref mutationSite, false, false, false);
            Assert.IsTrue(asnAccepted);
            bool asnWhenIgnored = MutationUtils.IsMutationOne2(asnSeq, aspSeq, ref mutationSite, false, true, false);
            Assert.IsFalse(asnWhenIgnored);

            // Q->E, fifth flag off, then on.
            bool glnAccepted = MutationUtils.IsMutationOne2(glnSeq, gluSeq, ref mutationSite, false, false, false);
            Assert.IsTrue(glnAccepted);
            bool glnWhenIgnored = MutationUtils.IsMutationOne2(glnSeq, gluSeq, ref mutationSite, false, true, false);
            Assert.IsFalse(glnWhenIgnored);

            // N->D through the IL variant, fifth flag off, then on.
            bool asnAcceptedIL = MutationUtils.IsMutationOneIL2(asnSeq, aspSeqIL, ref mutationSite, false, false, false);
            Assert.IsTrue(asnAcceptedIL);
            bool asnWhenIgnoredIL = MutationUtils.IsMutationOneIL2(asnSeq, aspSeqIL, ref mutationSite, false, true, false);
            Assert.IsFalse(asnWhenIgnoredIL);

            // Q->E through the IL variant, fifth flag off, then on.
            bool glnAcceptedIL = MutationUtils.IsMutationOneIL2(glnSeq, gluSeqIL, ref mutationSite, false, false, false);
            Assert.IsTrue(glnAcceptedIL);
            bool glnWhenIgnoredIL = MutationUtils.IsMutationOneIL2(glnSeq, gluSeqIL, ref mutationSite, false, true, false);
            Assert.IsFalse(glnWhenIgnoredIL);
        }
Пример #2
0
        // Checks that a substitution at the first residue is reported as a mutation while the
        // fourth argument is false and is rejected once it is true, for both IsMutationOne2
        // and IsMutationOneIL2.
        public void TestIsMutationOneIgnoreNTerminal()
        {
            // Pairs differ at the first residue (A vs B); the "IL" spellings also swap I and L.
            const string sourceSeq   = "ABCDEIR";
            const string sourceSeqIL = "ABCDELR";
            const string targetSeq   = "BBCDEIR";
            const string targetSeqIL = "BBCDELR";

            // One variable is shared by every call because the helpers take it by ref.
            int mutationSite = -1;

            // Exact comparison: fourth flag off, then on.
            bool accepted = MutationUtils.IsMutationOne2(sourceSeq, targetSeq, ref mutationSite, false, false, false);
            Assert.IsTrue(accepted);
            bool whenIgnored = MutationUtils.IsMutationOne2(sourceSeq, targetSeq, ref mutationSite, true, false, false);
            Assert.IsFalse(whenIgnored);

            // IL variant with the I/L difference on either side, fourth flag off.
            bool acceptedLOnTarget = MutationUtils.IsMutationOneIL2(sourceSeq, targetSeqIL, ref mutationSite, false, false, false);
            Assert.IsTrue(acceptedLOnTarget);
            bool acceptedLOnSource = MutationUtils.IsMutationOneIL2(sourceSeqIL, targetSeq, ref mutationSite, false, false, false);
            Assert.IsTrue(acceptedLOnSource);

            // IL variant, fourth flag on.
            bool whenIgnoredIL = MutationUtils.IsMutationOneIL2(sourceSeqIL, targetSeq, ref mutationSite, true, false, false);
            Assert.IsFalse(whenIgnoredIL);
        }
Пример #3
0
        /// <summary>
        /// Reads peptide-spectrum matches from <paramref name="fileName"/>, keeps mutation
        /// identifications that are supported by the pNovo result, pairs every mutated peptide
        /// sequence with candidate original peptide sequences according to the mutation type
        /// encoded at the end of its first protein name (type1, type2 or type3), and passes the
        /// three resulting lists to <c>DoStatistic</c>.
        /// </summary>
        /// <param name="fileName">Peptide-spectrum-match file to read.</param>
        /// <returns>The outputs of the three <c>DoStatistic</c> calls (type1, type2, type3), concatenated in that order.</returns>
        /// <exception cref="Exception">
        /// The first protein of a mutated peptide group does not end with "type1", "type2" or "type3".
        /// </exception>
        public override IEnumerable <string> Process(string fileName)
        {
            var aas = new Aminoacids();

            // Collect, per spectrum (keyed by its long file name), every sequence pNovo reported.
            Progress.SetMessage("reading pNovo result from " + pNovoPeptideFile + " ...");
            var pNovoSpectra = new MascotPeptideTextFormat().ReadFromFile(pNovoPeptideFile);
            var pNovoMap     = new Dictionary <string, HashSet <string> >();

            foreach (var pep in pNovoSpectra)
            {
                var key = pep.Query.FileScan.LongFileName;

                // Single lookup instead of ContainsKey followed by the indexer.
                HashSet <string> sequences;
                if (!pNovoMap.TryGetValue(key, out sequences))
                {
                    sequences     = new HashSet <string>();
                    pNovoMap[key] = sequences;
                }
                sequences.UnionWith(from p in pep.Peptides select p.PureSequence);
            }

            var format = new MascotPeptideTextFormat();

            Progress.SetMessage("reading peptide-spectra-matches from " + fileName + " ...");
            var spectra = format.ReadFromFile(fileName);

            // Keep only the configured charge states.
            spectra.RemoveAll(m => !charges.Contains(m.Charge));

            // Drop peptides containing a residue for which the amino-acid table has no codes,
            // then drop spectra that are left without any peptide.
            spectra.ForEach(m =>
            {
                // Walk backwards so that removing an entry does not shift the ones still to visit.
                for (int i = m.Peptides.Count - 1; i >= 0; i--)
                {
                    if (m.Peptides[i].PureSequence.Any(n => aas[n].Codes.Length == 0))
                    {
                        m.RemovePeptideAt(i);
                    }
                }
            });
            spectra.RemoveAll(m => m.Peptides.Count == 0);

            Progress.SetMessage("comparing peptide-spectra-matches with pNovo result...");

            // Does the mutation agree with what pNovo determined? Remove mutation spectra that
            // pNovo has no entry for, or for which none of the peptides (with I replaced by L)
            // is among the pNovo sequences. Non-mutation spectra are always kept.
            // NOTE(review): only the search-result side is I->L normalized here; this presumably
            // relies on pNovo reporting L for both isomers - confirm.
            spectra.RemoveAll(m =>
            {
                if (!IsMutationPeptide(m))
                {
                    return(false);
                }

                HashSet <string> set;
                if (!pNovoMap.TryGetValue(m.Query.FileScan.LongFileName, out set))
                {
                    return(true);
                }

                return(!m.Peptides.Any(n => set.Contains(n.PureSequence.Replace('I', 'L'))));
            });

            // Spectra flagged as mutation identifications, grouped by pure sequence.
            var mutSpectra  = spectra.FindAll(m => IsMutationPeptide(m)).ToList();
            var mutPeptides = (from s in mutSpectra
                               from p in s.Peptides
                               select p).ToList();

            // Materialized once: the groups are counted for the progress range and then iterated,
            // and a deferred GroupBy would redo the grouping for each of those enumerations.
            var mutGroup = mutPeptides.GroupBy(m => m.PureSequence).ToList();

            // Remaining spectra, excluding any that still reference a protein matching mutationReg.
            var fromSpectra = spectra.Except(mutSpectra).ToList();

            fromSpectra.RemoveAll(m => m.Proteins.Any(n => mutationReg.Match(n).Success));
            var fromPeptides = (from s in fromSpectra
                                from p in s.Peptides
                                select p).ToList();

            // Original peptide groups indexed by sequence length; 6 and 30 are the fallback
            // bounds when there is no original peptide at all.
            var fromGroup = fromPeptides.GroupBy(m => m.PureSequence).ToGroupDictionary(n => n.Key.Length);
            var minLength = fromGroup.Count == 0 ? 6 : fromGroup.Min(m => m.Key);
            var maxLength = fromGroup.Count == 0 ? 30 : fromGroup.Max(m => m.Key);

            // One list per mutation type; each entry holds the mutated group first, followed by
            // the original groups matched to it.
            var type1 = new List <List <IGrouping <string, IIdentifiedPeptide> > >();
            var type2 = new List <List <IGrouping <string, IIdentifiedPeptide> > >();
            var type3 = new List <List <IGrouping <string, IIdentifiedPeptide> > >();

            Progress.SetRange(0, mutGroup.Count);
            Progress.SetPosition(0);
            Progress.SetMessage("finding mutation-original pairs ...");

            foreach (var mut in mutGroup)
            {
                var matched = new List <IGrouping <string, IIdentifiedPeptide> >();
                matched.Add(mut);
                Progress.Increment(1);

                // The mutation type is taken from the suffix of the first protein name.
                var protein = mut.First().Proteins[0];

                List <List <IGrouping <string, IIdentifiedPeptide> > > type;
                if (protein.EndsWith("type3"))
                {
                    type = type3;

                    // Candidates: longer original peptides that start with the mutated sequence
                    // minus its last residue.
                    var mutseq = mut.Key.Substring(0, mut.Key.Length - 1);
                    for (int i = mut.Key.Length + 1; i <= maxLength; i++)
                    {
                        if (fromGroup.ContainsKey(i))
                        {
                            var others = fromGroup[i];
                            foreach (var o in others)
                            {
                                if (o.Key.StartsWith(mutseq))
                                {
                                    matched.Add(o);
                                }
                            }
                        }
                    }
                }
                else if (protein.EndsWith("type2"))
                {
                    type = type2;

                    // Candidates: shorter original peptides whose sequence minus its last
                    // residue is a prefix of the mutated sequence.
                    for (int i = minLength; i < mut.Key.Length; i++)
                    {
                        if (fromGroup.ContainsKey(i))
                        {
                            var others = fromGroup[i];
                            foreach (var o in others)
                            {
                                var oseq = o.Key.Substring(0, o.Key.Length - 1);
                                if (mut.Key.StartsWith(oseq))
                                {
                                    matched.Add(o);
                                }
                            }
                        }
                    }
                }
                else if (protein.EndsWith("type1"))
                {
                    type = type1;

                    // Candidates: original peptides of the same length that IsMutationOneIL2
                    // accepts under the configured ignore options.
                    if (fromGroup.ContainsKey(mut.Key.Length))
                    {
                        var oLength = fromGroup[mut.Key.Length];
                        foreach (var o in oLength)
                        {
                            int mutationSite = -1;
                            if (MutationUtils.IsMutationOneIL2(o.Key, mut.Key, ref mutationSite, IgnoreNtermMutation, IgnoreDeamidatedMutation, IgnoreMultipleNucleotideMutation))
                            {
                                matched.Add(o);
                            }
                        }
                    }
                }
                else
                {
                    throw new Exception("There is no mutation type information at protein name: " + protein + "\nIt should be like MUL_NHLGQK_type1, MUL_NHLGQK_type2 or MUL_NHLGQK_type3");
                }

                type.Add(matched);
            }

            // Only type1 is ordered: ascending by number of groups in the entry, ties broken by
            // descending number of peptides in the mutated group (element 0).
            type1.Sort((m1, m2) =>
            {
                var res = m1.Count.CompareTo(m2.Count);
                if (res == 0)
                {
                    res = m2[0].Count().CompareTo(m1[0].Count());
                }
                return(res);
            });

            Progress.SetMessage("reading protein sequences ...");
            var proteins = SequenceUtils.Read(new FastaFormat(), fastaFile);

            // Key each protein by its parsed accession when acParser succeeds, else by its full name.
            var proMap = proteins.ToDictionary(m =>
            {
                string ac;
                if (acParser.TryParse(m.Name, out ac))
                {
                    return(ac);
                }
                else
                {
                    return(m.Name);
                }
            });

            var    classification = GetClassification();
            string mutHeader      = "FileScan\tMH+\tDiff(MH+)\tCharge\tRank\tScore\tExpectValue\tModification";
            var    mutPepFormat   = new MascotPeptideTextFormat(mutHeader);

            Progress.SetMessage("writing result ...");
            var result1 = DoStatistic(fileName, aas, format, proMap, classification, mutHeader, mutPepFormat, type1, ".type1");
            var result2 = DoStatistic(fileName, aas, format, proMap, classification, mutHeader, mutPepFormat, type2, ".type2");
            var result3 = DoStatistic(fileName, aas, format, proMap, classification, mutHeader, mutPepFormat, type3, ".type3");

            return(result1.Concat(result2).Concat(result3).ToArray());
        }