// Verifies that switching on the deamidation flag (the fifth argument) turns the
// N->D and Q->E substitutions from accepted into rejected, for both
// IsMutationOne2 and IsMutationOneIL2.
public void TestIsMutationOneIgnoreDeamidated()
{
    int mutationSite = -1;

    // { original, mutated } pairs: N->D first, then Q->E.
    string[][] exactPairs =
    {
        new[] { "ABNDEIR", "ABDDEIR" },
        new[] { "ABQDEIR", "ABEDEIR" }
    };
    foreach (string[] pair in exactPairs)
    {
        Assert.IsTrue(MutationUtils.IsMutationOne2(pair[0], pair[1], ref mutationSite, false, false, false));
        Assert.IsFalse(MutationUtils.IsMutationOne2(pair[0], pair[1], ref mutationSite, false, true, false));
    }

    // Same substitutions, but the mutated sequence additionally has L where the original has I.
    string[][] leucinePairs =
    {
        new[] { "ABNDEIR", "ABDDELR" },
        new[] { "ABQDEIR", "ABEDELR" }
    };
    foreach (string[] pair in leucinePairs)
    {
        Assert.IsTrue(MutationUtils.IsMutationOneIL2(pair[0], pair[1], ref mutationSite, false, false, false));
        Assert.IsFalse(MutationUtils.IsMutationOneIL2(pair[0], pair[1], ref mutationSite, false, true, false));
    }
}
// Verifies that a substitution at the first residue is accepted by default and
// rejected once the N-terminal flag (the fourth argument) is set, for both
// IsMutationOne2 and IsMutationOneIL2.
public void TestIsMutationOneIgnoreNTerminal()
{
    const string original = "ABCDEIR";
    const string originalWithL = "ABCDELR";
    const string mutated = "BBCDEIR";
    const string mutatedWithL = "BBCDELR";
    const bool keepNterm = false;
    const bool ignoreNterm = true;

    int mutationSite = -1;
    bool accepted;

    accepted = MutationUtils.IsMutationOne2(original, mutated, ref mutationSite, keepNterm, false, false);
    Assert.IsTrue(accepted);

    accepted = MutationUtils.IsMutationOne2(original, mutated, ref mutationSite, ignoreNterm, false, false);
    Assert.IsFalse(accepted);

    // I/L differences between the two sequences are present in both directions below.
    accepted = MutationUtils.IsMutationOneIL2(original, mutatedWithL, ref mutationSite, keepNterm, false, false);
    Assert.IsTrue(accepted);

    accepted = MutationUtils.IsMutationOneIL2(originalWithL, mutated, ref mutationSite, keepNterm, false, false);
    Assert.IsTrue(accepted);

    accepted = MutationUtils.IsMutationOneIL2(originalWithL, mutated, ref mutationSite, ignoreNterm, false, false);
    Assert.IsFalse(accepted);
}
/// <summary>
/// Reads the peptide-spectrum matches in <paramref name="fileName"/>, keeps mutation
/// identifications that are supported by the pNovo result file, pairs every mutated
/// peptide sequence with candidate original peptide sequences according to the
/// type1/type2/type3 suffix of its first protein name, and hands the three pair lists
/// to DoStatistic.
/// </summary>
/// <param name="fileName">Peptide-spectrum-match file readable by MascotPeptideTextFormat.</param>
/// <returns>The results of the type1, type2 and type3 DoStatistic calls, concatenated in that order.</returns>
/// <exception cref="Exception">
/// Thrown when the first protein name of a mutated peptide ends with none of
/// "type1", "type2" or "type3".
/// </exception>
public override IEnumerable<string> Process(string fileName)
{
    var aas = new Aminoacids();

    Progress.SetMessage("reading pNovo result from " + pNovoPeptideFile + " ...");
    var pNovoSpectra = new MascotPeptideTextFormat().ReadFromFile(pNovoPeptideFile);

    // Long file-scan name -> every pure sequence pNovo reported for that scan.
    var pNovoMap = new Dictionary<string, HashSet<string>>();
    foreach (var pep in pNovoSpectra)
    {
        var key = pep.Query.FileScan.LongFileName;

        // Single lookup instead of ContainsKey followed by the indexer.
        HashSet<string> sequences;
        if (!pNovoMap.TryGetValue(key, out sequences))
        {
            sequences = new HashSet<string>();
            pNovoMap[key] = sequences;
        }

        sequences.UnionWith(from p in pep.Peptides select p.PureSequence);
    }

    var format = new MascotPeptideTextFormat();

    Progress.SetMessage("reading peptide-spectra-matches from " + fileName + " ...");
    var spectra = format.ReadFromFile(fileName);

    // Charge filter: keep only spectra whose charge is listed in charges.
    spectra.RemoveAll(m => !charges.Contains(m.Charge));

    // Peptides containing an ambiguous amino acid (one whose Aminoacids entry has no codes)
    // are simply dropped. Iterate backwards so removal does not shift unvisited indices.
    spectra.ForEach(m =>
    {
        for (int i = m.Peptides.Count - 1; i >= 0; i--)
        {
            if (m.Peptides[i].PureSequence.Any(n => aas[n].Codes.Length == 0))
            {
                m.RemovePeptideAt(i);
            }
        }
    });
    spectra.RemoveAll(m => m.Peptides.Count == 0);

    Progress.SetMessage("comparing peptide-spectra-matches with pNovo result...");

    // Is the mutation consistent with what pNovo determined? Mutation spectra are removed
    // when pNovo has no entry for the scan, or when none of their peptides (with I replaced
    // by L) is among the pNovo sequences for that scan. Non-mutation spectra are always kept.
    spectra.RemoveAll(m =>
    {
        if (!IsMutationPeptide(m))
        {
            return false;
        }

        HashSet<string> set;
        if (!pNovoMap.TryGetValue(m.Query.FileScan.LongFileName, out set))
        {
            return true;
        }

        return !m.Peptides.Any(n => set.Contains(n.PureSequence.Replace('I', 'L')));
    });

    // Spectra identified as mutation peptides, grouped by pure sequence.
    // The grouping is materialized once: it is counted for the progress range and then
    // iterated, and a deferred GroupBy would be re-run for each of those enumerations.
    var mutSpectra = spectra.FindAll(m => IsMutationPeptide(m)).ToList();
    var mutPeptides = (from s in mutSpectra
                       from p in s.Peptides
                       select p).ToList();
    var mutGroup = mutPeptides.GroupBy(m => m.PureSequence).ToList();

    // Remaining spectra (the non-mutated side), excluding any that still reference a protein
    // matching mutationReg; their sequence groups are indexed by sequence length.
    var fromSpectra = spectra.Except(mutSpectra).ToList();
    fromSpectra.RemoveAll(m => m.Proteins.Any(n => mutationReg.Match(n).Success));
    var fromPeptides = (from s in fromSpectra
                        from p in s.Peptides
                        select p).ToList();
    var fromGroup = fromPeptides.GroupBy(m => m.PureSequence).ToGroupDictionary(n => n.Key.Length);

    // Fallback length bounds of 6 and 30 are used when there is no non-mutated peptide at all.
    var minLength = fromGroup.Count == 0 ? 6 : fromGroup.Min(m => m.Key);
    var maxLength = fromGroup.Count == 0 ? 30 : fromGroup.Max(m => m.Key);

    // Check the mutation type. Each inner list starts with the mutated group, followed by
    // every matching original group.
    var type1 = new List<List<IGrouping<string, IIdentifiedPeptide>>>();
    var type2 = new List<List<IGrouping<string, IIdentifiedPeptide>>>();
    var type3 = new List<List<IGrouping<string, IIdentifiedPeptide>>>();

    Progress.SetRange(0, mutGroup.Count);
    Progress.SetPosition(0);
    Progress.SetMessage("finding mutation-original pairs ...");

    foreach (var mut in mutGroup)
    {
        var matched = new List<IGrouping<string, IIdentifiedPeptide>>();
        matched.Add(mut);

        Progress.Increment(1);

        // The type is taken from the first protein of the first peptide in the group.
        var protein = mut.First().Proteins[0];
        List<List<IGrouping<string, IIdentifiedPeptide>>> type;

        if (protein.EndsWith("type3"))
        {
            type = type3;

            // Longer originals that start with the mutated sequence minus its last residue.
            var mutseq = mut.Key.Substring(0, mut.Key.Length - 1);
            for (int i = mut.Key.Length + 1; i <= maxLength; i++)
            {
                if (fromGroup.ContainsKey(i))
                {
                    var others = fromGroup[i];
                    foreach (var o in others)
                    {
                        if (o.Key.StartsWith(mutseq))
                        {
                            matched.Add(o);
                        }
                    }
                }
            }
        }
        else if (protein.EndsWith("type2"))
        {
            type = type2;

            // Shorter originals whose sequence minus its last residue prefixes the mutated sequence.
            for (int i = minLength; i < mut.Key.Length; i++)
            {
                if (fromGroup.ContainsKey(i))
                {
                    var others = fromGroup[i];
                    foreach (var o in others)
                    {
                        var oseq = o.Key.Substring(0, o.Key.Length - 1);
                        if (mut.Key.StartsWith(oseq))
                        {
                            matched.Add(o);
                        }
                    }
                }
            }
        }
        else if (protein.EndsWith("type1"))
        {
            type = type1;

            // Same-length originals accepted by MutationUtils.IsMutationOneIL2.
            if (fromGroup.ContainsKey(mut.Key.Length))
            {
                var oLength = fromGroup[mut.Key.Length];
                foreach (var o in oLength)
                {
                    int mutationSite = -1;
                    if (MutationUtils.IsMutationOneIL2(o.Key, mut.Key, ref mutationSite, IgnoreNtermMutation, IgnoreDeamidatedMutation, IgnoreMultipleNucleotideMutation))
                    {
                        matched.Add(o);
                    }
                }
            }
        }
        else
        {
            throw new Exception("There is no mutation type information at protein name: " + protein + "\nIt should be like MUL_NHLGQK_type1, MUL_NHLGQK_type2 or MUL_NHLGQK_type3");
        }

        type.Add(matched);
    }

    // Only type1 is ordered: fewest groups in the entry first, ties broken by the larger
    // mutated group (more peptides) first.
    type1.Sort((m1, m2) =>
    {
        var res = m1.Count.CompareTo(m2.Count);
        if (res == 0)
        {
            res = m2[0].Count().CompareTo(m1[0].Count());
        }
        return res;
    });

    Progress.SetMessage("reading protein sequences ...");
    var proteins = SequenceUtils.Read(new FastaFormat(), fastaFile);

    // Key each protein by its parsed access number, falling back to the raw name.
    var proMap = proteins.ToDictionary(m =>
    {
        string ac;
        if (acParser.TryParse(m.Name, out ac))
        {
            return ac;
        }
        else
        {
            return m.Name;
        }
    });

    var classification = GetClassification();

    string mutHeader = "FileScan\tMH+\tDiff(MH+)\tCharge\tRank\tScore\tExpectValue\tModification";
    var mutPepFormat = new MascotPeptideTextFormat(mutHeader);

    Progress.SetMessage("writing result ...");

    var result1 = DoStatistic(fileName, aas, format, proMap, classification, mutHeader, mutPepFormat, type1, ".type1");
    var result2 = DoStatistic(fileName, aas, format, proMap, classification, mutHeader, mutPepFormat, type2, ".type2");
    var result3 = DoStatistic(fileName, aas, format, proMap, classification, mutHeader, mutPepFormat, type3, ".type3");

    return result1.Concat(result2).Concat(result3).ToArray();
}