/// <summary>
/// Checks that N->D and Q->E substitutions are reported as a single mutation when the
/// deamidation filter is off and are rejected when it is on, for both
/// IsMutationOne2 and IsMutationOneIL2.
/// </summary>
public void TestIsMutationOneIgnoreDeamidated()
{
    // The N-terminal and multiple-nucleotide filters stay disabled throughout.
    const bool ignoreNterm = false;
    const bool ignoreMultipleNucleotide = false;
    int mutationSite = -1;

    // N->D
    Assert.IsTrue(MutationUtils.IsMutationOne2("ABNDEIR", "ABDDEIR", ref mutationSite, ignoreNterm, false, ignoreMultipleNucleotide));
    Assert.IsFalse(MutationUtils.IsMutationOne2("ABNDEIR", "ABDDEIR", ref mutationSite, ignoreNterm, true, ignoreMultipleNucleotide));

    // Q->E
    Assert.IsTrue(MutationUtils.IsMutationOne2("ABQDEIR", "ABEDEIR", ref mutationSite, ignoreNterm, false, ignoreMultipleNucleotide));
    Assert.IsFalse(MutationUtils.IsMutationOne2("ABQDEIR", "ABEDEIR", ref mutationSite, ignoreNterm, true, ignoreMultipleNucleotide));

    // N->D, with an additional I/L difference
    Assert.IsTrue(MutationUtils.IsMutationOneIL2("ABNDEIR", "ABDDELR", ref mutationSite, ignoreNterm, false, ignoreMultipleNucleotide));
    Assert.IsFalse(MutationUtils.IsMutationOneIL2("ABNDEIR", "ABDDELR", ref mutationSite, ignoreNterm, true, ignoreMultipleNucleotide));

    // Q->E, with an additional I/L difference
    Assert.IsTrue(MutationUtils.IsMutationOneIL2("ABQDEIR", "ABEDELR", ref mutationSite, ignoreNterm, false, ignoreMultipleNucleotide));
    Assert.IsFalse(MutationUtils.IsMutationOneIL2("ABQDEIR", "ABEDELR", ref mutationSite, ignoreNterm, true, ignoreMultipleNucleotide));
}
/// <summary>
/// Checks that IsAminoacidEquals treats 'I' and 'L' as equal in either argument order
/// and does not treat 'L' and 'A' as equal.
/// </summary>
public void TestIsAminoacidEquals()
{
    bool iVersusL = MutationUtils.IsAminoacidEquals('I', 'L');
    Assert.IsTrue(iVersusL);

    bool lVersusI = MutationUtils.IsAminoacidEquals('L', 'I');
    Assert.IsTrue(lVersusI);

    bool lVersusA = MutationUtils.IsAminoacidEquals('L', 'A');
    Assert.IsFalse(lVersusA);
}
/// <summary>
/// Builds, for every visible amino acid, the list of single-residue substitutions
/// that pass the configured filters, each carrying its monoisotopic mass difference.
/// </summary>
/// <returns>
/// A map from source residue to its substitution variants, sorted by ascending DeltaMass.
/// A source residue may map to an empty list when every surviving target was dropped
/// by the minimum-delta-mass filter.
/// </returns>
private Dictionary<char, List<TargetVariant>> GetSubstitutionDeltaMass()
{
    var variantsBySource = new Dictionary<char, List<TargetVariant>>();
    var aminoacids = new Aminoacids();
    var candidates = aminoacids.GetVisibleAminoacids();

    foreach (var source in candidates)
    {
        foreach (var target in candidates)
        {
            // Short-circuit evaluation keeps the filters in their original order.
            bool filteredOut =
                source == target
                || (this.IsSingleNucleotideMutationOnly && !MutationUtils.IsSingleNucleotideMutation(source, target))
                || (this.IgnoreDeamidatedMutation && MutationUtils.IsDeamidatedMutation(source, target));
            if (filteredOut)
            {
                continue;
            }

            // The entry for the source residue is created before the mass filter runs,
            // so a key can end up with an empty list.
            List<TargetVariant> variants;
            if (!variantsBySource.TryGetValue(source, out variants))
            {
                variants = new List<TargetVariant>();
                variantsBySource[source] = variants;
            }

            var massShift = aminoacids[target].MonoMass - aminoacids[source].MonoMass;
            if (Math.Abs(massShift) < MinimumAminoacidSubstitutionDeltaMass)
            {
                continue;
            }

            var variant = new TargetVariant();
            variant.Source = source.ToString();
            variant.Target = new HashSet<string>(new[] { target.ToString() });
            variant.DeltaMass = massShift;
            variant.TargetType = VariantType.SingleAminoacidPolymorphism;
            variants.Add(variant);
        }
    }

    foreach (var variants in variantsBySource.Values)
    {
        variants.Sort((left, right) => left.DeltaMass.CompareTo(right.DeltaMass));
    }

    return variantsBySource;
}
/// <summary>
/// Checks that a substitution at position 0 is accepted when the N-terminal filter is off
/// and rejected when it is on, for both IsMutationOne2 and IsMutationOneIL2.
/// </summary>
public void TestIsMutationOneIgnoreNTerminal()
{
    // The deamidation and multiple-nucleotide filters stay disabled throughout.
    const bool ignoreDeamidated = false;
    const bool ignoreMultipleNucleotide = false;
    int mutationSite = -1;

    // A->B at the first residue
    Assert.IsTrue(MutationUtils.IsMutationOne2("ABCDEIR", "BBCDEIR", ref mutationSite, false, ignoreDeamidated, ignoreMultipleNucleotide));
    Assert.IsFalse(MutationUtils.IsMutationOne2("ABCDEIR", "BBCDEIR", ref mutationSite, true, ignoreDeamidated, ignoreMultipleNucleotide));

    // Same substitution plus an I/L difference, in both directions
    Assert.IsTrue(MutationUtils.IsMutationOneIL2("ABCDEIR", "BBCDELR", ref mutationSite, false, ignoreDeamidated, ignoreMultipleNucleotide));
    Assert.IsTrue(MutationUtils.IsMutationOneIL2("ABCDELR", "BBCDEIR", ref mutationSite, false, ignoreDeamidated, ignoreMultipleNucleotide));
    Assert.IsFalse(MutationUtils.IsMutationOneIL2("ABCDELR", "BBCDEIR", ref mutationSite, true, ignoreDeamidated, ignoreMultipleNucleotide));
}
/// <summary>
/// Checks that IsMutationOne accepts exactly one differing residue, rejects zero or two
/// differences, and reports the zero-based position of the difference through the ref overload.
/// </summary>
public void TestIsMutationOne()
{
    const string reference = "ABCDEIR";
    const string oneDifference = "ABCDELR";

    // I -> L
    Assert.IsTrue(MutationUtils.IsMutationOne(reference, oneDifference));

    // no mutation
    Assert.IsFalse(MutationUtils.IsMutationOne(reference, reference));

    // two mutations
    Assert.IsFalse(MutationUtils.IsMutationOne(reference, "ABCDELK"));

    // The ref overload reports where the single difference is.
    int mutationSite = -1;
    MutationUtils.IsMutationOne(reference, oneDifference, ref mutationSite);
    Assert.AreEqual(5, mutationSite);
}
/// <summary>
/// Looks among the miss0group entries with the same length as <paramref name="pnovoseq"/>
/// for the first one that IsMutationOne2 accepts under the configured options.
/// </summary>
/// <param name="pnovoseq">Sequence to match.</param>
/// <param name="source">Set to the miss0 entry of the matching sequence on success.</param>
/// <param name="site">Passed by reference to every IsMutationOne2 call, which may update it.</param>
/// <returns>True when a match was found; otherwise false.</returns>
private bool FindMutationOneType1(string pnovoseq, ref string source, ref int site)
{
    int length = pnovoseq.Length;
    if (miss0group.ContainsKey(length))
    {
        foreach (var candidate in miss0group[length])
        {
            bool matches = MutationUtils.IsMutationOne2(
                candidate,
                pnovoseq,
                ref site,
                options.IgnoreNtermMutation,
                options.IgnoreDeamidatedMutation,
                options.IgnoreMultipleNucleotideMutation);

            if (!matches)
            {
                continue;
            }

            source = miss0[candidate];
            return true;
        }
    }

    return false;
}
/// <summary>
/// Checks that IsMutationOneIL accepts one real substitution (with or without an extra
/// I/L difference), reports its position, and rejects I/L-only, identical and
/// doubly-mutated sequences.
/// </summary>
public void TestIsMutationOneIL()
{
    const string reference = "ABCDEIR";
    int mutationSite = 0;

    // C -> N
    Assert.IsTrue(MutationUtils.IsMutationOneIL(reference, "ABNDEIR", ref mutationSite));
    Assert.AreEqual(2, mutationSite);

    // C -> N, I -> L
    Assert.IsTrue(MutationUtils.IsMutationOneIL(reference, "ABNDELR", ref mutationSite));
    Assert.AreEqual(2, mutationSite);

    // I -> L only
    Assert.IsFalse(MutationUtils.IsMutationOneIL(reference, "ABCDELR", ref mutationSite));

    // no mutation
    Assert.IsFalse(MutationUtils.IsMutationOneIL(reference, reference, ref mutationSite));

    // two mutations
    Assert.IsFalse(MutationUtils.IsMutationOneIL(reference, "ABCDEDK", ref mutationSite));
}
/// <summary>
/// Runs <paramref name="func"/> to detect a single mutation between two peptides and then
/// applies the optional filters to the reported mutation site.
/// </summary>
/// <param name="func">Detector that returns true and sets the site when exactly one mutation is found.</param>
/// <param name="fromPeptide">Original peptide sequence.</param>
/// <param name="toPeptide">Mutated peptide sequence.</param>
/// <param name="mutationSite">Passed through to <paramref name="func"/>, which reports the mutation position in it.</param>
/// <param name="ignoreNtermMutation">When true, a mutation at position 0 is rejected.</param>
/// <param name="ignoreDeamidatedMutation">When true, a mutation that IsDeamidatedMutation recognises is rejected.</param>
/// <param name="ignoreMultipleNucleotideMutation">When true, a mutation that is not a single-nucleotide mutation is rejected.</param>
/// <returns>True when a single mutation was found and no enabled filter rejected it.</returns>
private static bool DoIsMutationOne(IsMutationOneDelegate func, string fromPeptide, string toPeptide, ref int mutationSite, bool ignoreNtermMutation, bool ignoreDeamidatedMutation, bool ignoreMultipleNucleotideMutation)
{
    if (!func(fromPeptide, toPeptide, ref mutationSite))
    {
        return false;
    }

    if (ignoreNtermMutation && 0 == mutationSite)
    {
        return false;
    }

    // The residues are only indexed when a filter actually needs them.
    if (ignoreDeamidatedMutation && MutationUtils.IsDeamidatedMutation(fromPeptide[mutationSite], toPeptide[mutationSite]))
    {
        return false;
    }

    // Fix: the check used to be inverted and rejected the single-nucleotide mutations,
    // keeping exactly the multiple-nucleotide ones this flag is meant to drop.
    if (ignoreMultipleNucleotideMutation && !IsSingleNucleotideMutation(fromPeptide[mutationSite], toPeptide[mutationSite]))
    {
        return false;
    }

    return true;
}
/// <summary>
/// Resolves the image file names for the currently selected spectrum and for its
/// counterpart (same "Index" annotation and charge, opposite "IsMutation" flag), and
/// updates lblMutation with the two sequences split around the mutation site.
/// </summary>
/// <param name="mutationFilename">Image file of the mutated spectrum, or empty when there is none.</param>
/// <param name="originalFilename">Image file of the original spectrum, or empty when there is none.</param>
private void GetCurrentImageFilename(out string mutationFilename, out string originalFilename)
{
    if (gvPeptides.SelectedRows.Count == 0)
    {
        mutationFilename = string.Empty;
        originalFilename = string.Empty;
        return;
    }

    var spectrum = spectra[gvPeptides.SelectedRows[0].Index];
    var isMutation = (bool)(spectrum.Annotations["IsMutation"]);
    var index = spectrum.Annotations["Index"].ToString();

    // Counterpart candidates, best score first.
    var correspond = (from s in spectra
                      let cindex = s.Annotations["Index"].ToString()
                      let cIsMutation = (bool)(s.Annotations["IsMutation"])
                      where cindex.Equals(index) && s.Charge == spectrum.Charge && cIsMutation != isMutation
                      orderby s.Score descending
                      select s).ToList();

    if (correspond.Count == 0)
    {
        string selectFilename = GetImageFilename(spectrum);
        if (isMutation)
        {
            mutationFilename = selectFilename;
            originalFilename = string.Empty;
        }
        else
        {
            mutationFilename = string.Empty;
            originalFilename = selectFilename;
        }
        lblMutation.Text = string.Empty;
        return;
    }

    IIdentifiedSpectrum oriSp, mutSp;
    if (isMutation)
    {
        mutSp = spectrum;
        oriSp = correspond[0];
    }
    else
    {
        oriSp = spectrum;
        mutSp = correspond[0];
    }

    mutationFilename = GetImageFilename(mutSp);
    originalFilename = GetImageFilename(oriSp);

    int mutationSite = -1;
    var mutSeq = mutSp.Peptide.PureSequence;
    var oriSeq = oriSp.Peptide.PureSequence;
    MutationUtils.IsMutationOneIL(mutSeq, oriSeq, ref mutationSite);

    // No usable site was reported: indexing either sequence below would throw,
    // so show no label instead.
    if (mutationSite < 0 || mutationSite >= mutSeq.Length || mutationSite >= oriSeq.Length)
    {
        lblMutation.Text = string.Empty;
        return;
    }

    if (mutationSite == 0)
    {
        lblMutation.Text = MyConvert.Format(".{0}.{1}\n.{2}.{3}",
            oriSeq[mutationSite], oriSeq.Substring(mutationSite + 1),
            mutSeq[mutationSite], mutSeq.Substring(mutationSite + 1));
    }
    else if (mutationSite == mutSeq.Length - 1)
    {
        // Fix: Substring's second argument is a length, so the prefix before the site is
        // Substring(0, mutationSite); "mutationSite - 1" dropped the preceding residue.
        lblMutation.Text = MyConvert.Format("{0}.{1}.\n{2}.{3}.",
            oriSeq.Substring(0, mutationSite), oriSeq[mutationSite],
            mutSeq.Substring(0, mutationSite), mutSeq[mutationSite]);
    }
    else
    {
        lblMutation.Text = MyConvert.Format("{0}.{1}.{2}\n{3}.{4}.{5}",
            oriSeq.Substring(0, mutationSite), oriSeq[mutationSite], oriSeq.Substring(mutationSite + 1),
            mutSeq.Substring(0, mutationSite), mutSeq[mutationSite], mutSeq.Substring(mutationSite + 1));
    }
}
/// <summary>
/// Checks that ReplaceLToI turns the 'L' of the annotated sequence into 'I' where the
/// reference sequence has 'I' at that residue, keeping the flanking residues and the '*' marker.
/// </summary>
public void TestReplaceLToI()
{
    const string annotated = "A.DKDKDFLEL*DR.C";
    const string reference = "DKIKDFLEIDR";

    var actual = MutationUtils.ReplaceLToI(annotated, reference);

    Assert.AreEqual("A.DKDKDFLEI*DR.C", actual);
}
/// <summary>
/// Writes one tab-separated line per mutated/original peptide pair to
/// <paramref name="pairedFile"/> and saves the paired peptides to a companion
/// ".peptides" file.
/// </summary>
/// <param name="aas">Amino acid table used for the RNA-editing check and the DNA-level mutation description.</param>
/// <param name="format">Format handed to SavePeptidesFile for the companion file.</param>
/// <param name="proMap">Protein sequences keyed by the accession that acParser extracts from a protein name.</param>
/// <param name="classification">Classification used by GetPepCount to produce the count columns.</param>
/// <param name="mutHeader">Header text for the mutated-spectrum columns.</param>
/// <param name="mutPepFormat">Format used to render the mutated spectrum.</param>
/// <param name="pairedMut">Groups whose first element is the mutated peptide and whose remaining elements are candidate originals.</param>
/// <param name="pairedFile">Path of the result file to write.</param>
/// <returns>The path of the companion ".peptides" file.</returns>
/// <exception cref="Exception">A protein of an original peptide is missing from <paramref name="proMap"/>.</exception>
private string OutputPairedResult(Aminoacids aas, MascotPeptideTextFormat format, Dictionary<string, Sequence> proMap, IClassification<IIdentifiedPeptide> classification, string mutHeader, MascotPeptideTextFormat mutPepFormat, List<List<IGrouping<string, IIdentifiedPeptide>>> pairedMut, string pairedFile)
{
    bool dbAnnotation = File.Exists(_uniprotXmlFile);

    List<TempResult> tr = new List<TempResult>();

    // NOTE(review): resIndex starts at 1 and is incremented before its first use, so the
    // first emitted group label is "$2-..." — confirm this is intended.
    int resIndex = 1;
    foreach (var res in pairedMut)
    {
        var mutCharges = (from r in res[0]
                          orderby r.Spectrum.Charge
                          select r.Spectrum.Charge).Distinct().ToList();
        var pepCounts = GetPepCount(classification, res[0]);

        int peplabel = 0;
        bool bFound = false;
        for (int pepIndex = 1; pepIndex < res.Count; pepIndex++)
        {
            var charges = (from r in res[pepIndex]
                           orderby r.Spectrum.Charge
                           select r.Spectrum.Charge).Distinct().ToList();
            var commonCharges = mutCharges.Intersect(charges).ToList();
            if (commonCharges.Count == 0)
            {
                continue;
            }

            // A group consumes a result index only once it has at least one usable pair.
            if (!bFound)
            {
                bFound = true;
                resIndex++;
            }

            peplabel++;

            // Best-scoring mutated spectrum among the shared charges, then the best-scoring
            // original spectrum with that same charge.
            var curMutSpectrum = (from r in res[0]
                                  where commonCharges.Contains(r.Spectrum.Charge)
                                  orderby r.Spectrum.Score descending
                                  select r).First();
            var mutText = mutPepFormat.PeptideFormat.GetString(curMutSpectrum.Spectrum);
            var curOriginalSpectrum = (from r in res[pepIndex]
                                       where r.Spectrum.Charge == curMutSpectrum.Spectrum.Charge
                                       orderby r.Spectrum.Score descending
                                       select r).First();

            var oriPureSeq = curOriginalSpectrum.PureSequence;
            var mutFixSeq = MutationUtils.ReplaceLToI(curMutSpectrum.Sequence, oriPureSeq);
            var mutFixPureSeq = PeptideUtils.GetPureSequence(mutFixSeq);

            int mutationSite = -1;
            string equalsToModification = string.Empty;
            string rnaediting = string.Empty;
            string databaseannotation = string.Empty;

            bool isType1 = mutFixPureSeq.Length == oriPureSeq.Length;
            if (isType1)
            {
                MutationUtils.IsMutationOneIL(mutFixPureSeq, oriPureSeq, ref mutationSite);
                equalsToModification = spTable.GetModification(oriPureSeq[mutationSite], mutFixPureSeq[mutationSite]);
                SnpCode.IsRnaEditing(aas[oriPureSeq[mutationSite]], aas[mutFixPureSeq[mutationSite]], out rnaediting);
            }
            else
            {
                // Lengths differ: use the last residue of the shorter sequence as the site.
                mutationSite = Math.Min(mutFixPureSeq.Length, oriPureSeq.Length) - 1;
            }

            var pepMutation = MyConvert.Format("{0}{1}{2}", oriPureSeq[mutationSite], mutationSite + 1, mutFixPureSeq[mutationSite]);

            List<Sequence> seqs = new List<Sequence>();
            foreach (var p in curOriginalSpectrum.Proteins)
            {
                var ac = acParser.GetValue(p);
                if (!proMap.ContainsKey(ac))
                {
                    throw new Exception("Cannot find protein " + p + " in sequence database!");
                }
                seqs.Add(proMap[ac]);
            }

            // Same mutation expressed in protein coordinates, once per protein.
            var proMutations = (from p in curOriginalSpectrum.Proteins
                                let ac = acParser.GetValue(p)
                                let seq = proMap[ac]
                                let pos = seq.SeqString.IndexOf(oriPureSeq)
                                let pmu = MyConvert.Format("{0}{1}{2}", oriPureSeq[mutationSite], pos + mutationSite + 1, mutFixPureSeq[mutationSite])
                                select new { ProteinName = p, Mutation = pmu }).ToList();
            var proMutation = (from pro in proMutations
                               select pro.Mutation).Merge("/");

            if (isType1 && dbAnnotation)
            {
                //sequence variants
                foreach (var pro in proMutations)
                {
                    var entry = GetUniprotEntry(pro.ProteinName);
                    if (entry == null)
                    {
                        continue;
                    }

                    foreach (var sv in entry.SequenceVariants)
                    {
                        var mut = string.Format("{0}{1}{2}", sv.Original, sv.Position, sv.Variation);
                        if (pro.Mutation.Equals(mut))
                        {
                            databaseannotation = string.Format("{0}=SequenceVariant {1}", pro.ProteinName, sv.Description);
                            break;
                        }
                    }

                    if (databaseannotation != string.Empty)
                    {
                        break;
                    }
                }

                //sequence conflicts
                if (databaseannotation == string.Empty)
                {
                    foreach (var pro in proMutations)
                    {
                        var entry = GetUniprotEntry(pro.ProteinName);
                        if (entry == null)
                        {
                            continue;
                        }

                        foreach (var sv in entry.SequenceConflicts)
                        {
                            // Only single-residue conflicts can match a single substitution.
                            if ((sv.BeginPosition != sv.EndPosition) || sv.Original.Length != 1)
                            {
                                continue;
                            }

                            var mut = string.Format("{0}{1}{2}", sv.Original, sv.BeginPosition, sv.Variation);
                            if (pro.Mutation.Equals(mut))
                            {
                                databaseannotation = string.Format("{0}=SequenceConflict {1}", pro.ProteinName, sv.Description);
                                break;
                            }
                        }

                        if (databaseannotation != string.Empty)
                        {
                            break;
                        }
                    }
                }
            }

            List<string> proRefs = seqs.ConvertAll(m => m.Description).ToList();

            int mutationCount;
            var dnaMutation = aas[oriPureSeq[mutationSite]].TransferTo(aas[mutFixPureSeq[mutationSite]], out mutationCount);

            var oriPepCounts = GetPepCount(classification, res[pepIndex]);

            var line = string.Format("${0}-{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}\t{12}\t{13}\t{14}\t{15}\t{16}\t{17}",
                resIndex,
                peplabel,
                mutText,
                mutFixSeq,
                mutFixPureSeq,
                (from p in pepCounts select p.ToString()).Merge("\t"),
                pepMutation,
                mutationCount,
                dnaMutation,
                curOriginalSpectrum.Spectrum.Query.FileScan.ShortFileName,
                oriPureSeq,
                curOriginalSpectrum.Proteins.Merge("/"),
                proRefs.Merge("/"),
                (from p in oriPepCounts select p.ToString()).Merge("\t"),
                proMutation,
                equalsToModification,
                rnaediting,
                databaseannotation
                );

            tr.Add(new TempResult()
            {
                Line = line,
                PepCount = pepCounts.Sum(),
                OriginalSequence = oriPureSeq
            });
        }
    }

    // Fix: grouping, sorting and writing used to sit inside the loop above, so the whole
    // file was rewritten once per group and was never written for an empty pairedMut.
    // It now runs once, after all lines are collected.
    var groups = tr.GroupBy(m => m.OriginalSequence).ToList();
    groups.Sort((m1, m2) => -m1.Max(n => n.PepCount).CompareTo(m2.Max(n => n.PepCount)));

    List<TempResult> lines = new List<TempResult>();
    groups.ForEach(g => lines.AddRange(from l in g orderby l.PepCount descending select l));

    using (StreamWriter sw = new StreamWriter(pairedFile))
    {
        string pepCountHeader = "";
        string originalPepCountHeader = "";
        foreach (var key in ClassificationSet.Keys)
        {
            pepCountHeader = pepCountHeader + "\t" + key + "_PepCount";
            originalPepCountHeader = originalPepCountHeader + "\t" + key + "_OriginalCount";
        }

        sw.WriteLine("Index\t" + mutHeader + "\tSequence\tPureSequence" + pepCountHeader + "\tPepMutation\tDNAMutationCount\tDNAMutation\tOriginalFileScan\tOriginalSequence\tOriginalProteins\tOriginalReferences" + originalPepCountHeader + "\tProMutation\tEqualsToModification\tRNA-Editing\tDatabaseAnnotation");
        lines.ForEach(m => sw.WriteLine(m.Line));
    }

    var pairedPeptideFile = pairedFile + ".peptides";
    SavePeptidesFile(pairedMut, format, pairedPeptideFile);
    return pairedPeptideFile;
}
/// <summary>
/// Pairs mutated peptide identifications from <paramref name="fileName"/> with their
/// original peptides, keeping only mutated spectra that the pNovo result also reports,
/// and writes one statistic per mutation type (type1, type2, type3).
/// </summary>
/// <param name="fileName">Peptide-spectrum-match file to process.</param>
/// <returns>The file names returned by the three DoStatistic calls, concatenated.</returns>
/// <exception cref="Exception">The first protein name of a mutated peptide does not end with type1, type2 or type3.</exception>
public override IEnumerable<string> Process(string fileName)
{
    var aas = new Aminoacids();

    Progress.SetMessage("reading pNovo result from " + pNovoPeptideFile + " ...");
    var pNovoSpectra = new MascotPeptideTextFormat().ReadFromFile(pNovoPeptideFile);

    // Scan name -> all pure sequences pNovo reported for that scan.
    var pNovoMap = new Dictionary<string, HashSet<string>>();
    foreach (var pep in pNovoSpectra)
    {
        var scanKey = pep.Query.FileScan.LongFileName;
        HashSet<string> sequences;
        if (!pNovoMap.TryGetValue(scanKey, out sequences))
        {
            sequences = new HashSet<string>();
            pNovoMap[scanKey] = sequences;
        }
        sequences.UnionWith(pep.Peptides.Select(p => p.PureSequence));
    }

    var format = new MascotPeptideTextFormat();
    Progress.SetMessage("reading peptide-spectra-matches from " + fileName + " ...");
    var spectra = format.ReadFromFile(fileName);

    // Filter by charge state.
    spectra.RemoveAll(m => !charges.Contains(m.Charge));

    // Peptides containing an amino acid with no codes are ignored outright.
    foreach (var spectrum in spectra)
    {
        for (int i = spectrum.Peptides.Count - 1; i >= 0; i--)
        {
            if (spectrum.Peptides[i].PureSequence.Any(n => aas[n].Codes.Length == 0))
            {
                spectrum.RemovePeptideAt(i);
            }
        }
    }
    spectra.RemoveAll(m => m.Peptides.Count == 0);

    Progress.SetMessage("comparing peptide-spectra-matches with pNovo result...");

    // Is the mutation consistent with what pNovo reported? Drop mutated spectra it does not confirm.
    spectra.RemoveAll(m =>
    {
        if (!IsMutationPeptide(m))
        {
            return false;
        }

        HashSet<string> reported;
        if (!pNovoMap.TryGetValue(m.Query.FileScan.LongFileName, out reported))
        {
            return true;
        }

        // Sequences are compared with every I replaced by L.
        bool confirmed = m.Peptides.Any(n => reported.Contains(n.PureSequence.Replace('I', 'L')));
        return !confirmed;
    });

    // Spectra whose peptides come from the mutated version.
    var mutSpectra = spectra.Where(m => IsMutationPeptide(m)).ToList();
    var mutPeptides = mutSpectra.SelectMany(s => s.Peptides).ToList();
    var mutGroup = mutPeptides.GroupBy(m => m.PureSequence);

    // Spectra whose peptides come from the wild-type version.
    var fromSpectra = spectra.Except(mutSpectra).ToList();
    fromSpectra.RemoveAll(m => m.Proteins.Any(n => mutationReg.Match(n).Success));
    var fromPeptides = fromSpectra.SelectMany(s => s.Peptides).ToList();
    var fromGroup = fromPeptides.GroupBy(m => m.PureSequence).ToGroupDictionary(n => n.Key.Length);

    // Fallback bounds of 6 and 30 are used when there are no wild-type peptides.
    var minLength = 6;
    var maxLength = 30;
    if (fromGroup.Count != 0)
    {
        minLength = fromGroup.Min(m => m.Key);
        maxLength = fromGroup.Max(m => m.Key);
    }

    // One bucket per mutation type.
    var type1 = new List<List<IGrouping<string, IIdentifiedPeptide>>>();
    var type2 = new List<List<IGrouping<string, IIdentifiedPeptide>>>();
    var type3 = new List<List<IGrouping<string, IIdentifiedPeptide>>>();

    Progress.SetRange(0, mutGroup.Count());
    Progress.SetPosition(0);

    Progress.SetMessage("finding mutation-original pairs ...");
    foreach (var mut in mutGroup)
    {
        // The mutated peptide is always element 0; matching originals follow.
        var matched = new List<IGrouping<string, IIdentifiedPeptide>>();
        matched.Add(mut);

        Progress.Increment(1);

        var protein = mut.First().Proteins[0];
        var mutLength = mut.Key.Length;

        List<List<IGrouping<string, IIdentifiedPeptide>>> type;
        if (protein.EndsWith("type3"))
        {
            type = type3;

            // Longer originals that start with the mutated sequence minus its last residue.
            var mutseq = mut.Key.Substring(0, mutLength - 1);
            for (int len = mutLength + 1; len <= maxLength; len++)
            {
                if (!fromGroup.ContainsKey(len))
                {
                    continue;
                }

                foreach (var original in fromGroup[len])
                {
                    if (original.Key.StartsWith(mutseq))
                    {
                        matched.Add(original);
                    }
                }
            }
        }
        else if (protein.EndsWith("type2"))
        {
            type = type2;

            // Shorter originals whose sequence minus its last residue is a prefix of the mutated one.
            for (int len = minLength; len < mutLength; len++)
            {
                if (!fromGroup.ContainsKey(len))
                {
                    continue;
                }

                foreach (var original in fromGroup[len])
                {
                    var oseq = original.Key.Substring(0, original.Key.Length - 1);
                    if (mut.Key.StartsWith(oseq))
                    {
                        matched.Add(original);
                    }
                }
            }
        }
        else if (protein.EndsWith("type1"))
        {
            type = type1;

            // Same-length originals accepted by IsMutationOneIL2 under the configured filters.
            if (fromGroup.ContainsKey(mutLength))
            {
                foreach (var original in fromGroup[mutLength])
                {
                    int mutationSite = -1;
                    bool isPair = MutationUtils.IsMutationOneIL2(original.Key, mut.Key, ref mutationSite, IgnoreNtermMutation, IgnoreDeamidatedMutation, IgnoreMultipleNucleotideMutation);
                    if (isPair)
                    {
                        matched.Add(original);
                    }
                }
            }
        }
        else
        {
            throw new Exception("There is no mutation type information at protein name: " + protein + "\nIt should be like MUL_NHLGQK_type1, MUL_NHLGQK_type2 or MUL_NHLGQK_type3");
        }

        type.Add(matched);
    }

    // Only type1 is sorted: ascending by list size, ties broken by descending size of the mutated group.
    type1.Sort((m1, m2) =>
    {
        var bySize = m1.Count.CompareTo(m2.Count);
        if (bySize != 0)
        {
            return bySize;
        }
        return m2[0].Count().CompareTo(m1[0].Count());
    });

    Progress.SetMessage("reading protein sequences ...");
    var proteins = SequenceUtils.Read(new FastaFormat(), fastaFile);

    // Key each protein by its parsed accession, falling back to the full name.
    var proMap = proteins.ToDictionary(m =>
    {
        string ac;
        return acParser.TryParse(m.Name, out ac) ? ac : m.Name;
    });

    var classification = GetClassification();

    string mutHeader = "FileScan\tMH+\tDiff(MH+)\tCharge\tRank\tScore\tExpectValue\tModification";
    var mutPepFormat = new MascotPeptideTextFormat(mutHeader);

    Progress.SetMessage("writing result ...");

    var result1 = DoStatistic(fileName, aas, format, proMap, classification, mutHeader, mutPepFormat, type1, ".type1");
    var result2 = DoStatistic(fileName, aas, format, proMap, classification, mutHeader, mutPepFormat, type2, ".type2");
    var result3 = DoStatistic(fileName, aas, format, proMap, classification, mutHeader, mutPepFormat, type3, ".type3");

    return result1.Concat(result2).Concat(result3).ToArray();
}