/// <summary>
/// Forwards the call to <c>sv.GetStudyKey</c> while a <c>DebugLog</c> instance is alive.
/// </summary>
/// <param name="SID">Passed through unchanged to <c>sv.GetStudyKey</c>.</param>
/// <param name="prm">Passed through unchanged to <c>sv.GetStudyKey</c>.</param>
/// <param name="patientid">Output value filled in by <c>sv.GetStudyKey</c>.</param>
/// <param name="studykey">Output value filled in by <c>sv.GetStudyKey</c>.</param>
/// <returns>The integer returned by <c>sv.GetStudyKey</c>.</returns>
public int GetStudyKey(string SID, FindParam prm, out string patientid, out string studykey)
{
    // The DebugLog instance is disposed as soon as the delegated call has returned.
    using (new DebugLog())
    {
        int result = sv.GetStudyKey(SID, prm, out patientid, out studykey);
        return result;
    }
}
/// <summary>
/// Forwards the call to <c>sv.GetStudyList</c> while a <c>DebugLog</c> instance is alive.
/// </summary>
/// <param name="SID">Passed through unchanged to <c>sv.GetStudyList</c>.</param>
/// <param name="prm">Passed through unchanged to <c>sv.GetStudyList</c>.</param>
/// <param name="tags">Output list filled in by <c>sv.GetStudyList</c>.</param>
/// <param name="count">Output value filled in by <c>sv.GetStudyList</c>.</param>
/// <returns>The integer returned by <c>sv.GetStudyList</c>.</returns>
public int GetStudyList(string SID, FindParam prm, out List<StudyTag> tags, out int count)
{
    // The DebugLog instance is disposed as soon as the delegated call has returned.
    using (new DebugLog())
    {
        int result = sv.GetStudyList(SID, prm, out tags, out count);
        return result;
    }
}
/// <summary>
/// Thread entry point: walks the sequences in the supplied <c>FindParam</c> and records
/// the first mutation type (1, 2, then 3) that matches each of them.
/// </summary>
/// <param name="obj">The work item; it is cast to <c>FindParam</c>.</param>
private void FindMutation(object obj)
{
    var work = (FindParam)obj;

    foreach (var candidate in work.PnovoSeqs)
    {
        // Stop as soon as cancellation has been requested.
        if (Progress.IsCancellationPending())
        {
            return;
        }

        // Counted before the lookups so the counter reflects sequences that have been started.
        work.FinishedCount++;

        // Shared by all three lookups; each lookup receives it by reference.
        string source = string.Empty;
        int site = -1;

        if (FindMutationOneType1(candidate, ref source, ref site))
        {
            // Type 1: a single-mutation match that does not change the enzyme cleavage site.
            var header = string.Format("sp|MUL_{0}|type1 source={1} mutation={2}{3}{4}", candidate, source, source[site], site + 1, candidate[site]);
            work.Sequences.Add(new Sequence(header, candidate));
            work.Type1Count++;
        }
        else if (FindMutationOneType2(candidate, ref source))
        {
            // Type 2: a cleavage site mutated into another amino acid, so the pNovo sequence is longer.
            work.Sequences.Add(new Sequence("sp|MUL_" + candidate + "|type2 source=" + source, candidate));
            work.Type2Count++;
        }
        else if (FindMutationOneType3(candidate, ref source))
        {
            // Type 3: another amino acid mutated into a cleavage site, so the pNovo sequence is shorter.
            work.Sequences.Add(new Sequence("sp|MUL_" + candidate + "|type3 source=" + source, candidate));
            work.Type3Count++;
        }
    }
}
/// <summary>
/// Reads the pNovo result files and the target fasta file, searches the de novo sequences
/// for single mutations on worker threads, and writes the statistics file, the extended
/// fasta file and the matching peptide list into <c>options.TargetDirectory</c>.
/// </summary>
/// <returns>A one-element array holding the full path of the generated mutation fasta file.</returns>
/// <exception cref="UserTerminatedException">Cancellation was requested while the worker threads were running.</exception>
public override IEnumerable<string> Process()
{
    HashSet<string> pnovoseqs = new HashSet<string>();

    var pnovoParser = new PNovoPlusParser(options.TitleParser);
    pnovoParser.Progress = this.Progress;

    // Find an amino acid that is not an enzyme cleavage residue; it stands in for the
    // residues before and after a de novo sequence.
    var anotheraa = 'A';
    for (int i = 0; i < 26; i++)
    {
        anotheraa = (char)('A' + i);
        if (options.Enzyme.CleaveageResidues.Contains(anotheraa) || options.Enzyme.NotCleaveResidues.Contains(anotheraa))
        {
            continue;
        }
        break;
    }

    // First pass over the pNovo files: collect the distinct pure sequences and the counts.
    Progress.SetRange(0, options.PnovoFiles.Length);
    int totalSpectrumCount = 0;
    int totalSpectrumPassScore = 0;
    foreach (var pnovoFile in options.PnovoFiles)
    {
        Progress.SetMessage("Reading " + pnovoFile + " ...");
        int spectrumCount = pnovoParser.GetSpectrumCount(pnovoFile);
        var curSpectra = pnovoParser.ParsePeptides(pnovoFile, 10, options.MinScore);

        totalSpectrumCount += spectrumCount;
        totalSpectrumPassScore += curSpectra.Count;

        RemoveMissCleavagePeptides(anotheraa, curSpectra);
        pnovoseqs.UnionWith(from c in curSpectra from p in c.Peptides select p.PureSequence);
        Progress.Increment(1);
    }

    var pNovoStat = Path.Combine(options.TargetDirectory, "pNovo.SAP.stat");
    using (StreamWriter sw = new StreamWriter(pNovoStat))
    {
        sw.WriteLine("Total Spectrum Count\t" + totalSpectrumCount.ToString());
        sw.WriteLine("Total Peptide-Spectrum-Match Passed Score Filter\t" + totalSpectrumPassScore.ToString());
    }

    Progress.SetPosition(0);
    Progress.SetMessage("Reading " + options.TargetFastaFile + " ...");
    var seqs = SequenceUtils.Read(new FastaFormat(), options.TargetFastaFile);

    Progress.SetMessage("Digesting sequences ...");
    GetDigestPeptide(seqs);
    seqs.Clear();
    seqs.TrimExcess();
    // NOTE(review): GC.WaitForFullGCComplete reports the status of a registered GC
    // notification; no registration is visible in this method, so these calls probably
    // do not block. Confirm whether GC.WaitForPendingFinalizers was intended.
    GC.Collect();
    GC.WaitForFullGCComplete();

    // Drop every de novo peptide that is identical to a peptide of the theoretical database.
    Progress.SetMessage("Removing identical peptides ...");
    pnovoseqs.ExceptWith(miss0.Keys);
    var pnovoArray = pnovoseqs.ToArray();
    pnovoseqs.Clear();
    GC.Collect();
    GC.WaitForFullGCComplete();

    // Build the lookup dictionaries used by the mutation searches.
    miss0group = miss0.Keys.ToGroupDictionary(m => m.Length);

    var type2seqs = new List<Type2Sequence>();
    var type2_2 = new List<string>();
    foreach (var m in miss1.Keys)
    {
        // Locate the first cleavage site inside the peptide.
        int maxpos = -1;
        for (int i = 1; i < m.Length; i++)
        {
            if (options.Enzyme.IsCleavageSite(m[i - 1], m[i], anotheraa))
            {
                maxpos = i - 1;
                break;
            }
        }

        if (maxpos == -1)
        {
            throw new Exception("There is no misscleavage in " + m);
        }

        if (maxpos == 0)
        {
            type2_2.Add(m);
        }
        else
        {
            type2seqs.Add(new Type2Sequence()
            {
                Sequence = m,
                PriorSequence = m.Substring(0, maxpos),
                PostSequence = m.Substring(maxpos + 1)
            });
        }
    }

    miss1type2_1 = type2seqs.ToGroupDictionary(m => GetType2Key(m.Sequence));
    miss1type2_2 = type2_2.ToGroupDictionary(m => m.Substring(1));
    miss0type3 = miss0.Keys.ToGroupDictionary(m => GetType3Key(m));
    type2seqs.Clear();
    GC.Collect();
    GC.WaitForFullGCComplete();

    Progress.SetMessage("Finding mutation ...");
    Progress.SetRange(0, pnovoArray.Length);

    var totalCount = pnovoArray.Length;
    // A thread count below one would divide by zero (0) or start no worker at all (negative).
    var threadCount = Math.Max(1, options.ThreadCount);
    var binSize = totalCount / threadCount;

    // Split the sequences into one bin per thread; the last bin takes the remainder.
    List<FindParam> fparams = new List<FindParam>();
    List<Thread> threads = new List<Thread>();
    var startPos = 0;
    for (int i = 0; i < threadCount; i++)
    {
        int count = (i == threadCount - 1) ? pnovoArray.Length - startPos : binSize;

        List<string> binSeq = new List<string>();
        binSeq.AddRange(pnovoArray.Skip(startPos).Take(count));
        startPos = startPos + count;

        var aparam = new FindParam() { PnovoSeqs = binSeq };
        fparams.Add(aparam);

        Thread at = new Thread(this.FindMutation);
        threads.Add(at);
        at.IsBackground = true;
        at.Start(aparam);
    }

    pnovoArray = null;
    GC.Collect();
    GC.WaitForFullGCComplete();

    var startTime = DateTime.Now;
    Progress.SetRange(0, totalCount);
    while (true)
    {
        Thread.Sleep(1000);

        if (Progress.IsCancellationPending())
        {
            throw new UserTerminatedException();
        }

        int finishedCount = fparams.Sum(m => m.FinishedCount);
        Progress.SetPosition(finishedCount);

        // The estimate needs at least one finished sequence (it divides by finishedCount).
        if (finishedCount > 0)
        {
            var curTime = DateTime.Now;
            var costTime = curTime - startTime;

            // Scale in floating point: Ticks * totalCount can exceed the range of long.
            double ratio = (double)totalCount / finishedCount;
            var totalCostTime = TimeSpan.FromTicks((long)(costTime.Ticks * ratio));
            var finishTime = curTime + TimeSpan.FromTicks((long)(costTime.Ticks * (ratio - 1.0)));

            StringBuilder costFormat = new StringBuilder();
            if (totalCostTime.TotalHours >= 2.0)
            {
                costFormat.Append(Math.Truncate(totalCostTime.TotalHours).ToString() + " hours and ");
            }
            else if (totalCostTime.TotalHours >= 1.0)
            {
                costFormat.Append("one hour and ");
            }
            costFormat.Append(totalCostTime.Minutes.ToString() + " minutes");

            Progress.SetMessage("Finding mutation {0} / {1}, will cost {2} and finish at {3} ...", finishedCount, totalCount, costFormat, finishTime);
        }

        // Checked on every pass, even with nothing finished: when there are no sequences
        // to search, the workers exit immediately and finishedCount stays at zero.
        if (threads.All(m => !m.IsAlive))
        {
            break;
        }
    }

    int type1 = fparams.Sum(m => m.Type1Count);
    int type2 = fparams.Sum(m => m.Type2Count);
    int type3 = fparams.Sum(m => m.Type3Count);

    // Append the per-type counts to the statistics file written earlier.
    using (StreamWriter sw = new StreamWriter(pNovoStat, true))
    {
        sw.WriteLine("Type1 Count\t" + type1.ToString());
        sw.WriteLine("Type2 Count\t" + type2.ToString());
        sw.WriteLine("Type3 Count\t" + type3.ToString());
    }

    var singleMutation = (from f in fparams from s in f.Sequences select s).ToList();

    // The new fasta file is the database fasta content followed by the mutated sequences.
    string newFastaFile = new FileInfo(options.TargetDirectory + "/" + FileUtils.ChangeExtension(new FileInfo(options.DatabaseFastaFile).Name, "mutation.fasta")).FullName;
    using (StreamWriter sw = new StreamWriter(newFastaFile))
    {
        using (StreamReader sr = new StreamReader(options.DatabaseFastaFile))
        {
            string line = sr.ReadToEnd();
            sw.WriteLine(line);

            foreach (var seq in singleMutation)
            {
                sw.WriteLine(">" + seq.Reference);
                sw.WriteLine(seq.SeqString);
            }
        }
    }

    // Second pass over the pNovo files: keep only spectra with a peptide among the mutated sequences.
    Progress.SetRange(0, options.PnovoFiles.Length);
    var sapSequences = new HashSet<string>(singleMutation.ConvertAll(m => m.SeqString));
    List<IIdentifiedSpectrum> allSpectra = new List<IIdentifiedSpectrum>();
    foreach (var pnovoFile in options.PnovoFiles)
    {
        Progress.SetMessage("Reading " + pnovoFile + " ...");
        var curSpectra = pnovoParser.ParsePeptides(pnovoFile, 10, options.MinScore);
        RemoveMissCleavagePeptides(anotheraa, curSpectra);
        curSpectra.RemoveAll(m => !m.Peptides.Any(n => sapSequences.Contains(n.PureSequence)));
        allSpectra.AddRange(curSpectra);
        Progress.Increment(1);
    }

    var pNovoPeptides = Path.Combine(options.TargetDirectory, "pNovo.SAP.peptides");
    new MascotPeptideTextFormat("\tFileScan\tSequence\tCharge\tScore\tDeltaScore").WriteToFile(pNovoPeptides, allSpectra);

    Progress.SetMessage("Finished.");
    Progress.End();

    return(new string[] { newFastaFile });
}
/// <summary>
/// Forwards the call to <c>sv.RSKey2StudyKey</c> while a <c>DebugLog</c> instance is alive.
/// </summary>
/// <param name="SID">Passed through unchanged to <c>sv.RSKey2StudyKey</c>.</param>
/// <param name="rskey">Passed through unchanged to <c>sv.RSKey2StudyKey</c>.</param>
/// <param name="patid">Output value filled in by <c>sv.RSKey2StudyKey</c>.</param>
/// <param name="studykey">Output value filled in by <c>sv.RSKey2StudyKey</c>.</param>
/// <param name="path">Output value filled in by <c>sv.RSKey2StudyKey</c>.</param>
/// <param name="prm">Output value filled in by <c>sv.RSKey2StudyKey</c>.</param>
/// <returns>The integer returned by <c>sv.RSKey2StudyKey</c>.</returns>
public int RSKey2StudyKey(string SID, string rskey, out string patid, out string studykey, out string path, out FindParam prm)
{
    // The DebugLog instance is disposed as soon as the delegated call has returned.
    using (new DebugLog())
    {
        int result = sv.RSKey2StudyKey(SID, rskey, out patid, out studykey, out path, out prm);
        return result;
    }
}
/// <summary>
/// Finds and replaces execute module parameters: parses <c>FindParam</c> as comma separated
/// key,value number pairs, maps every key found in <c>ItemsToFind</c> to the corresponding
/// entry of <c>ItemsToReplace</c>, and stores the result in <c>NewParameterString</c>.
/// </summary>
/// <returns>
/// The StringBuilder assigned to <c>NewParameterString</c>: the prefix followed by the
/// unchanged <c>FindParam</c> when no key matches, otherwise the prefix followed by the
/// replaced key,value pairs.
/// </returns>
private StringBuilder FindAndReplaceParameter()
{
    Stopwatch stopwatch = Stopwatch.StartNew();

    Dictionary<int, int> oldExecuteParameters = new Dictionary<int, int>();
    Dictionary<int, int> newExecuteParameters = new Dictionary<int, int>();

    // Convert the execute module entry to a dictionary. The entry is split only once, and
    // the loop stops before the last token so that reading the value at i + 1 can never
    // run past the end when a trailing key has no value.
    var entries = FindParam.Split(',');
    for (int i = 0; i + 1 < entries.Length; i++)
    {
        // Only numeric key/value pairs are accepted.
        if (int.TryParse(entries[i], out int key) && int.TryParse(entries[i + 1], out int value))
        {
            // Dictionary.Add throws when the same key occurs twice in the entry.
            oldExecuteParameters.Add(key, value);

            // Skip the value, it was consumed together with its key.
            i++;
        }
    }

    // Is there any matching parameter?
    if (!oldExecuteParameters.Keys.Any(x => ItemsToFind.Contains(x.ToString())))
    {
        // No: return the old parameters.
        NewParameterString = new StringBuilder(MyResources.Strings_DrIPNet_prepend + string.Join(",", FindParam));
    }
    else
    {
        // Keys that are listed repeatedly in ItemsToFind. Non-numeric entries are ignored
        // here, the same way the matching loop below ignores them.
        // NOTE(review): the threshold is "more than two occurrences"; confirm that an entry
        // listed exactly twice is really meant to be treated as a non-duplicate.
        var duplicateEntries = new HashSet<int>();
        foreach (var repeated in ItemsToFind.GroupBy(x => x).Where(g => g.Count() > 2))
        {
            if (int.TryParse(repeated.Key, out int duplicateKey))
            {
                duplicateEntries.Add(duplicateKey);
            }
        }

        // Scan the whole execute parameter entry.
        foreach (var parameter in oldExecuteParameters)
        {
            int currentKey = parameter.Key;
            int originalValue = parameter.Value;

            // Scan the items to find for the current key.
            for (int i = 0; i < ItemsToFind.Count; i++)
            {
                // String matching would match "4567" and "4567x", so compare as numbers.
                if (!int.TryParse(ItemsToFind.ElementAt(i), out int oldKey) || currentKey != oldKey)
                {
                    continue;
                }

                // A key with multiple entries uses the index chosen by GetDuplicateIndex,
                // any other key uses the replacement at the same index.
                var replacementText = duplicateEntries.Contains(oldKey)
                    ? ItemsToReplace.ElementAt(GetDuplicateIndex(i))
                    : ItemsToReplace.ElementAt(i);

                // A replacement that is not a number leaves replacementKey at 0.
                int.TryParse(replacementText, out int replacementKey);

                // Modify MB_ActiveProfile instead of using the original value.
                // TODO: This should be aware of if the user entry or a .csv file in use.
                newExecuteParameters.Add(replacementKey, ViewModel.ChangeActiveProfile == currentKey ? activeProfile - 1 : originalValue);

                // Stop scanning and move on to the next entry.
                break;
            }
        }

        // Return the new parameters.
        NewParameterString = new StringBuilder(MyResources.Strings_DrIPNet_prepend + string.Join(",", newExecuteParameters.Select(x => x.Key + "," + x.Value)));
    }

    Debug.WriteLine($"find: {FindParam}\r\nrep: {NewParameterString}");
    Debug.WriteLine($"-----------------------------------------------------> total time: {stopwatch.ElapsedMilliseconds}");

    return(NewParameterString);
}