Ejemplo n.º 1
0
 /// <summary>
 /// Forwards the call to <c>sv.GetStudyKey</c> while a <c>DebugLog</c> instance is alive.
 /// </summary>
 /// <param name="SID">Passed through unchanged to <c>sv.GetStudyKey</c>.</param>
 /// <param name="prm">Passed through unchanged to <c>sv.GetStudyKey</c>.</param>
 /// <param name="patientid">Set by <c>sv.GetStudyKey</c>.</param>
 /// <param name="studykey">Set by <c>sv.GetStudyKey</c>.</param>
 /// <returns>The integer returned by <c>sv.GetStudyKey</c>.</returns>
 public int GetStudyKey(string SID, FindParam prm, out string patientid, out string studykey)
 {
     // The DebugLog instance is disposed when the using block exits.
     using (new DebugLog())
     {
         int result = sv.GetStudyKey(SID, prm, out patientid, out studykey);
         return result;
     }
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Forwards the call to <c>sv.GetStudyList</c> while a <c>DebugLog</c> instance is alive.
 /// </summary>
 /// <param name="SID">Passed through unchanged to <c>sv.GetStudyList</c>.</param>
 /// <param name="prm">Passed through unchanged to <c>sv.GetStudyList</c>.</param>
 /// <param name="tags">Set by <c>sv.GetStudyList</c>.</param>
 /// <param name="count">Set by <c>sv.GetStudyList</c>.</param>
 /// <returns>The integer returned by <c>sv.GetStudyList</c>.</returns>
 public int GetStudyList(string SID, FindParam prm, out List <StudyTag> tags, out int count)
 {
     // The DebugLog instance is disposed when the using block exits.
     using (new DebugLog())
     {
         int result = sv.GetStudyList(SID, prm, out tags, out count);
         return result;
     }
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Walks every sequence in the supplied <c>FindParam</c> and records the first
        /// mutation type (1, 2, then 3) whose finder reports a match.
        /// </summary>
        /// <param name="obj">The work item; it is cast to <c>FindParam</c>.</param>
        private void FindMutation(object obj)
        {
            var work = (FindParam)obj;

            for (int index = 0; index < work.PnovoSeqs.Count; index++)
            {
                // Stop quietly as soon as cancellation is requested.
                if (Progress.IsCancellationPending())
                {
                    return;
                }

                // Counted before the sequence is examined.
                work.FinishedCount++;

                var    candidate    = work.PnovoSeqs[index];
                string matched      = string.Empty;
                int    mutationSite = -1;

                if (FindMutationOneType1(candidate, ref matched, ref mutationSite))
                {
                    // Type 1: a single-mutation match that does not change the enzyme cleavage site.
                    var header = string.Format("sp|MUL_{0}|type1 source={1} mutation={2}{3}{4}", candidate, matched, matched[mutationSite], mutationSite + 1, candidate[mutationSite]);
                    work.Sequences.Add(new Sequence(header, candidate));
                    work.Type1Count++;
                }
                else if (FindMutationOneType2(candidate, ref matched))
                {
                    // Type 2: a cleavage site mutated into another amino acid,
                    // so the sequence resolved by pNovo is longer.
                    work.Sequences.Add(new Sequence("sp|MUL_" + candidate + "|type2 source=" + matched, candidate));
                    work.Type2Count++;
                }
                else if (FindMutationOneType3(candidate, ref matched))
                {
                    // Type 3: another amino acid mutated into a cleavage site,
                    // so the sequence resolved by pNovo is shorter.
                    work.Sequences.Add(new Sequence("sp|MUL_" + candidate + "|type3 source=" + matched, candidate));
                    work.Type3Count++;
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Reads the pNovo result files and the target fasta file, searches the pNovo
        /// sequences for single mutations on worker threads, and writes a statistics file,
        /// a new fasta file (database fasta content followed by the mutation sequences)
        /// and a peptide file into <c>options.TargetDirectory</c>.
        /// </summary>
        /// <returns>A one-element array holding the full path of the newly written fasta file.</returns>
        /// <exception cref="Exception">Thrown when a key of <c>miss1</c> contains no position reported as a cleavage site.</exception>
        /// <exception cref="UserTerminatedException">Thrown when cancellation is pending while the worker threads are being polled.</exception>
        public override IEnumerable <string> Process()
        {
            HashSet <string> pnovoseqs = new HashSet <string>();

            var pnovoParser = new PNovoPlusParser(options.TitleParser);

            pnovoParser.Progress = this.Progress;

            // Find an amino acid that is not an enzyme cleavage site; it stands in for
            // the amino acids before and after a de novo sequence.
            // If every letter A-Z is rejected, anotheraa is left as 'Z'.
            var anotheraa = 'A';

            for (int i = 0; i < 26; i++)
            {
                anotheraa = (char)('A' + i);
                if (options.Enzyme.CleaveageResidues.Contains(anotheraa) || options.Enzyme.NotCleaveResidues.Contains(anotheraa))
                {
                    continue;
                }
                break;
            }

            // First pass over the pNovo files: accumulate counts and collect the
            // distinct PureSequence values of all parsed peptides.
            Progress.SetRange(0, options.PnovoFiles.Length);
            int totalSpectrumCount     = 0;
            int totalSpectrumPassScore = 0;

            foreach (var pnovoFile in options.PnovoFiles)
            {
                Progress.SetMessage("Reading " + pnovoFile + " ...");
                int spectrumCount = pnovoParser.GetSpectrumCount(pnovoFile);
                var curSpectra    = pnovoParser.ParsePeptides(pnovoFile, 10, options.MinScore);

                totalSpectrumCount     += spectrumCount;
                totalSpectrumPassScore += curSpectra.Count;

                RemoveMissCleavagePeptides(anotheraa, curSpectra);

                pnovoseqs.UnionWith(from c in curSpectra
                                    from p in c.Peptides
                                    select p.PureSequence);
                Progress.Increment(1);
            }


            var pNovoStat = Path.Combine(options.TargetDirectory, "pNovo.SAP.stat");

            // Creates (or overwrites) the statistics file; the type counts are appended later.
            using (StreamWriter sw = new StreamWriter(pNovoStat))
            {
                sw.WriteLine("Total Spectrum Count\t" + totalSpectrumCount.ToString());
                sw.WriteLine("Total Peptide-Spectrum-Match Passed Score Filter\t" + totalSpectrumPassScore.ToString());
            }

            Progress.SetPosition(0);
            Progress.SetMessage("Reading " + options.TargetFastaFile + " ...");
            var seqs = SequenceUtils.Read(new FastaFormat(), options.TargetFastaFile);

            Progress.SetMessage("Digesting sequences ...");

            // Presumably fills the miss0 / miss1 members used below — confirm against GetDigestPeptide.
            GetDigestPeptide(seqs);

            // Release the parsed sequences before the next stage.
            // NOTE(review): GC.WaitForFullGCComplete is documented together with
            // GC.RegisterForFullGCNotification; confirm that calling it here (and at the
            // other call sites in this method) has the intended effect.
            seqs.Clear();
            seqs.TrimExcess();
            GC.Collect();
            GC.WaitForFullGCComplete();

            // Remove every peptide that is identical to one in the theoretical library.
            Progress.SetMessage("Removing identical peptides ...");
            pnovoseqs.ExceptWith(miss0.Keys);

            var pnovoArray = pnovoseqs.ToArray();

            pnovoseqs.Clear();
            GC.Collect();
            GC.WaitForFullGCComplete();

            // Group the miss0 keys by their length.
            miss0group = miss0.Keys.ToGroupDictionary(m => m.Length);

            var type2seqs = new List <Type2Sequence>();
            var type2_2   = new List <string>();

            // For each miss1 key, locate the first position that IsCleavageSite accepts
            // and split the key around it.
            foreach (var m in miss1.Keys)
            {
                int maxpos = -1;
                for (int i = 1; i < m.Length; i++)
                {
                    if (options.Enzyme.IsCleavageSite(m[i - 1], m[i], anotheraa))
                    {
                        maxpos = i - 1;
                        break;
                    }
                }

                if (maxpos == -1)
                {
                    throw new Exception("There is no misscleavage in " + m);
                }

                if (maxpos == 0)
                {
                    // The site is the first character; these keys are grouped by m.Substring(1) below.
                    type2_2.Add(m);
                }
                else
                {
                    // PriorSequence is the text before the site; PostSequence is the text after it.
                    type2seqs.Add(new Type2Sequence()
                    {
                        Sequence      = m,
                        PriorSequence = m.Substring(0, maxpos),
                        PostSequence  = m.Substring(maxpos + 1)
                    });
                }
            }
            miss1type2_1 = type2seqs.ToGroupDictionary(m => GetType2Key(m.Sequence));
            miss1type2_2 = type2_2.ToGroupDictionary(m => m.Substring(1));

            miss0type3 = miss0.Keys.ToGroupDictionary(m => GetType3Key(m));

            type2seqs.Clear();
            GC.Collect();
            GC.WaitForFullGCComplete();

            Progress.SetMessage("Finding mutation ...");
            Progress.SetRange(0, pnovoArray.Length);

            // pre100 and pre10000 are not read again in the rest of this method.
            var pre100   = pnovoArray.Length / 100;
            var pre10000 = pnovoArray.Length / 10000;

            if (pre10000 == 0)
            {
                pre10000 = 1;
            }

            // Split the sequences into options.ThreadCount bins and start one
            // background thread per bin, each running FindMutation.
            // NOTE(review): the division below throws if options.ThreadCount is 0 —
            // confirm the option is validated elsewhere.
            var totalCount           = pnovoArray.Length;
            var binSize              = totalCount / options.ThreadCount;
            List <FindParam> fparams = new List <FindParam>();
            List <Thread>    threads = new List <Thread>();
            var startPos             = 0;

            for (int i = 0; i < options.ThreadCount; i++)
            {
                int count;
                if (i == options.ThreadCount - 1)
                {
                    // The last bin takes whatever remains after the integer division.
                    count = pnovoArray.Length - startPos;
                }
                else
                {
                    count = binSize;
                }
                List <string> binSeq = new List <string>();
                binSeq.AddRange(pnovoArray.Skip(startPos).Take(count));
                startPos = startPos + count;

                var aparam = new FindParam()
                {
                    PnovoSeqs = binSeq
                };
                fparams.Add(aparam);

                Thread at = new Thread(this.FindMutation);
                threads.Add(at);
                at.IsBackground = true;
                at.Start(aparam);
            }

            pnovoArray = null;
            GC.Collect();
            GC.WaitForFullGCComplete();

            var startTime = DateTime.Now;

            // Poll the workers once per second until none of the threads is alive.
            Progress.SetRange(0, totalCount);
            while (true)
            {
                Thread.Sleep(1000);

                if (Progress.IsCancellationPending())
                {
                    throw new UserTerminatedException();
                }

                int finishedCount = fparams.Sum(m => m.FinishedCount);
                Progress.SetPosition(finishedCount);

                // Nothing finished yet: skip the estimate (it divides by finishedCount).
                // This also skips the thread-liveness check for this iteration.
                if (finishedCount == 0)
                {
                    continue;
                }

                // Linear extrapolation from the elapsed time: projected total duration
                // and projected wall-clock finish time.
                var curTime       = DateTime.Now;
                var costTime      = curTime - startTime;
                var totalCostTime = new TimeSpan(costTime.Ticks * totalCount / finishedCount);
                var finishTime    = curTime + new TimeSpan(costTime.Ticks * (totalCount - finishedCount) / finishedCount);

                StringBuilder costFormat = new StringBuilder();
                if (totalCostTime.TotalHours >= 2.0)
                {
                    costFormat.Append(Math.Truncate(totalCostTime.TotalHours).ToString() + " hours and ");
                }
                else if (totalCostTime.TotalHours >= 1.0)
                {
                    costFormat.Append("one hour and ");
                }
                costFormat.Append(totalCostTime.Minutes.ToString() + " minutes");

                Progress.SetMessage("Finding mutation {0} / {1}, will cost {2} and finish at {3} ...", finishedCount, totalCount, costFormat, finishTime);

                int finishedThreadCount = threads.Count(m => !m.IsAlive);
                if (finishedThreadCount == threads.Count)
                {
                    break;
                }
            }

            // Sum the per-thread counters.
            int type1 = fparams.Sum(m => m.Type1Count);
            int type2 = fparams.Sum(m => m.Type2Count);
            int type3 = fparams.Sum(m => m.Type3Count);

            // Append the type counts to the statistics file created earlier.
            using (StreamWriter sw = new StreamWriter(pNovoStat, true))
            {
                sw.WriteLine("Type1 Count\t" + type1.ToString());
                sw.WriteLine("Type2 Count\t" + type2.ToString());
                sw.WriteLine("Type3 Count\t" + type3.ToString());
            }

            // Flatten the sequences collected by every worker into one list.
            var singleMutation = (from f in fparams
                                  from s in f.Sequences
                                  select s).ToList();

            // The output name is derived from the database fasta file name via
            // FileUtils.ChangeExtension and placed in options.TargetDirectory.
            string newFastaFile = new FileInfo(options.TargetDirectory + "/" + FileUtils.ChangeExtension(new FileInfo(options.DatabaseFastaFile).Name, "mutation.fasta")).FullName;

            // Write the whole content of options.DatabaseFastaFile first, then one
            // ">reference" / sequence pair per mutation. Note that the digestion above
            // read options.TargetFastaFile, while the content copied here comes from
            // options.DatabaseFastaFile.
            using (StreamWriter sw = new StreamWriter(newFastaFile))
            {
                using (StreamReader sr = new StreamReader(options.DatabaseFastaFile))
                {
                    string line = sr.ReadToEnd();
                    sw.WriteLine(line);

                    foreach (var seq in singleMutation)
                    {
                        sw.WriteLine(">" + seq.Reference);
                        sw.WriteLine(seq.SeqString);
                    }
                }
            }

            // Second pass over the pNovo files: keep only spectra that have at least
            // one peptide whose PureSequence is among the mutation sequences.
            Progress.SetRange(0, options.PnovoFiles.Length);
            var sapSequences = new HashSet <string>(singleMutation.ConvertAll(m => m.SeqString));
            List <IIdentifiedSpectrum> allSpectra = new List <IIdentifiedSpectrum>();

            foreach (var pnovoFile in options.PnovoFiles)
            {
                Progress.SetMessage("Reading " + pnovoFile + " ...");
                var curSpectra = pnovoParser.ParsePeptides(pnovoFile, 10, options.MinScore);

                RemoveMissCleavagePeptides(anotheraa, curSpectra);

                curSpectra.RemoveAll(m => !m.Peptides.Any(n => sapSequences.Contains(n.PureSequence)));
                allSpectra.AddRange(curSpectra);
                Progress.Increment(1);
            }

            var pNovoPeptides = Path.Combine(options.TargetDirectory, "pNovo.SAP.peptides");

            new MascotPeptideTextFormat("\tFileScan\tSequence\tCharge\tScore\tDeltaScore").WriteToFile(pNovoPeptides, allSpectra);

            Progress.SetMessage("Finished.");
            Progress.End();

            return(new string[] { newFastaFile });
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Forwards the call to <c>sv.RSKey2StudyKey</c> while a <c>DebugLog</c> instance is alive.
 /// </summary>
 /// <param name="SID">Passed through unchanged to <c>sv.RSKey2StudyKey</c>.</param>
 /// <param name="rskey">Passed through unchanged to <c>sv.RSKey2StudyKey</c>.</param>
 /// <param name="patid">Set by <c>sv.RSKey2StudyKey</c>.</param>
 /// <param name="studykey">Set by <c>sv.RSKey2StudyKey</c>.</param>
 /// <param name="path">Set by <c>sv.RSKey2StudyKey</c>.</param>
 /// <param name="prm">Set by <c>sv.RSKey2StudyKey</c>.</param>
 /// <returns>The integer returned by <c>sv.RSKey2StudyKey</c>.</returns>
 public int RSKey2StudyKey(string SID, string rskey, out string patid, out string studykey, out string path, out FindParam prm)
 {
     // The DebugLog instance is disposed when the using block exits.
     using (new DebugLog())
     {
         int result = sv.RSKey2StudyKey(SID, rskey, out patid, out studykey, out path, out prm);
         return result;
     }
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Finds and replaces execute-module parameters: parses <c>FindParam</c> as a
        /// comma-separated list of key,value number pairs and maps every key that also
        /// appears in <c>ItemsToFind</c> to the matching entry of <c>ItemsToReplace</c>.
        /// </summary>
        /// <remarks>
        /// Keys without a match in <c>ItemsToFind</c> are omitted from the result. When no
        /// key matches at all, the original <c>FindParam</c> text is returned behind the
        /// prepend string. A trailing numeric token that has no value after it is ignored.
        /// The result is also stored in <c>NewParameterString</c>.
        /// </remarks>
        /// <returns>A StringBuilder that contains the new execute-module parameters.</returns>
        private StringBuilder FindAndReplaceParameter()
        {
            Stopwatch stopwatch = Stopwatch.StartNew();

            // initialize Dictionaries
            Dictionary <int, int> oldExecuteParameters = new Dictionary <int, int>();
            Dictionary <int, int> newExecuteParameters = new Dictionary <int, int>();

            // Split once instead of on every loop test and element access.
            var tokens = FindParam.Split(',');

            // Convert the execute module entry to a Dictionary. The loop bound
            // guarantees tokens[i + 1] exists, so a dangling last key cannot
            // raise IndexOutOfRangeException.
            for (int i = 0; i + 1 < tokens.Length; i++)
            {
                // verify both items are numbers
                if (int.TryParse(tokens[i], out int key) && int.TryParse(tokens[i + 1], out int value))
                {
                    // add key, value pairs
                    oldExecuteParameters.Add(key, value);

                    // skip the value token, it was consumed together with its key
                    i++;
                }
            }

            // are there any matching parameters?
            if (!oldExecuteParameters.Keys.Any(x => ItemsToFind.Contains(x.ToString())))
            {
                // NO. return old parameters.
                NewParameterString = new StringBuilder(MyResources.Strings_DrIPNet_prepend + string.Join(",", FindParam));
            }
            else
            {
                // Keys that occur repeatedly in ItemsToFind.
                // NOTE(review): Count() > 2 only selects keys that appear three or more
                // times — confirm this threshold is intended.
                List <int> duplicateEntries = ItemsToFind.GroupBy(x => x)
                                              .Where(g => g.Count() > 2)
                                              .Select(y => int.Parse(y.Key))
                                              .ToList();

                // scan whole Execute parameter entry
                foreach (var entry in oldExecuteParameters)
                {
                    int currentKey    = entry.Key;
                    int originalValue = entry.Value;

                    // scan whole .csv file for the current key
                    for (int i = 0; i < ItemsToFind.Count; i++)
                    {
                        // string matching would match strings of "4567" and "4567x"
                        // so match them as numbers
                        if (!int.TryParse(ItemsToFind.ElementAt(i), out int oldKey) || currentKey != oldKey)
                        {
                            continue;
                        }

                        // initialize the replacement key
                        int replacementKey = default;

                        // if the register has multiple entries for each regulator,
                        if (duplicateEntries.Contains(oldKey))
                        {
                            // use the one appropriate to the current regulator
                            // eg: second for regulator 2
                            int.TryParse(ItemsToReplace.ElementAt(GetDuplicateIndex(i)), out replacementKey);
                        }
                        else
                        {
                            // otherwise use the corresponding new register value
                            // from the excel file.
                            int.TryParse(ItemsToReplace.ElementAt(i), out replacementKey);
                        }

                        // modify MB_ActiveProfile instead of using original value
                        // TODO: This should be aware of if the user entry or a .csv file in use.
                        newExecuteParameters.Add(replacementKey, ViewModel.ChangeActiveProfile == currentKey ? activeProfile - 1 : originalValue);

                        // stop scanning and move on to next entry.
                        break;
                    }
                }

                // return new parameters.
                NewParameterString = new StringBuilder(MyResources.Strings_DrIPNet_prepend + string.Join(",", newExecuteParameters.Select(x => x.Key + "," + x.Value)));
            }

            Debug.WriteLine($"find: {FindParam}\r\nrep: {NewParameterString}");
            Debug.WriteLine($"-----------------------------------------------------> total time: {stopwatch.ElapsedMilliseconds}");

            // return new StringBuilder
            return(NewParameterString);
        }