/// <summary>
/// Reads the sequences from the configured input file, orders them by name and writes them
/// to the configured output file in FASTA format.
/// </summary>
/// <remarks>
/// Each name is split at "_" into a chromosome part (with the "chr" prefix stripped) and an
/// optional suffix. Sequences without a suffix come first, ordered by
/// <c>GenomeUtils.CompareChromosome</c>; sequences with a suffix follow, ordered by chromosome
/// and then by suffix using <c>string.CompareTo</c>.
/// </remarks>
/// <returns>A single-element collection holding the output file name.</returns>
public override IEnumerable<string> Process()
        {
            Progress.SetMessage("Reading sequences from: " + _options.InputFile + "...");
            var sequences = SequenceUtils.Read(_options.InputFile);

            sequences.Sort((left, right) =>
            {
                var leftChr = left.Name.StringBefore("_").StringAfter("chr");
                var leftSuffix = left.Name.Contains("_") ? left.Name.StringAfter("_") : string.Empty;
                var rightChr = right.Name.StringBefore("_").StringAfter("chr");
                var rightSuffix = right.Name.Contains("_") ? right.Name.StringAfter("_") : string.Empty;

                bool leftIsPlain = string.IsNullOrWhiteSpace(leftSuffix);
                bool rightIsPlain = string.IsNullOrWhiteSpace(rightSuffix);

                // Exactly one side has a suffix: the suffix-free name always sorts first.
                if (leftIsPlain != rightIsPlain)
                {
                    return leftIsPlain ? -1 : 1;
                }

                // Both sides are of the same kind; the chromosome decides first.
                var order = GenomeUtils.CompareChromosome(leftChr, rightChr);
                if (order != 0 || leftIsPlain)
                {
                    return order;
                }

                // Same chromosome and both have a suffix: fall back to the suffix text.
                return leftSuffix.CompareTo(rightSuffix);
            });

            Progress.SetMessage("Writing sequences to: " + _options.OutputFile + "...");
            SequenceUtils.Write(new FastaFormat(), _options.OutputFile, sequences);

            Progress.SetMessage("Finished.");

            return new[] { _options.OutputFile };
        }
        /// <summary>
        /// Loads a FASTA file, passes every sequence string through <c>MiRnaToDna</c> and saves
        /// the converted sequences to a new FASTA file next to the input.
        /// </summary>
        /// <param name="fileName">Path of the FASTA file to convert.</param>
        /// <returns>
        /// A single-element collection holding the name of the written file. The name is the input
        /// name with ".dna" inserted in front of its extension (e.g. "x.fasta" becomes "x.dna.fasta").
        /// </returns>
        public override IEnumerable<string> Process(string fileName)
        {
            Progress.SetMessage("Loading sequences from " + fileName + "...");
            var seqs = SequenceUtils.Read(new FastaFormat(), fileName);

            // Fixed the misspelled progress message ("Converint").
            Progress.SetMessage("Converting {0} sequences ...", seqs.Count);
            foreach (var seq in seqs)
            {
                seq.SeqString = MiRnaToDna(seq.SeqString);
            }

            // ChangeExtension replaces the existing extension, so re-append it after ".dna".
            var result = Path.ChangeExtension(fileName, ".dna" + Path.GetExtension(fileName));

            Progress.SetMessage("Saving {0} sequences to {1}", seqs.Count, result);
            SequenceUtils.Write(new FastaFormat(), result, seqs);

            Progress.SetMessage("Finished!");
            return new string[] { result };
        }
        /// <summary>
        /// Matches candidate MS2 items against a spectral library to predict variant peptides
        /// ("SAP" in the progress messages) and writes the predictions out.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. Read the library file, preprocess it, group it by charge and store on every library
        ///    item the sorted, distinct residues of its peptide that are keys of
        ///    <c>options.SubstitutionDeltaMassMap</c>.
        /// 2. Collect, per experiment, the first-scan numbers referenced by the library and, when the
        ///    peptides file exists, by the peptides in it. The map is handed to
        ///    <c>GetCandidateMs2ItemList</c> (presumably so those scans are skipped - confirm there).
        /// 3. Call <c>FindCandidates</c>, which is expected to fill the <c>predicted</c> list.
        /// 4. For each group of predictions sharing the same file scans, group by library item, sort
        ///    the groups with <c>CompareSapPrecitedList</c> and keep either the first flagged
        ///    <c>IsExpect</c> entry of the top group, or the top group plus all following groups
        ///    that compare equal to it.
        /// 5. If the matched file exists, write a validation report to the output file. Otherwise
        ///    write the prediction table, build the variant sequences, drop duplicates and any
        ///    sequence already contained in a database entry, and write database plus variants
        ///    as FASTA to the output file.
        /// </remarks>
        /// <returns>
        /// The output file name and the output table file name (both are returned regardless of
        /// which branch wrote files).
        /// </returns>
        public override IEnumerable<string> Process()
        {
            var rawFileByExperiment = options.RawFiles.ToDictionary(f => Path.GetFileNameWithoutExtension(f));

            Progress.SetMessage("Reading library file ...");
            var libraryItems = new MS2ItemXmlFormat().ReadFromFile(options.LibraryFile);

            PreprocessingMS2ItemList(libraryItems);

            var libraryByCharge = libraryItems
                .GroupBy(item => item.Charge)
                .ToDictionary(grp => grp.Key, grp => grp.ToList());

            Progress.SetMessage("Building library sequence amino acid composition ...");
            foreach (var chargeItems in libraryByCharge.Values)
            {
                foreach (var item in chargeItems)
                {
                    // Only residues that have a substitution entry are of interest.
                    item.AminoacidCompsition = item.Peptide
                        .Where(aa => options.SubstitutionDeltaMassMap.ContainsKey(aa))
                        .Distinct()
                        .OrderBy(aa => aa)
                        .ToArray();
                }
            }

            var scansByExperiment = libraryItems
                .SelectMany(item => item.FileScans)
                .ToList()
                .GroupBy(fs => fs.Experimental)
                .ToDictionary(grp => grp.Key, grp => new HashSet<int>(grp.Select(fs => fs.FirstScan)));

            if (File.Exists(options.PeptidesFile))
            {
                Progress.SetMessage("Reading peptides file used for excluding scan ...");
                var identified = new MascotPeptideTextFormat().ReadFromFile(options.PeptidesFile);
                foreach (var peptide in identified)
                {
                    var fileScan = peptide.Query.FileScan;

                    HashSet<int> knownScans;
                    if (!scansByExperiment.TryGetValue(fileScan.Experimental, out knownScans))
                    {
                        knownScans = new HashSet<int>();
                        scansByExperiment[fileScan.Experimental] = knownScans;
                    }
                    knownScans.Add(fileScan.FirstScan);
                }
            }

            Progress.SetMessage("Reading MS2/MS3 data ...");
            var candidates = GetCandidateMs2ItemList(rawFileByExperiment, scansByExperiment);

            PreprocessingMS2ItemList(candidates);

            Progress.SetMessage("Finding SAP ...");
            var predicted = new List<SapPredicted>();

            // Overall delta-mass window across every configured substitution.
            var deltaMassLists = options.SubstitutionDeltaMassMap.Values;
            var minDeltaMass = deltaMassLists.Min(list => list.Min(sub => sub.DeltaMass));
            var maxDeltaMass = deltaMassLists.Max(list => list.Max(sub => sub.DeltaMass));

            Progress.SetRange(0, candidates.Count);
            Progress.Begin();

            FindCandidates(libraryByCharge, candidates, predicted, minDeltaMass, maxDeltaMass);

            var byFileScans = predicted.ToGroupDictionary(p => p.Ms2.GetFileScans());

            // Rebuild the prediction list with only the top-ranked entries of each scan group.
            predicted.Clear();
            foreach (var sameScan in byFileScans.Values)
            {
                var ranked = sameScan.ToGroupDictionary(p => p.LibMs2).Values.ToList();
                ranked.Sort((a, b) => CompareSapPrecitedList(a, b));

                var best = ranked[0];
                var expected = best.FirstOrDefault(p => p.IsExpect);
                if (expected != null)
                {
                    predicted.Add(expected);
                    continue;
                }

                predicted.AddRange(best);

                // Keep every following group that ties with the best one; stop at the first that does not.
                for (int k = 1; k < ranked.Count && CompareSapPrecitedList(best, ranked[k]) == 0; k++)
                {
                    predicted.AddRange(ranked[k]);
                }
            }

            if (File.Exists(options.MatchedFile))
            {
                new SapPredictedValidationWriter(options.MatchedFile).WriteToFile(options.OutputFile, predicted);
            }
            else
            {
                new SapPredictedWriter().WriteToFile(options.OutputTableFile, predicted);

                Progress.SetMessage("Generating SAP sequence ...");
                var variantSeqs = new List<Sequence>();
                foreach (var prediction in predicted)
                {
                    var pureSeq = PeptideUtils.GetPureSequence(prediction.LibMs2.Peptide);
                    var target = prediction.Target;
                    var variantType = target.TargetType;

                    if (variantType == VariantType.SingleAminoacidPolymorphism)
                    {
                        // Substitute every occurrence of the source residue with each target residue.
                        for (int pos = 0; pos < pureSeq.Length; pos++)
                        {
                            if (pureSeq[pos] != target.Source[0])
                            {
                                continue;
                            }

                            foreach (var replacement in target.Target)
                            {
                                // Substring(0, 0) is empty, so position 0 needs no special case.
                                string mutated = pureSeq.Substring(0, pos) + replacement + pureSeq.Substring(pos + 1);

                                var header = string.Format("sp|SAP_{0}_{1}|{2}_{3}_{4}_{5}", mutated, variantType, pureSeq, target.Source, pos + 1, replacement);
                                variantSeqs.Add(new Sequence(header, mutated));
                            }
                        }

                        continue;
                    }

                    foreach (var variant in target.Target)
                    {
                        string headerFormat;
                        string changedPart;

                        if (variantType == VariantType.NTerminalLoss)
                        {
                            headerFormat = "sp|SAP_{0}_{1}|{2}_loss_{3}";
                            changedPart = pureSeq.Substring(0, pureSeq.Length - variant.Length);
                        }
                        else if (variantType == VariantType.CTerminalLoss)
                        {
                            headerFormat = "sp|SAP_{0}_{1}|{2}_loss_{3}";
                            changedPart = pureSeq.Substring(variant.Length);
                        }
                        else if (variantType == VariantType.NTerminalExtension)
                        {
                            headerFormat = "sp|SAP_{0}_{1}|{2}_ext_{3}";
                            changedPart = variant.Substring(0, variant.Length - pureSeq.Length);
                        }
                        else if (variantType == VariantType.CTerminalExtension)
                        {
                            headerFormat = "sp|SAP_{0}_{1}|{2}_ext_{3}";
                            changedPart = variant.Substring(pureSeq.Length);
                        }
                        else
                        {
                            throw new Exception("I don't know how to deal with " + variantType.ToString());
                        }

                        var header = string.Format(headerFormat, variant, variantType, pureSeq, changedPart);
                        variantSeqs.Add(new Sequence(header, variant));
                    }
                }

                // One entry per distinct sequence string; the first one encountered wins.
                variantSeqs = variantSeqs
                    .GroupBy(s => s.SeqString)
                    .Select(grp => grp.First())
                    .ToList();

                Progress.SetMessage("Reading database {0} ...", options.DatabaseFastaFile);
                var databases = SequenceUtils.Read(options.DatabaseFastaFile);

                Progress.SetMessage("Removing variant sequences which are already existed in database ...");
                // Walk backwards so RemoveAt does not shift the entries still to be visited.
                for (int idx = variantSeqs.Count - 1; idx >= 0; idx--)
                {
                    var candidateSeq = variantSeqs[idx].SeqString;
                    if (databases.Any(db => db.SeqString.Contains(candidateSeq)))
                    {
                        variantSeqs.RemoveAt(idx);
                    }
                }
                databases.AddRange(variantSeqs);

                Progress.SetMessage("Writing SAP sequence and original database to {0} ...", options.OutputFile);

                SequenceUtils.Write(new FastaFormat(), options.OutputFile, databases);
            }

            Progress.End();

            return new string[] { options.OutputFile, options.OutputTableFile };
        }