/// <summary>
/// Builds the read-level summary: total, feature, excluded, genome and too-short read counts.
/// </summary>
/// <param name="allmapped">Feature groups whose SAM locations identify the queries assigned to features.</param>
/// <param name="excludeQueries">Query names whose counts are summed into <c>ExcludeRead</c>.</param>
/// <param name="reads">Not used by this method; kept so the signature stays unchanged for callers.</param>
/// <param name="totalQueries">All queries; used for the total-count fallback and for the genome read count.</param>
/// <returns>The populated <see cref="ReadSummary"/>.</returns>
protected ReadSummary GetReadSummary(List <FeatureItemGroup> allmapped, HashSet <string> excludeQueries, List <SAMAlignedItem> reads, List <QueryInfo> totalQueries)
        {
            var result = new ReadSummary();

            // Prefer the total taken from the count data when a count file exists;
            // otherwise fall back to the number of queries.
            if (File.Exists(options.CountFile))
            {
                result.TotalRead = Counts.GetTotalCount();
            }
            else
            {
                result.TotalRead = totalQueries.Count;
            }

            // Original query names of every read attached to a feature location.
            var featureQueries = new HashSet <string>(from fig in allmapped
                                                      from fi in fig
                                                      from loc in fi.Locations
                                                      from sl in loc.SamLocations
                                                      select sl.SamLocation.Parent.OriginalQname);

            result.FeatureRead = featureQueries.Sum(l => Counts.GetCount(l));

            result.ExcludeRead = excludeQueries.Sum(l => Counts.GetCount(l));

            // Genome reads: queries that either carry no NTA tag or end with it, are not
            // feature reads, have zero mismatches and are at least TooShortReadLength long.
            // Names are reduced to the part before the NTA tag and de-duplicated before summing.
            result.GenomeRead = (from query in totalQueries
                                 where (!query.Name.Contains(SmallRNAConsts.NTA_TAG) || query.Name.EndsWith(SmallRNAConsts.NTA_TAG))
                                 let originalQname = query.Name.StringBefore(SmallRNAConsts.NTA_TAG)
                                 where !featureQueries.Contains(originalQname) && query.Mismatch == 0 && query.Length >= options.TooShortReadLength
                                 select originalQname).Distinct().Sum(m => Counts.GetCount(m));

            if (Counts.ItemMap != null)
            {
                // Use the same configurable threshold as the genome-read filter above so the
                // two categories stay complementary (previously a hard-coded 20 was used here).
                result.TooShortRead = (from read in Counts.ItemMap.Values
                                       where !featureQueries.Contains(read.Qname) && read.SequenceLength < options.TooShortReadLength
                                       select read.Count).Sum();
            }
            else
            {
                // Without per-read items there is nothing to measure lengths on.
                result.TooShortRead = 0;
            }

            return result;
        }
        /// <summary>
        /// Runs the counting pipeline: loads the sequence regions, parses the candidate reads,
        /// maps the reads onto the regions, writes the count output plus the optional detail
        /// files, and finally writes a ".info" summary file.
        /// </summary>
        /// <returns>The paths of the files recorded as outputs of this run.</returns>
        /// <exception cref="Exception">Thrown when no sequence region is available.</exception>
        public override IEnumerable <string> Process()
        {
            var outputFiles = new List<string>();

            // Load the regions that reads will be mapped against; without any there is nothing to count.
            var regions = this.MappedOptions.GetSequenceRegions();
            Progress.SetMessage("There are {0} coordinate entries", regions.Count);
            if (regions.Count == 0)
            {
                throw new Exception(string.Format("No coordinate found for {0} in file {1}", options.GtfFeatureName,
                                                  options.CoordinateFile));
            }

            var countFile = options.OutputFile;
            outputFiles.Add(countFile);

            // Parse the candidate reads; the full list of queries comes back through the out parameter.
            List<QueryInfo> totalQueries;
            var reads = ParseCandidates(options.InputFile, countFile, out totalQueries);

            // When every query produced a read (input holds mapped reads only) and a count file
            // exists, take the total from the count data; otherwise sum the counts of the
            // distinct query names with the NTA tag part stripped.
            int totalQueryCount = (reads.Count == totalQueries.Count && File.Exists(options.CountFile))
                ? Counts.GetTotalCount()
                : totalQueries.Select(query => query.Name.StringBefore(SmallRNAConsts.NTA_TAG))
                              .Distinct()
                              .Sum(name => Counts.GetCount(name));

            // Reads carry NTA tags but NTA is disabled: keep only reads whose name ends with the tag.
            if (reads.Count > 0 && reads[0].Qname.Contains(SmallRNAConsts.NTA_TAG) && !options.NTA)
            {
                reads.RemoveAll(read => !read.Qname.EndsWith(SmallRNAConsts.NTA_TAG));
            }

            var totalMappedCount = reads.Select(read => read.Qname.StringBefore(SmallRNAConsts.NTA_TAG))
                                        .Distinct()
                                        .Sum(name => Counts.GetCount(name));

            Progress.SetMessage("mapping reads to sequence regions...");
            MapReadToSequenceRegion(regions, reads);

            // A read counts as a feature read when at least one of its locations has a feature.
            var featureReadCount = (from read in reads
                                    where read.Locations.Any(location => location.Features.Count > 0)
                                    select read).Sum(read => read.QueryCount);

            Console.WriteLine("feature reads = {0}", featureReadCount);

            // Group regions by name, drop the groups without any estimated count,
            // then merge locations before grouping by identical query.
            var featureItems = regions.GroupByName();
            featureItems.RemoveAll(item => item.GetEstimatedCount() == 0);
            featureItems.ForEach(item => item.CombineLocations());

            var featureGroups = featureItems.GroupByIdenticalQuery();

            // The mapping detail file is written before the groups are sorted below.
            if (!options.NoMappedFile)
            {
                Progress.SetMessage("output mapping details...");
                var detailFile = countFile + ".mapped.xml";
                new FeatureItemGroupXmlFormat().WriteToFile(detailFile, featureGroups);
                outputFiles.Add(detailFile);
            }

            // Main count output, ordered by estimated count from highest to lowest.
            Progress.SetMessage("write result ...");
            featureGroups.Sort((left, right) => right.GetEstimatedCount().CompareTo(left.GetEstimatedCount()));
            new FeatureItemGroupCountWriter().WriteToFile(countFile, featureGroups);

            if (options.ExportLengthDistribution)
            {
                var lengthFile = countFile + ".length";
                new FeatureItemGroupReadLengthWriter().WriteToFile(lengthFile, featureGroups);
                outputFiles.Add(lengthFile);
            }

            if (options.ExportSequenceCount)
            {
                var sequenceFile = countFile + ".seqcount";
                new FeatureItemGroupSequenceWriter().WriteToFile(sequenceFile, featureGroups);
                outputFiles.Add(sequenceFile);
            }

            if (options.UnmappedFastq)
            {
                Progress.SetMessage("output unmapped query...");
                var unmappedFile = Path.ChangeExtension(countFile, ".unmapped.fastq.gz");

                // Names of reads that have at least one location; passed to the extractor as the exclusion set.
                var mappedNames = new HashSet<string>(reads.Where(read => read.Locations.Count > 0)
                                                           .Select(read => read.Qname));

                // Extract from the FASTQ file when it exists, otherwise from the input file itself.
                if (File.Exists(options.FastqFile))
                {
                    var fastqExtractor = new FastqExtractorFromFastq { Progress = Progress };
                    fastqExtractor.Extract(options.FastqFile, unmappedFile, mappedNames, options.CountFile);
                }
                else
                {
                    var bamExtractor = new FastqExtractorFromBam { Progress = Progress };
                    bamExtractor.Extract(options.InputFile, unmappedFile, mappedNames, options.CountFile);
                }

                outputFiles.Add(unmappedFile);
            }

            Progress.SetMessage("summarizing ...");
            var infoFile = Path.ChangeExtension(countFile, ".info");

            using (var writer = new StreamWriter(infoFile))
            {
                // Header lines describing the run settings.
                writer.WriteLine("#file\t{0}", options.InputFile);
                writer.WriteLine("#coordinate\t{0}", options.CoordinateFile);
                writer.WriteLine("#minLength\t{0}", options.MinimumReadLength);
                writer.WriteLine("#maxMismatchCount\t{0}", options.MaximumMismatch);
                if (File.Exists(options.CountFile))
                {
                    writer.WriteLine("#countFile\t{0}", options.CountFile);
                }

                // Summary counts.
                writer.WriteLine("TotalReads\t{0}", totalQueryCount);
                writer.WriteLine("MappedReads\t{0}", totalMappedCount);
                writer.WriteLine("MultipleMappedReads\t{0}",
                                 reads.Where(read => read.Locations.Count > 1).Sum(read => read.QueryCount));
                writer.WriteLine("FeatureReads\t{0}", featureReadCount);
            }

            outputFiles.Add(infoFile);

            Progress.End();

            return outputFiles;
        }