public int Extract(string sourceFile, string targetFile, IEnumerable <string> exceptQueryNames, string countFile)
        {
            int result = 0;

            var except = new HashSet <string>(exceptQueryNames);

            SmallRNACountMap cm      = new SmallRNACountMap();
            StreamWriter     swCount = null;

            if (File.Exists(countFile))
            {
                var oldCm = new SmallRNACountMap(countFile);
                foreach (var c in oldCm.Counts)
                {
                    cm.Counts[c.Key.StringBefore(SmallRNAConsts.NTA_TAG)] = c.Value;
                }
                swCount = new StreamWriter(targetFile + ".dupcount");
            }

            try
            {
                using (var sw = StreamUtils.GetWriter(targetFile, targetFile.ToLower().EndsWith(".gz")))
                {
                    using (var sr = SAMFactory.GetReader(sourceFile, true))
                    {
                        string line;
                        var    count = 0;
                        while ((line = sr.ReadLine()) != null)
                        {
                            count++;

                            if (count % 100000 == 0)
                            {
                                Progress.SetMessage("{0} reads", count);
                                if (Progress.IsCancellationPending())
                                {
                                    throw new UserTerminatedException();
                                }
                            }

                            var ss = SAMUtils.Parse <SAMItemSlim>(line);
                            ss.Qname = ss.Qname.StringBefore(SmallRNAConsts.NTA_TAG);
                            if (except.Contains(ss.Qname))
                            {
                                continue;
                            }

                            if (Filter != null && !Filter.Accept(ss))
                            {
                                continue;
                            }

                            except.Add(ss.Qname);
                            ss.WriteFastq(sw);

                            if (swCount != null)
                            {
                                swCount.WriteLine("{0}\t{1}", ss.Qname, cm.Counts[ss.Qname]);
                            }

                            result++;
                        }
                    }
                }
            }
            finally
            {
                if (swCount != null)
                {
                    swCount.Close();
                }
            }
            return(result);
        }
示例#2
0
 private static SAMItemSlim LineToSamItem(string line)
 {
     return(SAMUtils.Parse <SAMItemSlim>(line));
 }