Ejemplo n.º 1
0
        /// <summary>
        /// Reads the given file line by line and builds one <typeparamref name="T"/> item for
        /// every line that passes all checks of <c>_filter</c> (flags, mismatch count, query
        /// name, read length, CIGAR and locus, in that order).
        /// </summary>
        /// <param name="fileName">Path handed to <c>SAMFactory.GetReader</c>.</param>
        /// <param name="totalQueries">
        /// Receives one <c>QueryInfo</c> per line read, including lines that are later rejected.
        /// Its mismatch count and length are only filled in for lines that pass the flag filter.
        /// </param>
        /// <returns>The items built from the accepted lines, in the order they were read.</returns>
        /// <exception cref="UserTerminatedException">
        /// Thrown when cancellation is pending; this is polled once every 1000 lines.
        /// </exception>
        protected override List <T> DoBuild <T>(string fileName, out List <QueryInfo> totalQueries)
        {
            var accepted = new List <T>();

            _format      = _options.GetSAMFormat();
            totalQueries = new List <QueryInfo>();

            using (var reader = SAMFactory.GetReader(fileName, true))
            {
                int linesRead = 0;
                int kept      = 0;

                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    linesRead++;

                    // Poll for cancellation once every 1000 lines.
                    if (linesRead % 1000 == 0 && Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    var fields    = line.Split('\t');
                    var queryName = fields[SAMFormatConst.QNAME_INDEX];

                    // Every line is recorded as a query, whether or not it is accepted below.
                    var query = new QueryInfo(queryName);
                    totalQueries.Add(query);

                    var flags = (SAMFlags)int.Parse(fields[SAMFormatConst.FLAG_INDEX]);
                    if (!_filter.AcceptFlags(flags))
                    {
                        continue;
                    }

                    var mismatches = _format.GetNumberOfMismatch(fields);
                    var sequence   = fields[SAMFormatConst.SEQ_INDEX];

                    query.Mismatch = mismatches;
                    query.Length   = sequence.Length;

                    // Short-circuit evaluation keeps the filter order: mismatch, name, length.
                    if (!_filter.AcceptMismatch(mismatches) ||
                        !_filter.AcceptQueryName(queryName) ||
                        !_filter.AcceptLength(sequence.Length))
                    {
                        continue;
                    }

                    var cigar = fields[SAMFormatConst.CIGAR_INDEX];
                    if (!_filter.AcceptCigar(cigar))
                    {
                        continue;
                    }

                    var chromosome = fields[SAMFormatConst.RNAME_INDEX].StringAfter("chr");
                    var startPos   = int.Parse(fields[SAMFormatConst.POS_INDEX]);
                    var endPos     = SAMUtils.ParseEnd(startPos, cigar);

                    bool onReverseStrand = flags.HasFlag(SAMFlags.QueryOnReverseStrand);
                    char strand          = onReverseStrand ? '-' : '+';

                    var item     = new T();
                    var location = new SAMAlignedLocation(item);
                    location.Seqname = chromosome;
                    location.Start   = startPos;
                    location.End     = endPos;
                    location.Strand  = strand;

                    if (!_filter.AcceptLocus(location))
                    {
                        continue;
                    }

                    // The stored sequence is reverse-complemented for reverse-strand reads.
                    item.Qname    = queryName;
                    item.Sequence = onReverseStrand
                        ? SequenceUtils.GetReverseComplementedSequence(sequence)
                        : sequence;

                    location.AlignmentScore    = _format.GetAlignmentScore(fields);
                    location.Cigar             = cigar;
                    location.NumberOfMismatch  = mismatches;
                    location.MismatchPositions = _format.GetMismatchPositions(fields);

                    if (_format.HasAlternativeHits)
                    {
                        _format.ParseAlternativeHits(fields, item);
                    }

                    accepted.Add(item);
                    kept++;

                    // Report progress after every 100 accepted reads.
                    if (kept % 100 == 0)
                    {
                        Progress.SetMessage("{0} feature reads from {1} reads", kept, linesRead);
                    }
                }
            }

            return accepted;
        }
        /// <summary>
        /// Reads the given file line by line and builds one <typeparamref name="T"/> item, with a
        /// single aligned location, for every mapped read that satisfies the thresholds in
        /// <c>_options</c> (maximum mismatch, minimum/maximum read length) and
        /// <c>AcceptQueryName</c>.
        /// </summary>
        /// <param name="fileName">Path handed to <c>SAMFactory.GetReader</c>.</param>
        /// <param name="totalQueries">
        /// Receives one <c>QueryInfo</c> per line read, including lines that are later rejected.
        /// Mismatch count, length and <c>NoPenaltyMutation</c> are only filled in for reads whose
        /// flag does not contain <c>SAMFlags.UnmappedQuery</c>.
        /// </param>
        /// <returns>The items built from the accepted lines, in the order they were read.</returns>
        /// <exception cref="UserTerminatedException">
        /// Thrown when cancellation is pending; this is polled once every 1000 lines.
        /// </exception>
        /// <remarks>
        /// <c>NoPenaltyMutation</c> is always set to 0 here: the <c>T2cAsNoPenaltyMutation</c>
        /// option is not applied by this builder.
        /// </remarks>
        protected override List <T> DoBuild <T>(string fileName, out List <QueryInfo> totalQueries)
        {
            var result = new List <T>();

            _format = _options.GetSAMFormat();

            totalQueries = new List <QueryInfo>();

            using (var sr = SAMFactory.GetReader(fileName, true))
            {
                int    count        = 0;
                int    waitingcount = 0;
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // Poll for cancellation once every 1000 lines (including before the first).
                    if (count % 1000 == 0 && Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    if (count % 100000 == 0 && count > 0)
                    {
                        Progress.SetMessage("{0} candidates from {1} reads", waitingcount, count);
                    }

                    count++;

                    // Split once and take the query name from the fields instead of scanning
                    // the line a second time.
                    var parts = line.Split('\t');
                    var qname = parts[SAMFormatConst.QNAME_INDEX];

                    // Every line is recorded as a query, whether or not it is accepted below.
                    var qi = new QueryInfo(qname);
                    totalQueries.Add(qi);

                    SAMFlags flag = (SAMFlags)int.Parse(parts[SAMFormatConst.FLAG_INDEX]);

                    // Unmapped reads carry no alignment to report.
                    if (flag.HasFlag(SAMFlags.UnmappedQuery))
                    {
                        continue;
                    }

                    var mismatchCount = _format.GetNumberOfMismatch(parts);
                    var seq           = parts[SAMFormatConst.SEQ_INDEX];

                    qi.Mismatch          = mismatchCount;
                    qi.Length            = seq.Length;
                    qi.NoPenaltyMutation = 0;

                    // Too many mismatches.
                    if (mismatchCount > _options.MaximumMismatch)
                    {
                        continue;
                    }

                    if (!AcceptQueryName(qname))
                    {
                        continue;
                    }

                    // Too short or too long.
                    if (seq.Length < _options.MinimumReadLength || seq.Length > _options.MaximumReadLength)
                    {
                        continue;
                    }

                    // The CIGAR is stored on the location but is not used for filtering here.
                    var cigar = parts[SAMFormatConst.CIGAR_INDEX];

                    bool isReversed = flag.HasFlag(SAMFlags.QueryOnReverseStrand);
                    char strand     = isReversed ? '-' : '+';
                    if (isReversed)
                    {
                        // The stored sequence is reverse-complemented for reverse-strand reads.
                        seq = SequenceUtils.GetReverseComplementedSequence(seq);
                    }

                    var score = _format.GetAlignmentScore(parts);

                    var sam = new T()
                    {
                        Qname    = qname,
                        Sequence = seq
                    };

                    var seqname = parts[SAMFormatConst.RNAME_INDEX];
                    var loc     = new SAMAlignedLocation(sam)
                    {
                        Seqname           = seqname,
                        Start             = int.Parse(parts[SAMFormatConst.POS_INDEX]),
                        Strand            = strand,
                        Cigar             = cigar,
                        NumberOfMismatch  = mismatchCount,
                        AlignmentScore    = score,
                        MismatchPositions = _format.GetMismatchPositions(parts)
                    };

                    loc.ParseEnd(sam.Sequence);
                    sam.AddLocation(loc);

                    if (_format.HasAlternativeHits)
                    {
                        _format.ParseAlternativeHits(parts, sam);
                    }

                    result.Add(sam);

                    waitingcount++;
                }

                Progress.SetMessage("Finally, there are {0} candidates from {1} reads", waitingcount, count);
            }

            return result;
        }