コード例 #1
0
        /// <summary>
        /// Loads an XML file and creates one <see cref="SequenceRegionMapped"/> for every
        /// <c>regions/region</c> element, linking each region with the aligned locations
        /// named by its <c>query</c> child elements.
        /// </summary>
        /// <param name="fileName">Path of the XML file to load.</param>
        /// <returns>The mapped regions, in the order their elements are enumerated.</returns>
        public List <SequenceRegionMapped> ReadFromFile(string fileName)
        {
            XElement document = XElement.Load(fileName);

            // Aligned locations of the whole file; looked up below with keys built by
            // SAMAlignedLocation.GetKey(qname, loc).
            var locationByKey = document.ToSAMAlignedItems().ToSAMAlignedLocationMap();

            var mappedRegions = new List <SequenceRegionMapped>();

            foreach (var regionNode in document.Element("regions").Elements("region"))
            {
                var mapped = new SequenceRegionMapped();
                mappedRegions.Add(mapped);

                mapped.Region      = new SequenceRegion();
                mapped.Region.Name = regionNode.Attribute("name").Value;
                mapped.Region.ParseLocation(regionNode);

                foreach (var queryNode in regionNode.Elements("query"))
                {
                    var lookupKey = SAMAlignedLocation.GetKey(
                        queryNode.Attribute("qname").Value,
                        queryNode.Attribute("loc").Value);

                    // The indexer throws when the query refers to a location that is not in the map.
                    var aligned = locationByKey[lookupKey];

                    // Link both directions: region -> location and location -> region.
                    mapped.AlignedLocations.Add(aligned);
                    aligned.Features.Add(mapped.Region);
                }
            }

            locationByKey.Clear();

            return mappedRegions;
        }
コード例 #2
0
        /// <summary>
        /// Loads an XML file and rebuilds the <c>subjectResult/subjectGroup/subject/region</c>
        /// hierarchy as <see cref="MappedItemGroup"/> / <see cref="MappedItem"/> /
        /// <see cref="SequenceRegionMapped"/> objects, linking every region with the aligned
        /// locations named by its <c>query</c> child elements.
        /// </summary>
        /// <param name="fileName">Path of the XML file to load.</param>
        /// <returns>One group per <c>subjectGroup</c> element.</returns>
        public List <MappedItemGroup> ReadFromFile(string fileName)
        {
            XElement document = XElement.Load(fileName);

            // Aligned locations of the whole file; looked up below with keys built by
            // SAMAlignedLocation.GetKey(qname, loc).
            Dictionary <string, SAMAlignedLocation> locationByKey = document.ToSAMAlignedItems().ToSAMAlignedLocationMap();

            var groups = new List <MappedItemGroup>();

            foreach (XElement groupNode in document.Element("subjectResult").Elements("subjectGroup"))
            {
                var itemGroup = new MappedItemGroup();
                groups.Add(itemGroup);

                foreach (XElement subjectNode in groupNode.Elements("subject"))
                {
                    var subject = new MappedItem();
                    itemGroup.Add(subject);
                    subject.Name = subjectNode.Attribute("name").Value;

                    foreach (XElement regionNode in subjectNode.Elements("region"))
                    {
                        var mapped = new SequenceRegionMapped();
                        subject.MappedRegions.Add(mapped);

                        // Every region carries the name of the subject it belongs to.
                        mapped.Region.Name = subject.Name;
                        mapped.Region.ParseLocation(regionNode);

                        // The next three attributes are optional; absent ones leave the defaults untouched.
                        var sequenceAttr = regionNode.Attribute("sequence");
                        if (sequenceAttr != null)
                        {
                            mapped.Region.Sequence = sequenceAttr.Value;
                        }

                        var countAttr = regionNode.Attribute("query_count_before_filter");
                        if (countAttr != null)
                        {
                            mapped.QueryCountBeforeFilter = int.Parse(countAttr.Value);
                        }

                        // NOTE(review): parsed with the current culture - confirm the writer uses the same one.
                        var pvalueAttr = regionNode.Attribute("pvalue");
                        if (pvalueAttr != null)
                        {
                            mapped.PValue = double.Parse(pvalueAttr.Value);
                        }

                        foreach (XElement queryNode in regionNode.Elements("query"))
                        {
                            string lookupKey = SAMAlignedLocation.GetKey(
                                queryNode.Attribute("qname").Value,
                                queryNode.Attribute("loc").Value);

                            // The indexer throws when the query refers to a location that is not in the map.
                            SAMAlignedLocation aligned = locationByKey[lookupKey];

                            // Link both directions: region -> location and location -> region.
                            mapped.AlignedLocations.Add(aligned);
                            aligned.Features.Add(mapped.Region);
                        }
                    }
                }
            }

            locationByKey.Clear();

            return groups;
        }
コード例 #3
0
        /// <summary>
        /// Reads the mapped miRNA groups from <c>options.InputFile</c> and writes a
        /// tab-delimited table to <c>options.OutputFile</c> with, per group, the total number
        /// of aligned locations, the perfect matches, the locations whose mismatch pattern
        /// matches <c>reg5</c>, those matching <c>reg3</c>, and the remainder.
        /// </summary>
        /// <param name="useless">Not used by this method.</param>
        /// <returns>A single-element array holding the output file name.</returns>
        public override IEnumerable <string> Process(string useless)
        {
            var groups = new MappedMirnaGroupXmlFileFormat().ReadFromFile(options.InputFile);

            // Returns the text unchanged, or character-reversed when flip is set.
            Func <string, bool, string> orient = (text, flip) => flip ? new string(text.Reverse().ToArray()) : text;

            Func <SAMAlignedLocation, bool> isPerfect = m => m.NumberOfMismatch == 0;

            // The mismatch string is reversed for '-' strand locations before testing reg5 ...
            Func <SAMAlignedLocation, bool> matchesReg5 = m =>
                m.NumberOfMismatch != 0 && reg5.Match(orient(m.MismatchPositions, m.Strand == '-')).Success;

            // ... and for '+' strand locations before testing reg3.
            Func <SAMAlignedLocation, bool> matchesReg3 = m =>
                m.NumberOfMismatch != 0 && reg3.Match(orient(m.MismatchPositions, m.Strand == '+')).Success;

            using (StreamWriter writer = new StreamWriter(options.OutputFile))
            {
                writer.WriteLine("miRNA\tLocation\tTotalCount\tPerfectMatch\tMiss5_2\tMiss3_3\tMissInternal");

                foreach (var entry in groups)
                {
                    var locations = entry.GetAlignedLocations();

                    // NOTE(review): hard-coded dump of one specific miRNA to the console;
                    // looks like leftover debugging output - confirm before removing.
                    if (entry.DisplayName.Equals("hsa-mir-486-5p:TCCTGTACTGAGCTGCCCCGAG"))
                    {
                        foreach (var m in locations)
                        {
                            Console.WriteLine(m.Parent.Qname + "\t" + m.Strand + "\t" + m.MismatchPositions);
                        }
                    }

                    var perfect = locations.Count(isPerfect);
                    var miss5   = locations.Count(matchesReg5);
                    var miss3   = locations.Count(matchesReg3);

                    // Whatever is neither perfect nor counted for reg5/reg3 goes into the last column.
                    var remainder = locations.Count - perfect - miss5 - miss3;

                    writer.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", entry.DisplayName, entry.DisplayLocation, locations.Count, perfect, miss5, miss3, remainder);
                }
            }

            return new string[] { options.OutputFile };
        }
コード例 #4
0
        /// <summary>
        /// Decides whether an aligned location may be paired with a feature location.
        /// The pair must pass <c>CheckNoPenaltyMutation</c> first; if it does, the overlap
        /// percentage is stored on the result and must be greater than zero and at least
        /// <c>Options.MinimumOverlapPercentage</c>.
        /// </summary>
        /// <param name="floc">Feature location.</param>
        /// <param name="sloc">Aligned location.</param>
        /// <returns>The result, with <c>Accepted</c> reflecting both checks.</returns>
        public virtual AcceptResult AcceptLocationPair(FeatureLocation floc, SAMAlignedLocation sloc)
        {
            var verdict = CheckNoPenaltyMutation(floc, sloc);

            if (verdict.Accepted)
            {
                // Only pairs that passed the mutation check get their overlap evaluated.
                verdict.OverlapPercentage = floc.OverlapPercentage(sloc);

                var overlapsAtAll = verdict.OverlapPercentage > 0;
                verdict.Accepted = overlapsAtAll && verdict.OverlapPercentage >= Options.MinimumOverlapPercentage;
            }

            return verdict;
        }
コード例 #5
0
        /// <summary>
        /// Applies the base acceptance rules and then additionally requires the offset of
        /// the aligned location relative to the feature to be one of <c>Options.Offsets</c>.
        /// </summary>
        /// <param name="floc">Feature location.</param>
        /// <param name="sloc">Aligned location.</param>
        /// <returns>The base result, with <c>Accepted</c> narrowed by the offset check.</returns>
        public override AcceptResult AcceptLocationPair(FeatureLocation floc, SAMAlignedLocation sloc)
        {
            var verdict = base.AcceptLocationPair(floc, sloc);

            if (verdict.Accepted)
            {
                // The offset is only computed for pairs the base class already accepted.
                var shift = sloc.Offset(floc);
                verdict.Accepted = Options.Offsets.Contains(shift);
            }

            return verdict;
        }
コード例 #6
0
        /// <summary>
        /// Checks the mismatch and no-penalty mutation counts of an aligned location against
        /// <c>Options.MaximumMismatch</c> and <c>Options.MaximumNoPenaltyMutationCount</c>.
        /// </summary>
        /// <remarks>
        /// When the location reports no-penalty mutations, every polymorphism returned by
        /// <c>GetGsnapMismatches</c> is re-classified: T to C when feature and read are on the
        /// same strand, A to G otherwise, counts as a no-penalty mutation; everything else
        /// counts as a real mismatch.
        /// </remarks>
        /// <param name="floc">Feature location; only its strand is read here.</param>
        /// <param name="sloc">Aligned location to check.</param>
        /// <returns>
        /// A result with <c>Accepted</c> set; the two counts are filled in only when accepted.
        /// </returns>
        private AcceptResult CheckNoPenaltyMutation(FeatureLocation floc, SAMAlignedLocation sloc)
        {
            if (sloc.NumberOfNoPenaltyMutation > 0)
            {
                var polymorphisms = sloc.GetGsnapMismatches();

                // The permitted conversion depends on the relative strand.
                var  sameStrand     = floc.Strand == sloc.Strand;
                char expectedRef    = sameStrand ? 'T' : 'A';
                char expectedSample = sameStrand ? 'C' : 'G';

                // Anything that is not the permitted conversion is a real mismatch.
                int realMismatches = polymorphisms.Count(m => m.RefAllele != expectedRef || m.SampleAllele != expectedSample);

                // The remaining differences are the no-penalty mutations.
                var noPenalty = sloc.NumberOfMismatch + sloc.NumberOfNoPenaltyMutation - realMismatches;

                if (realMismatches > Options.MaximumMismatch || noPenalty > Options.MaximumNoPenaltyMutationCount)
                {
                    return new AcceptResult()
                    {
                        Accepted = false
                    };
                }

                return new AcceptResult()
                {
                    Accepted = true,
                    NumberOfMismatch = realMismatches,
                    NumberOfNoPenaltyMutation = noPenalty
                };
            }

            // No no-penalty mutations reported: only the plain mismatch limit applies.
            if (sloc.NumberOfMismatch > Options.MaximumMismatch)
            {
                return new AcceptResult()
                {
                    Accepted = false
                };
            }

            return new AcceptResult()
            {
                Accepted = true,
                NumberOfMismatch = sloc.NumberOfMismatch,
                NumberOfNoPenaltyMutation = 0
            };
        }
コード例 #7
0
        /// <summary>
        /// Rejects reads shorter than <c>Options.MinimumReadLengthForLongRNA</c> or with more
        /// than <c>Options.MaximumMismatchForLongRNA</c> mismatches; all other pairs are
        /// handed to the base implementation.
        /// </summary>
        /// <param name="floc">Feature location.</param>
        /// <param name="sloc">Aligned location.</param>
        /// <returns>A rejected result, or whatever the base implementation returns.</returns>
        public override AcceptResult AcceptLocationPair(FeatureLocation floc, SAMAlignedLocation sloc)
        {
            // The length test runs first; the mismatch test is only reached for long-enough reads.
            var rejected = sloc.Parent.Sequence.Length < Options.MinimumReadLengthForLongRNA ||
                           sloc.NumberOfMismatch > Options.MaximumMismatchForLongRNA;

            if (rejected)
            {
                return new AcceptResult()
                {
                    Accepted = false
                };
            }

            return base.AcceptLocationPair(floc, sloc);
        }
コード例 #8
0
        /// <summary>
        /// Tests whether an aligned location overlaps any feature registered for its
        /// sequence name and, for every overlapping feature, attaches a new
        /// <c>FeatureSamLocation</c> to that feature.
        /// </summary>
        /// <param name="loc">Aligned location to test.</param>
        /// <returns>
        /// <c>true</c> when at least one feature overlapped; <c>false</c> otherwise, including
        /// when no features are registered for the sequence name.
        /// </returns>
        public override bool AcceptLocus(SAMAlignedLocation loc)
        {
            var result = false;

            // The feature list of the most recently resolved sequence name is cached in
            // lastSeqname / lastFeatures so consecutive reads on one sequence skip the lookup.
            if (!loc.Seqname.Equals(lastSeqname))
            {
                if (!featureMap.TryGetValue(loc.Seqname, out lastFeatures))
                {
                    // A failed TryGetValue still assigns its out argument (the default value
                    // under the standard dictionary contract), so the cached list is gone.
                    // Drop the cached name as well; otherwise a later call for the previously
                    // cached sequence would skip the lookup and enumerate an invalid list.
                    lastSeqname = null;
                    return(false);
                }
                lastSeqname = loc.Seqname;
            }

            foreach (var feature in lastFeatures)
            {
                // Feature ends before the read starts: cannot overlap, try the next one.
                if (feature.End < loc.Start)
                {
                    continue;
                }

                // NOTE(review): stopping here presumes the list is ordered by Start - confirm.
                if (feature.Start > loc.End)
                {
                    break;
                }

                if (feature.Overlap(loc, this.minOverlapPercentage))
                {
                    result = true;
                    var samloc = new FeatureSamLocation(feature);
                    samloc.SamLocation = loc;
                    feature.SamLocations.Add(samloc);
                }
            }

            return(result);
        }
コード例 #9
0
        /// <summary>
        /// Parses a tab-delimited alignment file in which every read starts with a header
        /// line beginning with <c>&gt;</c>, followed by one line per reported match, and
        /// builds one item of type <typeparamref name="T"/> for every read that keeps at
        /// least one acceptable location.
        /// </summary>
        /// <param name="fileName">File to read; opened through <c>StreamUtils.GetReader</c>.</param>
        /// <param name="totalQueries">
        /// Receives one <c>QueryInfo</c> per header line that is not skipped by the
        /// <c>IgnoreNTA</c> option, whether or not the read yields a candidate.
        /// </param>
        /// <returns>The reads that ended up with at least one location.</returns>
        /// <exception cref="UserTerminatedException">
        /// Thrown when <c>Progress</c> reports a pending cancellation.
        /// </exception>
        protected override List <T> DoBuild <T>(string fileName, out List <QueryInfo> totalQueries)
        {
            var result = new List <T>();

            totalQueries = new List <QueryInfo>();

            using (var sr = StreamUtils.GetReader(fileName))
            {
                // count        : header lines seen so far
                // waitingcount : reads added to result
                int    count        = 0;
                int    waitingcount = 0;
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // Only header lines start a record. Match lines of reads that were filtered
                    // out below are never consumed by the inner loop and are skipped here
                    // (presumably they never start with '>' - confirm against the file format).
                    if (!line.StartsWith(">"))
                    {
                        continue;
                    }

                    // Poll for cancellation once every 1000 header lines.
                    if (count % 1000 == 0)
                    {
                        if (Progress.IsCancellationPending())
                        {
                            throw new UserTerminatedException();
                        }
                    }

                    if (count % 100000 == 0 && count > 0)
                    {
                        Progress.SetMessage("{0} candidates from {1} reads", waitingcount, count);
                    }

                    count++;

                    //just for test
                    //if (waitingcount == 10000)
                    //{
                    //  break;
                    //}

                    // Header columns used here: [0] '>' + read sequence, [1] number of match
                    // lines that follow, [3] query name.
                    var  parts     = line.Split('\t');
                    var  qname     = parts[3];
                    bool hasNTATag = qname.HasNTATag();
                    bool hasNTA    = qname.HasNTA();

                    // Reads skipped here are not added to totalQueries.
                    if (_options.IgnoreNTA)
                    {
                        if (hasNTA)
                        {
                            continue;
                        }
                    }

                    var qi = new QueryInfo(qname);

                    totalQueries.Add(qi);

                    int matchCount = int.Parse(parts[1]);
                    if (matchCount == 0)
                    {
                        continue;
                    }

                    // Strip the leading '>' to get the read sequence.
                    var seq = parts[0].Substring(1);
                    qi.Length = seq.Length;

                    //contains 'N'
                    if (seq.Contains('N'))
                    {
                        continue;
                    }

                    //too short
                    if (seq.Length < _options.MinimumReadLength)
                    {
                        continue;
                    }

                    //too long
                    if (seq.Length > _options.MaximumReadLength)
                    {
                        continue;
                    }

                    var sam = new T()
                    {
                        Qname    = qname,
                        Sequence = seq
                    };

                    // Consume the match lines that belong to this read.
                    for (int i = 0; i < matchCount; i++)
                    {
                        string matchline = sr.ReadLine();

                        // End of file or a blank line before all announced matches were read:
                        // discard the locations collected so far for this read.
                        if (string.IsNullOrWhiteSpace(matchline))
                        {
                            sam.ClearLocations();
                            break;
                        }

                        // Match columns used here: [0] aligned genome sequence, [2] location text.
                        var matchparts  = matchline.Split('\t');
                        var matchgenome = matchparts[0].Trim();

                        if (matchgenome.Contains('-'))//insertion or deletion, not allowed now
                        {
                            continue;
                        }

                        if (matchgenome.Contains('*'))//soft clip, not allowed now
                        {
                            continue;
                        }

                        string mismatchPosition = string.Empty;
                        string cigar            = string.Empty;
                        int    mismatch;
                        int    mutation;
                        GetMismatchPositions(seq, matchgenome, ref mismatchPosition, ref cigar, out mutation, out mismatch);
                        // Overwritten for every evaluated match line, so the last one wins.
                        qi.Mismatch = mismatch;
                        if (mismatch > _options.MaximumMismatch)
                        {
                            continue;
                        }
                        if (mutation > _options.MaximumNoPenaltyMutationCount)
                        {
                            continue;
                        }

                        // Optional filter that only applies to matches with at least one
                        // no-penalty mutation.
                        if (_options.IgnoreNTAAndNoPenaltyMutation)
                        {
                            if (mutation > 0)
                            {
                                if (hasNTA)
                                {
                                    continue;
                                }

                                if (hasNTATag)
                                {
                                    // Skip the match when the last '.' in the cigar text lies
                                    // within its final three characters.
                                    var pos = cigar.LastIndexOf('.');
                                    if (pos >= cigar.Length - 3)
                                    {
                                        continue;
                                    }
                                }
                            }
                        }

                        // locReg groups: 1 = strand, 2 = sequence name, 3 and 4 = coordinates.
                        var match  = locReg.Match(matchparts[2]);
                        var strand = match.Groups[1].Value[0];
                        var chr    = match.Groups[2].Value;
                        var start  = int.Parse(match.Groups[3].Value);
                        var end    = int.Parse(match.Groups[4].Value);

                        // For any strand other than '+', the two parsed coordinates are swapped
                        // when assigned to Start and End.
                        var loc = new SAMAlignedLocation(sam)
                        {
                            Seqname                   = chr,
                            Start                     = strand == '+' ? start : end,
                            End                       = strand == '+' ? end : start,
                            Strand                    = strand,
                            NumberOfMismatch          = mismatch,
                            NumberOfNoPenaltyMutation = mutation,
                            Cigar                     = cigar,
                            MismatchPositions         = mismatchPosition
                        };

                        sam.AddLocation(loc);
                    }

                    if (sam.Locations.Count > 0)
                    {
                        // With several locations, keep only those with the fewest
                        // no-penalty mutations.
                        if (sam.Locations.Count > 1)
                        {
                            var minNNPM = sam.Locations.Min(m => m.NumberOfNoPenaltyMutation);
                            sam.RemoveLocation(m => m.NumberOfNoPenaltyMutation > minNNPM);
                        }

                        result.Add(sam);
                        waitingcount++;
                    }
                }

                Progress.SetMessage("Finally, there are {0} candidates from {1} reads", waitingcount, count);
            }

            return(result);
        }
コード例 #10
0
        /// <summary>
        /// Reads tab-delimited lines from <c>_file</c> until one describes a mapped read
        /// whose mapping quality is at least <c>_options.MinimumReadQuality</c>, and converts
        /// that line into a <see cref="SAMAlignedItem"/> with one location (plus alternative
        /// hits when the format provides them).
        /// </summary>
        /// <returns>The next accepted item, or <c>null</c> once the input is exhausted.</returns>
        public SAMAlignedItem NextSAMAlignedItem()
        {
            for (string record = _file.ReadLine(); record != null; record = _file.ReadLine())
            {
                var fields = record.Split('\t');

                var readName = fields[SAMFormatConst.QNAME_INDEX];
                var readSeq  = fields[SAMFormatConst.SEQ_INDEX];

                // Skip unmapped reads.
                var flags = (SAMFlags)int.Parse(fields[SAMFormatConst.FLAG_INDEX]);
                if (flags.HasFlag(SAMFlags.UnmappedQuery))
                {
                    continue;
                }

                // Skip reads below the mapping-quality threshold.
                var quality = int.Parse(fields[SAMFormatConst.MAPQ_INDEX]);
                if (quality < _options.MinimumReadQuality)
                {
                    continue;
                }

                var item = new SAMAlignedItem
                {
                    Qname = readName,
                };

                // Reverse-strand reads are stored on the item as the reverse complement of the
                // sequence in the file; the location below keeps the sequence as given.
                var  onReverse   = flags.HasFlag(SAMFlags.QueryOnReverseStrand);
                char orientation = onReverse ? '-' : '+';
                if (onReverse)
                {
                    item.Sequence = SequenceUtils.GetReverseComplementedSequence(readSeq);
                }
                else
                {
                    item.Sequence = readSeq;
                }

                var location = new SAMAlignedLocation(item)
                {
                    Seqname           = fields[SAMFormatConst.RNAME_INDEX],
                    Start             = int.Parse(fields[SAMFormatConst.POS_INDEX]),
                    Strand            = orientation,
                    Cigar             = fields[SAMFormatConst.CIGAR_INDEX],
                    MismatchPositions = _format.GetMismatchPositions(fields),
                    NumberOfMismatch  = _format.GetNumberOfMismatch(fields),
                    Sequence          = readSeq,
                    Qual = fields[SAMFormatConst.QUAL_INDEX]
                };

                location.ParseEnd(item.Sequence);
                item.AddLocation(location);

                if (_format.HasAlternativeHits)
                {
                    _format.ParseAlternativeHits(fields, item);
                }

                return item;
            }

            return null;
        }
        /// <summary>
        /// Streams an XML file with an <see cref="XmlReader"/>: first reads the aligned
        /// queries through <c>SAMAlignedItemUtils.ReadFrom</c>, then walks the
        /// <c>subjectResult/subjectGroup/subject/region/query</c> hierarchy and rebuilds it
        /// as <see cref="FeatureItemGroup"/> / <see cref="FeatureItem"/> /
        /// <see cref="FeatureLocation"/> / <see cref="FeatureSamLocation"/> objects.
        /// </summary>
        /// <param name="fileName">Path of the XML file to read.</param>
        /// <returns>One group per <c>subjectGroup</c> element.</returns>
        public virtual List <FeatureItemGroup> ReadFromFile(string fileName)
        {
            var result = new List <FeatureItemGroup>();

            using (XmlReader source = XmlReader.Create(fileName))
            {
                Progress.SetMessage("reading queries ...");

                List <SAMAlignedItem> queries = SAMAlignedItemUtils.ReadFrom(source);

                Progress.SetMessage("{0} queries read.", queries.Count);

                // Locations are looked up below with keys built by SAMAlignedLocation.GetKey;
                // the intermediate list is emptied once the map exists.
                var qmmap = queries.ToSAMAlignedLocationMap();
                queries.Clear();

                Progress.SetMessage("reading subjects ...");
                string value;
                source.ReadToFollowing("subjectResult");
                // Each nesting level uses the same pattern: ReadToDescendant moves to the first
                // child of that name, ReadToNextSibling moves to the following ones.
                if (source.ReadToDescendant("subjectGroup"))
                {
                    do
                    {
                        var featureGroup = new FeatureItemGroup();
                        result.Add(featureGroup);

                        if (source.ReadToDescendant("subject"))
                        {
                            do
                            {
                                var item = new FeatureItem();
                                featureGroup.Add(item);
                                item.Name = source.GetAttribute("name");

                                if (source.ReadToDescendant("region"))
                                {
                                    do
                                    {
                                        var fl = new FeatureLocation();
                                        item.Locations.Add(fl);

                                        // Every location carries the name of its subject.
                                        fl.Name     = item.Name;
                                        fl.Seqname  = source.GetAttribute("seqname");
                                        fl.Start    = long.Parse(source.GetAttribute("start"));
                                        fl.End      = long.Parse(source.GetAttribute("end"));
                                        fl.Strand   = source.GetAttribute("strand")[0];
                                        fl.Sequence = source.GetAttribute("sequence");

                                        // Optional attributes: only applied when present.
                                        value = source.GetAttribute("query_count_before_filter");
                                        if (value != null)
                                        {
                                            fl.QueryCountBeforeFilter = int.Parse(value);
                                        }

                                        value = source.GetAttribute("pvalue");
                                        if (value != null)
                                        {
                                            fl.PValue = double.Parse(value);
                                        }

                                        if (source.ReadToDescendant("query"))
                                        {
                                            do
                                            {
                                                string             qname = source.GetAttribute("qname");
                                                string             loc   = source.GetAttribute("loc");
                                                string             key   = SAMAlignedLocation.GetKey(qname, loc);
                                                // The indexer throws when the key is not in the map.
                                                SAMAlignedLocation query = qmmap[key];

                                                // NOTE(review): fsl is not added to any collection here;
                                                // presumably the constructor or the SamLocation setter
                                                // registers it - confirm in FeatureSamLocation.
                                                FeatureSamLocation fsl = new FeatureSamLocation(fl);
                                                fsl.SamLocation = query;

                                                // "offset" is parsed unconditionally, so it is required.
                                                fsl.Offset = int.Parse(source.GetAttribute("offset"));

                                                // Without a stored "overlap" value, compute it from the
                                                // two locations.
                                                var attr = source.GetAttribute("overlap");
                                                if (attr == null)
                                                {
                                                    fsl.OverlapPercentage = query.OverlapPercentage(fl);
                                                }
                                                else
                                                {
                                                    fsl.OverlapPercentage = double.Parse(attr);
                                                }

                                                // Optional attributes: only applied when present.
                                                var nmi = source.GetAttribute("nmi");
                                                if (nmi != null)
                                                {
                                                    fsl.NumberOfMismatch = int.Parse(nmi);
                                                }

                                                var nnpm = source.GetAttribute("nnpm");
                                                if (nnpm != null)
                                                {
                                                    fsl.NumberOfNoPenaltyMutation = int.Parse(nnpm);
                                                }
                                            } while (source.ReadToNextSibling("query"));
                                        }
                                    } while (source.ReadToNextSibling("region"));
                                }
                            } while (source.ReadToNextSibling("subject"));
                        }
                    } while (source.ReadToNextSibling("subjectGroup"));
                }
                qmmap.Clear();
            }

            // The reported number is the count of groups in the result list.
            Progress.SetMessage("{0} subjects read.", result.Count);
            return(result);
        }
コード例 #12
0
        /// <summary>
        /// Loads an XML file into memory and rebuilds the
        /// <c>subjectResult/subjectGroup/subject/region/query</c> hierarchy as
        /// <see cref="FeatureItemGroup"/> / <see cref="FeatureItem"/> /
        /// <see cref="FeatureLocation"/> / <see cref="FeatureSamLocation"/> objects.
        /// </summary>
        /// <param name="fileName">Path of the XML file to load.</param>
        /// <returns>One group per <c>subjectGroup</c> element.</returns>
        public List <FeatureItemGroup> ReadFromFile(string fileName)
        {
            Console.WriteLine("read file {0} ...", fileName);

            XElement document = XElement.Load(fileName);

            // Aligned locations of the whole file; looked up below with keys built by
            // SAMAlignedLocation.GetKey(qname, loc).
            Dictionary <string, SAMAlignedLocation> locationByKey = document.ToSAMAlignedItems().ToSAMAlignedLocationMap();

            var groups = new List <FeatureItemGroup>();

            foreach (XElement groupNode in document.Element("subjectResult").Elements("subjectGroup"))
            {
                var featureGroup = new FeatureItemGroup();
                groups.Add(featureGroup);

                foreach (XElement subjectNode in groupNode.Elements("subject"))
                {
                    var feature = new FeatureItem();
                    featureGroup.Add(feature);
                    feature.Name = subjectNode.Attribute("name").Value;

                    foreach (XElement regionNode in subjectNode.Elements("region"))
                    {
                        var featureLoc = new FeatureLocation();
                        feature.Locations.Add(featureLoc);

                        // Every location carries the name of its subject.
                        featureLoc.Name = feature.Name;
                        featureLoc.ParseLocation(regionNode);

                        // The next three attributes are optional; absent ones leave the defaults untouched.
                        var sequenceAttr = regionNode.Attribute("sequence");
                        if (sequenceAttr != null)
                        {
                            featureLoc.Sequence = sequenceAttr.Value;
                        }

                        var countAttr = regionNode.Attribute("query_count_before_filter");
                        if (countAttr != null)
                        {
                            featureLoc.QueryCountBeforeFilter = int.Parse(countAttr.Value);
                        }

                        var pvalueAttr = regionNode.Attribute("pvalue");
                        if (pvalueAttr != null)
                        {
                            featureLoc.PValue = double.Parse(pvalueAttr.Value);
                        }

                        foreach (XElement queryNode in regionNode.Elements("query"))
                        {
                            string lookupKey = SAMAlignedLocation.GetKey(
                                queryNode.Attribute("qname").Value,
                                queryNode.Attribute("loc").Value);

                            // The indexer throws when the query refers to a location that is not in the map.
                            SAMAlignedLocation aligned = locationByKey[lookupKey];

                            FeatureSamLocation pairing = new FeatureSamLocation(featureLoc);
                            pairing.SamLocation = aligned;

                            // Without a stored "overlap" value, compute it from the two locations.
                            var overlapAttr = queryNode.FindAttribute("overlap");
                            if (overlapAttr != null)
                            {
                                pairing.OverlapPercentage = double.Parse(overlapAttr.Value);
                            }
                            else
                            {
                                pairing.OverlapPercentage = aligned.OverlapPercentage(featureLoc);
                            }

                            // "nnmp" is accepted as a fallback spelling of "nnpm".
                            var noPenaltyAttr = queryNode.FindAttribute("nnpm");
                            if (noPenaltyAttr == null)
                            {
                                noPenaltyAttr = queryNode.FindAttribute("nnmp");
                            }
                            if (noPenaltyAttr != null)
                            {
                                pairing.NumberOfNoPenaltyMutation = int.Parse(noPenaltyAttr.Value);
                            }

                            var mismatchAttr = queryNode.FindAttribute("nmi");
                            if (mismatchAttr != null)
                            {
                                pairing.NumberOfMismatch = int.Parse(mismatchAttr.Value);
                            }
                        }
                    }
                }
            }

            locationByKey.Clear();

            return groups;
        }
コード例 #13
0
        /// <summary>
        /// Scans the SAM input read by read, feeds each usable read into a pileup counter and writes
        /// every finished position that passes the Fisher exact test and the alternative allele
        /// frequency threshold to <c>options.OutputFile</c> as a VCF record. When
        /// <c>options.ExportIgvScript</c> is set, an IGV batch script (<c>OutputFile + ".igv"</c>) is
        /// written as well, with one snapshot command per reported position that lies inside a miRNA region.
        /// </summary>
        /// <remarks>
        /// Reads shorter than <c>options.MinimumReadLength</c>, unmapped reads and reads whose CIGAR
        /// contains an insertion or deletion are skipped.
        /// </remarks>
        /// <returns>A one-element array holding the path of the VCF file that was written.</returns>
        /// <exception cref="UserTerminatedException">Cancellation was requested through Progress.</exception>
        public override IEnumerable <string> Process()
        {
            // Numbers in a VCF must use '.' as decimal separator regardless of the machine's regional
            // settings; with a comma-decimal culture the comma-separated AF list would become ambiguous.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            PileupCountList pc = new PileupCountList();

            var format = options.GetSAMFormat();

            var cm = new SmallRNACountMap(options.CountFile);

            var srItems = SequenceRegionUtils.GetSequenceRegions(options.CoordinateFile, "miRNA", options.BedAsGtf);

            // Region sequence names are stored without the "chr" prefix, the same way read locations are below.
            srItems.ForEach(m =>
            {
                m.Seqname = m.Seqname.StringAfter("chr");
            });
            var srmap = srItems.GroupBy(m => m.Seqname).ToDictionary(m => m.Key, m => m.ToList());

            // The IGV script is optional; a null resource is legal in a using statement and is simply not disposed.
            using (StreamWriter swScript = options.ExportIgvScript ? new StreamWriter(options.OutputFile + ".igv") : null)
            {
                if (swScript != null)
                {
                    swScript.WriteLine("snapshotDirectory {0}", Path.GetDirectoryName(options.OutputFile).Replace('\\', '/'));
                }

                using (StreamWriter sw = new StreamWriter(options.OutputFile))
                {
                    // Meta-information lines.
                    sw.WriteLine(string.Format(invariant, @"##fileformat=VCFv4.2
##fileDate={0:yyyyMMdd}
##source={1}
##phasing=partial
##INFO=<ID=NS,Number=1,Type=Integer,Description=""Number of Samples With Data"">
##INFO=<ID=DP,Number=1,Type=Integer,Description=""Total Depth"">
##INFO=<ID=AF,Number=A,Type=Float,Description=""Allele Frequency"">
##INFO=<ID=FP,Number=1,Type=Float,Description=""Fisher Exact Test P-Value"">
##INFO=<ID=MN,Number=.,Type=String,Description=""miRNA name contains this position"">
##FILTER=<ID=FisherET,Description=""Fisher exact test Pvalue less than {2}"">
##FILTER=<ID=AltAlleFreq,Description=""Alternative allele frequency less than {3}"">
##FILTER=<ID=notMiRNA,Description=""Position not located in miRNA locus"">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description=""Read Depth"">
##FORMAT=<ID=AD,Number=1,Type=Integer,Description=""Allelic Depth"">",
                                               DateTime.Now,
                                               "PileupCountBuilder",
                                               options.FisherPValue,
                                               options.MinimumAlternativeAlleleFrequency));

                    // Column header: tab-delimited, like the data records written below.
                    sw.WriteLine("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{0}",
                                 Path.GetFileNameWithoutExtension(options.InputFile));

                    using (var sr = SAMFactory.GetReader(options.InputFile, true))
                    {
                        int    count = 0;
                        string line;
                        while ((line = sr.ReadLine()) != null)
                        {
                            count++;

                            // Poll for cancellation every 100 reads.
                            if (count % 100 == 0)
                            {
                                if (Progress.IsCancellationPending())
                                {
                                    throw new UserTerminatedException();
                                }
                            }

                            if (count % 100000 == 0)
                            {
                                Progress.SetMessage("{0} reads processed", count);
                            }

                            var parts = line.Split('\t');

                            var qname = parts[SAMFormatConst.QNAME_INDEX];
                            var seq   = parts[SAMFormatConst.SEQ_INDEX];

                            // Too short.
                            if (seq.Length < options.MinimumReadLength)
                            {
                                continue;
                            }

                            SAMFlags flag = (SAMFlags)int.Parse(parts[SAMFormatConst.FLAG_INDEX]);

                            // Unmapped.
                            if (flag.HasFlag(SAMFlags.UnmappedQuery))
                            {
                                continue;
                            }

                            var cigar = parts[SAMFormatConst.CIGAR_INDEX];

                            // Reads aligned with an insertion or deletion are not piled up.
                            if (cigar.Any(m => m == 'I' || m == 'D'))
                            {
                                continue;
                            }

                            var sam = new SAMAlignedItem()
                            {
                                Qname = qname,
                            };

                            // The item keeps the reverse-complemented sequence for reverse-strand reads,
                            // while the location below keeps the sequence exactly as found in the SAM line.
                            bool isReversed = flag.HasFlag(SAMFlags.QueryOnReverseStrand);
                            char strand;
                            if (isReversed)
                            {
                                strand       = '-';
                                sam.Sequence = SequenceUtils.GetReverseComplementedSequence(seq);
                            }
                            else
                            {
                                strand       = '+';
                                sam.Sequence = seq;
                            }

                            var loc = new SAMAlignedLocation(sam)
                            {
                                Seqname           = parts[SAMFormatConst.RNAME_INDEX].StringAfter("chr"),
                                Start             = int.Parse(parts[SAMFormatConst.POS_INDEX]),
                                Strand            = strand,
                                Cigar             = cigar,
                                MismatchPositions = format.GetMismatchPositions(parts),
                                NumberOfMismatch  = format.GetNumberOfMismatch(parts),
                                Sequence          = seq
                            };

                            loc.ParseEnd(sam.Sequence);
                            sam.AddLocation(loc);

                            if (format.HasAlternativeHits)
                            {
                                format.ParseAlternativeHits(parts, sam);
                            }

                            // Add returns the pileup positions it considers finished after taking this read.
                            var finished = pc.Add(sam, cm.GetCount(sam.Qname));
                            if (null == finished || 0 == finished.Count)
                            {
                                continue;
                            }

                            foreach (var fin in finished)
                            {
                                var ft = fin.FisherExactTest();
                                if (ft.PValue <= options.FisherPValue)
                                {
                                    var total     = fin.Sum(m => m.Value);
                                    var minallele = total * options.MinimumAlternativeAlleleFrequency;
                                    if (ft.Sample2.Failed >= minallele)
                                    {
                                        List <GtfItem> srs;
                                        List <string>  ranges = new List <string>();

                                        // Look the regions up by the chromosome of the finished position itself.
                                        // The read that triggered the flush may already be on another chromosome.
                                        if (srmap.TryGetValue(fin.Chromosome, out srs))
                                        {
                                            foreach (var seqr in srs)
                                            {
                                                if (seqr.Contains(fin.Position))
                                                {
                                                    ranges.Add(seqr.GetNameLocation());
                                                }
                                            }
                                        }

                                        // Every observed allele other than the reference, in a stable order.
                                        var alter = (from r in fin
                                                     where r.Key != fin.Reference
                                                     orderby r.Key
                                                     select r).ToList();

                                        var str = string.Format(invariant,
                                                                "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\tNS={7};DP={8};AF={9};FP={10:0.##E0}{11}\tDP:AD\t{12}:{13},{14}",
                                                                fin.Chromosome,
                                                                fin.Position,
                                                                ".",
                                                                fin.Reference,
                                                                (from r in alter
                                                                 select r.Key.ToString()).Merge(","),
                                                                0,
                                                                ranges.Count == 0 ? "notMiRNA" : "PASS",
                                                                1,
                                                                total,
                                                                (from r in alter
                                                                 select string.Format(invariant, "{0:0.###}", r.Value * 1.0 / total)).Merge(","),
                                                                ft.PValue,
                                                                ranges.Count == 0 ? "" : ";" + ranges.Merge(","),
                                                                total,
                                                                ft.Sample2.Succeed,
                                                                (from r in alter
                                                                 select r.Value.ToString()).Merge(","));

                                        sw.WriteLine(str);

                                        // Only positions inside a miRNA region get an IGV snapshot.
                                        if (swScript != null && ranges.Count > 0)
                                        {
                                            swScript.WriteLine(@"goto {0}:{1}
sort position
snapshot {0}_{2}_{1}.png", fin.Chromosome, fin.Position, ranges[0].Replace('(', '_').Replace(')', '_').Replace(':', '_'));
                                        }
                                    }
                                }
                            }

                            finished.Clear();
                        }

                        // NOTE(review): positions still buffered in pc when the input ends are never
                        // reported here - confirm whether a final flush of the pileup list is needed.
                    }
                }
            }

            return(new string[] { options.OutputFile });
        }
コード例 #14
0
        /// <summary>
        /// Reads a SAM file and turns every alignment that survives the option-based filters
        /// (mapped, mismatch limit, query name, read length range) into an item of type
        /// <typeparamref name="T"/> carrying a single aligned location.
        /// Alignments containing insertions or deletions are not filtered out here.
        /// </summary>
        /// <param name="fileName">Path of the SAM file to read.</param>
        /// <param name="totalQueries">
        /// Receives one <see cref="QueryInfo"/> for every line returned by the reader, including
        /// lines that are subsequently filtered out.
        /// </param>
        /// <returns>The accepted items, in the order they were read.</returns>
        /// <exception cref="UserTerminatedException">Cancellation was requested through Progress.</exception>
        protected override List <T> DoBuild <T>(string fileName, out List <QueryInfo> totalQueries)
        {
            _format      = _options.GetSAMFormat();
            totalQueries = new List <QueryInfo>();

            var accepted = new List <T>();

            using (var reader = SAMFactory.GetReader(fileName, true))
            {
                int readCount      = 0;
                int candidateCount = 0;

                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    // Both checks run before the counter is incremented, so the cancellation
                    // poll also happens for the very first line.
                    if (readCount % 1000 == 0 && Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    if (readCount > 0 && readCount % 100000 == 0)
                    {
                        Progress.SetMessage("{0} candidates from {1} reads", candidateCount, readCount);
                    }

                    readCount++;

                    // Every line is registered as a query, whether or not it is kept below.
                    var qname = line.StringBefore("\t");
                    var info  = new QueryInfo(qname);
                    totalQueries.Add(info);

                    var fields = line.Split('\t');
                    var flag   = (SAMFlags)int.Parse(fields[SAMFormatConst.FLAG_INDEX]);

                    // Unmapped reads have nothing further to record.
                    if (flag.HasFlag(SAMFlags.UnmappedQuery))
                    {
                        continue;
                    }

                    var mismatchCount = _format.GetNumberOfMismatch(fields);
                    var seq           = fields[SAMFormatConst.SEQ_INDEX];

                    info.Mismatch          = mismatchCount;
                    info.Length            = seq.Length;
                    info.NoPenaltyMutation = 0;

                    if (_options.T2cAsNoPenaltyMutation)
                    {
                        // No adjustment is performed for this option in this method.
                    }

                    // Rejection tests in their original order: mismatches, query name, too short, too long.
                    bool rejected = mismatchCount > _options.MaximumMismatch
                                    || !AcceptQueryName(qname)
                                    || seq.Length < _options.MinimumReadLength
                                    || seq.Length > _options.MaximumReadLength;
                    if (rejected)
                    {
                        continue;
                    }

                    var cigar = fields[SAMFormatConst.CIGAR_INDEX];

                    // Reverse-strand reads are stored reverse-complemented.
                    bool onReverseStrand = flag.HasFlag(SAMFlags.QueryOnReverseStrand);
                    char strand          = onReverseStrand ? '-' : '+';
                    if (onReverseStrand)
                    {
                        seq = SequenceUtils.GetReverseComplementedSequence(seq);
                    }

                    var score = _format.GetAlignmentScore(fields);

                    var sam = new T();
                    sam.Qname    = qname;
                    sam.Sequence = seq;

                    var seqname = fields[SAMFormatConst.RNAME_INDEX];

                    var loc = new SAMAlignedLocation(sam);
                    loc.Seqname           = seqname;
                    loc.Start             = int.Parse(fields[SAMFormatConst.POS_INDEX]);
                    loc.Strand            = strand;
                    loc.Cigar             = cigar;
                    loc.NumberOfMismatch  = mismatchCount;
                    loc.AlignmentScore    = score;
                    loc.MismatchPositions = _format.GetMismatchPositions(fields);

                    loc.ParseEnd(sam.Sequence);
                    sam.AddLocation(loc);

                    if (_format.HasAlternativeHits)
                    {
                        _format.ParseAlternativeHits(fields, sam);
                    }

                    accepted.Add(sam);
                    candidateCount++;
                }

                Progress.SetMessage("Finally, there are {0} candidates from {1} reads", candidateCount, readCount);
            }

            return accepted;
        }
コード例 #15
0
 /// <summary>
 /// Not implemented: every call throws <see cref="NotImplementedException"/>, so neither
 /// <paramref name="loc"/> nor <paramref name="reg"/> is ever assigned.
 /// </summary>
 /// <remarks>
 /// NOTE(review): judging by the signature alone, this was presumably meant to select an aligned
 /// location / miRNA region pair from the two lists - confirm the intent before implementing.
 /// </remarks>
 /// <exception cref="NotImplementedException">Always.</exception>
 private void FindLocation(List <SAMAlignedLocation> list, List <MappedMirnaRegion> list_2, out SAMAlignedLocation loc, out MappedMirnaRegion reg)
 {
     throw new NotImplementedException();
 }
コード例 #16
0
 /// <summary>
 /// Decides whether an aligned location should be kept. This implementation accepts every
 /// location; being virtual, it can be overridden with a real test.
 /// </summary>
 /// <param name="loc">The aligned location under consideration; not inspected here.</param>
 /// <returns>Always <c>true</c>.</returns>
 public virtual bool AcceptLocus(SAMAlignedLocation loc)
 {
     return true;
 }
コード例 #17
0
        /// <summary>
        /// Reads a SAM file and turns every alignment accepted by <c>_filter</c> (flags, mismatch count,
        /// query name, read length, CIGAR and locus, tested in that order) into an item of type
        /// <typeparamref name="T"/>.
        /// </summary>
        /// <param name="fileName">Path of the SAM file to read.</param>
        /// <param name="totalQueries">
        /// Receives one <see cref="QueryInfo"/> for every line returned by the reader, including
        /// lines that are subsequently filtered out.
        /// </param>
        /// <returns>The accepted items, in the order they were read.</returns>
        /// <exception cref="UserTerminatedException">Cancellation was requested through Progress.</exception>
        protected override List <T> DoBuild <T>(string fileName, out List <QueryInfo> totalQueries)
        {
            _format      = _options.GetSAMFormat();
            totalQueries = new List <QueryInfo>();

            var accepted = new List <T>();

            using (var reader = SAMFactory.GetReader(fileName, true))
            {
                int readCount    = 0;
                int featureCount = 0;

                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    readCount++;

                    // Poll for cancellation once every 1000 lines.
                    if (readCount % 1000 == 0 && Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    var fields = line.Split('\t');

                    // Every line is registered as a query, whether or not it is kept below.
                    var qname = fields[SAMFormatConst.QNAME_INDEX];
                    var info  = new QueryInfo(qname);
                    totalQueries.Add(info);

                    var flag = (SAMFlags)int.Parse(fields[SAMFormatConst.FLAG_INDEX]);
                    if (!_filter.AcceptFlags(flag))
                    {
                        continue;
                    }

                    var mismatchCount = _format.GetNumberOfMismatch(fields);
                    var seq           = fields[SAMFormatConst.SEQ_INDEX];

                    info.Mismatch = mismatchCount;
                    info.Length   = seq.Length;

                    // Short-circuit evaluation keeps the original test order: mismatch, name, length.
                    if (!_filter.AcceptMismatch(mismatchCount)
                        || !_filter.AcceptQueryName(qname)
                        || !_filter.AcceptLength(seq.Length))
                    {
                        continue;
                    }

                    var cigar = fields[SAMFormatConst.CIGAR_INDEX];
                    if (!_filter.AcceptCigar(cigar))
                    {
                        continue;
                    }

                    // Sequence names are stored without the "chr" prefix.
                    var seqname = fields[SAMFormatConst.RNAME_INDEX].StringAfter("chr");
                    var start   = int.Parse(fields[SAMFormatConst.POS_INDEX]);
                    var end     = SAMUtils.ParseEnd(start, cigar);

                    bool onReverseStrand = flag.HasFlag(SAMFlags.QueryOnReverseStrand);
                    char strand          = onReverseStrand ? '-' : '+';

                    // The location gets its coordinates first so the locus filter can run
                    // before the remaining per-read values are filled in.
                    // NOTE(review): loc is never passed to sam.AddLocation here; presumably the
                    // SAMAlignedLocation constructor links it to its parent - confirm.
                    var sam = new T();
                    var loc = new SAMAlignedLocation(sam);
                    loc.Seqname = seqname;
                    loc.Start   = start;
                    loc.End     = end;
                    loc.Strand  = strand;

                    if (!_filter.AcceptLocus(loc))
                    {
                        continue;
                    }

                    // Reverse-strand reads are stored reverse-complemented.
                    if (onReverseStrand)
                    {
                        seq = SequenceUtils.GetReverseComplementedSequence(seq);
                    }

                    sam.Qname    = qname;
                    sam.Sequence = seq;

                    loc.AlignmentScore    = _format.GetAlignmentScore(fields);
                    loc.Cigar             = cigar;
                    loc.NumberOfMismatch  = mismatchCount;
                    loc.MismatchPositions = _format.GetMismatchPositions(fields);

                    if (_format.HasAlternativeHits)
                    {
                        _format.ParseAlternativeHits(fields, sam);
                    }

                    accepted.Add(sam);
                    featureCount++;

                    if (featureCount % 100 == 0)
                    {
                        Progress.SetMessage("{0} feature reads from {1} reads", featureCount, readCount);
                    }
                }
            }

            return accepted;
        }