예제 #1
0
        /// <summary>
        /// Loads the target coverage regions described by <paramref name="options"/>, fills each
        /// region's sequence from the genome FASTA file, and assigns a gene symbol looked up in
        /// the refgene file.
        /// </summary>
        /// <param name="options">Supplies TargetFile, GenomeFastaFile and RefgeneFile.</param>
        /// <param name="progress">Receives status messages while the files are processed.</param>
        /// <param name="removeRegionWithoutSequence">
        /// When true, regions whose Sequence is still null or empty after the FASTA pass are
        /// removed from the returned list.
        /// </param>
        /// <returns>The loaded regions with Sequence and GeneSymbol populated.</returns>
        public static List<CoverageRegion> GetTargetCoverageRegion(ITargetBuilderOptions options, IProgressCallback progress, bool removeRegionWithoutSequence = true)
        {
            // A file extension is an identifier, not linguistic text, so compare it ordinally
            // instead of with the current culture.
            List<CoverageRegion> result = options.TargetFile.EndsWith(".xml", StringComparison.Ordinal)
                ? GetTargetCoverageRegionFromXml(options, progress)
                : GetTargetCoverageRegionFromBed(options, progress);

            // Group the regions by sequence name so each FASTA record is matched with one lookup.
            var regionsBySeqname = result.ToGroupDictionary(m => m.Seqname);

            progress.SetMessage("Filling sequence from {0}...", options.GenomeFastaFile);
            using (var sr = new StreamReader(options.GenomeFastaFile))
            {
                var fastaFormat = new FastaFormat();
                Sequence seq;
                while ((seq = fastaFormat.ReadSequence(sr)) != null)
                {
                    progress.SetMessage("Processing chromosome {0} ...", seq.Reference);

                    // The dictionary key is the FASTA name passed through StringAfter("chr").
                    // NOTE(review): presumably this strips a leading "chr" prefix - confirm helper semantics.
                    var seqname = seq.Name.StringAfter("chr");

                    List<CoverageRegion> regions;
                    if (!regionsBySeqname.TryGetValue(seqname, out regions))
                    {
                        continue;
                    }

                    foreach (var region in regions)
                    {
                        // Start is treated as 1-based here (Start - 1 is used as the 0-based offset).
                        // NOTE(review): Substring throws if the region extends past the end of the
                        // sequence; no bounds check is performed - confirm inputs are always in range.
                        region.Sequence = seq.SeqString.Substring((int)(region.Start - 1), (int)region.Length);

                        // NOTE(review): the reverse complement is only stored for '+' strand regions;
                        // confirm this is intended rather than '-'.
                        if (region.Strand == '+')
                        {
                            region.ReverseComplementedSequence = SequenceUtils.GetReverseComplementedSequence(region.Sequence);
                        }
                    }
                }
            }

            if (removeRegionWithoutSequence)
            {
                result.RemoveAll(l => string.IsNullOrEmpty(l.Sequence));
            }

            progress.SetMessage("Filling sequence finished.");

            var namemap = new MapReader(1, 12).ReadFromFile(options.RefgeneFile);

            foreach (var region in result)
            {
                // The lookup key is the region name passed through StringBefore("_utr3").
                var gene = region.Name.StringBefore("_utr3");

                // Single lookup instead of ContainsKey followed by the indexer.
                string symbol;
                region.GeneSymbol = namemap.TryGetValue(gene, out symbol) ? symbol : string.Empty;
            }

            return result;
        }
        /// <summary>
        /// Combines cuffdiff read_group_tracking files into two tables, one of rounded counts and
        /// one of FPKM values, restricted to genes listed in the cuffdiff significant files.
        /// </summary>
        /// <returns>The paths of the count file and the FPKM file that were written.</returns>
        public override IEnumerable<string> Process()
        {
            this.Progress.SetMessage("Reading group sample map file ...");
            var groupSampleMap = new MapReader(0, 1).ReadFromFile(options.MapFile);

            this.Progress.SetMessage("Reading cuffdiff significant files ...");

            // The first line of each file is skipped as a header. Columns 1, 2 and 3 are read,
            // so a line needs at least four tab-separated fields; shorter lines are ignored
            // rather than failing on parts[3].
            var sigs = (from file in options.SignificantFiles
                        from line in File.ReadAllLines(file).Skip(1)
                        let parts = line.Split('\t')
                        where parts.Length >= 4
                        let name = parts[2]
                        where !name.Equals("-")
                        select new SignificantItem()
                        {
                            Gene = parts[1],
                            GeneName = name,
                            GeneLocation = parts[3]
                        }).ToList();

            // Keep only the first entry seen for each gene id.
            var geneNameMap = new Dictionary<string, SignificantItem>();
            foreach (var sig in sigs)
            {
                if (!geneNameMap.ContainsKey(sig.Gene))
                {
                    geneNameMap[sig.Gene] = sig;
                }
            }

            var countFile = options.OutputFilePrefix + ".count";
            var fpkmFile  = options.OutputFilePrefix + ".fpkm";

            var items = new List<TrackingItem>();

            foreach (var trackingFile in options.InputFiles)
            {
                this.Progress.SetMessage("Reading cuffdiff read_group_tracking file " + trackingFile + "...");
                using (var sr = new StreamReader(trackingFile))
                {
                    // Discard the header line.
                    sr.ReadLine();

                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        var parts = line.Split('\t');
                        if (parts.Length <= 7)
                        {
                            continue;
                        }

                        // Only genes present in the significant files are kept.
                        var gene = parts[0];
                        if (!geneNameMap.ContainsKey(gene))
                        {
                            continue;
                        }

                        // Columns 1 and 2 joined by '_' form the key into the group/sample map;
                        // when the key is not mapped it is used as the sample name itself.
                        var groupIndex = parts[1] + "_" + parts[2];
                        string sample;
                        if (!groupSampleMap.TryGetValue(groupIndex, out sample))
                        {
                            sample = groupIndex;
                        }

                        items.Add(new TrackingItem()
                        {
                            Gene = gene,
                            Sample = sample,
                            Count = parts[3],
                            FPKM = parts[6]
                        });
                    }
                }
            }

            this.Progress.SetMessage("Preparing result ...");
            var samples = new HashSet<string>(from item in items
                                              select item.Sample).OrderBy(m => m).ToList();

            this.Progress.SetMessage(string.Format("There are {0} samples", samples.Count));

            var genes = new HashSet<string>(from item in items
                                            select item.Gene).OrderBy(m => m).ToList();

            this.Progress.SetMessage(string.Format("There are {0} genes", genes.Count));

            var map = ToDoubleDirectory(items);

            this.Progress.SetMessage("Removing empty genes ...");

            // Iterates the separate 'genes' list, so removing entries from 'map' here is safe.
            foreach (var gene in genes)
            {
                if (map[gene].All(m => m.Value.Count == "0"))
                {
                    map.Remove(gene);
                }
            }

            var finalGenes = map.Keys.OrderBy(m => m).ToList();

            this.Progress.SetMessage("Outputing result ...");

            // Counts are machine-readable numbers from the tracking file, so parse and format
            // them with the invariant culture rather than the current thread culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            OutputFile(samples, finalGenes, map, geneNameMap, m => Math.Round(double.Parse(m.Count, invariant)).ToString(invariant), countFile);
            OutputFile(samples, finalGenes, map, geneNameMap, m => m.FPKM, fpkmFile);

            this.Progress.End();
            return new[] { countFile, fpkmFile };
        }