예제 #1
0
        // Public Methods ===============================================================================================//

        /// <summary>
        /// Runs the preprocessing stage: writes the retained loci and the transcript
        /// locations through <c>BedFile.ToFileBed6</c>, then dumps the transcript
        /// expression table and the locus data table as tab-separated text with one
        /// column per tissue source.
        /// </summary>
        public void PreprocessExpression()
        {
            this.currentStage = Stage.Preprocess;

            // Loci whose name appears in BadLocusSet are left out of the processed loci file.
            var retainedLoci = this.LocusFile.Locations.Values
                               .Where(locus => !this.BadLocusSet.Contains(locus.Name))
                               .ToList();
            BedFile.ToFileBed6(retainedLoci, this.ProcessedLociFilename);

            var transcriptLocations = this.TranscriptLocations.Values.ToList();
            BedFile.ToFileBed6(transcriptLocations, this.ProcessedTranscriptLocationFilename);

            using (TextWriter expressionWriter = Helpers.CreateStreamWriter(this.ProcessedExpressionFilename))
            {
                expressionWriter.WriteLine("TSS_ID\t" + string.Join("\t", this.TissueSources));

                // One row per transcript; a tissue without a value is written as "NaN".
                var expressionRows = this.TranscriptExpression.Select(transcript =>
                {
                    var cells = this.TissueSources.Select(
                        tissue => transcript.Value.ContainsKey(tissue) ? transcript.Value[tissue].ToString() : "NaN");
                    return transcript.Key + "\t" + string.Join("\t", cells);
                });

                // Rows are separated by a literal "\n"; only the last one gets the writer's own line terminator.
                expressionWriter.WriteLine(string.Join("\n", expressionRows));
            }

            using (TextWriter locusWriter = Helpers.CreateStreamWriter(this.ProcessedHistoneFilename))
            {
                locusWriter.WriteLine("RE_Locus\t" + string.Join("\t", this.TissueSources));

                // Same layout as the expression table, keyed by locus instead of transcript.
                var locusRows = this.LocusData.Select(locus =>
                {
                    var cells = this.TissueSources.Select(
                        tissue => locus.Value.ContainsKey(tissue) ? locus.Value[tissue].ToString() : "NaN");
                    return locus.Key + "\t" + string.Join("\t", cells);
                });

                locusWriter.WriteLine(string.Join("\n", locusRows));
            }
        }
 /// <summary>
 /// Queries every location of <paramref name="regions"/> against the chromosome index of
 /// <paramref name="overlay"/> via <c>TRFScorer.GetOverlaps</c> and collects the names of
 /// the locations that are reported back.
 /// </summary>
 /// <returns>The distinct names of the locations returned by the overlap queries.</returns>
 /// <param name="regions">Regions used as the queries.</param>
 /// <param name="overlay">Indexed regions that are searched; the returned names come from this side.</param>
 private HashSet <string> GetOverlappingRegions(BedFile regions, BedFile overlay)
 {
     var overlappingNames = new HashSet <string>();

     foreach (var entry in regions.Locations)
     {
         var region = entry.Value;

         // A chromosome that is absent from the overlay has no size entry and cannot overlap anything.
         if (!overlay.MaxLocationSize.ContainsKey(region.Chromosome))
         {
             continue;
         }

         var overlaps = TRFScorer.GetOverlaps(
             region,
             overlay.ChromosomeIndexedLocations,
             BedFile.IndexSize,
             overlay.MaxLocationSize[region.Chromosome]);

         // Other callers of GetOverlaps in this file treat a null result as "no overlaps";
         // do the same here instead of failing while flattening the results.
         if (overlaps == null)
         {
             continue;
         }

         foreach (var overlap in overlaps)
         {
             overlappingNames.Add(overlap.Name);
         }
     }

     return overlappingNames;
 }
        /// <summary>
        /// Loads the ChIP-seq peak, motif match and segmentation files, computes the pairwise
        /// overlaps between them and prints a tab-separated summary of the counts to the console.
        /// </summary>
        public void Execute()
        {
            var peaks        = new BedFile(this.ChIPSeqFile, BedFile.Bed6Plus4Layout);
            var motifMatches = new BedFile(this.MotifMatchFile, BedFile.Bed6Layout);
            var segmentation = new BedFile(this.SegmentationFile, BedFile.Bed6Layout);

            // GetOverlappingRegions returns names taken from its second argument.
            HashSet <string> peaksOverlappingMotifs   = this.GetOverlappingRegions(motifMatches, peaks);
            HashSet <string> peaksOverlappingSegment  = this.GetOverlappingRegions(segmentation, peaks);
            HashSet <string> motifsOverlappingSegment = this.GetOverlappingRegions(segmentation, motifMatches);

            int peaksAtMotifs = peaksOverlappingMotifs.Count;

            // Peaks that overlap the segmentation AND a motif match.
            int peaksInSegmentAtMotifs = peaksOverlappingSegment.Count(name => peaksOverlappingMotifs.Contains(name));
            int motifsInSegment        = motifsOverlappingSegment.Count;

            int motifs = motifMatches.Locations.Count;

            // FirstOrDefault instead of First: an empty overlap set is a legitimate outcome and must
            // not abort the run with InvalidOperationException before the summary is printed.
            Console.WriteLine("First peak overlapping segment " + peaksOverlappingSegment.FirstOrDefault());
            Console.WriteLine("First peak overlapping motif " + peaksOverlappingMotifs.FirstOrDefault());

            // The values below are listed in the same order as the header columns.
            Console.WriteLine("Peaks\tPeaksOverlappingMotif\tMotifs\tPeaksInSegment\tPeaksInSegmentOverlappingMotif\tMotifsInSegment");
            Console.WriteLine(string.Join("\t", new List <int> {
                peaks.Locations.Count, peaksAtMotifs, motifs, peaksOverlappingSegment.Count, peaksInSegmentAtMotifs, motifsInSegment
            }));
        }
        /// <summary>
        /// Converts the contact file into a TSS-to-locus regulatory map and writes it to
        /// <c>MapFileName</c>.
        /// </summary>
        /// <remarks>
        /// Each contact line is tab-separated: columns 0-2 are chromosome/start/end of the first
        /// anchor, columns 3-5 the same for the second anchor, column 6 is the pair id in the form
        /// <c>id1_id2</c> and column 7 is the score. A link is emitted for every transcript start
        /// overlapping one anchor combined with every locus overlapping a contacted anchor.
        /// </remarks>
        public void Convert()
        {
            // The contact file is machine-readable, so numbers are parsed with the invariant
            // culture; the current culture could misread the decimal separator of the score.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var contacts = Helpers.GetFileDataLines(this.ContactFileName, true).Select(line =>
            {
                var fields = line.Split('\t');
                var pair   = fields[6];
                var ids    = pair.Split('_');

                return(new
                {
                    Pair = pair,
                    Id1 = ids[0],
                    Chr1 = fields[0],
                    Start1 = int.Parse(fields[1], invariant),
                    End1 = int.Parse(fields[2], invariant),
                    Id2 = ids[1],
                    Chr2 = fields[3],
                    Start2 = int.Parse(fields[4], invariant),
                    End2 = int.Parse(fields[5], invariant),
                    Score = double.Parse(fields[7], invariant),
                });
            }).ToList();

            // Every contact yields two locations, one per anchor, each pointing at its partner
            // through AlternateName.
            var locations = contacts.SelectMany(contact => new Location[]
            {
                new Location
                {
                    Name = contact.Id1, Chromosome = contact.Chr1, Start = contact.Start1, End = contact.End1, AlternateName = contact.Id2, Score = contact.Score
                },
                new Location
                {
                    Name = contact.Id2, Chromosome = contact.Chr2, Start = contact.Start2, End = contact.End2, AlternateName = contact.Id1, Score = contact.Score
                },
            }).ToList();

            // First location seen for each anchor id.
            var uniqueLocations = locations
                                  .ToLookup(location => location.Name)
                                  .ToDictionary(group => group.Key, group => group.First());

            // Anchor id -> (partner id -> partner location).
            // NOTE(review): the inner ToDictionary throws if the same pair occurs twice or an
            // anchor is paired with itself — confirm the input guarantees this cannot happen.
            var contactMap = locations
                             .ToLookup(location => location.Name, location => location)
                             .ToDictionary(group => group.Key, group => group
                                           .ToDictionary(location => location.AlternateName, location => uniqueLocations[location.AlternateName]));

            var indexedLocations = new BedFile(uniqueLocations);

            var locusFile     = new BedFile(this.LocusFileName, BedFile.Bed3Layout);
            var annotationSet = new GtfExpressionFile(GtfExpressionFile.ExpressionType.Cage, this.AnnotationFileName);

            // An anchor on a chromosome that the locus file does not cover has no size entry;
            // record it as null ("no overlaps") instead of failing on the missing key. The
            // null entries are filtered out when the links are built below.
            var locusOverlaps = indexedLocations.Locations.ToDictionary(
                anchor => anchor.Key,
                anchor => locusFile.MaxLocationSize.ContainsKey(anchor.Value.Chromosome)
                    ? TRFScorer.GetOverlaps(
                        anchor.Value,
                        locusFile.ChromosomeIndexedLocations,
                        BedFile.IndexSize,
                        locusFile.MaxLocationSize[anchor.Value.Chromosome])
                    : null);

            // Same guard for the annotation set.
            var tssOverlaps = indexedLocations.Locations.ToDictionary(
                anchor => anchor.Key,
                anchor => annotationSet.MaxLocationSize.ContainsKey(anchor.Value.Chromosome)
                    ? TRFScorer.GetOverlaps(
                        anchor.Value,
                        annotationSet.ChromosomeIndexedLocations,
                        BedFile.IndexSize,
                        annotationSet.MaxLocationSize[anchor.Value.Chromosome],
                        Location.OverlapsDirectionalStart)
                    : null);

            var links = contactMap
                        .Where(contact => tssOverlaps[contact.Key] != null)
                        .Select(contact => new
                        {
                            Id1 = contact.Key,

                            // Only partners with a locus overlap result can contribute links.
                            LocusPartners = contact.Value.Where(partner => locusOverlaps[partner.Key] != null).ToList(),
                        })
                        .Where(contact => contact.LocusPartners.Any())
                        .SelectMany(contact => tssOverlaps[contact.Id1].SelectMany(tss =>
                            contact.LocusPartners.SelectMany(partner =>
                                locusOverlaps[partner.Key].Select(locus =>
                                {
                                    var locusLocation = locusFile.Locations[locus.Name];
                                    var tssStart      = annotationSet.Transcripts[tss.Name].DirectionalStart;

                                    // Offset of the locus edge relative to the TSS: the end is used
                                    // when the locus ends before the TSS, otherwise the start.
                                    var linkLength = locusLocation.End < tssStart ? locusLocation.End - tssStart : locusLocation.Start - tssStart;

                                    return(new MapLink
                                    {
                                        TranscriptName = tss.Name,
                                        TssName = tss.Name,
                                        LocusName = locus.Name,

                                        // NOTE(review): partner.Value is the first location recorded for the
                                        // partner id, so this is that record's score — confirm it is the
                                        // intended confidence when an anchor takes part in several contacts.
                                        ConfidenceScore = partner.Value.Score,
                                        LinkLength = linkLength,
                                    });
                                }))))
                        .ToList();

            var convertedMap = new TssRegulatoryMap(links);

            NullMapBuilder.WriteMap(convertedMap, annotationSet.Locations, "Contact", this.MapFileName);
        }