// Public Methods ===============================================================================================//

/// <summary>
/// Runs the preprocessing stage: writes the retained loci and the transcript locations as BED6 files,
/// then writes one tab-separated table for the transcript expression data and one for the locus data.
/// </summary>
/// <remarks>
/// Each table has one column per entry of <c>TissueSources</c>; a row that has no value for a
/// tissue source gets the literal text "NaN" in that column.
/// </remarks>
public void PreprocessExpression()
{
    this.currentStage = Stage.Preprocess;

    // Loci whose name is in the bad-locus set are left out of the processed loci file.
    var retainedLoci = this.LocusFile.Locations.Values
        .Where(locus => !this.BadLocusSet.Contains(locus.Name))
        .ToList();
    BedFile.ToFileBed6(retainedLoci, this.ProcessedLociFilename);

    var transcriptLocations = this.TranscriptLocations.Values.ToList();
    BedFile.ToFileBed6(transcriptLocations, this.ProcessedTranscriptLocationFilename);

    using (TextWriter expressionWriter = Helpers.CreateStreamWriter(this.ProcessedExpressionFilename))
    {
        var expressionHeader = "TSS_ID\t" + string.Join("\t", this.TissueSources);
        expressionWriter.WriteLine(expressionHeader);

        // One row per transcript: its key followed by one cell per tissue source.
        var expressionRows = this.TranscriptExpression.Select(
            transcript => transcript.Key + "\t" + string.Join(
                "\t",
                this.TissueSources.Select(
                    tissue => transcript.Value.ContainsKey(tissue) ? transcript.Value[tissue].ToString() : "NaN")));

        // Rows are joined with "\n"; the final terminator comes from WriteLine.
        expressionWriter.WriteLine(string.Join("\n", expressionRows));
    }

    using (TextWriter locusWriter = Helpers.CreateStreamWriter(this.ProcessedHistoneFilename))
    {
        var locusHeader = "RE_Locus\t" + string.Join("\t", this.TissueSources);
        locusWriter.WriteLine(locusHeader);

        // One row per locus: its key followed by one cell per tissue source.
        var locusRows = this.LocusData.Select(
            locus => locus.Key + "\t" + string.Join(
                "\t",
                this.TissueSources.Select(
                    tissue => locus.Value.ContainsKey(tissue) ? locus.Value[tissue].ToString() : "NaN")));

        locusWriter.WriteLine(string.Join("\n", locusRows));
    }
}
/// <summary>
/// Queries every location in <paramref name="regions"/> against <paramref name="overlay"/> with
/// <c>TRFScorer.GetOverlaps</c> and collects the names of all locations those queries return.
/// </summary>
/// <returns>The distinct names of the locations returned by the overlap queries.</returns>
/// <param name="regions">Regions to be overlapped</param>
/// <param name="overlay">Regions to overlay</param>
private HashSet<string> GetOverlappingRegions(BedFile regions, BedFile overlay)
{
    var overlappingNames = regions.Locations
        // Regions on a chromosome the overlay has no size entry for cannot be queried; skip them.
        .Where(region => overlay.MaxLocationSize.ContainsKey(region.Value.Chromosome))
        .Select(region => TRFScorer.GetOverlaps(
            region.Value,
            overlay.ChromosomeIndexedLocations,
            BedFile.IndexSize,
            overlay.MaxLocationSize[region.Value.Chromosome]))
        // Other callers in this file null-check the result of GetOverlaps, so a null result is
        // treated here as "no overlaps" instead of letting SelectMany throw on it.
        .Where(hits => hits != null)
        .SelectMany(hits => hits)
        .Select(hit => hit.Name);

    return new HashSet<string>(overlappingNames);
}
/// <summary>
/// Loads the ChIP-seq peak, motif match and segmentation BED files, computes the overlap name sets
/// between them, and prints a tab-separated summary of the resulting counts to the console.
/// </summary>
/// <remarks>
/// Two diagnostic lines showing the first member of the peak/segment and peak/motif overlap sets
/// are printed before the summary; "(none)" is printed when a set is empty.
/// </remarks>
public void Execute()
{
    var peaks = new BedFile(this.ChIPSeqFile, BedFile.Bed6Plus4Layout);
    var motifMatches = new BedFile(this.MotifMatchFile, BedFile.Bed6Layout);
    var segmentation = new BedFile(this.SegmentationFile, BedFile.Bed6Layout);

    HashSet<string> peaksOverlappingMotifs = this.GetOverlappingRegions(motifMatches, peaks);
    HashSet<string> peaksOverlappingSegment = this.GetOverlappingRegions(segmentation, peaks);
    HashSet<string> motifsOverlappingSegment = this.GetOverlappingRegions(segmentation, motifMatches);

    int peaksAtMotifs = peaksOverlappingMotifs.Count;

    // Names present in both the segment-overlap set and the motif-overlap set.
    int peaksInSegment = peaksOverlappingSegment.Count(name => peaksOverlappingMotifs.Contains(name));
    int motifsInSegment = motifsOverlappingSegment.Count;
    int motifs = motifMatches.Locations.Count;

    // FirstOrDefault instead of First: an empty overlap set is a valid result and must not
    // abort the run with InvalidOperationException before the summary is printed.
    Console.WriteLine("First peak overlapping segment " + (peaksOverlappingSegment.FirstOrDefault() ?? "(none)"));
    Console.WriteLine("First peak overlapping motif " + (peaksOverlappingMotifs.FirstOrDefault() ?? "(none)"));

    Console.WriteLine("Peaks\tPeaksOverlappingMotif\tMotifs\tPeaksInSegment\tPeaksInSegmentOverlappingMotif\tMotifsInSegment");
    Console.WriteLine(string.Join("\t", new List<int>
    {
        peaks.Locations.Count,
        peaksAtMotifs,
        motifs,
        peaksOverlappingSegment.Count,
        peaksInSegment,
        motifsInSegment,
    }));
}
/// <summary>
/// Converts a tab-separated contact file into a TSS regulatory map and writes it to
/// <c>MapFileName</c>.
/// </summary>
/// <remarks>
/// Each contact line supplies two anchors (chromosome/start/end in columns 0-2 and 3-5), an
/// "id1_id2" pair in column 6 and a score in column 7. An anchor is linked to the loci overlapping
/// its contact partners whenever the anchor itself has TSS overlaps; every such
/// (TSS, locus) combination becomes one <c>MapLink</c> carrying the score of that contact.
/// </remarks>
public void Convert()
{
    // The contact file is machine-written data, so numbers are parsed culture-independently.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // NOTE(review): the 'true' argument presumably tells the helper to skip a header line - confirm.
    var contacts = Helpers.GetFileDataLines(this.ContactFileName, true).Select(line =>
    {
        var fields = line.Split('\t');
        var pair = fields[6];
        var ids = pair.Split('_');

        return new
        {
            Pair = pair,
            Id1 = ids[0],
            Chr1 = fields[0],
            Start1 = int.Parse(fields[1], invariant),
            End1 = int.Parse(fields[2], invariant),
            Id2 = ids[1],
            Chr2 = fields[3],
            Start2 = int.Parse(fields[4], invariant),
            End2 = int.Parse(fields[5], invariant),
            Score = double.Parse(fields[7], invariant),
        };
    }).ToList();

    // Every contact yields two locations, one per anchor; each records its partner's id in
    // AlternateName and the score of the contact in Score.
    var locations = contacts.SelectMany(contact => new Location[]
    {
        new Location
        {
            Name = contact.Id1,
            Chromosome = contact.Chr1,
            Start = contact.Start1,
            End = contact.End1,
            AlternateName = contact.Id2,
            Score = contact.Score
        },
        new Location
        {
            Name = contact.Id2,
            Chromosome = contact.Chr2,
            Start = contact.Start2,
            End = contact.End2,
            AlternateName = contact.Id1,
            Score = contact.Score
        },
    }).ToList();

    // One representative location per anchor id (the first occurrence), used for overlap queries.
    var uniqueLocations = locations
        .ToLookup(location => location.Name)
        .ToDictionary(group => group.Key, group => group.First());

    // anchor id -> partner id -> score of that specific contact. The score is taken from the
    // contact's own location entry; looking it up through uniqueLocations would return the score
    // of whichever contact happened to mention the partner first.
    var contactScores = locations
        .ToLookup(location => location.Name)
        .ToDictionary(
            group => group.Key,
            group => group.ToDictionary(location => location.AlternateName, location => location.Score));

    var indexedLocations = new BedFile(uniqueLocations);
    var locusFile = new BedFile(this.LocusFileName, BedFile.Bed3Layout);
    var annotationSet = new GtfExpressionFile(GtfExpressionFile.ExpressionType.Cage, this.AnnotationFileName);

    // An anchor on a chromosome that the locus file has no size entry for cannot overlap anything;
    // record null (filtered below) instead of failing on the MaxLocationSize lookup.
    var locusOverlaps = indexedLocations.Locations.ToDictionary(
        anchor => anchor.Key,
        anchor => locusFile.MaxLocationSize.ContainsKey(anchor.Value.Chromosome)
            ? TRFScorer.GetOverlaps(
                anchor.Value,
                locusFile.ChromosomeIndexedLocations,
                BedFile.IndexSize,
                locusFile.MaxLocationSize[anchor.Value.Chromosome])
            : null);

    // Same guard for the annotation set.
    var tssOverlaps = indexedLocations.Locations.ToDictionary(
        anchor => anchor.Key,
        anchor => annotationSet.MaxLocationSize.ContainsKey(anchor.Value.Chromosome)
            ? TRFScorer.GetOverlaps(
                anchor.Value,
                annotationSet.ChromosomeIndexedLocations,
                BedFile.IndexSize,
                annotationSet.MaxLocationSize[anchor.Value.Chromosome],
                Location.OverlapsDirectionalStart)
            : null);

    var links = contactScores
        .Where(anchor => tssOverlaps[anchor.Key] != null)
        .Select(anchor => new
        {
            Id1 = anchor.Key,

            // Materialized once: the partner list is tested with Any() and then re-read per TSS.
            Partners = anchor.Value.Where(partner => locusOverlaps[partner.Key] != null).ToList(),
        })
        .Where(anchor => anchor.Partners.Any())
        .SelectMany(anchor => tssOverlaps[anchor.Id1].SelectMany(tss =>
            anchor.Partners.SelectMany(partner => locusOverlaps[partner.Key].Select(locus =>
            {
                var locusLocation = locusFile.Locations[locus.Name];
                var tssStart = annotationSet.Transcripts[tss.Name].DirectionalStart;

                // Measured from the locus end when the locus ends before the TSS start,
                // otherwise from the locus start.
                var linkLength = locusLocation.End < tssStart
                    ? locusLocation.End - tssStart
                    : locusLocation.Start - tssStart;

                return new MapLink
                {
                    TranscriptName = tss.Name,
                    TssName = tss.Name,
                    LocusName = locus.Name,
                    ConfidenceScore = partner.Value,
                    LinkLength = linkLength,
                };
            }))))
        .ToList();

    var convertedMap = new TssRegulatoryMap(links);
    NullMapBuilder.WriteMap(convertedMap, annotationSet.Locations, "Contact", this.MapFileName);
}