/// <summary>
/// Loads the map restricted to the given link type, stores it in <c>Map</c>
/// and writes it to <c>OutputFile</c>.
/// </summary>
/// <param name="type">Link type applied as the load filter.</param>
public void Link(MapLinkFilter.LinkType type)
{
    var linkFilter = new MapLinkFilter();
    linkFilter.LinkTypeFilter = type;

    this.Map = TssRegulatoryMap.LoadMap(this.MapFileName, linkFilter);

    NullMapBuilder.WriteMap(
        this.Map,
        this.ExpressionData.Genes,
        this.HistoneName,
        this.OutputFile);
}
/// <summary>
/// Writes every link of a map to a tab-separated file, one line per link.
/// </summary>
/// <param name="map">Map whose links are written.</param>
/// <param name="tssSet">Locations keyed by transcript name; looked up for every link's transcript.</param>
/// <param name="histoneName">Value written to the histone column of every line.</param>
/// <param name="outputMapFile">Path of the file to write.</param>
public static void WriteMap(
    TssRegulatoryMap map,
    Dictionary<string, Location> tssSet,
    string histoneName,
    string outputMapFile)
{
    // Twelve tab-separated columns; the order matches what LoadMap reads back.
    const string LineFormat = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}";

    using (var writer = new StreamWriter(outputMapFile))
    {
        foreach (var link in map.Links)
        {
            var tss = tssSet[link.TranscriptName];

            object[] columns =
            {
                tss.Chromosome,
                tss.DirectionalStart,   // start and end columns both carry the directional start
                tss.DirectionalStart,
                link.TranscriptName,
                1.0,                    // constant placeholder column
                tss.Strand,
                link.LocusName,
                link.Correlation,
                link.ConfidenceScore,
                link.LinkLength,
                histoneName,
                tss.AlternateName,
            };

            writer.WriteLine(LineFormat, columns);
        }
    }
}
/// <summary>
/// Makes <paramref name="link"/> the only link stored for its transcript,
/// provided the supplied test accepts it; otherwise leaves the map untouched.
/// </summary>
/// <param name="link">Candidate link.</param>
/// <param name="map">Map to update; it must already contain an entry for the link's transcript.</param>
/// <param name="test">Predicate deciding whether the candidate replaces the stored links.</param>
public void AddReplaceLink(
    MapLink link,
    TssRegulatoryMap map,
    Func<MapLink, TssRegulatoryMap, bool> test)
{
    if (!test(link, map))
    {
        return;
    }

    var transcriptLinks = map[link.TranscriptName];
    transcriptLinks.Clear();
    transcriptLinks.Add(link.LocusName, link);
}
/// <summary>
/// Hands a link to the filter's link-type handling when the filter considers it valid.
/// </summary>
/// <param name="tssMap">Tss map passed on to the filter.</param>
/// <param name="LocusMap">Locus map passed on to the filter.</param>
/// <param name="filter">Filter that validates the link and applies the link type.</param>
/// <param name="link">Link to add.</param>
private static void AddLink(
    ref TssRegulatoryMap tssMap,
    ref LocusRegulatoryMap LocusMap,
    MapLinkFilter filter,
    MapLink link)
{
    bool passesFilter = filter.IsValidLink(
        link.Strand,
        link.LinkLength,
        link.ConfidenceScore,
        link.Correlation);

    if (!passesFilter)
    {
        return;
    }

    filter.ApplyLinkFilterType(link, tssMap, LocusMap);
}
/// <summary>
/// Makes sure the map has an entry for the link's transcript and rejects a link
/// whose locus is already stored for that transcript.
/// </summary>
/// <param name="link">Link about to be added.</param>
/// <param name="tssMap">Map keyed by transcript name.</param>
/// <exception cref="InvalidOperationException">
/// The map already holds a link between the same locus and transcript.
/// </exception>
public void EnsureKey(MapLink link, TssRegulatoryMap tssMap)
{
    if (!tssMap.ContainsKey(link.TranscriptName))
    {
        // A freshly created entry is empty, so it cannot hold a duplicate.
        tssMap.Add(link.TranscriptName, new Dictionary<MapLink.Locus, MapLink>());
        return;
    }

    if (tssMap[link.TranscriptName].ContainsKey(link.LocusName))
    {
        // A specific exception type instead of the reserved System.Exception;
        // existing catch (Exception) handlers still catch it.
        throw new InvalidOperationException(
            string.Format(
                "Duplicate Locus-TSS link {0}-{1} in map",
                link.LocusName,
                link.TranscriptName));
    }
}
/// <summary>
/// Splits the best-neighbor links into a low-score group and a high-score group,
/// pairs links from the two groups that have the same direction and nearly the
/// same length, and writes the paired links to two map files.
/// </summary>
/// <param name="fraction">
/// Fraction of the links, counted from the lowest confidence score upward, that
/// forms the first group.
/// </param>
public void BestWorst(double fraction)
{
    this.Map = TssRegulatoryMap.LoadMap(this.MapFileName, new MapLinkFilter());

    var rankedLinks = this.Map.GetBestNeighborMap(1).Links
        .OrderBy(x => x.ConfidenceScore)
        .ToList();
    int topCount = (int)(rankedLinks.Count * fraction);

    // First group: the topCount lowest scores, visited from shortest to longest link.
    var topLinks = rankedLinks
        .Take(topCount)
        .OrderBy(x => x.AbsLinkLength)
        .ToList();

    // Second group: the remaining number of links, taken from the highest score downward.
    var bottomLinks = rankedLinks
        .OrderBy(x => - x.ConfidenceScore)
        .Take(rankedLinks.Count - topCount)
        .ToList();

    var bestLinks = new List<MapLink>();
    var worstLinks = new List<MapLink>();

    foreach (var link in topLinks)
    {
        // First unused partner with the same sign and a length within 1% of this link.
        int matchIndex = bottomLinks.FindIndex(candidate =>
            Math.Sign(link.LinkLength) == Math.Sign(candidate.LinkLength)
            && link.AbsLinkLength < candidate.AbsLinkLength * 1.01
            && link.AbsLinkLength > candidate.AbsLinkLength * 0.99);

        if (matchIndex < 0)
        {
            continue;
        }

        bestLinks.Add(link);
        worstLinks.Add(bottomLinks[matchIndex]);

        // Each partner is used at most once.
        bottomLinks.RemoveAt(matchIndex);
    }

    string bottomMapFile = this.OutputFile.Replace(".bed", ".bottom.bed");

    NullMapBuilder.WriteMap(
        new TssRegulatoryMap(bestLinks),
        this.ExpressionData.Genes,
        this.HistoneName,
        this.OutputFile);
    NullMapBuilder.WriteMap(
        new TssRegulatoryMap(worstLinks),
        this.ExpressionData.Genes,
        this.HistoneName,
        bottomMapFile);
}
/// <summary>
/// Reads a tab-separated map file line by line, builds a link from each line and
/// passes the links that belong to the filter's transcript and locus sets on to
/// the filter.
/// </summary>
/// <returns>The map produced by the filter's post-processing step.</returns>
/// <param name="mapFileName">Map file name.</param>
/// <param name="filter">Filter of map links.</param>
public static TssRegulatoryMap LoadMap(
    string mapFileName,
    MapLinkFilter filter)
{
    using (TextReader reader = new StreamReader(mapFileName))
    {
        TssRegulatoryMap tssMap = new TssRegulatoryMap();
        LocusRegulatoryMap locusMap = new LocusRegulatoryMap();

        for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
        {
            string[] columns = row.Split('\t');

            // Columns used: 0 chromosome, 1 TSS position, 3 transcript, 5 strand,
            // 6 locus, 7 correlation, 8 confidence, 9 link length, 10 histone, 11 gene.
            string transcript = columns[3];
            double confidence = double.Parse(columns[8]);
            double correlation = double.Parse(columns[7]);
            int distance = int.Parse(columns[9]);
            string locus = columns[6];
            string strand = columns[5];

            var link = new MapLink
            {
                ConfidenceScore = confidence,
                Correlation = correlation,
                LinkLength = distance,
                TranscriptName = transcript,
                TssName = transcript,
                LocusName = locus,
                Strand = strand,
                Chromosome = columns[0],
                TssPosition = int.Parse(columns[1]),
                HistoneName = columns[10],
                GeneName = columns[11],
            };

            // A null set means "no restriction" for that dimension.
            if (filter.TranscriptSet != null && !filter.TranscriptSet.Contains(transcript))
            {
                continue;
            }

            if (filter.LocusSet != null && !filter.LocusSet.Contains(locus))
            {
                continue;
            }

            AddLink(ref tssMap, ref locusMap, filter, link);
        }

        return filter.PostProcessLinkFilterType(tssMap, locusMap);
    }
}
/// <summary>
/// Loads a map using a filter built from a promoter range, a maximum link length
/// and a confidence threshold.
/// </summary>
/// <returns>The map.</returns>
/// <param name="mapFileName">Map file name.</param>
/// <param name="promoterRange">
/// Promoter range. The first element is the upstream range; the second element is
/// the downstream range, except that a single-element list uses its only element
/// for both.
/// </param>
/// <param name="maxRange">Max range, used as the maximum link length.</param>
/// <param name="confidenceThreshold">Confidence threshold.</param>
public static TssRegulatoryMap LoadMap(
    string mapFileName,
    List<int> promoterRange,
    int maxRange,
    double confidenceThreshold)
{
    var linkFilter = new MapLinkFilter();

    linkFilter.PromoterUpstreamRange = promoterRange[0];

    int downstreamIndex = promoterRange.Count == 1 ? 0 : 1;
    linkFilter.PromoterDownstreamRange = promoterRange[downstreamIndex];

    linkFilter.MaximumLinkLength = maxRange;
    linkFilter.ConfidenceThreshold = confidenceThreshold;

    return TssRegulatoryMap.LoadMap(mapFileName, linkFilter);
}
/// <summary>
/// Loads the map filtered by the given link type and writes it next to the input
/// file, with the link type name inserted before the ".bed" extension.
/// </summary>
/// <param name="linkType">Link type applied as the load filter and used in the output file name.</param>
public void FilterMap(MapLinkFilter.LinkType linkType)
{
    var linkFilter = new MapLinkFilter();
    linkFilter.LinkTypeFilter = linkType;

    var filteredMap = TssRegulatoryMap.LoadMap(this.MapFileName, linkFilter);

    string typedExtension = "." + linkType.ToString() + ".bed";
    string outputMapFile = this.MapFileName.Replace(".bed", typedExtension);

    // Locations come from the first expression file: genes for a gene map, transcripts otherwise.
    var expressionData = this.ExpressionFiles.First().Value;
    var tssSet = this.GeneMap
        ? expressionData.Genes
        : expressionData.Transcripts;

    WriteMap(filteredMap, tssSet, this.HistoneName, outputMapFile);
}
/// <summary>
/// Loads the map, converts it to genes and prints every resulting link to
/// standard output as one tab-separated line.
/// </summary>
public void Convert()
{
    var linkFilter = new MapLinkFilter();
    linkFilter.MaximumLinkLength = this.MaxRange;
    linkFilter.ConfidenceThreshold = this.PvalueThreshold;
    linkFilter.LinkTypeFilter = MapLinkFilter.LinkType.Any;

    var transcriptMap = TssRegulatoryMap.LoadMap(this.MapFileName, linkFilter);

    var expressionType = GtfExpressionFile.ExpressionTypeFromString(this.RnaSource);
    var expression = new GtfExpressionFile(expressionType, this.AnnotationFileName);

    var geneMap = transcriptMap.ConvertToGenes(IUnknown.QueryInterface<IExpressionData>(expression));

    foreach (var link in geneMap.Links)
    {
        // Coordinates are taken from the transcript the link refers to.
        var geneLocation = expression.Transcripts[link.TranscriptName];

        string[] columns =
        {
            geneLocation.Chromosome,
            geneLocation.Start.ToString(),
            geneLocation.End.ToString(),
            link.GeneName,
            "NA",
            geneLocation.Strand,
            link.LocusName,
            link.Correlation.ToString(),
            link.ConfidenceScore.ToString(),
            link.LinkLength.ToString(),
            this.HistoneName,
            link.TranscriptName,
        };

        Console.WriteLine(string.Join("\t", columns));
    }
}
/// <summary>
/// Converts a contact file into a TSS regulatory map: every contact whose one end
/// overlaps a TSS and whose other end overlaps a locus becomes a link, and the
/// resulting map is written to <c>MapFileName</c> with "Contact" as histone name.
/// </summary>
public void Convert()
{
    // One record per contact line. Columns: 0-2 first end (chromosome, start, end),
    // 3-5 second end, 6 the pair name "id1_id2", 7 the score.
    // NOTE(review): the meaning of the 'true' argument is defined by
    // Helpers.GetFileDataLines, which is not visible here.
    var contacts = Helpers.GetFileDataLines(this.ContactFileName, true).Select(line =>
    {
        var fields = line.Split('\t');
        var pair = fields[6];
        var ids = pair.Split('_');
        return(new
        {
            Pair = pair,
            Id1 = ids[0],
            Chr1 = fields[0],
            Start1 = int.Parse(fields[1]),
            End1 = int.Parse(fields[2]),
            Id2 = ids[1],
            Chr2 = fields[3],
            Start2 = int.Parse(fields[4]),
            End2 = int.Parse(fields[5]),
            Score = double.Parse(fields[7]),
        });
    }).ToList();

    // Two locations per contact, one for each end; AlternateName holds the id of
    // the other end and both carry the contact score.
    var locations = contacts.Select(x => new Location[]
    {
        new Location { Name = x.Id1, Chromosome = x.Chr1, Start = x.Start1, End = x.End1, AlternateName = x.Id2, Score = x.Score },
        new Location { Name = x.Id2, Chromosome = x.Chr2, Start = x.Start2, End = x.End2, AlternateName = x.Id1, Score = x.Score },
    }).SelectMany(x => x)
    .ToList();

    // First location seen for every id.
    var uniqueLocations = locations
        .ToLookup(x => x.Name)
        .ToDictionary(x => x.Key, x => x.First());

    // id -> (partner id -> the partner's entry in uniqueLocations).
    // ToDictionary throws if the same partner id occurs twice for one id.
    var contactMap = locations
        .ToLookup(x => x.Name, x => x)
        .ToDictionary(x => x.Key, x => x
            .ToDictionary(y => y.AlternateName, y => uniqueLocations[y.AlternateName]));

    var indexedLocations = new BedFile(uniqueLocations);
    var LocusFile = new BedFile(this.LocusFileName, BedFile.Bed3Layout);
    var annotationSet = new GtfExpressionFile(GtfExpressionFile.ExpressionType.Cage, this.AnnotationFileName);

    // Loci overlapping each contact end; a null result is treated below as "no overlap".
    var LocusOverlaps = indexedLocations.Locations.ToDictionary(
        x => x.Key,
        x => TRFScorer.GetOverlaps(
            x.Value,
            LocusFile.ChromosomeIndexedLocations,
            BedFile.IndexSize,
            LocusFile.MaxLocationSize[x.Value.Chromosome]));

    // Annotated locations overlapping each contact end, using
    // Location.OverlapsDirectionalStart as the overlap test.
    var tssOverlaps = indexedLocations.Locations.ToDictionary(
        x => x.Key,
        x => TRFScorer.GetOverlaps(
            x.Value,
            annotationSet.ChromosomeIndexedLocations,
            BedFile.IndexSize,
            annotationSet.MaxLocationSize[x.Value.Chromosome],
            Location.OverlapsDirectionalStart));

    // For every contact end with TSS overlaps, keep the partners that have locus
    // overlaps and emit one link per (overlapping TSS, overlapping locus) pair.
    var links = contactMap
        .Where(x => tssOverlaps[x.Key] != null)
        .ToDictionary(x => x.Key, x => x.Value.Where(y => LocusOverlaps[y.Key] != null))
        .Where(x => x.Value.Any())
        .Select(x => tssOverlaps[x.Key].Select(y => new
            {
                Id1 = x.Key,
                Tss = y.Name,
                LocusList = x.Value
                    .Select(z => LocusOverlaps[z.Key]
                        .Select(a => new { Name = a.Name, Id2 = z.Key }))
                    .SelectMany(z => z).ToList()
            })
            .Select(y => y.LocusList.Select(z =>
            {
                var LocusLocation = LocusFile.Locations[z.Name];
                var tssStart = annotationSet.Transcripts[y.Tss].DirectionalStart;

                // Measured from the TSS to the locus end when the locus ends
                // before the TSS, otherwise to the locus start.
                var linkLength = LocusLocation.End < tssStart ? LocusLocation.End - tssStart : LocusLocation.Start - tssStart;
                return(new MapLink
                {
                    TranscriptName = y.Tss,
                    TssName = y.Tss,
                    LocusName = z.Name,

                    // NOTE(review): this is the score of the first location recorded
                    // for Id2 (see uniqueLocations). If an id takes part in several
                    // contacts, that may not be the score of the Id1-Id2 contact — confirm.
                    ConfidenceScore = contactMap[y.Id1][z.Id2].Score,
                    LinkLength = linkLength,
                });
            }))
            .SelectMany(y => y))
        .SelectMany(x => x)
        .ToList();

    var convertedMap = new TssRegulatoryMap(links);
    NullMapBuilder.WriteMap(convertedMap, annotationSet.Locations, "Contact", this.MapFileName);
}
/// <summary>
/// Determines whether no link stored for the given link's transcript has a higher
/// confidence score than the given link.
/// </summary>
/// <returns><c>true</c> if no stored link of the transcript has a higher confidence score; otherwise, <c>false</c>.</returns>
/// <param name="link">Link.</param>
/// <param name="tssMap">Tss map; it must contain an entry for the link's transcript.</param>
private bool IsWorstLocus(MapLink link, TssRegulatoryMap tssMap)
{
    foreach (var entry in tssMap[link.TranscriptName])
    {
        if (entry.Value.ConfidenceScore > link.ConfidenceScore)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Determines whether no link stored for the given link's transcript is shorter
/// (by absolute link length) than the given link.
/// </summary>
/// <returns><c>true</c> if no stored link of the transcript is shorter; otherwise, <c>false</c>.</returns>
/// <param name="link">Link.</param>
/// <param name="tssMap">Tss map; it must contain an entry for the link's transcript.</param>
public bool IsClosestLocus(MapLink link, TssRegulatoryMap tssMap)
{
    var transcriptLinks = tssMap[link.TranscriptName];
    bool closerLinkExists = transcriptLinks.Any(x => x.Value.AbsLinkLength < link.AbsLinkLength);

    return !closerLinkExists;
}
/// <summary>
/// Loads the regulatory map and writes one twelve-column track line per link to
/// the output file, coloring each link by the bin its confidence score falls in.
/// Returns without writing anything when the loaded map is empty.
/// </summary>
public void Execute()
{
    var filter = new MapLinkFilter();
    if (this.Threshold >= 0)
    {
        // A negative threshold leaves the filter's own confidence threshold untouched.
        filter.ConfidenceThreshold = this.Threshold;
    }

    var corrMap = TssRegulatoryMap.LoadMap(this.MapFileName, filter);
    if (corrMap.Count == 0)
    {
        return;
    }

    if (this.UseGenes)
    {
        corrMap = corrMap.ConvertToGenes();
    }

    var rankedLinks = corrMap.Links.OrderBy(x => x.ConfidenceScore).ToArray();
    const int thresholdCount = 10;
    double binSize = (double)rankedLinks.Length / thresholdCount;

    // Eleven edges, highest first: 1, nine scores sampled at tenths of the ranked links, 0.
    var sampledScores = Enumerable.Range(1, 9)
        .Select(x => rankedLinks[(int)Math.Floor(x * binSize)].ConfidenceScore);
    var binEdges = new double[] { 0.0 }
        .Concat(sampledScores)
        .Concat(new double[] { 1 })
        .Reverse()
        .ToArray();

    // Gray first, then one red-to-blue gradient color per edge.
    // NOTE(review): the gradient divides by thresholdCount - 1 = 9 while there are
    // eleven edges, so the last entry evaluates to "283,0,-28" — confirm that
    // Bin never selects it.
    var gradientColors = binEdges.Select((edge, i) =>
    {
        int red = (int)((double)i / (thresholdCount - 1) * 255);
        return string.Format("{0},0,{1}", red, 255 - red);
    });
    var binColors = new string[] { "192,192,192" }
        .Concat(gradientColors)
        .ToList();

    var links = corrMap.Links.Select(x =>
    {
        bool locusStartsBeforeTss = x.LocusLocation.Start < x.TssLocation.DirectionalStart;
        bool locusEndsAfterTss = x.LocusLocation.End > x.TssLocation.DirectionalStart;

        // Whichever of locus and TSS lies on each side; the TSS side is one base long.
        var upstreamLocation = locusStartsBeforeTss ? x.LocusLocation : x.TssLocation;
        var downstreamLocation = locusEndsAfterTss ? x.LocusLocation : x.TssLocation;
        int upstreamLength = locusStartsBeforeTss ? x.LocusSize : 1;
        int downstreamLength = locusEndsAfterTss ? x.LocusSize : 1;

        // A side of length 1 widens the region by one base.
        int regionStart = upstreamLocation.Start;
        if (upstreamLength == 1)
        {
            regionStart -= 1;
        }

        int regionEnd = downstreamLocation.End;
        if (downstreamLength == 1)
        {
            regionEnd += 1;
        }

        string blockCount;
        string blockSizes;
        string blockStarts;
        if (x.LocusLocation.OverlapsDirectionalStart(x.TssLocation))
        {
            // Locus covers the TSS: a single block of the locus size.
            blockCount = "1";
            blockStarts = "0";
            blockSizes = x.LocusSize.ToString();
        }
        else
        {
            blockCount = "2";
            blockSizes = upstreamLength + "," + downstreamLength;
            blockStarts = x.LinkLength > 0
                ? "0," + (regionEnd - regionStart - 1)
                : "0," + (regionEnd - regionStart - x.LocusSize);
        }

        return new string[]
        {
            x.LocusLocation.Chromosome,
            regionStart.ToString(),
            regionEnd.ToString(),
            x.LocusLocation.Name + "_" + x.TssLocation.Name,
            x.ConfidenceScore.ToString(),
            upstreamLocation == x.LocusLocation ? "+" : "-",
            regionStart.ToString(),
            regionEnd.ToString(),
            binColors[Bin(x.ConfidenceScore, binEdges)],
            blockCount,
            blockSizes,
            blockStarts,
        };
    })
    .ToList();

    Tables.ToNamedTsvFile(
        this.OutputFile,
        links,
        new string[] { "track name=regulatory_map description=\"Regulatory Map\" itemRgb=\"On\"" });
}
/// <summary>
/// Builds the final TSS map for the configured <c>LinkTypeFilter</c> from the maps
/// that were filled while links were being added.
/// </summary>
/// <returns>
/// The post-processed map; <paramref name="tssMap"/> itself for link types that
/// have no post-processing step.
/// </returns>
/// <param name="tssMap">Links collected per transcript.</param>
/// <param name="LocusMap">Links collected per locus.</param>
public TssRegulatoryMap PostProcessLinkFilterType(TssRegulatoryMap tssMap, LocusRegulatoryMap LocusMap)
{
    switch (this.LinkTypeFilter)
    {
        case MapLinkFilter.LinkType.NearestGene:
        case MapLinkFilter.LinkType.BestGeneLink:
            // The selection was made per locus; return it as a TSS map.
            return LocusMap.Invert();

        case MapLinkFilter.LinkType.NearestBestGene:
            // Per transcript, the locus-map link with the lowest confidence score.
            return new TssRegulatoryMap(LocusMap.Links
                .ToLookup(x => x.TranscriptName, x => x)
                .Select(x => x.OrderBy(y => y.ConfidenceScore).First()));

        case MapLinkFilter.LinkType.NearestLocusOfNearestGene:
            // Per transcript, the shortest locus-map link.
            return new TssRegulatoryMap(LocusMap.Links
                .ToLookup(x => x.TranscriptName, x => x)
                .Select(x => x.OrderBy(y => y.AbsLinkLength).First()));

        case MapLinkFilter.LinkType.NearestLocusOrGene:
            // Union of both selections.
            return new TssRegulatoryMap(tssMap.Links.Concat(LocusMap.Links));

        case MapLinkFilter.LinkType.NearestLocusAndGene:
        case MapLinkFilter.LinkType.BestLocusNearestGene:
        case MapLinkFilter.LinkType.BestLocusBestGene:
            // Links selected on both the transcript side and the locus side.
            return new TssRegulatoryMap(tssMap.Links.Where(LocusMap.Links.Contains));

        case MapLinkFilter.LinkType.NotNearestGene:
        {
            // While loading, tssMap received every link and LocusMap only the links
            // that passed the closest-gene test. "Not nearest gene" therefore keeps
            // the links that are NOT in LocusMap; without the negation this case
            // returned the nearest-gene links.
            var nearestGeneLinks = LocusMap.Links;
            return new TssRegulatoryMap(
                tssMap.Links.Where(link => !nearestGeneLinks.Contains(link)));
        }

        case MapLinkFilter.LinkType.NotNearestLocusOrGene:
        {
            var Locus2nnmap = LocusMap.GetNearestNeighborMap(5);
            return new TssRegulatoryMap(Locus2nnmap.Links
                .Where(link => !this.IsClosestLocus(link, tssMap)
                    && !this.IsClosestGene(link, LocusMap)));
        }

        case MapLinkFilter.LinkType.BestLocusNotNearestLocusOrGene:
            // Links that are neither closest locus nor closest gene, and that the
            // auxiliary map selected through the best-locus test.
            return new TssRegulatoryMap(LocusMap.Links
                .Where(link => !this.IsClosestLocus(link, tssMap)
                    && !this.IsClosestGene(link, LocusMap)
                    && this.auxiliaryMap.Links.Contains(link)));

        case MapLinkFilter.LinkType.BestDistalLocus:
        {
            // Here the auxiliary map holds the closest-locus selection.
            var distalLinks = tssMap.Links
                .Where(link => !this.IsClosestLocus(link, this.auxiliaryMap)
                    && !this.IsClosestGene(link, LocusMap));

            // Among the remaining links, keep per transcript the one accepted by IsBestLocus.
            TssRegulatoryMap bestDistalMap = new TssRegulatoryMap();
            foreach (var link in distalLinks)
            {
                this.EnsureKey(link, bestDistalMap);
                this.AddReplaceLink(link, bestDistalMap, IsBestLocus);
            }

            return bestDistalMap;
        }

        default:
            return tssMap;
    }
}
/// <summary>
/// Records a link in the working maps in the way the configured
/// <c>LinkTypeFilter</c> requires. An entry for the link's transcript is ensured
/// in <paramref name="tssMap"/> first, for every link type.
/// </summary>
/// <param name="link">Link.</param>
/// <param name="tssMap">Map collecting links per transcript.</param>
/// <param name="LocusMap">Map collecting links per locus.</param>
public void ApplyLinkFilterType(MapLink link, TssRegulatoryMap tssMap, LocusRegulatoryMap LocusMap)
{
    this.EnsureKey(link, tssMap);

    // Two link types keep a third, per-transcript selection; create it on first use.
    bool usesAuxiliaryMap =
        this.LinkTypeFilter == LinkType.BestLocusNotNearestLocusOrGene
        || this.LinkTypeFilter == LinkType.BestDistalLocus;
    if (usesAuxiliaryMap && this.auxiliaryMap == null)
    {
        this.auxiliaryMap = new TssRegulatoryMap();
    }

    switch (this.LinkTypeFilter)
    {
        case LinkType.Any:
            tssMap[link.TranscriptName].Add(link.LocusName, link);
            break;

        case LinkType.NearestLocus:
            this.AddReplaceLink(link, tssMap, this.IsClosestLocus);
            break;

        case LinkType.NearestGene:
        case LinkType.NearestLocusOfNearestGene:
        case LinkType.NearestBestGene:
            this.AddReplaceLink(link, LocusMap, this.IsClosestGene);
            break;

        case LinkType.BestLocusNearestGene:
            this.AddReplaceLink(link, tssMap, this.IsBestLocus);
            this.AddReplaceLink(link, LocusMap, this.IsClosestGene);
            break;

        case LinkType.BestLocusBestGene:
            this.AddReplaceLink(link, tssMap, this.IsBestLocus);
            this.AddReplaceLink(link, LocusMap, this.IsBestGene);
            break;

        case LinkType.NearestLocusOrGene:
        case LinkType.NearestLocusAndGene:
            this.AddReplaceLink(link, tssMap, this.IsClosestLocus);
            this.AddReplaceLink(link, LocusMap, this.IsClosestGene);
            break;

        case LinkType.BestLocusLink:
            this.AddReplaceLink(link, tssMap, this.IsBestLocus);
            break;

        case LinkType.WorstLocusLink:
            this.AddReplaceLink(link, tssMap, this.IsWorstLocus);
            break;

        case LinkType.BestGeneLink:
            this.AddReplaceLink(link, LocusMap, this.IsBestGene);
            break;

        case LinkType.NotNearestLocus:
            // Nothing is recorded for this link type.
            break;

        case LinkType.NotNearestGene:
            tssMap[link.TranscriptName].Add(link.LocusName, link);
            this.AddReplaceLink(link, LocusMap, this.IsClosestGene);
            break;

        case LinkType.NotNearestLocusOrGene:
            tssMap[link.TranscriptName].Add(link.LocusName, link);
            this.EnsureKey(link, LocusMap);
            LocusMap[link.LocusName].Add(link.TranscriptName, link);
            break;

        case LinkType.BestLocusNotNearestLocusOrGene:
            tssMap[link.TranscriptName].Add(link.LocusName, link);
            this.EnsureKey(link, LocusMap);
            LocusMap[link.LocusName].Add(link.TranscriptName, link);
            this.EnsureKey(link, this.auxiliaryMap);
            this.AddReplaceLink(link, this.auxiliaryMap, this.IsBestLocus);
            break;

        case LinkType.BestDistalLocus:
            tssMap[link.TranscriptName].Add(link.LocusName, link);
            this.EnsureKey(link, LocusMap);
            this.AddReplaceLink(link, LocusMap, this.IsClosestGene);
            this.EnsureKey(link, this.auxiliaryMap);
            this.AddReplaceLink(link, this.auxiliaryMap, this.IsClosestLocus);
            break;
    }
}
/// <summary>
/// Drops the reference to the stored links and forces a garbage collection.
/// </summary>
public virtual void ClearMap()
{
    // Drop the reference before collecting, so the forced collection can reclaim
    // the links if nothing else still references them.
    this.links = null;
    GC.Collect();
}