/// <summary>
/// Writes the names of the valid TSSs to a "TssSet" NSV file and, when an annotation
/// file name is configured, the distinct annotation alternate names of those TSSs
/// (used here as gene names) to a "GeneSet" NSV file.
/// </summary>
public void Execute()
{
    // Pair each transcript's raw expression values with its map-builder record and keep
    // only the records that pass validation and the two-fold-change test.
    var validTss = this.TranscriptExpression
        .Select(entry => new
        {
            Values = entry.Value.Values,
            TissueExpressionData = new Analyses.MapBuilderData.TissueExpressionData
            {
                Tss = entry.Key,
                Location = this.TranscriptLocations[entry.Key],
                Expression = entry.Value,
            }
        })
        .Where(candidate =>
            this.IsValidExpressionData(candidate.TissueExpressionData) &&
            CorrelationMapBuilder.IsTwoFoldChange(candidate.Values))
        .ToList();

    // Both output stems share everything except their leading tag.
    var omittedTissues = this.OmittedTissues ?? new string[] { };
    var qualifiers = new string[] { this.Tissue, this.RnaSource, this.HistoneName }
        .Concat(omittedTissues)
        .ToList();
    const string pathTemplate = "../temp/results/TssSets/{0}.nsv";

    string tssStem = string.Join("_", new string[] { "TssSet" }.Concat(qualifiers));
    Tables.ToNamedNsvFile(
        string.Format(pathTemplate, tssStem),
        validTss.Select(item => item.TissueExpressionData.Tss));

    // Without an annotation there is nothing to translate TSS names with.
    if (this.AnnotationFileName == null)
    {
        return;
    }

    var annotation = new GtfExpressionFile(
        GtfExpressionFile.ExpressionTypeFromString(this.RnaSource),
        this.AnnotationFileName);
    var tssToGene = annotation.Transcripts
        .ToDictionary(transcript => transcript.Key, transcript => transcript.Value.AlternateName);

    string geneStem = string.Join("_", new string[] { "GeneSet" }.Concat(qualifiers));

    // Grouping by gene collapses several TSSs of one gene into a single output entry.
    var geneNames = validTss
        .Where(item => tssToGene.ContainsKey(item.TissueExpressionData.Tss))
        .ToLookup(item => tssToGene[item.TissueExpressionData.Tss], item => item)
        .Select(members => members.Key);

    Tables.ToNamedNsvFile(string.Format(pathTemplate, geneStem), geneNames);
}
/// <summary>
/// Convert the map to genes.
/// </summary>
/// <remarks>
/// Loads the TSS regulatory map from <c>MapFileName</c> (filtered by <c>MaxRange</c> and
/// <c>PvalueThreshold</c>, any link type), converts it to a gene map using the annotation
/// in <c>AnnotationFileName</c>, and writes one tab-separated line per gene link to
/// standard output. Non-string columns are formatted with the invariant culture so the
/// output does not depend on the regional settings of the machine running the tool.
/// </remarks>
public void Convert()
{
    MapLinkFilter filter = new MapLinkFilter
    {
        MaximumLinkLength = this.MaxRange,
        ConfidenceThreshold = this.PvalueThreshold,
        LinkTypeFilter = MapLinkFilter.LinkType.Any,
    };

    TssRegulatoryMap map = TssRegulatoryMap.LoadMap(this.MapFileName, filter);

    var expression = new GtfExpressionFile(
        GtfExpressionFile.ExpressionTypeFromString(this.RnaSource),
        this.AnnotationFileName);

    var geneMap = map.ConvertToGenes(IUnknown.QueryInterface<IExpressionData>(expression));

    // Tab-separated output is meant for other programs, so it must not vary with the
    // current culture (e.g. "0,5" instead of "0.5").
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    foreach (var link in geneMap.Links)
    {
        // Coordinates come from the annotation entry of the link's transcript.
        var geneLocation = expression.Transcripts[link.TranscriptName];

        string[] lineData = new string[]
        {
            geneLocation.Chromosome,
            string.Format(invariant, "{0}", geneLocation.Start),
            string.Format(invariant, "{0}", geneLocation.End),
            link.GeneName,
            "NA",
            geneLocation.Strand,
            link.LocusName,
            string.Format(invariant, "{0}", link.Correlation),
            string.Format(invariant, "{0}", link.ConfidenceScore),
            string.Format(invariant, "{0}", link.LinkLength),
            this.HistoneName,
            link.TranscriptName,
        };

        Console.WriteLine(string.Join("\t", lineData));
    }
}
/// <summary>
/// Converts a contact file into a TSS regulatory map and writes it to <c>MapFileName</c>.
/// </summary>
/// <remarks>
/// Each contact line is tab-separated: columns 0-2 and 3-5 hold chromosome/start/end of
/// the two contact ends, column 6 holds the pair id as "id1_id2" and column 7 the score.
/// A contact end that overlaps a TSS from the annotation (by directional start) is linked
/// to every locus from <c>LocusFileName</c> that overlaps one of its contact partners.
/// The link's confidence score is the score of the contact joining the two ends.
/// </remarks>
public void Convert()
{
    // The contact file is machine-written; parse numbers independently of the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var contacts = Helpers.GetFileDataLines(this.ContactFileName, true)
        .Select(line =>
        {
            var fields = line.Split('\t');
            var pair = fields[6];
            var ids = pair.Split('_');

            return new
            {
                Pair = pair,
                Id1 = ids[0],
                Chr1 = fields[0],
                Start1 = int.Parse(fields[1], invariant),
                End1 = int.Parse(fields[2], invariant),
                Id2 = ids[1],
                Chr2 = fields[3],
                Start2 = int.Parse(fields[4], invariant),
                End2 = int.Parse(fields[5], invariant),
                Score = double.Parse(fields[7], invariant),
            };
        })
        .ToList();

    // Every contact yields two records, one per end; AlternateName names the partner end
    // and Score is the score of that particular contact.
    var locations = contacts
        .SelectMany(x => new Location[]
        {
            new Location
            {
                Name = x.Id1,
                Chromosome = x.Chr1,
                Start = x.Start1,
                End = x.End1,
                AlternateName = x.Id2,
                Score = x.Score
            },
            new Location
            {
                Name = x.Id2,
                Chromosome = x.Chr2,
                Start = x.Start2,
                End = x.End2,
                AlternateName = x.Id1,
                Score = x.Score
            },
        })
        .ToList();

    // One representative record per contact end (the first one seen); only used for coordinates.
    var uniqueLocations = locations
        .ToLookup(x => x.Name)
        .ToDictionary(x => x.Key, x => x.First());

    // contactScores[a][b] is the score of the contact between ends a and b.
    // The score is taken from the record of that very contact rather than from
    // uniqueLocations[b], whose Score belongs to whichever contact b appeared in first.
    var contactScores = locations
        .ToLookup(x => x.Name, x => x)
        .ToDictionary(
            x => x.Key,
            x => x.ToDictionary(y => y.AlternateName, y => y.Score));

    var indexedLocations = new BedFile(uniqueLocations);
    var locusFile = new BedFile(this.LocusFileName, BedFile.Bed3Layout);
    var annotationSet = new GtfExpressionFile(GtfExpressionFile.ExpressionType.Cage, this.AnnotationFileName);

    // Loci overlapping each contact end.
    var locusOverlaps = indexedLocations.Locations.ToDictionary(
        x => x.Key,
        x => TRFScorer.GetOverlaps(
            x.Value,
            locusFile.ChromosomeIndexedLocations,
            BedFile.IndexSize,
            locusFile.MaxLocationSize[x.Value.Chromosome]));

    // Annotated TSSs whose directional start overlaps each contact end.
    var tssOverlaps = indexedLocations.Locations.ToDictionary(
        x => x.Key,
        x => TRFScorer.GetOverlaps(
            x.Value,
            annotationSet.ChromosomeIndexedLocations,
            BedFile.IndexSize,
            annotationSet.MaxLocationSize[x.Value.Chromosome],
            Location.OverlapsDirectionalStart));

    // NOTE(review): the null checks below assume GetOverlaps returns null when nothing overlaps.
    var links = contactScores
        .Where(x => tssOverlaps[x.Key] != null)
        .ToDictionary(x => x.Key, x => x.Value.Where(y => locusOverlaps[y.Key] != null))
        .Where(x => x.Value.Any())
        .SelectMany(x => tssOverlaps[x.Key].SelectMany(tss =>
            x.Value.SelectMany(partner => locusOverlaps[partner.Key].Select(locus =>
            {
                var locusLocation = locusFile.Locations[locus.Name];
                var tssStart = annotationSet.Transcripts[tss.Name].DirectionalStart;

                // Signed offset from the TSS to the nearer edge of the locus: the end
                // edge when the locus ends before the TSS, otherwise the start edge.
                var linkLength = locusLocation.End < tssStart
                    ? locusLocation.End - tssStart
                    : locusLocation.Start - tssStart;

                return new MapLink
                {
                    TranscriptName = tss.Name,
                    TssName = tss.Name,
                    LocusName = locus.Name,
                    ConfidenceScore = contactScores[x.Key][partner.Key],
                    LinkLength = linkLength,
                };
            }))))
        .ToList();

    var convertedMap = new TssRegulatoryMap(links);
    NullMapBuilder.WriteMap(convertedMap, annotationSet.Locations, "Contact", this.MapFileName);
}