/// <summary>
/// Builds a gene interval forest from per-chromosome transcript interval arrays.
/// </summary>
/// <param name="transcriptIntervalArrays">
/// Transcript interval arrays indexed by chromosome; a null entry yields
/// <c>EmptyIntervalArray</c> at the same index in the result.
/// </param>
/// <returns>
/// An interval forest holding, for each chromosome index, one interval per distinct gene
/// (distinct according to <c>GeneComparer</c>) referenced by that chromosome's transcripts.
/// </returns>
public static IntervalForest<IGene> GetGeneForest(IntervalArray<ITranscript>[] transcriptIntervalArrays)
{
    int chromosomeCount = transcriptIntervalArrays.Length;
    var genesByChromosome = new IntervalArray<IGene>[chromosomeCount];
    var comparer = new GeneComparer();

    for (var i = 0; i < chromosomeCount; i++)
    {
        var transcriptArray = transcriptIntervalArrays[i];

        // chromosomes without transcripts get the shared empty interval array
        if (transcriptArray == null)
        {
            genesByChromosome[i] = EmptyIntervalArray;
            continue;
        }

        // The list preserves first-seen order (the order in which genes are met while
        // walking the transcript array); the set only answers "have we seen this gene?".
        var orderedGenes = new List<IGene>();
        var seenGenes = new HashSet<IGene>(comparer);

        foreach (var entry in transcriptArray.Array)
        {
            var candidate = entry.Value.Gene;

            // Add returns false when an equal gene is already present
            if (seenGenes.Add(candidate))
            {
                orderedGenes.Add(candidate);
            }
        }

        var geneIntervals = orderedGenes.Select(GetGeneInterval).ToArray();
        genesByChromosome[i] = new IntervalArray<IGene>(geneIntervals);
    }

    return new IntervalForest<IGene>(genesByChromosome);
}
/// <summary>
/// Creates the gene fixtures and the comparer used by the tests in this class.
/// </summary>
public GeneComparerTests()
{
    // All fixture genes share every constructor argument except the second one
    // (presumably the start coordinate - confirm against the Gene constructor).
    Gene CreateGene(int start) => new Gene(ChromosomeUtilities.Chr1, start, 200, false, "PAX", 123,
        CompactId.Convert("NM_123"), CompactId.Convert("ENST0000123"));

    _geneA = CreateGene(100);
    _geneB = CreateGene(100); // built from the same values as _geneA, but a separate instance
    _geneC = CreateGene(101); // differs from _geneA only in the second argument

    _geneComparer = new GeneComparer();
}
/// <summary>
/// Creates the gene fixtures (all on one locally constructed chromosome) and the
/// comparer used by the tests in this class.
/// </summary>
public GeneComparerTests()
{
    var chr1 = new Chromosome("chr1", "1", 0);

    // All fixture genes share every constructor argument except the second one
    // (presumably the start coordinate - confirm against the Gene constructor).
    Gene CreateGene(int start) => new Gene(chr1, start, 200, false, "PAX", 123,
        CompactId.Convert("NM_123"), CompactId.Convert("ENST0000123"));

    _geneA = CreateGene(100);
    _geneB = CreateGene(100); // built from the same values as _geneA, but a separate instance
    _geneC = CreateGene(101); // differs from _geneA only in the second argument

    _geneComparer = new GeneComparer();
}
/// <summary>
/// Maps each gene to its position in <paramref name="genes"/>.
/// </summary>
/// <param name="genes">The genes to index; array position becomes the dictionary value.</param>
/// <returns>A dictionary keyed by gene (compared with <c>GeneComparer</c>) whose value is the gene's array index.</returns>
/// <exception cref="UserErrorException">
/// Thrown when two entries of <paramref name="genes"/> are equal according to <c>GeneComparer</c>.
/// </exception>
public static IDictionary<IGene, int> CreateDictionary(IGene[] genes)
{
    var indexByGene = new Dictionary<IGene, int>(new GeneComparer());

    var currentIndex = 0;
    foreach (var gene in genes)
    {
        // a hit means an equal gene was already registered at an earlier position
        if (indexByGene.TryGetValue(gene, out int previousIndex))
        {
            throw new UserErrorException($"Found a duplicate gene in the dictionary: {gene} ({currentIndex} vs {previousIndex})");
        }

        indexByGene[gene] = currentIndex;
        currentIndex++;
    }

    return indexByGene;
}
/// <summary>
/// Writes the header and the contents of <paramref name="cacheData"/> to the current database file.
/// </summary>
/// <param name="cacheData">
/// The cache data to serialize: genes, transcript regions, miRNAs, peptide sequences,
/// regulatory region interval arrays and transcript interval arrays.
/// </param>
public void Write(TranscriptCacheData cacheData)
{
    _blockStream.WriteHeader(_header.Write);

    // item tables, written in this fixed order
    WriteItems(_writer, cacheData.Genes, gene => gene.Write(_writer));
    WriteItems(_writer, cacheData.TranscriptRegions, region => region.Write(_writer));
    WriteItems(_writer, cacheData.Mirnas, mirna => mirna.Write(_writer));
    WriteItems(_writer, cacheData.PeptideSeqs, peptide => _writer.WriteOptAscii(peptide));

    // index lookups over the same item tables; they are handed to each transcript's Write call below
    var geneLookup = CreateIndex(cacheData.Genes, new GeneComparer());
    var regionLookup = CreateIndex(cacheData.TranscriptRegions, new TranscriptRegionComparer());
    var mirnaLookup = CreateIndex(cacheData.Mirnas, new IntervalComparer());
    var peptideLookup = CreateIndex(cacheData.PeptideSeqs, EqualityComparer<string>.Default);

    // interval arrays: regulatory regions first, then transcripts
    WriteIntervals(_writer, cacheData.RegulatoryRegionIntervalArrays, region => region.Write(_writer));
    WriteIntervals(_writer, cacheData.TranscriptIntervalArrays,
        transcript => transcript.Write(_writer, geneLookup, regionLookup, mirnaLookup, peptideLookup));
}
/// <summary>
/// Collects the distinct genes, transcript regions, miRNA intervals and peptide sequences
/// referenced by the transcripts in <paramref name="intervalArrays"/>.
/// </summary>
/// <param name="intervalArrays">Transcript interval arrays; null entries are skipped.</param>
/// <returns>
/// A tuple of arrays (genes, transcript regions, miRNAs, peptide sequences) produced from the
/// de-duplicated sets by the corresponding <c>GetUnique*</c> helpers.
/// </returns>
private static (IGene[] Genes, ITranscriptRegion[] TranscriptRegions, IInterval[] Mirnas, string[] PeptideSeqs) GetUniqueData(
    IEnumerable<IntervalArray<ITranscript>> intervalArrays)
{
    // one de-duplicating set per kind of data; peptides use the default string equality
    var uniqueGenes = new HashSet<IGene>(new GeneComparer());
    var uniqueRegions = new HashSet<ITranscriptRegion>(new TranscriptRegionComparer());
    var uniqueMirnas = new HashSet<IInterval>(new IntervalComparer());
    var uniquePeptides = new HashSet<string>();

    foreach (var transcriptArray in intervalArrays)
    {
        if (transcriptArray == null)
        {
            continue;
        }

        foreach (var entry in transcriptArray.Array)
        {
            var transcript = entry.Value;

            uniqueGenes.Add(transcript.Gene);
            // Translation may be null, in which case a null peptide sequence is passed on to AddString
            AddString(uniquePeptides, transcript.Translation?.PeptideSeq);
            AddTranscriptRegions(uniqueRegions, transcript.TranscriptRegions);
            AddIntervals(uniqueMirnas, transcript.MicroRnas);
        }
    }

    return (
        GetUniqueGenes(uniqueGenes),
        GetUniqueTranscriptRegions(uniqueRegions),
        GetUniqueIntervals(uniqueMirnas),
        GetUniqueStrings(uniquePeptides));
}