示例#1
0
        /// <summary>
        /// Builds a gene interval forest from the per-chromosome transcript interval arrays.
        /// For every chromosome, the genes referenced by its transcripts are de-duplicated
        /// (using <see cref="GeneComparer"/>) and kept in the order in which they are first seen.
        /// </summary>
        /// <param name="transcriptIntervalArrays">transcript interval arrays, one entry per chromosome index; an entry may be null</param>
        /// <returns>an interval forest containing one gene interval array per entry in the input</returns>
        public static IntervalForest<IGene> GetGeneForest(IntervalArray<ITranscript>[] transcriptIntervalArrays)
        {
            int numChromosomes     = transcriptIntervalArrays.Length;
            var geneIntervalArrays = new IntervalArray<IGene>[numChromosomes];
            var geneComparer       = new GeneComparer();

            for (var chrIndex = 0; chrIndex < numChromosomes; chrIndex++)
            {
                var transcriptIntervalArray = transcriptIntervalArrays[chrIndex];

                if (transcriptIntervalArray == null)
                {
                    // assign an empty IntervalArray to this chromosome
                    geneIntervalArrays[chrIndex] = EmptyIntervalArray;
                    continue;
                }

                // the list keeps the order of the genes, as the intervals are already sorted at the transcript level;
                // the set is only used to detect genes that have already been added
                var geneList = new List<IGene>();
                var geneSet  = new HashSet<IGene>(geneComparer);

                foreach (var transcriptInterval in transcriptIntervalArray.Array)
                {
                    var gene = transcriptInterval.Value.Gene;

                    // Add returns false when an equal gene is already in the set, so a single
                    // lookup both tests for membership and records the gene
                    if (geneSet.Add(gene))
                    {
                        geneList.Add(gene);
                    }
                }

                geneIntervalArrays[chrIndex] = new IntervalArray<IGene>(geneList.Select(GetGeneInterval).ToArray());
            }

            return new IntervalForest<IGene>(geneIntervalArrays);
        }
示例#2
0
 /// <summary>
 /// Creates the fixtures: genes A and B are built from identical arguments, gene C
 /// differs only in its start position (101 instead of 100).
 /// </summary>
 public GeneComparerTests()
 {
     // every fixture gene shares all constructor arguments except the start position
     Gene CreateGene(int start) => new Gene(ChromosomeUtilities.Chr1, start, 200, false, "PAX", 123, CompactId.Convert("NM_123"), CompactId.Convert("ENST0000123"));

     _geneA        = CreateGene(100);
     _geneB        = CreateGene(100);
     _geneC        = CreateGene(101);
     _geneComparer = new GeneComparer();
 }
示例#3
0
        /// <summary>
        /// Creates the fixtures on a shared chromosome: genes A and B are built from identical
        /// arguments, gene C differs only in its start position (101 instead of 100).
        /// </summary>
        public GeneComparerTests()
        {
            var chr1 = new Chromosome("chr1", "1", 0);

            // every fixture gene shares all constructor arguments except the start position
            Gene CreateGene(int start) => new Gene(chr1, start, 200, false, "PAX", 123, CompactId.Convert("NM_123"), CompactId.Convert("ENST0000123"));

            _geneA        = CreateGene(100);
            _geneB        = CreateGene(100);
            _geneC        = CreateGene(101);
            _geneComparer = new GeneComparer();
        }
示例#4
0
        /// <summary>
        /// Maps each gene to its position in the supplied array. Genes are compared
        /// using <see cref="GeneComparer"/>.
        /// </summary>
        /// <param name="genes">the genes to index</param>
        /// <returns>a dictionary from gene to its index in <paramref name="genes"/></returns>
        /// <exception cref="UserErrorException">thrown when two genes in the array compare as equal</exception>
        public static IDictionary<IGene, int> CreateDictionary(IGene[] genes)
        {
            var internalIds = new Dictionary<IGene, int>(new GeneComparer());
            var currentIndex = 0;

            foreach (var gene in genes)
            {
                // an existing entry means an equal gene appeared earlier in the array
                if (internalIds.TryGetValue(gene, out int previousIndex))
                {
                    throw new UserErrorException($"Found a duplicate gene in the dictionary: {gene} ({currentIndex} vs {previousIndex})");
                }

                internalIds[gene] = currentIndex;
                currentIndex++;
            }

            return internalIds;
        }
        /// <summary>
        /// writes the annotations to the current database file: first the header, then the
        /// gene, transcript region, miRNA and peptide sequence items, and finally the
        /// regulatory region and transcript interval arrays
        /// </summary>
        /// <param name="cacheData">the cache data whose contents are written</param>
        public void Write(TranscriptCacheData cacheData)
        {
            _blockStream.WriteHeader(_header.Write);

            // the item collections are written in a fixed order
            WriteItems(_writer, cacheData.Genes, gene => gene.Write(_writer));
            WriteItems(_writer, cacheData.TranscriptRegions, region => region.Write(_writer));
            WriteItems(_writer, cacheData.Mirnas, mirna => mirna.Write(_writer));
            WriteItems(_writer, cacheData.PeptideSeqs, peptideSeq => _writer.WriteOptAscii(peptideSeq));

            // the indices are handed to the transcripts when they are written below
            var geneLookup             = CreateIndex(cacheData.Genes, new GeneComparer());
            var transcriptRegionLookup = CreateIndex(cacheData.TranscriptRegions, new TranscriptRegionComparer());
            var mirnaLookup            = CreateIndex(cacheData.Mirnas, new IntervalComparer());
            var peptideLookup          = CreateIndex(cacheData.PeptideSeqs, EqualityComparer<string>.Default);

            WriteIntervals(_writer, cacheData.RegulatoryRegionIntervalArrays, regulatoryRegion => regulatoryRegion.Write(_writer));
            WriteIntervals(_writer, cacheData.TranscriptIntervalArrays,
                transcript => transcript.Write(_writer, geneLookup, transcriptRegionLookup, mirnaLookup, peptideLookup));
        }
        /// <summary>
        /// Collects the distinct genes, transcript regions, miRNA intervals and peptide
        /// sequences referenced by the transcripts in the supplied interval arrays.
        /// Null interval arrays are skipped.
        /// </summary>
        /// <param name="intervalArrays">the transcript interval arrays to scan; entries may be null</param>
        /// <returns>a tuple with the unique genes, transcript regions, miRNAs and peptide sequences</returns>
        private static (IGene[] Genes, ITranscriptRegion[] TranscriptRegions, IInterval[] Mirnas, string[] PeptideSeqs) GetUniqueData(
            IEnumerable<IntervalArray<ITranscript>> intervalArrays)
        {
            // each set uses the matching project comparer; strings use default equality
            var uniqueGenes             = new HashSet<IGene>(new GeneComparer());
            var uniqueTranscriptRegions = new HashSet<ITranscriptRegion>(new TranscriptRegionComparer());
            var uniqueMirnas            = new HashSet<IInterval>(new IntervalComparer());
            var uniquePeptides          = new HashSet<string>();

            foreach (var transcriptIntervals in intervalArrays)
            {
                if (transcriptIntervals == null)
                {
                    continue;
                }

                foreach (var transcriptInterval in transcriptIntervals.Array)
                {
                    var currentTranscript = transcriptInterval.Value;

                    uniqueGenes.Add(currentTranscript.Gene);
                    AddString(uniquePeptides, currentTranscript.Translation?.PeptideSeq);
                    AddTranscriptRegions(uniqueTranscriptRegions, currentTranscript.TranscriptRegions);
                    AddIntervals(uniqueMirnas, currentTranscript.MicroRnas);
                }
            }

            // tuple elements are evaluated left to right: genes, regions, miRNAs, peptides
            return (GetUniqueGenes(uniqueGenes),
                    GetUniqueTranscriptRegions(uniqueTranscriptRegions),
                    GetUniqueIntervals(uniqueMirnas),
                    GetUniqueStrings(uniquePeptides));
        }