Exemplo n.º 1
0
        /// <summary>
        /// Checks that a <c>ChromosomeRenamer</c> created with its parameterless constructor
        /// (and given no reference metadata) throws <see cref="InvalidOperationException"/>
        /// for both UCSC and Ensembl name lookups.
        /// </summary>
        public void BeforeInitialization()
        {
            ChromosomeRenamer uninitializedRenamer = new ChromosomeRenamer();

            // lookup in the UCSC direction
            Assert.Throws<InvalidOperationException>(
                () => uninitializedRenamer.GetUcscReferenceName("1"));

            // lookup in the Ensembl direction
            Assert.Throws<InvalidOperationException>(
                () => uninitializedRenamer.GetEnsemblReferenceName("chr1"));
        }
Exemplo n.º 2
0
        /// <summary>
        /// Streams every interval from the interval parser into per-reference ".nci" files.
        /// A new <c>CustomIntervalWriter</c> is opened in the output directory each time the
        /// reference name changes, named after the UCSC form of that reference.
        /// </summary>
        /// <exception cref="GeneralException">
        /// Thrown when a reference name reappears after a different reference has been
        /// processed, i.e. the input is not grouped by reference name.
        /// </exception>
        public void Create()
        {
            string refName = null;

            CustomIntervalWriter customIntervalWriter = null;

            // try/finally guarantees the currently open writer is released even when the
            // parser, the renamer, or the writer throws part-way through the input
            try
            {
                foreach (var interval in _intervalParser)
                {
                    if (interval.ReferenceName != refName)
                    {
                        // remember the reference we just finished so a later reappearance is detected
                        if (refName != null)
                        {
                            _observedRefSeq.Add(refName);
                        }

                        // need to close open file and open a new one; clear the reference
                        // so the finally block never disposes the same writer twice
                        customIntervalWriter?.Dispose();
                        customIntervalWriter = null;

                        refName = interval.ReferenceName;
                        var ucscRefName = _renamer.GetUcscReferenceName(refName);

                        if (_observedRefSeq.Contains(refName))
                        {
                            throw new GeneralException("The input file does not seem to be sorted by reference names. Please sort it and retry.");
                        }

                        var intervalType = interval.Type;
                        customIntervalWriter = new CustomIntervalWriter(Path.Combine(_outputDirectory, ucscRefName + ".nci"), refName, intervalType, _dataVersion);
                    }

                    customIntervalWriter?.WriteInterval(interval);
                }
            }
            finally
            {
                customIntervalWriter?.Dispose();
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Checks that the name "O" comes back unchanged from both the UCSC and the
        /// Ensembl lookup of the shared renamer.
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably "O" is absent from the renamer's reference metadata
        /// (per the test name) - confirm against the fixture that builds <c>_renamer</c>.
        /// </remarks>
        public void GetUnknownDesiredReferenceName()
        {
            const string passthroughName = "O";

            // UCSC direction
            Assert.Equal(passthroughName, _renamer.GetUcscReferenceName(passthroughName));

            // Ensembl direction
            Assert.Equal(passthroughName, _renamer.GetEnsemblReferenceName(passthroughName));
        }
Exemplo n.º 4
0
        /// <summary>
        /// Prepares extraction of a supplementary annotation subset: builds the chromosome
        /// renamer from the compressed reference, reads the header of the input file, and
        /// opens a writer for the output ".nsa" file.
        /// </summary>
        /// <param name="compressedRefFile">Path of the compressed reference file used to build the renamer.</param>
        /// <param name="inputSuppAnnotFile">Path of the supplementary annotation file whose header is read.</param>
        /// <param name="begin">Stored in <c>_begin</c> and embedded in the output file name.</param>
        /// <param name="end">Stored in <c>_end</c> and embedded in the output file name.</param>
        /// <param name="datasourceName">When non-null, appended to the output file name.</param>
        /// <param name="outDirectory">When non-null, the directory the output file is placed in.</param>
        public SuppAnnotExtractor(string compressedRefFile, string inputSuppAnnotFile, int begin, int end,
                                  string datasourceName = null, string outDirectory = null)
        {
            // NOTE(review): the stream returned by GetReadStream is not disposed here;
            // confirm whether GetChromosomeRenamer takes ownership of it.
            _renamer = ChromosomeRenamer.GetChromosomeRenamer(FileUtilities.GetReadStream(compressedRefFile));

            // the intervals position is required by the GetHeader signature but not used here
            long intervalsPosition;
            var  saHeader = SupplementaryAnnotationReader.GetHeader(inputSuppAnnotFile, out intervalsPosition);

            _begin = begin;
            _end   = end;

            // file name layout: <ucscName>_<begin>_<end>[_<datasourceName>].nsa
            string miniSuppAnnotFile = _renamer.GetUcscReferenceName(saHeader.ReferenceSequenceName)
                                       + "_" + begin.ToString(CultureInfo.InvariantCulture)
                                       + "_" + end.ToString(CultureInfo.InvariantCulture);

            if (datasourceName != null)
            {
                miniSuppAnnotFile += "_" + datasourceName;
            }

            miniSuppAnnotFile += ".nsa";

            if (outDirectory != null)
            {
                miniSuppAnnotFile = Path.Combine(outDirectory, miniSuppAnnotFile);
            }

            _writer = new SupplementaryAnnotationWriter(miniSuppAnnotFile, saHeader.ReferenceSequenceName, saHeader.DataSourceVersions);

            Console.WriteLine("MiniSA output to: " + miniSuppAnnotFile);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Registers a single <c>ReferenceMetadata</c> entry built from (null, "chr1", true)
        /// on a fresh renamer, then checks that the Ensembl lookup of "chr1" returns "chr1"
        /// and that the UCSC lookup of null returns null.
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably the first constructor argument is the Ensembl name
        /// (per the test name) - confirm against <c>ReferenceMetadata</c>.
        /// </remarks>
        public void AddReferenceNameEnsemblEmpty()
        {
            const string chr1 = "chr1";

            var renamer = new ChromosomeRenamer();
            var metadataEntries = new List<ReferenceMetadata>();
            metadataEntries.Add(new ReferenceMetadata(null, chr1, true));

            renamer.AddReferenceMetadata(metadataEntries);

            // perform both lookups before asserting on either result
            var ucscResult    = renamer.GetUcscReferenceName(null);
            var ensemblResult = renamer.GetEnsemblReferenceName(chr1);

            Assert.Equal(chr1, ensemblResult);
            Assert.Null(ucscResult);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Opens a new supplementary annotation writer for the current reference
        /// (<c>_currentRefName</c>), records that reference as processed, and resets the
        /// per-reference benchmark and written-entry counter.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the current reference has already been processed, which means the
        /// input is not grouped by chromosome and the existing output would be overwritten.
        /// </exception>
        private void OpenNewSaWriter()
        {
            var currentEnsemblRefName = _renamer.GetEnsemblReferenceName(_currentRefName);
            var currentUcscRefName    = _renamer.GetUcscReferenceName(_currentRefName);

            // processed references are tracked by Ensembl name; a repeat means unsorted input
            if (InputFileParserUtilities.ProcessedReferences.Contains(currentEnsemblRefName))
            {
                throw new InvalidOperationException($"unsorted file, for chromosome {_currentRefName}, SA will be rewritten");
            }

            InputFileParserUtilities.ProcessedReferences.Add(currentEnsemblRefName);

            // the output file is named after the UCSC form of the reference
            var saPath = Path.Combine(_nsdBaseFileName, currentUcscRefName + ".nsa");

            _saWriter = new SupplementaryAnnotationWriter(saPath, _currentRefName, _dataSources, _compressedSequence.GenomeAssembly);
            Console.WriteLine("Populating {0} data...", currentUcscRefName);

            _creationBench.Reset();
            _numSaWritten = 0;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Scans the immediate subdirectories of <paramref name="dirPath"/> and returns
        /// (UCSC reference name, directory path) pairs ordered by the index that
        /// <c>GetIndex</c> assigns to each UCSC name.
        /// </summary>
        /// <param name="dirPath">Directory whose subdirectories are named after references.</param>
        /// <returns>
        /// The pairs in ascending index order. Subdirectories rejected by
        /// <c>_renamer.InReferenceAndVep</c> are omitted; if two subdirectories map to the
        /// same index, the one enumerated last wins.
        /// </returns>
        public List<Tuple<string, string>> GetUcscKaryotypeOrder(string dirPath)
        {
            var subdirectories = Directory.GetDirectories(dirPath);
            var entriesByIndex = new SortedDictionary<ushort, Tuple<string, string>>();

            foreach (var subdirectory in subdirectories)
            {
                // the last path segment is the reference name
                string directoryName = Path.GetFileName(subdirectory);

                if (_renamer.InReferenceAndVep(directoryName))
                {
                    string ucscName = _renamer.GetUcscReferenceName(directoryName, false);

                    // the sorted dictionary keeps entries ordered by index
                    var index = GetIndex(ucscName);
                    entriesByIndex[index] = Tuple.Create(ucscName, subdirectory);
                }
            }

            return entriesByIndex.Values.ToList();
        }
Exemplo n.º 8
0
        /// <summary>
        /// Parametrized check that the shared renamer maps
        /// <paramref name="ensemblReferenceName"/> to <paramref name="expectedReferenceName"/>.
        /// </summary>
        /// <param name="ensemblReferenceName">Name passed to <c>GetUcscReferenceName</c>.</param>
        /// <param name="expectedReferenceName">Name the lookup is expected to return.</param>
        public void GetUcscReferenceName(string ensemblReferenceName, string expectedReferenceName)
        {
            Assert.Equal(expectedReferenceName, _renamer.GetUcscReferenceName(ensemblReferenceName));
        }