/// <summary>
/// Checks that a default-constructed <c>ChromosomeRenamer</c> rejects name lookups:
/// both the UCSC and the Ensembl lookup are expected to throw
/// <see cref="InvalidOperationException"/>.
/// </summary>
public void BeforeInitialization()
{
    var uninitializedRenamer = new ChromosomeRenamer();

    // Ensembl-style input ("1") going through the UCSC lookup
    Assert.Throws<InvalidOperationException>(
        () => uninitializedRenamer.GetUcscReferenceName("1"));

    // UCSC-style input ("chr1") going through the Ensembl lookup
    Assert.Throws<InvalidOperationException>(
        () => uninitializedRenamer.GetEnsemblReferenceName("chr1"));
}
/// <summary>
/// Enumerates the intervals supplied by <c>_intervalParser</c> and writes them through a
/// <c>CustomIntervalWriter</c>, opening a new ".nci" file (named after the UCSC reference
/// name, inside <c>_outputDirectory</c>) every time the reference name changes.
/// </summary>
/// <remarks>
/// The currently open writer is always disposed before this method returns, including
/// when an exception interrupts the loop.
/// </remarks>
/// <exception cref="GeneralException">
/// Thrown when a reference name shows up again after a different reference name has been
/// processed, i.e. the input is not grouped by reference name.
/// </exception>
public void Create()
{
    string refName = null;
    CustomIntervalWriter customIntervalWriter = null;

    try
    {
        foreach (var interval in _intervalParser)
        {
            if (interval.ReferenceName != refName)
            {
                // remember the reference we just finished so a later reappearance is detected
                if (refName != null)
                {
                    _observedRefSeq.Add(refName);
                }

                // Close the file of the previous reference before opening a new one.
                // The field is cleared first so the finally block can never dispose
                // the same writer a second time.
                var previousWriter = customIntervalWriter;
                customIntervalWriter = null;
                previousWriter?.Dispose();

                refName = interval.ReferenceName;
                var ucscRefName = _renamer.GetUcscReferenceName(refName);

                if (_observedRefSeq.Contains(refName))
                {
                    throw new GeneralException("The input file does not seem to be sorted by reference names. Please sort it and retry.");
                }

                customIntervalWriter = new CustomIntervalWriter(
                    Path.Combine(_outputDirectory, ucscRefName + ".nci"),
                    refName,
                    interval.Type,
                    _dataVersion);
            }

            customIntervalWriter?.WriteInterval(interval);
        }
    }
    finally
    {
        // releases the last writer on success and the active writer on failure
        customIntervalWriter?.Dispose();
    }
}
/// <summary>
/// Checks that the name "O" is returned unchanged by both the UCSC and the Ensembl
/// lookup of <c>_renamer</c>.
/// </summary>
public void GetUnknownDesiredReferenceName()
{
    const string unknownName = "O";

    var ucscResult = _renamer.GetUcscReferenceName(unknownName);
    Assert.Equal(unknownName, ucscResult);

    var ensemblResult = _renamer.GetEnsemblReferenceName(unknownName);
    Assert.Equal(unknownName, ensemblResult);
}
/// <summary>
/// Prepares an extractor for the range [<paramref name="begin"/>, <paramref name="end"/>]
/// and opens the writer for the resulting mini supplementary annotation (".nsa") file.
/// </summary>
/// <param name="compressedRefFile">path of the compressed reference used to build the chromosome renamer</param>
/// <param name="inputSuppAnnotFile">path of the supplementary annotation file whose header is read</param>
/// <param name="begin">stored in <c>_begin</c> and embedded in the output file name</param>
/// <param name="end">stored in <c>_end</c> and embedded in the output file name</param>
/// <param name="datasourceName">when not null, appended to the output file name after the range</param>
/// <param name="outDirectory">when not null, the directory the output file name is combined with</param>
public SuppAnnotExtractor(string compressedRefFile, string inputSuppAnnotFile, int begin, int end, string datasourceName = null, string outDirectory = null)
{
    // NOTE(review): the stream handed to GetChromosomeRenamer is not disposed here - confirm who owns it
    _renamer = ChromosomeRenamer.GetChromosomeRenamer(FileUtilities.GetReadStream(compressedRefFile));

    // the out value is demanded by GetHeader's signature but is not used in this constructor
    long intervalsPosition;
    var saHeader = SupplementaryAnnotationReader.GetHeader(inputSuppAnnotFile, out intervalsPosition);

    _begin = begin;
    _end   = end;

    // file name layout: <ucscName>_<begin>_<end>[_<datasourceName>].nsa
    string fileName = _renamer.GetUcscReferenceName(saHeader.ReferenceSequenceName)
                      + '_' + begin.ToString(CultureInfo.InvariantCulture)
                      + '_' + end.ToString(CultureInfo.InvariantCulture);

    if (datasourceName != null)
    {
        fileName = fileName + '_' + datasourceName;
    }

    fileName += ".nsa";

    string miniSuppAnnotFile = outDirectory == null
        ? fileName
        : Path.Combine(outDirectory, fileName);

    _writer = new SupplementaryAnnotationWriter(miniSuppAnnotFile, saHeader.ReferenceSequenceName, saHeader.DataSourceVersions);

    Console.WriteLine("MiniSA output to: " + miniSuppAnnotFile);
}
/// <summary>
/// Registers a single <c>ReferenceMetadata</c> entry built from a null first argument and
/// the UCSC name "chr1", then checks that the Ensembl lookup of "chr1" yields "chr1" and
/// that the UCSC lookup of null yields null.
/// </summary>
public void AddReferenceNameEnsemblEmpty()
{
    const string ucscReferenceName = "chr1";

    var renamer = new ChromosomeRenamer();

    // presumably the first constructor argument is the Ensembl name - verify against ReferenceMetadata
    var metadataWithoutEnsemblName = new ReferenceMetadata(null, ucscReferenceName, true);
    renamer.AddReferenceMetadata(new List<ReferenceMetadata> { metadataWithoutEnsemblName });

    var ucscLookupResult    = renamer.GetUcscReferenceName(null);
    var ensemblLookupResult = renamer.GetEnsemblReferenceName(ucscReferenceName);

    Assert.Equal(ucscReferenceName, ensemblLookupResult);
    Assert.Null(ucscLookupResult);
}
/// <summary>
/// Opens a new <c>SupplementaryAnnotationWriter</c> for <c>_currentRefName</c> and resets
/// the per-reference bookkeeping (<c>_creationBench</c>, <c>_numSaWritten</c>).
/// </summary>
/// <remarks>
/// The Ensembl form of the reference name is recorded in
/// <c>InputFileParserUtilities.ProcessedReferences</c>; the UCSC form is used for the
/// ".nsa" file name under <c>_nsdBaseFileName</c>.
/// </remarks>
/// <exception cref="Exception">
/// Thrown when the Ensembl reference name is already present in
/// <c>InputFileParserUtilities.ProcessedReferences</c>.
/// </exception>
private void OpenNewSaWriter()
{
    var ensemblName = _renamer.GetEnsemblReferenceName(_currentRefName);
    var ucscName    = _renamer.GetUcscReferenceName(_currentRefName);

    // a reference seen before would cause its existing output to be overwritten
    var alreadyProcessed = InputFileParserUtilities.ProcessedReferences.Contains(ensemblName);
    if (alreadyProcessed)
    {
        throw new Exception($"usorted file, for chromsome {_currentRefName}, SA will be rewritten");
    }

    InputFileParserUtilities.ProcessedReferences.Add(ensemblName);

    _saWriter = new SupplementaryAnnotationWriter(
        Path.Combine(_nsdBaseFileName, ucscName + ".nsa"),
        _currentRefName,
        _dataSources,
        _compressedSequence.GenomeAssembly);

    Console.WriteLine("Populating {0} data...", ucscName);

    _creationBench.Reset();
    _numSaWritten = 0;
}
/// <summary>
/// Lists the sub-directories of <paramref name="dirPath"/> whose names pass
/// <c>_renamer.InReferenceAndVep</c>, ordered by the index that <c>GetIndex</c> assigns
/// to their UCSC reference name.
/// </summary>
/// <param name="dirPath">directory whose immediate sub-directories are examined</param>
/// <returns>
/// (UCSC reference name, directory path) pairs in ascending index order. When two
/// directories map to the same index, the one enumerated later replaces the earlier one.
/// </returns>
public List<Tuple<string, string>> GetUcscKaryotypeOrder(string dirPath)
{
    var entriesByIndex = new SortedDictionary<ushort, Tuple<string, string>>();

    foreach (var directory in Directory.GetDirectories(dirPath))
    {
        // the directory's own name is treated as the reference name
        string directoryName = Path.GetFileName(directory);

        if (_renamer.InReferenceAndVep(directoryName))
        {
            string ucscName = _renamer.GetUcscReferenceName(directoryName, false);
            var index = GetIndex(ucscName);

            entriesByIndex[index] = Tuple.Create(ucscName, directory);
        }
    }

    // the sorted dictionary enumerates its values in key order
    return new List<Tuple<string, string>>(entriesByIndex.Values);
}
/// <summary>
/// Checks that <c>_renamer.GetUcscReferenceName</c> maps
/// <paramref name="ensemblReferenceName"/> to <paramref name="expectedReferenceName"/>.
/// </summary>
/// <param name="ensemblReferenceName">the name handed to the UCSC lookup</param>
/// <param name="expectedReferenceName">the name the lookup is expected to return</param>
public void GetUcscReferenceName(string ensemblReferenceName, string expectedReferenceName)
{
    Assert.Equal(
        expectedReferenceName,
        _renamer.GetUcscReferenceName(ensemblReferenceName));
}