/// <summary>
/// Initializes the first end of a breakend from a reference name and a position.
/// </summary>
/// <param name="referenceName">reference name as supplied by the caller</param>
/// <param name="position">position of this end</param>
/// <param name="renamer">resolves the reference index and supplies the Ensembl-style names</param>
private BreakEnd(string referenceName, int position, ChromosomeRenamer renamer)
{
    _referenceIndex = renamer.GetReferenceIndex(referenceName);

    // only indices below NumRefSeqs are looked up in the Ensembl name table;
    // for anything else the supplied name is kept unchanged
    if (_referenceIndex < renamer.NumRefSeqs)
    {
        _referenceName = renamer.EnsemblReferenceNames[_referenceIndex];
    }
    else
    {
        _referenceName = referenceName;
    }

    Position = position;
    _renamer = renamer;
}
/// <summary>
/// Copies the chromosome, position, reference allele, and ID columns of the current VCF
/// line into the corresponding fields, and blanks out an INFO column that is just ".".
/// </summary>
/// <exception cref="System.FormatException">thrown when the POS column is not an integer</exception>
private void AssignVcfFields()
{
    ReferenceName  = VcfColumns[VcfCommon.ChromIndex];
    ReferenceIndex = _renamer.GetReferenceIndex(ReferenceName);

    // an index of ushort.MaxValue has no entry in the name tables, so the raw CHROM
    // value is used for both naming styles
    bool hasNoReferenceEntry = ReferenceIndex == ushort.MaxValue;

    EnsemblReferenceName = hasNoReferenceEntry
        ? ReferenceName
        : _renamer.EnsemblReferenceNames[ReferenceIndex];

    UcscReferenceName = hasNoReferenceEntry
        ? ReferenceName
        : _renamer.UcscReferenceNames[ReferenceIndex];

    var referenceAllele = VcfColumns[VcfCommon.RefIndex];
    VcfRefAllele = referenceAllele;

    // POS is file data rather than user-facing text, so it is parsed independently of
    // the culture of the current thread
    VcfReferenceBegin = int.Parse(VcfColumns[VcfCommon.PosIndex],
        System.Globalization.CultureInfo.InvariantCulture);

    // inclusive end coordinate of the reference allele
    VcfReferenceEnd = VcfReferenceBegin + referenceAllele.Length - 1;
    VcfVariantId    = VcfColumns[VcfCommon.IdIndex];

    OverlapReferenceBegin = VcfReferenceBegin;
    OverlapReferenceEnd   = VcfReferenceEnd;

    IsReference = VcfColumns[VcfCommon.AltIndex] == VcfCommon.NonVariant;
    IsRefNoCall = false;

    // reset the info field
    if (VcfColumns[VcfCommon.InfoIndex] == ".")
    {
        VcfColumns[VcfCommon.InfoIndex] = "";
    }
}
/// <summary>
/// Reads a tab-delimited cytogenetic band file and groups its bands by reference index.
/// </summary>
/// <param name="inputCytobandPath">path that is handed to FileUtilities.GetReadStream</param>
/// <param name="renamer">supplies the number of reference sequences and maps names to reference indices</param>
/// <returns>a CytogeneticBands object built from one band array per reference sequence</returns>
/// <exception cref="GeneralException">thrown when a line does not have exactly five tab-separated columns</exception>
private static ICytogeneticBands GetCytogeneticBands(string inputCytobandPath, ChromosomeRenamer renamer)
{
    const int expectedNumColumns = 5;

    // coordinates are file data rather than user-facing text, so they are parsed
    // independently of the culture of the current thread
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

    var numRefSeqs = renamer.NumRefSeqs;
    var bandLists  = new List<Band>[numRefSeqs];

    for (int i = 0; i < numRefSeqs; i++)
    {
        bandLists[i] = new List<Band>();
    }

    using (var reader = new StreamReader(FileUtilities.GetReadStream(inputCytobandPath)))
    {
        string line;

        // NOTE: an empty line ends parsing in the same way that end-of-file does
        while (!string.IsNullOrEmpty(line = reader.ReadLine()))
        {
            // split the line into columns
            var cols = line.Split('\t');

            // sanity check: make sure we have the right number of columns
            if (cols.Length != expectedNumColumns)
            {
                throw new GeneralException($"Expected {expectedNumColumns} columns, but found {cols.Length} columns: [{line}]");
            }

            // grab the essential values (the fifth column is not used)
            var ucscName = cols[0];

            // the begin coordinate in the file is shifted up by one
            // (presumably a 0-based start being converted to 1-based - confirm against the file spec)
            var begin = int.Parse(cols[1], invariantCulture) + 1;
            var end   = int.Parse(cols[2], invariantCulture);
            var name  = cols[3];

            // bands on references the renamer does not know about are skipped
            ushort refIndex = renamer.GetReferenceIndex(ucscName);
            if (refIndex == ChromosomeRenamer.UnknownReferenceIndex)
            {
                continue;
            }

            bandLists[refIndex].Add(new Band(begin, end, name));
        }
    }

    // create the band arrays
    var bands = new Band[numRefSeqs][];

    for (int i = 0; i < numRefSeqs; i++)
    {
        bands[i] = bandLists[i].ToArray();
    }

    return new CytogeneticBands(bands, renamer);
}
/// <summary>
/// Initializes both ends of a breakend. The first end is delegated to the
/// (referenceName, position, renamer) constructor; the second end is set up here.
/// </summary>
/// <param name="referenceName">reference name of the first end</param>
/// <param name="referenceName2">reference name of the second end</param>
/// <param name="position">position of the first end</param>
/// <param name="position2">position of the second end</param>
/// <param name="isSuffix">suffix flag of the first end ('+' or another character)</param>
/// <param name="isSuffix2">suffix flag of the second end ('+' or another character)</param>
/// <param name="renamer">resolves reference indices and supplies the Ensembl-style names</param>
public BreakEnd(string referenceName, string referenceName2, int position, int position2, char isSuffix,
    char isSuffix2, ChromosomeRenamer renamer) : this(referenceName, position, renamer)
{
    ReferenceIndex2 = renamer.GetReferenceIndex(referenceName2);

    // only indices below NumRefSeqs are looked up in the Ensembl name table
    if (ReferenceIndex2 < renamer.NumRefSeqs)
    {
        _referenceName2 = renamer.EnsemblReferenceNames[ReferenceIndex2];
    }
    else
    {
        _referenceName2 = referenceName2;
    }

    Position2 = position2;
    IsSuffix  = isSuffix;
    IsSuffix2 = isSuffix2;

    // the first end's orientation is the inverse of its suffix flag
    if (IsSuffix == '+')
    {
        _orientation = '-';
    }
    else
    {
        _orientation = '+';
    }

    // the second end's orientation follows its suffix flag (anything other than '+' becomes '-')
    if (IsSuffix2 == '+')
    {
        _orientation2 = '+';
    }
    else
    {
        _orientation2 = '-';
    }
}
// alt allele of the form <bases><bracket><chrom>:<pos><bracket>; built once instead of on every call
private static readonly Regex ForwardBreakEndAlleleRegex =
    new Regex(@"\w+([\[\]])([^:]+):(\d+)([\[\]])", RegexOptions.Compiled);

// alt allele of the form <bracket><chrom>:<pos><bracket><bases>; built once instead of on every call
private static readonly Regex ReverseBreakEndAlleleRegex =
    new Regex(@"([\[\]])([^:]+):(\d+)([\[\]])\w+", RegexOptions.Compiled);

/// <summary>
/// Parses a breakend alternate allele and fills in the suffix flags, the orientations,
/// and the reference name, reference index, and position of the second end.
/// </summary>
/// <param name="refAllele">reference allele; the alt allele is treated as "forward" when it starts with this string</param>
/// <param name="altAllele">alternate allele containing the bracketed chrom:pos of the second end</param>
/// <exception cref="GeneralException">thrown when the alt allele does not match the expected pattern</exception>
private void ParseAltAllele(string refAllele, string altAllele)
{
    // alleles are plain identifiers, so the prefix test is ordinal rather than culture-sensitive
    bool isForward = altAllele.StartsWith(refAllele, StringComparison.Ordinal);

    var match = (isForward ? ForwardBreakEndAlleleRegex : ReverseBreakEndAlleleRegex).Match(altAllele);

    if (!match.Success)
    {
        throw new GeneralException(
            "Unable to successfully parse the complex rearrangements for the following allele: " + altAllele);
    }

    // the forward form reads the bracket that follows chrom:pos (group 4),
    // the reverse form reads the bracket that precedes it (group 1)
    string bracket       = match.Groups[isForward ? 4 : 1].Value;
    string mateReference = match.Groups[2].Value;
    char mateFlag        = bracket == ForwardBreakEnd ? '+' : '-';

    IsSuffix     = isForward ? '-' : '+';
    _orientation = isForward ? '+' : '-';

    IsSuffix2     = mateFlag;
    _orientation2 = mateFlag;

    _referenceName2 = mateReference;
    Position2       = Convert.ToInt32(match.Groups[3].Value, System.Globalization.CultureInfo.InvariantCulture);

    // switch to the Ensembl-style name when the renamer knows this reference;
    // otherwise the name from the allele is kept
    ReferenceIndex2 = _renamer.GetReferenceIndex(mateReference);

    if (ReferenceIndex2 < _renamer.NumRefSeqs)
    {
        _referenceName2 = _renamer.EnsemblReferenceNames[ReferenceIndex2];
    }
}
/// <summary>
/// Applies a matcher to the file name of a mini-cache path and, on success, builds the
/// updater that corresponds to the given mini-cache type.
/// </summary>
/// <param name="ndbPath">path of the mini-cache file; only its file name is matched</param>
/// <param name="matcher">produces the match whose capture groups are interpreted according to <paramref name="type"/></param>
/// <param name="type">decides which updater is created and how the capture groups are read</param>
/// <param name="renamer">maps the reference name found in the file name to a reference index</param>
/// <returns>the cache file description, or null when there is no file name or the matcher fails</returns>
/// <exception cref="GeneralException">thrown when <paramref name="type"/> is not a handled mini-cache type</exception>
private static CacheFile TryMatchFilename(string ndbPath, Func<string, Match> matcher, MiniCacheType type,
    ChromosomeRenamer renamer)
{
    string filename = Path.GetFileName(ndbPath);
    if (filename == null)
    {
        return null;
    }

    var match = matcher(filename);
    if (!match.Success)
    {
        return null;
    }

    // numbers embedded in file names are data, so they are parsed independently of
    // the culture of the current thread
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

    IUpdater updater;

    switch (type)
    {
        case MiniCacheType.Transcript:
        {
            // group 1 is passed through SplitVersion and only the first tuple item is used as the ID
            var idAndVersion = FormatUtilities.SplitVersion(match.Groups[1].Value);
            string id = idAndVersion.Item1;
            ushort refIndex = renamer.GetReferenceIndex(match.Groups[2].Value);
            string transcriptDataSource = match.Groups[3].Value;

            updater = new TranscriptUpdater(id, refIndex, transcriptDataSource);
            break;
        }

        case MiniCacheType.Regulatory:
        {
            string id = match.Groups[1].Value;
            ushort refIndex = renamer.GetReferenceIndex(match.Groups[2].Value);
            string transcriptDataSource = match.Groups[3].Value;

            updater = new RegulatoryUpdater(id, refIndex, transcriptDataSource);
            break;
        }

        case MiniCacheType.Position:
        {
            ushort refIndex = renamer.GetReferenceIndex(match.Groups[1].Value);
            int position = int.Parse(match.Groups[2].Value, invariantCulture);
            string refAllele = match.Groups[3].Value;
            string altAllele = match.Groups[4].Value;
            string transcriptDataSource = match.Groups[5].Value;

            updater = new PositionUpdater(refIndex, position, refAllele, altAllele, transcriptDataSource);
            break;
        }

        case MiniCacheType.PositionRange:
        {
            ushort refIndex = renamer.GetReferenceIndex(match.Groups[1].Value);
            int position = int.Parse(match.Groups[2].Value, invariantCulture);
            int endPosition = int.Parse(match.Groups[3].Value, invariantCulture);
            string transcriptDataSource = match.Groups[4].Value;

            updater = new PositionRangeUpdater(refIndex, position, endPosition, transcriptDataSource);
            break;
        }

        default:
            throw new GeneralException($"Unexpected mini-cache type encountered: {type}");
    }

    return new CacheFile(ndbPath, updater.RefIndex, ConvertTranscriptDataSource(updater.TranscriptDataSource),
        type, updater);
}
/// <summary>
/// Builds the supplementary annotation for the smallest pending data item by merging
/// every enumerator whose current item equals it, then advances those enumerators.
/// Switches the writer and the loaded reference when the reference name changes.
/// </summary>
/// <returns>the populated position creator, or null when nothing is left to return</returns>
/// <exception cref="GeneralException">thrown when the reference name of the item has no known reference index</exception>
private SupplementaryPositionCreator GetNextSupplementaryAnnotation()
{
    // neither active enumerators nor carried-over items remain
    bool nothingPending = _iSupplementaryDataItemList.Count == 0 && _additionalItemsList.Count == 0;
    if (nothingPending)
    {
        return null;
    }

    var minItem = CurrentMinSupplementaryDataItem();
    if (minItem == null)
    {
        // nothing more to return: all enumerators are empty
        return null;
    }

    var annotationPosition = new SupplementaryAnnotationPosition(minItem.Start);
    var creator = new SupplementaryPositionCreator(annotationPosition);
    creator.RefSeqName = minItem.Chromosome;

    // a different reference name means: finish the current writer, load the new reference,
    // and start a new writer
    bool isSameReference = _currentRefName != null && _currentRefName.Equals(creator.RefSeqName);
    if (!isSameReference)
    {
        CloseCurrentSaWriter();
        _currentRefName = creator.RefSeqName;

        var refIndex = _renamer.GetReferenceIndex(_currentRefName);
        if (refIndex == ChromosomeRenamer.UnknownReferenceIndex)
        {
            throw new GeneralException($"Could not find the reference index for: {_currentRefName}");
        }

        _dataFileManager.LoadReference(refIndex, () => {});
        OpenNewSaWriter();
    }

    // reference bases starting at this position; stays null when no sequence is available
    string refSequence = _compressedSequence?.Substring(annotationPosition.ReferencePosition - 1, ReferenceWindowSize);

    // enumerators that run dry while being advanced
    var exhaustedEnumerators = new List<IEnumerator<SupplementaryDataItem>>();

    foreach (var enumerator in _iSupplementaryDataItemList)
    {
        var item = enumerator.Current;

        // only items equal to the minimum item take part in this annotation
        if (item.Equals(minItem))
        {
            if (item.IsInterval)
            {
                _supplementaryIntervalList.Add(item.GetSupplementaryInterval(_renamer));
            }
            else
            {
                // a non-null result is kept in the additional items list
                var carriedOver = item.SetSupplementaryAnnotations(creator, refSequence);
                if (carriedOver != null)
                {
                    _additionalItemsList.Add(carriedOver);
                }
            }

            // remember enumerators that have no further items
            if (!enumerator.MoveNext())
            {
                exhaustedEnumerators.Add(enumerator);
            }
        }
    }

    // add annotations from additional items if applicable
    AddAdditionalItems(minItem, creator);

    // drop the enumerators that ran dry so that they are not visited again
    _iSupplementaryDataItemList.RemoveAll(candidate => exhaustedEnumerators.Contains(candidate));

    return creator;
}