Esempio n. 1
0
 /// <summary>
 /// Creates a break end for a single reference sequence and position. When the
 /// renamer yields an index below NumRefSeqs, the Ensembl name for that index is
 /// stored; otherwise the supplied reference name is kept unchanged.
 /// </summary>
 private BreakEnd(string referenceName, int position, ChromosomeRenamer renamer)
 {
     _referenceIndex = renamer.GetReferenceIndex(referenceName);

     if (_referenceIndex < renamer.NumRefSeqs)
     {
         // index is in range: use the Ensembl-style name
         _referenceName = renamer.EnsemblReferenceNames[_referenceIndex];
     }
     else
     {
         // index is out of range: fall back to the name we were given
         _referenceName = referenceName;
     }

     Position = position;
     _renamer = renamer;
 }
Esempio n. 2
0
        /// <summary>
        /// Populates the reference, position, allele and flag members from the current VCF columns.
        /// </summary>
        private void AssignVcfFields()
        {
            // chromosome: look up its index and derive the Ensembl and UCSC names
            ReferenceName  = VcfColumns[VcfCommon.ChromIndex];
            ReferenceIndex = _renamer.GetReferenceIndex(ReferenceName);

            if (ReferenceIndex == ushort.MaxValue)
            {
                // no index available: keep the name from the VCF for both naming styles
                EnsemblReferenceName = ReferenceName;
                UcscReferenceName    = ReferenceName;
            }
            else
            {
                EnsemblReferenceName = _renamer.EnsemblReferenceNames[ReferenceIndex];
                UcscReferenceName    = _renamer.UcscReferenceNames[ReferenceIndex];
            }

            // reference allele and the interval it spans
            var refAllele = VcfColumns[VcfCommon.RefIndex];
            VcfRefAllele = refAllele;

            VcfReferenceBegin = int.Parse(VcfColumns[VcfCommon.PosIndex]);
            VcfReferenceEnd   = VcfReferenceBegin + refAllele.Length - 1;

            VcfVariantId = VcfColumns[VcfCommon.IdIndex];

            // the overlap interval starts out identical to the VCF interval
            OverlapReferenceBegin = VcfReferenceBegin;
            OverlapReferenceEnd   = VcfReferenceEnd;

            IsReference = VcfColumns[VcfCommon.AltIndex] == VcfCommon.NonVariant;
            IsRefNoCall = false;

            // a "." info field is replaced with an empty string
            if (VcfColumns[VcfCommon.InfoIndex] == ".")
            {
                VcfColumns[VcfCommon.InfoIndex] = "";
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Reads a tab-delimited cytoband file and groups its bands by reference index.
        /// Reading stops at the first null or empty line. Rows whose reference name
        /// resolves to ChromosomeRenamer.UnknownReferenceIndex are skipped.
        /// </summary>
        private static ICytogeneticBands GetCytogeneticBands(string inputCytobandPath, ChromosomeRenamer renamer)
        {
            // every row must have exactly this many tab-separated columns
            const int expectedNumColumns = 5;

            var refCount = renamer.NumRefSeqs;

            // one growable list per reference sequence
            var perReference = new List<Band>[refCount];
            for (int refIdx = 0; refIdx < refCount; refIdx++)
            {
                perReference[refIdx] = new List<Band>();
            }

            using (var reader = new StreamReader(FileUtilities.GetReadStream(inputCytobandPath)))
            {
                string line;
                while (!string.IsNullOrEmpty(line = reader.ReadLine()))
                {
                    var fields = line.Split('\t');

                    if (fields.Length != expectedNumColumns)
                    {
                        throw new GeneralException($"Expected {expectedNumColumns} columns, but found {fields.Length} columns: [{line}]");
                    }

                    // columns are parsed before the reference lookup, so malformed
                    // coordinates are reported even for unknown references
                    var ucscName  = fields[0];
                    var bandBegin = int.Parse(fields[1]) + 1;
                    var bandEnd   = int.Parse(fields[2]);
                    var bandName  = fields[3];

                    ushort refIndex = renamer.GetReferenceIndex(ucscName);
                    if (refIndex != ChromosomeRenamer.UnknownReferenceIndex)
                    {
                        perReference[refIndex].Add(new Band(bandBegin, bandEnd, bandName));
                    }
                }
            }

            // freeze each list into an array
            var bands = new Band[refCount][];
            for (int refIdx = 0; refIdx < refCount; refIdx++)
            {
                bands[refIdx] = perReference[refIdx].ToArray();
            }

            return new CytogeneticBands(bands, renamer);
        }
Esempio n. 4
0
        /// <summary>
        /// Creates a break end that joins two positions. The first reference and position
        /// are handled by the single-position constructor; this constructor resolves the
        /// second reference, stores both suffix flags, and derives the two orientations.
        /// </summary>
        public BreakEnd(string referenceName, string referenceName2, int position, int position2, char isSuffix,
                        char isSuffix2, ChromosomeRenamer renamer) : this(referenceName, position, renamer)
        {
            ReferenceIndex2 = renamer.GetReferenceIndex(referenceName2);

            if (ReferenceIndex2 < renamer.NumRefSeqs)
            {
                _referenceName2 = renamer.EnsemblReferenceNames[ReferenceIndex2];
            }
            else
            {
                // index is out of range: keep the name we were given
                _referenceName2 = referenceName2;
            }

            Position2 = position2;
            IsSuffix  = isSuffix;
            IsSuffix2 = isSuffix2;

            // the first orientation is the opposite of its suffix flag
            if (IsSuffix == '+')
            {
                _orientation = '-';
            }
            else
            {
                _orientation = '+';
            }

            // the second orientation is '+' only when its suffix flag is '+'
            if (IsSuffix2 == '+')
            {
                _orientation2 = '+';
            }
            else
            {
                _orientation2 = '-';
            }
        }
Esempio n. 5
0
        // Break-end notation with the leading bases first: bases, bracket, name:position, bracket.
        // Built once and shared, because constructing a RegexOptions.Compiled regex on every
        // call is expensive.
        private static readonly Regex ForwardAltAlleleRegex =
            new Regex(@"\w+([\[\]])([^:]+):(\d+)([\[\]])", RegexOptions.Compiled);

        // Break-end notation with the trailing bases last: bracket, name:position, bracket, bases.
        private static readonly Regex ReverseAltAlleleRegex =
            new Regex(@"([\[\]])([^:]+):(\d+)([\[\]])\w+", RegexOptions.Compiled);

        /// <summary>
        /// Parses a break-end alternate allele and fills in the suffix flags, orientations,
        /// and the second reference name, index and position.
        /// </summary>
        /// <param name="refAllele">the reference allele; decides which notation is expected</param>
        /// <param name="altAllele">the alternate allele in break-end notation</param>
        /// <exception cref="GeneralException">thrown when the alternate allele does not match the expected notation</exception>
        private void ParseAltAllele(string refAllele, string altAllele)
        {
            // alleles are data, not linguistic text, so compare ordinally
            bool refBasesFirst = altAllele.StartsWith(refAllele, StringComparison.Ordinal);

            var match = (refBasesFirst ? ForwardAltAlleleRegex : ReverseAltAlleleRegex).Match(altAllele);

            if (!match.Success)
            {
                throw new GeneralException(
                          "Unable to successfully parse the complex rearrangements for the following allele: " + altAllele);
            }

            // the bracket that decides the second orientation is the trailing one (group 4)
            // in the forward notation and the leading one (group 1) in the reverse notation
            string mateBracket   = match.Groups[refBasesFirst ? 4 : 1].Value;
            char   mateDirection = mateBracket == ForwardBreakEnd ? '+' : '-';

            IsSuffix      = refBasesFirst ? '-' : '+';
            _orientation  = refBasesFirst ? '+' : '-';
            IsSuffix2     = mateDirection;
            _orientation2 = mateDirection;
            Position2     = Convert.ToInt32(match.Groups[3].Value);

            // use the Ensembl name when the index is in range; otherwise keep the name as written
            string mateName = match.Groups[2].Value;
            ReferenceIndex2 = _renamer.GetReferenceIndex(mateName);
            _referenceName2 = ReferenceIndex2 < _renamer.NumRefSeqs
                ? _renamer.EnsemblReferenceNames[ReferenceIndex2]
                : mateName;
        }
Esempio n. 6
0
        /// <summary>
        /// Applies the matcher to the file name of the given path and, when it matches,
        /// builds the updater for the requested mini-cache type and wraps it in a CacheFile.
        /// Returns null when the path has no file name or the matcher fails.
        /// </summary>
        private static CacheFile TryMatchFilename(string ndbPath, Func<string, Match> matcher, MiniCacheType type,
                                                  ChromosomeRenamer renamer)
        {
            string filename = Path.GetFileName(ndbPath);
            if (filename == null)
            {
                return null;
            }

            var match = matcher(filename);
            if (!match.Success)
            {
                return null;
            }

            var groups = match.Groups;
            IUpdater updater;

            // each mini-cache type encodes its fields in a different group order
            switch (type)
            {
                case MiniCacheType.Transcript:
                {
                    var versionedId   = FormatUtilities.SplitVersion(groups[1].Value);
                    string id         = versionedId.Item1;
                    ushort refIndex   = renamer.GetReferenceIndex(groups[2].Value);
                    string dataSource = groups[3].Value;
                    updater = new TranscriptUpdater(id, refIndex, dataSource);
                    break;
                }

                case MiniCacheType.Regulatory:
                {
                    string id         = groups[1].Value;
                    ushort refIndex   = renamer.GetReferenceIndex(groups[2].Value);
                    string dataSource = groups[3].Value;
                    updater = new RegulatoryUpdater(id, refIndex, dataSource);
                    break;
                }

                case MiniCacheType.Position:
                {
                    ushort refIndex   = renamer.GetReferenceIndex(groups[1].Value);
                    int position      = int.Parse(groups[2].Value);
                    string refAllele  = groups[3].Value;
                    string altAllele  = groups[4].Value;
                    string dataSource = groups[5].Value;
                    updater = new PositionUpdater(refIndex, position, refAllele, altAllele, dataSource);
                    break;
                }

                case MiniCacheType.PositionRange:
                {
                    ushort refIndex   = renamer.GetReferenceIndex(groups[1].Value);
                    int position      = int.Parse(groups[2].Value);
                    int endPosition   = int.Parse(groups[3].Value);
                    string dataSource = groups[4].Value;
                    updater = new PositionRangeUpdater(refIndex, position, endPosition, dataSource);
                    break;
                }

                default:
                    throw new GeneralException($"Unexpected mini-cache type encountered: {type}");
            }

            var cacheDataSource = ConvertTranscriptDataSource(updater.TranscriptDataSource);
            return new CacheFile(ndbPath, updater.RefIndex, cacheDataSource, type, updater);
        }
Esempio n. 7
0
        /// <summary>
        /// Builds the supplementary position creator for the item returned by
        /// CurrentMinSupplementaryDataItem(), merging in every enumerator whose current item
        /// equals it, and advances those enumerators. Returns null when there is nothing left.
        /// </summary>
        /// <exception cref="GeneralException">thrown when the renamer has no reference index for the item's chromosome</exception>
        private SupplementaryPositionCreator GetNextSupplementaryAnnotation()
        {
            // no more active iterators left
            if (_iSupplementaryDataItemList.Count == 0 && _additionalItemsList.Count == 0)
            {
                return(null);
            }

            var minSupplementaryDataItem = CurrentMinSupplementaryDataItem();

            if (minSupplementaryDataItem == null)
            {
                return(null);                                 // nothing more to return: all enumerators are empty
            }
            var sa = new SupplementaryAnnotationPosition(minSupplementaryDataItem.Start);


            var saCreator = new SupplementaryPositionCreator(sa)
            {
                RefSeqName = minSupplementaryDataItem.Chromosome
            };

            // stays null unless a compressed sequence is available below
            string refSequence = null;

            // first item, or the reference name changed: close the current writer,
            // load the new reference, and open a new writer
            if (_currentRefName == null || !_currentRefName.Equals(saCreator.RefSeqName))
            {
                CloseCurrentSaWriter();

                _currentRefName = saCreator.RefSeqName;

                var refIndex = _renamer.GetReferenceIndex(_currentRefName);
                if (refIndex == ChromosomeRenamer.UnknownReferenceIndex)
                {
                    throw new GeneralException($"Could not find the reference index for: {_currentRefName}");
                }
                // the second argument is an empty callback
                _dataFileManager.LoadReference(refIndex, () => {});

                OpenNewSaWriter();
            }

            if (_compressedSequence != null)
            {
                // window of ReferenceWindowSize starting at the zero-based offset of this position
                refSequence = _compressedSequence.Substring(sa.ReferencePosition - 1, ReferenceWindowSize);
            }
            // exhausted enumerators are collected here and removed after the loop,
            // since the list cannot be modified while it is being enumerated
            var deleteList = new List <IEnumerator <SupplementaryDataItem> >();

            foreach (var iDataEnumerator in _iSupplementaryDataItemList)
            {
                // only use enumerators whose current item Equals the minimum item
                // (presumably "same location" -- confirm against SupplementaryDataItem.Equals)
                if (!iDataEnumerator.Current.Equals(minSupplementaryDataItem))
                {
                    continue;
                }

                if (iDataEnumerator.Current.IsInterval)
                {
                    // interval items are collected separately rather than added to saCreator
                    var suppInterval = iDataEnumerator.Current.GetSupplementaryInterval(_renamer);

                    _supplementaryIntervalList.Add(suppInterval);
                }
                else
                {
                    // a non-null result is queued in the additional items list
                    var additionalSuppData = iDataEnumerator.Current.SetSupplementaryAnnotations(saCreator, refSequence);

                    if (additionalSuppData != null)
                    {
                        _additionalItemsList.Add(additionalSuppData);
                    }
                }
                // advance the enumerator; if it is now exhausted, mark it for removal
                if (!iDataEnumerator.MoveNext())
                {
                    deleteList.Add(iDataEnumerator);
                }
            }

            // add annotations from additional items if applicable.
            AddAdditionalItems(minSupplementaryDataItem, saCreator);

            // drop the exhausted enumerators from the list of active enumerators
            _iSupplementaryDataItemList.RemoveAll(x => deleteList.Contains(x));

            return(saCreator);
        }