예제 #1
0
        /// <summary>
        /// Checks whether the supplied reference allele matches the bases stored in the
        /// reference sequence at the given position.
        /// </summary>
        /// <param name="chromosome">Chromosome name; resolved to a reference index through the sequence's renamer.</param>
        /// <param name="pos">Position of the allele; the sequence lookup starts at offset <c>pos - 1</c>.</param>
        /// <param name="refAllele">Allele that is expected at that position.</param>
        /// <returns>
        /// <c>false</c> when the chromosome cannot be resolved or the bases differ;
        /// <c>true</c> when the extracted bases equal <paramref name="refAllele"/>.
        /// </returns>
        private bool ValidateReference(string chromosome, int pos, string refAllele)
        {
            var referenceIndex = _compressedSequence.Renamer.GetReferenceIndex(chromosome);
            if (referenceIndex == ChromosomeRenamer.UnknownReferenceIndex)
            {
                return false;
            }

            // load the reference for this chromosome before reading bases from it
            _dataFileManager.LoadReference(referenceIndex, () => { });

            var observedBases = _compressedSequence.Substring(pos - 1, refAllele.Length);
            return observedBases == refAllele;
        }
예제 #2
0
        /// <summary>
        /// Annotates a single variant and returns the resulting piano variant.
        /// </summary>
        /// <param name="variant">Variant to annotate; may be <c>null</c>.</param>
        /// <returns>
        /// The annotated variant (also stored in <c>_pianoVariant</c>), or <c>null</c>
        /// when <paramref name="variant"/> is <c>null</c>.
        /// </returns>
        public PianoVariant Annotate(IVariant variant)
        {
            if (variant == null)
            {
                return null;
            }

            // NOTE(review): the 'as' cast yields null when the variant is not a VcfVariant;
            // whether VariantFeature tolerates a null argument is not visible here - confirm.
            var vcfVariant = variant as VcfVariant;
            var feature    = new VariantFeature(vcfVariant, _renamer, _vid);

            // load the reference sequence for this variant's reference index
            _dataFileManager.LoadReference(feature.ReferenceIndex, () => { });

            // handle ref no-calls and assign the alternate alleles
            feature.AssignAlternateAlleles();

            // create the result object, run the annotation and record the metric
            _pianoVariant = new PianoVariant(feature);
            Annotate(feature);
            _performanceMetrics.Increment();

            return _pianoVariant;
        }
예제 #3
0
        /// <summary>
        /// Writes one VCF-style line for the given transcript, placed at the midpoint of
        /// its coding region, using the reference base there and the first entry of
        /// <c>_nucleotides</c> that differs from it as the alternate allele.
        /// </summary>
        /// <param name="writer">Destination for the generated line.</param>
        /// <param name="transcript">Transcript to create the entry for.</param>
        /// <remarks>
        /// Nothing is written when the transcript is non-canonical and its gene symbol
        /// is already in <c>_processedGeneSet</c>, or when it has no translation.
        /// </remarks>
        private void CreateVcf(StreamWriter writer, Transcript transcript)
        {
            var symbol = transcript.Gene.Symbol;

            // a non-canonical transcript is skipped once its gene has been recorded
            var geneAlreadyHandled = !transcript.IsCanonical && _processedGeneSet.Contains(symbol);
            if (geneAlreadyHandled || transcript.Translation == null)
            {
                return;
            }

            _processedGeneSet.Add(symbol);
            _dataFileManager.LoadReference(transcript.ReferenceIndex, () => { });

            // midpoint of the coding region (integer division)
            var codingRegion = transcript.Translation.CodingRegion;
            var midpoint     = (codingRegion.GenomicStart + codingRegion.GenomicEnd) / 2;

            var refBase = _compressedSequence.Substring(midpoint - 1, 1);
            var altBase = _nucleotides.First(nucleotide => nucleotide != refBase);
            var contig  = _renamer.UcscReferenceNames[transcript.ReferenceIndex];

            writer.WriteLine($"{contig}\t{midpoint}\t.\t{refBase}\t{altBase}\t.\t.\t.");
        }
예제 #4
0
        /// <summary>
        /// Parses one ClinVar XML element and converts the variants collected from it
        /// into <c>ClinVarItem</c> objects.
        /// </summary>
        /// <param name="xmlElement">Element whose children are parsed; may be <c>null</c> or empty.</param>
        /// <returns>
        /// The list of created items, or <c>null</c> when the element is null/empty,
        /// the record status is not "current", or no item was produced.
        /// </returns>
        /// <exception cref="GeneralException">
        /// Thrown when a variant's chromosome cannot be mapped to a reference index.
        /// </exception>
        private List<ClinVarItem> ExtractClinVarItems(LiteXmlElement xmlElement)
        {
            // the per-record fields are reset before the input is even inspected
            ClearClinvarFields();

            if (xmlElement == null || xmlElement.IsEmpty())
            {
                return null;
            }

            foreach (var child in xmlElement.Children)
            {
                var elementName = child.Name;

                if (elementName == "ReferenceClinVarAssertion")
                {
                    ParseRefClinVarAssertion(child);
                }
                else if (elementName == "ClinVarAssertion")
                {
                    ParseScv(child);
                }
            }

            if (_recordStatus != "current")
            {
                Console.WriteLine($"record status not current: {_recordStatus} for {_id}");
                return null;
            }

            var extractedItems = new List<ClinVarItem>();

            foreach (var variant in _variantList)
            {
                // in order to match the VCF, entries without a dbSNP id are left out;
                // undesired chromosomes and microsatellites are skipped as well
                var isExcluded = variant.DbSnp == null
                                 || !InputFileParserUtilities.IsDesiredChromosome(variant.Chromosome, _compressedSequence.Renamer)
                                 || variant.VariantType == "Microsatellite";
                if (isExcluded)
                {
                    continue;
                }

                var referenceIndex = _compressedSequence.Renamer.GetReferenceIndex(variant.Chromosome);
                if (referenceIndex == ChromosomeRenamer.UnknownReferenceIndex)
                {
                    throw new GeneralException($"Could not find the reference index for: {variant.Chromosome}");
                }

                _dataFileManager.LoadReference(referenceIndex, () => { });

                ClinvarVariant normalized = variant;

                // some entries do not carry a ref allele in the XML file; for those it is
                // derived from our reference sequence
                if (variant.ReferenceAllele == null && variant.VariantType == "Deletion")
                {
                    normalized = GenerateRefAllele(variant, _compressedSequence);
                }

                if (variant.AltAllele == null && variant.VariantType == "Duplication")
                {
                    normalized = GenerateAltAllele(variant, _compressedSequence);
                }

                // left align the variant
                normalized = LeftShift(normalized);

                // NOTE(review): this branch starts again from the original variant, so the
                // left-shifted result above is replaced - confirm this is intended.
                if (variant.ReferenceAllele == null && variant.VariantType == "Indel" && variant.AltAllele != null)
                {
                    normalized = GenerateRefAllele(variant, _compressedSequence);
                }

                _pubMedIds.Sort();

                var lacksBothAlleles = string.IsNullOrEmpty(normalized.ReferenceAllele)
                                       && string.IsNullOrEmpty(normalized.AltAllele);
                if (lacksBothAlleles)
                {
                    continue;
                }

                // preferred phenotypes win; the alternate ones are only used as a fallback
                var phenotypes = _prefPhenotypes.Count > 0
                    ? _prefPhenotypes.Distinct().ToList()
                    : _altPhenotypes.Distinct().ToList();

                var item = new ClinVarItem(
                    normalized.Chromosome,
                    normalized.Start,
                    _alleleOrigins.Distinct().ToList(),
                    normalized.AltAllele ?? "",
                    _id,
                    _reviewStatus,
                    _medGenIDs.Distinct().ToList(),
                    _omimIDs.Distinct().ToList(),
                    _orphanetIDs.Distinct().ToList(),
                    phenotypes,
                    normalized.ReferenceAllele ?? "",
                    _significance,
                    _pubMedIds.Distinct().ToList(),
                    _lastUpdatedDate);

                extractedItems.Add(item);
            }

            return extractedItems.Count > 0 ? extractedItems : null;
        }
예제 #5
0
        /// <summary>
        /// Builds the supplementary annotation for the smallest pending data item and
        /// advances every enumerator that is currently positioned on that item.
        /// </summary>
        /// <returns>
        /// The creator for the next position, or <c>null</c> when there is nothing
        /// left to return.
        /// </returns>
        /// <exception cref="GeneralException">
        /// Thrown when the reference name of the next item has no known reference index.
        /// </exception>
        private SupplementaryPositionCreator GetNextSupplementaryAnnotation()
        {
            // no more active iterators left
            var nothingPending = _iSupplementaryDataItemList.Count == 0 && _additionalItemsList.Count == 0;
            if (nothingPending)
            {
                return null;
            }

            var minItem = CurrentMinSupplementaryDataItem();
            if (minItem == null)
            {
                // nothing more to return; all enumerators are empty
                return null;
            }

            var position = new SupplementaryAnnotationPosition(minItem.Start);
            var creator  = new SupplementaryPositionCreator(position)
            {
                RefSeqName = minItem.Chromosome
            };

            // a change of reference name: close the current writer, load the new
            // reference and open a new writer
            if (_currentRefName == null || !_currentRefName.Equals(creator.RefSeqName))
            {
                CloseCurrentSaWriter();

                _currentRefName = creator.RefSeqName;

                var referenceIndex = _renamer.GetReferenceIndex(_currentRefName);
                if (referenceIndex == ChromosomeRenamer.UnknownReferenceIndex)
                {
                    throw new GeneralException($"Could not find the reference index for: {_currentRefName}");
                }

                _dataFileManager.LoadReference(referenceIndex, () => { });

                OpenNewSaWriter();
            }

            // the reference window stays null when no compressed sequence is available
            string windowBases = _compressedSequence == null
                ? null
                : _compressedSequence.Substring(position.ReferencePosition - 1, ReferenceWindowSize);

            // enumerators that run out of items while being advanced
            var exhausted = new List<IEnumerator<SupplementaryDataItem>>();

            foreach (var enumerator in _iSupplementaryDataItemList)
            {
                var item = enumerator.Current;

                // only items at the same location as the minimum item are used
                if (!item.Equals(minItem))
                {
                    continue;
                }

                if (item.IsInterval)
                {
                    _supplementaryIntervalList.Add(item.GetSupplementaryInterval(_renamer));
                }
                else
                {
                    var extraItem = item.SetSupplementaryAnnotations(creator, windowBases);
                    if (extraItem != null)
                    {
                        _additionalItemsList.Add(extraItem);
                    }
                }

                // remember enumerators that have no further items
                if (!enumerator.MoveNext())
                {
                    exhausted.Add(enumerator);
                }
            }

            // add annotations from additional items if applicable
            AddAdditionalItems(minItem, creator);

            // drop the exhausted enumerators from the list of active enumerators
            _iSupplementaryDataItemList.RemoveAll(candidate => exhausted.Contains(candidate));

            return creator;
        }