/// <summary>
/// Decides whether <paramref name="variant"/> is a reference call for one sample.
/// </summary>
/// <param name="variant">The VCF variant to inspect.</param>
/// <param name="sampleName">
/// Name used to index <c>variant.Samples</c>; when <c>null</c>, the first sample is used.
/// </param>
/// <returns>
/// <c>true</c> when the variant is a ref site. Otherwise <c>false</c> when there are no samples.
/// Otherwise, when a GT field exists: whether the parsed CN equals the number of GT alleles, or,
/// if CN is absent/unparsable, whether every GT allele is "0". With no GT field: whether the
/// parsed CN equals 2.
/// </returns>
internal static bool IsRefCall([NotNull] this IVcfVariant variant, [CanBeNull] string sampleName)
{
    // A ref site is a ref call for sure.
    if (variant.IsRefSite())
    {
        return true;
    }

    // Not a ref site and no sample field: not a ref call.
    if (variant.Samples.Count == 0)
    {
        return false;
    }

    var sample = sampleName == null ? variant.Samples[0] : variant.Samples[sampleName];
    var fields = sample.SampleDictionary;

    // Parse CN once so that both branches below compare it numerically. Previously the
    // no-GT branch compared the raw text against "2", so a CN such as "02" was treated
    // differently depending on whether a GT field happened to be present.
    int? copyNumber = null;
    if (fields.TryGetValue(VcfConstants.CnSampleFieldKey, out var cnString)
        && int.TryParse(cnString, out var parsedCn))
    {
        copyNumber = parsedCn;
    }

    if (!fields.TryGetValue(VcfConstants.GenotypeKey, out var gt))
    {
        // No genotype to derive ploidy from: fall back to 2 as the reference copy number.
        // A missing or unparsable CN leaves copyNumber null, which compares unequal.
        return copyNumber == 2;
    }

    //todo: refining how to deal with ploidy. Also here we don't deal with LOH. assuming CN = ploidy is ref
    var alleles = gt.Split('/', '|');
    return copyNumber == null
        ? alleles.All(alleleIndex => alleleIndex == "0")
        : copyNumber.Value == alleles.Length;
}
/// <summary>
/// Works out the <see cref="WittyerType"/> of a variant from its SVTYPE info value and the
/// sample's GT / CN fields.
/// </summary>
/// <param name="variant">The VCF variant to classify.</param>
/// <param name="isCrossTypeOn">
/// When <c>true</c>, copy-number calls are reported as Deletion / Duplication; otherwise as
/// CopyNumberLoss / CopyNumberGain.
/// </param>
/// <param name="sampleName">
/// Name used to index <c>variant.Samples</c>; when <c>null</c>, the first sample is used.
/// </param>
/// <param name="svType">
/// Receives the resolved type. It is <c>null</c> when a failure reason is returned.
/// </param>
/// <returns>
/// <c>null</c> on success; <c>FailedReason.CnvWithoutCn</c> when SVTYPE is CNV but the sample has
/// no CN field; <c>FailedReason.UndeterminedCn</c> when a CN field exists but cannot be parsed
/// as an integer.
/// </returns>
/// <exception cref="InvalidDataException">
/// The variant has no SVTYPE info key, or its SVTYPE value is neither parsable nor CNV.
/// </exception>
public static FailedReason? ParseFromVariant([NotNull] IVcfVariant variant, bool isCrossTypeOn,
    [CanBeNull] string sampleName, [CanBeNull] out WittyerType svType)
{
    // ploidy / cn / hasCn are only read further down, after this condition evaluated to false,
    // which means the local IsRefCall has run and assigned them.
    if (variant.IsRefSite() || IsRefCall(out var ploidy, out var cn, out var hasCn))
    {
        svType = CopyNumberReference;
        return null;
    }

    if (!variant.Info.TryGetValue(VcfConstants.SvTypeKey, out var svTypeStr))
    {
        // todo: maybe we can allow small variants, which would not have SVTYPE
        throw new InvalidDataException(
            $"Following variant does not have {VcfConstants.SvTypeKey} info key:\n{variant}");
    }

    svType = null;
    if (svTypeStr == SvTypeStrings.TranslocationBreakend)
    {
        // breakends can be IntraChromosomeBreakend and TranslocationBreakend, so can't tell from SVTYPE.
        // NOTE(review): GetOrThrow presumably throws when the ALT cannot be parsed as a breakend - confirm.
        var mate = variant is IBreakEnd cast
            ? cast.Mate
            : SimpleBreakEnd.TryParse(variant.GetSingleAlt()).GetOrThrow();
        svType = variant.Contig.Equals(mate.Contig)
            ? IntraChromosomeBreakend
            : TranslocationBreakend;
        return null;
    }

    if (!TryParseSvType(svTypeStr, out svType))
    {
        // Not BND because of check above, and if not parsable and not CNV, it's something we don't know.
        if (svTypeStr != SvTypeStrings.Cnv)
        {
            throw new InvalidDataException($"Cannot recognize SVTYPE of {svTypeStr}");
        }
    }
    else if (!svType.HasBaseLevelStats)
    {
        // If INV or INS or whatever that doesn't need to look for CN, return.
        return null;
    }

    if (!hasCn)
    {
        // CNV without CN is a failure; DEL or DUP without CN keeps the parsed SVTYPE.
        return svType == null ? FailedReason.CnvWithoutCn : default(FailedReason?);
    }

    // At this point, it is CNV with CN or DEL/DUP with CN, which are also considered CNV
    if (cn == null)
    {
        // has CN, but can't parse.
        svType = null; // clear out SVTYPE=DEL/DUP
        return FailedReason.UndeterminedCn;
    }

    svType = GetSvType(cn.Value);
    return null;

    // Maps a copy number to loss/gain relative to the ploidy. Equality with the ploidy has
    // already been handled as a reference call by IsRefCall below.
    WittyerType GetSvType(int cnValue)
    {
        if (cnValue < ploidy)
        {
            return isCrossTypeOn ? Deletion : CopyNumberLoss;
        }

        return isCrossTypeOn ? Duplication : CopyNumberGain;
    }

    // Reads the GT and CN sample fields, reporting ploidy (number of GT alleles, default 2),
    // the parsed CN (null when absent or unparsable) and whether a CN field exists at all.
    // Returns true when the sample looks like a reference call.
    bool IsRefCall(out int ploidyP, out int? cnP, out bool hasCnP)
    {
        ploidyP = 2;
        cnP = null;
        hasCnP = false;

        //if not refsite and no sample field, not a refcall
        if (variant.Samples.Count == 0)
        {
            return false;
        }

        var sample = sampleName == null ? variant.Samples[0] : variant.Samples[sampleName];
        hasCnP = sample.SampleDictionary.TryGetValue(VcfConstants.CnSampleFieldKey, out var cnString);
        var hasGt = sample.SampleDictionary.TryGetValue(VcfConstants.GenotypeKey, out var gt);

        if (hasCnP && int.TryParse(cnString, out var parsedCn))
        {
            cnP = parsedCn;
        }

        if (!hasGt)
        {
            // Compare the parsed CN with the default ploidy instead of matching the raw text
            // "2". Otherwise a CN like "02" parses to 2, is not recognised as reference here,
            // and then falls through to GetSvType where 2 < 2 is false, yielding a bogus gain.
            return cnP == ploidyP;
        }

        //todo: refining how to deal with ploidy. Also here we don't deal with LOH. assuming CN = ploidy is ref
        var gtArray = gt.Split(VcfConstants.GtPhasedValueDelimiter[0],
            VcfConstants.GtUnphasedValueDelimiter[0]);
        ploidyP = gtArray.Length;
        return cnP == null
            ? gtArray.All(alleleIndex => alleleIndex == "0")
            : cnP.Value == ploidyP;
    }
}