/// <summary>
/// Parses one row of an Illumina matrix report into one variant call per column
/// following the identifier column.
/// </summary>
/// <param name="line">A single tab-delimited line from the report.</param>
/// <returns>
/// One <see cref="VariantCall"/> for every column after the first, all sharing the id
/// found in the first column; <c>null</c> when the line is shorter than two characters
/// or does not start with "rs".
/// </returns>
private VariantCall[] LoadIlluminaMatrixReportVariant(string line)
{
    // Blank or one-character lines cannot carry an "rs" identifier. Reject them up front
    // instead of indexing past the end of the string (same guard as LoadVcfVariant).
    if (line == null || line.Length < 2)
    {
        return null;
    }

    // Only rows whose first column starts with "rs" are treated as variants.
    if (line[0] != 'r' || line[1] != 's')
    {
        return null;
    }

    // The splitter is created lazily, sized from the first accepted row, then reused.
    if (_splitter == null)
    {
        var testSplit = line.Split('\t');
        _splitter = new StringSplitter(testSplit.Length);
    }

    _splitter.Split(line, '\t');
    var lineSplit = _splitter.Results;

    var variants = new VariantCall[lineSplit.Length - 1];
    for (var i = 1; i < lineSplit.Length; i++)
    {
        // NOTE(review): assumes every genotype cell holds at least two characters
        // (first = allele A, second = allele B) - confirm against the report format.
        var genotype = lineSplit[i];
        variants[i - 1] = new VariantCall
        {
            Id = lineSplit[0],
            AlleleA = genotype[0].ToString(),
            AlleleB = genotype[1].ToString()
        };
    }

    return variants;
}
/// <summary>
/// Reads a tab-delimited SNP file and adds its single-base SNPs to <c>Snps</c>, keyed by rs id.
/// </summary>
/// <param name="snpPath">Path handed to the file reader factory.</param>
/// <param name="clearSnps">When <c>true</c>, <c>Snps</c> is emptied before loading.</param>
/// <param name="progress">
/// Optional reporter. Receives the message "Loading SNPs" once, then the fraction of the
/// stream consumed on every tenth line.
/// </param>
/// <remarks>
/// The first line is never parsed as data; it is only used to size the shared splitter.
/// A row is stored only when its reference allele is a single character other than "-"
/// and its observed string is at least three characters long. When an rs id occurs more
/// than once, the first occurrence wins.
/// </remarks>
public void LoadUcscSnps(string snpPath, bool clearSnps, IProgressReporter progress = null)
{
    if (clearSnps)
    {
        Snps.Clear();
    }

    progress?.Update("Loading SNPs");

    var lineCount = 0;
    _splitter = null;

    using (var reader = _fileReaderFactory.GetFileReader(snpPath))
    {
        foreach (var line in reader)
        {
            // Counted exactly once per line so the header test and the progress
            // throttle below both see a true line number.
            lineCount++;
            if (lineCount == 1)
            {
                var testSplit = line.Split('\t');
                _splitter = new StringSplitter(testSplit.Length);
                continue;
            }

            // ReSharper disable once PossibleNullReferenceException
            _splitter.Split(line, '\t');
            var lineSplit = _splitter.Results;

            // NOTE(review): column positions presumably follow the UCSC snp table layout - confirm.
            var snp = new Snp
            {
                Chromosome = lineSplit[1],
                Pos = lineSplit[2],
                RsId = lineSplit[4],
                Strand = lineSplit[6],
                SnpRef = lineSplit[7],
                Observed = lineSplit[9]
            };

            // Only single-base references with an "X/Y"-shaped observed string can yield
            // an alternate allele; shorter observed strings are skipped rather than
            // indexed out of range.
            if (snp.SnpRef.Length == 1 && snp.SnpRef != "-" && snp.Observed.Length >= 3)
            {
                char first;
                char second;
                if (snp.Strand == "+")
                {
                    first = snp.Observed[0];
                    second = snp.Observed[2];
                }
                else
                {
                    // Any strand other than "+": reverse the allele order and complement each base.
                    first = ComplementObservedBase(snp.Observed[2]);
                    second = ComplementObservedBase(snp.Observed[0]);
                }

                // The alternate allele is whichever observed allele is not the reference.
                snp.SnpAlt = first == snp.SnpRef[0] ? second.ToString() : first.ToString();

                if (!Snps.ContainsKey(snp.RsId))
                {
                    Snps.Add(snp.RsId, snp);
                }
            }

            if (progress != null && lineCount % 10 == 1)
            {
                var perc = (double)reader.Stream.Position / reader.Stream.Length;
                progress.Update(perc);
            }
        }
    }
}

/// <summary>
/// Returns the complementary base for A, T, G or C; any other character maps to '\0'.
/// </summary>
private static char ComplementObservedBase(char nucleotide)
{
    switch (nucleotide)
    {
        case 'A':
            return 'T';
        case 'T':
            return 'A';
        case 'G':
            return 'C';
        case 'C':
            return 'G';
        default:
            // Unrecognised symbols keep the zero character an unassigned char slot would hold.
            return '\0';
    }
}
/// <summary>
/// Parses one line of a VCF file into a single variant call.
/// </summary>
/// <param name="line">A single tab-delimited line from the file.</param>
/// <returns>
/// A one-element array carrying the id (column 2), reference (column 3) and alternate
/// (column 4) values; <c>null</c> when the line is shorter than two characters, starts
/// with '#', has a column 6 value other than "PASS", or has an id that does not start
/// with "rs".
/// </returns>
private VariantCall[] LoadVcfVariant(string line)
{
    if (line == null || line.Length < 2)
    {
        return null;
    }

    // A leading '#' covers both "##" and "#" comment/header lines, so a single
    // character test is enough and avoids allocating a substring per line.
    if (line[0] == '#')
    {
        return null;
    }

    // The splitter is created lazily, sized from the first data row, then reused.
    if (_splitter == null)
    {
        var testSplit = line.Split('\t');
        _splitter = new StringSplitter(testSplit.Length);
    }

    _splitter.Split(line, '\t');
    var lineSplit = _splitter.Results;

    // Check if we passed QC
    if (lineSplit[6] != "PASS")
    {
        return null;
    }

    // Keep only ids that start with "rs". The length test protects the second
    // character read when the id is empty or a single character.
    var id = lineSplit[2];
    if (id == null || id.Length < 2 || id[0] != 'r' || id[1] != 's')
    {
        return null;
    }

    var newVar = new VariantCall
    {
        Id = id,
        VcfRef = lineSplit[3],
        VcfAlt = lineSplit[4]
    };

    return new[] { newVar };
}
/// <summary>
/// Parses one row of an Illumina final report into a single variant call.
/// </summary>
/// <param name="line">A single tab-delimited line from the report.</param>
/// <returns>
/// A one-element array whose id comes from column 0 and whose alleles come from
/// columns 16 and 17; <c>null</c> when the line is shorter than two characters or
/// does not start with "rs".
/// </returns>
private VariantCall[] LoadIlluminaFinalReportVariant(string line)
{
    // Blank or one-character lines cannot carry an "rs" identifier. Reject them up front
    // instead of indexing past the end of the string (same guard as LoadVcfVariant).
    if (line == null || line.Length < 2)
    {
        return null;
    }

    // Only rows whose first column starts with "rs" are treated as variants.
    if (line[0] != 'r' || line[1] != 's')
    {
        return null;
    }

    // The splitter is created lazily, sized from the first accepted row, then reused.
    if (_splitter == null)
    {
        var testSplit = line.Split('\t');
        _splitter = new StringSplitter(testSplit.Length);
    }

    _splitter.Split(line, '\t');
    var lineSplit = _splitter.Results;

    // NOTE(review): allele columns are fixed at 16 and 17 - confirm they match the report layout in use.
    var newVar = new VariantCall
    {
        Id = lineSplit[0],
        AlleleA = lineSplit[16],
        AlleleB = lineSplit[17]
    };

    return new[] { newVar };
}
/// <summary>
/// Streams a variant file, looks each variant up in <c>Snps</c> by id and keeps running
/// tallies that are pushed to the progress reporter when a logger is supplied.
/// </summary>
/// <param name="variantPath">Path handed to the file reader factory.</param>
/// <param name="fileType">Selects both the line parser and the comparison applied to matches.</param>
/// <param name="snpPath">When not null, the SNP table is cleared and reloaded from this path first.</param>
/// <param name="logger">Optional logger; without it no progress is reported.</param>
public void AnalyseVcfFile(string variantPath, VariantFileType fileType, string snpPath = null, ConsoleLogger logger = null)
{
    logger?.LogInfo(LogName, "Analysing variants");

    // Optional SNP reload, reported as its own task.
    if (snpPath != null)
    {
        ProgressReporter snpProgress = logger != null ? new ProgressReporter(logger, 1) : null;
        logger?.StartTask(1, "Load SNP file");
        LoadUcscSnps(snpPath, true, snpProgress);
        logger?.EndTask(1);
        logger?.LogInfo(LogName, "Loaded " + Snps.Count + " SNPs");
    }

    // The splitter is rebuilt from the first variant row.
    _splitter = null;

    // Tallies shared by every file type.
    var total = 0;
    var withoutSnp = 0;

    // VCF tallies: reference allele equals the SNP reference, split by SNP strand.
    var refOnPlus = 0;
    var refOnMinus = 0;

    // Illumina tallies: both alleles drawn from {ref, alt}, or not.
    var allelesKnown = 0;
    var allelesUnknown = 0;

    // Both Illumina formats are scored and reported identically.
    var isVcf = fileType == VariantFileType.Vcf;
    var isIllumina = fileType == VariantFileType.IlluminiaFinalReport
                     || fileType == VariantFileType.IlluminaMatrix;

    ProgressReporter variantProgress = logger != null ? new ProgressReporter(logger, 1) : null;
    logger?.StartTask(1, "Processing variants");

    using (var reader = _fileReaderFactory.GetFileReader(variantPath))
    {
        foreach (var line in reader)
        {
            var parsed = LoadVariant(fileType, line);
            if (parsed == null)
            {
                // Not a variant row.
                continue;
            }

            foreach (var variant in parsed)
            {
                Snp snp;
                Snps.TryGetValue(variant.Id, out snp);

                if (snp == null)
                {
                    withoutSnp++;
                }
                else if (isVcf)
                {
                    if (snp.SnpRef == variant.VcfRef)
                    {
                        if (snp.Strand == "+")
                        {
                            refOnPlus++;
                        }
                        else if (snp.Strand == "-")
                        {
                            refOnMinus++;
                        }
                    }
                }
                else if (isIllumina && variant.AlleleA != "-" && variant.AlleleB != "-")
                {
                    if ((variant.AlleleA == snp.SnpRef || variant.AlleleA == snp.SnpAlt)
                        && (variant.AlleleB == snp.SnpRef || variant.AlleleB == snp.SnpAlt))
                    {
                        allelesKnown++;
                    }
                    else if (snp.Observed.Length <= 3)
                    {
                        // Counted as a mismatch only when the observed string is at most three characters.
                        allelesUnknown++;
                    }
                }

                total++;

                if (logger != null)
                {
                    var perc = (double)reader.Stream.Position / reader.Stream.Length;
                    variantProgress.Update(perc);

                    if (isVcf)
                    {
                        variantProgress.Update(
                            $" TV:{total} UTV:{withoutSnp} :CallFwd+:{refOnPlus} CallFwd-:{refOnMinus}");
                    }
                    else if (isIllumina)
                    {
                        variantProgress.Update(
                            $" TV:{total} UTV:{withoutSnp} MatchedFwd:{allelesKnown} UnMatchedFwd:{allelesUnknown}");
                    }
                }
            }
        }
    }

    logger?.EndTask(1);
}