コード例 #1
0
        /// <summary>
        /// Parses one tab-delimited row of an Illumina matrix report into a
        /// <see cref="VariantCall"/> for every column after the first.
        /// </summary>
        /// <param name="line">A single line read from the report.</param>
        /// <returns>
        /// One call per column after the first, all sharing the id held in the first column, or
        /// <c>null</c> when the line is null, shorter than two characters or does not start with "rs".
        /// </returns>
        private VariantCall[] LoadIlluminaMatrixReportVariant(string line)
        {
            // Blank and truncated lines cannot hold an "rs" id; reject them up front rather than
            // indexing past the end of the string.
            if (line == null || line.Length < 2 || line[0] != 'r' || line[1] != 's')
            {
                return null;
            }

            // The shared splitter is created on the first accepted line, using that line's column count.
            if (_splitter == null)
            {
                var testSplit = line.Split('\t');
                _splitter = new StringSplitter(testSplit.Length);
            }

            _splitter.Split(line, '\t');
            var lineSplit = _splitter.Results;
            var id = lineSplit[0];

            var variants = new VariantCall[lineSplit.Length - 1];
            for (var i = 1; i < lineSplit.Length; i++)
            {
                // NOTE(review): assumes every cell after the first holds at least two characters
                // (one per allele); a shorter cell still throws here - confirm against the file format.
                var genotype = lineSplit[i];
                variants[i - 1] = new VariantCall
                {
                    Id = id,
                    AlleleA = genotype[0].ToString(),
                    AlleleB = genotype[1].ToString()
                };
            }

            return variants;
        }
コード例 #2
0
        /// <summary>
        /// Loads SNP records from a tab-delimited file into <c>Snps</c>, keyed by rs id.
        /// </summary>
        /// <param name="snpPath">Path handed to the file reader factory.</param>
        /// <param name="clearSnps">When true, <c>Snps</c> is emptied before loading.</param>
        /// <param name="progress">
        /// Optional progress sink; receives a "Loading SNPs" message and then the fraction of the
        /// stream consumed, once every ten lines.
        /// </param>
        /// <remarks>
        /// The first line is only used to size the splitter and is not parsed as data. A record is
        /// stored only when its reference allele is a single character other than "-" and its
        /// observed field has at least three characters; the first record seen for an id wins.
        /// </remarks>
        public void LoadUcscSnps(string snpPath, bool clearSnps, IProgressReporter progress = null)
        {
            if (clearSnps)
            {
                Snps.Clear();
            }

            progress?.Update("Loading SNPs");

            var lineCount = 0;
            _splitter = null;

            using (var reader = _fileReaderFactory.GetFileReader(snpPath))
            {
                foreach (var line in reader)
                {
                    // Count each line exactly once so the progress throttle below really is "every ten lines".
                    lineCount++;
                    if (lineCount == 1)
                    {
                        // First line: size the splitter from its column count and move on.
                        var testSplit = line.Split('\t');
                        _splitter = new StringSplitter(testSplit.Length);
                        continue;
                    }

                    // ReSharper disable once PossibleNullReferenceException
                    _splitter.Split(line, '\t');
                    var lineSplit = _splitter.Results;

                    // Zero-based columns 1, 2, 4, 6, 7 and 9 feed the record.
                    var snp = new Snp
                    {
                        Chromosome = lineSplit[1],
                        Pos = lineSplit[2],
                        RsId = lineSplit[4],
                        Strand = lineSplit[6],
                        SnpRef = lineSplit[7],
                        Observed = lineSplit[9]
                    };

                    var hasSingleBaseRef = snp.SnpRef != null && snp.SnpRef.Length == 1 && snp.SnpRef != "-";

                    // The alternate allele is read from positions 0 and 2 of the observed field
                    // ("X/Y"); anything shorter cannot be interpreted, so the record is skipped
                    // instead of aborting the whole load with an index error.
                    var hasAllelePair = snp.Observed != null && snp.Observed.Length >= 3;

                    if (hasSingleBaseRef && hasAllelePair)
                    {
                        char first;
                        char second;
                        if (snp.Strand == "+")
                        {
                            first = snp.Observed[0];
                            second = snp.Observed[2];
                        }
                        else
                        {
                            // Any strand other than "+": reverse the pair and complement each base.
                            first = ComplementObservedBase(snp.Observed[2]);
                            second = ComplementObservedBase(snp.Observed[0]);
                        }

                        // Whichever allele of the pair is not the reference is the alternate.
                        snp.SnpAlt = first == snp.SnpRef[0] ? second.ToString() : first.ToString();

                        if (!Snps.ContainsKey(snp.RsId))
                        {
                            Snps.Add(snp.RsId, snp);
                        }
                    }

                    if (progress != null && lineCount % 10 == 1)
                    {
                        var perc = (double)reader.Stream.Position / reader.Stream.Length;
                        progress.Update(perc);
                    }
                }
            }
        }

        /// <summary>
        /// Returns the complementary base for A, T, G or C (upper case only).
        /// </summary>
        /// <param name="observedBase">Character taken from an observed allele pair.</param>
        /// <returns>
        /// The complement, or the NUL character for any other input.
        /// NOTE(review): the NUL fallback mirrors the zero-initialised buffer this logic used
        /// previously; confirm whether characters such as '-' should map to themselves instead.
        /// </returns>
        private static char ComplementObservedBase(char observedBase)
        {
            switch (observedBase)
            {
                case 'A':
                    return 'T';
                case 'T':
                    return 'A';
                case 'G':
                    return 'C';
                case 'C':
                    return 'G';
                default:
                    return '\0';
            }
        }
コード例 #3
0
        /// <summary>
        /// Parses one tab-delimited VCF data line into a single-element <see cref="VariantCall"/> array.
        /// </summary>
        /// <param name="line">A single line read from the VCF file.</param>
        /// <returns>
        /// An array holding one call (id, reference and alternate taken from zero-based columns 2, 3
        /// and 4), or <c>null</c> when the line is null, shorter than two characters, starts with '#',
        /// has a value other than "PASS" in column 6, or has an id that does not start with "rs".
        /// </returns>
        private VariantCall[] LoadVcfVariant(string line)
        {
            // One check covers blank/truncated lines and every header line: both "##" meta lines
            // and the single-'#' column header start with '#'.
            if (line == null || line.Length < 2 || line[0] == '#')
            {
                return null;
            }

            // The shared splitter is created on the first data line, using that line's column count.
            if (_splitter == null)
            {
                var testSplit = line.Split('\t');
                _splitter = new StringSplitter(testSplit.Length);
            }

            _splitter.Split(line, '\t');
            var lineSplit = _splitter.Results;

            // Only keep records that passed QC (column 6 is FILTER in the standard VCF layout).
            if (lineSplit[6] != "PASS")
            {
                return null;
            }

            // Only keep rs ids. The length check keeps empty or one-character ids from
            // indexing past the end of the string.
            var id = lineSplit[2];
            if (id == null || id.Length < 2 || id[0] != 'r' || id[1] != 's')
            {
                return null;
            }

            var newVar = new VariantCall
            {
                Id = id,
                VcfRef = lineSplit[3],
                VcfAlt = lineSplit[4]
            };
            return new[] {newVar};
        }
コード例 #4
0
        /// <summary>
        /// Parses one tab-delimited row of an Illumina final report into a single-element
        /// <see cref="VariantCall"/> array.
        /// </summary>
        /// <param name="line">A single line read from the report.</param>
        /// <returns>
        /// An array holding one call (id from column 0, alleles from zero-based columns 16 and 17),
        /// or <c>null</c> when the line is null, shorter than two characters or does not start with "rs".
        /// </returns>
        private VariantCall[] LoadIlluminaFinalReportVariant(string line)
        {
            // Zero-based positions of the two allele columns within a data row.
            const int alleleAColumn = 16;
            const int alleleBColumn = 17;

            // Blank and truncated lines cannot hold an "rs" id; reject them up front rather than
            // indexing past the end of the string.
            if (line == null || line.Length < 2 || line[0] != 'r' || line[1] != 's')
            {
                return null;
            }

            // The shared splitter is created on the first accepted line, using that line's column count.
            if (_splitter == null)
            {
                var testSplit = line.Split('\t');
                _splitter = new StringSplitter(testSplit.Length);
            }

            _splitter.Split(line, '\t');
            var lineSplit = _splitter.Results;

            var newVar = new VariantCall
            {
                Id = lineSplit[0],
                AlleleA = lineSplit[alleleAColumn],
                AlleleB = lineSplit[alleleBColumn]
            };

            return new[] { newVar };
        }
コード例 #5
0
        /// <summary>
        /// Reads a variant file, looks each variant up in <c>Snps</c> by id and keeps running
        /// tallies that are pushed to a progress reporter after every variant.
        /// </summary>
        /// <param name="variantPath">Path handed to the file reader factory.</param>
        /// <param name="fileType">Format of the variant file; selects which tallies are kept and reported.</param>
        /// <param name="snpPath">
        /// Optional SNP file. When supplied, the SNPs are reloaded from it (clearing any already
        /// loaded) before the variant file is read.
        /// </param>
        /// <param name="logger">Optional logger; when null no tasks, messages or progress are emitted.</param>
        public void AnalyseVcfFile(string variantPath, VariantFileType fileType, string snpPath = null,
            ConsoleLogger logger = null)
        {
            logger?.LogInfo(LogName, "Analysing variants");

            // Optionally (re)load the SNP table first.
            if (snpPath != null)
            {
                ProgressReporter snpProgress = logger == null ? null : new ProgressReporter(logger, 1);

                logger?.StartTask(1, "Load SNP file");
                LoadUcscSnps(snpPath, true, snpProgress);
                logger?.EndTask(1);
                logger?.LogInfo(LogName, "Loaded " + Snps.Count + " SNPs");
            }

            // The line parsers lazily rebuild the splitter from the first line they accept.
            _splitter = null;

            // Tallies shared by every file type.
            var total = 0;
            var unmatched = 0;

            // VCF only: reference allele agrees with the SNP, split by SNP strand.
            var refOnPlus = 0;
            var refOnMinus = 0;

            // Illumina reports only: do both called alleles belong to the SNP's ref/alt pair?
            var allelesMatched = 0;
            var allelesMismatched = 0;

            var isVcf = fileType == VariantFileType.Vcf;
            var isIlluminaReport = fileType == VariantFileType.IlluminiaFinalReport ||
                                   fileType == VariantFileType.IlluminaMatrix;

            ProgressReporter variantProgress = logger == null ? null : new ProgressReporter(logger, 1);
            logger?.StartTask(1, "Processing variants");

            using (var reader = _fileReaderFactory.GetFileReader(variantPath))
            {
                foreach (var line in reader)
                {
                    // Lines that are not real variants come back as null.
                    var variants = LoadVariant(fileType, line);
                    if (variants == null)
                    {
                        continue;
                    }

                    foreach (var variant in variants)
                    {
                        Snp snp;
                        if (!Snps.TryGetValue(variant.Id, out snp) || snp == null)
                        {
                            unmatched++;
                        }
                        else if (isVcf)
                        {
                            if (snp.SnpRef == variant.VcfRef)
                            {
                                if (snp.Strand == "+")
                                {
                                    refOnPlus++;
                                }
                                else if (snp.Strand == "-")
                                {
                                    refOnMinus++;
                                }
                            }
                        }
                        else if (isIlluminaReport && variant.AlleleA != "-" && variant.AlleleB != "-")
                        {
                            var bothAllelesKnown =
                                (variant.AlleleA == snp.SnpRef || variant.AlleleA == snp.SnpAlt) &&
                                (variant.AlleleB == snp.SnpRef || variant.AlleleB == snp.SnpAlt);

                            if (bothAllelesKnown)
                            {
                                allelesMatched++;
                            }
                            else if (snp.Observed.Length <= 3)
                            {
                                // Mismatches are only counted for SNPs whose observed field is at most three characters.
                                allelesMismatched++;
                            }
                        }

                        total++;
                        if (logger == null)
                        {
                            continue;
                        }

                        // Fraction of the stream consumed so far, followed by the tallies for this file type.
                        variantProgress.Update((double) reader.Stream.Position / reader.Stream.Length);

                        if (isVcf)
                        {
                            variantProgress.Update(
                                $" TV:{total} UTV:{unmatched} :CallFwd+:{refOnPlus} CallFwd-:{refOnMinus}");
                        }
                        else if (isIlluminaReport)
                        {
                            variantProgress.Update(
                                $" TV:{total} UTV:{unmatched} MatchedFwd:{allelesMatched} UnMatchedFwd:{allelesMismatched}");
                        }
                    }
                }
            }

            logger?.EndTask(1);
        }