Beispiel #1
0
        /// <summary>
        /// Reads a whole PLINK binary data set into memory: individuals from the .fam file,
        /// loci from the .bim file and both haplotype bit matrices from the binary file.
        /// </summary>
        /// <param name="fileName">Path of the binary genotype file; the .fam and .bim paths are
        /// derived from it by swapping the extension.</param>
        /// <returns>A new PlinkData with Individual, Locus and the two haplotype matrices filled.</returns>
        /// <exception cref="FileNotFoundException">The .fam or .bim companion file does not exist.</exception>
        public PlinkData ReadFromFile(string fileName)
        {
            var famFile = FileUtils.ChangeExtension(fileName, ".fam");

            if (!File.Exists(famFile))
            {
                throw new FileNotFoundException("File not found: " + famFile);
            }

            var bimFile = FileUtils.ChangeExtension(fileName, ".bim");

            if (!File.Exists(bimFile))
            {
                throw new FileNotFoundException("File not found: " + bimFile);
            }

            var result = new PlinkData();

            result.Individual = PlinkIndividual.ReadFromFile(famFile);
            result.Locus      = PlinkLocus.ReadFromBimFile(bimFile);
            result.AllocateDataMemory();

            try
            {
                // Opened inside the try: OpenBinaryFile opens the reader and then reads header
                // bytes, so a failure part-way through must still reach the finally below.
                OpenBinaryFile(fileName);

                // In SNP-major mode each row of the file is one locus and each column one
                // individual; in individual-major mode the roles are swapped.
                bool snpMajor    = IsSNPMajor;
                int  rowCount    = snpMajor ? result.Locus.Count : result.Individual.Count;
                int  columnCount = snpMajor ? result.Individual.Count : result.Locus.Count;

                for (int row = 0; row < rowCount; row++)
                {
                    int column = 0;
                    while (column < columnCount)
                    {
                        // Each byte carries up to four entries of two bits each. A row always
                        // starts on a fresh byte, so the unused bits of the last byte are skipped.
                        var bits = ReadByte();
                        for (int bit = 0; bit < 8 && column < columnCount; bit += 2, column++)
                        {
                            int locus      = snpMajor ? row : column;
                            int individual = snpMajor ? column : row;

                            result.IsHaplotype1Allele2[locus, individual] = bits[bit];
                            result.IsHaplotype2Allele2[locus, individual] = bits[bit + 1];
                        }
                    }
                }
            }
            finally
            {
                // The reader may still be null if OpenBinaryFile failed before opening the file.
                if (_reader != null)
                {
                    _reader.Close();
                    _reader = null;
                }
            }

            return(result);
        }
Beispiel #2
0
        /// <summary>
        /// Converts the PLINK binary data set named by options.InputFile into one .gen file and
        /// one .sample file per chromosome, after matching the loci against dbSNP and deciding
        /// a strand action for each of them. A .namemap and a .stat report are written as well.
        /// </summary>
        /// <returns>The path of the .stat file followed by the path of every .gen file written.</returns>
        public override IEnumerable <string> Process()
        {
            var outputFiles = new List <string>();

            // Load the loci from the .bim companion file and drop indels and missing entries.
            var bimPath = Path.ChangeExtension(options.InputFile, ".bim");
            var loci    = PlinkLocus.ReadFromBimFile(bimPath, false, false);
            loci.RemoveAll(m => IsIndel(m) || IsMissing(m));

            var items = loci.Select(locus => new SNPItem()
            {
                Chrom    = locus.Chromosome,
                Name     = locus.MarkerId,
                Position = locus.PhysicalPosition,
                Allele1  = locus.Allele1[0],
                Allele2  = locus.Allele2
            }).ToList();

            // Look the items up in dbSNP by position and record the returned name mapping.
            var nameMap = items.FillDbsnpIdByPosition(options.DbsnpFile, this.Progress);

            using (var mapWriter = new StreamWriter(options.OutputPrefix + ".namemap"))
            {
                mapWriter.WriteLine("NewName\tOldName");
                foreach (var entry in nameMap)
                {
                    mapWriter.WriteLine("{0}\t{1}", entry.Key, entry.Value);
                }
            }

            // Items whose dbSNP reference allele is still blank had no dbSNP match.
            items.RemoveAll(m => m.DbsnpRefAllele == ' ');

            // Report every name shared by more than one item on stderr.
            var itemsByName = items.ToGroupDictionary(m => m.Name);
            foreach (var group in itemsByName)
            {
                if (group.Value.Count <= 1)
                {
                    continue;
                }

                Console.Error.WriteLine("Duplicated SNP:" + group.Key);
                foreach (var dup in group.Value)
                {
                    Console.Error.WriteLine("{0}:{1}-{2}:{3},{4}:{5},{6}", group.Key, dup.Chrom, dup.Position, dup.Allele1, dup.Allele2, dup.DbsnpRefAllele, dup.DbsnpAltAllele);
                }
            }

            // Optional annotation steps, each run only when its input file exists.
            if (File.Exists(options.G1000File))
            {
                items.FindAllele2FrequencyFrom1000GomeByName(options.G1000File, this.Progress);
            }

            if (File.Exists(options.FastaFile))
            {
                items.FillReferenceAlleleFromFasta(options.FastaFile, this.Progress);
            }

            // Write the per-item report and remember the suggested action by item name.
            var actionByName = new Dictionary <string, StrandAction>();
            var statPath     = options.OutputPrefix + ".stat";
            outputFiles.Add(statPath);

            using (var statWriter = new StreamWriter(statPath))
            {
                statWriter.WriteLine("Name\tChromosome\tPosition\tSource_Allele1\tSource_Allele2\tReference_Allele\tDbsnp_RefAllele\tDbsnp_AltAllele\tDbsnp_IsReversed\tG1000_RefAllele\tG1000_AltAllele\tG1000_MAF\tAction");

                foreach (var item in items)
                {
                    StrandAction suggested = item.SuggestAction();
                    statWriter.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11:0.####}\t{12}", item.Name, item.Chrom, item.Position, item.Allele1, item.Allele2, item.RefChar, item.DbsnpRefAllele, item.DbsnpAltAllele, item.DbsnpIsReversed, item.G1000Allele1, item.G1000Allele2, item.G1000Allele2Frequency, suggested);
                    actionByName[item.Name] = suggested;
                }
            }

            using (var bed = new PlinkBedRandomFile(options.InputFile)
            {
                Progress = this.Progress
            })
            {
                var plink = bed.Data;

                var chromosomes = items.Select(v => v.Chrom).Distinct().OrderBy(m => m).ToArray();
                foreach (var chrom in chromosomes)
                {
                    var genPath = string.Format("{0}.{1}.gen", options.OutputPrefix, chrom.ToString().PadLeft(2, '0'));
                    outputFiles.Add(genPath);

                    // Every chromosome gets its own .sample file next to the .gen file.
                    var samplePath = FileUtils.ChangeExtension(genPath, ".sample");
                    new GwasSampleFormat().WriteToFile(samplePath, plink.Individual);

                    using (var genWriter = new StreamWriter(genPath))
                    {
                        genWriter.NewLine = Environment.NewLine;

                        var chromItems = items.Where(m => m.Chrom == chrom).ToList();
                        GenomeUtils.SortChromosome(chromItems, m => chrom.ToString(), m => m.Position);

                        foreach (var item in chromItems)
                        {
                            // The binary file is addressed through the name the map holds for this item.
                            var genotype = bed.Read(nameMap[item.Name]);
                            var action   = actionByName[item.Name];

                            // Switch and FlipSwitch reverse which bit value stands for the dbSNP alt allele.
                            bool bitMeansAlt = StrandAction.Switch == action || StrandAction.FlipSwitch == action;

                            genWriter.Write("{0} {1} {2} {3} {4}", item.Chrom, item.Name, item.Position, item.DbsnpRefAllele, item.DbsnpAltAllele);

                            for (int person = 0; person < plink.Individual.Count; person++)
                            {
                                var hap1 = genotype[0, person];
                                var hap2 = genotype[1, person];

                                // Three columns per individual: ref/ref, ref/alt, alt/alt.
                                string call;
                                if (PlinkData.IsMissing(hap1, hap2))
                                {
                                    call = " 0 0 0";
                                }
                                else
                                {
                                    char first = bitMeansAlt
                                        ? (hap1 ? item.DbsnpAltAllele : item.DbsnpRefAllele)
                                        : (hap1 ? item.DbsnpRefAllele : item.DbsnpAltAllele);
                                    char second = bitMeansAlt
                                        ? (hap2 ? item.DbsnpAltAllele : item.DbsnpRefAllele)
                                        : (hap2 ? item.DbsnpRefAllele : item.DbsnpAltAllele);

                                    if (first != second)
                                    {
                                        call = " 0 1 0";
                                    }
                                    else if (first == item.DbsnpRefAllele)
                                    {
                                        call = " 1 0 0";
                                    }
                                    else
                                    {
                                        call = " 0 0 1";
                                    }
                                }

                                genWriter.Write(call);
                            }

                            genWriter.WriteLine();
                        }
                    }
                }
            }

            return(outputFiles);
        }
        /// <summary>
        /// Loads the .fam and .bim companions of <paramref name="fileName"/> into Data, opens the
        /// binary file, inspects its leading bytes to decide the file version and whether it is
        /// SNP-major or individual-major, and records the stream position where the data starts.
        /// </summary>
        /// <param name="fileName">Path of the binary genotype file; the .fam and .bim paths are
        /// derived from it by swapping the extension.</param>
        /// <exception cref="FileNotFoundException">The .fam or .bim companion file does not exist.</exception>
        public void OpenBinaryFile(string fileName)
        {
            var famPath = FileUtils.ChangeExtension(fileName, ".fam");
            if (!File.Exists(famPath))
            {
                throw new FileNotFoundException("File not found: " + famPath);
            }

            var bimPath = FileUtils.ChangeExtension(fileName, ".bim");
            if (!File.Exists(bimPath))
            {
                throw new FileNotFoundException("File not found: " + bimPath);
            }

            Data            = new PlinkData();
            Data.Individual = PlinkIndividual.ReadFromFile(famPath);
            Data.Locus      = PlinkLocus.ReadFromBimFile(bimPath);
            Data.BuildMap();

            DoOpenFile(fileName);

            BitArray header = ReadByte();
            bool     isV1   = false;

            // First signature byte: exactly bits 2, 3, 5 and 6 set.
            // NOTE(review): presumably the 0x6C 0x1B magic of PLINK v1 BED files, assuming
            // ReadByte maps bit 0 to the least significant bit - confirm.
            bool firstSignature = header[2] && header[3] && header[5] && header[6]
                                  && !header[0] && !header[1] && !header[4] && !header[7];
            if (firstSignature)
            {
                // Second signature byte: exactly bits 0, 1, 3 and 4 set.
                header = ReadByte();
                bool secondSignature = header[0] && header[1] && header[3] && header[4]
                                       && !header[2] && !header[5] && !header[6] && !header[7];
                if (secondSignature)
                {
                    // Third byte: bit 0 carries the mode flag.
                    header     = ReadByte();
                    IsSNPMajor = header[0];
                    isV1       = true;

                    if (IsSNPMajor)
                    {
                        Progress.SetMessage("Detected that binary PED file is v1.00 SNP-major mode\n");
                    }
                    else
                    {
                        Progress.SetMessage("Detected that binary PED file is v1.00 individual-major mode\n");
                    }
                }
            }

            if (!isV1)
            {
                // No v1 signature: reopen the file and inspect its first byte again.
                Progress.SetMessage("Warning, old BED file <v1.00 : will try to recover...\n");
                DoOpenFile(fileName);
                header = ReadByte();

                bool otherBitsSet = header[1] || header[2] || header[3] || header[4]
                                    || header[5] || header[6] || header[7];
                if (otherBitsSet)
                {
                    // The first byte is not a bare mode flag, so the file is treated as having
                    // no header at all: reopen it and fall back to individual-major.
                    Progress.SetMessage(" *** Possible problem: guessing that BED is < v0.99   *** ");
                    Progress.SetMessage(" *** High chance of data corruption, spurious results *** ");

                    IsSNPMajor = false;
                    DoOpenFile(fileName);
                }
                else
                {
                    // v0.99 layout: a single leading byte whose bit 0 is the mode flag.
                    IsSNPMajor = header[0];

                    Progress.SetMessage("Binary PED file is v0.99\n");

                    if (IsSNPMajor)
                    {
                        Progress.SetMessage("Detected that binary PED file is in SNP-major mode\n");
                    }
                    else
                    {
                        Progress.SetMessage("Detected that binary PED file is in individual-major mode\n");
                    }
                }
            }

            // Whatever header bytes were consumed, the data starts at the current position.
            _startPosition = _reader.BaseStream.Position;
        }