/// <summary>
/// For every locus of the input PLINK binary data set, counts the non-missing
/// samples and the allele copies, stores the allele 1 frequency and the sample
/// counts on the locus, and finally writes the locus list to the output file.
/// </summary>
/// <returns>A one-element array holding the output file name.</returns>
public override IEnumerable <string> Process()
        {
            using (var bedFile = new PlinkBedRandomFile(_options.InputFile) { Progress = this.Progress })
            {
                var loci        = bedFile.Data.Locus;
                var individuals = bedFile.Data.Individual;

                Progress.SetRange(0, loci.Count);
                for (int locusIndex = 0; locusIndex < loci.Count; locusIndex++)
                {
                    Progress.SetPosition(locusIndex);

                    var current  = loci[locusIndex];
                    var genotype = bedFile.Read(current.MarkerId);

                    // Only allele 2 copies are tallied; every non-missing sample
                    // contributes exactly two copies, so allele 1 is the remainder.
                    int allele2Copies   = 0;
                    int observedSamples = 0;
                    for (int sample = 0; sample < individuals.Count; sample++)
                    {
                        var first  = genotype[0, sample];
                        var second = genotype[1, sample];

                        if (PlinkData.IsMissing(first, second))
                        {
                            continue;
                        }

                        observedSamples++;

                        if (first)
                        {
                            allele2Copies++;
                        }

                        if (second)
                        {
                            allele2Copies++;
                        }
                    }

                    int totalCopies   = 2 * observedSamples;
                    int allele1Copies = totalCopies - allele2Copies;

                    // Floating-point division: a locus without any valid sample yields NaN.
                    current.Allele1Frequency = ((double)allele1Copies) / totalCopies;
                    current.TotalSample      = individuals.Count;
                    current.ValidSample      = observedSamples;
                }

                PlinkLocus.WriteToFile(_options.OutputFile, loci, false, true, true);
            }

            return new string[] { _options.OutputFile };
        }
Пример #2
0
    /// <summary>
    /// Asserts that <paramref name="data"/> holds the expected fixture content:
    /// 2 loci, 7 individuals, and the expected allele and genotype strings per locus.
    /// </summary>
    private static void Validate(PlinkData data)
    {
      // Expected values, indexed by locus.
      var expectedAllele1 = new string[] { "GCCCGC0", "ATTTA0T" };
      var expectedAllele2 = new string[] { "GGGCGG0", "ATAAA0A" };
      var expectedGenotype = new string[] { "2,1,1,0,2,1,3", "2,0,1,1,2,3,1" };

      Assert.AreEqual(2, data.Locus.Count);
      Assert.AreEqual(7, data.Individual.Count);

      // Alleles are verified for all loci before any genotype string is checked.
      for (int locus = 0; locus < expectedAllele1.Length; locus++)
      {
        Assert.AreEqual(expectedAllele1[locus], data.LocusAllele1(locus));
        Assert.AreEqual(expectedAllele2[locus], data.LocusAllele2(locus));
      }

      for (int locus = 0; locus < expectedGenotype.Length; locus++)
      {
        Assert.AreEqual(expectedGenotype[locus], data.LocusGenoType(locus, ","));
      }
    }
Пример #3
0
    /// <summary>
    /// Loads a complete PLINK binary data set. The individual (.fam) and locus
    /// (.bim) files are located by changing the extension of
    /// <paramref name="fileName"/>; the binary file itself is then decoded into
    /// the two haplotype matrices of the returned <see cref="PlinkData"/>.
    /// </summary>
    /// <param name="fileName">Path of the binary genotype file.</param>
    /// <returns>The populated data set.</returns>
    /// <exception cref="FileNotFoundException">The .fam or .bim file does not exist.</exception>
    public PlinkData ReadFromFile(string fileName)
    {
      var individualFile = FileUtils.ChangeExtension(fileName, ".fam");
      if (!File.Exists(individualFile))
      {
        throw new FileNotFoundException("File not found: " + individualFile);
      }

      var locusFile = FileUtils.ChangeExtension(fileName, ".bim");
      if (!File.Exists(locusFile))
      {
        throw new FileNotFoundException("File not found: " + locusFile);
      }

      var result = new PlinkData();
      result.Individual = PlinkIndividual.ReadFromFile(individualFile);
      result.Locus = PlinkLocus.ReadFromBimFile(locusFile);
      result.AllocateDataMemory();

      OpenBinaryFile(fileName);
      try
      {
        // Each byte carries up to four genotypes as consecutive bit pairs
        // (bits 0-1, 2-3, 4-5, 6-7); every row starts on a fresh byte.
        if (IsSNPMajor)
        {
          // One row per locus, one bit pair per individual.
          for (int locus = 0; locus < result.Locus.Count; locus++)
          {
            for (int individual = 0; individual < result.Individual.Count; )
            {
              var bits = ReadByte();
              for (int bit = 0; bit < 7 && individual < result.Individual.Count; bit += 2, individual++)
              {
                result.IsHaplotype1Allele2[locus, individual] = bits[bit];
                result.IsHaplotype2Allele2[locus, individual] = bits[bit + 1];
              }
            }
          }
        }
        else
        {
          // One row per individual, one bit pair per locus.
          for (int individual = 0; individual < result.Individual.Count; individual++)
          {
            for (int locus = 0; locus < result.Locus.Count; )
            {
              var bits = ReadByte();
              for (int bit = 0; bit < 7 && locus < result.Locus.Count; bit += 2, locus++)
              {
                result.IsHaplotype1Allele2[locus, individual] = bits[bit];
                result.IsHaplotype2Allele2[locus, individual] = bits[bit + 1];
              }
            }
          }
        }
      }
      finally
      {
        // Release the reader whether or not decoding succeeded.
        _reader.Close();
        _reader = null;
      }

      return result;
    }
Пример #4
0
        /// <summary>
        /// Converts a PLINK binary data set into per-chromosome ".gen" / ".sample"
        /// files. Along the way it writes a ".namemap" file (new name to old name)
        /// and a ".stat" file listing, for each retained SNP, its alleles, the
        /// reference information gathered for it and the suggested strand action.
        /// </summary>
        /// <returns>
        /// The ".stat" file name followed by the name of every ".gen" file written.
        /// The ".namemap" and ".sample" files are written but not included.
        /// </returns>
        public override IEnumerable <string> Process()
        {
            var result = new List <string>();

            // The locus (.bim) file is expected next to the input file.
            var bimfile = Path.ChangeExtension(options.InputFile, ".bim");

            var snps = PlinkLocus.ReadFromBimFile(bimfile, false, false);

            // Drop every locus flagged by the IsIndel / IsMissing helpers (defined outside this block).
            snps.RemoveAll(m => IsIndel(m) || IsMissing(m));

            var snpItems = (from snp in snps
                            select new SNPItem()
            {
                Chrom = snp.Chromosome,
                Name = snp.MarkerId,
                Position = snp.PhysicalPosition,
                Allele1 = snp.Allele1[0],
                Allele2 = snp.Allele2
            }).ToList();

            // nameMap is enumerated below as key/value pairs and later indexed by snp.Name
            // to obtain the identifier passed to reader.Read.
            // NOTE(review): presumably FillDbsnpIdByPosition renames the items to dbSNP ids
            // and maps each new name to the original marker id - confirm against its implementation.
            var nameMap = snpItems.FillDbsnpIdByPosition(options.DbsnpFile, this.Progress);

            // Persist the mapping: key under "NewName", value under "OldName".
            using (var sw = new StreamWriter(options.OutputPrefix + ".namemap"))
            {
                sw.WriteLine("NewName\tOldName");
                foreach (var n in nameMap)
                {
                    sw.WriteLine("{0}\t{1}", n.Key, n.Value);
                }
            }

            //remove all snps without corresponding dbsnp entry
            snpItems.RemoveAll(m => m.DbsnpRefAllele == ' ');

            // Report (on standard error) every name shared by more than one SNP item;
            // duplicates are only reported here, not removed.
            var nameDic = snpItems.ToGroupDictionary(m => m.Name);

            foreach (var n in nameDic)
            {
                if (n.Value.Count > 1)
                {
                    Console.Error.WriteLine("Duplicated SNP:" + n.Key);
                    foreach (var v in n.Value)
                    {
                        Console.Error.WriteLine("{0}:{1}-{2}:{3},{4}:{5},{6}", n.Key, v.Chrom, v.Position, v.Allele1, v.Allele2, v.DbsnpRefAllele, v.DbsnpAltAllele);
                    }
                }
            }

            // Both annotation steps are optional: each runs only when its file exists.
            if (File.Exists(options.G1000File))
            {
                snpItems.FindAllele2FrequencyFrom1000GomeByName(options.G1000File, this.Progress);
            }

            if (File.Exists(options.FastaFile))
            {
                snpItems.FillReferenceAlleleFromFasta(options.FastaFile, this.Progress);
            }

            // Suggested strand action per SNP name, filled while writing the stat file
            // and consulted again when the genotypes are exported.
            Dictionary <string, StrandAction> actionMap = new Dictionary <string, StrandAction>();

            var statFile = options.OutputPrefix + ".stat";

            result.Add(statFile);
            using (var sw = new StreamWriter(statFile))
            {
                sw.WriteLine("Name\tChromosome\tPosition\tSource_Allele1\tSource_Allele2\tReference_Allele\tDbsnp_RefAllele\tDbsnp_AltAllele\tDbsnp_IsReversed\tG1000_RefAllele\tG1000_AltAllele\tG1000_MAF\tAction");

                foreach (var v in snpItems)
                {
                    StrandAction action = v.SuggestAction();
                    sw.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11:0.####}\t{12}", v.Name, v.Chrom, v.Position, v.Allele1, v.Allele2, v.RefChar, v.DbsnpRefAllele, v.DbsnpAltAllele, v.DbsnpIsReversed, v.G1000Allele1, v.G1000Allele2, v.G1000Allele2Frequency, action);
                    // With duplicated names the last item written wins.
                    actionMap[v.Name] = action;
                }
            }

            using (var reader = new PlinkBedRandomFile(options.InputFile)
            {
                Progress = this.Progress
            })
            {
                var data = reader.Data;

                // One .gen/.sample pair per distinct chromosome, in ascending order.
                var chrs = (from v in snpItems select v.Chrom).Distinct().OrderBy(m => m).ToArray();
                foreach (var chr in chrs)
                {
                    // The chromosome is left-padded with '0' to at least two characters in the file name.
                    var genfile = string.Format("{0}.{1}.gen", options.OutputPrefix, chr.ToString().PadLeft(2, '0'));
                    result.Add(genfile);
                    var map = FileUtils.ChangeExtension(genfile, ".sample");

                    // The same individual list is written for every chromosome.
                    new GwasSampleFormat().WriteToFile(map, data.Individual);

                    //save gen file
                    using (var sw = new StreamWriter(genfile))
                    {
                        sw.NewLine = Environment.NewLine;
                        var chrItems = snpItems.Where(m => m.Chrom == chr).ToList();
                        GenomeUtils.SortChromosome(chrItems, m => chr.ToString(), m => m.Position);
                        foreach (var snp in chrItems)
                        {
                            // Genotypes are fetched under the name stored in nameMap for this SNP.
                            var ldata  = reader.Read(nameMap[snp.Name]);
                            var action = actionMap[snp.Name];

                            // Line prefix: chromosome, name, position, dbSNP reference allele, dbSNP alternate allele.
                            sw.Write("{0} {1} {2} {3} {4}", snp.Chrom, snp.Name, snp.Position, snp.DbsnpRefAllele, snp.DbsnpAltAllele);
                            for (int individualIndex = 0; individualIndex < data.Individual.Count; individualIndex++)
                            {
                                // Each individual contributes three values; a missing genotype is written as all zeros.
                                if (PlinkData.IsMissing(ldata[0, individualIndex], ldata[1, individualIndex]))
                                {
                                    sw.Write(" 0 0 0");
                                }
                                else
                                {
                                    char alle1, alle2;
                                    // For Switch / FlipSwitch a set bit is read as the dbSNP alternate allele;
                                    // for every other action a set bit is read as the dbSNP reference allele.
                                    if (StrandAction.Switch == action || StrandAction.FlipSwitch == action)
                                    {
                                        alle1 = ldata[0, individualIndex] ? snp.DbsnpAltAllele : snp.DbsnpRefAllele;
                                        alle2 = ldata[1, individualIndex] ? snp.DbsnpAltAllele : snp.DbsnpRefAllele;
                                    }
                                    else
                                    {
                                        alle1 = ldata[0, individualIndex] ? snp.DbsnpRefAllele : snp.DbsnpAltAllele;
                                        alle2 = ldata[1, individualIndex] ? snp.DbsnpRefAllele : snp.DbsnpAltAllele;
                                    }

                                    // Differing alleles -> middle value set; both equal to the reference
                                    // allele -> first value set; otherwise -> last value set.
                                    if (alle1 != alle2)
                                    {
                                        sw.Write(" 0 1 0");
                                    }
                                    else if (alle1 == snp.DbsnpRefAllele)
                                    {
                                        sw.Write(" 1 0 0");
                                    }
                                    else
                                    {
                                        sw.Write(" 0 0 1");
                                    }
                                }
                            }
                            sw.WriteLine();
                        }
                    }
                }
            }

            return(result);
        }
Пример #5
0
        /// <summary>
        /// Loads a complete PLINK binary data set. The individual (.fam) and locus
        /// (.bim) files are located by changing the extension of
        /// <paramref name="fileName"/>; the binary file itself is then decoded into
        /// the two haplotype matrices of the returned <see cref="PlinkData"/>.
        /// </summary>
        /// <param name="fileName">Path of the binary genotype file.</param>
        /// <returns>The populated data set.</returns>
        /// <exception cref="FileNotFoundException">The .fam or .bim file does not exist.</exception>
        public PlinkData ReadFromFile(string fileName)
        {
            var individualFile = FileUtils.ChangeExtension(fileName, ".fam");
            if (!File.Exists(individualFile))
            {
                throw new FileNotFoundException("File not found: " + individualFile);
            }

            var locusFile = FileUtils.ChangeExtension(fileName, ".bim");
            if (!File.Exists(locusFile))
            {
                throw new FileNotFoundException("File not found: " + locusFile);
            }

            var result = new PlinkData();
            result.Individual = PlinkIndividual.ReadFromFile(individualFile);
            result.Locus = PlinkLocus.ReadFromBimFile(locusFile);
            result.AllocateDataMemory();

            OpenBinaryFile(fileName);
            try
            {
                // A byte holds up to four genotypes as bit pairs 0-1, 2-3, 4-5 and 6-7.
                // A new byte is fetched at the start of every row, so trailing bits
                // of the last byte in a row are ignored.
                if (IsSNPMajor)
                {
                    // Rows are loci, columns are individuals.
                    for (int row = 0; row < result.Locus.Count; row++)
                    {
                        for (int column = 0; column < result.Individual.Count; )
                        {
                            var packed = ReadByte();
                            for (int offset = 0; offset < 7 && column < result.Individual.Count; offset += 2, column++)
                            {
                                result.IsHaplotype1Allele2[row, column] = packed[offset];
                                result.IsHaplotype2Allele2[row, column] = packed[offset + 1];
                            }
                        }
                    }
                }
                else
                {
                    // Rows are individuals, columns are loci; the matrices stay locus-first.
                    for (int row = 0; row < result.Individual.Count; row++)
                    {
                        for (int column = 0; column < result.Locus.Count; )
                        {
                            var packed = ReadByte();
                            for (int offset = 0; offset < 7 && column < result.Locus.Count; offset += 2, column++)
                            {
                                result.IsHaplotype1Allele2[column, row] = packed[offset];
                                result.IsHaplotype2Allele2[column, row] = packed[offset + 1];
                            }
                        }
                    }
                }
            }
            finally
            {
                // Always release the reader, even when decoding fails.
                _reader.Close();
                _reader = null;
            }

            return result;
        }
Пример #6
0
    /// <summary>
    /// Loads the individual (.fam) and locus (.bim) files that accompany
    /// <paramref name="fileName"/>, opens the binary file and inspects its leading
    /// bytes to decide the layout (<see cref="IsSNPMajor"/>). The stream position
    /// reached after the header is stored as the start of the genotype data.
    /// </summary>
    /// <param name="fileName">Path of the binary genotype file.</param>
    /// <exception cref="FileNotFoundException">The .fam or .bim file does not exist.</exception>
    public void OpenBinaryFile(string fileName)
    {
      var individualFile = FileUtils.ChangeExtension(fileName, ".fam");
      if (!File.Exists(individualFile))
      {
        throw new FileNotFoundException("File not found: " + individualFile);
      }

      var locusFile = FileUtils.ChangeExtension(fileName, ".bim");
      if (!File.Exists(locusFile))
      {
        throw new FileNotFoundException("File not found: " + locusFile);
      }

      Data = new PlinkData();
      Data.Individual = PlinkIndividual.ReadFromFile(individualFile);
      Data.Locus = PlinkLocus.ReadFromBimFile(locusFile);
      Data.BuildMap();

      DoOpenFile(fileName);

      BitArray header = ReadByte();

      // A v1.00 file starts with two fixed bytes: the first has exactly bits
      // 2, 3, 5, 6 set, the second exactly bits 0, 1, 3, 4. The third byte
      // carries the layout flag in bit 0.
      bool isV1 = false;
      if (header[2] && header[3] && header[5] && header[6] && !(header[0] || header[1] || header[4] || header[7]))
      {
        header = ReadByte();
        if (header[0] && header[1] && header[3] && header[4] && !(header[2] || header[5] || header[6] || header[7]))
        {
          header = ReadByte();
          IsSNPMajor = header[0];

          Progress.SetMessage(IsSNPMajor
            ? "Detected that binary PED file is v1.00 SNP-major mode\n"
            : "Detected that binary PED file is v1.00 individual-major mode\n");

          isV1 = true;
        }
      }

      if (!isV1)
      {
        // Older format: start over and look at the first byte again.
        Progress.SetMessage("Warning, old BED file <v1.00 : will try to recover...\n");
        DoOpenFile(fileName);
        header = ReadByte();

        if (header[1] || header[2] || header[3] || header[4] || header[5] || header[6] || header[7])
        {
          // Anything beyond bit 0 means there is no header byte at all:
          // assume individual-major and reopen so no byte is skipped.
          Progress.SetMessage(" *** Possible problem: guessing that BED is < v0.99   *** ");
          Progress.SetMessage(" *** High chance of data corruption, spurious results *** ");

          IsSNPMajor = false;
          DoOpenFile(fileName);
        }
        else
        {
          // v0.99: a single header byte whose bit 0 is the layout flag.
          IsSNPMajor = header[0];

          Progress.SetMessage("Binary PED file is v0.99\n");

          Progress.SetMessage(IsSNPMajor
            ? "Detected that binary PED file is in SNP-major mode\n"
            : "Detected that binary PED file is in individual-major mode\n");
        }
      }

      _startPosition = _reader.BaseStream.Position;
    }
        /// <summary>
        /// Loads the individual (.fam) and locus (.bim) files that accompany
        /// <paramref name="fileName"/>, opens the binary file and inspects its leading
        /// bytes to decide the layout (<see cref="IsSNPMajor"/>). The stream position
        /// reached after the header is stored as the start of the genotype data.
        /// </summary>
        /// <param name="fileName">Path of the binary genotype file.</param>
        /// <exception cref="FileNotFoundException">The .fam or .bim file does not exist.</exception>
        public void OpenBinaryFile(string fileName)
        {
            var individualFile = FileUtils.ChangeExtension(fileName, ".fam");
            if (!File.Exists(individualFile))
            {
                throw new FileNotFoundException("File not found: " + individualFile);
            }

            var locusFile = FileUtils.ChangeExtension(fileName, ".bim");
            if (!File.Exists(locusFile))
            {
                throw new FileNotFoundException("File not found: " + locusFile);
            }

            Data = new PlinkData();
            Data.Individual = PlinkIndividual.ReadFromFile(individualFile);
            Data.Locus = PlinkLocus.ReadFromBimFile(locusFile);
            Data.BuildMap();

            DoOpenFile(fileName);

            // v1.00 signature: first byte has exactly bits 2, 3, 5, 6 set,
            // second byte exactly bits 0, 1, 3, 4.
            bool signatureFound = false;
            BitArray current = ReadByte();
            if (current[2] && current[3] && current[5] && current[6] && !(current[0] || current[1] || current[4] || current[7]))
            {
                current = ReadByte();
                signatureFound = current[0] && current[1] && current[3] && current[4] && !(current[2] || current[5] || current[6] || current[7]);
            }

            if (signatureFound)
            {
                // The third byte carries the layout flag in bit 0.
                current = ReadByte();
                IsSNPMajor = current[0];

                if (IsSNPMajor)
                {
                    Progress.SetMessage("Detected that binary PED file is v1.00 SNP-major mode\n");
                }
                else
                {
                    Progress.SetMessage("Detected that binary PED file is v1.00 individual-major mode\n");
                }

                _startPosition = _reader.BaseStream.Position;
                return;
            }

            // Older format: start over and look at the first byte again.
            Progress.SetMessage("Warning, old BED file <v1.00 : will try to recover...\n");
            DoOpenFile(fileName);
            current = ReadByte();

            if (current[1] || current[2] || current[3] || current[4] || current[5] || current[6] || current[7])
            {
                // Anything beyond bit 0 means there is no header byte at all:
                // assume individual-major and reopen so no byte is skipped.
                Progress.SetMessage(" *** Possible problem: guessing that BED is < v0.99   *** ");
                Progress.SetMessage(" *** High chance of data corruption, spurious results *** ");

                IsSNPMajor = false;
                DoOpenFile(fileName);

                _startPosition = _reader.BaseStream.Position;
                return;
            }

            // v0.99: a single header byte whose bit 0 is the layout flag.
            IsSNPMajor = current[0];

            Progress.SetMessage("Binary PED file is v0.99\n");

            if (IsSNPMajor)
            {
                Progress.SetMessage("Detected that binary PED file is in SNP-major mode\n");
            }
            else
            {
                Progress.SetMessage("Detected that binary PED file is in individual-major mode\n");
            }

            _startPosition = _reader.BaseStream.Position;
        }