/// <summary>
/// Opens the bed file named by <c>_options.InputFile</c> and, for every locus, reads its
/// genotype data and tallies the two bits of each individual that
/// <c>PlinkData.IsMissing</c> does not flag as missing: a false bit counts towards
/// allele 1, a true bit towards allele 2. The allele-1 fraction and the sample counts
/// are stored on the locus, and the locus list is finally written with
/// <c>PlinkLocus.WriteToFile</c> to <c>_options.OutputFile</c>.
/// </summary>
/// <remarks>
/// When no individual has a non-missing genotype the fraction is 0/0 in floating point,
/// so <c>Allele1Frequency</c> becomes <c>double.NaN</c>.
/// </remarks>
/// <returns>A one-element array holding <c>_options.OutputFile</c>.</returns>
public override IEnumerable<string> Process()
        {
            using (var bed = new PlinkBedRandomFile(_options.InputFile) { Progress = this.Progress })
            {
                var loci    = bed.Data.Locus;
                var samples = bed.Data.Individual;

                Progress.SetRange(0, loci.Count);
                for (int locusIndex = 0; locusIndex < loci.Count; locusIndex++)
                {
                    Progress.SetPosition(locusIndex);

                    var current  = loci[locusIndex];
                    var genotype = bed.Read(current.MarkerId);

                    int trueBits = 0; // bits counted towards allele 2
                    int called   = 0; // individuals whose genotype is not missing
                    for (int s = 0; s < samples.Count; s++)
                    {
                        if (!PlinkData.IsMissing(genotype[0, s], genotype[1, s]))
                        {
                            called++;
                            trueBits += genotype[0, s] ? 1 : 0;
                            trueBits += genotype[1, s] ? 1 : 0;
                        }
                    }

                    // Every called individual contributes exactly two bits, so the
                    // allele-1 (false bit) count is whatever the true bits leave over.
                    int totalBits = 2 * called;
                    int falseBits = totalBits - trueBits;

                    current.Allele1Frequency = (double)falseBits / totalBits;
                    current.TotalSample      = samples.Count;
                    current.ValidSample      = called;
                }

                PlinkLocus.WriteToFile(_options.OutputFile, loci, false, true, true);
            }

            return new string[] { _options.OutputFile };
        }
    /// <summary>
    /// Walks every locus of the bed file given by <c>_options.InputFile</c>, counts allele
    /// bits over the individuals that <c>PlinkData.IsMissing</c> does not report as missing
    /// (false bit = allele 1, true bit = allele 2), records the allele-1 fraction together
    /// with the total and valid sample counts on the locus, and writes the locus list to
    /// <c>_options.OutputFile</c> through <c>PlinkLocus.WriteToFile</c>.
    /// </summary>
    /// <returns>An array whose only element is <c>_options.OutputFile</c>.</returns>
    public override IEnumerable<string> Process()
    {
      using (var reader = new PlinkBedRandomFile(_options.InputFile) { Progress = this.Progress })
      {
        var markers = reader.Data.Locus;
        var people = reader.Data.Individual;

        Progress.SetRange(0, markers.Count);
        for (int markerIndex = 0; markerIndex < markers.Count; markerIndex++)
        {
          Progress.SetPosition(markerIndex);

          var marker = markers[markerIndex];
          var calls = reader.Read(marker.MarkerId);

          int allele1Count = 0, allele2Count = 0, nonMissing = 0;
          for (int person = 0; person < people.Count; person++)
          {
            var firstBit = calls[0, person];
            var secondBit = calls[1, person];

            if (!PlinkData.IsMissing(firstBit, secondBit))
            {
              nonMissing++;

              // A true bit is tallied as allele 2, a false bit as allele 1.
              if (firstBit) { allele2Count++; } else { allele1Count++; }
              if (secondBit) { allele2Count++; } else { allele1Count++; }
            }
          }

          // The divisor is zero when nothing was called; the double division then yields NaN.
          int observedAlleles = allele1Count + allele2Count;
          marker.Allele1Frequency = (double)allele1Count / observedAlleles;
          marker.TotalSample = people.Count;
          marker.ValidSample = nonMissing;
        }

        PlinkLocus.WriteToFile(_options.OutputFile, markers, false, true, true);
      }

      return new string[] { _options.OutputFile };
    }
    /// <summary>
    /// Opens the sample bed file, reads two markers by id and checks the first five
    /// columns of rows 0 and 1 of each result against the expected bit tables below.
    /// The file is closed in a <c>finally</c> block once reading has started.
    /// </summary>
    public void TestMethod()
    {
      // One expected table per marker; table[row, column] mirrors data[row, column].
      var markerIds = new string[] { "RS168753", "RS11267092" };
      var expectedTables = new bool[][,]
      {
        new bool[,]
        {
          { true, false, false, false, true },
          { true, false, true, true, true }
        },
        new bool[,]
        {
          { true, false, false, false, true },
          { true, true, true, false, true }
        }
      };

      var file = new PlinkBedRandomFile();
      file.OpenBinaryFile("../../../data/plink/plink.bed");
      try
      {
        for (int markerIndex = 0; markerIndex < markerIds.Length; markerIndex++)
        {
          var data = file.Read(markerIds[markerIndex]);
          var expected = expectedTables[markerIndex];

          // Row-major order: all of row 0 is checked before row 1.
          for (int row = 0; row < expected.GetLength(0); row++)
          {
            for (int column = 0; column < expected.GetLength(1); column++)
            {
              if (expected[row, column])
              {
                Assert.IsTrue(data[row, column]);
              }
              else
              {
                Assert.IsFalse(data[row, column]);
              }
            }
          }
        }
      }
      finally
      {
        file.Close();
      }
    }
    /// <summary>
    /// Converts the PLINK bed/bim input into one <c>.gen</c> file (plus a matching
    /// <c>.sample</c> file) per chromosome.
    /// <para>
    /// Steps, in order: read the loci from the <c>.bim</c> file next to
    /// <c>options.InputFile</c> and drop those matched by <c>IsIndel</c>/<c>IsMissing</c>;
    /// call <c>FillDbsnpIdByPosition</c> and write the returned name map to
    /// <c>{OutputPrefix}.namemap</c>; drop items whose <c>DbsnpRefAllele</c> is a blank;
    /// report duplicated names on standard error; optionally annotate from the 1000
    /// genomes and fasta files when they exist; write <c>{OutputPrefix}.stat</c> with the
    /// action returned by <c>SuggestAction</c> for every item; finally write the genotype
    /// files.
    /// </para>
    /// </summary>
    /// <returns>
    /// The <c>.stat</c> file followed by the <c>.gen</c> file of every chromosome. The
    /// <c>.namemap</c> and <c>.sample</c> files are written but not included.
    /// </returns>
    public override IEnumerable<string> Process()
    {
      var result = new List<string>();

      var bimfile = Path.ChangeExtension(options.InputFile, ".bim");

      var snps = PlinkLocus.ReadFromBimFile(bimfile, false, false);
      snps.RemoveAll(m => IsIndel(m) || IsMissing(m));

      var snpItems = (from snp in snps
                      select new SNPItem()
                      {
                        Chrom = snp.Chromosome,
                        Name = snp.MarkerId,
                        Position = snp.PhysicalPosition,
                        Allele1 = snp.Allele1[0],
                        Allele2 = snp.Allele2
                      }).ToList();

      // The returned map is later used to look genotypes up in the bed file by the
      // value stored under the item's (possibly updated) name.
      var nameMap = snpItems.FillDbsnpIdByPosition(options.DbsnpFile, this.Progress);

      using (var sw = new StreamWriter(options.OutputPrefix + ".namemap"))
      {
        sw.WriteLine("NewName\tOldName");
        foreach (var n in nameMap)
        {
          sw.WriteLine("{0}\t{1}", n.Key, n.Value);
        }
      }

      //remove all snps without corresponding dbsnp entry
      snpItems.RemoveAll(m => m.DbsnpRefAllele == ' ');

      // Duplicated names are only reported; they stay in snpItems.
      var nameDic = snpItems.ToGroupDictionary(m => m.Name);
      foreach (var n in nameDic)
      {
        if (n.Value.Count > 1)
        {
          Console.Error.WriteLine("Duplicated SNP:" + n.Key);
          foreach (var v in n.Value)
          {
            Console.Error.WriteLine("{0}:{1}-{2}:{3},{4}:{5},{6}", n.Key, v.Chrom, v.Position, v.Allele1, v.Allele2, v.DbsnpRefAllele, v.DbsnpAltAllele);
          }
        }
      }

      if (File.Exists(options.G1000File))
      {
        snpItems.FindAllele2FrequencyFrom1000GomeByName(options.G1000File, this.Progress);
      }

      if (File.Exists(options.FastaFile))
      {
        snpItems.FillReferenceAlleleFromFasta(options.FastaFile, this.Progress);
      }

      var actionMap = new Dictionary<string, StrandAction>();

      var statFile = options.OutputPrefix + ".stat";
      result.Add(statFile);
      using (var sw = new StreamWriter(statFile))
      {
        sw.WriteLine("Name\tChromosome\tPosition\tSource_Allele1\tSource_Allele2\tReference_Allele\tDbsnp_RefAllele\tDbsnp_AltAllele\tDbsnp_IsReversed\tG1000_RefAllele\tG1000_AltAllele\tG1000_MAF\tAction");

        // The stat file is tab-separated data meant to be parsed again, so numbers are
        // formatted with the invariant culture: the MAF column always uses '.' as the
        // decimal separator, whatever the regional settings of the machine are.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        foreach (var v in snpItems)
        {
          StrandAction action = v.SuggestAction();
          sw.WriteLine(string.Format(invariant, "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11:0.####}\t{12}", v.Name, v.Chrom, v.Position, v.Allele1, v.Allele2, v.RefChar, v.DbsnpRefAllele, v.DbsnpAltAllele, v.DbsnpIsReversed, v.G1000Allele1, v.G1000Allele2, v.G1000Allele2Frequency, action));
          // With duplicated names the action of the last item wins.
          actionMap[v.Name] = action;
        }
      }

      using (var reader = new PlinkBedRandomFile(options.InputFile) { Progress = this.Progress })
      {
        var data = reader.Data;

        var chrs = (from v in snpItems select v.Chrom).Distinct().OrderBy(m => m).ToArray();
        foreach (var chr in chrs)
        {
          var genfile = string.Format("{0}.{1}.gen", options.OutputPrefix, chr.ToString().PadLeft(2, '0'));
          result.Add(genfile);
          var map = FileUtils.ChangeExtension(genfile, ".sample");

          new GwasSampleFormat().WriteToFile(map, data.Individual);

          //save gen file
          using (var sw = new StreamWriter(genfile))
          {
            sw.NewLine = Environment.NewLine;
            var chrItems = snpItems.Where(m => m.Chrom == chr).ToList();
            GenomeUtils.SortChromosome(chrItems, m => chr.ToString(), m => m.Position);
            foreach (var snp in chrItems)
            {
              var ldata = reader.Read(nameMap[snp.Name]);
              var action = actionMap[snp.Name];

              // The strand action belongs to the SNP, not to the individual, so the
              // bit-to-allele mapping is decided once per SNP instead of once per genotype.
              bool switched = StrandAction.Switch == action || StrandAction.FlipSwitch == action;
              char alleleIfSet = switched ? snp.DbsnpAltAllele : snp.DbsnpRefAllele;
              char alleleIfClear = switched ? snp.DbsnpRefAllele : snp.DbsnpAltAllele;

              sw.Write("{0} {1} {2} {3} {4}", snp.Chrom, snp.Name, snp.Position, snp.DbsnpRefAllele, snp.DbsnpAltAllele);
              for (int individualIndex = 0; individualIndex < data.Individual.Count; individualIndex++)
              {
                if (PlinkData.IsMissing(ldata[0, individualIndex], ldata[1, individualIndex]))
                {
                  // Missing genotype: all three values are zero.
                  sw.Write(" 0 0 0");
                }
                else
                {
                  char alle1 = ldata[0, individualIndex] ? alleleIfSet : alleleIfClear;
                  char alle2 = ldata[1, individualIndex] ? alleleIfSet : alleleIfClear;

                  // Three values per individual: both alleles equal to the dbSNP
                  // reference allele / alleles differ / both equal to the other allele.
                  if (alle1 != alle2)
                  {
                    sw.Write(" 0 1 0");
                  }
                  else if (alle1 == snp.DbsnpRefAllele)
                  {
                    sw.Write(" 1 0 0");
                  }
                  else
                  {
                    sw.Write(" 0 0 1");
                  }
                }
              }
              sw.WriteLine();
            }
          }
        }
      }

      return result;
    }
Example #5
0
        /// <summary>
        /// Turns the PLINK bed/bim input into per-chromosome <c>.gen</c> and
        /// <c>.sample</c> files.
        /// <para>
        /// The loci are read from the <c>.bim</c> file derived from
        /// <c>options.InputFile</c>; entries matched by <c>IsIndel</c> or
        /// <c>IsMissing</c> are discarded. <c>FillDbsnpIdByPosition</c> supplies a name
        /// map that is saved as <c>{OutputPrefix}.namemap</c>, items whose
        /// <c>DbsnpRefAllele</c> is a blank are removed, duplicated names are reported on
        /// standard error, and the 1000 genomes / fasta annotations are applied when the
        /// respective file exists. The action from <c>SuggestAction</c> is written for
        /// each item to <c>{OutputPrefix}.stat</c> and then used while writing the
        /// genotype files.
        /// </para>
        /// </summary>
        /// <returns>
        /// The <c>.stat</c> file and every <c>.gen</c> file, in the order they were
        /// written. The <c>.namemap</c> and <c>.sample</c> files are not listed.
        /// </returns>
        public override IEnumerable<string> Process()
        {
            var result = new List<string>();

            var bimfile = Path.ChangeExtension(options.InputFile, ".bim");

            var snps = PlinkLocus.ReadFromBimFile(bimfile, false, false);

            snps.RemoveAll(m => IsIndel(m) || IsMissing(m));

            var snpItems = (from snp in snps
                            select new SNPItem()
                            {
                                Chrom = snp.Chromosome,
                                Name = snp.MarkerId,
                                Position = snp.PhysicalPosition,
                                Allele1 = snp.Allele1[0],
                                Allele2 = snp.Allele2
                            }).ToList();

            // Kept for the genotype pass below, where the bed file is read with the
            // value stored under each item's current name.
            var nameMap = snpItems.FillDbsnpIdByPosition(options.DbsnpFile, this.Progress);

            using (var sw = new StreamWriter(options.OutputPrefix + ".namemap"))
            {
                sw.WriteLine("NewName\tOldName");
                foreach (var n in nameMap)
                {
                    sw.WriteLine("{0}\t{1}", n.Key, n.Value);
                }
            }

            //remove all snps without corresponding dbsnp entry
            snpItems.RemoveAll(m => m.DbsnpRefAllele == ' ');

            // Diagnostics only: duplicated names are printed but not removed.
            var nameDic = snpItems.ToGroupDictionary(m => m.Name);

            foreach (var n in nameDic)
            {
                if (n.Value.Count > 1)
                {
                    Console.Error.WriteLine("Duplicated SNP:" + n.Key);
                    foreach (var v in n.Value)
                    {
                        Console.Error.WriteLine("{0}:{1}-{2}:{3},{4}:{5},{6}", n.Key, v.Chrom, v.Position, v.Allele1, v.Allele2, v.DbsnpRefAllele, v.DbsnpAltAllele);
                    }
                }
            }

            if (File.Exists(options.G1000File))
            {
                snpItems.FindAllele2FrequencyFrom1000GomeByName(options.G1000File, this.Progress);
            }

            if (File.Exists(options.FastaFile))
            {
                snpItems.FillReferenceAlleleFromFasta(options.FastaFile, this.Progress);
            }

            var actionMap = new Dictionary<string, StrandAction>();

            var statFile = options.OutputPrefix + ".stat";

            result.Add(statFile);
            using (var sw = new StreamWriter(statFile))
            {
                sw.WriteLine("Name\tChromosome\tPosition\tSource_Allele1\tSource_Allele2\tReference_Allele\tDbsnp_RefAllele\tDbsnp_AltAllele\tDbsnp_IsReversed\tG1000_RefAllele\tG1000_AltAllele\tG1000_MAF\tAction");

                // Use the invariant culture for the data rows: under a culture with a
                // decimal comma the "{11:0.####}" column would otherwise be written as
                // e.g. "0,25", which breaks consumers of this tab-separated file.
                var invariant = System.Globalization.CultureInfo.InvariantCulture;

                foreach (var v in snpItems)
                {
                    StrandAction action = v.SuggestAction();
                    var line = string.Format(invariant, "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11:0.####}\t{12}", v.Name, v.Chrom, v.Position, v.Allele1, v.Allele2, v.RefChar, v.DbsnpRefAllele, v.DbsnpAltAllele, v.DbsnpIsReversed, v.G1000Allele1, v.G1000Allele2, v.G1000Allele2Frequency, action);
                    sw.WriteLine(line);
                    // A later item with the same name overwrites the earlier action.
                    actionMap[v.Name] = action;
                }
            }

            using (var reader = new PlinkBedRandomFile(options.InputFile)
            {
                Progress = this.Progress
            })
            {
                var data = reader.Data;

                var chrs = (from v in snpItems select v.Chrom).Distinct().OrderBy(m => m).ToArray();
                foreach (var chr in chrs)
                {
                    var genfile = string.Format("{0}.{1}.gen", options.OutputPrefix, chr.ToString().PadLeft(2, '0'));
                    result.Add(genfile);
                    var map = FileUtils.ChangeExtension(genfile, ".sample");

                    new GwasSampleFormat().WriteToFile(map, data.Individual);

                    //save gen file
                    using (var sw = new StreamWriter(genfile))
                    {
                        sw.NewLine = Environment.NewLine;
                        var chrItems = snpItems.Where(m => m.Chrom == chr).ToList();
                        GenomeUtils.SortChromosome(chrItems, m => chr.ToString(), m => m.Position);
                        foreach (var snp in chrItems)
                        {
                            var ldata  = reader.Read(nameMap[snp.Name]);
                            var action = actionMap[snp.Name];

                            // Loop-invariant for this SNP: which allele a set bit and a
                            // clear bit stand for, depending on the strand action.
                            bool switched      = StrandAction.Switch == action || StrandAction.FlipSwitch == action;
                            char alleleIfSet   = switched ? snp.DbsnpAltAllele : snp.DbsnpRefAllele;
                            char alleleIfClear = switched ? snp.DbsnpRefAllele : snp.DbsnpAltAllele;

                            sw.Write("{0} {1} {2} {3} {4}", snp.Chrom, snp.Name, snp.Position, snp.DbsnpRefAllele, snp.DbsnpAltAllele);
                            for (int individualIndex = 0; individualIndex < data.Individual.Count; individualIndex++)
                            {
                                if (PlinkData.IsMissing(ldata[0, individualIndex], ldata[1, individualIndex]))
                                {
                                    // Missing genotype: three zeros.
                                    sw.Write(" 0 0 0");
                                    continue;
                                }

                                char alle1 = ldata[0, individualIndex] ? alleleIfSet : alleleIfClear;
                                char alle2 = ldata[1, individualIndex] ? alleleIfSet : alleleIfClear;

                                // Value triple: alleles differ -> middle slot; both equal
                                // the dbSNP reference allele -> first slot; else last slot.
                                if (alle1 != alle2)
                                {
                                    sw.Write(" 0 1 0");
                                }
                                else if (alle1 == snp.DbsnpRefAllele)
                                {
                                    sw.Write(" 1 0 0");
                                }
                                else
                                {
                                    sw.Write(" 0 0 1");
                                }
                            }
                            sw.WriteLine();
                        }
                    }
                }
            }

            return result;
        }