/// <summary>
/// Opens the input through <c>PlinkBedRandomFile</c>, reads the genotype data of each locus in turn,
/// counts the allele copies among non-missing samples and stores the statistics on the locus.
/// The annotated locus list is then written to the configured output file.
/// </summary>
/// <returns>A single-element array holding the output file name.</returns>
public override IEnumerable <string> Process()
        {
            using (var bed = new PlinkBedRandomFile(_options.InputFile) { Progress = this.Progress })
            {
                var loci        = bed.Data.Locus;
                var individuals = bed.Data.Individual;

                Progress.SetRange(0, loci.Count);
                for (int locusIndex = 0; locusIndex < loci.Count; locusIndex++)
                {
                    Progress.SetPosition(locusIndex);

                    var current   = loci[locusIndex];
                    var genotypes = bed.Read(current.MarkerId);

                    int allele1Copies = 0;
                    int allele2Copies = 0;
                    int calledSamples = 0;
                    for (int sample = 0; sample < individuals.Count; sample++)
                    {
                        if (!PlinkData.IsMissing(genotypes[0, sample], genotypes[1, sample]))
                        {
                            calledSamples++;

                            // Each set flag is one allele-2 copy; each cleared flag is one allele-1 copy.
                            int setFlags = (genotypes[0, sample] ? 1 : 0) + (genotypes[1, sample] ? 1 : 0);
                            allele2Copies += setFlags;
                            allele1Copies += 2 - setFlags;
                        }
                    }

                    // This variant stores the share of allele-1 copies (cleared flags).
                    // The result is NaN when no sample was called (0.0 / 0).
                    current.Allele1Frequency = (double)allele1Copies / (allele1Copies + allele2Copies);
                    current.TotalSample      = individuals.Count;
                    current.ValidSample      = calledSamples;
                }

                PlinkLocus.WriteToFile(_options.OutputFile, loci, false, true, true);
            }

            return new string[] { _options.OutputFile };
        }
        /// <summary>
        /// Loads the whole data set through the reader supplied by the options, counts the allele
        /// copies of every locus among non-missing samples and stores the statistics on the locus.
        /// The annotated locus list is then written to the configured output file.
        /// </summary>
        /// <returns>A single-element array holding the output file name.</returns>
        public override IEnumerable <string> Process()
        {
            Progress.SetMessage("Reading data from " + _options.InputFile + "...");
            var plink       = _options.GetFileReader().ReadFromFile(_options.InputFile);
            var loci        = plink.Locus;
            var individuals = plink.Individual;

            for (int locusIndex = 0; locusIndex < loci.Count; locusIndex++)
            {
                var current = loci[locusIndex];

                int allele1Copies = 0;
                int allele2Copies = 0;
                int calledSamples = 0;
                for (int sample = 0; sample < individuals.Count; sample++)
                {
                    if (!plink.IsMissing(locusIndex, sample))
                    {
                        calledSamples++;

                        // Each set haplotype flag is one allele-2 copy; each cleared flag is one allele-1 copy.
                        int setFlags = (plink.IsHaplotype1Allele2[locusIndex, sample] ? 1 : 0)
                                       + (plink.IsHaplotype2Allele2[locusIndex, sample] ? 1 : 0);
                        allele2Copies += setFlags;
                        allele1Copies += 2 - setFlags;
                    }
                }

                // The stored value is the share of allele-2 copies.
                // The result is NaN when no sample was called (0.0 / 0).
                current.Allele1Frequency = (double)allele2Copies / (allele1Copies + allele2Copies);
                current.TotalSample      = individuals.Count;
                current.ValidSample      = calledSamples;
            }

            PlinkLocus.WriteToFile(_options.OutputFile, loci, false, true);

            return new string[] { _options.OutputFile };
        }
    /// <summary>
    /// Reads a space-delimited genotype file in which every line describes one locus:
    /// chromosome, marker id, physical position, allele 1, allele 2, followed by one triplet of
    /// fields per sample. A "1" in the first, second or third field of a triplet is counted as
    /// two allele-1 copies, one copy of each allele, or two allele-2 copies respectively.
    /// The share of allele-2 copies is stored in <c>Allele1Frequency</c> and the locus list is
    /// written to the configured output file.
    /// </summary>
    /// <returns>A single-element array holding the output file name.</returns>
    public override IEnumerable<string> Process()
    {
      var locusList = new List<PlinkLocus>();

      using (var sr = new StreamReader(_options.InputFile))
      {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
          // Blank lines (e.g. a trailing newline at end of file) carry no locus; skipping them
          // avoids a FormatException from int.Parse on an empty field.
          if (string.IsNullOrEmpty(line.Trim()))
          {
            continue;
          }

          var parts = line.Split(' ');
          var locus = new PlinkLocus()
          {
            Chromosome = int.Parse(parts[0]),
            MarkerId = parts[1],
            PhysicalPosition = int.Parse(parts[2]),
            Allele1 = parts[3],
            Allele2 = parts[4]
          };
          locusList.Add(locus);

          var count1 = 0;
          var count2 = 0;

          // Only complete triplets are examined: a trailing space or a truncated line leaves
          // fewer than three fields at the end, which previously caused an
          // IndexOutOfRangeException on parts[i + 1] / parts[i + 2].
          for (int i = 5; i + 2 < parts.Length; i += 3)
          {
            if (parts[i].Equals("1"))
            {
              count1 += 2;
            }
            else if (parts[i + 1].Equals("1"))
            {
              count1++;
              count2++;
            }
            else if (parts[i + 2].Equals("1"))
            {
              count2 += 2;
            }
            else
            {//unknown, ignore 
              Console.Error.WriteLine(string.Format("Unknown, name={0}, i={1}, genotype={2} {3} {4}", locus.MarkerId, i, parts[i], parts[i + 1], parts[i + 2]));
            }
          }

          // NaN when no triplet of this locus was recognised (0.0 / 0).
          locus.Allele1Frequency = ((double)(count2)) / (count1 + count2);
        }
      }

      PlinkLocus.WriteToFile(_options.OutputFile, locusList, false, true);

      return new string[] { _options.OutputFile };
    }
Example #4
0
        /// <summary>
        /// Reads a space-delimited genotype file in which every line describes one locus:
        /// chromosome, marker id, physical position, allele 1, allele 2, followed by one triplet of
        /// fields per sample. A "1" in the first, second or third field of a triplet is counted as
        /// two allele-1 copies, one copy of each allele, or two allele-2 copies respectively.
        /// The share of allele-2 copies is stored in <c>Allele1Frequency</c> and the locus list is
        /// written to the configured output file.
        /// </summary>
        /// <returns>A single-element array holding the output file name.</returns>
        public override IEnumerable <string> Process()
        {
            var locusList = new List <PlinkLocus>();

            using (var sr = new StreamReader(_options.InputFile))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // Blank lines (e.g. a trailing newline at end of file) carry no locus; skipping
                    // them avoids a FormatException from int.Parse on an empty field.
                    if (string.IsNullOrEmpty(line.Trim()))
                    {
                        continue;
                    }

                    var parts = line.Split(' ');
                    var locus = new PlinkLocus()
                    {
                        Chromosome       = int.Parse(parts[0]),
                        MarkerId         = parts[1],
                        PhysicalPosition = int.Parse(parts[2]),
                        Allele1          = parts[3],
                        Allele2          = parts[4]
                    };
                    locusList.Add(locus);

                    var count1 = 0;
                    var count2 = 0;

                    // Only complete triplets are examined: a trailing space or a truncated line
                    // leaves fewer than three fields at the end, which previously caused an
                    // IndexOutOfRangeException on parts[i + 1] / parts[i + 2].
                    for (int i = 5; i + 2 < parts.Length; i += 3)
                    {
                        if (parts[i].Equals("1"))
                        {
                            count1 += 2;
                        }
                        else if (parts[i + 1].Equals("1"))
                        {
                            count1++;
                            count2++;
                        }
                        else if (parts[i + 2].Equals("1"))
                        {
                            count2 += 2;
                        }
                        else
                        {//unknown, ignore
                            Console.Error.WriteLine(string.Format("Unknown, name={0}, i={1}, genotype={2} {3} {4}", locus.MarkerId, i, parts[i], parts[i + 1], parts[i + 2]));
                        }
                    }

                    // NaN when no triplet of this locus was recognised (0.0 / 0).
                    locus.Allele1Frequency = ((double)(count2)) / (count1 + count2);
                }
            }

            PlinkLocus.WriteToFile(_options.OutputFile, locusList, false, true);

            return(new string[] { _options.OutputFile });
        }
        /// <summary>
        /// Parses a tab-delimited bim file into a list of loci. Empty lines and lines whose
        /// marker id (second column) is empty are skipped.
        /// </summary>
        /// <param name="fileName">bim file</param>
        /// <param name="hasPlatform">When true, two extra columns (platform, valid platform count) are read after the allele columns.</param>
        /// <param name="hasAllele2Freqency">When true, three further columns are read into Allele1Frequency, TotalSample and ValidSample.</param>
        /// <returns>list of PlinkLocus</returns>
        public static List <PlinkLocus> ReadFromBimFile(string fileName, bool hasPlatform = false, bool hasAllele2Freqency = false)
        {
            var loci = new List <PlinkLocus>();

            using (var reader = new StreamReader(fileName))
            {
                for (var raw = reader.ReadLine(); raw != null; raw = reader.ReadLine())
                {
                    var text = raw.Trim();
                    if (string.IsNullOrEmpty(text))
                    {
                        continue;
                    }

                    var fields = text.Split('\t');
                    if (string.IsNullOrEmpty(fields[1]))
                    {
                        continue;
                    }

                    // The six mandatory columns.
                    var locus = new PlinkLocus
                    {
                        Chromosome       = int.Parse(fields[0]),
                        MarkerId         = fields[1],
                        GeneticDistance  = double.Parse(fields[2]),
                        PhysicalPosition = int.Parse(fields[3]),
                        Allele1          = fields[4],
                        Allele2          = fields[5]
                    };

                    // Optional columns start right after the alleles; the platform pair, when
                    // present, shifts the frequency triple two columns to the right.
                    var next = 6;
                    if (hasPlatform)
                    {
                        locus.Platform           = fields[next];
                        locus.ValidPlatformCount = int.Parse(fields[next + 1]);
                        next += 2;
                    }

                    if (hasAllele2Freqency)
                    {
                        locus.Allele1Frequency = double.Parse(fields[next]);
                        locus.TotalSample      = int.Parse(fields[next + 1]);
                        locus.ValidSample      = int.Parse(fields[next + 2]);
                    }

                    loci.Add(locus);
                }
            }

            return loci;
        }
Example #6
0
    /// <summary>
    /// Parses a tab-delimited bim file into a list of loci. Empty lines and lines whose
    /// marker id (second column) is empty are skipped.
    /// </summary>
    /// <param name="fileName">bim file</param>
    /// <param name="hasPlatform">When true, two extra columns (platform, valid platform count) are read after the allele columns.</param>
    /// <param name="hasAllele2Freqency">When true, three further columns are read into Allele1Frequency, TotalSample and ValidSample.</param>
    /// <returns>list of PlinkLocus</returns>
    public static List<PlinkLocus> ReadFromBimFile(string fileName, bool hasPlatform = false, bool hasAllele2Freqency = false)
    {
      var loci = new List<PlinkLocus>();

      using (var reader = new StreamReader(fileName))
      {
        string raw;
        while ((raw = reader.ReadLine()) != null)
        {
          var text = raw.Trim();
          if (string.IsNullOrEmpty(text))
          {
            continue;
          }

          var fields = text.Split('\t');
          if (string.IsNullOrEmpty(fields[1]))
          {
            continue;
          }

          // The six mandatory columns.
          var locus = new PlinkLocus
          {
            Chromosome = int.Parse(fields[0]),
            MarkerId = fields[1],
            GeneticDistance = double.Parse(fields[2]),
            PhysicalPosition = int.Parse(fields[3]),
            Allele1 = fields[4],
            Allele2 = fields[5]
          };

          // Column where the optional frequency triple starts; the platform pair, when
          // present, occupies columns 6 and 7 and pushes it to column 8.
          var frequencyColumn = 6;
          if (hasPlatform)
          {
            locus.Platform = fields[6];
            locus.ValidPlatformCount = int.Parse(fields[7]);
            frequencyColumn = 8;
          }

          if (hasAllele2Freqency)
          {
            locus.Allele1Frequency = double.Parse(fields[frequencyColumn]);
            locus.TotalSample = int.Parse(fields[frequencyColumn + 1]);
            locus.ValidSample = int.Parse(fields[frequencyColumn + 2]);
          }

          loci.Add(locus);
        }
      }

      return loci;
    }
        /// <summary>
        /// Read locus from map file of ped format. The file is tab-delimited; empty lines and
        /// lines whose marker id (second column) is empty are skipped. When a line has fewer than
        /// six columns both alleles are set to <c>MISSING</c>.
        /// </summary>
        /// <param name="fileName">map file</param>
        /// <returns>list of PlinkLocus</returns>
        public static List <PlinkLocus> ReadFromMapFile(string fileName)
        {
            var result = new List <PlinkLocus>();

            using (var sr = new StreamReader(fileName))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    line = line.Trim();
                    if (string.IsNullOrEmpty(line))
                    {
                        continue;
                    }

                    var parts = line.Split('\t');
                    if (string.IsNullOrEmpty(parts[1]))
                    {
                        continue;
                    }

                    var locus = new PlinkLocus();
                    locus.Chromosome       = int.Parse(parts[0]);
                    locus.MarkerId         = parts[1];
                    // Genetic distance may be fractional; parse it as a double, exactly as
                    // ReadFromBimFile does for the same column. int.Parse threw a
                    // FormatException on values such as "0.5".
                    locus.GeneticDistance  = double.Parse(parts[2]);
                    locus.PhysicalPosition = int.Parse(parts[3]);
                    if (parts.Length >= 6)
                    {
                        locus.Allele1 = parts[4];
                        locus.Allele2 = parts[5];
                    }
                    else
                    {
                        locus.Allele1 = MISSING;
                        locus.Allele2 = MISSING;
                    }
                    result.Add(locus);
                }
            }

            return(result);
        }
Example #8
0
        /// <summary>
        /// Builds a <c>PlinkData</c> whose locus list is loaded from the ".tmap" file next to
        /// <paramref name="fileName"/>, or from the ".map" file when no ".tmap" file exists.
        /// </summary>
        /// <param name="fileName">File whose extension is replaced to locate the map file.</param>
        /// <returns>A PlinkData instance with only <c>Locus</c> assigned by this method.</returns>
        /// <exception cref="FileNotFoundException">Neither candidate file exists; the message names the ".map" path.</exception>
        private static PlinkData ReadLocus(string fileName)
        {
            var data = new PlinkData();

            // Candidates in priority order; the last one tried is the one reported on failure.
            string candidate = null;
            foreach (var extension in new[] { ".tmap", ".map" })
            {
                candidate = FileUtils.ChangeExtension(fileName, extension);
                if (File.Exists(candidate))
                {
                    data.Locus = PlinkLocus.ReadFromMapFile(candidate);
                    return data;
                }
            }

            throw new FileNotFoundException("File not found: " + candidate);
        }
 /// <summary>
 /// Returns true when either allele is not exactly one character long, or when either
 /// allele is the code "I" or "D".
 /// </summary>
 private static bool IsIndel(PlinkLocus m)
 {
   if (m.Allele1.Length != 1 || m.Allele2.Length != 1)
   {
     return true;
   }

   foreach (var allele in new[] { m.Allele1, m.Allele2 })
   {
     if (allele.Equals("I") || allele.Equals("D"))
     {
       return true;
     }
   }

   return false;
 }
 /// <summary>
 /// Returns true when both alleles of the locus are the string "0".
 /// </summary>
 private static bool IsMissing(PlinkLocus m)
 {
   if (!m.Allele1.Equals("0"))
   {
     return false;
   }

   return m.Allele2.Equals("0");
 }
Example #11
0
        /// <summary>
        /// Converts the input data set into one space-delimited ".gen" file (plus a ".sample" file)
        /// per chromosome. Loci are read from the ".bim" file next to the input, filtered, annotated
        /// through dbSNP / 1000 Genomes / FASTA helpers, and a ".namemap" and a ".stat" file are
        /// written along the way.
        /// </summary>
        /// <returns>
        /// The ".stat" file name followed by the ".gen" file names. The ".namemap" and ".sample"
        /// files are written but not included in the returned list.
        /// </returns>
        public override IEnumerable <string> Process()
        {
            var result = new List <string>();

            var bimfile = Path.ChangeExtension(options.InputFile, ".bim");

            // Plain bim layout: no platform columns and no frequency columns.
            var snps = PlinkLocus.ReadFromBimFile(bimfile, false, false);

            // Drop loci flagged by IsIndel or IsMissing before building SNP items.
            snps.RemoveAll(m => IsIndel(m) || IsMissing(m));

            var snpItems = (from snp in snps
                            select new SNPItem()
            {
                Chrom = snp.Chromosome,
                Name = snp.MarkerId,
                Position = snp.PhysicalPosition,
                Allele1 = snp.Allele1[0],
                Allele2 = snp.Allele2
            }).ToList();

            // NOTE(review): presumably renames items to their dbSNP id and returns new-name -> old-name
            // pairs (the map is written under that header below and later used to look up the
            // original marker in the bed reader) - confirm against FillDbsnpIdByPosition.
            var nameMap = snpItems.FillDbsnpIdByPosition(options.DbsnpFile, this.Progress);

            using (var sw = new StreamWriter(options.OutputPrefix + ".namemap"))
            {
                sw.WriteLine("NewName\tOldName");
                foreach (var n in nameMap)
                {
                    sw.WriteLine("{0}\t{1}", n.Key, n.Value);
                }
            }

            //remove all snps without corresponding dbsnp entry
            snpItems.RemoveAll(m => m.DbsnpRefAllele == ' ');

            var nameDic = snpItems.ToGroupDictionary(m => m.Name);

            // Report (on stderr) every name shared by more than one item; the items are kept.
            foreach (var n in nameDic)
            {
                if (n.Value.Count > 1)
                {
                    Console.Error.WriteLine("Duplicated SNP:" + n.Key);
                    foreach (var v in n.Value)
                    {
                        Console.Error.WriteLine("{0}:{1}-{2}:{3},{4}:{5},{6}", n.Key, v.Chrom, v.Position, v.Allele1, v.Allele2, v.DbsnpRefAllele, v.DbsnpAltAllele);
                    }
                }
            }

            // The 1000 Genomes and FASTA annotation steps run only when their files exist.
            if (File.Exists(options.G1000File))
            {
                snpItems.FindAllele2FrequencyFrom1000GomeByName(options.G1000File, this.Progress);
            }

            if (File.Exists(options.FastaFile))
            {
                snpItems.FillReferenceAlleleFromFasta(options.FastaFile, this.Progress);
            }

            // Suggested strand action per SNP name, filled while writing the stat file and
            // consulted again when genotypes are written.
            Dictionary <string, StrandAction> actionMap = new Dictionary <string, StrandAction>();

            var statFile = options.OutputPrefix + ".stat";

            result.Add(statFile);
            using (var sw = new StreamWriter(statFile))
            {
                sw.WriteLine("Name\tChromosome\tPosition\tSource_Allele1\tSource_Allele2\tReference_Allele\tDbsnp_RefAllele\tDbsnp_AltAllele\tDbsnp_IsReversed\tG1000_RefAllele\tG1000_AltAllele\tG1000_MAF\tAction");

                foreach (var v in snpItems)
                {
                    StrandAction action = v.SuggestAction();
                    sw.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11:0.####}\t{12}", v.Name, v.Chrom, v.Position, v.Allele1, v.Allele2, v.RefChar, v.DbsnpRefAllele, v.DbsnpAltAllele, v.DbsnpIsReversed, v.G1000Allele1, v.G1000Allele2, v.G1000Allele2Frequency, action);
                    actionMap[v.Name] = action;
                }
            }

            using (var reader = new PlinkBedRandomFile(options.InputFile)
            {
                Progress = this.Progress
            })
            {
                var data = reader.Data;

                // One output file per distinct chromosome, in ascending order.
                var chrs = (from v in snpItems select v.Chrom).Distinct().OrderBy(m => m).ToArray();
                foreach (var chr in chrs)
                {
                    // File name pattern: <prefix>.<chromosome left-padded with '0' to two characters>.gen
                    var genfile = string.Format("{0}.{1}.gen", options.OutputPrefix, chr.ToString().PadLeft(2, '0'));
                    result.Add(genfile);
                    var map = FileUtils.ChangeExtension(genfile, ".sample");

                    // The same individual list is written next to every gen file.
                    new GwasSampleFormat().WriteToFile(map, data.Individual);

                    //save gen file
                    using (var sw = new StreamWriter(genfile))
                    {
                        sw.NewLine = Environment.NewLine;
                        var chrItems = snpItems.Where(m => m.Chrom == chr).ToList();
                        GenomeUtils.SortChromosome(chrItems, m => chr.ToString(), m => m.Position);
                        foreach (var snp in chrItems)
                        {
                            // Genotypes are fetched by the name stored in nameMap for this SNP.
                            var ldata  = reader.Read(nameMap[snp.Name]);
                            var action = actionMap[snp.Name];

                            sw.Write("{0} {1} {2} {3} {4}", snp.Chrom, snp.Name, snp.Position, snp.DbsnpRefAllele, snp.DbsnpAltAllele);
                            for (int individualIndex = 0; individualIndex < data.Individual.Count; individualIndex++)
                            {
                                // A missing genotype is written as an all-zero triplet.
                                if (PlinkData.IsMissing(ldata[0, individualIndex], ldata[1, individualIndex]))
                                {
                                    sw.Write(" 0 0 0");
                                }
                                else
                                {
                                    // Map each stored flag to the dbSNP ref/alt allele; for Switch and
                                    // FlipSwitch a set flag maps to the alt allele, otherwise to the ref allele.
                                    char alle1, alle2;
                                    if (StrandAction.Switch == action || StrandAction.FlipSwitch == action)
                                    {
                                        alle1 = ldata[0, individualIndex] ? snp.DbsnpAltAllele : snp.DbsnpRefAllele;
                                        alle2 = ldata[1, individualIndex] ? snp.DbsnpAltAllele : snp.DbsnpRefAllele;
                                    }
                                    else
                                    {
                                        alle1 = ldata[0, individualIndex] ? snp.DbsnpRefAllele : snp.DbsnpAltAllele;
                                        alle2 = ldata[1, individualIndex] ? snp.DbsnpRefAllele : snp.DbsnpAltAllele;
                                    }

                                    // Triplet columns: ref/ref, heterozygous, alt/alt.
                                    if (alle1 != alle2)
                                    {
                                        sw.Write(" 0 1 0");
                                    }
                                    else if (alle1 == snp.DbsnpRefAllele)
                                    {
                                        sw.Write(" 1 0 0");
                                    }
                                    else
                                    {
                                        sw.Write(" 0 0 1");
                                    }
                                }
                            }
                            sw.WriteLine();
                        }
                    }
                }
            }

            return(result);
        }
Example #12
0
 /// <summary>
 /// Returns true when either allele is not exactly one character long, or when either
 /// allele is the code "I" or "D".
 /// </summary>
 private static bool IsIndel(PlinkLocus m)
 {
     bool notSingleCharacter = m.Allele1.Length != 1 || m.Allele2.Length != 1;
     if (notSingleCharacter)
     {
         return true;
     }

     if (m.Allele1.Equals("I") || m.Allele1.Equals("D"))
     {
         return true;
     }

     return m.Allele2.Equals("I") || m.Allele2.Equals("D");
 }
Example #13
0
 /// <summary>
 /// Returns true when both alleles of the locus are the string "0".
 /// </summary>
 private static bool IsMissing(PlinkLocus m)
 {
     bool firstIsZero = m.Allele1.Equals("0");
     return firstIsZero ? m.Allele2.Equals("0") : false;
 }
Example #14
0
        /// <summary>
        /// Loads a complete data set: individuals from the ".fam" file, loci from the ".bim" file,
        /// and the genotype flags from the binary file opened by <c>OpenBinaryFile</c>.
        /// Each byte supplies up to four genotypes (two flags each), and every row of the major
        /// dimension starts on a fresh byte.
        /// </summary>
        /// <param name="fileName">File whose extension is replaced to locate the ".fam" and ".bim" files.</param>
        /// <returns>The populated PlinkData.</returns>
        /// <exception cref="FileNotFoundException">The ".fam" or ".bim" file does not exist.</exception>
        public PlinkData ReadFromFile(string fileName)
        {
            var famPath = FileUtils.ChangeExtension(fileName, ".fam");
            if (!File.Exists(famPath))
            {
                throw new FileNotFoundException("File not found: " + famPath);
            }

            var bimPath = FileUtils.ChangeExtension(fileName, ".bim");
            if (!File.Exists(bimPath))
            {
                throw new FileNotFoundException("File not found: " + bimPath);
            }

            var plink = new PlinkData();
            plink.Individual = PlinkIndividual.ReadFromFile(famPath);
            plink.Locus      = PlinkLocus.ReadFromBimFile(bimPath);
            plink.AllocateDataMemory();

            OpenBinaryFile(fileName);
            try
            {
                // The major dimension is iterated in the outer loop; the minor one is packed
                // four entries per byte. Both matrices are always indexed [locus, individual].
                bool snpMajor   = IsSNPMajor;
                int  majorCount = snpMajor ? plink.Locus.Count : plink.Individual.Count;
                int  minorCount = snpMajor ? plink.Individual.Count : plink.Locus.Count;

                for (int major = 0; major < majorCount; major++)
                {
                    for (int minor = 0; minor < minorCount;)
                    {
                        var bits = ReadByte();
                        for (int bit = 0; bit < 7 && minor < minorCount; bit += 2, minor++)
                        {
                            int locusIndex      = snpMajor ? major : minor;
                            int individualIndex = snpMajor ? minor : major;

                            plink.IsHaplotype1Allele2[locusIndex, individualIndex] = bits[bit];
                            plink.IsHaplotype2Allele2[locusIndex, individualIndex] = bits[bit + 1];
                        }
                    }
                }
            }
            finally
            {
                _reader.Close();
                _reader = null;
            }

            return plink;
        }
        /// <summary>
        /// Loads the individuals (".fam") and loci (".bim") into <c>Data</c>, opens the binary
        /// genotype file and inspects its leading bytes to decide whether the data is stored in
        /// SNP-major or individual-major order. On return <c>_startPosition</c> holds the stream
        /// position reached after header detection.
        /// </summary>
        /// <param name="fileName">File whose extension is replaced to locate the ".fam" and ".bim" files, and which is passed to <c>DoOpenFile</c>.</param>
        /// <exception cref="FileNotFoundException">The ".fam" or ".bim" file does not exist.</exception>
        public void OpenBinaryFile(string fileName)
        {
            var famFile = FileUtils.ChangeExtension(fileName, ".fam");

            if (!File.Exists(famFile))
            {
                throw new FileNotFoundException("File not found: " + famFile);
            }

            var bimFile = FileUtils.ChangeExtension(fileName, ".bim");

            if (!File.Exists(bimFile))
            {
                throw new FileNotFoundException("File not found: " + bimFile);
            }

            Data            = new PlinkData();
            Data.Individual = PlinkIndividual.ReadFromFile(famFile);
            Data.Locus      = PlinkLocus.ReadFromBimFile(bimFile);
            //Data.Locus.ForEach(m => m.MarkerId = m.MarkerId.ToLower());
            Data.BuildMap();

            DoOpenFile(fileName);

            BitArray b = ReadByte();

            bool v1_bfile = true;

            // First header byte: bits 2, 3, 5 and 6 set, all others clear.
            // NOTE(review): presumably the PLINK magic byte 0x6C, assuming ReadByte maps bit 0 to
            // the least-significant bit - confirm.
            if ((b[2] && b[3] && b[5] && b[6]) && !(b[0] || b[1] || b[4] || b[7]))
            {
                // Next number
                // Second header byte: bits 0, 1, 3 and 4 set, all others clear (presumably 0x1B).
                b = ReadByte();
                if ((b[0] && b[1] && b[3] && b[4]) && !(b[2] || b[5] || b[6] || b[7]))
                {
                    // Third byte: bit 0 selects SNP-major (set) or individual-major (clear) order.
                    b          = ReadByte();
                    IsSNPMajor = b[0];

                    if (IsSNPMajor)
                    {
                        Progress.SetMessage("Detected that binary PED file is v1.00 SNP-major mode\n");
                    }
                    else
                    {
                        Progress.SetMessage("Detected that binary PED file is v1.00 individual-major mode\n");
                    }
                }
                else
                {
                    v1_bfile = false;
                }
            }
            else
            {
                v1_bfile = false;
            }


            // Reset file if < v1
            // The file is reopened and its first byte is read again for the checks below.
            if (!v1_bfile)
            {
                Progress.SetMessage("Warning, old BED file <v1.00 : will try to recover...\n");
                DoOpenFile(fileName);
                b = ReadByte();
            }

            // If 0.99 file format
            // Any of bits 1-7 set in the first byte is treated as "older than v0.99": the file is
            // reopened so that no header byte is consumed, and individual-major order is assumed.
            if ((!v1_bfile) && (b[1] || b[2] || b[3] || b[4] || b[5] || b[6] || b[7]))
            {
                Progress.SetMessage(" *** Possible problem: guessing that BED is < v0.99   *** ");
                Progress.SetMessage(" *** High chance of data corruption, spurious results *** ");

                IsSNPMajor = false;
                DoOpenFile(fileName);
            }
            else if (!v1_bfile)
            {
                // Otherwise the single byte already read is the mode byte: bit 0 selects the order.
                IsSNPMajor = b[0];

                Progress.SetMessage("Binary PED file is v0.99\n");

                if (IsSNPMajor)
                {
                    Progress.SetMessage("Detected that binary PED file is in SNP-major mode\n");
                }
                else
                {
                    Progress.SetMessage("Detected that binary PED file is in individual-major mode\n");
                }
            }

            // Remember the stream position reached after the header handling above.
            _startPosition = _reader.BaseStream.Position;
        }
Example #16
0
    /// <summary>
    /// Read locus from map file of ped format. The file is tab-delimited; empty lines and
    /// lines whose marker id (second column) is empty are skipped. When a line has fewer than
    /// six columns both alleles are set to <c>MISSING</c>.
    /// </summary>
    /// <param name="fileName">map file</param>
    /// <returns>list of PlinkLocus</returns>
    public static List<PlinkLocus> ReadFromMapFile(string fileName)
    {
      var result = new List<PlinkLocus>();

      using (var sr = new StreamReader(fileName))
      {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
          line = line.Trim();
          if (string.IsNullOrEmpty(line))
          {
            continue;
          }

          var parts = line.Split('\t');
          if (string.IsNullOrEmpty(parts[1]))
          {
            continue;
          }

          var locus = new PlinkLocus();
          locus.Chromosome = int.Parse(parts[0]);
          locus.MarkerId = parts[1];
          // Genetic distance may be fractional; parse it as a double, exactly as
          // ReadFromBimFile does for the same column. int.Parse threw a
          // FormatException on values such as "0.5".
          locus.GeneticDistance = double.Parse(parts[2]);
          locus.PhysicalPosition = int.Parse(parts[3]);
          if (parts.Length >= 6)
          {
            locus.Allele1 = parts[4];
            locus.Allele2 = parts[5];
          }
          else
          {
            locus.Allele1 = MISSING;
            locus.Allele2 = MISSING;
          }
          result.Add(locus);
        }
      }

      return result;
    }