Пример #1
0
    /// <summary>
    /// Read individuals from the fam file of a bed fileset, or from a ped file.
    /// </summary>
    /// <remarks>
    /// Each line is trimmed and split on tab and space characters. Lines that are blank
    /// after trimming, and lines whose second field (individual id) is empty, are skipped.
    /// The first six fields are stored as Fid, Iid, Pat, Mat, Sexcode and Phenotype; any
    /// further fields on the line are ignored here.
    /// </remarks>
    /// <param name="fileName">fam/ped file</param>
    /// <returns>list of PlinkIndividual, in file order</returns>
    /// <exception cref="System.FormatException">
    /// A line has fewer than six fields, or its sixth field is not a number.
    /// </exception>
    public static List<PlinkIndividual> ReadFromFile(string fileName)
    {
      var result = new List<PlinkIndividual>();

      using (var sr = new StreamReader(fileName))
      {
        var comms = new[] { '\t', ' ' };
        int lineNumber = 0;
        string line;
        while ((line = sr.ReadLine()) != null)
        {
          lineNumber++;

          line = line.Trim();
          if (string.IsNullOrEmpty(line))
          {
            continue;
          }

          var parts = line.Split(comms);
          if (parts.Length > 1 && string.IsNullOrEmpty(parts[1]))
          {
            continue;
          }

          // Fail with a message that points at the offending line instead of a bare index error.
          if (parts.Length < 6)
          {
            throw new System.FormatException(string.Format(
              "Line {0} of {1} has {2} field(s); at least 6 are required.",
              lineNumber, fileName, parts.Length));
          }

          var ind = new PlinkIndividual();
          ind.Fid = parts[0];
          ind.Iid = parts[1];
          ind.Pat = parts[2];
          ind.Mat = parts[3];
          ind.Sexcode = parts[4];
          // The file is machine-written, so the decimal separator must not depend on the user's locale.
          ind.Phenotype = double.Parse(parts[5], System.Globalization.CultureInfo.InvariantCulture);
          result.Add(ind);
        }
      }

      return result;
    }
        /// <summary>
        /// Read individuals from the fam file of a bed fileset, or from a ped file.
        /// </summary>
        /// <remarks>
        /// Each line is trimmed and split on tab and space characters. Lines that are blank
        /// after trimming, and lines whose second field (individual id) is empty, are skipped.
        /// The first six fields are stored as Fid, Iid, Pat, Mat, Sexcode and Phenotype; any
        /// further fields on the line are ignored here.
        /// </remarks>
        /// <param name="fileName">fam/ped file</param>
        /// <returns>list of PlinkIndividual, in file order</returns>
        /// <exception cref="System.FormatException">
        /// A line has fewer than six fields, or its sixth field is not a number.
        /// </exception>
        public static List<PlinkIndividual> ReadFromFile(string fileName)
        {
            var result = new List<PlinkIndividual>();

            using (var sr = new StreamReader(fileName))
            {
                var comms = new[] { '\t', ' ' };
                int lineNumber = 0;
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    lineNumber++;

                    line = line.Trim();
                    if (string.IsNullOrEmpty(line))
                    {
                        continue;
                    }

                    var parts = line.Split(comms);
                    if (parts.Length > 1 && string.IsNullOrEmpty(parts[1]))
                    {
                        continue;
                    }

                    // Fail with a message that points at the offending line instead of a bare index error.
                    if (parts.Length < 6)
                    {
                        throw new System.FormatException(string.Format(
                            "Line {0} of {1} has {2} field(s); at least 6 are required.",
                            lineNumber, fileName, parts.Length));
                    }

                    var ind = new PlinkIndividual();
                    ind.Fid     = parts[0];
                    ind.Iid     = parts[1];
                    ind.Pat     = parts[2];
                    ind.Mat     = parts[3];
                    ind.Sexcode = parts[4];
                    // The file is machine-written, so the decimal separator must not depend on the user's locale.
                    ind.Phenotype = double.Parse(parts[5], System.Globalization.CultureInfo.InvariantCulture);
                    result.Add(ind);
                }
            }

            return result;
        }
Пример #3
0
        /// <summary>
        /// Read a binary fileset (fam + bim + binary genotype file) completely into a new PlinkData.
        /// </summary>
        /// <remarks>
        /// The fam and bim files are located by changing the extension of <paramref name="fileName"/>.
        /// The binary file is opened through OpenBinaryFile, which also determines IsSNPMajor.
        /// Every byte returned by ReadByte carries up to four genotypes, two bits each; each
        /// row of the outer dimension starts on a fresh byte. The reader is always closed and
        /// cleared before this method returns or throws.
        /// </remarks>
        /// <param name="fileName">path of the fileset; fam/bim paths are derived from it</param>
        /// <returns>PlinkData with individuals, loci and both haplotype flag matrices filled</returns>
        /// <exception cref="FileNotFoundException">The fam or bim file does not exist.</exception>
        public PlinkData ReadFromFile(string fileName)
        {
            var famFile = FileUtils.ChangeExtension(fileName, ".fam");

            if (!File.Exists(famFile))
            {
                throw new FileNotFoundException("File not found: " + famFile);
            }

            var bimFile = FileUtils.ChangeExtension(fileName, ".bim");

            if (!File.Exists(bimFile))
            {
                throw new FileNotFoundException("File not found: " + bimFile);
            }

            var result = new PlinkData();

            result.Individual = PlinkIndividual.ReadFromFile(famFile);
            result.Locus      = PlinkLocus.ReadFromBimFile(bimFile);
            result.AllocateDataMemory();

            try
            {
                // Opened inside the try so that a failure while reading the header
                // still releases the reader in the finally block.
                OpenBinaryFile(fileName);

                bool snpMajor   = IsSNPMajor;
                int  outerCount = snpMajor ? result.Locus.Count : result.Individual.Count;
                int  innerCount = snpMajor ? result.Individual.Count : result.Locus.Count;

                for (int outer = 0; outer < outerCount; outer++)
                {
                    int inner = 0;
                    while (inner < innerCount)
                    {
                        var bits = ReadByte();

                        // Two bits per genotype; unused trailing bits of the last byte are ignored.
                        for (int bit = 0; bit < 8 && inner < innerCount; bit += 2, inner++)
                        {
                            int locus      = snpMajor ? outer : inner;
                            int individual = snpMajor ? inner : outer;

                            result.IsHaplotype1Allele2[locus, individual] = bits[bit];
                            result.IsHaplotype2Allele2[locus, individual] = bits[bit + 1];
                        }
                    }
                }
            }
            finally
            {
                // _reader may still be unset if opening failed before the file was opened.
                if (_reader != null)
                {
                    _reader.Close();
                    _reader = null;
                }
            }

            return result;
        }
Пример #4
0
        /// <summary>
        /// Read a ped file whose genotype columns are single-character alleles and convert it to PlinkData.
        /// </summary>
        /// <remarks>
        /// Loci come from ReadLocus and individuals from PlinkIndividual.ReadFromFile. Genotype
        /// columns start at field 6, two fields per locus; only the first character of each field is used.
        /// If HasAlleleAssigned reports that the loci already carry alleles, those are used.
        /// Otherwise the two alleles of every locus are inferred from the genotypes.
        /// A genotype with either allele equal to PlinkLocus.MISSING_CHAR is stored as
        /// (IsHaplotype1Allele2 = true, IsHaplotype2Allele2 = false).
        /// </remarks>
        /// <param name="fileName">ped file</param>
        /// <returns>PlinkData with loci, individuals and both haplotype flag matrices filled</returns>
        /// <exception cref="InvalidDataException">
        /// A line has too few genotype fields, or a locus shows more than two alleles.
        /// </exception>
        private PlinkData ReadFromFileWithoutIndel(string fileName)
        {
            var result = ReadLocus(fileName);

            result.Individual = PlinkIndividual.ReadFromFile(fileName);
            result.AllocateDataMemory();

            int locusCount      = result.Locus.Count;
            int individualCount = result.Individual.Count;
            int requiredFields  = 6 + 2 * locusCount;

            var allele1 = new char[locusCount, individualCount];
            var allele2 = new char[locusCount, individualCount];

            var delimiters = new[] { '\t', ' ' };
            int individual = -1;
            int lineNumber = 0;

            //reading data
            using (var sr = new StreamReader(fileName))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    lineNumber++;

                    // Apply the same trimming, splitting and skipping rules as PlinkIndividual.ReadFromFile,
                    // otherwise the running individual index drifts away from result.Individual.
                    line = line.Trim();
                    if (string.IsNullOrEmpty(line))
                    {
                        continue;
                    }

                    var parts = line.Split(delimiters);
                    if (parts.Length > 1 && string.IsNullOrEmpty(parts[1]))
                    {
                        continue;
                    }

                    if (parts.Length < requiredFields)
                    {
                        throw new InvalidDataException(string.Format(
                            "Line {0} of {1} has {2} field(s); {3} are required for {4} loci.",
                            lineNumber, fileName, parts.Length, requiredFields, locusCount));
                    }

                    individual++;
                    for (int snp = 0; snp < locusCount; snp++)
                    {
                        var pos = 6 + snp * 2;
                        allele1[snp, individual] = parts[pos][0];
                        allele2[snp, individual] = parts[pos + 1][0];
                    }
                }
            }

            if (HasAlleleAssigned(result))
            {
                result.Locus.ForEach(m =>
                {
                    m.AlleleChar1 = m.Allele1[0];
                    m.AlleleChar2 = m.Allele2[0];
                });
            }
            else
            {
                for (int locus = 0; locus < locusCount; locus++)
                {
                    InferLocusAllelesFromGenotypes(result, locus, allele1, allele2);
                }

                result.Locus.ForEach(m =>
                {
                    m.Allele1 = m.AlleleChar1.ToString();
                    m.Allele2 = m.AlleleChar2.ToString();
                });
            }

            //assign value; this must run for every locus, including those without any heterozygous individual
            for (int locus = 0; locus < locusCount; locus++)
            {
                var l1 = result.Locus[locus].AlleleChar1;
                for (int ind = 0; ind < individualCount; ind++)
                {
                    var a1 = allele1[locus, ind];
                    var a2 = allele2[locus, ind];
                    if (a1 == PlinkLocus.MISSING_CHAR || a2 == PlinkLocus.MISSING_CHAR)
                    {
                        result.IsHaplotype1Allele2[locus, ind] = true;
                        result.IsHaplotype2Allele2[locus, ind] = false;
                        continue;
                    }

                    result.IsHaplotype1Allele2[locus, ind] = a1 != l1;
                    result.IsHaplotype2Allele2[locus, ind] = a2 != l1;
                }
            }

            return result;
        }

        /// <summary>
        /// Choose AlleleChar1/AlleleChar2 of one locus from the observed genotypes.
        /// </summary>
        /// <remarks>
        /// Genotypes with a missing allele are ignored. The first heterozygous genotype decides
        /// both alleles. Without one, the homozygous alleles are ranked by descending count:
        /// one distinct allele fills both slots, two fill slot 1 and slot 2. When no usable
        /// genotype exists the locus is left unchanged.
        /// </remarks>
        private static void InferLocusAllelesFromGenotypes(PlinkData data, int locus, char[,] allele1, char[,] allele2)
        {
            var homozygousCount = new Dictionary<char, int>();
            int individualCount = allele1.GetLength(1);

            for (int ind = 0; ind < individualCount; ind++)
            {
                var a1 = allele1[locus, ind];
                var a2 = allele2[locus, ind];
                if (a1 == PlinkLocus.MISSING_CHAR || a2 == PlinkLocus.MISSING_CHAR)
                {
                    continue;
                }

                if (a1 != a2)
                {
                    data.Locus[locus].AlleleChar1 = a1;
                    data.Locus[locus].AlleleChar2 = a2;
                    return;
                }

                int seen;
                homozygousCount.TryGetValue(a1, out seen);
                homozygousCount[a1] = seen + 1;
            }

            var orderedCount = homozygousCount.ToList().OrderByDescending(m => m.Value).ToList();
            if (orderedCount.Count == 0)
            {
                return;
            }

            if (orderedCount.Count > 2)
            {
                throw new InvalidDataException(string.Format("There are more than 2 alleles for locus {0} : {1}", data.Locus[locus].MarkerId, (from c in orderedCount select c.Key.ToString()).Merge(", ")));
            }

            data.Locus[locus].AlleleChar1 = orderedCount[0].Key;
            data.Locus[locus].AlleleChar2 = orderedCount[orderedCount.Count - 1].Key;
        }
        /// <summary>
        /// Load the fam/bim companions of a binary fileset into Data, open the binary file
        /// and work out its format version and orientation from the leading bytes.
        /// </summary>
        /// <remarks>
        /// Two specific leading bit patterns followed by a mode byte are treated as the v1.00 layout.
        /// Anything else is handled as an older file: the file is reopened and the first byte
        /// is taken as the mode byte (v0.99) unless any of its bits 1..7 is set, in which case
        /// the file is reopened once more and individual-major mode is assumed.
        /// On return, _startPosition holds the reader position following this detection.
        /// </remarks>
        /// <param name="fileName">path of the fileset; fam/bim paths are derived from it</param>
        /// <exception cref="FileNotFoundException">The fam or bim file does not exist.</exception>
        public void OpenBinaryFile(string fileName)
        {
            var famPath = FileUtils.ChangeExtension(fileName, ".fam");
            if (!File.Exists(famPath))
            {
                throw new FileNotFoundException("File not found: " + famPath);
            }

            var bimPath = FileUtils.ChangeExtension(fileName, ".bim");
            if (!File.Exists(bimPath))
            {
                throw new FileNotFoundException("File not found: " + bimPath);
            }

            Data            = new PlinkData();
            Data.Individual = PlinkIndividual.ReadFromFile(famPath);
            Data.Locus      = PlinkLocus.ReadFromBimFile(bimPath);
            //Data.Locus.ForEach(m => m.MarkerId = m.MarkerId.ToLower());
            Data.BuildMap();

            DoOpenFile(fileName);

            BitArray header = ReadByte();
            bool     isV1   = false;

            // First signature byte: bits 2, 3, 5, 6 set and all others clear.
            if (header[2] && header[3] && header[5] && header[6] &&
                !header[0] && !header[1] && !header[4] && !header[7])
            {
                header = ReadByte();

                // Second signature byte: bits 0, 1, 3, 4 set and all others clear.
                if (header[0] && header[1] && header[3] && header[4] &&
                    !header[2] && !header[5] && !header[6] && !header[7])
                {
                    isV1 = true;

                    header     = ReadByte();
                    IsSNPMajor = header[0];

                    Progress.SetMessage(IsSNPMajor
                        ? "Detected that binary PED file is v1.00 SNP-major mode\n"
                        : "Detected that binary PED file is v1.00 individual-major mode\n");
                }
            }

            if (!isV1)
            {
                // Start over from the beginning of the file and look at the first byte only.
                Progress.SetMessage("Warning, old BED file <v1.00 : will try to recover...\n");
                DoOpenFile(fileName);
                header = ReadByte();

                bool anyHighBitSet = header[1] || header[2] || header[3] || header[4] ||
                                     header[5] || header[6] || header[7];

                if (anyHighBitSet)
                {
                    Progress.SetMessage(" *** Possible problem: guessing that BED is < v0.99   *** ");
                    Progress.SetMessage(" *** High chance of data corruption, spurious results *** ");

                    IsSNPMajor = false;
                    DoOpenFile(fileName);
                }
                else
                {
                    IsSNPMajor = header[0];

                    Progress.SetMessage("Binary PED file is v0.99\n");

                    Progress.SetMessage(IsSNPMajor
                        ? "Detected that binary PED file is in SNP-major mode\n"
                        : "Detected that binary PED file is in individual-major mode\n");
                }
            }

            _startPosition = _reader.BaseStream.Position;
        }