/// <summary>
/// Read individual from fam file of bed format, or from ped file of ped format.
/// Only the first six whitespace-delimited columns of each line are used
/// (family id, individual id, paternal id, maternal id, sex code, phenotype).
/// Blank lines are skipped.
/// </summary>
/// <param name="fileName">fam/ped file</param>
/// <returns>list of PlinkIndividual</returns>
/// <exception cref="System.FormatException">
/// A non-blank line has fewer than six columns, or its phenotype column is not a number.
/// </exception>
public static List<PlinkIndividual> ReadFromFile(string fileName)
{
    var result = new List<PlinkIndividual>();
    var separators = new[] { '\t', ' ' };

    using (var sr = new StreamReader(fileName))
    {
        string line;
        int lineNumber = 0;
        while ((line = sr.ReadLine()) != null)
        {
            lineNumber++;
            line = line.Trim();
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }

            // Columns may be separated by runs of blanks/tabs, so empty entries are dropped.
            // The split is capped at 7 pieces: the six columns we need plus "the rest",
            // which avoids allocating one string per genotype column of a ped line.
            var parts = line.Split(separators, 7, System.StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length < 6)
            {
                throw new System.FormatException(string.Format(
                    "Line {0} of {1} has {2} column(s); at least 6 are required.",
                    lineNumber, fileName, parts.Length));
            }

            var ind = new PlinkIndividual();
            ind.Fid = parts[0];
            ind.Iid = parts[1];
            ind.Pat = parts[2];
            ind.Mat = parts[3];
            ind.Sexcode = parts[4];
            // The file is machine-readable data: parse with the invariant culture so that
            // "1.5" is not misread under cultures that use ',' as the decimal separator.
            ind.Phenotype = double.Parse(parts[5], System.Globalization.CultureInfo.InvariantCulture);
            result.Add(ind);
        }
    }

    return result;
}
/// <summary>
/// Read individual from fam file of bed format, or from ped file of ped format.
/// Only the first six whitespace-delimited columns of each line are used
/// (family id, individual id, paternal id, maternal id, sex code, phenotype).
/// Blank lines are skipped.
/// </summary>
/// <param name="fileName">fam/ped file</param>
/// <returns>list of PlinkIndividual</returns>
/// <exception cref="System.FormatException">
/// A non-blank line has fewer than six columns, or its phenotype column is not a number.
/// </exception>
public static List<PlinkIndividual> ReadFromFile(string fileName)
{
    var result = new List<PlinkIndividual>();
    var separators = new[] { '\t', ' ' };

    using (var sr = new StreamReader(fileName))
    {
        string line;
        int lineNumber = 0;
        while ((line = sr.ReadLine()) != null)
        {
            lineNumber++;
            line = line.Trim();
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }

            // Columns may be separated by runs of blanks/tabs, so empty entries are dropped.
            // The split is capped at 7 pieces: the six columns we need plus "the rest",
            // which avoids allocating one string per genotype column of a ped line.
            var parts = line.Split(separators, 7, System.StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length < 6)
            {
                throw new System.FormatException(string.Format(
                    "Line {0} of {1} has {2} column(s); at least 6 are required.",
                    lineNumber, fileName, parts.Length));
            }

            var ind = new PlinkIndividual();
            ind.Fid = parts[0];
            ind.Iid = parts[1];
            ind.Pat = parts[2];
            ind.Mat = parts[3];
            ind.Sexcode = parts[4];
            // The file is machine-readable data: parse with the invariant culture so that
            // "1.5" is not misread under cultures that use ',' as the decimal separator.
            ind.Phenotype = double.Parse(parts[5], System.Globalization.CultureInfo.InvariantCulture);
            result.Add(ind);
        }
    }

    return result;
}
/// <summary>
/// Reads a binary PLINK data set: individuals from the ".fam" companion file, loci from the
/// ".bim" companion file, and genotypes from the binary file opened by
/// <c>OpenBinaryFile</c>.
/// </summary>
/// <param name="fileName">
/// Path of the binary genotype file; the ".fam" and ".bim" paths are derived from it by
/// replacing the extension.
/// </param>
/// <returns>A new PlinkData holding individuals, loci and the decoded genotype matrices.</returns>
/// <exception cref="FileNotFoundException">The ".fam" or ".bim" companion file does not exist.</exception>
public PlinkData ReadFromFile(string fileName)
{
    var famFile = FileUtils.ChangeExtension(fileName, ".fam");
    if (!File.Exists(famFile))
    {
        throw new FileNotFoundException("File not found: " + famFile);
    }

    var bimFile = FileUtils.ChangeExtension(fileName, ".bim");
    if (!File.Exists(bimFile))
    {
        throw new FileNotFoundException("File not found: " + bimFile);
    }

    var result = new PlinkData();
    result.Individual = PlinkIndividual.ReadFromFile(famFile);
    result.Locus = PlinkLocus.ReadFromBimFile(bimFile);
    result.AllocateDataMemory();

    try
    {
        // Opened inside the try block so that the reader is also released when the
        // header probing in OpenBinaryFile fails after the file has been opened.
        OpenBinaryFile(fileName);

        bool snpMajor = IsSNPMajor;
        int locusCount = result.Locus.Count;
        int individualCount = result.Individual.Count;

        // SNP-major: one row per locus, one column per individual.
        // Individual-major: one row per individual, one column per locus.
        int rowCount = snpMajor ? locusCount : individualCount;
        int columnCount = snpMajor ? individualCount : locusCount;

        for (int row = 0; row < rowCount; row++)
        {
            int column = 0;
            while (column < columnCount)
            {
                // Each byte carries up to four genotypes, two bits per genotype.
                // Every row starts on a fresh byte, so unused bits of the last byte are dropped.
                var bits = ReadByte();
                for (int bit = 0; bit < 8 && column < columnCount; bit += 2, column++)
                {
                    int locus = snpMajor ? row : column;
                    int individual = snpMajor ? column : row;
                    result.IsHaplotype1Allele2[locus, individual] = bits[bit];
                    result.IsHaplotype2Allele2[locus, individual] = bits[bit + 1];
                }
            }
        }
    }
    finally
    {
        // _reader may still be null if opening the file itself failed.
        if (_reader != null)
        {
            _reader.Close();
            _reader = null;
        }
    }

    return result;
}
/// <summary>
/// Reads a ped file whose genotype columns are single-character alleles (two columns per
/// locus, starting at the seventh column) and converts them into the two boolean
/// haplotype matrices of the returned PlinkData.
/// </summary>
/// <remarks>
/// When <c>HasAlleleAssigned</c> reports that the loci already carry alleles, the allele
/// characters are taken from the first character of <c>Allele1</c>/<c>Allele2</c>.
/// Otherwise the alleles of each locus are inferred from the genotypes: the first
/// heterozygous genotype fixes both alleles; without one, the homozygous alleles are
/// ranked by frequency (most frequent becomes allele 1). A locus with no complete
/// genotype keeps whatever allele characters it already had.
/// A genotype with a missing allele is stored as haplotype1 = true, haplotype2 = false.
/// </remarks>
/// <param name="fileName">ped file</param>
/// <returns>The loaded data set.</returns>
/// <exception cref="Exception">
/// A locus without any heterozygous genotype shows three or more distinct alleles.
/// </exception>
private PlinkData ReadFromFileWithoutIndel(string fileName)
{
    var result = ReadLocus(fileName);
    result.Individual = PlinkIndividual.ReadFromFile(fileName);
    result.AllocateDataMemory();

    int locusCount = result.Locus.Count;
    int individualCount = result.Individual.Count;
    var allele1 = new char[locusCount, individualCount];
    var allele2 = new char[locusCount, individualCount];

    // Reading data. Blank lines are skipped and columns are split on runs of tabs/spaces,
    // mirroring PlinkIndividual.ReadFromFile so that row indices stay aligned with
    // result.Individual.
    var separators = new[] { '\t', ' ' };
    int individual = -1;
    using (var sr = new StreamReader(fileName))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            line = line.Trim();
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }

            individual++;
            var parts = line.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            for (int snp = 0; snp < locusCount; snp++)
            {
                // The first six columns describe the individual; genotypes follow in pairs.
                var pos = 6 + snp * 2;
                allele1[snp, individual] = parts[pos][0];
                allele2[snp, individual] = parts[pos + 1][0];
            }
        }
    }

    bool alleleAssigned = HasAlleleAssigned(result);
    if (alleleAssigned)
    {
        // Loop-invariant: derive the allele characters once instead of once per locus.
        result.Locus.ForEach(m =>
        {
            m.AlleleChar1 = m.Allele1[0];
            m.AlleleChar2 = m.Allele2[0];
        });
    }

    for (int locus = 0; locus < locusCount; locus++)
    {
        if (!alleleAssigned)
        {
            var count = new Dictionary<char, int>();
            bool bFound = false;
            for (int ind = 0; ind < individualCount; ind++)
            {
                var a1 = allele1[locus, ind];
                var a2 = allele2[locus, ind];
                if (a1 == PlinkLocus.MISSING_CHAR || a2 == PlinkLocus.MISSING_CHAR)
                {
                    continue;
                }

                if (a1 != a2)
                {
                    // A heterozygous genotype shows both alleles at once.
                    result.Locus[locus].AlleleChar1 = a1;
                    result.Locus[locus].AlleleChar2 = a2;
                    bFound = true;
                    break;
                }

                int v;
                count.TryGetValue(a1, out v);
                count[a1] = v + 1;
            }

            if (!bFound)
            {
                var orderedCount = count.OrderByDescending(m => m.Value).ToList();
                if (orderedCount.Count > 2)
                {
                    throw new Exception(string.Format("There are more than 2 alleles for locus {0} : {1}", result.Locus[locus].MarkerId, (from c in orderedCount select c.Key.ToString()).Merge(", ")));
                }

                if (orderedCount.Count == 1)
                {
                    result.Locus[locus].AlleleChar1 = orderedCount[0].Key;
                    result.Locus[locus].AlleleChar2 = orderedCount[0].Key;
                }
                else if (orderedCount.Count == 2)
                {
                    result.Locus[locus].AlleleChar1 = orderedCount[0].Key;
                    result.Locus[locus].AlleleChar2 = orderedCount[1].Key;
                }
                // Count == 0: no complete genotype at this locus, alleles stay as they are.
                // In every case execution falls through to the genotype assignment below,
                // so missing and allele-2 homozygous genotypes are still recorded.
            }

            // Keep the string form of this locus in step with the characters just decided.
            result.Locus[locus].Allele1 = result.Locus[locus].AlleleChar1.ToString();
            result.Locus[locus].Allele2 = result.Locus[locus].AlleleChar2.ToString();
        }

        var l1 = result.Locus[locus].AlleleChar1;

        // Assign value: a haplotype flag is true when its allele differs from allele 1.
        for (int ind = 0; ind < individualCount; ind++)
        {
            var a1 = allele1[locus, ind];
            var a2 = allele2[locus, ind];
            if (a1 == PlinkLocus.MISSING_CHAR || a2 == PlinkLocus.MISSING_CHAR)
            {
                // Missing genotype marker.
                result.IsHaplotype1Allele2[locus, ind] = true;
                result.IsHaplotype2Allele2[locus, ind] = false;
                continue;
            }

            result.IsHaplotype1Allele2[locus, ind] = a1 != l1;
            result.IsHaplotype2Allele2[locus, ind] = a2 != l1;
        }
    }

    return result;
}
/// <summary>
/// Loads the ".fam" and ".bim" companions of a binary genotype file into <c>Data</c>,
/// opens the binary file, inspects its leading bytes to decide the file version and
/// whether it is SNP-major or individual-major, and records the stream position at
/// which reading continues in <c>_startPosition</c>.
/// </summary>
/// <param name="fileName">
/// Path of the binary genotype file; companion paths are derived by replacing the extension.
/// </param>
/// <exception cref="FileNotFoundException">The ".fam" or ".bim" companion file does not exist.</exception>
public void OpenBinaryFile(string fileName)
{
    var famFile = FileUtils.ChangeExtension(fileName, ".fam");
    if (!File.Exists(famFile))
    {
        throw new FileNotFoundException("File not found: " + famFile);
    }

    var bimFile = FileUtils.ChangeExtension(fileName, ".bim");
    if (!File.Exists(bimFile))
    {
        throw new FileNotFoundException("File not found: " + bimFile);
    }

    Data = new PlinkData();
    Data.Individual = PlinkIndividual.ReadFromFile(famFile);
    Data.Locus = PlinkLocus.ReadFromBimFile(bimFile);
    //Data.Locus.ForEach(m => m.MarkerId = m.MarkerId.ToLower());
    Data.BuildMap();

    DoOpenFile(fileName);

    // A v1.00 file starts with two fixed signature bytes followed by a mode byte.
    bool isV1 = false;
    BitArray bits = ReadByte();
    if (bits[2] && bits[3] && bits[5] && bits[6] && !bits[0] && !bits[1] && !bits[4] && !bits[7])
    {
        // Next number
        bits = ReadByte();
        if (bits[0] && bits[1] && bits[3] && bits[4] && !bits[2] && !bits[5] && !bits[6] && !bits[7])
        {
            bits = ReadByte();
            IsSNPMajor = bits[0];
            Progress.SetMessage(IsSNPMajor
                ? "Detected that binary PED file is v1.00 SNP-major mode\n"
                : "Detected that binary PED file is v1.00 individual-major mode\n");
            isV1 = true;
        }
    }

    if (!isV1)
    {
        // Signature not found: reopen the file and look at its first byte only.
        Progress.SetMessage("Warning, old BED file <v1.00 : will try to recover...\n");
        DoOpenFile(fileName);
        bits = ReadByte();

        bool anyHighBitSet = bits[1] || bits[2] || bits[3] || bits[4] || bits[5] || bits[6] || bits[7];
        if (anyHighBitSet)
        {
            // The first byte is not a plain mode flag: treat the file as headerless
            // individual-major data and reopen it so that no byte is consumed.
            Progress.SetMessage(" *** Possible problem: guessing that BED is < v0.99 *** ");
            Progress.SetMessage(" *** High chance of data corruption, spurious results *** ");
            IsSNPMajor = false;
            DoOpenFile(fileName);
        }
        else
        {
            // v0.99: a single mode byte precedes the data.
            IsSNPMajor = bits[0];
            Progress.SetMessage("Binary PED file is v0.99\n");
            Progress.SetMessage(IsSNPMajor
                ? "Detected that binary PED file is in SNP-major mode\n"
                : "Detected that binary PED file is in individual-major mode\n");
        }
    }

    _startPosition = _reader.BaseStream.Position;
}