/// <summary>
/// Builds a single genotype whose six alleles accumulate the corresponding alleles of every
/// supplied genotype. Each input allele is rendered with ToString() and fed into the matching
/// accumulator through <c>ParseInto</c>.
/// </summary>
/// <param name="hlaIGenotypes">The genotypes to combine.</param>
/// <returns>A new genotype built from the six accumulated alleles.</returns>
internal static HlaIGenotype Union(params HlaIGenotype[] hlaIGenotypes)
{
    // One accumulator per chromosome copy at each of the three class I loci.
    HlaI firstA = new HlaI(HlaILocus.A);
    HlaI secondA = new HlaI(HlaILocus.A);
    HlaI firstB = new HlaI(HlaILocus.B);
    HlaI secondB = new HlaI(HlaILocus.B);
    HlaI firstC = new HlaI(HlaILocus.C);
    HlaI secondC = new HlaI(HlaILocus.C);

    // NOTE(review): an allele that is null (e.g. parsed from "?") would throw on ToString() here;
    // confirm callers only pass fully specified genotypes.
    for (int index = 0; index < hlaIGenotypes.Length; index++)
    {
        HlaIGenotype member = hlaIGenotypes[index];

        var aPair = member.AAlleles;
        firstA.ParseInto(aPair.Item1.ToString());
        secondA.ParseInto(aPair.Item2.ToString());

        var bPair = member.BAlleles;
        firstB.ParseInto(bPair.Item1.ToString());
        secondB.ParseInto(bPair.Item2.ToString());

        var cPair = member.CAlleles;
        firstC.ParseInto(cPair.Item1.ToString());
        secondC.ParseInto(cPair.Item2.ToString());
    }

    return new HlaIGenotype(firstA, secondA, firstB, secondB, firstC, secondC);
}
/// <summary>
/// Writes a dense HLA-by-patient matrix of match statistics to <c>Out</c>, plus a companion
/// delimited file holding the genotype probabilities of patients that have several candidate genotypes.
/// </summary>
/// <remarks>
/// Columns are built as follows:
/// a patient with exactly one genotype contributes one column keyed by the patient id;
/// a patient with any other number of genotypes contributes a column keyed by the patient id holding
/// the union of the genotypes, plus one column per genotype keyed "pid_i" (i is the zero-based index).
/// For the latter patients the genotype probabilities are recorded as a multinomial keyed by patient id.
/// NOTE(review): a generated "pid_i" key could collide with a real patient id of the same form, in
/// which case Dictionary.Add throws; confirm patient ids cannot look like that.
/// </remarks>
/// <param name="patientsAndGenotypes">Patient ids paired with their candidate genotypes.</param>
/// <param name="multipleGenotypesPerPatient">Not consulted by this implementation.</param>
protected override void WriteDistilled(IEnumerable<KeyValuePair<string, List<HlaIGenotype>>> patientsAndGenotypes, bool multipleGenotypesPerPatient)
{
    // Materialize once and use only the list afterwards, so a lazy input is never enumerated twice.
    var patientsAndGenotypesAsList = patientsAndGenotypes.ToList();

    Dictionary<string, MultinomialStatistics> pidToMultinomial = new Dictionary<string, MultinomialStatistics>();
    Dictionary<string, HlaIGenotype> pidGenoNameToGeno = new Dictionary<string, HlaIGenotype>(patientsAndGenotypesAsList.Count);

    foreach (var pidAndGenos in patientsAndGenotypesAsList)
    {
        if (pidAndGenos.Value.Count == 1)
        {
            pidGenoNameToGeno.Add(pidAndGenos.Key, pidAndGenos.Value.Single());
        }
        else
        {
            // Keep track of the probabilities in this dictionary.
            pidToMultinomial.Add(pidAndGenos.Key, MultinomialStatistics.GetInstance(pidAndGenos.Value.Select(g => g.Probability)));

            // Add the union genotype to the matrix under the plain patient id.
            HlaIGenotype unionGenotype = HlaIGenotype.Union(pidAndGenos.Value.ToArray());
            pidGenoNameToGeno.Add(pidAndGenos.Key, unionGenotype);

            // Then add every individual candidate genotype under "pid_i".
            for (int i = 0; i < pidAndGenos.Value.Count; i++)
            {
                string pidGenoId = pidAndGenos.Key + "_" + i;
                pidGenoNameToGeno.Add(pidGenoId, pidAndGenos.Value[i]);
            }
        }
    }

    // Rows: every HLA generated from the genotypes; columns: the keys built above; cells start out missing.
    Matrix<string, string, SufficientStatistics> m = DenseMatrix<string, string, SufficientStatistics>.CreateDefaultInstance(
        HlaEnumerator.GenerateHlas(patientsAndGenotypesAsList.SelectMany(pidAndGenos => pidAndGenos.Value)),
        pidGenoNameToGeno.Keys,
        MissingStatistics.GetInstance());

    foreach (string hlastr in m.RowKeys)
    {
        HlaI hla = HlaI.Parse(hlastr);
        foreach (string pid in m.ColKeys)
        {
            bool? match = pidGenoNameToGeno[pid].Matches(hla, MixtureSemantics);
            m.SetValueOrMissing(hlastr, pid, BooleanStatistics.GetInstance(match));
        }
    }

    m.WriteDense(this.Out.CreateTextOrUseConsole());

    // Assumes, as the original code did, that Out.ToString() yields the output path ("-" when it is not a file).
    string outName = this.Out.ToString();
    string probabilityFile;
    if (outName == "-")
    {
        probabilityFile = "HaplotypeCompletionProbs.txt";
    }
    else
    {
        // Rebuild the path from its parts so only the file name receives the suffix. A plain
        // string.Replace on the base name would also rewrite matching directory names
        // (e.g. "data/data.txt" would become "data_haplotypeProbs/data_haplotypeProbs.txt").
        string directory = Path.GetDirectoryName(outName) ?? string.Empty;
        string baseName = Path.GetFileNameWithoutExtension(outName);
        probabilityFile = Path.Combine(directory, baseName + "_haplotypeProbs" + Path.GetExtension(outName));
    }

    pidToMultinomial.WriteDelimitedFile(probabilityFile);
}
/// <summary>
/// Three-valued test of whether this genotype carries the given HLA.
/// Returns true if and only if at least one allele at the HLA's locus definitely matches it,
/// false if and only if both alleles at that locus definitely do not match it,
/// and null whenever the answer cannot be determined (an allele is missing or its own answer is unknown).
/// </summary>
/// <param name="unambiguousHla">The HLA to look for; it must not be ambiguous.</param>
/// <param name="mixtureSemantics">Passed through to the per-allele match test.</param>
/// <returns>true, false, or null as described above.</returns>
public bool? Matches(HlaI unambiguousHla, MixtureSemantics mixtureSemantics = MixtureSemantics.Uncertainty)
{
    Helper.CheckCondition<ArgumentException>(!unambiguousHla.IsAmbiguous, "Can only check if you have an uynambiguous Hla");

    // Select the pair of alleles stored for the locus being asked about.
    var requestedLocus = unambiguousHla.Locus;
    Tuple<HlaI, HlaI> allelesAtLocus;
    if (requestedLocus == HlaILocus.A)
    {
        allelesAtLocus = AAlleles;
    }
    else if (requestedLocus == HlaILocus.B)
    {
        allelesAtLocus = BAlleles;
    }
    else if (requestedLocus == HlaILocus.C)
    {
        allelesAtLocus = CAlleles;
    }
    else
    {
        throw new Exception("Can't get here.");
    }

    // The unordered pair sorts a missing (null) allele into First, so Second is the one to examine first.
    UOPair<HlaI> unordered = UOPair.Create(allelesAtLocus.Item1, allelesAtLocus.Item2);
    var possiblyMissing = unordered.First;
    var examinedFirst = unordered.Second;

    if (possiblyMissing == null && examinedFirst == null)
    {
        // Nothing at all is known about this locus.
        return null;
    }

    bool? verdictForSecond = examinedFirst.Matches(unambiguousHla, mixtureSemantics);
    if (verdictForSecond == true)
    {
        // One definite match is enough.
        return true;
    }

    if (possiblyMissing == null)
    {
        // The other allele is unknown, so a match cannot be ruled out.
        return null;
    }

    bool? verdictForFirst = possiblyMissing.Matches(unambiguousHla, mixtureSemantics);
    if (verdictForFirst == true)
    {
        return true;
    }

    // Neither allele is a definite match: answer false only when both answers are definite.
    if (verdictForFirst.HasValue && verdictForSecond.HasValue)
    {
        return false;
    }

    return null;
}
/// <summary>
/// Converts one allele column of the input file into an <c>HlaI</c>.
/// "?" denotes a missing allele and yields null; "-" denotes homozygosity and yields the other
/// copy at the same locus; any other text is parsed into a new <c>HlaI</c> for the given locus.
/// </summary>
/// <param name="locus">The locus the column belongs to.</param>
/// <param name="hlaString">The raw column text; must not be blank.</param>
/// <param name="otherCopyAtSameLocusOrNull">The allele parsed from the first column of the same locus, required when the text is "-".</param>
/// <returns>The parsed allele, the other copy for "-", or null for "?".</returns>
private static HlaI ParseHla(HlaILocus locus, string hlaString, HlaI otherCopyAtSameLocusOrNull = null)
{
    Helper.CheckCondition<ParseException>(!string.IsNullOrWhiteSpace(hlaString), "Blank entries are not allowed. Use ? for missing or - for homozygous (second column of locus only).");

    if (hlaString == "?")
    {
        // Missing allele.
        return null;
    }

    if (hlaString == "-")
    {
        // Homozygous marker: only meaningful when the first copy was supplied.
        Helper.CheckCondition<ParseException>(otherCopyAtSameLocusOrNull != null, "Can't mark the first column as homozygous. Only A2, B2 or C2 can be marked with -.");
        return otherCopyAtSameLocusOrNull;
    }

    HlaI parsedAllele = new HlaI(locus);
    parsedAllele.ParseInto(hlaString);
    return parsedAllele;
}
/// <summary>
/// Creates a genotype from the two alleles at each of the A, B and C loci.
/// The alleles are stored pairwise, in the order given, as <c>AAlleles</c>, <c>BAlleles</c> and <c>CAlleles</c>.
/// </summary>
/// <param name="A1">First allele at locus A.</param>
/// <param name="A2">Second allele at locus A.</param>
/// <param name="B1">First allele at locus B.</param>
/// <param name="B2">Second allele at locus B.</param>
/// <param name="C1">First allele at locus C.</param>
/// <param name="C2">Second allele at locus C.</param>
public HlaIGenotype(HlaI A1, HlaI A2, HlaI B1, HlaI B2, HlaI C1, HlaI C2)
{
    this.AAlleles = new Tuple<HlaI, HlaI>(A1, A2);
    this.BAlleles = new Tuple<HlaI, HlaI>(B1, B2);
    this.CAlleles = new Tuple<HlaI, HlaI>(C1, C2);
}
/// <summary>
/// Reads the tab-delimited genotype file behind <c>HlaFile</c> and groups the parsed genotypes by patient id.
/// </summary>
/// <remarks>
/// The first line is a header. It must either equal <c>HlaCompletionHeader</c> or have 7 or 8
/// tab-separated columns. Every following line holds the patient id, then the A1, A2, B1, B2, C1
/// and C2 alleles. For a completion-header file or an 8-column file, an eighth field holds the
/// genotype probability, which must lie in [0, 1].
/// When <c>MaxResolution</c> is <c>HlaIResolution.Group</c>, genotypes of one patient that render to
/// the same allele string are merged into a single genotype whose probability is the sum of the merged ones.
/// </remarks>
/// <returns>A dictionary from patient id to that patient's genotypes.</returns>
/// <exception cref="ParseException">
/// The input is empty, the header is malformed, a line has too few fields, an allele entry is
/// invalid, or the probability field is not a number in [0, 1].
/// </exception>
public virtual Dictionary<string, List<HlaIGenotype>> ParseFile()
{
    Dictionary<string, List<HlaIGenotype>> result = new Dictionary<string, List<HlaIGenotype>>();

    using (TextReader reader = HlaFile.OpenTextOrUseConsole(stripComments: true))
    {
        string header = reader.ReadLine();

        // An empty input used to surface as a NullReferenceException on header.Equals.
        Helper.CheckCondition<ParseException>(header != null, "The input is empty. Expected a header line.");

        bool isHlaCompletionFile = header.Equals(HlaCompletionHeader);
        int colCount = header.Split('\t').Length;
        Helper.CheckCondition<ParseException>(isHlaCompletionFile || colCount == 7 || colCount == 8, "Expect a file with 7 or 8 columns. Read {0}", header);

        bool parseProbabilityLine = isHlaCompletionFile || colCount == 8;
        int requiredFieldCount = parseProbabilityLine ? 8 : 7;

        string line;
        while (null != (line = reader.ReadLine()))
        {
            string[] fields = line.Split('\t');
            try
            {
                // Report short lines as a parse problem rather than an IndexOutOfRangeException.
                Helper.CheckCondition<ParseException>(fields.Length >= requiredFieldCount, "Expected at least {0} tab-separated fields but read {1}", requiredFieldCount, fields.Length);

                string pid = fields[0];

                // The second column of each locus may be "-" (homozygous), so it is given the first copy.
                HlaI a1 = ParseHla(HlaILocus.A, fields[1]);
                HlaI a2 = ParseHla(HlaILocus.A, fields[2], a1);
                HlaI b1 = ParseHla(HlaILocus.B, fields[3]);
                HlaI b2 = ParseHla(HlaILocus.B, fields[4], b1);
                HlaI c1 = ParseHla(HlaILocus.C, fields[5]);
                HlaI c2 = ParseHla(HlaILocus.C, fields[6], c1);

                HlaIGenotype genotype = new HlaIGenotype(a1, a2, b1, b2, c1, c2);
                genotype.SetMaxResolution(MaxResolution);

                if (parseProbabilityLine)
                {
                    double probability;
                    bool isValidProbability = double.TryParse(fields[7], out probability) && probability >= 0 && probability <= 1;

                    // Report the raw field text: after a failed TryParse the parsed value is just 0.
                    isValidProbability.Enforce<ParseException>("The 8th field is not a valid probability. Read {0}.", fields[7]);
                    genotype.Probability = probability;
                }

                // NOTE(review): relies on the dictionary extension inserting a new empty list when
                // pid is not yet present — confirm against the extension's definition.
                result.GetValueOrDefault(pid).Add(genotype);
            }
            catch (ParseException parseError)
            {
                throw new ParseException(parseError.Message + " for patient " + fields[0]);
            }
        }
    }

    if (MaxResolution == HlaIResolution.Group)
    {
        foreach (var pidAndGenos in result)
        {
            List<HlaIGenotype> genotypes = pidAndGenos.Value;
            if (genotypes.Count > 1)
            {
                // We've stripped out some of the 4 digits, so the list may now contain duplicates.
                // Collapse those together and sum up their probabilities.
                // The query is lazy and reads from the very list that is cleared below, so it must
                // be materialized first; otherwise AddRange would run it over the emptied list and
                // the patient would lose every genotype.
                List<HlaIGenotype> collapsedGenotypes =
                    (from geno in genotypes
                     let hlaString = geno.EnumerateAll().StringJoin(" ")
                     group geno by hlaString into identicalHaps
                     let representative = identicalHaps.First()
                     select new HlaIGenotype(
                         representative.AAlleles.Item1,
                         representative.AAlleles.Item2,
                         representative.BAlleles.Item1,
                         representative.BAlleles.Item2,
                         representative.CAlleles.Item1,
                         representative.CAlleles.Item2)
                     {
                         Probability = identicalHaps.Sum(h => h.Probability)
                     }).ToList();

                genotypes.Clear();
                genotypes.AddRange(collapsedGenotypes);
            }
        }
    }

    return result;
}