コード例 #1
0
        /// <summary>
        /// Combines several genotypes into one by feeding the string form of every allele into six
        /// accumulator alleles (two each for loci A, B and C) via <c>ParseInto</c>.
        /// </summary>
        /// <param name="hlaIGenotypes">The genotypes to combine.</param>
        /// <returns>A new genotype built from the six accumulator alleles.</returns>
        /// <remarks>
        /// NOTE(review): each allele is dereferenced with <c>ToString()</c>, so a genotype holding a
        /// null allele would throw a NullReferenceException here — confirm callers never pass one.
        /// </remarks>
        internal static HlaIGenotype Union(params HlaIGenotype[] hlaIGenotypes)
        {
            HlaI firstA  = new HlaI(HlaILocus.A);
            HlaI secondA = new HlaI(HlaILocus.A);
            HlaI firstB  = new HlaI(HlaILocus.B);
            HlaI secondB = new HlaI(HlaILocus.B);
            HlaI firstC  = new HlaI(HlaILocus.C);
            HlaI secondC = new HlaI(HlaILocus.C);

            for (int index = 0; index < hlaIGenotypes.Length; index++)
            {
                HlaIGenotype current = hlaIGenotypes[index];

                // Locus A: first copy, then second copy.
                firstA.ParseInto(current.AAlleles.Item1.ToString());
                secondA.ParseInto(current.AAlleles.Item2.ToString());

                // Locus B.
                firstB.ParseInto(current.BAlleles.Item1.ToString());
                secondB.ParseInto(current.BAlleles.Item2.ToString());

                // Locus C.
                firstC.ParseInto(current.CAlleles.Item1.ToString());
                secondC.ParseInto(current.CAlleles.Item2.ToString());
            }

            return new HlaIGenotype(firstA, secondA, firstB, secondB, firstC, secondC);
        }
コード例 #2
0
        /// <summary>
        /// Writes a dense HLA-by-column match matrix to the output, plus a companion delimited file
        /// holding the genotype probabilities of every patient that does not have exactly one genotype.
        /// </summary>
        /// <remarks>
        /// Matrix columns: a patient with exactly one genotype contributes one column keyed by the
        /// patient id. Any other patient contributes a column keyed by the patient id holding the
        /// union of its genotypes, plus one column per genotype keyed "<c>pid_i</c>".
        /// Matrix rows are the HLAs produced by <c>HlaEnumerator.GenerateHlas</c> over all genotypes.
        /// </remarks>
        /// <param name="patientsAndGenotypes">Patient id paired with that patient's genotypes. Enumerated exactly once.</param>
        /// <param name="multipleGenotypesPerPatient">Not consulted by this implementation.</param>
        protected override void WriteDistilled(IEnumerable <KeyValuePair <string, List <HlaIGenotype> > > patientsAndGenotypes, bool multipleGenotypesPerPatient)
        {
            // Materialize once and use only the list from here on: re-enumerating the incoming
            // sequence would re-run a deferred source and could hand back different list instances.
            var patientsAndGenotypesAsList = patientsAndGenotypes.ToList();

            Dictionary <string, MultinomialStatistics> pidToMultinomial  = new Dictionary <string, MultinomialStatistics>();
            Dictionary <string, HlaIGenotype>          pidGenoNameToGeno = new Dictionary <string, HlaIGenotype>(patientsAndGenotypesAsList.Count);

            foreach (var pidAndGenos in patientsAndGenotypesAsList)
            {
                if (pidAndGenos.Value.Count == 1)
                {
                    pidGenoNameToGeno.Add(pidAndGenos.Key, pidAndGenos.Value.Single());
                }
                else
                {
                    // Keep track of the per-genotype probabilities for the companion file.
                    pidToMultinomial.Add(pidAndGenos.Key, MultinomialStatistics.GetInstance(pidAndGenos.Value.Select(g => g.Probability)));

                    // The union genotype is stored under the plain patient id.
                    HlaIGenotype unionGenotype = HlaIGenotype.Union(pidAndGenos.Value.ToArray());
                    pidGenoNameToGeno.Add(pidAndGenos.Key, unionGenotype);

                    // Each individual genotype is stored under "pid_index".
                    for (int i = 0; i < pidAndGenos.Value.Count; i++)
                    {
                        string pidGenoId = pidAndGenos.Key + "_" + i;
                        pidGenoNameToGeno.Add(pidGenoId, pidAndGenos.Value[i]);
                    }
                }
            }

            Matrix <string, string, SufficientStatistics> m = DenseMatrix <string, string, SufficientStatistics> .CreateDefaultInstance(
                HlaEnumerator.GenerateHlas(patientsAndGenotypesAsList.SelectMany(pidAndGenos => pidAndGenos.Value)),
                pidGenoNameToGeno.Keys,
                MissingStatistics.GetInstance());

            foreach (string hlastr in m.RowKeys)
            {
                HlaI hla = HlaI.Parse(hlastr);
                foreach (string pid in m.ColKeys)
                {
                    bool? match = pidGenoNameToGeno[pid].Matches(hla, MixtureSemantics);
                    m.SetValueOrMissing(hlastr, pid, BooleanStatistics.GetInstance(match));
                }
            }

            m.WriteDense(this.Out.CreateTextOrUseConsole());

            string outName = this.Out.ToString();
            string probabilityFile;
            if (outName == "-")
            {
                // Output goes to the console, so fall back to a fixed file name.
                probabilityFile = "HaplotypeCompletionProbs.txt";
            }
            else
            {
                // Append the suffix to the file name only. A plain string.Replace on the whole path
                // would also rewrite directory components (or an extension) that happen to contain
                // the base name, and throws when the base name is empty.
                string directory = Path.GetDirectoryName(outName) ?? string.Empty;
                string baseName  = Path.GetFileNameWithoutExtension(outName);
                probabilityFile  = Path.Combine(directory, baseName + "_haplotypeProbs" + Path.GetExtension(outName));
            }

            pidToMultinomial.WriteDelimitedFile(probabilityFile);
        }
コード例 #3
0
        /// <summary>
        /// Three-valued test of whether this genotype carries the given Hla.
        /// Returns true if and only if at least one allele at the Hla's locus definitely matches.
        /// Returns false if and only if both alleles at that locus definitely do not match.
        /// Returns null in every other case (an allele is missing or reports an unknown result).
        /// </summary>
        /// <param name="unambiguousHla">The Hla to look for; the call is rejected via <c>Helper.CheckCondition</c> when it is ambiguous.</param>
        /// <param name="mixtureSemantics">Passed through unchanged to the per-allele match.</param>
        /// <returns>true, false or null as described in the summary.</returns>
        public bool? Matches(HlaI unambiguousHla, MixtureSemantics mixtureSemantics = MixtureSemantics.Uncertainty)
        {
            Helper.CheckCondition<ArgumentException>(!unambiguousHla.IsAmbiguous, "Can only check if you have an uynambiguous Hla");

            // Pick the allele pair that belongs to the queried locus.
            HlaILocus queriedLocus = unambiguousHla.Locus;
            Tuple<HlaI, HlaI> allelesAtLocus;
            if (queriedLocus == HlaILocus.A)
            {
                allelesAtLocus = AAlleles;
            }
            else if (queriedLocus == HlaILocus.B)
            {
                allelesAtLocus = BAlleles;
            }
            else if (queriedLocus == HlaILocus.C)
            {
                allelesAtLocus = CAlleles;
            }
            else
            {
                throw new Exception("Can't get here.");
            }

            // An unordered pair is used because (per the original author's note) it orders nulls
            // first: if exactly one allele is missing, it is First and the known one is Second.
            UOPair<HlaI> pair = UOPair.Create(allelesAtLocus.Item1, allelesAtLocus.Item2);
            HlaI possiblyMissing = pair.First;
            HlaI expectedKnown   = pair.Second;

            if (possiblyMissing == null && expectedKnown == null)
            {
                return null;    // nothing is known about this locus
            }

            // Start with Second, which is the non-null one when only one allele is known.
            bool? secondVerdict = expectedKnown.Matches(unambiguousHla, mixtureSemantics);
            if (secondVerdict == true)
            {
                return true;    // one definite match is enough
            }

            if (possiblyMissing == null)
            {
                return null;    // the other allele is unknown, so a match cannot be ruled out
            }

            bool? firstVerdict = possiblyMissing.Matches(unambiguousHla, mixtureSemantics);
            if (firstVerdict == true)
            {
                return true;
            }

            // No definite match: the answer is a definite "no" only when both alleles said "no".
            if (firstVerdict.HasValue && secondVerdict.HasValue)
            {
                return false;
            }
            return null;
        }
コード例 #4
0
        /// <summary>
        /// Converts one allele column of the input file into an <c>HlaI</c>.
        /// </summary>
        /// <param name="locus">Locus the new allele is created for.</param>
        /// <param name="hlaString">Raw column text. "?" means missing, "-" means homozygous (same as the other copy); anything else is parsed as an allele.</param>
        /// <param name="otherCopyAtSameLocusOrNull">The first copy at the same locus; required when <paramref name="hlaString"/> is "-".</param>
        /// <returns>null for "?", <paramref name="otherCopyAtSameLocusOrNull"/> for "-", otherwise a new allele populated via <c>ParseInto</c>.</returns>
        private static HlaI ParseHla(HlaILocus locus, string hlaString, HlaI otherCopyAtSameLocusOrNull = null)
        {
            Helper.CheckCondition<ParseException>(!string.IsNullOrWhiteSpace(hlaString), "Blank entries are not allowed. Use ? for missing or - for homozygous (second column of locus only).");

            // Missing allele.
            if (hlaString == "?")
            {
                return null;
            }

            // Homozygous marker: reuse the other copy, which therefore must exist.
            if (hlaString == "-")
            {
                Helper.CheckCondition<ParseException>(otherCopyAtSameLocusOrNull != null, "Can't mark the first column as homozygous. Only A2, B2 or C2 can be marked with -.");
                return otherCopyAtSameLocusOrNull;
            }

            // Ordinary allele text.
            HlaI parsed = new HlaI(locus);
            parsed.ParseInto(hlaString);
            return parsed;
        }
コード例 #5
0
 /// <summary>
 /// Creates a genotype from two allele copies at each of the loci A, B and C.
 /// </summary>
 /// <param name="A1">First copy at locus A.</param>
 /// <param name="A2">Second copy at locus A.</param>
 /// <param name="B1">First copy at locus B.</param>
 /// <param name="B2">Second copy at locus B.</param>
 /// <param name="C1">First copy at locus C.</param>
 /// <param name="C2">Second copy at locus C.</param>
 public HlaIGenotype(HlaI A1, HlaI A2, HlaI B1, HlaI B2, HlaI C1, HlaI C2)
 {
     // Each locus is stored as an ordered (first copy, second copy) pair.
     this.AAlleles = new Tuple<HlaI, HlaI>(A1, A2);
     this.BAlleles = new Tuple<HlaI, HlaI>(B1, B2);
     this.CAlleles = new Tuple<HlaI, HlaI>(C1, C2);
 }
コード例 #6
0
        /// <summary>
        /// Reads the tab-delimited HLA file and returns the parsed genotypes grouped by patient id.
        /// </summary>
        /// <remarks>
        /// The first line is a header. It must either equal <c>HlaCompletionHeader</c> or have 7 or 8
        /// tab-separated columns. Every following line holds: patient id, A1, A2, B1, B2, C1, C2 and,
        /// when the header is the completion header or has 8 columns, a probability in [0, 1].
        /// When <c>MaxResolution</c> is <c>HlaIResolution.Group</c>, genotypes of one patient that have
        /// become identical are collapsed into a single genotype whose probability is their sum.
        /// </remarks>
        /// <returns>A dictionary from patient id to the list of that patient's genotypes.</returns>
        /// <exception cref="ParseException">
        /// The input is empty, the header is malformed, a line has too few fields, an allele column is
        /// invalid, or the probability column is not a number in [0, 1].
        /// </exception>
        public virtual Dictionary <string, List <HlaIGenotype> > ParseFile()
        {
            Dictionary <string, List <HlaIGenotype> > result = new Dictionary <string, List <HlaIGenotype> >();

            using (TextReader reader = HlaFile.OpenTextOrUseConsole(stripComments: true))
            {
                string header = reader.ReadLine();
                // ReadLine returns null on empty input; report that as a parse error, not a NullReferenceException.
                Helper.CheckCondition <ParseException>(header != null, "The input is empty. Expect a header line with 7 or 8 columns.");

                bool   isHlaCompletionFile = header.Equals(HlaCompletionHeader);
                int    colCount            = header.Split('\t').Length;
                Helper.CheckCondition <ParseException>(isHlaCompletionFile || colCount == 7 || colCount == 8, "Expect a file with 7 or 8 columns. Read {0}", header);
                bool parseProbabilityLine = isHlaCompletionFile || colCount == 8;
                int  requiredFieldCount   = parseProbabilityLine ? 8 : 7;

                string line;
                while (null != (line = reader.ReadLine()))
                {
                    string[] fields = line.Split('\t');
                    try
                    {
                        // Guard the indexers below so a short line is reported with the patient
                        // context instead of escaping as an IndexOutOfRangeException.
                        if (fields.Length < requiredFieldCount)
                        {
                            throw new ParseException(string.Format("Expect at least {0} tab-delimited fields. Read {1}", requiredFieldCount, fields.Length));
                        }

                        string pid = fields[0];

                        // The second copy at each locus may be "-" (homozygous), so it receives the first copy.
                        HlaI a1 = ParseHla(HlaILocus.A, fields[1]);
                        HlaI a2 = ParseHla(HlaILocus.A, fields[2], a1);
                        HlaI b1 = ParseHla(HlaILocus.B, fields[3]);
                        HlaI b2 = ParseHla(HlaILocus.B, fields[4], b1);
                        HlaI c1 = ParseHla(HlaILocus.C, fields[5]);
                        HlaI c2 = ParseHla(HlaILocus.C, fields[6], c1);

                        HlaIGenotype genotype = new HlaIGenotype(a1, a2, b1, b2, c1, c2);
                        genotype.SetMaxResolution(MaxResolution);

                        if (parseProbabilityLine)
                        {
                            // Machine-readable data: parse culture-independently so that "0.5" means
                            // the same thing regardless of the current locale.
                            double probability;
                            bool   isValidProbability =
                                double.TryParse(
                                    fields[7],
                                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                                    System.Globalization.CultureInfo.InvariantCulture,
                                    out probability)
                                && probability >= 0 && probability <= 1;

                            // Report the raw text: the parsed value is 0 whenever parsing failed.
                            isValidProbability.Enforce <ParseException>("The 8th field is not a valid probability. Read {0}.", fields[7]);
                            genotype.Probability = probability;
                        }

                        // Get or create the list for this patient explicitly.
                        List <HlaIGenotype> genotypesForPatient;
                        if (!result.TryGetValue(pid, out genotypesForPatient))
                        {
                            genotypesForPatient = new List <HlaIGenotype>();
                            result.Add(pid, genotypesForPatient);
                        }
                        genotypesForPatient.Add(genotype);
                    }
                    catch (ParseException parseError)
                    {
                        throw new ParseException(parseError.Message + " for patient " + fields[0]);
                    }
                }
            }

            if (MaxResolution == HlaIResolution.Group)
            {
                foreach (var pidAndGenos in result)
                {
                    List <HlaIGenotype> genotypes = pidAndGenos.Value;
                    if (genotypes.Count > 1)
                    {
                        // We've stripped out some of the 4 digits, so the list may now contain
                        // duplicates. Collapse those together and sum up their probabilities.
                        // The query MUST be materialized before the list is cleared: it reads from
                        // that same list, and a deferred query enumerated after Clear() yields nothing.
                        List <HlaIGenotype> collapsedGenotypes =
                            (from geno in genotypes
                             let hlaString = geno.EnumerateAll().StringJoin(" ")
                             group geno by hlaString into identicalHaps
                             let representative = identicalHaps.First()
                             select new HlaIGenotype(
                                 representative.AAlleles.Item1,
                                 representative.AAlleles.Item2,
                                 representative.BAlleles.Item1,
                                 representative.BAlleles.Item2,
                                 representative.CAlleles.Item1,
                                 representative.CAlleles.Item2)
                             {
                                 Probability = identicalHaps.Sum(h => h.Probability)
                             }).ToList();

                        genotypes.Clear();
                        genotypes.AddRange(collapsedGenotypes);
                    }
                }
            }

            return(result);
        }