Exemplo n.º 1
0
        /// <summary>
        /// Lazily reads the tab-delimited snp file and yields one (snp, cid, allele pair) triple for every line that is kept.
        /// </summary>
        /// <remarks>
        /// Every line must have exactly four fields, in this order: snp, cid, a two-character value made only of
        /// A, C, T or G, and a numeric confidence. Each line is validated first and is then skipped when its cid is in
        /// <paramref name="cidExcludeList"/>, its snp is in <paramref name="snpExcludeSet"/>, or its confidence is
        /// below MissingThreshold. When GZ is set (non-Silverlight builds only) the file is opened through UnGZip().
        /// The confidence is parsed with the invariant culture so that the result does not depend on the
        /// regional settings of the machine reading the file.
        /// </remarks>
        /// <param name="cidExcludeList">cids whose lines are dropped.</param>
        /// <param name="snpExcludeSet">snps whose lines are dropped.</param>
        /// <returns>A deferred sequence of triples built with RowKeyColKeyValue.Create(snp, cid, pair).</returns>
        private IEnumerable <RowKeyColKeyValue <string, string, UOPair <char> > > TripleEnumerable(HashSet <string> cidExcludeList, HashSet <string> snpExcludeSet)
        {
            CounterWithMessages counterWithMessages = new CounterWithMessages(SnpFile, compressionRatio: GZ ? .9 : 0);

            using (TextReader textReader =
#if !SILVERLIGHT
                       GZ ? SnpFile.UnGZip()  :
#endif
                       SnpFile.OpenText())
            {
                string line;
                while (null != (line = textReader.ReadLine()))
                {
                    counterWithMessages.Increment();
                    string[] field = line.Split('\t');

                    // Build the message lazily so well-formed lines do not pay for a string concatenation.
                    Helper.CheckCondition(field.Length == 4, () => "Expect lines of snp file to have four fields. " + line);
                    string snp   = field[0];
                    string cid   = field[1];
                    string value = field[2];

                    // The file is machine-readable: always parse with '.' as the decimal separator,
                    // whatever the current thread culture is.
                    double confidence = double.Parse(field[3], System.Globalization.CultureInfo.InvariantCulture);
                    Helper.CheckCondition(value.Length == 2 && value.All(c => "ACTG".Contains(c)), () => "Expect values in snp file to be a pair of ACT or G. " + value);
                    if (cidExcludeList.Contains(cid) || confidence < MissingThreshold || snpExcludeSet.Contains(snp))
                    {
                        continue; // skip this line only; keep reading the rest of the file
                    }

                    yield return(RowKeyColKeyValue.Create(snp, cid, UOPair.Create(value[0], value[1])));
                }
            }

            // Only reached when the caller enumerates the sequence to the end.
            counterWithMessages.Finished();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Turns a three-character pair string from a tped file (two symbols separated by a space) into an unordered pair.
        /// </summary>
        /// <remarks>
        /// The input must be exactly three characters, with a space in the middle and no '?' on either side.
        /// If either symbol is '0' then both must be '0', and the result is the pair ('?', '?').
        /// Otherwise the two symbols themselves form the pair.
        /// </remarks>
        /// <param name="pairString">The three-character text to convert.</param>
        /// <returns>The pair created by UOPair.Create from the two symbols, or ('?', '?') for "0 0".</returns>
        private UOPair <char> StringToUOPairConverter(string pairString)
        {
            bool hasExpectedShape = pairString.Length == 3 && pairString[1] == ' ' && pairString[0] != '?' && pairString[2] != '?';
            Helper.CheckCondition(hasExpectedShape, "expect pair string in tped file to be three characters long with the middle being space and neither of the others being '?'. " + pairString);

            bool containsZero = pairString[0] == '0' || pairString[2] == '0';
            if (!containsZero)
            {
                // Ordinary case: both symbols are real values.
                return UOPair.Create(pairString[0], pairString[2]);
            }

            // A '0' on one side is only acceptable when the other side is '0' as well.
            bool bothZero = pairString[0] == '0' && pairString[2] == '0';
            Helper.CheckCondition(bothZero, "if either character in a pair string is '0' they should both be. " + pairString);
            return UOPair.Create('?', '?');
        }
Exemplo n.º 3
0
        /// <summary>
        /// Reads a genome pop file and is written to yield one (snp, cid, allele pair) triple per snp of every individual line.
        /// </summary>
        /// <remarks>
        /// The first line is ignored, the second line is the comma-separated list of snp names, and lines equal to
        /// "pop" are skipped. Individuals are named "cid0", "cid1", ... in the order they are encountered.
        /// NOTE: as written, the method throws unconditionally on the first line that is not "pop"; the parsing code
        /// after that throw is currently unreachable and is kept until the question in the exception message is answered.
        /// </remarks>
        /// <param name="genomePopFileName">Path of the genome pop file to open.</param>
        /// <returns>A deferred sequence of triples built with RowKeyColKeyValue.Create(snp, cid, pair).</returns>
        private static IEnumerable <RowKeyColKeyValue <string, string, UOPair <char> > > TripleSequence(string genomePopFileName)
        {
            using (TextReader reader = File.OpenText(genomePopFileName))
            {
                // The content of the first line is not used, but the line has to exist.
                string ignoredHeader = reader.ReadLine();
                Helper.CheckCondition(ignoredHeader != null, "Expect genome pop file to contain at least one line");

                string snpHeader = reader.ReadLine();
                Helper.CheckCondition(snpHeader != null, "Expect genome pop file to contain at least two lines");
                string[] snpNames = snpHeader.Split(',');

                int individualNumber = -1;
                for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
                {
                    if (row == "pop")
                    {
                        continue; // separator line: skip it and keep reading
                    }

                    individualNumber++;
                    string cid = string.Format("cid{0}", individualNumber);

                    // Deliberate tripwire left by the original author; everything below it in this loop is unreachable.
                    throw new Exception("Why did the next line have a ', StringSplitOptions.RemoveEmptyEntries'???");

                    string[] labelAndValues = row.Split(new char[] { ',' }, 2);
                    string[] genotypeCodes  = labelAndValues[1].TrimStart().Split(' ');
                    Helper.CheckCondition(genotypeCodes.Length == snpNames.Length, "Expect each line to contain one entry per snp. " + cid);

                    for (int i = 0; i < snpNames.Length; ++i)
                    {
                        string code = genotypeCodes[i];
                        string snp  = snpNames[i];

                        // Translate the four-digit code into its two allele symbols.
                        char alleleA;
                        char alleleB;
                        switch (code)
                        {
                        case "0101":
                            alleleA = '1';
                            alleleB = '1';
                            break;

                        case "0102":
                        case "0201":
                            alleleA = '1';
                            alleleB = '2';
                            break;

                        case "0202":
                            alleleA = '2';
                            alleleB = '2';
                            break;

                        default:
                            throw new MatrixFormatException("Illegal value " + code);
                        }

                        yield return RowKeyColKeyValue.Create(snp, cid, UOPair.Create(alleleA, alleleB));
                    }
                }
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Three-valued test of whether this genotype carries the given Hla.
        /// Returns true if and only if one of the two alleles at the Hla's locus reports a match.
        /// Returns false if and only if both alleles are present and both report a definite non-match.
        /// Returns null in every other case (nothing known at the locus, or a missing or undecided allele).
        /// </summary>
        /// <param name="unambiguousHla">The Hla to look for; its IsAmbiguous must be false.</param>
        /// <param name="mixtureSemantics">Passed unchanged to the per-allele Matches calls.</param>
        /// <returns>true, false or null as described in the summary.</returns>
        public bool? Matches(HlaI unambiguousHla, MixtureSemantics mixtureSemantics = MixtureSemantics.Uncertainty)
        {
            Helper.CheckCondition <ArgumentException>(!unambiguousHla.IsAmbiguous, "Can only check if you have an uynambiguous Hla");

            // Pick the allele pair stored for the locus of the Hla being tested.
            Tuple <HlaI, HlaI> allelesAtLocus;
            HlaILocus requestedLocus = unambiguousHla.Locus;
            if (requestedLocus == HlaILocus.A)
            {
                allelesAtLocus = AAlleles;
            }
            else if (requestedLocus == HlaILocus.B)
            {
                allelesAtLocus = BAlleles;
            }
            else if (requestedLocus == HlaILocus.C)
            {
                allelesAtLocus = CAlleles;
            }
            else
            {
                throw new Exception("Can't get here.");
            }

            // Per the original author's note, the unordered pair orders nulls first, so a single
            // missing allele ends up in First and Second is the one to examine first.
            UOPair <HlaI> unordered = UOPair.Create(allelesAtLocus.Item1, allelesAtLocus.Item2);
            HlaI maybeMissing = unordered.First;
            HlaI other        = unordered.Second;

            if (maybeMissing == null && other == null)
            {
                return null;    // nothing is known about this locus
            }

            bool? otherVerdict = other.Matches(unambiguousHla, mixtureSemantics);
            if (otherVerdict == true)
            {
                return true;    // one matching allele is enough
            }

            if (maybeMissing == null)
            {
                return null;    // the only known allele did not match and the other one is unknown
            }

            bool? firstVerdict = maybeMissing.Matches(unambiguousHla, mixtureSemantics);
            if (firstVerdict == true)
            {
                return true;    // one matching allele is enough
            }

            // Neither allele matched: the answer is a definite "no" only when both gave a definite answer.
            bool bothDefinite = firstVerdict.HasValue && otherVerdict.HasValue;
            return bothDefinite ? (bool?)false : null;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Lazily reads a tab-delimited snp table and yields one (snp, cid, allele pair) triple per non-missing cell.
        /// </summary>
        /// <remarks>
        /// The header line must start with an empty first column; its remaining columns are the snp names.
        /// Every following line must have the same number of columns as the header, with the cid in the first column.
        /// A cell equal to "-" is skipped, a single letter from A, C, T, G yields a pair of that letter twice,
        /// and any other cell must have the form "a/b" with a and b from A, C, T, G.
        /// </remarks>
        /// <returns>A deferred sequence of triples built with RowKeyColKeyValue.Create(snp, cid, pair).</returns>
        private IEnumerable <RowKeyColKeyValue <string, string, UOPair <char> > > TripleEnumerable()
        {
            CounterWithMessages progress = new CounterWithMessages("Reading " + SnpFile.Name, messageIntervalOrNull: 1000);

            using (TextReader reader = SnpFile.OpenText())
            {
                string header = reader.ReadLine();
                Helper.CheckCondition(header != null, "Expect file to contain a first line");
                string[] columnNames = header.Split('\t');
                Helper.CheckCondition(columnNames.Length > 0 && columnNames[0] == "", "Expect first column of first line to be blank");

                for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
                {
                    progress.Increment();
                    string[] cells = row.Split('\t');
                    Helper.CheckCondition(cells.Length == columnNames.Length, "Expect all lines to have the same # of columns");
                    string cid = cells[0];

                    // Column 0 holds the cid, so the snp columns start at index 1.
                    for (int column = 1; column < columnNames.Length; ++column)
                    {
                        string snp  = columnNames[column];
                        string cell = cells[column];

                        if (cell == "-")
                        {
                            continue; // missing value: skip this cell only
                        }

                        char alleleA;
                        char alleleB;
                        if (cell.Length == 1)
                        {
                            // A single letter stands for the same allele twice.
                            alleleA = cell[0];
                            Helper.CheckCondition("ACTG".Contains(alleleA), () => "Expect values in snp file to be ACT or G. " + cell);
                            alleleB = alleleA;
                        }
                        else
                        {
                            Helper.CheckCondition(cell.Length == 3 && cell[1] == '/' && "ACTG".Contains(cell[0]) && "ACTG".Contains(cell[2]), () => "Expect longer values in snp file be of the form 'a/b' where a & b are ACT or G");
                            alleleA = cell[0];
                            alleleB = cell[2];
                        }

                        yield return RowKeyColKeyValue.Create(snp, cid, UOPair.Create(alleleA, alleleB));
                    }
                }
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Lazily reads the tab-delimited snp file and yields one (snp, cid, allele pair) triple per non-missing line.
        /// </summary>
        /// <remarks>
        /// Every line must have exactly three fields, in this order: cid, snp, value.
        /// A value of "00" is skipped without further validation; any other value must be two characters
        /// made only of A, C, T or G.
        /// </remarks>
        /// <returns>A deferred sequence of triples built with RowKeyColKeyValue.Create(snp, cid, pair).</returns>
        private IEnumerable <RowKeyColKeyValue <string, string, UOPair <char> > > TripleEnumerable()
        {
            CounterWithMessages progress = new CounterWithMessages(SnpFile);

            using (TextReader reader = SnpFile.OpenText())
            {
                for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
                {
                    progress.Increment();
                    string[] parts = row.Split('\t');
                    Helper.CheckCondition(parts.Length == 3, "Expect lines of snp file to have three fields. " + row);
                    string cid      = parts[0];
                    string snp      = parts[1];
                    string genotype = parts[2];

                    // "00" marks a missing value: such a line produces nothing.
                    if (genotype != "00")
                    {
                        Helper.CheckCondition(genotype.Length == 2 && genotype.All(letter => "ACTG".Contains(letter)), () => "Expect values in snp file to be a pair of ACTG or 00. " + genotype);

                        yield return RowKeyColKeyValue.Create(snp, cid, UOPair.Create(genotype[0], genotype[1]));
                    }
                }

                // Only reached when the caller enumerates the sequence to the end.
                progress.Finished();
            }
        }
Exemplo n.º 7
0
 /// <summary>
 /// Creates the converter by handing two functions to the base constructor: one that builds a pair from
 /// the first two characters of a string, and one that formats a pair as its First followed by its Second.
 /// </summary>
 public StringToUOPairConverter()
     : base(
         text => UOPair.Create(text[0], text[1]),
         uoPair => string.Format("{0}{1}", uoPair.First, uoPair.Second))
 {
 }