/// <summary>
/// Lazily reads the tab-delimited snp file and yields one triple, built as
/// <c>RowKeyColKeyValue.Create(snp, cid, pair)</c>, for every line that is not filtered out.
/// </summary>
/// <remarks>
/// Every line must split on tabs into exactly four fields, in this order: snp, cid, a two-character
/// value made only of the letters A, C, T and G, and a numeric confidence. Validation runs before
/// filtering, so a malformed line fails the check even if it would have been excluded.
/// A line is skipped (reading continues) when its cid is in <paramref name="cidExcludeList"/>, its
/// confidence is below MissingThreshold, or its snp is in <paramref name="snpExcludeSet"/>.
/// The confidence is parsed with the invariant culture so that the text "0.95" means the same thing
/// regardless of the current thread culture.
/// The progress counter's Finished() is called only when the sequence is enumerated to the end.
/// </remarks>
/// <param name="cidExcludeList">cids whose lines are skipped.</param>
/// <param name="snpExcludeSet">snps whose lines are skipped.</param>
/// <returns>A deferred sequence of (snp, cid, unordered allele pair) triples.</returns>
/// <exception cref="System.FormatException">The fourth field of a line is not a valid number.</exception>
private IEnumerable<RowKeyColKeyValue<string, string, UOPair<char>>> TripleEnumerable(HashSet<string> cidExcludeList, HashSet<string> snpExcludeSet)
{
    CounterWithMessages counterWithMessages = new CounterWithMessages(SnpFile, compressionRatio: GZ ? .9 : 0);

    // Outside Silverlight builds the file is un-gzipped when GZ is set; otherwise it is opened as plain text.
    using (TextReader textReader =
#if !SILVERLIGHT
        GZ ? SnpFile.UnGZip() :
#endif
        SnpFile.OpenText())
    {
        string line;
        while (null != (line = textReader.ReadLine()))
        {
            counterWithMessages.Increment();

            string[] field = line.Split('\t');
            Helper.CheckCondition(field.Length == 4, "Expect lines of snp file to have four fields. " + line);

            string snp = field[0];
            string cid = field[1];
            string value = field[2];

            // The file is machine-written data, so parse culture-independently instead of
            // relying on whatever culture the current thread happens to have.
            double confidence = double.Parse(field[3], System.Globalization.CultureInfo.InvariantCulture);

            Helper.CheckCondition(value.Length == 2 && value.All(c => "ACTG".Contains(c)), () => "Expect values in snp file to be a pair of ACT or G. " + value);

            if (cidExcludeList.Contains(cid) || confidence < MissingThreshold || snpExcludeSet.Contains(snp))
            {
                continue; // skip just this line; do not stop reading
            }

            yield return RowKeyColKeyValue.Create(snp, cid, UOPair.Create(value[0], value[1]));
        }
    }

    counterWithMessages.Finished();
}
/// <summary>
/// Turns a three-character tped pair string such as "A G" into an unordered pair made of its
/// first and third characters.
/// </summary>
/// <remarks>
/// The input must be exactly three characters long with a space in the middle, and neither outer
/// character may be '?'. When either outer character is '0' both must be '0', and the result is
/// the pair ('?', '?').
/// </remarks>
/// <param name="pairString">The three-character pair text to convert.</param>
/// <returns>The unordered pair of the two outer characters, or ('?', '?') for "0 0".</returns>
private UOPair<char> StringToUOPairConverter(string pairString)
{
    bool wellFormed = pairString.Length == 3
        && pairString[1] == ' '
        && pairString[0] != '?'
        && pairString[2] != '?';
    Helper.CheckCondition(wellFormed, "expect pair string in tped file to be three characters long with the middle being space and neither of the others being '?'. " + pairString);

    // Common case first: neither side is the '0' marker, so the characters are used as they are.
    if (pairString[0] != '0' && pairString[2] != '0')
    {
        return UOPair.Create(pairString[0], pairString[2]);
    }

    // At least one side is '0'; a half-zero pair is rejected.
    Helper.CheckCondition(pairString[0] == '0' && pairString[2] == '0', "if either character in a pair string is '0' they should both be. " + pairString);
    return UOPair.Create('?', '?');
}
/// <summary>
/// Lazily reads a genome pop file and is written to yield one (snp, cid, pair) triple per snp per
/// data line.
/// </summary>
/// <remarks>
/// The first line is required but its content is ignored. The second line is split on commas to get
/// the snp names. Lines equal to "pop" are skipped. Every other line gets a generated id of the
/// form "cid0", "cid1", and so on.
/// NOTE: the loop body throws unconditionally right after the id is generated, exactly as the code
/// was found. Everything after that throw is unreachable, so enumerating a file with at least one
/// data line always ends in that exception.
/// </remarks>
/// <param name="genomePopFileName">Path of the genome pop file to open.</param>
/// <returns>A deferred sequence of triples.</returns>
private static IEnumerable<RowKeyColKeyValue<string, string, UOPair<char>>> TripleSequence(string genomePopFileName)
{
    using (TextReader reader = File.OpenText(genomePopFileName))
    {
        // Read only to prove the line exists; its content is not used.
        string ignoredFirstLine = reader.ReadLine();
        Helper.CheckCondition(ignoredFirstLine != null, "Expect genome pop file to contain at least one line");

        string snpHeader = reader.ReadLine();
        Helper.CheckCondition(snpHeader != null, "Expect genome pop file to contain at least two lines");
        string[] snpNames = snpHeader.Split(',');

        int cidCounter = -1;
        for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
        {
            if (row == "pop")
            {
                continue; // separator line: skip it, keep reading
            }

            cidCounter++;
            string cid = string.Format("cid{0}", cidCounter);

            // Tripwire kept verbatim from the original author; nothing below it in this loop runs.
            throw new Exception("Why did the next line have a ', StringSplitOptions.RemoveEmptyEntries'???");

            string[] idAndValues = row.Split(new char[] { ',' }, 2);
            string[] codes = idAndValues[1].TrimStart().Split(' ');
            Helper.CheckCondition(codes.Length == snpNames.Length, "Expect each line to contain one entry per snp. " + cid);

            for (int i = 0; i < snpNames.Length; ++i)
            {
                string code = codes[i];
                string snp = snpNames[i];

                UOPair<char> pair;
                switch (code)
                {
                    case "0101":
                        pair = UOPair.Create('1', '1');
                        break;
                    case "0102":
                    case "0201":
                        pair = UOPair.Create('1', '2');
                        break;
                    case "0202":
                        pair = UOPair.Create('2', '2');
                        break;
                    default:
                        throw new MatrixFormatException("Illegal value " + code);
                }

                yield return RowKeyColKeyValue.Create(snp, cid, pair);
            }
        }
    }
}
/// <summary>
/// Three-valued test of whether this genotype contains the given Hla. Returns true if and only if
/// at least one allele at the relevant locus reports a definite match. Returns false if and only if
/// both alleles are present and both report a definite non-match. Returns null in every other case.
/// </summary>
/// <param name="unambiguousHla">The Hla to look for; it must not be ambiguous.</param>
/// <param name="mixtureSemantics">Forwarded unchanged to each allele's own Matches call.</param>
/// <returns>true, false, or null when the answer cannot be determined.</returns>
public bool? Matches(HlaI unambiguousHla, MixtureSemantics mixtureSemantics = MixtureSemantics.Uncertainty)
{
    Helper.CheckCondition<ArgumentException>(!unambiguousHla.IsAmbiguous, "Can only check if you have an uynambiguous Hla");

    // Pick the allele tuple that belongs to the queried locus.
    Tuple<HlaI, HlaI> allelesAtLocus;
    switch (unambiguousHla.Locus)
    {
        case HlaILocus.A:
            allelesAtLocus = AAlleles;
            break;
        case HlaILocus.B:
            allelesAtLocus = BAlleles;
            break;
        case HlaILocus.C:
            allelesAtLocus = CAlleles;
            break;
        default:
            throw new Exception("Can't get here.");
    }

    // An unordered pair is used because it orders the nulls: a single null ends up in First.
    UOPair<HlaI> pair = UOPair.Create(allelesAtLocus.Item1, allelesAtLocus.Item2);

    if (pair.First == null && pair.Second == null)
    {
        return null; // nothing is known about this locus
    }

    // Second cannot be the lone null, so it is always safe to ask it first.
    bool? secondVerdict = pair.Second.Matches(unambiguousHla, mixtureSemantics);
    if (secondVerdict == true)
    {
        return true; // one definite match is enough
    }

    if (pair.First == null)
    {
        return null; // the other allele is unknown, so no conclusion is possible
    }

    bool? firstVerdict = pair.First.Matches(unambiguousHla, mixtureSemantics);
    if (firstVerdict == true)
    {
        return true;
    }

    // Neither allele matched for sure. Only when both gave a definite answer is that answer "no".
    if (firstVerdict.HasValue && secondVerdict.HasValue)
    {
        return false;
    }

    return null;
}
/// <summary>
/// Lazily reads a tab-delimited snp matrix and yields one (snp, cid, pair) triple per non-missing cell.
/// </summary>
/// <remarks>
/// The first line is a header whose first column must be blank; its remaining columns are the snp
/// names. In every later line the first column is the cid and each other column is one of:
/// "-" (cell skipped), a single letter from A, C, T, G (yielded as a pair of that letter with
/// itself), or "a/b" where a and b are each one of A, C, T, G. Every line must have as many columns
/// as the header.
/// </remarks>
/// <returns>A deferred sequence of triples built as RowKeyColKeyValue.Create(snp, cid, pair).</returns>
private IEnumerable<RowKeyColKeyValue<string, string, UOPair<char>>> TripleEnumerable()
{
    CounterWithMessages progress = new CounterWithMessages("Reading " + SnpFile.Name, messageIntervalOrNull: 1000);

    using (TextReader reader = SnpFile.OpenText())
    {
        string header = reader.ReadLine();
        Helper.CheckCondition(header != null, "Expect file to contain a first line");

        string[] snpNames = header.Split('\t');
        Helper.CheckCondition(snpNames.Length > 0 && snpNames[0] == "", "Expect first column of first line to be blank");

        for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
        {
            progress.Increment();

            string[] cells = row.Split('\t');
            Helper.CheckCondition(cells.Length == snpNames.Length, "Expect all lines to have the same # of columns");
            string cid = cells[0];

            // Column 0 holds the cid, so the snp columns start at 1.
            for (int column = 1; column < snpNames.Length; ++column)
            {
                string snp = snpNames[column];
                string cell = cells[column];

                if (cell == "-")
                {
                    continue; // missing cell: skip it, keep scanning the row
                }

                char firstAllele;
                char secondAllele;
                if (cell.Length == 1)
                {
                    // A single letter stands for a pair of that letter with itself.
                    firstAllele = cell[0];
                    Helper.CheckCondition("ACTG".Contains(firstAllele), () => "Expect values in snp file to be ACT or G. " + cell);
                    secondAllele = firstAllele;
                }
                else
                {
                    Helper.CheckCondition(cell.Length == 3 && cell[1] == '/' && "ACTG".Contains(cell[0]) && "ACTG".Contains(cell[2]), () => "Expect longer values in snp file be of the form 'a/b' where a & b are ACT or G");
                    firstAllele = cell[0];
                    secondAllele = cell[2];
                }

                yield return RowKeyColKeyValue.Create(snp, cid, UOPair.Create(firstAllele, secondAllele));
            }
        }
    }
}
/// <summary>
/// Lazily reads a tab-delimited snp file with three fields per line (cid, snp, value) and yields
/// one (snp, cid, pair) triple for each line whose value is not "00".
/// </summary>
/// <remarks>
/// A value other than "00" must be exactly two characters, each one of A, C, T, G. The progress
/// counter's Finished() is called only when the sequence is enumerated to the end.
/// </remarks>
/// <returns>A deferred sequence of triples built as RowKeyColKeyValue.Create(snp, cid, pair).</returns>
private IEnumerable<RowKeyColKeyValue<string, string, UOPair<char>>> TripleEnumerable()
{
    CounterWithMessages progress = new CounterWithMessages(SnpFile);

    using (TextReader reader = SnpFile.OpenText())
    {
        for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
        {
            progress.Increment();

            string[] parts = row.Split('\t');
            Helper.CheckCondition(parts.Length == 3, "Expect lines of snp file to have three fields. " + row);

            string cid = parts[0];
            string snp = parts[1];
            string alleles = parts[2];

            // "00" marks a value to skip; such lines are passed over without further validation.
            if (alleles != "00")
            {
                Helper.CheckCondition(alleles.Length == 2 && alleles.All(letter => "ACTG".Contains(letter)), () => "Expect values in snp file to be a pair of ACTG or 00. " + alleles);
                yield return RowKeyColKeyValue.Create(snp, cid, UOPair.Create(alleles[0], alleles[1]));
            }
        }

        progress.Finished();
    }
}
/// <summary>
/// Initializes the converter by handing two delegates to the base constructor. The first builds an
/// unordered pair from elements 0 and 1 of its input; the second formats a pair as its First
/// immediately followed by its Second.
/// </summary>
/// <remarks>
/// NOTE(review): presumably these are the string-to-pair and pair-to-string directions of the base
/// converter; confirm against the base class, which is not visible here. The first delegate does
/// not check the input length.
/// </remarks>
public StringToUOPairConverter()
    : base(
        text => UOPair.Create(text[0], text[1]),
        uoPair => string.Format("{0}{1}", uoPair.First, uoPair.Second))
{
}