/// <summary>
/// Lazily reads the SNP file line by line and yields one (snp, cid, allele pair) triple
/// for every line that passes the exclusion and confidence filters.
/// </summary>
/// <remarks>
/// Every line must consist of exactly four tab-separated fields, in this order:
/// snp, cid, a two-character value made up of the letters A, C, T, G, and a numeric confidence.
/// The format checks run before the filters, so a malformed line is reported through
/// <c>Helper.CheckCondition</c> even when that line would have been skipped.
/// The confidence is parsed with the invariant culture so that the result does not depend
/// on the regional settings of the machine reading the file.
/// </remarks>
/// <param name="cidExcludeList">Lines whose cid is contained in this set are skipped.</param>
/// <param name="snpExcludeSet">Lines whose snp is contained in this set are skipped.</param>
/// <returns>A deferred sequence of triples with the snp as row key, the cid as column key and the allele pair as value.</returns>
/// <exception cref="System.FormatException">The fourth field of a line is not a valid number.</exception>
private IEnumerable<RowKeyColKeyValue<string, string, UOPair<char>>> TripleEnumerable(HashSet<string> cidExcludeList, HashSet<string> snpExcludeSet)
{
    CounterWithMessages counterWithMessages = new CounterWithMessages(SnpFile, compressionRatio: GZ ? .9 : 0);
    using (TextReader textReader =
#if !SILVERLIGHT
        GZ ? SnpFile.UnGZip() :
#endif
        SnpFile.OpenText())
    {
        string line;
        while (null != (line = textReader.ReadLine()))
        {
            counterWithMessages.Increment();

            string[] field = line.Split('\t');
            Helper.CheckCondition(field.Length == 4, "Expect lines of snp file to have four fields. " + line);
            string snp = field[0];
            string cid = field[1];
            string value = field[2];

            // Data files are machine-readable: parse with the invariant culture so "0.95"
            // means the same thing regardless of the current thread culture.
            double confidence = double.Parse(field[3], System.Globalization.CultureInfo.InvariantCulture);

            Helper.CheckCondition(value.Length == 2 && value.All(c => "ACTG".Contains(c)),
                () => "Expect values in snp file to be a pair of ACT or G. " + value);

            if (cidExcludeList.Contains(cid) || confidence < MissingThreshold || snpExcludeSet.Contains(snp))
            {
                continue; // skip this line only; keep reading the rest of the file
            }

            yield return RowKeyColKeyValue.Create(snp, cid, UOPair.Create(value[0], value[1]));
        }
    }
    counterWithMessages.Finished();
}
/// <summary>
/// BVT: creates an empty DensePairAnsi with one row ("R0") and one column ("C0"), feeds it a
/// single sparse triple through GetInstanceFromSparseInternal, and checks the row/column keys
/// and the row-key index.
/// </summary>
public void ValidateDensePairAnsiGetInstanceFromSparseInternal()
{
    var missingPair = new UOPair<char>('?', '?');
    var knownPair = new UOPair<char>('A', 'T');

    // One-element sparse input: ("R0", "C0") -> A/T.
    var sparseTriples = new List<RowKeyColKeyValue<string, string, UOPair<char>>>
    {
        new RowKeyColKeyValue<string, string, UOPair<char>>("R0", "C0", knownPair)
    };

    DensePairAnsi matrix = DensePairAnsi.CreateEmptyInstance(
        new string[] { "R0" },
        new string[] { "C0" },
        missingPair);
    matrix.GetInstanceFromSparseInternal(sparseTriples);

    Assert.AreEqual("R0", matrix.RowKeys[0]);
    Assert.AreEqual("C0", matrix.ColKeys[0]);
    Assert.AreEqual(0, matrix.IndexOfRowKey["R0"]);

    // The same success line goes to the console and to the application log.
    Console.WriteLine(
        "DensePairAnsi BVT : Validation of GetInstanceFromSparseInternal() method successful");
    ApplicationLog.WriteLine(
        "DensePairAnsi BVT : Validation of GetInstanceFromSparseInternal() method successful");
}
/// <summary>
/// Lazily reads a genome pop file and is written to yield one (snp, cid, allele pair) triple
/// per genotype entry.
/// </summary>
/// <remarks>
/// The first line is required but its content is ignored; the second line is a comma-separated
/// list of snp names. Every following line other than the literal "pop" is treated as one
/// individual, named "cid0", "cid1", ... in file order.
/// NOTE: as written, enumeration throws on the first individual line because of the
/// unconditional <c>throw</c> below; the parsing code after it is currently unreachable.
/// </remarks>
/// <param name="genomePopFileName">Path of the genome pop file to open.</param>
/// <returns>A deferred sequence of triples with the snp as row key and the cid as column key.</returns>
private static IEnumerable<RowKeyColKeyValue<string, string, UOPair<char>>> TripleSequence(string genomePopFileName)
{
    using (TextReader reader = File.OpenText(genomePopFileName))
    {
        // The first line carries nothing we use; it only has to be present.
        string titleLine = reader.ReadLine();
        Helper.CheckCondition(titleLine != null, "Expect genome pop file to contain at least one line");

        string snpHeader = reader.ReadLine();
        Helper.CheckCondition(snpHeader != null, "Expect genome pop file to contain at least two lines");
        string[] snpNames = snpHeader.Split(',');

        int individualNumber = -1;
        for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
        {
            if (row == "pop")
            {
                continue; // separator line: skip it, but keep reading
            }

            individualNumber++;
            string cid = string.Format("cid{0}", individualNumber);

            // Tripwire left by the original author; everything below it in this loop is unreachable.
            throw new Exception("Why did the next line have a ', StringSplitOptions.RemoveEmptyEntries'???");

            // Split off everything before the first comma, then split the remainder on spaces.
            string[] labelAndGenotypes = row.Split(new char[] { ',' }, 2);
            string[] genotypes = labelAndGenotypes[1].TrimStart().Split(' ');
            Helper.CheckCondition(genotypes.Length == snpNames.Length, "Expect each line to contain one entry per snp. " + cid);

            for (int column = 0; column < snpNames.Length; column++)
            {
                string genotype = genotypes[column];
                string snp = snpNames[column];

                UOPair<char> alleles;
                if (genotype == "0101")
                {
                    alleles = UOPair.Create('1', '1');
                }
                else if (genotype == "0102" || genotype == "0201")
                {
                    alleles = UOPair.Create('1', '2');
                }
                else if (genotype == "0202")
                {
                    alleles = UOPair.Create('2', '2');
                }
                else
                {
                    throw new MatrixFormatException("Illegal value " + genotype);
                }

                yield return RowKeyColKeyValue.Create(snp, cid, alleles);
            }
        }
    }
}
/// <summary>
/// Lazily reads a tab-delimited SNP table (one header row of snp names, then one row per cid)
/// and yields a (snp, cid, allele pair) triple for every non-missing cell.
/// </summary>
/// <remarks>
/// The first cell of the header row must be empty. Every data row must have the same number of
/// columns as the header; its first cell is the cid. A cell is one of:
/// "-" (skipped), a single letter from A, C, T, G (yielded as that letter paired with itself),
/// or "a/b" where a and b are letters from A, C, T, G.
/// </remarks>
/// <returns>A deferred sequence of triples with the snp as row key and the cid as column key.</returns>
private IEnumerable<RowKeyColKeyValue<string, string, UOPair<char>>> TripleEnumerable()
{
    CounterWithMessages counterWithMessages = new CounterWithMessages("Reading " + SnpFile.Name, messageIntervalOrNull: 1000);
    using (TextReader reader = SnpFile.OpenText())
    {
        string header = reader.ReadLine();
        Helper.CheckCondition(header != null, "Expect file to contain a first line");
        string[] snpNames = header.Split('\t');
        Helper.CheckCondition(snpNames.Length > 0 && snpNames[0] == "", "Expect first column of first line to be blank");

        for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
        {
            counterWithMessages.Increment();

            string[] cells = row.Split('\t');
            Helper.CheckCondition(cells.Length == snpNames.Length, "Expect all lines to have the same # of columns");
            string cid = cells[0];

            // Column 0 holds the cid, so the snp cells start at column 1.
            for (int column = 1; column < snpNames.Length; column++)
            {
                string snp = snpNames[column];
                string cell = cells[column];

                if (cell == "-")
                {
                    continue; // "-" cell: nothing to yield, move on to the next column
                }

                UOPair<char> alleles;
                if (cell.Length == 1)
                {
                    // Single letter: both halves of the pair are that letter.
                    char letter = cell[0];
                    Helper.CheckCondition("ACTG".Contains(letter), () => "Expect values in snp file to be ACT or G. " + cell);
                    alleles = UOPair.Create(letter, letter);
                }
                else
                {
                    bool isSlashPair = cell.Length == 3
                        && cell[1] == '/'
                        && "ACTG".Contains(cell[0])
                        && "ACTG".Contains(cell[2]);
                    Helper.CheckCondition(isSlashPair, () => "Expect longer values in snp file be of the form 'a/b' where a & b are ACT or G");
                    alleles = UOPair.Create(cell[0], cell[2]);
                }

                yield return RowKeyColKeyValue.Create(snp, cid, alleles);
            }
        }
    }
}
/// <summary>
/// BVT: calls DensePairAnsi.GetInstanceFromSparse with a list of sparse triples and checks
/// that a non-null instance comes back.
/// </summary>
public void ValidateDensePairAnsiGetInstanceFromSparseEnum()
{
    // NOTE(review): this triple is constructed but never added to the list below, so the
    // factory is exercised with an empty sequence — confirm whether that is intended.
    var questionMarkPair = new UOPair<char>('?', '?');
    var unusedTriple =
        new RowKeyColKeyValue<string, string, UOPair<char>>("R0", "C0", questionMarkPair);

    var sparseTriples = new List<RowKeyColKeyValue<string, string, UOPair<char>>>();

    DensePairAnsi matrix = DensePairAnsi.GetInstanceFromSparse(sparseTriples);
    Assert.IsNotNull(matrix);

    // The same success line goes to the console and to the application log.
    Console.WriteLine(
        "DensePairAnsi BVT : Validation of GetInstanceFromSparse(Ienum) method successful");
    ApplicationLog.WriteLine(
        "DensePairAnsi BVT : Validation of GetInstanceFromSparse(Ienum) method successful");
}
/// <summary>
/// BVT: builds a PaddedDouble from a single sparse triple ("R0", "C0", 2) and checks the
/// resulting dimensions and row/column keys.
/// </summary>
public void ValidatePaddedDoubleGetInstanceFromSparseEnum()
{
    // One-element sparse input.
    var sparseTriples = new List<RowKeyColKeyValue<string, string, double>>
    {
        new RowKeyColKeyValue<string, string, double>("R0", "C0", 2)
    };

    PaddedDouble matrix = PaddedDouble.GetInstanceFromSparse(sparseTriples);

    // Exactly one column and one row, keyed as given in the triple.
    Assert.AreEqual(1, matrix.ColCount);
    Assert.AreEqual(1, matrix.RowCount);
    Assert.AreEqual("R0", matrix.RowKeys[0]);
    Assert.AreEqual("C0", matrix.ColKeys[0]);

    ApplicationLog.WriteLine(
        "PaddedDouble BVT : Validation of GetInstanceFromSparse(enum) method successful");
}
/// <summary>
/// Lazily reads a three-column, tab-delimited SNP file and yields one (snp, cid, allele pair)
/// triple per line whose value is not "00".
/// </summary>
/// <remarks>
/// Every line must have exactly three tab-separated fields, in this order: cid, snp, value.
/// A value of "00" causes the line to be skipped; any other value must be two letters drawn
/// from A, C, T, G.
/// </remarks>
/// <returns>A deferred sequence of triples with the snp as row key and the cid as column key.</returns>
private IEnumerable<RowKeyColKeyValue<string, string, UOPair<char>>> TripleEnumerable()
{
    CounterWithMessages counterWithMessages = new CounterWithMessages(SnpFile);
    using (TextReader reader = SnpFile.OpenText())
    {
        for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
        {
            counterWithMessages.Increment();

            string[] columns = row.Split('\t');
            Helper.CheckCondition(columns.Length == 3, "Expect lines of snp file to have three fields. " + row);

            // Note the column order in this format: cid first, then snp.
            string cid = columns[0];
            string snp = columns[1];
            string genotype = columns[2];

            if (genotype == "00")
            {
                continue; // "00" line: skip it, but keep reading
            }

            bool isAllelePair = genotype.Length == 2 && genotype.All(c => "ACTG".Contains(c));
            Helper.CheckCondition(isAllelePair, () => "Expect values in snp file to be a pair of ACTG or 00. " + genotype);

            yield return RowKeyColKeyValue.Create(snp, cid, UOPair.Create(genotype[0], genotype[1]));
        }

        // Reached only when the file has been read to the end.
        counterWithMessages.Finished();
    }
}
/// <summary>
/// BVT: passes a one-element list of sparse triples to PaddedDouble.GetInstanceFromSparse and
/// verifies the counts and keys of the instance it returns.
/// </summary>
public void ValidatePaddedDoubleGetInstanceFromSparseEnum()
{
    // Single entry: row "R0", column "C0", value 2.
    var entries = new List<RowKeyColKeyValue<string, string, double>>();
    entries.Add(new RowKeyColKeyValue<string, string, double>("R0", "C0", 2));

    PaddedDouble result = PaddedDouble.GetInstanceFromSparse(entries);

    Assert.AreEqual(1, result.ColCount);
    Assert.AreEqual(1, result.RowCount);
    Assert.AreEqual("R0", result.RowKeys[0]);
    Assert.AreEqual("C0", result.ColKeys[0]);

    ApplicationLog.WriteLine(
        "PaddedDouble BVT : Validation of GetInstanceFromSparse(enum) method successful");
}
/// <summary>
/// BVT: starts from an empty one-row, one-column DensePairAnsi, loads a single sparse triple
/// via GetInstanceFromSparseInternal, and verifies the row/column keys and the row-key index.
/// </summary>
public void ValidateDensePairAnsiGetInstanceFromSparseInternal()
{
    var missingPair = new UOPair<char>('?', '?');
    var knownPair = new UOPair<char>('A', 'T');

    // Single sparse entry: ("R0", "C0") -> A/T.
    var entries = new List<RowKeyColKeyValue<string, string, UOPair<char>>>();
    entries.Add(new RowKeyColKeyValue<string, string, UOPair<char>>("R0", "C0", knownPair));

    string[] rowKeys = new string[] { "R0" };
    string[] colKeys = new string[] { "C0" };
    DensePairAnsi target = DensePairAnsi.CreateEmptyInstance(rowKeys, colKeys, missingPair);
    target.GetInstanceFromSparseInternal(entries);

    Assert.AreEqual("R0", target.RowKeys[0]);
    Assert.AreEqual("C0", target.ColKeys[0]);
    Assert.AreEqual(0, target.IndexOfRowKey["R0"]);

    ApplicationLog.WriteLine(
        "DensePairAnsi BVT : Validation of GetInstanceFromSparseInternal() method successful");
}