/// <summary>
/// Lazily reads the tab-delimited SNP file and yields one (snp, cid, value pair) triple per accepted line.
/// </summary>
/// <param name="cidExcludeList">Cids whose lines are skipped.</param>
/// <param name="snpExcludeSet">Snps whose lines are skipped.</param>
/// <returns>
/// A deferred sequence of triples built with <c>RowKeyColKeyValue.Create(snp, cid, UOPair.Create(value[0], value[1]))</c>.
/// </returns>
/// <remarks>
/// Each line must hold four fields: snp, cid, a two-character value drawn from A, C, T, G, and a confidence.
/// Both format checks run before the exclusion filter, so a malformed line is reported through
/// <c>Helper.CheckCondition</c> even when its cid or snp is excluded. Lines whose confidence is below
/// <c>MissingThreshold</c> are skipped. Because this is an iterator, nothing is read until the sequence is
/// enumerated, and <c>Finished()</c> is only reached when enumeration runs to the end.
/// </remarks>
private IEnumerable<RowKeyColKeyValue<string, string, UOPair<char>>> TripleEnumerable(HashSet<string> cidExcludeList, HashSet<string> snpExcludeSet)
{
    CounterWithMessages counterWithMessages = new CounterWithMessages(SnpFile, compressionRatio: GZ ? .9 : 0);
    using (TextReader textReader =
#if !SILVERLIGHT
        GZ ? SnpFile.UnGZip() :
#endif
        SnpFile.OpenText())
    {
        string line;
        while (null != (line = textReader.ReadLine()))
        {
            counterWithMessages.Increment();

            string[] field = line.Split('\t');
            Helper.CheckCondition(field.Length == 4, "Expect lines of snp file to have four fields. " + line);
            string snp = field[0];
            string cid = field[1];
            string value = field[2];

            // The file is machine-written data, so parse with the invariant culture: '.' must be the
            // decimal separator regardless of the locale of the machine running this code.
            double confidence = double.Parse(field[3], System.Globalization.CultureInfo.InvariantCulture);

            Helper.CheckCondition(value.Length == 2 && value.All(c => "ACTG".Contains(c)), () => "Expect values in snp file to be a pair of ACT or G. " + value);

            if (cidExcludeList.Contains(cid) || confidence < MissingThreshold || snpExcludeSet.Contains(snp))
            {
                continue; // skip this line only; keep reading the rest of the file
            }

            yield return RowKeyColKeyValue.Create(snp, cid, UOPair.Create(value[0], value[1]));
        }
    }
    counterWithMessages.Finished();
}
/// <summary>
/// Builds a kernel matrix over the columns (cids) of <paramref name="unnormalizedInput"/>, optionally
/// computing it in column batches.
/// </summary>
/// <param name="unnormalizedInput">Input matrix; it is passed through <c>RowNormalizer.Normalize</c> first.</param>
/// <param name="cidInBatchCountOrNull">
/// Chunk size handed to <c>DivideListIntoEqualChunksFromChunkSize</c>. When null, the call is forwarded to
/// <c>ToKernel(unnormalizedInput)</c> and no batching is done here.
/// </param>
/// <returns>The merged kernel after <c>KernelNormalizeInPlace</c> has been applied to it.</returns>
/// <remarks>
/// For every batch pair (i, j) with j &gt;= i one kernel block is computed; off-diagonal blocks are also stored
/// at (j, i) as a materialized transpose. The inner loop over j runs through <c>Parallel.For</c>.
/// </remarks>
public Matrix<string, string, double> ToKernel(Matrix<string, string, double> unnormalizedInput, int? cidInBatchCountOrNull = null)
{
    if (!cidInBatchCountOrNull.HasValue)
    {
        return ToKernel(unnormalizedInput);
    }

    var normalized = RowNormalizer.Normalize(unnormalizedInput);
    var batches = SpecialFunctions.DivideListIntoEqualChunksFromChunkSize<string>(normalized.ColKeys, cidInBatchCountOrNull.Value);
    int batchCount = batches.Count;

    Console.WriteLine("cids divided into {0} batches of about {1}", batchCount, cidInBatchCountOrNull);
    Helper.CheckCondition(batches.Sum(batch => batch.Count) == normalized.ColCount, "real assert");

    // One unit of work per block on or above the diagonal.
    var progress = new CounterWithMessages("kernel combintations ", 1, (batchCount * batchCount + batchCount) / 2);
    var blocks = new Matrix<string, string, double>[batchCount, batchCount];

    for (int i = 0; i < batchCount; ++i)
    {
        int rowBatch = i;
        Console.WriteLine("Loading batch {0}, size {1}x{2}", rowBatch, batches[rowBatch].Count, unnormalizedInput.RowCount);
        var rowBatchMatrix = normalized.SelectColsView(batches[rowBatch]).ToShoMatrix(verbose: true);

        Parallel.For(rowBatch, batchCount, ParallelOptionsScope.Current, colBatch =>
        {
            Console.WriteLine("Loading batch {0}, size {1}x{2}", colBatch, batches[colBatch].Count, unnormalizedInput.RowCount);

            if (rowBatch != colBatch)
            {
                // Off-diagonal block: materialize the other batch, then fill both (i, j) and (j, i).
                var colBatchMatrix = normalized.SelectColsView(batches[colBatch]).ToShoMatrix(verbose: true);
                ShoMatrix offDiagonal = JustKernel(rowBatchMatrix, colBatchMatrix);
                blocks[rowBatch, colBatch] = offDiagonal;
                blocks[colBatch, rowBatch] = offDiagonal.TransposeView().ToShoMatrix();
            }
            else
            {
                // Diagonal block: kernel of the batch with itself.
                ShoMatrix diagonal = JustKernel(rowBatchMatrix);
                blocks[rowBatch, rowBatch] = diagonal;
            }

            progress.Increment();
        });
    }
    progress.Finished();

    var merged = MatrixExtensions.MergeRowsAndColsView(blocks);
    bool keysLineUp = merged.RowKeys.SequenceEqual(merged.ColKeys)
        && merged.ColKeys.SequenceEqual(unnormalizedInput.ColKeys);
    Helper.CheckCondition(keysLineUp, "Assert: MergeRows isn't working as expected");

    KernelNormalizeInPlace(ref merged);
    return merged;
}
/// <summary>
/// Creates one <c>LinearTransform</c> per row of <paramref name="inputMatrix"/> and returns them as a
/// single-column matrix.
/// </summary>
/// <param name="inputMatrix">Matrix whose rows are each handed to <c>CreateLinearTransform</c>.</param>
/// <returns>
/// A dense matrix keyed by the input's row keys with one column keyed by the empty string; cell
/// (rowIndex, 0) holds the transform created for that row.
/// </returns>
/// <remarks>Rows are processed with <c>Parallel.ForEach</c>; each iteration writes a distinct cell.</remarks>
public virtual Matrix<string, string, LinearTransform> LinearTransformMatrix(Matrix<string, string, double> inputMatrix)
{
    var progress = new CounterWithMessages("RowNormalizing ", null, inputMatrix.RowCount, quiet: !Verbose);
    var transforms = DenseMatrix<string, string, LinearTransform>.CreateDefaultInstance(inputMatrix.RowKeys, new[] { "" }, null);

    Parallel.ForEach(inputMatrix.AppendIndex(), ParallelOptionsScope.Current, rowWithIndex =>
    {
        progress.Increment();

        var rowValues = rowWithIndex.Item1;
        var rowIndex = rowWithIndex.Item2;
        var transform = CreateLinearTransform(rowValues, inputMatrix.RowKeys[rowIndex]);
        transforms[rowIndex, 0] = transform;
    });

    progress.Finished();
    return transforms;
}
/// <summary>
/// Builds a dictionary from each column key of <paramref name="matrix"/> to the list produced by
/// <c>CreateDoubleList</c> for that column.
/// </summary>
/// <param name="matrix">Source matrix; its transpose view is accessed as a list of lists of doubles.</param>
/// <returns>A dictionary keyed by column key.</returns>
/// <remarks>
/// Column keys are processed as a parallel query. The progress counter is passed into
/// <c>CreateDoubleList</c> and stays quiet when the matrix has fewer than 10 columns.
/// </remarks>
private static Dictionary<string, List<double>> CreateCidToDoubleList(Matrix<string, string, double> matrix)
{
    var progress = new CounterWithMessages("loading cid columns ", null, matrix.ColCount, quiet: matrix.ColCount < 10);

    // Transposing makes every original column addressable as one inner list.
    var columnsAsLists = (IList<IList<double>>)matrix.TransposeView();

    var cidToValues = matrix.ColKeys
        .AsParallel().WithParallelOptionsScope()
        .Select(colKey => Tuple.Create(
            colKey,
            CreateDoubleList(matrix.IndexOfColKey[colKey], columnsAsLists, progress)))
        .ToDictionary();

    progress.Finished();
    return cidToValues;
}
/// <summary>
/// Returns a view of <paramref name="predictorIn"/> restricted to the rows that <c>IsGood</c> accepts.
/// </summary>
/// <param name="predictorIn">Matrix whose rows are tested one at a time.</param>
/// <param name="target">Passed unchanged to <c>IsGood</c> alongside each single-row view.</param>
/// <returns>
/// A rows view of <paramref name="predictorIn"/> over the accepted row keys, taken in the order of
/// <c>predictorIn.RowKeys</c>.
/// </returns>
/// <remarks>
/// Rows are tested as a parallel query. <c>AlwaysTrue(counter)</c> is evaluated first for every row;
/// presumably it advances the progress counter and returns true (its body is not visible here).
/// </remarks>
virtual public Matrix<string, string, T> Filter<T>(Matrix<string, string, T> predictorIn, Matrix<string, string, T> target)
{
    var progress = new CounterWithMessages("rowFilter " + ToString(), null, predictorIn.RowCount, quiet: !Verbose);

    var acceptedRowKeys = predictorIn.RowKeys
        .AsParallel().WithParallelOptionsScope()
        .Where(rowKey => AlwaysTrue(progress) && IsGood(predictorIn.SelectRowsView(rowKey), target))
        .ToHashSet();

    progress.Finished();

    // Intersect against RowKeys so the surviving keys follow the input's row order.
    var orderedAcceptedKeys = predictorIn.RowKeys.Intersect(acceptedRowKeys);
    return predictorIn.SelectRowsView(orderedAcceptedKeys);
}
/// <summary>
/// Lazily reads the tab-delimited SNP file and yields one (snp, cid, value pair) triple per line whose
/// value is not "00".
/// </summary>
/// <returns>
/// A deferred sequence of triples built with <c>RowKeyColKeyValue.Create(snp, cid, UOPair.Create(value[0], value[1]))</c>.
/// </returns>
/// <remarks>
/// Each line must hold three fields in the order cid, snp, value. A value of "00" is skipped; any other
/// value must be two characters drawn from A, C, T, G. Nothing is read until the sequence is enumerated.
/// </remarks>
private IEnumerable<RowKeyColKeyValue<string, string, UOPair<char>>> TripleEnumerable()
{
    // An earlier, disabled variant estimated the total line count from the file length and the length
    // of the first line to drive the progress counter; the counter is now built from the file alone.
    var progress = new CounterWithMessages(SnpFile);

    using (TextReader reader = SnpFile.OpenText())
    {
        for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            progress.Increment();

            string[] parts = line.Split('\t');
            Helper.CheckCondition(parts.Length == 3, "Expect lines of snp file to have three fields. " + line);

            string cid = parts[0];
            string snp = parts[1];
            string value = parts[2];

            // "00" lines are skipped without validation; reading continues with the next line.
            if (value != "00")
            {
                Helper.CheckCondition(value.Length == 2 && value.All(c => "ACTG".Contains(c)), () => "Expect values in snp file to be a pair of ACTG or 00. " + value);
                yield return RowKeyColKeyValue.Create(snp, cid, UOPair.Create(value[0], value[1]));
            }
        }
        progress.Finished();
    }
}