예제 #1
0
        /// <summary>
        /// Streams the SNP file line by line and lazily yields one (snp, cid, allele pair) triple
        /// for every line that passes the filters.
        /// </summary>
        /// <remarks>
        /// Each line must contain exactly four tab-separated fields, in this order: snp, cid, a two-character
        /// value made only of the letters A, C, T, G, and a numeric confidence. Both format checks are
        /// reported through <c>Helper.CheckCondition</c> and run before the exclusion filter, so an
        /// excluded line is still validated. A line is skipped when its cid is in
        /// <paramref name="cidExcludeList"/>, its snp is in <paramref name="snpExcludeSet"/>, or its
        /// confidence is below <c>MissingThreshold</c>.
        /// </remarks>
        /// <param name="cidExcludeList">cids whose lines are skipped.</param>
        /// <param name="snpExcludeSet">snps whose lines are skipped.</param>
        /// <returns>A deferred sequence; the file is opened when enumeration starts.</returns>
        private IEnumerable <RowKeyColKeyValue <string, string, UOPair <char> > > TripleEnumerable(HashSet <string> cidExcludeList, HashSet <string> snpExcludeSet)
        {
            CounterWithMessages counterWithMessages = new CounterWithMessages(SnpFile, compressionRatio: GZ ? .9 : 0);

            using (TextReader textReader =
#if !SILVERLIGHT
                       GZ ? SnpFile.UnGZip()  :
#endif
                       SnpFile.OpenText())
            {
                string line;
                while (null != (line = textReader.ReadLine()))
                {
                    counterWithMessages.Increment();
                    string[] field = line.Split('\t');

                    // Lazy message: avoids concatenating the whole line on every well-formed row.
                    Helper.CheckCondition(field.Length == 4, () => "Expect lines of snp file to have four fields. " + line);
                    string snp   = field[0];
                    string cid   = field[1];
                    string value = field[2];

                    // The file is machine-readable data, so parse with the invariant culture; the
                    // current-culture overload would misread "0.95" where ',' is the decimal separator.
                    double confidence = double.Parse(field[3], System.Globalization.CultureInfo.InvariantCulture);
                    Helper.CheckCondition(value.Length == 2 && value.All(c => "ACTG".Contains(c)), () => "Expect values in snp file to be a pair of ACT or G. " + value);
                    if (cidExcludeList.Contains(cid) || confidence < MissingThreshold || snpExcludeSet.Contains(snp))
                    {
                        continue; // skip this line only; keep reading the rest of the file
                    }

                    yield return RowKeyColKeyValue.Create(snp, cid, UOPair.Create(value[0], value[1]));
                }
            }

            counterWithMessages.Finished();
        }
예제 #2
0
        /// <summary>
        /// Computes a kernel matrix over the columns of <paramref name="unnormalizedInput"/>,
        /// processing the columns in batches when a batch size is supplied.
        /// </summary>
        /// <remarks>
        /// With no batch size the call is forwarded to the single-argument <c>ToKernel</c>. Otherwise the
        /// input is row-normalized, its column keys are split into chunks, and one kernel block is computed
        /// for every pair of chunks (i, j) with j &gt;= i. Off-diagonal blocks are mirrored by transposition.
        /// The blocks are merged into one matrix, which is then normalized in place.
        /// </remarks>
        /// <param name="unnormalizedInput">Matrix whose column keys index the resulting kernel.</param>
        /// <param name="cidInBatchCountOrNull">Chunk size used to split the columns, or null for no batching.</param>
        /// <returns>The merged, normalized kernel matrix.</returns>
        public Matrix <string, string, double> ToKernel(Matrix <string, string, double> unnormalizedInput, int?cidInBatchCountOrNull = null)
        {
            if (!cidInBatchCountOrNull.HasValue)
            {
                return ToKernel(unnormalizedInput);
            }

            var normalized = RowNormalizer.Normalize(unnormalizedInput);
            var batches    = SpecialFunctions.DivideListIntoEqualChunksFromChunkSize <string>(normalized.ColKeys, cidInBatchCountOrNull.Value);
            var batchCount = batches.Count;

            Console.WriteLine("cids divided into {0} batches of about {1}", batchCount, cidInBatchCountOrNull);
            Helper.CheckCondition(batches.Sum(batch => batch.Count) == normalized.ColCount, "real assert");

            // One progress tick per block on or above the diagonal: n * (n + 1) / 2.
            var progress = new CounterWithMessages("kernel combintations ", 1, (batchCount * batchCount + batchCount) / 2);
            var blocks   = new Matrix <string, string, double> [batchCount, batchCount];

            for (int i = 0; i < batchCount; ++i)
            {
                Console.WriteLine("Loading batch {0}, size {1}x{2}", i, batches[i].Count, unnormalizedInput.RowCount);
                var leftBatch = normalized.SelectColsView(batches[i]).ToShoMatrix(verbose: true);

                // Each (i, j) pair writes only its own cells of the block grid.
                Parallel.For(i, batchCount, ParallelOptionsScope.Current, j =>
                {
                    Console.WriteLine("Loading batch {0}, size {1}x{2}", j, batches[j].Count, unnormalizedInput.RowCount);

                    if (i != j)
                    {
                        var rightBatch      = normalized.SelectColsView(batches[j]).ToShoMatrix(verbose: true);
                        ShoMatrix offBlock  = JustKernel(leftBatch, rightBatch);
                        blocks[i, j]        = offBlock;
                        // Fill the mirrored position with the transposed block.
                        blocks[j, i]        = offBlock.TransposeView().ToShoMatrix();
                    }
                    else
                    {
                        ShoMatrix diagonalBlock = JustKernel(leftBatch);
                        blocks[i, i]            = diagonalBlock;
                    }

                    progress.Increment();
                });
            }

            progress.Finished();

            var kernel = MatrixExtensions.MergeRowsAndColsView(blocks);

            bool keysLineUp = kernel.RowKeys.SequenceEqual(kernel.ColKeys) && kernel.ColKeys.SequenceEqual(unnormalizedInput.ColKeys);
            Helper.CheckCondition(keysLineUp, "Assert: MergeRows isn't working as expected");

            KernelNormalizeInPlace(ref kernel);
            return kernel;
        }
예제 #3
0
        /// <summary>
        /// Builds a one-column matrix holding a <c>LinearTransform</c> for every row of
        /// <paramref name="inputMatrix"/>.
        /// </summary>
        /// <remarks>
        /// The result shares the input's row keys and has a single column keyed by the empty string.
        /// Rows are processed in parallel; each iteration writes only the cell for its own row index.
        /// </remarks>
        /// <param name="inputMatrix">Matrix whose rows are passed to <c>CreateLinearTransform</c>.</param>
        /// <returns>The matrix of per-row transforms.</returns>
        public virtual Matrix <string, string, LinearTransform> LinearTransformMatrix(Matrix <string, string, double> inputMatrix)
        {
            var progress   = new CounterWithMessages("RowNormalizing ", null, inputMatrix.RowCount, quiet: !Verbose);
            var transforms = DenseMatrix <string, string, LinearTransform> .CreateDefaultInstance(inputMatrix.RowKeys, new[] { "" }, null);

            Parallel.ForEach(inputMatrix.AppendIndex(), ParallelOptionsScope.Current, rowAndIndex =>
            {
                progress.Increment();

                var rowIndex = rowAndIndex.Item2;
                var rowKey   = inputMatrix.RowKeys[rowIndex];
                transforms[rowIndex, 0] = CreateLinearTransform(rowAndIndex.Item1, rowKey);
            });

            progress.Finished();
            return transforms;
        }
예제 #4
0
        /// <summary>
        /// Maps every column key of <paramref name="matrix"/> to the list of doubles that
        /// <c>CreateDoubleList</c> produces for that column.
        /// </summary>
        /// <remarks>
        /// Columns are read through the matrix's transpose view and processed with a parallel query.
        /// Progress messages are suppressed when the matrix has fewer than ten columns.
        /// </remarks>
        /// <param name="matrix">Source matrix; its column keys become the dictionary keys.</param>
        /// <returns>A dictionary from column key to that column's list of doubles.</returns>
        private static Dictionary <string, List <double> > CreateCidToDoubleList(Matrix <string, string, double> matrix)
        {
            var progress = new CounterWithMessages("loading cid columns ", null, matrix.ColCount, quiet: matrix.ColCount < 10);

            // Viewing the transpose as a list of lists lets a column be addressed by its index.
            var columnsAsLists = (IList <IList <double> >)matrix.TransposeView();

            var cidToDoubleList = matrix.ColKeys
                                  .AsParallel().WithParallelOptionsScope()
                                  .Select(colKey => Tuple.Create(
                                              colKey,
                                              CreateDoubleList(matrix.IndexOfColKey[colKey], columnsAsLists, progress)))
                                  .ToDictionary();

            progress.Finished();
            return cidToDoubleList;
        }
예제 #5
0
        /// <summary>
        /// Returns a view of <paramref name="predictorIn"/> restricted to the rows that
        /// <c>IsGood</c> accepts against <paramref name="target"/>.
        /// </summary>
        /// <remarks>
        /// Rows are tested with a parallel query. The accepted keys are then intersected with the original
        /// row-key sequence, so the output view follows the row order of <paramref name="predictorIn"/>.
        /// </remarks>
        /// <typeparam name="T">Value type stored in both matrices.</typeparam>
        /// <param name="predictorIn">Matrix whose rows are candidates for filtering.</param>
        /// <param name="target">Matrix handed to <c>IsGood</c> together with each candidate row.</param>
        /// <returns>A row-selection view over <paramref name="predictorIn"/>.</returns>
        virtual public Matrix <string, string, T> Filter <T>(Matrix <string, string, T> predictorIn, Matrix <string, string, T> target)
        {
            var progress = new CounterWithMessages("rowFilter " + ToString(), null, predictorIn.RowCount, quiet: !Verbose);

            // AlwaysTrue is evaluated first for every row, before the IsGood test.
            var acceptedKeys = predictorIn.RowKeys
                               .AsParallel().WithParallelOptionsScope()
                               .Where(rowKey => AlwaysTrue(progress) && IsGood(predictorIn.SelectRowsView(rowKey), target))
                               .ToHashSet();

            progress.Finished();

            var keysInOriginalOrder = predictorIn.RowKeys.Intersect(acceptedKeys);
            return predictorIn.SelectRowsView(keysInOriginalOrder);
        }
예제 #6
0
        /// <summary>
        /// Streams the SNP file line by line and lazily yields one (snp, cid, allele pair) triple
        /// for every line whose value is not the "00" placeholder.
        /// </summary>
        /// <remarks>
        /// Each line must contain exactly three tab-separated fields, in this order: cid, snp, value.
        /// A value of "00" causes the line to be skipped; any other value must be two characters drawn
        /// from A, C, T, G. Both format checks are reported through <c>Helper.CheckCondition</c>.
        /// </remarks>
        /// <returns>A deferred sequence; the file is opened when enumeration starts.</returns>
        private IEnumerable <RowKeyColKeyValue <string, string, UOPair <char> > > TripleEnumerable()
        {
            CounterWithMessages counterWithMessages = new CounterWithMessages(SnpFile);

            using (TextReader textReader = SnpFile.OpenText())
            {
                string line;
                while (null != (line = textReader.ReadLine()))
                {
                    counterWithMessages.Increment();
                    string[] field = line.Split('\t');

                    // Lazy message: avoids concatenating the whole line on every well-formed row.
                    Helper.CheckCondition(field.Length == 3, () => "Expect lines of snp file to have three fields. " + line);
                    string cid   = field[0];
                    string snp   = field[1];
                    string value = field[2];
                    if (value == "00")
                    {
                        continue; // skip this line only; keep reading the rest of the file
                    }

                    Helper.CheckCondition(value.Length == 2 && value.All(c => "ACTG".Contains(c)), () => "Expect values in snp file to be a pair of ACTG or 00. " + value);

                    yield return RowKeyColKeyValue.Create(snp, cid, UOPair.Create(value[0], value[1]));
                }
                counterWithMessages.Finished();
            }
        }