Exemplo n.º 1
0
        //private static IEnumerable<InputRow> CreateRowListQuery(List<KeyValuePair<string, string>> cidGroupAndInputFileNameList, string pTargetHeader, List<string> keyNameList)
        //{
        //    foreach (var cidGroupAndInputFileName in cidGroupAndInputFileNameList)
        //    {
        //        string cidGroup = cidGroupAndInputFileName.Key;
        //        string inputFileName = cidGroupAndInputFileName.Value;
        //        Console.WriteLine("Reading file " + inputFileName);

        //        MBT.Escience.CounterWithMessages counterWithMessages = new MBT.Escience.CounterWithMessages("reading cidGroup " + cidGroup + " file, line #{0}", 10000, null);
        //        string header;
        //        foreach (var line in SpecialFunctions.TabFileTable(inputFileName, includeWholeLine:false, out header))
        //        {
        //            counterWithMessages.Increment();
        //            List<string> keyList = keyNameList.Select(keyName => line[keyName]).ToList();
        //            InputRow inputRow = new InputRow
        //            {
        //                //SplitIndex = int.Parse(line.GetValueOrDefault("splitIndex", "0")),
        //                Cid = line["cid"],
        //                CidGroup = cidGroup,
        //                TargetVal = int.Parse(line["TargetVal"]),
        //                PTarget = double.Parse(line[pTargetHeader]),
        //                KeyList = keyList
        //            };
        //            yield return inputRow;
        //        }
        //    }
        //}

        /// <summary>
        /// Lazily reads one tab-delimited results file and yields an <see cref="InputRow"/> for every
        /// data line whose splitIndex column is accepted by <paramref name="splitIndexFilter"/>.
        /// </summary>
        /// <remarks>
        /// Column positions come from the fixed header layout declared in this method, not from the file;
        /// any line exactly equal to one of the two known header spellings is skipped wherever it occurs.
        /// Numeric columns are parsed with the invariant culture so results do not depend on the
        /// machine's regional settings. Because this is an iterator, nothing (including the column-name
        /// lookups and opening the file) happens until the result is enumerated.
        /// </remarks>
        /// <param name="splitIndexFilter">Predicate applied to the parsed splitIndex column; lines for which it returns false are skipped.</param>
        /// <param name="cidGroupAndInputFileName">Key is the cidGroup label stored on every yielded row; Value is the path of the file to read.</param>
        /// <param name="pTargetHeader">Name of the column (as spelled in the first header layout) parsed as a double into <c>PTarget</c>.</param>
        /// <param name="keyNameList">Names of the columns whose values, in this order, form each row's <c>KeyList</c>.</param>
        /// <returns>A deferred sequence of the accepted rows, in file order.</returns>
        private static IEnumerable<InputRow> CreateRowListQuery(Predicate<int> splitIndexFilter, KeyValuePair<string, string> cidGroupAndInputFileName, string pTargetHeader, List<string> keyNameList)
        {
            string header1 = "pathway\tcid\tnullIndex\tpTarget\ttargetVal\tlogLikelihood\tsplitIndex\tsplitCount\tworkIndex\tworkCount\ttestCidIndex\ttestCidCount";
            string header2 = "pathway\tcid\tnullIndex\tpTarget(or prediction)\ttargetVal\tlogLikelihood(or score)\tsplitIndex\tsplitCount\tworkIndex\tworkCount\ttestCidIndex\ttestCidCount";

            // Map each column name of the first header layout to its zero-based position.
            string[] fields = header1.Split('\t');
            Dictionary<string, int> fieldToIndex = new Dictionary<string, int>(fields.Length);
            for (int fieldIndex = 0; fieldIndex < fields.Length; ++fieldIndex)
            {
                fieldToIndex.Add(fields[fieldIndex], fieldIndex);
            }

            // Resolve every column we need once, up front, so the per-line work is plain array indexing.
            List<int> keyIndexList = keyNameList.Select(keyName => fieldToIndex[keyName]).ToList();
            int cidIndex = fieldToIndex["cid"];
            int targetValIndex = fieldToIndex["targetVal"];
            int pTargetHeaderIndex = fieldToIndex[pTargetHeader];
            int splitIndexIndex = fieldToIndex["splitIndex"];

            string cidGroup = cidGroupAndInputFileName.Key;
            string inputFileName = cidGroupAndInputFileName.Value;

            // The file is machine-written, so parse numbers culture-independently.
            System.Globalization.CultureInfo invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

            Console.WriteLine("Reading file " + inputFileName);

            MBT.Escience.CounterWithMessages counterWithMessages = new MBT.Escience.CounterWithMessages("reading cidGroup " + cidGroup + " file, line #{0}", 100000, null);
            using (TextReader textReader = File.OpenText(inputFileName))
            {
                string line;
                while ((line = textReader.ReadLine()) != null)
                {
                    // Header lines may appear anywhere in the file, so skip them rather than stopping.
                    if (line == header1 || line == header2)
                    {
                        continue;
                    }

                    counterWithMessages.Increment();
                    string[] valueList = line.Split('\t');

                    // Filter first: rejected lines should not pay for building a key list.
                    if (!splitIndexFilter(int.Parse(valueList[splitIndexIndex], invariantCulture)))
                    {
                        continue;
                    }

                    yield return new InputRow
                    {
                        Cid = valueList[cidIndex],
                        CidGroup = cidGroup,
                        TargetVal = int.Parse(valueList[targetValIndex], invariantCulture),
                        PTarget = double.Parse(valueList[pTargetHeaderIndex], invariantCulture),
                        KeyList = keyIndexList.Select(keyIndex => valueList[keyIndex]).ToList()
                    };
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds the <see cref="ResultsRow"/> for one key: computes a z0/p-value pair for each cidGroup
        /// found among the key's rows and folds the per-group z0 values into one combined p-value.
        /// </summary>
        /// <param name="keyListAsStringAndRowList">Key is the key list rendered as a string; Value is every input row that belongs to that key.</param>
        /// <param name="counterWithMessages">Progress counter, incremented once per call.</param>
        /// <param name="parallelOptions">Passed through unchanged to <c>ComputeZ0AndPValue</c>.</param>
        /// <returns>A row holding the key string, the per-cidGroup z0/p-value map and the combined p-value.</returns>
        private static ResultsRow CreateResultRow(KeyValuePair<string, List<InputRow>> keyListAsStringAndRowList, MBT.Escience.CounterWithMessages counterWithMessages, ParallelOptions parallelOptions)
        {
            counterWithMessages.Increment();

            // Bucket the rows by cidGroup, materializing every bucket before any statistics are computed.
            List<KeyValuePair<string, List<InputRow>>> rowsPerCidGroup = keyListAsStringAndRowList.Value
                .GroupBy(row => row.CidGroup)
                .Select(bucket => new KeyValuePair<string, List<InputRow>>(bucket.Key, bucket.ToList()))
                .ToList();

            // One z0/p-value pair per cidGroup, evaluated in cidGroup order.
            var z0AndPValuePerCidGroup = rowsPerCidGroup
                .OrderBy(entry => entry.Key)
                .Select(entry => new
                {
                    CidGroup = entry.Key,
                    Z0AndPValue = ComputeZ0AndPValue(entry.Value, row => row.PTarget, row => row.TargetVal, parallelOptions)
                })
                .ToDictionary(item => item.CidGroup, item => item.Z0AndPValue);

            // Combined z0: the sum of the per-group z0 values divided by the square root of the group count.
            double summedZ0 = z0AndPValuePerCidGroup.Values.Select(z0AndPValue => z0AndPValue.Key).Sum();
            double combinedZ0 = summedZ0 / Math.Sqrt(z0AndPValuePerCidGroup.Count);
            double combinedPValue = 1.0 - SpecialFunctions.ZScoreToOneTailedPValue(combinedZ0, 1e-10);

            return new ResultsRow
            {
                KeyListAsString = keyListAsStringAndRowList.Key,
                CidGroupToZ0AndPValue = z0AndPValuePerCidGroup,
                ComboPValue = combinedPValue
            };
        }