/// <summary>
/// Lazily reads one tab-delimited results file and yields an <see cref="InputRow"/> for every
/// data line whose splitIndex column passes <paramref name="splitIndexFilter"/>.
/// </summary>
/// <remarks>
/// Column positions are taken from the fixed column layout declared inside this method, not from
/// the file itself. Any line that is exactly equal to one of the two known header spellings is
/// skipped wherever it occurs in the file. Because this is an iterator, nothing (including opening
/// the file and resolving column names) happens until the result is enumerated.
/// </remarks>
/// <param name="splitIndexFilter">Receives the parsed splitIndex of each data line; lines for which it returns false are skipped.</param>
/// <param name="cidGroupAndInputFileName">Key is the cid group assigned to every yielded row; Value is the path of the file to read.</param>
/// <param name="pTargetHeader">Name of the column (as spelled in the fixed layout) that is parsed into <c>PTarget</c>.</param>
/// <param name="keyNameList">Names of the columns (as spelled in the fixed layout) whose values form each row's <c>KeyList</c>, in this order.</param>
/// <returns>A deferred sequence of rows in file order.</returns>
/// <exception cref="KeyNotFoundException">During enumeration, when <paramref name="pTargetHeader"/> or an entry of <paramref name="keyNameList"/> is not a column of the fixed layout.</exception>
/// <exception cref="FormatException">During enumeration, when a splitIndex, targetVal or pTarget cell cannot be parsed.</exception>
/// <exception cref="IndexOutOfRangeException">During enumeration, when a data line has fewer columns than a requested column index.</exception>
private static IEnumerable<InputRow> CreateRowListQuery(Predicate<int> splitIndexFilter, KeyValuePair<string, string> cidGroupAndInputFileName, string pTargetHeader, List<string> keyNameList)
{
    // The column separators are written as explicit \t escapes. Raw tab characters inside a
    // string literal are invisible and are silently turned into spaces by many editors and
    // formatters, which would make Split('\t') below return a single field.
    const string header1 = "pathway\tcid\tnullIndex\tpTarget\ttargetVal\tlogLikelihood\tsplitIndex\tsplitCount\tworkIndex\tworkCount\ttestCidIndex\ttestCidCount";
    // Alternative spelling of the same header; only used to recognise (and skip) header lines.
    const string header2 = "pathway\tcid\tnullIndex\tpTarget(or prediction)\ttargetVal\tlogLikelihood(or score)\tsplitIndex\tsplitCount\tworkIndex\tworkCount\ttestCidIndex\ttestCidCount";

    // Column name -> zero-based column position, derived from header1 only.
    string[] fields = header1.Split('\t');
    Dictionary<string, int> fieldToIndex = fields
        .Select((key, index) => new KeyValuePair<string, int>(key, index))
        .ToDictionary(keyAndIndex => keyAndIndex.Key, keyAndIndex => keyAndIndex.Value);

    List<int> keyIndexList = keyNameList.Select(keyName => fieldToIndex[keyName]).ToList();
    int cidIndex = fieldToIndex["cid"];
    int targetValIndex = fieldToIndex["targetVal"];
    int pTargetHeaderIndex = fieldToIndex[pTargetHeader];
    int splitIndexIndex = fieldToIndex["splitIndex"];

    string cidGroup = cidGroupAndInputFileName.Key;
    string inputFileName = cidGroupAndInputFileName.Value;

    Console.WriteLine("Reading file " + inputFileName);
    // Incremented once per data line; presumably emits the message every 100000 increments (confirm against CounterWithMessages).
    MBT.Escience.CounterWithMessages counterWithMessages = new MBT.Escience.CounterWithMessages("reading cidGroup " + cidGroup + " file, line #{0}", 100000, null);

    using (TextReader textReader = File.OpenText(inputFileName))
    {
        string line;
        while (null != (line = textReader.ReadLine()))
        {
            // Skip header lines but keep reading: a header may appear anywhere in the file.
            if (line == header1 || line == header2)
            {
                continue;
            }
            counterWithMessages.Increment();

            string[] valueList = line.Split('\t');

            // NOTE(review): int.Parse/double.Parse use the current culture here; if these files are
            // always written with invariant formatting, pass CultureInfo.InvariantCulture - confirm with the writer.
            // Apply the filter before building the key list so rejected rows cost no extra allocation.
            if (!splitIndexFilter(int.Parse(valueList[splitIndexIndex])))
            {
                continue;
            }

            yield return new InputRow
            {
                Cid = valueList[cidIndex],
                CidGroup = cidGroup,
                TargetVal = int.Parse(valueList[targetValIndex]),
                PTarget = double.Parse(valueList[pTargetHeaderIndex]),
                KeyList = keyIndexList.Select(keyIndex => valueList[keyIndex]).ToList()
            };
        }
    }
}
/// <summary>
/// Builds the <see cref="ResultsRow"/> for one key: the rows are partitioned by cid group,
/// <c>ComputeZ0AndPValue</c> is evaluated once per group, and the per-group z0 values are
/// merged into a single combined p-value.
/// </summary>
/// <param name="keyListAsStringAndRowList">Key is the key-list string copied onto the result; Value holds every input row for that key, across all cid groups.</param>
/// <param name="counterWithMessages">Counter that is incremented once per call, before any other work.</param>
/// <param name="parallelOptions">Passed through unchanged to <c>ComputeZ0AndPValue</c>.</param>
/// <returns>A result carrying the key string, the per-group z0/p-value map and the combined p-value.</returns>
private static ResultsRow CreateResultRow(KeyValuePair<string, List<InputRow>> keyListAsStringAndRowList, MBT.Escience.CounterWithMessages counterWithMessages, ParallelOptions parallelOptions)
{
    counterWithMessages.Increment();

    // Partition the rows by cid group and materialise each partition.
    List<KeyValuePair<string, List<InputRow>>> rowsPerGroup = keyListAsStringAndRowList.Value
        .GroupBy(row => row.CidGroup)
        .Select(bucket => new KeyValuePair<string, List<InputRow>>(bucket.Key, bucket.ToList()))
        .ToList();

    // One (z0, p-value) pair per cid group, computed in cid-group order.
    var z0AndPValueByGroup = rowsPerGroup
        .OrderBy(entry => entry.Key)
        .ToDictionary(
            entry => entry.Key,
            entry => ComputeZ0AndPValue(entry.Value, row => row.PTarget, row => row.TargetVal, parallelOptions));

    // Combined z0: the sum of the per-group z0 values divided by the square root of the group count.
    var z0Sum = z0AndPValueByGroup.Values.Select(z0AndPValue => z0AndPValue.Key).Sum();
    double z0OfAll = z0Sum / Math.Sqrt(z0AndPValueByGroup.Count);
    double pOfAll = 1.0 - SpecialFunctions.ZScoreToOneTailedPValue(z0OfAll, 1e-10);

    return new ResultsRow
    {
        KeyListAsString = keyListAsStringAndRowList.Key,
        CidGroupToZ0AndPValue = z0AndPValueByGroup,
        ComboPValue = pOfAll
    };
}