/// <summary>
/// Joins dataset 1 and dataset 3 on Prolific ID and writes the result as a tab-separated file.
/// Each output row is the original dataset 1 line followed by the columns that are unique to
/// dataset 3 for the same participant.
/// </summary>
/// <param name="dataset1InputFile">Input file handed to the <c>Dataset1</c> constructor.</param>
/// <param name="dataset3InputFile">Input file handed to the <c>Dataset3</c> constructor.</param>
/// <param name="dataset4OutputFile">Output file name; it is appended to <c>Globals.DataPath</c>.</param>
public void WriteCombinedFile(string dataset1InputFile, string dataset3InputFile, string dataset4OutputFile)
{
    var first = new Dataset1(dataset1InputFile);
    var third = new Dataset3(dataset3InputFile);

    // Each dataset must expose exactly one parsed record per raw line.
    bool firstIsConsistent = first.Lines.Count == first.Data.Count;
    if (!firstIsConsistent)
    {
        SanityCheck.AssertFailed();
    }

    bool thirdIsConsistent = third.Lines.Count == third.Data.Count;
    if (!thirdIsConsistent)
    {
        SanityCheck.AssertFailed();
    }

    // Dataset 3 rows are in a different order than dataset 1 rows, so index the dataset 3
    // columns by Prolific ID instead of relying on row position.
    var newResultsByProlificId = new Dictionary<string, string>();
    for (int rowIndex = 0; rowIndex < third.Lines.Count; rowIndex++)
    {
        var record = third.Data[rowIndex];
        newResultsByProlificId.Add(record.ProlificId, record.ToUniqueColumnsString("\t"));
    }

    // Write the combined file: header first, then one joined line per dataset 1 row.
    string outputPath = Globals.DataPath + dataset4OutputFile;
    using (var writer = new StreamWriter(outputPath))
    {
        writer.WriteLine(first.Header + "\t" + Result3Row.UniqueColumnsHeader);

        for (int rowIndex = 0; rowIndex < first.Lines.Count; rowIndex++)
        {
            writer.WriteLine(first.Lines[rowIndex] + "\t" + newResultsByProlificId[first.Data[rowIndex].ProlificId]);
        }
    }
}
/// <summary>
/// Parses <paramref name="line"/> with the single-argument <c>ParseLineAsCSV</c> overload and
/// reports a sanity-check failure when the number of tokens is not the expected one.
/// </summary>
/// <param name="line">The CSV line to split into tokens.</param>
/// <param name="expectedTokens">The number of tokens the line is expected to contain.</param>
/// <returns>The parsed tokens, returned whether or not the count matched.</returns>
public static string[] ParseLineAsCSV(string line, int expectedTokens)
{
    string[] tokens = ParseLineAsCSV(line);

    bool hasExpectedCount = tokens.Length == expectedTokens;
    if (!hasExpectedCount)
    {
        SanityCheck.AssertFailed();
    }

    return tokens;
}
/// <summary>
/// Builds the mapping between the Qualtrics numeric identifiers for questionnaire answers
/// (from the CSV with 'Numeric' in the title) and the text the participant actually selected
/// (from the CSV with 'ChoiceText' in the title). The correspondence is not always obvious,
/// so it is derived by reading both exports row by row in lockstep.
/// </summary>
/// <remarks>
/// Side effects visible in this method: a cleaned-up copy of the ChoiceText CSV
/// ("*.cleanedup.csv") and a human-readable report
/// ("Qualtrics_Numeric_to_ChoiceText_Mapping.out.txt") are written under
/// <c>Globals.DataPath</c>, and both <c>variablesToNumKeyedMappings</c> and
/// <c>variablesToTextKeyedMappings</c> are added to. Those two collections are declared
/// outside this method (presumably as fields of the enclosing class).
/// </remarks>
/// <param name="numericCsv">File name of the numeric export, relative to <c>Globals.DataPath</c>.</param>
/// <param name="choiceTextCsv">File name of the choice-text export, relative to <c>Globals.DataPath</c>.</param>
/// <param name="columnCount">Expected number of columns; also the number of columns compared per row.</param>
/// <returns><c>variablesToTextKeyedMappings</c>: per variable, choice text mapped to its numeric code.</returns>
/// <exception cref="Exception">A numeric code of one variable is paired with two different choice texts.</exception>
public Dictionary<string, Dictionary<string, int>> BuildMapping(string numericCsv, string choiceTextCsv, int columnCount)
{
    // The raw ChoiceText export contains extraneous newlines that mess up the CSV reader,
    // so work from a temporary cleaned-up copy instead.
    string cleanedPath = Globals.DataPath + Path.GetFileNameWithoutExtension(choiceTextCsv) + ".cleanedup.csv";
    WriteCleanedUpChoiceTextCsv(Globals.DataPath + choiceTextCsv, cleanedPath);

    using (TextFieldParser numericParser = new TextFieldParser(Globals.DataPath + numericCsv))
    using (TextFieldParser textParser = new TextFieldParser(cleanedPath))
    using (StreamWriter report = new StreamWriter(Globals.DataPath + "Qualtrics_Numeric_to_ChoiceText_Mapping.out.txt"))
    {
        // Both parsers are configured identically (numeric one first, as before).
        foreach (TextFieldParser parser in new[] { numericParser, textParser })
        {
            parser.CommentTokens = new string[] { "#" };
            parser.SetDelimiters(new string[] { "," });
            parser.HasFieldsEnclosedInQuotes = true;
        }

        // Headers: the two exports must share the same header line and column count.
        string numericHeader = numericParser.ReadLine();
        string textHeader = textParser.ReadLine();
        if (numericHeader != textHeader)
        {
            SanityCheck.AssertFailed();
        }

        string[] columnNames = Extensions.ParseLineAsCSV(numericHeader);
        if (columnNames.Length != columnCount)
        {
            SanityCheck.AssertFailed();
        }

        // Data: skip the intervening lines up to and including the first numeric line that
        // starts with ImportLinePrefix; every line after that is treated as a data row.
        bool dataStarted = false;
        while (!numericParser.EndOfData)
        {
            if (!dataStarted)
            {
                string skippedNumericLine = numericParser.ReadLine();
                textParser.ReadLine(); // keep the text parser in step with the numeric parser
                if (!skippedNumericLine.StartsWith(ImportLinePrefix))
                {
                    continue;
                }

                // The data row following the prefix line is read in this same iteration.
                dataStarted = true;
            }

            string[] numericTokens = numericParser.ReadFields();
            string[] textTokens = textParser.ReadFields();
            if (numericTokens.Length != textTokens.Length)
            {
                SanityCheck.AssertFailed();
            }

            for (int column = 0; column < columnCount; column++)
            {
                // Cells that are identical in both exports carry no mapping information, and
                // cells shaped like a comma-separated digit list are not single numeric codes.
                if (numericTokens[column] == textTokens[column] || Regex.IsMatch(numericTokens[column], @"^[0-9],[0-9,]+"))
                {
                    continue;
                }

                string variable = columnNames[column];
                int number = int.Parse(numericTokens[column]);
                string choiceText = textTokens[column];

                if (!variablesToNumKeyedMappings.TryGetValue(variable, out Dictionary<int, string> textByNumber))
                {
                    textByNumber = new Dictionary<int, string>();
                    variablesToNumKeyedMappings.Add(variable, textByNumber);
                }

                if (!textByNumber.TryGetValue(number, out string knownText))
                {
                    textByNumber.Add(number, choiceText);
                }
                else if (knownText != choiceText)
                {
                    throw new Exception("WARNING: More than one choiceText for '" + variable + "' maps onto the value " + numericTokens[column] + "!");
                }
            }
        }

        // With the mappings built, write them out: one section per variable (sorted by
        // name), one "number<TAB>text" line per code (sorted by number), then a blank line.
        foreach (var variableEntry in variablesToNumKeyedMappings.OrderBy(entry => entry.Key))
        {
            report.WriteLine("*** " + variableEntry.Key + " ***");
            foreach (var codeEntry in variableEntry.Value.OrderBy(entry => entry.Key))
            {
                report.WriteLine(codeEntry.Key.ToString() + "\t" + codeEntry.Value);
            }
            report.WriteLine();
        }
    }

    // Finally, produce the same mapping with the inner keys and values switched
    // (choice text -> number). The inner dictionary is registered before it is filled.
    foreach (var variableEntry in variablesToNumKeyedMappings)
    {
        var numberByText = new Dictionary<string, int>();
        variablesToTextKeyedMappings.Add(variableEntry.Key, numberByText);
        foreach (var codeEntry in variableEntry.Value)
        {
            numberByText.Add(codeEntry.Value, codeEntry.Key);
        }
    }

    return variablesToTextKeyedMappings;
}
/// <summary>
/// Writes dataset 5: every line of the loaded dataset 4 followed by a set of derived measures,
/// as a tab-separated file under <c>Globals.DataPath</c>.
/// </summary>
/// <remarks>
/// Derived measures appended to each row:
/// - common-sense "-Value" columns for variables whose Qualtrics numbers are offset or coded;
/// - scores on the objective comprehension questions and two total comprehension scores;
/// - total communication efficacy score;
/// - total subjective numeracy score (item 7 is reverse-coded);
/// - 3 codes indicating which question came first in the counterbalancing:
///   'status' before 'child', 'status' before 'compare', 'child' before 'compare'
///   (yes (1) or no (0) each);
/// - a binary "result noticed" flag, the 'next steps' average, the time spent reading the
///   report, and the concordance between the slider answers and the compare question.
/// </remarks>
/// <param name="outfile">Output file name; it is appended to <c>Globals.DataPath</c>.</param>
/// <param name="qm">Mapping used to translate between Qualtrics numeric codes and choice text.</param>
/// <exception cref="InvalidOperationException">
/// <c>Header</c> is empty, i.e. dataset 4 has not been loaded yet.
/// </exception>
internal void WriteTidyDataset5(string outfile, QualtricsMapping qm)
{
    if (Header == "")
    {
        throw new InvalidOperationException("Dataset 4 hasn't been loaded yet (LoadCombinedFile hasn't been run yet)");
    }

    using (StreamWriter sw = new StreamWriter(Globals.DataPath + outfile))
    {
        sw.WriteSepLine("\t",
            Header,
            "Scheuner-14-Value",
            "Scheuner-15-Value",
            "Scheuner-16-Value",
            "Scheuner-17-Value",
            "Scheuner-18-Value",
            "Result-Understood-Value",
            "Gender-Value",
            "Adults-in-House-Value",
            "Children-in-House-Value",
            "income-lower-bound",
            "CF-Experience-Value",
            "status-slider-is-nearly-correct",
            "child-slider-is-nearly-correct",
            "oc-score-1",
            "oc-score-2",
            "communication-efficacy",
            "subjective-numeracy",
            "status-before-child",
            "status-before-compare",
            "child-before-compare",
            "Result-Noticed-Binary",
            "Next-Steps-Average",
            "oc-compare-is-correct",
            "oc-status-verbalj-correct",
            "oc-child-verbalj-correct",
            "time-reading-report",
            "slider-compare-implication",
            "concordance-with-slider-values"
        );

        for (int i = 0; i < Data.Count; i++)
        {
            var row = Data[i];

            // Create additional columns for the variables with unintuitive choicetext-to-number
            // mappings, holding the actual value that corresponds to the choice.
            // These are: Scheuner-14 .. Scheuner-18, Result-Understood, Children-in-House and
            // Adults-in-House. Each gets a column ending in -Value (e.g. Scheuner-14-Value)
            // containing the common-sense value; that is what any subsequent computation
            // should use.
            int scheuner14Value = int.Parse(row.Scheuner14) - 1;
            int scheuner15Value = int.Parse(row.Scheuner15) - 1;
            int scheuner16Value = int.Parse(row.Scheuner16) - 1;
            int scheuner17Value = int.Parse(row.Scheuner17) - 1;
            int scheuner18Value = int.Parse(row.Scheuner18) - 1;
            int resultUnderstoodValue = int.Parse(row.ResultUnderstood) - 1;

            // Gender stays blank when it is missing or the participant preferred not to say.
            string genderValue = "";
            if (row.Gender != "" &&
                row.Gender != qm.GetNumberCorrespondingToChoiceText("Gender", "Prefer not to say").ToString())
            {
                genderValue = qm.GetChoiceTextCorrespondingToNumber("Gender", int.Parse(row.Gender));
            }

            int adultsInHouseValue = int.Parse(row.AdultsInHouse) - 3;
            // Fixed: this was previously derived from row.AdultsInHouse (copy-paste slip), so the
            // Children-in-House-Value column only ever reflected the adults answer.
            // NOTE(review): assumes the row exposes the children column as ChildrenInHouse,
            // following the naming of AdultsInHouse - confirm against the row type.
            int childrenInHouseValue = int.Parse(row.ChildrenInHouse) - 4;

            // Lower bound of the income bracket, taken from the bracket's choice text.
            string incomeLowerBound = "";
            if (row.CombinedIncome != "")
            {
                string income = qm.GetChoiceTextCorrespondingToNumber("Combined-Income", int.Parse(row.CombinedIncome));
                if (income == "Less than £10k")
                {
                    incomeLowerBound = "0";
                }
                else if (income == "More than £91k")
                {
                    incomeLowerBound = "91";
                }
                else
                {
                    // Remaining brackets: take the two characters after the first character
                    // of the choice text and check that they form a number.
                    incomeLowerBound = income.Substring(1, 2);
                    if (!int.TryParse(incomeLowerBound, out _))
                    {
                        SanityCheck.AssertFailed();
                    }
                }
            }

            string cfExperienceValue = qm.GetChoiceTextCorrespondingToNumber("CF-Experience", int.Parse(row.CFExperience));

            // Get the user's score on each objective comprehension question.
            // 'y' is correct, 'n' is incorrect, 'x' is N/A (didn't answer question, follow
            // directions, etc.). N/As should probably be treated as 'n' but theoretically could
            // also be treated as missing data.
            //
            // Another choice to be made: a la Marteau, the initial thought was to have the verbal
            // answer "definitely a carrier of cystic fibrosis" be the CORRECT answer as to John's
            // status, and "unlikely to have cystic fibrosis" as the CORRECT answer regarding
            // John & Jane's child. However, these could also be treated as subjective. The fact
            // that sensitivity/specificity info is provided complicates the picture of
            // "definitely a carrier of cystic fibrosis" as the 'correct' answer to question 1.
            //
            // The former way (including subjective scores) is coded below as 'oc-score-1'
            // (objective comprehension score 1), and the latter way as 'oc-score-2'.
            // NB: After discussion with colleagues (but before hypothesis testing), concluded
            // that oc-score-1 was NOT the right way to code comprehension and stuck with
            // oc-score-2 (described as simply 'oc-score' in the final Results_TidyClean data file).
            bool isPositive = row.TestResult == "Positive";

            int sliderStatusAnswerKey = isPositive ? 100 : 1;
            int verbalStatusAnswerKey = isPositive
                ? qm.GetNumberCorrespondingToChoiceText("oc-status-verbal", "Definitely a carrier of cystic fibrosis")
                : qm.GetNumberCorrespondingToChoiceText("oc-status-verbal", "Unlikely to be a carrier of cystic fibrosis");
            int sliderChildAnswerKey = isPositive ? 25 : 0;
            // NOTE(review): the previous code chose between two identical branches here, so the
            // same choice is the answer key for both Positive and Negative results. The redundant
            // conditional was removed without changing the result - confirm this key is intended
            // for both test results.
            int verbalChildAnswerKey =
                qm.GetNumberCorrespondingToChoiceText("oc-child-verbal", "Unlikely to have cystic fibrosis");

            string q1Correct = Y_N_or_X(row.OcStatusVerbal, verbalStatusAnswerKey.ToString());
            string q2Correct = row.StatusIsNearlyCorrect;
            string q3Correct = ScoreSlider(row.OcStatusSlider1, sliderStatusAnswerKey);
            string q4Correct = Y_N_or_X(row.OcChildVerbal, verbalChildAnswerKey.ToString());
            string q5Correct = row.ChildIsNearlyCorrect;
            string q6Correct = ScoreSlider(row.OcChildSlider1, sliderChildAnswerKey);
            string q7Correct = Y_N_or_X(
                row.ObjcompCompare,
                qm.GetNumberCorrespondingToChoiceText("objcomp-compare", "It's more likely that John Doe is a carrier of cystic fibrosis").ToString());
            string q8Correct = row.Hard1000IsNearlyCorrect;
            string q9Correct = row.Hard800IsNearlyCorrect;

            // Score 1 counts all nine questions; score 2 leaves out the two verbal questions
            // (q1 and q4). Only 'y' earns a point.
            string[] ocscale1 = { q1Correct, q2Correct, q3Correct, q4Correct, q5Correct, q6Correct, q7Correct, q8Correct, q9Correct };
            string[] ocscale2 = { q2Correct, q3Correct, q5Correct, q6Correct, q7Correct, q8Correct, q9Correct };
            int ocScore1 = ocscale1.Count(answer => answer == "y");
            int ocScore2 = ocscale2.Count(answer => answer == "y");

            // Get the communication efficacy score (mean of the 18 Scheuner items).
            double communicationEfficacy =
                (int.Parse(row.Scheuner1) +
                 int.Parse(row.Scheuner2) +
                 int.Parse(row.Scheuner3) +
                 int.Parse(row.Scheuner4) +
                 int.Parse(row.Scheuner5) +
                 int.Parse(row.Scheuner6) +
                 int.Parse(row.Scheuner7) +
                 int.Parse(row.Scheuner8) +
                 int.Parse(row.Scheuner9) +
                 int.Parse(row.Scheuner10) +
                 int.Parse(row.Scheuner11) +
                 int.Parse(row.Scheuner12) +
                 int.Parse(row.Scheuner13) +
                 // This switch to the -Value variables is intentional, due to how Qualtrics
                 // recorded the data (see the definition of scheuner14Value above).
                 scheuner14Value +
                 scheuner15Value +
                 scheuner16Value +
                 scheuner17Value +
                 scheuner18Value) / 18.0;

            // Get subjective numeracy score (remembering to reverse-code item 7).
            double subjectiveNumeracy =
                (int.Parse(row.SNumeracy1) +
                 int.Parse(row.SNumeracy2) +
                 int.Parse(row.SNumeracy3) +
                 int.Parse(row.SNumeracy4) +
                 int.Parse(row.SNumeracy5) +
                 int.Parse(row.SNumeracy6) +
                 (7 - int.Parse(row.SNumeracy7)) +
                 int.Parse(row.SNumeracy8)) / 8.0;

            // 3 codes indicating which question came first in the counterbalancing:
            //  * 'status' before 'child' (yes (1) or no (0));
            //  * 'status' before 'compare' (yes (1) or no (0));
            //  * 'child' before 'compare' (yes (1) or no (0)).
            DateTime statusQAbsoluteTime = DateTime.Parse(row.OCStatusAbsoluteTime);
            DateTime compareQAbsoluteTime = DateTime.Parse(row.OCCompareAbsoluteTime);
            DateTime childQAbsoluteTime = DateTime.Parse(row.OCChildAbsoluteTime);
            int statusBeforeChild = (statusQAbsoluteTime < childQAbsoluteTime ? 1 : 0);
            int statusBeforeCompare = (statusQAbsoluteTime < compareQAbsoluteTime ? 1 : 0);
            int childBeforeCompare = (childQAbsoluteTime < compareQAbsoluteTime ? 1 : 0);

            // Result was noticed (1 - for "yes" and "yes, but didn't read it") or not
            // (0 - for any other answer).
            int resultNoticed = int.Parse(row.ResultNoticed);
            int resultNoticedBinary = 0;
            if (resultNoticed == qm.GetNumberCorrespondingToChoiceText("Result-Noticed", "Yes, and I read it") ||
                resultNoticed == qm.GetNumberCorrespondingToChoiceText("Result-Noticed", "Yes, but I didn't read it"))
            {
                resultNoticedBinary = 1;
            }

            // Get 'next steps' average score.
            double nextStepsAverage =
                (int.Parse(row.SubjNext1) +
                 int.Parse(row.SubjNext2) +
                 int.Parse(row.SubjNext3) +
                 int.Parse(row.SubjNext4) +
                 int.Parse(row.SubjNext5)) / 5.0;

            // Get time spent reading the report; which timing column applies depends on the
            // design. The value stays at -1 when the design is not recognised.
            double timeReadingReport = -1;
            if (row.Design == "UCD")
            {
                timeReadingReport = double.Parse(row.TimeUcdReportP1PageSubmit);
            }
            else if (row.Design == "Control")
            {
                timeReadingReport = double.Parse(row.TimeControlReportPageSubmit);
            }
            else
            {
                SanityCheck.AssertFailed();
            }

            // We are interested in whether people's answers to the OC-Compare question are
            // concordant with their answers on the slider questions: did their slider answers
            // imply that they believe the carrier risk is higher, the child's CF risk is higher,
            // or neither? A missing slider answer is treated as "Don't know".
            int dontKnowValue = qm.GetNumberCorrespondingToChoiceText("objcomp-compare", "Don't know");
            int sliderCompareImplication = dontKnowValue;
            if (row.OcChildSlider1 != "" && row.OcStatusSlider1 != "")
            {
                int childSliderValue = int.Parse(row.OcChildSlider1);
                int statusSliderValue = int.Parse(row.OcStatusSlider1);
                if (statusSliderValue > childSliderValue)
                {
                    sliderCompareImplication = qm.GetNumberCorrespondingToChoiceText("objcomp-compare", "It's more likely that John Doe is a carrier of cystic fibrosis");
                }
                else if (childSliderValue > statusSliderValue)
                {
                    sliderCompareImplication = qm.GetNumberCorrespondingToChoiceText("objcomp-compare", "It's more likely that the first child of John and Jane Doe will have cystic fibrosis");
                }
                else
                {
                    // The two slider values are equal (the only remaining case for two ints).
                    sliderCompareImplication = qm.GetNumberCorrespondingToChoiceText("objcomp-compare", "Both possibilities are equally likely");
                }
            }

            // Check for concordance. -1 is don't know, 0 is not concordant, 1 is concordant.
            int compareAnswer = int.Parse(row.ObjcompCompare);
            int concordanceWithSliderValues = -1;
            if (compareAnswer != dontKnowValue && sliderCompareImplication != dontKnowValue)
            {
                concordanceWithSliderValues = (sliderCompareImplication == compareAnswer ? 1 : 0);
            }

            // Append these variables to the current row in the file we are writing.
            sw.WriteSepLine("\t",
                Lines[i],
                scheuner14Value,
                scheuner15Value,
                scheuner16Value,
                scheuner17Value,
                scheuner18Value,
                resultUnderstoodValue,
                genderValue,
                adultsInHouseValue,
                childrenInHouseValue,
                incomeLowerBound,
                cfExperienceValue,
                q3Correct,
                q6Correct,
                ocScore1,
                ocScore2,
                communicationEfficacy,
                subjectiveNumeracy,
                statusBeforeChild,
                statusBeforeCompare,
                childBeforeCompare,
                resultNoticedBinary,
                nextStepsAverage,
                q7Correct,
                q1Correct,
                q4Correct,
                timeReadingReport,
                sliderCompareImplication,
                concordanceWithSliderValues
            );
        }
    }
}