Ejemplo n.º 1
0
        /// <summary>
        /// Joins dataset 1 and dataset 3 on Prolific ID and writes the result as a tab-separated file.
        /// </summary>
        /// <remarks>
        /// Each output row is the original dataset 1 line followed by the dataset 3 columns produced by
        /// ToUniqueColumnsString for the row with the same Prolific ID. Output rows keep dataset 1's order.
        /// All rows are joined in memory before the output file is opened, so a failed join does not
        /// leave a truncated file behind.
        /// </remarks>
        /// <param name="dataset1InputFile">Input handed to the Dataset1 constructor.</param>
        /// <param name="dataset3InputFile">Input handed to the Dataset3 constructor.</param>
        /// <param name="dataset4OutputFile">Output file name; appended to Globals.DataPath.</param>
        /// <exception cref="System.ArgumentException">Two dataset 3 rows share the same Prolific ID.</exception>
        /// <exception cref="KeyNotFoundException">A dataset 1 row has a Prolific ID that does not occur in dataset 3.</exception>
        public void WriteCombinedFile(string dataset1InputFile, string dataset3InputFile, string dataset4OutputFile)
        {
            Dataset1 dataset1 = new Dataset1(dataset1InputFile);
            Dataset3 dataset3 = new Dataset3(dataset3InputFile);

            // The raw lines and the parsed rows are indexed in parallel below, so they must line up.
            if (dataset1.Lines.Count != dataset1.Data.Count)
            {
                SanityCheck.AssertFailed();
            }
            if (dataset3.Lines.Count != dataset3.Data.Count)
            {
                SanityCheck.AssertFailed();
            }

            // Map prolific IDs in dataset3 to a dictionary, since the rows of dataset3 are in a different order than
            // the rows of dataset1
            Dictionary <string, string> prolificIdsToNewResults = new Dictionary <string, string>();

            for (int i = 0; i < dataset3.Lines.Count; i++)
            {
                string prolificId = dataset3.Data[i].ProlificId;
                if (prolificIdsToNewResults.ContainsKey(prolificId))
                {
                    // Same exception type Dictionary.Add would raise, but naming the offending ID.
                    throw new System.ArgumentException("Duplicate Prolific ID '" + prolificId + "' in dataset 3 (data row " + i + ").");
                }
                prolificIdsToNewResults.Add(prolificId, dataset3.Data[i].ToUniqueColumnsString("\t"));
            }

            // Join every dataset 1 row with its dataset 3 columns before touching the output file.
            List <string> combinedLines = new List <string>(dataset1.Lines.Count);

            for (int i = 0; i < dataset1.Lines.Count; i++)
            {
                string prolificId = dataset1.Data[i].ProlificId;
                string newResults;
                if (!prolificIdsToNewResults.TryGetValue(prolificId, out newResults))
                {
                    // Same exception type the dictionary indexer would raise, but naming the offending ID.
                    throw new KeyNotFoundException("Prolific ID '" + prolificId + "' from dataset 1 (data row " + i + ") was not found in dataset 3.");
                }
                combinedLines.Add(dataset1.Lines[i] + "\t" + newResults);
            }

            // Write combined file
            using (StreamWriter sw = new StreamWriter(Globals.DataPath + dataset4OutputFile))
            {
                sw.WriteLine(dataset1.Header + "\t" + Result3Row.UniqueColumnsHeader);
                foreach (string combinedLine in combinedLines)
                {
                    sw.WriteLine(combinedLine);
                }
            }
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Splits <paramref name="line"/> into tokens with the single-argument ParseLineAsCSV overload and
 /// reports a sanity-check failure when the token count differs from <paramref name="expectedTokens"/>.
 /// </summary>
 /// <param name="line">The line to split.</param>
 /// <param name="expectedTokens">The number of tokens the line is expected to contain.</param>
 /// <returns>The tokens produced by the single-argument overload, whether or not the count matched.</returns>
 public static string[] ParseLineAsCSV(string line, int expectedTokens)
 {
     string[] tokens = ParseLineAsCSV(line);

     bool countMatches = (tokens.Length == expectedTokens);
     if (!countMatches)
     {
         SanityCheck.AssertFailed();
     }

     return tokens;
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Build the mapping between Qualtrics numeric identifiers for questionnaire answers
        /// (in the CSV with 'Numeric' in the title), and the actual text the user selected
        /// (in the CSV with 'ChoiceText' in the title). The mapping isn't always obvious!
        /// </summary>
        /// <remarks>
        /// Side effects: writes a ".cleanedup.csv" copy of the ChoiceText file and the report
        /// "Qualtrics_Numeric_to_ChoiceText_Mapping.out.txt" under Globals.DataPath, and adds entries to both
        /// variablesToNumKeyedMappings and variablesToTextKeyedMappings.
        /// </remarks>
        /// <param name="numericCsv">File name of the 'Numeric' CSV; appended to Globals.DataPath.</param>
        /// <param name="choiceTextCsv">File name of the 'ChoiceText' CSV; appended to Globals.DataPath.</param>
        /// <param name="columnCount">Number of columns expected in the header and read from every data row.</param>
        /// <returns>
        /// variablesToTextKeyedMappings: for each column name, the choice text mapped to its numeric identifier.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// The ChoiceText file has fewer data rows than the Numeric file, a data row has fewer than
        /// <paramref name="columnCount"/> fields, or one numeric identifier maps onto two different choice texts.
        /// </exception>
        public Dictionary <string, Dictionary <string, int> > BuildMapping(string numericCsv, string choiceTextCsv, int columnCount)
        {
            // Write a temporary ChoiceText CSV file that doesn't have extraneous newlines (as they mess up the
            // CSV reader...)
            string cleanedUpChoiceTextCsv = Globals.DataPath + Path.GetFileNameWithoutExtension(choiceTextCsv) + ".cleanedup.csv";

            WriteCleanedUpChoiceTextCsv(Globals.DataPath + choiceTextCsv, cleanedUpChoiceTextCsv);

            using (TextFieldParser numCsvParser = new TextFieldParser(Globals.DataPath + numericCsv))
            using (TextFieldParser textCsvParser = new TextFieldParser(cleanedUpChoiceTextCsv))
            using (StreamWriter sw = new StreamWriter(Globals.DataPath + "Qualtrics_Numeric_to_ChoiceText_Mapping.out.txt"))
            {
                numCsvParser.CommentTokens = new string[] { "#" };
                numCsvParser.SetDelimiters(new string[] { "," });
                numCsvParser.HasFieldsEnclosedInQuotes = true;
                textCsvParser.CommentTokens            = new string[] { "#" };
                textCsvParser.SetDelimiters(new string[] { "," });
                textCsvParser.HasFieldsEnclosedInQuotes = true;

                // Get headers: both files must describe the same columns.
                //
                string numericHeader = numCsvParser.ReadLine();
                string textHeader    = textCsvParser.ReadLine();
                if (numericHeader != textHeader)
                {
                    SanityCheck.AssertFailed();
                }
                string[] columnNames = Extensions.ParseLineAsCSV(numericHeader);
                if (columnNames.Length != columnCount)
                {
                    SanityCheck.AssertFailed();
                }

                // Read data (skipping intervening lines). Both parsers are advanced in lockstep. Everything up
                // to and including the first Numeric line that starts with ImportLinePrefix is treated as junk;
                // every record after it is a data row.
                //
                bool inInterveningJunk = true;
                while (!numCsvParser.EndOfData)
                {
                    if (inInterveningJunk)
                    {
                        string numericLine = numCsvParser.ReadLine();
                        textCsvParser.ReadLine();   // keep the ChoiceText parser aligned; its content is not needed
                        if (numericLine.StartsWith(ImportLinePrefix))
                        {
                            inInterveningJunk = false;
                        }
                    }

                    if (!inInterveningJunk)
                    {
                        string[] numericTokens = numCsvParser.ReadFields();
                        string[] textTokens    = textCsvParser.ReadFields();

                        if (numericTokens == null)
                        {
                            // The prefix line was the last line of the Numeric file: there are no data rows.
                            break;
                        }
                        if (textTokens == null)
                        {
                            throw new InvalidDataException("The ChoiceText CSV has fewer data rows than the Numeric CSV.");
                        }
                        if (numericTokens.Length != textTokens.Length)
                        {
                            SanityCheck.AssertFailed();
                        }
                        if (numericTokens.Length < columnCount || textTokens.Length < columnCount)
                        {
                            // Fail with a clear message instead of an IndexOutOfRangeException in the loop below.
                            throw new InvalidDataException("A data row has fewer than " + columnCount + " fields (Numeric: " +
                                                           numericTokens.Length + ", ChoiceText: " + textTokens.Length + ").");
                        }

                        for (int i = 0; i < columnCount; i++)
                        {
                            // Only cells that differ between the two files carry a number -> text mapping.
                            // Cells matching the regex (digit, comma, then digits/commas) are skipped rather
                            // than parsed.
                            // NOTE(review): the pattern only allows ONE digit before the first comma, so a value
                            // such as "10,11" is not skipped and int.Parse below will throw — confirm whether
                            // that is intended.
                            if (numericTokens[i] != textTokens[i] && !Regex.IsMatch(numericTokens[i], @"^[0-9],[0-9,]+"))
                            {
                                string variable   = columnNames[i];
                                int    number     = int.Parse(numericTokens[i]);
                                string choiceText = textTokens[i];

                                Dictionary <int, string> numbersToChoiceText;
                                if (!variablesToNumKeyedMappings.TryGetValue(variable, out numbersToChoiceText))
                                {
                                    numbersToChoiceText = new Dictionary <int, string>();
                                    variablesToNumKeyedMappings.Add(variable, numbersToChoiceText);
                                }

                                string knownChoiceText;
                                if (numbersToChoiceText.TryGetValue(number, out knownChoiceText))
                                {
                                    if (knownChoiceText != choiceText)
                                    {
                                        throw new InvalidDataException("WARNING: More than one choiceText for '" + variable + "' maps onto the value " + numericTokens[i] + "!");
                                    }
                                }
                                else
                                {
                                    numbersToChoiceText.Add(number, choiceText);
                                }
                            }
                        }
                    }
                }

                // Once we have had a good look at the input files and built the mappings, let's write them to file
                foreach (var vm in variablesToNumKeyedMappings.OrderBy(x => x.Key))
                {
                    Dictionary <int, string> numbersToChoiceText = vm.Value;

                    sw.WriteLine("*** " + vm.Key + " ***");
                    foreach (var kv in numbersToChoiceText.OrderBy(x => x.Key))
                    {
                        sw.WriteLine(kv.Key.ToString() + "\t" + kv.Value);
                    }
                    sw.WriteLine();
                }
            }


            // Finally, let's produce a dictionary identical to variablesToNumKeyedMappings,
            // but with keys and values switched in the inner dictionary.
            // Add() throws if a variable is already present in variablesToTextKeyedMappings, or if two
            // numbers of one variable share the same choice text.

            foreach (var vm in variablesToNumKeyedMappings)
            {
                string variable = vm.Key;
                variablesToTextKeyedMappings.Add(variable, new Dictionary <string, int>());

                foreach (var kv in vm.Value)
                {
                    variablesToTextKeyedMappings[variable].Add(kv.Value, kv.Key);
                }
            }

            return(variablesToTextKeyedMappings);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Write dataset 5, which includes some new derived measures: every loaded line is written back
        /// out, followed by tab-separated derived columns.
        /// </summary>
        /// <remarks>
        /// Derived measures to add:
        /// - "-Value" columns for variables with unintuitive choicetext-to-number mappings;
        /// - scores on objective comprehension questions;
        /// - total objective comprehension score;
        /// - total communication efficacy score;
        /// - total subjective numeracy score (remembering to reverse-code item 7);
        /// - 3 codes indicating which question came first in the counterbalancing:
        ///   * 'status' before 'child' (yes (1) or no (0));
        ///   * 'status' before 'compare' (yes (1) or no (0));
        ///   * 'child' before 'compare' (yes (1) or no (0));
        /// - binary "result noticed" flag, 'next steps' average, time spent reading the report;
        /// - the comparison implied by the slider answers and its concordance with the OC-Compare answer.
        /// </remarks>
        /// <param name="outfile">Output file name; appended to Globals.DataPath.</param>
        /// <param name="qm">Mapping used to translate between Qualtrics numeric codes and choice text.</param>
        /// <exception cref="InvalidOperationException">Header is empty, i.e. dataset 4 has not been loaded yet.</exception>
        internal void WriteTidyDataset5(string outfile, QualtricsMapping qm)
        {
            if (Header == "")
            {
                throw new InvalidOperationException("Dataset 4 hasn't been loaded yet (LoadCombinedFile hasn't been run yet)");
            }


            using (StreamWriter sw = new StreamWriter(Globals.DataPath + outfile))
            {
                // The order of these column names must match the order of the values written per row below.
                sw.WriteSepLine("\t", Header,
                                "Scheuner-14-Value",
                                "Scheuner-15-Value",
                                "Scheuner-16-Value",
                                "Scheuner-17-Value",
                                "Scheuner-18-Value",
                                "Result-Understood-Value",
                                "Gender-Value",
                                "Adults-in-House-Value",
                                "Children-in-House-Value",
                                "income-lower-bound",
                                "CF-Experience-Value",
                                "status-slider-is-nearly-correct",
                                "child-slider-is-nearly-correct",
                                "oc-score-1",
                                "oc-score-2",
                                "communication-efficacy",
                                "subjective-numeracy",
                                "status-before-child",
                                "status-before-compare",
                                "child-before-compare",
                                "Result-Noticed-Binary",
                                "Next-Steps-Average",
                                "oc-compare-is-correct",
                                "oc-status-verbalj-correct",
                                "oc-child-verbalj-correct",
                                "time-reading-report",
                                "slider-compare-implication",
                                "concordance-with-slider-values"
                                );

                for (int i = 0; i < Data.Count; i++)
                {
                    var row = Data[i];

                    // Create additional columns for the variables with unintuitive choicetext-to-number mappings,
                    // with the actual value that corresponds to the choice.
                    // These are: Scheuner-14, Scheuner-15, Scheuner-16, Scheuner-17, Scheuner-18,
                    // Result-Understood, Children-in-House, and Adults-in-House.
                    // For each of these, there will be created a corresponding column ending in -Value
                    // (e.g. Scheuner-14-Value) containing the common-sense value that corresponds to the
                    // result. This is what should be used in any subsequent computations.

                    int    scheuner14Value       = int.Parse(row.Scheuner14) - 1;
                    int    scheuner15Value       = int.Parse(row.Scheuner15) - 1;
                    int    scheuner16Value       = int.Parse(row.Scheuner16) - 1;
                    int    scheuner17Value       = int.Parse(row.Scheuner17) - 1;
                    int    scheuner18Value       = int.Parse(row.Scheuner18) - 1;
                    int    resultUnderstoodValue = int.Parse(row.ResultUnderstood) - 1;

                    // Gender stays blank when it is missing or the participant chose "Prefer not to say".
                    string genderValue = "";
                    if (row.Gender != "" && row.Gender != qm.GetNumberCorrespondingToChoiceText("Gender", "Prefer not to say").ToString())
                    {
                        genderValue = qm.GetChoiceTextCorrespondingToNumber("Gender", int.Parse(row.Gender));
                    }

                    int adultsInHouseValue = int.Parse(row.AdultsInHouse) - 3;
                    // Fixed: this was previously derived from row.AdultsInHouse (copy-paste slip), which made
                    // the Children-in-House-Value column a shifted copy of the adults column.
                    // NOTE(review): property name assumed to follow the row's naming pattern — confirm.
                    int childrenInHouseValue = int.Parse(row.ChildrenInHouse) - 4;


                    // Lower bound of the income bracket: "0" and "91" for the two open-ended brackets,
                    // otherwise the two characters following the first character of the choice text.
                    string incomeLowerBound = "";
                    if (row.CombinedIncome != "")
                    {
                        string income = qm.GetChoiceTextCorrespondingToNumber("Combined-Income", int.Parse(row.CombinedIncome));
                        if (income == "Less than £10k")
                        {
                            incomeLowerBound = "0";
                        }
                        else if (income == "More than £91k")
                        {
                            incomeLowerBound = "91";
                        }
                        else
                        {
                            incomeLowerBound = income.Substring(1, 2);
                            if (!int.TryParse(incomeLowerBound, out _))
                            {
                                SanityCheck.AssertFailed();
                            }
                        }
                    }

                    string cfExperienceValue = qm.GetChoiceTextCorrespondingToNumber("CF-Experience", int.Parse(row.CFExperience));

                    // Get the user's score on each objective comprehension question.
                    // 'y' is correct, 'n' is incorrect, 'x' is N/A (didn't answer question, follow directions, etc.)
                    // N/As should probably be treated as 'n' but theoretically could also be treated as missing data.
                    // Another choice to be made: A la Marteau, your initial thought was to have the verbal answer
                    // "definitely a carrier of cystic fibrosis" be the CORRECT answer as to John's status,
                    // and "unlikely to have cystic fibrosis" as the CORRECT answer regarding John & Jane's child.
                    // However, these could also be treated as subjective. The fact that there is sensitivity/specificity
                    // info provided complicates the picture of "definitely a carrier of cystic fibrosis" as the
                    // 'correct' answer to question 1.
                    //
                    // The former way (including subjective scores) is coded below as 'oc-score-1' (objective comprehension score 1),
                    // and the latter way as 'oc-score-2'. NB: After discussion with colleagues (but before hypothesis testing),
                    // concluded that oc-score-1 was NOT the right way to code comprehension and stuck with oc-score-2 (described
                    // as simply 'oc-score' in the final Results_TidyClean data file).

                    bool testIsPositive = (row.TestResult == "Positive");

                    int sliderStatusAnswerKey = (testIsPositive ? 100 : 1);
                    int verbalStatusAnswerKey = (testIsPositive ?
                                                 qm.GetNumberCorrespondingToChoiceText("oc-status-verbal", "Definitely a carrier of cystic fibrosis") :
                                                 qm.GetNumberCorrespondingToChoiceText("oc-status-verbal", "Unlikely to be a carrier of cystic fibrosis"));
                    int sliderChildAnswerKey = (testIsPositive ? 25 : 0);
                    // The verbal answer key for the child question is the same for both test results.
                    // NOTE(review): the original spelled this as a ternary with two identical branches —
                    // confirm that no different key was intended for the non-positive case.
                    int verbalChildAnswerKey = qm.GetNumberCorrespondingToChoiceText("oc-child-verbal", "Unlikely to have cystic fibrosis");

                    string q1Correct = Y_N_or_X(row.OcStatusVerbal, verbalStatusAnswerKey.ToString());
                    string q2Correct = row.StatusIsNearlyCorrect;
                    string q3Correct = ScoreSlider(row.OcStatusSlider1, sliderStatusAnswerKey);
                    string q4Correct = Y_N_or_X(row.OcChildVerbal, verbalChildAnswerKey.ToString());
                    string q5Correct = row.ChildIsNearlyCorrect;
                    string q6Correct = ScoreSlider(row.OcChildSlider1, sliderChildAnswerKey);
                    string q7Correct = Y_N_or_X(row.ObjcompCompare, qm.GetNumberCorrespondingToChoiceText("objcomp-compare", "It's more likely that John Doe is a carrier of cystic fibrosis").ToString());
                    string q8Correct = row.Hard1000IsNearlyCorrect;
                    string q9Correct = row.Hard800IsNearlyCorrect;

                    // Scale 1 includes the two verbal questions (q1, q4); scale 2 leaves them out.
                    string[] ocscale1 = { q1Correct, q2Correct, q3Correct, q4Correct, q5Correct, q6Correct, q7Correct, q8Correct, q9Correct };
                    string[] ocscale2 = { q2Correct, q3Correct, q5Correct, q6Correct, q7Correct, q8Correct, q9Correct };

                    int ocScore1 = ocscale1.Select(x => (x == "y" ? 1 : 0)).Sum();
                    int ocScore2 = ocscale2.Select(x => (x == "y" ? 1 : 0)).Sum();

                    // Get the communication efficacy score (mean of the 18 Scheuner items).

                    double communicationEfficacy =
                        (int.Parse(row.Scheuner1) +
                         int.Parse(row.Scheuner2) +
                         int.Parse(row.Scheuner3) +
                         int.Parse(row.Scheuner4) +
                         int.Parse(row.Scheuner5) +
                         int.Parse(row.Scheuner6) +
                         int.Parse(row.Scheuner7) +
                         int.Parse(row.Scheuner8) +
                         int.Parse(row.Scheuner9) +
                         int.Parse(row.Scheuner10) +
                         int.Parse(row.Scheuner11) +
                         int.Parse(row.Scheuner12) +
                         int.Parse(row.Scheuner13) +
                         scheuner14Value + // This switch is intentional, due to how Qualtrics recorded the data (see definition of scheuner14Value above)
                         scheuner15Value +
                         scheuner16Value +
                         scheuner17Value +
                         scheuner18Value) / 18.0;

                    // Get subjective numeracy score (remembering to reverse-code item 7);

                    double subjectiveNumeracy =
                        (int.Parse(row.SNumeracy1) +
                         int.Parse(row.SNumeracy2) +
                         int.Parse(row.SNumeracy3) +
                         int.Parse(row.SNumeracy4) +
                         int.Parse(row.SNumeracy5) +
                         int.Parse(row.SNumeracy6) +
                         (7 - int.Parse(row.SNumeracy7)) +
                         int.Parse(row.SNumeracy8)) / 8.0;

                    // 3 codes indicating which question came first in the counterbalancing:
                    //   * 'status' before 'child' (yes (1) or no (0));
                    //   * 'status' before 'compare' (yes (1) or no (0));
                    //   * 'child' before 'compare' (yes (1) or no (0)).

                    DateTime statusQAbsoluteTime  = DateTime.Parse(row.OCStatusAbsoluteTime);
                    DateTime compareQAbsoluteTime = DateTime.Parse(row.OCCompareAbsoluteTime);
                    DateTime childQAbsoluteTime   = DateTime.Parse(row.OCChildAbsoluteTime);

                    int statusBeforeChild   = (statusQAbsoluteTime < childQAbsoluteTime ? 1 : 0);
                    int statusBeforeCompare = (statusQAbsoluteTime < compareQAbsoluteTime ? 1 : 0);
                    int childBeforeCompare  = (childQAbsoluteTime < compareQAbsoluteTime ? 1 : 0);

                    // Result was noticed (1 - for yes, yes but didn't read) or not (0 - for not sure, no)

                    int resultNoticed       = int.Parse(row.ResultNoticed);
                    int resultNoticedBinary = 0;
                    if (resultNoticed == qm.GetNumberCorrespondingToChoiceText("Result-Noticed", "Yes, and I read it") ||
                        resultNoticed == qm.GetNumberCorrespondingToChoiceText("Result-Noticed", "Yes, but I didn't read it"))
                    {
                        resultNoticedBinary = 1;
                    }

                    // Get 'next steps' average score
                    double nextStepsAverage =
                        (int.Parse(row.SubjNext1) +
                         int.Parse(row.SubjNext2) +
                         int.Parse(row.SubjNext3) +
                         int.Parse(row.SubjNext4) +
                         int.Parse(row.SubjNext5)) / 5.0;

                    // Get time spent reading report; the source column depends on the design shown.
                    // -1 is only written if the design is neither "UCD" nor "Control".
                    double timeReadingReport = -1;

                    if (row.Design == "UCD")
                    {
                        timeReadingReport = double.Parse(row.TimeUcdReportP1PageSubmit);
                    }
                    else if (row.Design == "Control")
                    {
                        timeReadingReport = double.Parse(row.TimeControlReportPageSubmit);
                    }
                    else
                    {
                        SanityCheck.AssertFailed();
                    }

                    // We are interested in whether people's answers to the OC-Compare question
                    // are concordant with their answers on the slider questions: did their answers to the
                    // slider questions imply that they believe that the carrier risk is higher, the child has cf
                    // risk, or neither?
                    // If either slider is unanswered, the implication stays at the "Don't know" code.

                    int dontKnowValue            = qm.GetNumberCorrespondingToChoiceText("objcomp-compare", "Don't know");
                    int sliderCompareImplication = dontKnowValue;
                    if (row.OcChildSlider1 != "" && row.OcStatusSlider1 != "")
                    {
                        int childSliderValue  = int.Parse(row.OcChildSlider1);
                        int statusSliderValue = int.Parse(row.OcStatusSlider1);
                        if (statusSliderValue > childSliderValue)
                        {
                            sliderCompareImplication = qm.GetNumberCorrespondingToChoiceText("objcomp-compare", "It's more likely that John Doe is a carrier of cystic fibrosis");
                        }
                        else if (childSliderValue > statusSliderValue)
                        {
                            sliderCompareImplication = qm.GetNumberCorrespondingToChoiceText("objcomp-compare", "It's more likely that the first child of John and Jane Doe will have cystic fibrosis");
                        }
                        else
                        {
                            // Neither is greater, so the two slider values are equal.
                            sliderCompareImplication = qm.GetNumberCorrespondingToChoiceText("objcomp-compare", "Both possibilities are equally likely");
                        }
                    }



                    // Check for concordance. -1 is don't know, 0 is not concordant, 1 is concordant

                    int compareAnswer               = int.Parse(row.ObjcompCompare);
                    int concordanceWithSliderValues = -1;
                    if (compareAnswer != dontKnowValue && sliderCompareImplication != dontKnowValue)
                    {
                        concordanceWithSliderValues = (sliderCompareImplication == compareAnswer ? 1 : 0);
                    }


                    // Append these variables to the current row in the file we are writing
                    // (same order as the header written above).

                    sw.WriteSepLine("\t", Lines[i],
                                    scheuner14Value,
                                    scheuner15Value,
                                    scheuner16Value,
                                    scheuner17Value,
                                    scheuner18Value,
                                    resultUnderstoodValue,
                                    genderValue,
                                    adultsInHouseValue,
                                    childrenInHouseValue,
                                    incomeLowerBound,
                                    cfExperienceValue,
                                    q3Correct,
                                    q6Correct,
                                    ocScore1,
                                    ocScore2,
                                    communicationEfficacy,
                                    subjectiveNumeracy,
                                    statusBeforeChild,
                                    statusBeforeCompare,
                                    childBeforeCompare,
                                    resultNoticedBinary,
                                    nextStepsAverage,
                                    q7Correct,
                                    q1Correct,
                                    q4Correct,
                                    timeReadingReport,
                                    sliderCompareImplication,
                                    concordanceWithSliderValues
                                    );
                }
            }
        }