/// <summary>
/// Prepare a Kaggle submission for Titanic.
/// </summary>
/// <remarks>
/// Writes two files into <paramref name="dataPath"/>: a text file holding the
/// cross validation scores plus the network's long term memory, and a CSV file
/// with one PassengerId/Survived row for every passenger of the test file.
/// </remarks>
/// <param name="dataPath">The data path.</param>
/// <param name="bestNetwork">The best network.</param>
/// <param name="cross">The cross validated data.</param>
public void Submit(string dataPath, RBFNetwork bestNetwork, CrossValidate cross)
{
    // Use the current time: "new DateTime()" is DateTime.MinValue, which stamped
    // every submission with the same constant value and overwrote earlier files.
    // "HH" is the 24-hour clock, so morning and afternoon runs do not collide.
    String now = DateTime.Now.ToString("yyyyMMddHHmm",
        System.Globalization.CultureInfo.InvariantCulture);

    string trainingPath = Path.Combine(dataPath, TitanicConfig.TrainingFilename);
    string testPath = Path.Combine(dataPath, TitanicConfig.TestFilename);

    // The score is scaled and truncated so it can be embedded in the file name.
    var score = (int) (cross.Score*10000);
    string submitPath = Path.Combine(dataPath, "submit-" + now + "_" + score + ".csv");
    string submitInfoPath = Path.Combine(dataPath, "submit-" + now + ".txt");

    // Write the human readable summary of the cross validation run.
    using (var file = new StreamWriter(submitInfoPath))
    {
        file.WriteLine("Crossvalidation stats:");
        for (int i = 0; i < cross.Count; i++)
        {
            CrossValidateFold fold = cross.Folds[i];
            file.WriteLine("Fold #" + (i + 1) + " : Score: " + fold.Score);
        }
        file.WriteLine("Average Score: " + cross.Score);
        file.WriteLine();
        file.WriteLine(String.Join(",", bestNetwork.LongTermMemory));
    }

    // Statistics are gathered over both files, exactly as during training.
    var stats = new TitanicStats();
    NormalizeTitanic.Analyze(stats, trainingPath);
    NormalizeTitanic.Analyze(stats, testPath);

    // Normalize the TEST file; ids receives the passenger id of every row.
    var ids = new List<String>();
    IList<BasicData> testData = NormalizeTitanic.Normalize(stats, testPath, ids,
        TitanicConfig.InputNormalizeLow,
        TitanicConfig.InputNormalizeHigh,
        TitanicConfig.PredictSurvive,
        TitanicConfig.PredictPerish);

    using (var streamWriter = new StreamWriter(submitPath))
    using (var writer = new CsvWriter(streamWriter))
    {
        writer.WriteField("PassengerId");
        writer.WriteField("Survived");
        writer.NextRecord();

        int idx = 0;
        foreach (BasicData data in testData)
        {
            double[] output = bestNetwork.ComputeRegression(data.Input);

            // An output above 0.5 is submitted as "survived".
            int survived = output[0] > 0.5 ? 1 : 0;

            writer.WriteField(ids[idx]);
            writer.WriteField(survived);
            writer.NextRecord();
            idx++;
        }
    }
}
/// <summary>
/// Analyze and generate stats for titanic data.
/// </summary>
/// <remarks>
/// Every passenger row of <paramref name="filename"/> updates the mean fare of
/// its class, the embarked histogram and the mean ages (total, per sex and per
/// title). Survival stats are only updated when the file has a "survived"
/// column, and each passenger is counted exactly once per survival statistic.
/// </remarks>
/// <param name="stats">The stats for titanic.</param>
/// <param name="filename">The file to analyze.</param>
/// <returns>The passenger count.</returns>
public static int Analyze(TitanicStats stats, string filename)
{
    int count = 0;
    var headerMap = new Dictionary<string, int>();

    using (var reader = new CsvReader(new StreamReader(filename)))
    {
        // The first Read() makes the headers and the first row available. When
        // it reports that there is no row, there is nothing to analyze.
        if (!reader.Read())
        {
            return count;
        }

        // Invariant lower-casing keeps the lookups below culture independent.
        for (int i = 0; i < reader.FieldHeaders.Length; i++)
        {
            headerMap[reader.FieldHeaders[i].ToLowerInvariant()] = i;
        }

        int ageIndex = headerMap["age"];
        int nameIndex = headerMap["name"];
        int sexIndex = headerMap["sex"];
        int indexEmbarked = headerMap["embarked"];
        int indexFare = headerMap["fare"];
        int indexPclass = headerMap["pclass"];

        // test data does not have survived
        int survivedIndex;
        if (!headerMap.TryGetValue("survived", out survivedIndex))
        {
            survivedIndex = -1;
        }
        bool hasSurvived = survivedIndex != -1;

        do
        {
            count++;
            String name = reader[nameIndex];
            String ageStr = reader[ageIndex];
            String sexStr = reader[sexIndex];
            String embarkedStr = reader[indexEmbarked];

            // test data does not have survived, do not use survived boolean if using test data!
            bool survived = false;
            if (hasSurvived)
            {
                survived = reader[survivedIndex].Equals("1");
            }

            // calculate average fare per class
            String strFare = reader[indexFare];
            if (strFare.Length > 0)
            {
                double fare = double.Parse(strFare, CultureInfo.InvariantCulture);
                string pclass = reader[indexPclass];
                if (pclass.Equals("1"))
                {
                    stats.MeanFare1.Update(fare);
                }
                else if (pclass.Equals("2"))
                {
                    stats.MeanFare2.Update(fare);
                }
                else if (pclass.Equals("3"))
                {
                    stats.MeanFare3.Update(fare);
                }
            }

            bool isMale = string.Equals(sexStr, "male", StringComparison.OrdinalIgnoreCase);

            stats.EmbarkedHisto.Update(embarkedStr);

            // Only compute survival stats on training data.
            if (hasSurvived)
            {
                if (embarkedStr.Equals("Q"))
                {
                    stats.EmbarkedQ.Update(isMale, survived);
                }
                else if (embarkedStr.Equals("S"))
                {
                    stats.EmbarkedS.Update(isMale, survived);
                }
                else if (embarkedStr.Equals("C"))
                {
                    stats.EmbarkedC.Update(isMale, survived);
                }

                stats.SurvivalTotal.Update(isMale, survived);

                // Survival by title. This is the only place these stats are
                // updated, so passengers with a known age are not counted twice.
                if (name.Contains("Master."))
                {
                    stats.SurvivalMaster.Update(isMale, survived);
                }
                else if (name.Contains("Mr."))
                {
                    stats.SurvivalMr.Update(isMale, survived);
                }
                else if (name.Contains("Miss.") || name.Contains("Mlle."))
                {
                    stats.SurvivalMiss.Update(isMale, survived);
                }
                else if (name.Contains("Mrs.") || name.Contains("Mme."))
                {
                    stats.SurvivalMrs.Update(isMale, survived);
                }
                else if (name.Contains("Col.") || name.Contains("Capt.") || name.Contains("Major."))
                {
                    stats.SurvivalMilitary.Update(isMale, survived);
                }
                else if (name.Contains("Countess.") || name.Contains("Lady.") || name.Contains("Sir.") ||
                         name.Contains("Don.") || name.Contains("Dona.") || name.Contains("Jonkheer."))
                {
                    stats.SurvivalNobility.Update(isMale, survived);
                }
                else if (name.Contains("Dr."))
                {
                    stats.SurvivalDr.Update(isMale, survived);
                }
                else if (name.Contains("Rev."))
                {
                    stats.SurvivalClergy.Update(isMale, survived);
                }
            }

            // Mean ages can only be updated for passengers with a recorded age.
            if (ageStr.Length > 0)
            {
                double age = double.Parse(ageStr, CultureInfo.InvariantCulture);

                // Update general mean age for male/female
                if (isMale)
                {
                    stats.MeanMale.Update(age);
                }
                else
                {
                    stats.MeanFemale.Update(age);
                }

                // Update the total average age
                stats.MeanTotal.Update(age);

                // Update the mean age of the passenger's title
                if (name.Contains("Master."))
                {
                    stats.MeanMaster.Update(age);
                }
                else if (name.Contains("Mr."))
                {
                    stats.MeanMr.Update(age);
                }
                else if (name.Contains("Miss.") || name.Contains("Mlle."))
                {
                    stats.MeanMiss.Update(age);
                }
                else if (name.Contains("Mrs.") || name.Contains("Mme."))
                {
                    stats.MeanMrs.Update(age);
                }
                else if (name.Contains("Col.") || name.Contains("Capt.") || name.Contains("Major."))
                {
                    stats.MeanMilitary.Update(age);
                }
                else if (name.Contains("Countess.") || name.Contains("Lady.") || name.Contains("Sir.") ||
                         name.Contains("Don.") || name.Contains("Dona.") || name.Contains("Jonkheer."))
                {
                    stats.MeanNobility.Update(age);
                }
                else if (name.Contains("Dr."))
                {
                    stats.MeanDr.Update(age);
                }
                else if (name.Contains("Rev."))
                {
                    stats.MeanClergy.Update(age);
                }
            }
        } while (reader.Read());
    }

    return count;
}
/// <summary>
/// The entry point for this example. If you would like to make this example
/// stand alone, then add to its own project and rename to Main.
/// </summary>
/// <remarks>
/// Analyzes the training and test files, dumps the statistics, normalizes the
/// training file and writes the normalized inputs to the norm dump file.
/// </remarks>
/// <param name="args">The first argument is the data directory path.</param>
public static void ExampleMain(string[] args)
{
    if (args.Length == 0)
    {
        Console.WriteLine("Please provide your data directory path as the first argument.");
        return;
    }

    string dataPath = args[0];
    string trainingPath = Path.Combine(dataPath, TitanicConfig.TrainingFilename);
    string testPath = Path.Combine(dataPath, TitanicConfig.TestFilename);
    string normalizePath = Path.Combine(dataPath, TitanicConfig.NormDumpFilename);

    var stats = new TitanicStats();
    Analyze(stats, trainingPath);
    Analyze(stats, testPath);
    stats.Dump();

    var ids = new List<String>();
    IList<BasicData> training = Normalize(stats, trainingPath, ids,
        TitanicConfig.InputNormalizeLow,
        TitanicConfig.InputNormalizeHigh,
        TitanicConfig.PredictSurvive,
        TitanicConfig.PredictPerish);

    // Column names, in the order Normalize fills Input[0] .. Input[12].
    string[] inputColumns =
    {
        "age", "sex-male", "pclass", "sibsp", "parch", "fare",
        "embarked-c", "embarked-q", "embarked-s",
        "name-mil", "name-nobility", "name-dr", "name-clergy"
    };

    using (var streamWriter = new StreamWriter(normalizePath))
    using (var writer = new CsvWriter(streamWriter))
    {
        writer.WriteField("id");
        foreach (string column in inputColumns)
        {
            writer.WriteField(column);
        }
        writer.NextRecord();

        int idx = 0;
        foreach (BasicData data in training)
        {
            writer.WriteField(ids[idx++]);

            // Write the inputs in header order. Starting at Input[1] and writing
            // Input[0] last put every value under the wrong column name.
            for (int i = 0; i < inputColumns.Length; i++)
            {
                writer.WriteField(data.Input[i].ToString(CultureInfo.InvariantCulture));
            }
            writer.NextRecord();
        }
    }
}
/// <summary>
/// Normalize the passengers of a titanic data file into network input.
/// </summary>
/// <remarks>
/// One <see cref="BasicData"/> is produced per passenger row. The inputs are:
/// [0] age, [1] sex-male, [2] pclass, [3] sibsp, [4] parch, [5] fare,
/// [6] embarked-c, [7] embarked-q, [8] embarked-s, [9] name-mil,
/// [10] name-nobility, [11] name-dr, [12] name-clergy. A missing age is
/// replaced by the mean age of the passenger's title (or sex, when no title
/// matches) and a missing fare by the mean fare of the passenger's class.
/// </remarks>
/// <param name="stats">The stats that supply the mean ages and fares.</param>
/// <param name="filename">The file to normalize.</param>
/// <param name="ids">Receives the passenger id of every row; may be null.</param>
/// <param name="inputLow">The low end of the normalized input range.</param>
/// <param name="inputHigh">The high end of the normalized input range.</param>
/// <param name="predictSurvive">The ideal value for a passenger that survived.</param>
/// <param name="predictPerish">The ideal value for a passenger that did not survive.</param>
/// <returns>The normalized passengers, in file order.</returns>
public static IList<BasicData> Normalize(TitanicStats stats, string filename, List<String> ids,
    double inputLow, double inputHigh, double predictSurvive, double predictPerish)
{
    IList<BasicData> result = new List<BasicData>();
    var headerMap = new Dictionary<string, int>();

    using (var reader = new CsvReader(new StreamReader(filename)))
    {
        // The first Read() makes the headers and the first row available. When
        // it reports that there is no row, there is nothing to normalize.
        if (!reader.Read())
        {
            return result;
        }

        // Invariant lower-casing: a culture-sensitive ToLower() maps the 'I' of
        // "PassengerId" to a dotless i under Turkish cultures and the
        // "passengerid" lookup below would fail.
        for (int i = 0; i < reader.FieldHeaders.Length; i++)
        {
            headerMap[reader.FieldHeaders[i].ToLowerInvariant()] = i;
        }

        int ageIndex = headerMap["age"];
        int nameIndex = headerMap["name"];
        int sexIndex = headerMap["sex"];
        int indexEmbarked = headerMap["embarked"];
        int indexPclass = headerMap["pclass"];
        int indexSibsp = headerMap["sibsp"];
        int indexParch = headerMap["parch"];
        int indexFare = headerMap["fare"];
        int indexId = headerMap["passengerid"];

        // test data does not have survived
        int survivedIndex;
        if (!headerMap.TryGetValue("survived", out survivedIndex))
        {
            survivedIndex = -1;
        }

        do
        {
            var data = new BasicData(TitanicConfig.InputFeatureCount, 1);

            String name = reader[nameIndex];
            String sex = reader[sexIndex];
            String embarked = reader[indexEmbarked].Trim();
            String id = reader[indexId];
            String ageStr = reader[ageIndex];

            // Add record the passenger id, if requested
            if (ids != null)
            {
                ids.Add(id);
            }

            bool isMale = string.Equals(sex, "male", StringComparison.OrdinalIgnoreCase);
            bool isMilitary = name.Contains("Col.") || name.Contains("Capt.") || name.Contains("Major.");
            bool isNobility = name.Contains("Countess.") || name.Contains("Lady.") || name.Contains("Sir.") ||
                              name.Contains("Don.") || name.Contains("Dona.") || name.Contains("Jonkheer.");
            bool isDr = name.Contains("Dr.");
            bool isClergy = name.Contains("Rev.");

            // age
            double age;
            if (ageStr.Length == 0)
            {
                // age is missing, interpolate using name
                if (name.Contains("Master."))
                {
                    age = stats.MeanMaster.Calculate();
                }
                else if (name.Contains("Mr."))
                {
                    age = stats.MeanMr.Calculate();
                }
                else if (name.Contains("Miss.") || name.Contains("Mlle."))
                {
                    age = stats.MeanMiss.Calculate();
                }
                else if (name.Contains("Mrs.") || name.Contains("Mme."))
                {
                    age = stats.MeanMrs.Calculate();
                }
                else if (isMilitary)
                {
                    // Military titles use the military mean, not the "Miss" mean.
                    age = stats.MeanMilitary.Calculate();
                }
                else if (isNobility)
                {
                    age = stats.MeanNobility.Calculate();
                }
                else if (isDr)
                {
                    age = stats.MeanDr.Calculate();
                }
                else if (isClergy)
                {
                    age = stats.MeanClergy.Calculate();
                }
                else if (isMale)
                {
                    age = stats.MeanMale.Calculate();
                }
                else
                {
                    age = stats.MeanFemale.Calculate();
                }
            }
            else
            {
                age = Double.Parse(ageStr, CultureInfo.InvariantCulture);
            }
            data.Input[0] = RangeNormalize(age, 0, 100, inputLow, inputHigh);

            // sex-male
            data.Input[1] = isMale ? inputHigh : inputLow;

            // pclass
            double pclass = double.Parse(reader[indexPclass], CultureInfo.InvariantCulture);
            data.Input[2] = RangeNormalize(pclass, 1, 3, inputLow, inputHigh);

            // sibsp
            double sibsp = double.Parse(reader[indexSibsp], CultureInfo.InvariantCulture);
            data.Input[3] = RangeNormalize(sibsp, 0, 10, inputLow, inputHigh);

            // parch
            double parch = double.Parse(reader[indexParch], CultureInfo.InvariantCulture);
            data.Input[4] = RangeNormalize(parch, 0, 10, inputLow, inputHigh);

            // fare
            String strFare = reader[indexFare];
            double fare;
            if (strFare.Length == 0)
            {
                switch ((int) pclass)
                {
                    case 1:
                        fare = stats.MeanFare1.Calculate();
                        break;
                    case 3:
                        fare = stats.MeanFare3.Calculate();
                        break;
                    default:
                        // class 2, or (should not happen) a class other than
                        // 1,2,3 for which the median class (2) is used.
                        fare = stats.MeanFare2.Calculate();
                        break;
                }
            }
            else
            {
                fare = Double.Parse(strFare, CultureInfo.InvariantCulture);
            }
            data.Input[5] = RangeNormalize(fare, 0, 500, inputLow, inputHigh);

            // embarked-c
            data.Input[6] = string.Equals(embarked, "c", StringComparison.OrdinalIgnoreCase) ? inputHigh : inputLow;

            // embarked-q
            data.Input[7] = string.Equals(embarked, "q", StringComparison.OrdinalIgnoreCase) ? inputHigh : inputLow;

            // embarked-s
            data.Input[8] = string.Equals(embarked, "s", StringComparison.OrdinalIgnoreCase) ? inputHigh : inputLow;

            // name-mil
            data.Input[9] = isMilitary ? inputHigh : inputLow;

            // name-nobility
            data.Input[10] = isNobility ? inputHigh : inputLow;

            // name-dr
            data.Input[11] = isDr ? inputHigh : inputLow;

            // name-clergy
            data.Input[12] = isClergy ? inputHigh : inputLow;

            // add survived, if it exists
            if (survivedIndex != -1)
            {
                int survived = int.Parse(reader[survivedIndex], CultureInfo.InvariantCulture);
                data.Ideal[0] = (survived == 1) ? predictSurvive : predictPerish;
            }

            // add the new row
            result.Add(data);
        } while (reader.Read());
    }

    return result;
}
/// <summary>
/// Fit a RBF model to the titanic.
/// </summary>
/// <remarks>
/// Gathers statistics from the training and test files, normalizes the
/// training file, splits it into cross validation folds, trains every fold
/// and prints the score of each fold followed by the overall score.
/// </remarks>
/// <param name="dataPath">The path that contains the data file.</param>
public void Process(string dataPath)
{
    string trainFile = Path.Combine(dataPath, TitanicConfig.TrainingFilename);
    string testFile = Path.Combine(dataPath, TitanicConfig.TestFilename);

    // Statistics are collected over both files.
    var titanicStats = new TitanicStats();
    NormalizeTitanic.Analyze(titanicStats, trainFile);
    NormalizeTitanic.Analyze(titanicStats, testFile);

    // Only the training file is normalized; the passenger ids are not needed.
    IList<BasicData> normalized = NormalizeTitanic.Normalize(
        titanicStats,
        trainFile,
        null,
        TitanicConfig.InputNormalizeLow,
        TitanicConfig.InputNormalizeHigh,
        TitanicConfig.PredictSurvive,
        TitanicConfig.PredictPerish);

    // Split the normalized data into folds.
    IGenerateRandom random = new MersenneTwisterGenerateRandom();
    _cross = new CrossValidate(TitanicConfig.FoldCount, normalized, random);

    // Train the folds one after the other.
    int foldIndex = 0;
    while (foldIndex < _cross.Count)
    {
        int foldNumber = foldIndex + 1;
        Console.WriteLine("Cross validation fold #" + foldNumber + "/" + _cross.Count);
        TrainFold(foldIndex, _cross.Folds[foldIndex]);
        foldIndex++;
    }

    // Report the score of every fold, then the combined score.
    Console.WriteLine("Crossvalidation summary:");
    int position = 0;
    foreach (CrossValidateFold current in _cross.Folds)
    {
        position++;
        Console.WriteLine("Fold #" + position + ": " + current.Score);
    }

    Console.WriteLine("Final, crossvalidated score:" + _cross.Score);
}
/// <summary>
/// Analyze and generate stats for titanic data.
/// </summary>
/// <remarks>
/// Every passenger row of <paramref name="filename"/> updates the mean fare of
/// its class, the embarked histogram and the mean ages (total, per sex and per
/// title). Survival stats are only updated when the file has a "survived"
/// column, and each passenger is counted exactly once per survival statistic.
/// </remarks>
/// <param name="stats">The stats for titanic.</param>
/// <param name="filename">The file to analyze.</param>
/// <returns>The passenger count.</returns>
public static int Analyze(TitanicStats stats, string filename)
{
    int count = 0;
    var headerMap = new Dictionary<string, int>();

    using (var reader = new CsvReader(new StreamReader(filename)))
    {
        // The first Read() makes the headers and the first row available. When
        // it reports that there is no row, there is nothing to analyze.
        if (!reader.Read())
        {
            return count;
        }

        // Invariant lower-casing keeps the lookups below culture independent.
        for (int i = 0; i < reader.FieldHeaders.Length; i++)
        {
            headerMap[reader.FieldHeaders[i].ToLowerInvariant()] = i;
        }

        int ageIndex = headerMap["age"];
        int nameIndex = headerMap["name"];
        int sexIndex = headerMap["sex"];
        int indexEmbarked = headerMap["embarked"];
        int indexFare = headerMap["fare"];
        int indexPclass = headerMap["pclass"];

        // test data does not have survived
        int survivedIndex;
        if (!headerMap.TryGetValue("survived", out survivedIndex))
        {
            survivedIndex = -1;
        }
        bool hasSurvived = survivedIndex != -1;

        do
        {
            count++;
            String name = reader[nameIndex];
            String ageStr = reader[ageIndex];
            String sexStr = reader[sexIndex];
            String embarkedStr = reader[indexEmbarked];

            // test data does not have survived, do not use survived boolean if using test data!
            bool survived = false;
            if (hasSurvived)
            {
                survived = reader[survivedIndex].Equals("1");
            }

            // calculate average fare per class
            String strFare = reader[indexFare];
            if (strFare.Length > 0)
            {
                double fare = double.Parse(strFare, CultureInfo.InvariantCulture);
                string pclass = reader[indexPclass];
                if (pclass.Equals("1"))
                {
                    stats.MeanFare1.Update(fare);
                }
                else if (pclass.Equals("2"))
                {
                    stats.MeanFare2.Update(fare);
                }
                else if (pclass.Equals("3"))
                {
                    stats.MeanFare3.Update(fare);
                }
            }

            bool isMale = string.Equals(sexStr, "male", StringComparison.OrdinalIgnoreCase);

            stats.EmbarkedHisto.Update(embarkedStr);

            // Only compute survival stats on training data.
            if (hasSurvived)
            {
                if (embarkedStr.Equals("Q"))
                {
                    stats.EmbarkedQ.Update(isMale, survived);
                }
                else if (embarkedStr.Equals("S"))
                {
                    stats.EmbarkedS.Update(isMale, survived);
                }
                else if (embarkedStr.Equals("C"))
                {
                    stats.EmbarkedC.Update(isMale, survived);
                }

                stats.SurvivalTotal.Update(isMale, survived);

                // Survival by title. This is the only place these stats are
                // updated, so passengers with a known age are not counted twice.
                if (name.Contains("Master."))
                {
                    stats.SurvivalMaster.Update(isMale, survived);
                }
                else if (name.Contains("Mr."))
                {
                    stats.SurvivalMr.Update(isMale, survived);
                }
                else if (name.Contains("Miss.") || name.Contains("Mlle."))
                {
                    stats.SurvivalMiss.Update(isMale, survived);
                }
                else if (name.Contains("Mrs.") || name.Contains("Mme."))
                {
                    stats.SurvivalMrs.Update(isMale, survived);
                }
                else if (name.Contains("Col.") || name.Contains("Capt.") || name.Contains("Major."))
                {
                    stats.SurvivalMilitary.Update(isMale, survived);
                }
                else if (name.Contains("Countess.") || name.Contains("Lady.") || name.Contains("Sir.") ||
                         name.Contains("Don.") || name.Contains("Dona.") || name.Contains("Jonkheer."))
                {
                    stats.SurvivalNobility.Update(isMale, survived);
                }
                else if (name.Contains("Dr."))
                {
                    stats.SurvivalDr.Update(isMale, survived);
                }
                else if (name.Contains("Rev."))
                {
                    stats.SurvivalClergy.Update(isMale, survived);
                }
            }

            // Mean ages can only be updated for passengers with a recorded age.
            if (ageStr.Length > 0)
            {
                double age = double.Parse(ageStr, CultureInfo.InvariantCulture);

                // Update general mean age for male/female
                if (isMale)
                {
                    stats.MeanMale.Update(age);
                }
                else
                {
                    stats.MeanFemale.Update(age);
                }

                // Update the total average age
                stats.MeanTotal.Update(age);

                // Update the mean age of the passenger's title
                if (name.Contains("Master."))
                {
                    stats.MeanMaster.Update(age);
                }
                else if (name.Contains("Mr."))
                {
                    stats.MeanMr.Update(age);
                }
                else if (name.Contains("Miss.") || name.Contains("Mlle."))
                {
                    stats.MeanMiss.Update(age);
                }
                else if (name.Contains("Mrs.") || name.Contains("Mme."))
                {
                    stats.MeanMrs.Update(age);
                }
                else if (name.Contains("Col.") || name.Contains("Capt.") || name.Contains("Major."))
                {
                    stats.MeanMilitary.Update(age);
                }
                else if (name.Contains("Countess.") || name.Contains("Lady.") || name.Contains("Sir.") ||
                         name.Contains("Don.") || name.Contains("Dona.") || name.Contains("Jonkheer."))
                {
                    stats.MeanNobility.Update(age);
                }
                else if (name.Contains("Dr."))
                {
                    stats.MeanDr.Update(age);
                }
                else if (name.Contains("Rev."))
                {
                    stats.MeanClergy.Update(age);
                }
            }
        } while (reader.Read());
    }

    return count;
}
/// <summary>
/// The entry point for this example. If you would like to make this example
/// stand alone, then add to its own project and rename to Main.
/// </summary>
/// <remarks>
/// Analyzes the training and test files, dumps the statistics, normalizes the
/// training file and writes the normalized inputs to the norm dump file.
/// </remarks>
/// <param name="args">The first argument is the data directory path.</param>
public static void ExampleMain(string[] args)
{
    if (args.Length == 0)
    {
        Console.WriteLine("Please provide your data directory path as the first argument.");
        return;
    }

    string dataPath = args[0];
    string trainingPath = Path.Combine(dataPath, TitanicConfig.TrainingFilename);
    string testPath = Path.Combine(dataPath, TitanicConfig.TestFilename);
    string normalizePath = Path.Combine(dataPath, TitanicConfig.NormDumpFilename);

    var stats = new TitanicStats();
    Analyze(stats, trainingPath);
    Analyze(stats, testPath);
    stats.Dump();

    var ids = new List<String>();
    IList<BasicData> training = Normalize(stats, trainingPath, ids,
        TitanicConfig.InputNormalizeLow,
        TitanicConfig.InputNormalizeHigh,
        TitanicConfig.PredictSurvive,
        TitanicConfig.PredictPerish);

    // Column names, in the order Normalize fills Input[0] .. Input[12].
    string[] inputColumns =
    {
        "age", "sex-male", "pclass", "sibsp", "parch", "fare",
        "embarked-c", "embarked-q", "embarked-s",
        "name-mil", "name-nobility", "name-dr", "name-clergy"
    };

    using (var streamWriter = new StreamWriter(normalizePath))
    using (var writer = new CsvWriter(streamWriter))
    {
        writer.WriteField("id");
        foreach (string column in inputColumns)
        {
            writer.WriteField(column);
        }
        writer.NextRecord();

        int idx = 0;
        foreach (BasicData data in training)
        {
            writer.WriteField(ids[idx++]);

            // Write the inputs in header order. Starting at Input[1] and writing
            // Input[0] last put every value under the wrong column name.
            for (int i = 0; i < inputColumns.Length; i++)
            {
                writer.WriteField(data.Input[i].ToString(CultureInfo.InvariantCulture));
            }
            writer.NextRecord();
        }
    }
}
/// <summary>
/// Normalize the passengers of a titanic data file into network input.
/// </summary>
/// <remarks>
/// One <see cref="BasicData"/> is produced per passenger row. The inputs are:
/// [0] age, [1] sex-male, [2] pclass, [3] sibsp, [4] parch, [5] fare,
/// [6] embarked-c, [7] embarked-q, [8] embarked-s, [9] name-mil,
/// [10] name-nobility, [11] name-dr, [12] name-clergy. A missing age is
/// replaced by the mean age of the passenger's title (or sex, when no title
/// matches) and a missing fare by the mean fare of the passenger's class.
/// </remarks>
/// <param name="stats">The stats that supply the mean ages and fares.</param>
/// <param name="filename">The file to normalize.</param>
/// <param name="ids">Receives the passenger id of every row; may be null.</param>
/// <param name="inputLow">The low end of the normalized input range.</param>
/// <param name="inputHigh">The high end of the normalized input range.</param>
/// <param name="predictSurvive">The ideal value for a passenger that survived.</param>
/// <param name="predictPerish">The ideal value for a passenger that did not survive.</param>
/// <returns>The normalized passengers, in file order.</returns>
public static IList<BasicData> Normalize(TitanicStats stats, string filename, List<String> ids,
    double inputLow, double inputHigh, double predictSurvive, double predictPerish)
{
    IList<BasicData> result = new List<BasicData>();
    var headerMap = new Dictionary<string, int>();

    using (var reader = new CsvReader(new StreamReader(filename)))
    {
        // The first Read() makes the headers and the first row available. When
        // it reports that there is no row, there is nothing to normalize.
        if (!reader.Read())
        {
            return result;
        }

        // Invariant lower-casing: a culture-sensitive ToLower() maps the 'I' of
        // "PassengerId" to a dotless i under Turkish cultures and the
        // "passengerid" lookup below would fail.
        for (int i = 0; i < reader.FieldHeaders.Length; i++)
        {
            headerMap[reader.FieldHeaders[i].ToLowerInvariant()] = i;
        }

        int ageIndex = headerMap["age"];
        int nameIndex = headerMap["name"];
        int sexIndex = headerMap["sex"];
        int indexEmbarked = headerMap["embarked"];
        int indexPclass = headerMap["pclass"];
        int indexSibsp = headerMap["sibsp"];
        int indexParch = headerMap["parch"];
        int indexFare = headerMap["fare"];
        int indexId = headerMap["passengerid"];

        // test data does not have survived
        int survivedIndex;
        if (!headerMap.TryGetValue("survived", out survivedIndex))
        {
            survivedIndex = -1;
        }

        do
        {
            var data = new BasicData(TitanicConfig.InputFeatureCount, 1);

            String name = reader[nameIndex];
            String sex = reader[sexIndex];
            String embarked = reader[indexEmbarked].Trim();
            String id = reader[indexId];
            String ageStr = reader[ageIndex];

            // Add record the passenger id, if requested
            if (ids != null)
            {
                ids.Add(id);
            }

            bool isMale = string.Equals(sex, "male", StringComparison.OrdinalIgnoreCase);
            bool isMilitary = name.Contains("Col.") || name.Contains("Capt.") || name.Contains("Major.");
            bool isNobility = name.Contains("Countess.") || name.Contains("Lady.") || name.Contains("Sir.") ||
                              name.Contains("Don.") || name.Contains("Dona.") || name.Contains("Jonkheer.");
            bool isDr = name.Contains("Dr.");
            bool isClergy = name.Contains("Rev.");

            // age
            double age;
            if (ageStr.Length == 0)
            {
                // age is missing, interpolate using name
                if (name.Contains("Master."))
                {
                    age = stats.MeanMaster.Calculate();
                }
                else if (name.Contains("Mr."))
                {
                    age = stats.MeanMr.Calculate();
                }
                else if (name.Contains("Miss.") || name.Contains("Mlle."))
                {
                    age = stats.MeanMiss.Calculate();
                }
                else if (name.Contains("Mrs.") || name.Contains("Mme."))
                {
                    age = stats.MeanMrs.Calculate();
                }
                else if (isMilitary)
                {
                    // Military titles use the military mean, not the "Miss" mean.
                    age = stats.MeanMilitary.Calculate();
                }
                else if (isNobility)
                {
                    age = stats.MeanNobility.Calculate();
                }
                else if (isDr)
                {
                    age = stats.MeanDr.Calculate();
                }
                else if (isClergy)
                {
                    age = stats.MeanClergy.Calculate();
                }
                else if (isMale)
                {
                    age = stats.MeanMale.Calculate();
                }
                else
                {
                    age = stats.MeanFemale.Calculate();
                }
            }
            else
            {
                age = Double.Parse(ageStr, CultureInfo.InvariantCulture);
            }
            data.Input[0] = RangeNormalize(age, 0, 100, inputLow, inputHigh);

            // sex-male
            data.Input[1] = isMale ? inputHigh : inputLow;

            // pclass
            double pclass = double.Parse(reader[indexPclass], CultureInfo.InvariantCulture);
            data.Input[2] = RangeNormalize(pclass, 1, 3, inputLow, inputHigh);

            // sibsp
            double sibsp = double.Parse(reader[indexSibsp], CultureInfo.InvariantCulture);
            data.Input[3] = RangeNormalize(sibsp, 0, 10, inputLow, inputHigh);

            // parch
            double parch = double.Parse(reader[indexParch], CultureInfo.InvariantCulture);
            data.Input[4] = RangeNormalize(parch, 0, 10, inputLow, inputHigh);

            // fare
            String strFare = reader[indexFare];
            double fare;
            if (strFare.Length == 0)
            {
                switch ((int) pclass)
                {
                    case 1:
                        fare = stats.MeanFare1.Calculate();
                        break;
                    case 3:
                        fare = stats.MeanFare3.Calculate();
                        break;
                    default:
                        // class 2, or (should not happen) a class other than
                        // 1,2,3 for which the median class (2) is used.
                        fare = stats.MeanFare2.Calculate();
                        break;
                }
            }
            else
            {
                fare = Double.Parse(strFare, CultureInfo.InvariantCulture);
            }
            data.Input[5] = RangeNormalize(fare, 0, 500, inputLow, inputHigh);

            // embarked-c
            data.Input[6] = string.Equals(embarked, "c", StringComparison.OrdinalIgnoreCase) ? inputHigh : inputLow;

            // embarked-q
            data.Input[7] = string.Equals(embarked, "q", StringComparison.OrdinalIgnoreCase) ? inputHigh : inputLow;

            // embarked-s
            data.Input[8] = string.Equals(embarked, "s", StringComparison.OrdinalIgnoreCase) ? inputHigh : inputLow;

            // name-mil
            data.Input[9] = isMilitary ? inputHigh : inputLow;

            // name-nobility
            data.Input[10] = isNobility ? inputHigh : inputLow;

            // name-dr
            data.Input[11] = isDr ? inputHigh : inputLow;

            // name-clergy
            data.Input[12] = isClergy ? inputHigh : inputLow;

            // add survived, if it exists
            if (survivedIndex != -1)
            {
                int survived = int.Parse(reader[survivedIndex], CultureInfo.InvariantCulture);
                data.Ideal[0] = (survived == 1) ? predictSurvive : predictPerish;
            }

            // add the new row
            result.Add(data);
        } while (reader.Read());
    }

    return result;
}