/// <summary>
/// Convert two 2D arrays into a List of BasicData elements. One array holds input and the
/// other ideal vectors.
/// </summary>
/// <param name="inputData">An array of input vectors.</param>
/// <param name="idealData">An array of ideal vectors.</param>
/// <returns>A list of BasicData elements.</returns>
/// <exception cref="ArgumentNullException">If either array is null.</exception>
/// <exception cref="ArgumentException">If the arrays have different row counts.</exception>
public static IList<BasicData> ConvertArrays(double[][] inputData, double[][] idealData)
{
    if (inputData == null)
    {
        throw new ArgumentNullException(nameof(inputData));
    }
    if (idealData == null)
    {
        throw new ArgumentNullException(nameof(idealData));
    }

    // Fix: the original indexed inputData[0]/idealData[0] unconditionally, which threw
    // IndexOutOfRangeException for empty arrays, and it silently assumed both arrays had
    // the same number of rows (a shorter idealData crashed mid-loop).
    if (inputData.Length != idealData.Length)
    {
        throw new ArgumentException(
            "inputData and idealData must have the same number of rows.");
    }

    // Presize the list since the row count is known.
    var result = new List<BasicData>(inputData.Length);
    if (inputData.Length == 0)
    {
        return result; // nothing to convert
    }

    // Vector widths are taken from the first row; all rows are assumed uniform.
    var inputCount = inputData[0].Length;
    var idealCount = idealData[0].Length;

    for (var row = 0; row < inputData.Length; row++)
    {
        var dataRow = new BasicData(inputCount, idealCount);
        Array.Copy(inputData[row], dataRow.Input, inputCount);
        Array.Copy(idealData[row], dataRow.Ideal, idealCount);
        result.Add(dataRow);
    }

    return result;
}
/// <summary>
/// Extract a supervised training set. This has both input and expected (ideal) output.
/// </summary>
/// <param name="inputBegin">The first input column.</param>
/// <param name="inputCount">The number of columns for input.</param>
/// <param name="idealBegin">The first ideal column.</param>
/// <param name="idealCount">The number of columns for ideal.</param>
/// <returns>The training set.</returns>
public IList<BasicData> ExtractSupervised(int inputBegin, int inputCount, int idealBegin, int idealCount)
{
    IList<BasicData> trainingSet = new List<BasicData>();

    for (int r = 0; r < Count; r++)
    {
        object[] rawRow = _data[r];
        var item = new BasicData(inputCount, idealCount);

        // Copy the requested input columns, then the ideal columns,
        // converting each raw cell to a numeric value.
        for (int col = 0; col < inputCount; col++)
        {
            item.Input[col] = ConvertNumeric(rawRow, inputBegin + col);
        }

        for (int col = 0; col < idealCount; col++)
        {
            item.Ideal[col] = ConvertNumeric(rawRow, idealBegin + col);
        }

        trainingSet.Add(item);
    }

    return trainingSet;
}
/// <summary>
/// Read the Titanic CSV at <paramref name="filename"/> and produce a normalized training set.
/// Missing ages are interpolated from title-based means in <paramref name="stats"/>; missing
/// fares fall back to the per-class mean fare. Each row yields 13 inputs (age, sex, pclass,
/// sibsp, parch, fare, three embarked flags, and four title flags) scaled into
/// [inputLow, inputHigh], plus one ideal value when a "survived" column is present.
/// </summary>
/// <param name="stats">Precomputed means used to fill in missing ages/fares.</param>
/// <param name="filename">Path to the CSV file to read.</param>
/// <param name="ids">If non-null, receives the passenger id of every row read.</param>
/// <param name="inputLow">Low end of the normalized input range.</param>
/// <param name="inputHigh">High end of the normalized input range.</param>
/// <param name="predictSurvive">Ideal value emitted for survived == 1.</param>
/// <param name="predictPerish">Ideal value emitted for survived != 1.</param>
/// <returns>The normalized rows as a list of BasicData.</returns>
public static IList<BasicData> Normalize(TitanicStats stats, string filename, List<String> ids, double inputLow, double inputHigh, double predictSurvive, double predictPerish)
{
    IList<BasicData> result = new List<BasicData>();
    var headerMap = new Dictionary<string, int>();
    using (var reader = new CsvReader(new StreamReader(filename)))
    {
        // First Read() loads the header record; map lower-cased header names to indexes.
        reader.Read();
        for (int i = 0; i < reader.FieldHeaders.Length; i++)
        {
            headerMap[reader.FieldHeaders[i].ToLower()] = i;
        }
        int ageIndex = headerMap["age"];
        int nameIndex = headerMap["name"];
        int sexIndex = headerMap["sex"];
        int indexEmbarked = headerMap["embarked"];
        int indexPclass = headerMap["pclass"];
        int indexSibsp = headerMap["sibsp"];
        int indexParch = headerMap["parch"];
        int indexFare = headerMap["fare"];
        int indexId = headerMap["passengerid"];
        int survivedIndex = -1; // test data does not have survived
        if (headerMap.ContainsKey("survived"))
        {
            survivedIndex = headerMap["survived"];
        }
        // do/while because the header Read() above already positioned us on the first data row.
        do
        {
            var data = new BasicData(TitanicConfig.InputFeatureCount, 1);
            String name = reader[nameIndex];
            String sex = reader[sexIndex];
            String embarked = reader[indexEmbarked];
            String id = reader[indexId];
            // Add record the passenger id, if requested
            if (ids != null)
            {
                ids.Add(id);
            }
            // NOTE(review): string.Compare(..., true) is culture-sensitive; for a data token
            // like "male", StringComparison.OrdinalIgnoreCase would be safer — confirm.
            bool isMale = string.Compare(sex, "male", true) == 0;
            // age
            double age;
            // do we have an age for this person?
            if (reader[ageIndex].Length == 0)
            {
                // age is missing, interpolate using name
                if (name.Contains("Master."))
                {
                    age = stats.MeanMaster.Calculate();
                }
                else if (name.Contains("Mr."))
                {
                    age = stats.MeanMr.Calculate();
                }
                else if (name.Contains("Miss.") || name.Contains("Mlle."))
                {
                    age = stats.MeanMiss.Calculate();
                }
                else if (name.Contains("Mrs.") || name.Contains("Mme."))
                {
                    age = stats.MeanMrs.Calculate();
                }
                else if (name.Contains("Col.") || name.Contains("Capt.") || name.Contains("Major."))
                {
                    // NOTE(review): military titles fall back to the "Miss" mean here, which
                    // looks like a copy/paste slip — a military mean (e.g. stats.MeanMilitary)
                    // presumably exists on TitanicStats; confirm and fix if so.
                    age = stats.MeanMiss.Calculate();
                }
                else if (name.Contains("Countess.") || name.Contains("Lady.") || name.Contains("Sir.") || name.Contains("Don.") || name.Contains("Dona.") || name.Contains("Jonkheer."))
                {
                    age = stats.MeanNobility.Calculate();
                }
                else if (name.Contains("Dr."))
                {
                    age = stats.MeanDr.Calculate();
                }
                else if (name.Contains("Rev."))
                {
                    age = stats.MeanClergy.Calculate();
                }
                else
                {
                    // No recognized title: fall back to the sex-wide mean age.
                    if (isMale)
                    {
                        age = stats.MeanMale.Calculate();
                    }
                    else
                    {
                        age = stats.MeanFemale.Calculate();
                    }
                }
            }
            else
            {
                age = Double.Parse(reader[ageIndex], CultureInfo.InvariantCulture);
            }
            data.Input[0] = RangeNormalize(age, 0, 100, inputLow, inputHigh);
            // sex-male
            data.Input[1] = isMale ? inputHigh : inputLow;
            // pclass
            double pclass = double.Parse(reader[indexPclass], CultureInfo.InvariantCulture);
            data.Input[2] = RangeNormalize(pclass, 1, 3, inputLow, inputHigh);
            // sibsp
            double sibsp = double.Parse(reader[indexSibsp], CultureInfo.InvariantCulture);
            data.Input[3] = RangeNormalize(sibsp, 0, 10, inputLow, inputHigh);
            // parch
            double parch = double.Parse(reader[indexParch], CultureInfo.InvariantCulture);
            data.Input[4] = RangeNormalize(parch, 0, 10, inputLow, inputHigh);
            // fare
            String strFare = reader[indexFare];
            double fare;
            if (strFare.Length == 0)
            {
                // Fare is missing: substitute the mean fare for the passenger's class.
                if (((int) pclass) == 1)
                {
                    fare = stats.MeanFare1.Calculate();
                }
                else if (((int) pclass) == 2)
                {
                    fare = stats.MeanFare2.Calculate();
                }
                else if (((int) pclass) == 3)
                {
                    fare = stats.MeanFare3.Calculate();
                }
                else
                {
                    // should not happen, we would have a class other than 1,2,3.
                    // however, if that DID happen, use the median class (2).
                    fare = stats.MeanFare2.Calculate();
                }
            }
            else
            {
                fare = Double.Parse(reader[indexFare], CultureInfo.InvariantCulture);
            }
            data.Input[5] = RangeNormalize(fare, 0, 500, inputLow, inputHigh);
            // embarked-c  (one-hot style flag per embarkation port)
            data.Input[6] = string.Compare(embarked.Trim(), "c", true) == 0 ? inputHigh : inputLow;
            // embarked-q
            data.Input[7] = string.Compare(embarked.Trim(), "q", true) == 0 ? inputHigh : inputLow;
            // embarked-s
            data.Input[8] = string.Compare(embarked.Trim(), "s", true) == 0 ? inputHigh : inputLow;
            // name-mil
            data.Input[9] = (name.Contains("Col.") || name.Contains("Capt.") || name.Contains("Major.")) ? inputHigh : inputLow;
            // name-nobility
            data.Input[10] = (name.Contains("Countess.") || name.Contains("Lady.") || name.Contains("Sir.") || name.Contains("Don.") || name.Contains("Dona.") || name.Contains("Jonkheer.")) ? inputHigh : inputLow;
            // name-dr
            data.Input[11] = (name.Contains("Dr.")) ? inputHigh : inputLow;
            // name-clergy
            data.Input[12] = (name.Contains("Rev.")) ? inputHigh : inputLow;
            // add the new row
            result.Add(data);
            // add survived, if it exists
            if (survivedIndex != -1)
            {
                // NOTE(review): parsed without CultureInfo.InvariantCulture unlike the other
                // fields; harmless for a 0/1 integer but inconsistent — consider aligning.
                int survived = int.Parse(reader[survivedIndex]);
                data.Ideal[0] = (survived == 1) ? predictSurvive : predictPerish;
            }
        } while (reader.Read());
    }
    return result;
}
/// <summary>
/// Extract and label an unsupervised training set.
/// </summary>
/// <param name="labelIndex">The column index to use for the label.</param>
/// <returns>The training set.</returns>
public IList<BasicData> ExtractUnsupervisedLabeled(int labelIndex)
{
    IList<BasicData> result = new List<BasicData>();

    // Every column except the label column becomes an input dimension.
    int dimensions = HeaderCount - 1;

    for (int r = 0; r < Count; r++)
    {
        object[] rawRow = _data[r];
        var item = new BasicData(dimensions, 0, rawRow[labelIndex].ToString());

        int destination = 0;
        for (int source = 0; source < HeaderCount; source++)
        {
            // Skip the label column; copy everything else into the input vector.
            if (source == labelIndex)
            {
                continue;
            }
            item.Input[destination++] = ConvertNumeric(rawRow, source);
        }

        result.Add(item);
    }

    return result;
}