Exemplo n.º 1
0
        /// <summary>
        /// Convert two 2D (jagged) arrays into a List of BasicData elements.  One array holds the input
        /// vectors and the other holds the ideal vectors; row N of each array becomes element N of the list.
        /// The vector sizes are taken from the first row of each array, and that many values are copied
        /// from every row.
        /// </summary>
        /// <param name="inputData">An array of input vectors.</param>
        /// <param name="idealData">An array of ideal vectors, with at least one row per input row.</param>
        /// <returns>
        /// A list of BasicData elements, one per row of <paramref name="inputData"/>.  The list is empty
        /// when <paramref name="inputData"/> has no rows.
        /// </returns>
        /// <exception cref="ArgumentNullException">Either array is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="idealData"/> has fewer rows than <paramref name="inputData"/>.
        /// </exception>
        public static IList<BasicData> ConvertArrays(double[][] inputData, double[][] idealData)
        {
            if (inputData == null)
            {
                throw new ArgumentNullException("inputData");
            }

            if (idealData == null)
            {
                throw new ArgumentNullException("idealData");
            }

            // create the list
            var result = new List<BasicData>(inputData.Length);

            // nothing to convert; also avoids indexing row 0 of an empty array below
            if (inputData.Length == 0)
            {
                return result;
            }

            // every input row needs a matching ideal row (extra ideal rows are ignored)
            if (idealData.Length < inputData.Length)
            {
                throw new ArgumentException(
                    "idealData must contain at least as many rows as inputData.", "idealData");
            }

            // get the lengths from the first row
            var inputCount = inputData[0].Length;
            var idealCount = idealData[0].Length;

            // build the list
            for (var row = 0; row < inputData.Length; row++)
            {
                var dataRow = new BasicData(inputCount, idealCount);
                Array.Copy(inputData[row], dataRow.Input, inputCount);
                Array.Copy(idealData[row], dataRow.Ideal, idealCount);
                result.Add(dataRow);
            }

            return result;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Build a supervised training set from the stored rows.  Every element of the result carries an
        /// input vector and an expected (ideal) output vector, both filled through ConvertNumeric.
        /// </summary>
        /// <param name="inputBegin">The first input column.</param>
        /// <param name="inputCount">The number of consecutive columns used as input.</param>
        /// <param name="idealBegin">The first ideal column.</param>
        /// <param name="idealCount">The number of consecutive columns used as ideal.</param>
        /// <returns>The training set, one element per stored row.</returns>
        public IList<BasicData> ExtractSupervised(int inputBegin, int inputCount, int idealBegin, int idealCount)
        {
            var trainingSet = new List<BasicData>();

            for (var r = 0; r < Count; ++r)
            {
                object[] source = _data[r];
                var element = new BasicData(inputCount, idealCount);

                // input columns: inputBegin .. inputBegin + inputCount - 1
                for (var offset = 0; offset < inputCount; ++offset)
                {
                    var column = inputBegin + offset;
                    element.Input[offset] = ConvertNumeric(source, column);
                }

                // ideal columns: idealBegin .. idealBegin + idealCount - 1
                for (var offset = 0; offset < idealCount; ++offset)
                {
                    var column = idealBegin + offset;
                    element.Ideal[offset] = ConvertNumeric(source, column);
                }

                trainingSet.Add(element);
            }

            return trainingSet;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Read a Titanic passenger CSV file and convert every record into a normalized
        /// <see cref="BasicData"/> row.  The input vector is laid out as:
        /// 0 age, 1 sex-male, 2 pclass, 3 sibsp, 4 parch, 5 fare, 6 embarked-c, 7 embarked-q,
        /// 8 embarked-s, 9 military title, 10 nobility title, 11 "Dr." title, 12 "Rev." title.
        /// A missing age is estimated from the title found in the name (falling back to sex), and a
        /// missing fare from the passenger class, using the means held by <paramref name="stats"/>.
        /// </summary>
        /// <param name="stats">Source of the mean values used to fill in a missing age or fare.</param>
        /// <param name="filename">Path of the CSV file to read.</param>
        /// <param name="ids">If not null, the passenger id of every record is appended to this list.</param>
        /// <param name="inputLow">
        /// Low bound passed to RangeNormalize for scaled features; also the value stored for a flag
        /// feature that is off.
        /// </param>
        /// <param name="inputHigh">
        /// High bound passed to RangeNormalize for scaled features; also the value stored for a flag
        /// feature that is on.
        /// </param>
        /// <param name="predictSurvive">Ideal value stored when the survived column equals 1.</param>
        /// <param name="predictPerish">Ideal value stored for any other survived value.</param>
        /// <returns>
        /// One row per record, in file order; an empty list when the file has no records.  The ideal value
        /// is only assigned when the file has a "survived" column.
        /// </returns>
        public static IList<BasicData> Normalize(TitanicStats stats, string filename, List<String> ids,
            double inputLow, double inputHigh,
            double predictSurvive, double predictPerish)
        {
            IList<BasicData> result = new List<BasicData>();

            using (var reader = new CsvReader(new StreamReader(filename)))
            {
                // This method relies on the first Read() making the headers available and leaving the
                // reader on the first record.  If it reports no record there is nothing to normalize,
                // and the loop below must not touch the (non-existent) current record.
                if (!reader.Read())
                {
                    return result;
                }

                // Header lookup is case-insensitive.  Use the invariant culture so that names such as
                // "PassengerId" lower-case the same way regardless of the current culture.
                var headerMap = new Dictionary<string, int>();
                for (int i = 0; i < reader.FieldHeaders.Length; i++)
                {
                    headerMap[reader.FieldHeaders[i].ToLowerInvariant()] = i;
                }

                int ageIndex = headerMap["age"];
                int nameIndex = headerMap["name"];
                int sexIndex = headerMap["sex"];
                int indexEmbarked = headerMap["embarked"];
                int indexPclass = headerMap["pclass"];
                int indexSibsp = headerMap["sibsp"];
                int indexParch = headerMap["parch"];
                int indexFare = headerMap["fare"];
                int indexId = headerMap["passengerid"];

                // test data does not have survived
                int survivedIndex;
                if (!headerMap.TryGetValue("survived", out survivedIndex))
                {
                    survivedIndex = -1;
                }

                do
                {
                    var data = new BasicData(TitanicConfig.InputFeatureCount, 1);

                    String name = reader[nameIndex];
                    String sex = reader[sexIndex];
                    String embarked = reader[indexEmbarked].Trim();
                    String id = reader[indexId];

                    // record the passenger id, if requested
                    if (ids != null)
                    {
                        ids.Add(id);
                    }

                    bool isMale = string.Equals(sex, "male", StringComparison.OrdinalIgnoreCase);
                    bool isMilitary = HasMilitaryTitle(name);
                    bool isNobility = HasNobilityTitle(name);

                    // age: use the recorded value, or estimate it when the field is empty
                    String strAge = reader[ageIndex];
                    double age = strAge.Length == 0
                        ? EstimateMissingAge(stats, name, isMale)
                        : Double.Parse(strAge, CultureInfo.InvariantCulture);
                    data.Input[0] = RangeNormalize(age, 0, 100, inputLow, inputHigh);

                    // sex-male
                    data.Input[1] = isMale ? inputHigh : inputLow;

                    // pclass
                    double pclass = double.Parse(reader[indexPclass], CultureInfo.InvariantCulture);
                    data.Input[2] = RangeNormalize(pclass, 1, 3, inputLow, inputHigh);

                    // sibsp
                    double sibsp = double.Parse(reader[indexSibsp], CultureInfo.InvariantCulture);
                    data.Input[3] = RangeNormalize(sibsp, 0, 10, inputLow, inputHigh);

                    // parch
                    double parch = double.Parse(reader[indexParch], CultureInfo.InvariantCulture);
                    data.Input[4] = RangeNormalize(parch, 0, 10, inputLow, inputHigh);

                    // fare: use the recorded value, or estimate it when the field is empty
                    String strFare = reader[indexFare];
                    double fare = strFare.Length == 0
                        ? EstimateMissingFare(stats, pclass)
                        : Double.Parse(strFare, CultureInfo.InvariantCulture);
                    data.Input[5] = RangeNormalize(fare, 0, 500, inputLow, inputHigh);

                    // embarked-c / embarked-q / embarked-s (one flag per port code)
                    data.Input[6] = string.Equals(embarked, "c", StringComparison.OrdinalIgnoreCase)
                        ? inputHigh
                        : inputLow;
                    data.Input[7] = string.Equals(embarked, "q", StringComparison.OrdinalIgnoreCase)
                        ? inputHigh
                        : inputLow;
                    data.Input[8] = string.Equals(embarked, "s", StringComparison.OrdinalIgnoreCase)
                        ? inputHigh
                        : inputLow;

                    // name-mil
                    data.Input[9] = isMilitary ? inputHigh : inputLow;

                    // name-nobility
                    data.Input[10] = isNobility ? inputHigh : inputLow;

                    // name-dr
                    data.Input[11] = name.Contains("Dr.") ? inputHigh : inputLow;

                    // name-clergy
                    data.Input[12] = name.Contains("Rev.") ? inputHigh : inputLow;

                    // add survived, if it exists
                    if (survivedIndex != -1)
                    {
                        int survived = int.Parse(reader[survivedIndex], CultureInfo.InvariantCulture);
                        data.Ideal[0] = (survived == 1) ? predictSurvive : predictPerish;
                    }

                    // add the new row
                    result.Add(data);
                } while (reader.Read());
            }

            return result;
        }

        /// <summary>
        /// Determine whether a passenger name contains one of the military titles
        /// ("Col.", "Capt." or "Major.").
        /// </summary>
        private static bool HasMilitaryTitle(string name)
        {
            return name.Contains("Col.") || name.Contains("Capt.") || name.Contains("Major.");
        }

        /// <summary>
        /// Determine whether a passenger name contains one of the nobility titles
        /// ("Countess.", "Lady.", "Sir.", "Don.", "Dona." or "Jonkheer.").
        /// </summary>
        private static bool HasNobilityTitle(string name)
        {
            return name.Contains("Countess.") || name.Contains("Lady.") || name.Contains("Sir.") ||
                   name.Contains("Don.") || name.Contains("Dona.") || name.Contains("Jonkheer.");
        }

        /// <summary>
        /// Estimate the age of a passenger whose age field is empty.  The first title matched in the
        /// name selects the mean to use; when no title matches, the mean for the passenger's sex is used.
        /// The order of the checks is significant and mirrors the original chain.
        /// </summary>
        private static double EstimateMissingAge(TitanicStats stats, string name, bool isMale)
        {
            if (name.Contains("Master."))
            {
                return stats.MeanMaster.Calculate();
            }

            if (name.Contains("Mr."))
            {
                return stats.MeanMr.Calculate();
            }

            if (name.Contains("Miss.") || name.Contains("Mlle."))
            {
                return stats.MeanMiss.Calculate();
            }

            if (name.Contains("Mrs.") || name.Contains("Mme."))
            {
                return stats.MeanMrs.Calculate();
            }

            if (HasMilitaryTitle(name))
            {
                // NOTE(review): military titles use the "Miss" mean, exactly as the original code did.
                // This looks like a copy/paste slip; confirm whether TitanicStats has a dedicated mean.
                return stats.MeanMiss.Calculate();
            }

            if (HasNobilityTitle(name))
            {
                return stats.MeanNobility.Calculate();
            }

            if (name.Contains("Dr."))
            {
                return stats.MeanDr.Calculate();
            }

            if (name.Contains("Rev."))
            {
                return stats.MeanClergy.Calculate();
            }

            return isMale ? stats.MeanMale.Calculate() : stats.MeanFemale.Calculate();
        }

        /// <summary>
        /// Estimate the fare of a passenger whose fare field is empty, using the mean fare of the
        /// passenger's class (the class value is truncated to an integer).
        /// </summary>
        private static double EstimateMissingFare(TitanicStats stats, double pclass)
        {
            switch ((int) pclass)
            {
                case 1:
                    return stats.MeanFare1.Calculate();
                case 3:
                    return stats.MeanFare3.Calculate();
                default:
                    // Class 2.  A class other than 1, 2 or 3 should not happen; if it does,
                    // the median class (2) is used as well.
                    return stats.MeanFare2.Calculate();
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Build a labeled, unsupervised training set from the stored rows.  For each row the string form
        /// of the label column is passed to BasicData as its label, and every other column is converted
        /// with ConvertNumeric and stored, in column order, in the input vector.
        /// </summary>
        /// <param name="labelIndex">The column index to use for the label.</param>
        /// <returns>The training set, one element per stored row.</returns>
        public IList<BasicData> ExtractUnsupervisedLabeled(int labelIndex)
        {
            var trainingSet = new List<BasicData>();

            // every column except the label column becomes an input
            var inputSize = HeaderCount - 1;

            for (var r = 0; r < Count; ++r)
            {
                Object[] source = _data[r];
                var element = new BasicData(inputSize, 0, source[labelIndex].ToString());

                var target = 0;
                for (var column = 0; column < HeaderCount; ++column)
                {
                    // the label column is not part of the input vector
                    if (column == labelIndex)
                    {
                        continue;
                    }

                    element.Input[target] = ConvertNumeric(source, column);
                    target++;
                }

                trainingSet.Add(element);
            }

            return trainingSet;
        }