Exemple #1
0
 /// <summary>
 /// Creates an empty DataSet, populates it from the *.names entries and then
 /// from the *.data entries, and returns the result.
 /// </summary>
 /// <returns>The DataSet after both build steps have run on it.</returns>
 public DataSet BuildDataSet()
 {
     DataSet result = new DataSet();

     // The names step must run before the data step, so the original call
     // order is kept as-is.
     BuildNamesEntries(ref result);
     BuildDataEntries(ref result);

     return result;
 }
Exemple #2
0
 /// <summary>
 /// Shuffles this data set and splits the shuffled entries into a training
 /// set and a test set.
 /// </summary>
 /// <param name="trainingSize">
 /// Number of data instances to place in the training set. The value is
 /// truncated to an int; if it exceeds the number of entries, every entry
 /// ends up in the training set and the test set is empty.
 /// </param>
 /// <returns>
 /// A two-element list: the training set at index 0 and the test set at index 1.
 /// Both receive a copy of this set's feature list and its OutputIndex.
 /// </returns>
 public List<DataSet> RandomInstance(double trainingSize)
 {
     DataSet instance = Shuffle();
     int trainingCount = (int)trainingSize;

     var training = new DataSet { Features = new List<Feature>(this.Features) };
     training.DataEntries.AddRange(instance.DataEntries.Take(trainingCount));

     // Skip is the exact complement of Take. Except is a set difference: it
     // de-duplicates and depends on DataInstance equality, so it could drop
     // test rows that compare equal to a training row or to each other.
     var test = new DataSet(instance.DataEntries.Skip(trainingCount).ToList());
     test.Features = new List<Feature>(this.Features);

     training.OutputIndex = test.OutputIndex = this.OutputIndex;
     return new List<DataSet>(){training, test};
 }
Exemple #3
0
 /// <summary>
 /// Returns a new DataSet holding every entry of this data set in a random order.
 /// This data set's own DataEntries list is not reordered.
 /// </summary>
 /// <returns>
 /// A DataSet with a copy of this set's feature list and a shuffled copy of
 /// its entry list (the entries themselves are not cloned).
 /// </returns>
 private DataSet Shuffle()
 {
     var instance = new DataSet { Features = new List<Feature>(this.Features) };

     // Copy every entry once, then permute the copy in place (Fisher-Yates).
     // Unlike rejection sampling with Contains, this always terminates, even
     // when two entries compare equal, and needs only one pass over the list.
     instance.DataEntries.AddRange(DataEntries);
     var shuffled = instance.DataEntries;
     for(int i=shuffled.Count-1; i>0; i--) {
         // Next(i + 1) yields a value in [0, i], so an element may stay in place.
         int swapIndex = Rng.Next(i + 1);
         var held = shuffled[i];
         shuffled[i] = shuffled[swapIndex];
         shuffled[swapIndex] = held;
     }
     return instance;
 }
Exemple #4
0
 /// <summary>
 /// Reads every entry supplied by the data reader, splits it on commas and,
 /// when each value passes validation against the feature at the same
 /// position, adds the resulting DataInstance to the DataSet.
 /// </summary>
 /// <param name="dataSet">
 /// DataSet whose Features list is used for validation and whose DataEntries
 /// list receives the accepted instances. The reference itself is never reassigned.
 /// </param>
 private void BuildDataEntries(ref DataSet dataSet)
 {
     foreach(string entry in m_DataReader.ValidEntries()) {
         string[] data = entry.Split(',');
         if(data.Length != dataSet.Features.Count){
             // Log the raw line. Passing a LINQ projection here would print
             // the iterator's type name instead of the offending entry.
             Console.WriteLine("[Error]: Invalid # of data elements in {0}.", entry);
             continue;
         }

         var instance = new DataInstance();
         for(int i=0; i<data.Length; i++) {
             var feature = dataSet.Features[i];
             if(IsValidValue(feature.Type, data[i], feature.PossibleValues.ToArray())) {
                 instance.Add(data[i]);
             }
         }

         // A rejected value leaves the instance short, so only complete rows are kept.
         if(instance.Count == dataSet.Features.Count) {
             dataSet.DataEntries.Add(instance);
         }
     }
 }
Exemple #5
0
 /// <summary>
 /// Turns every entry supplied by the names reader into a Feature and appends
 /// it to the DataSet. Each entry is split on ':' into a name, a Types member
 /// and a comma-separated list of possible values (each value is trimmed).
 /// </summary>
 /// <param name="dataSet">
 /// DataSet that receives the features. When a feature's type is Types.Output,
 /// OutputIndex is set to that feature's position in the Features list.
 /// </param>
 private void BuildNamesEntries(ref DataSet dataSet)
 {
     foreach(string entry in m_NamesReader.ValidEntries()) {
         string[] parts = entry.Split(':');
         Types parsedType = (Types)Enum.Parse(typeof(Types), parts[1]);
         var feature = new Feature(parts[0], parsedType);

         string[] rawValues = parts[2].Split(',');
         for(int i=0; i<rawValues.Length; i++) {
             feature.PossibleValues.Add(rawValues[i].Trim());
         }

         dataSet.Features.Add(feature);

         // The feature was just appended, so it sits at the last index.
         if(feature.Type == Types.Output){
             dataSet.OutputIndex = dataSet.Features.Count - 1;
         }
     }
 }
Exemple #6
0
 /// <summary>
 /// Creates a KNearest instance that keeps a reference to the given data set.
 /// </summary>
 /// <param name="data">DataSet stored in m_DataSet; it is not copied or validated here.</param>
 public KNearest(DataSet data)
 {
     this.m_DataSet = data;
 }