/// <summary>
/// Prepares the iterator for a numeric feature: validates the current state and
/// stores the instances that have a non-missing value for <paramref name="feature"/>,
/// ordered by that value.
/// </summary>
/// <param name="feature">Feature to iterate over; its type must be Integer or Double.</param>
/// <param name="instances">Pairs of an instance and an associated double (presumably a weight; it is not read here).</param>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>Model</c> is null or <paramref name="feature"/> is not numeric.
/// </exception>
public void Initialize(Feature feature, IEnumerable<Tuple<Instance, double>> instances)
{
    // The source sequence is stored before any validation, so it is replaced
    // even when one of the checks below throws.
    _instances = instances;

    if (Model == null)
    {
        throw new InvalidOperationException("Model is null");
    }

    bool isNumeric = feature.FeatureType == FeatureType.Integer
                     || feature.FeatureType == FeatureType.Double;
    if (!isNumeric)
    {
        throw new InvalidOperationException("Cannot use this iterator on non-numeric feature");
    }

    _feature = feature;
    CurrentDistribution = new double[4][];

    _sorted = _instances
        .Where(pair => !FeatureValue.IsMissing(pair.Item1[feature]))
        .OrderBy(pair => pair.Item1[feature])
        .ToArray();

    // With nothing to iterate, the remaining state (including _initialized)
    // is deliberately left untouched.
    if (_sorted.Length != 0)
    {
        CurrentDistribution[0] = new double[5];
        _initialized = true;
        bestResultWasFound = false;
    }
}
/// <summary>
/// Prepares the iterator for an ordered feature evaluated against a nominal class
/// feature: validates the current state, sorts the instances that have a non-missing
/// value for <paramref name="feature"/>, and sets up the two class distributions.
/// </summary>
/// <param name="feature">Feature to iterate over; <c>IsOrdered</c> must be true.</param>
/// <param name="instances">Pairs of an instance and an associated double (presumably a weight).</param>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>Model</c> is null, the class feature is not nominal, or
/// <paramref name="feature"/> is not ordered.
/// </exception>
public void Initialize(Feature feature, IEnumerable<Tuple<Instance, double>> instances)
{
    _instances = instances;

    if (Model == null)
    {
        throw new InvalidOperationException("Model is null");
    }

    if (ClassFeature.FeatureType != FeatureType.Nominal)
    {
        throw new InvalidOperationException("Cannot use this iterator on non-nominal class");
    }

    if (!feature.IsOrdered)
    {
        throw new InvalidOperationException("Cannot use this iterator on non-ordered feature");
    }

    // Flag the iterator as initialized only after validation succeeded, so a
    // rejected call does not leave it marked as ready.
    _initialized = true;

    _feature = feature;
    CurrentDistribution = new double[2][];

    sorted = _instances
        .Where(x => !FeatureValue.IsMissing(x.Item1[feature]))
        .OrderBy(x => x.Item1[feature])
        .ToArray();

    // Direct cast instead of an unchecked 'as': a mismatching type surfaces as an
    // InvalidCastException rather than an opaque NullReferenceException.
    CurrentDistribution[0] = new double[((NominalFeature)ClassFeature).Values.Length];
    CurrentDistribution[1] = sorted.FindDistribution(ClassFeature);

    if (sorted.Length == 0)
    {
        // Nothing to iterate; the cursor state below is left as it was.
        return;
    }

    _currentIndex = -1;
    _lastClassValue = FindNextClass(0);
}
/// <summary>
/// Computes summary statistics of <paramref name="feature"/> over
/// <paramref name="instances"/> and stores them in <c>feature.FeatureInformation</c>.
/// Categorical features get per-value counts, probabilities and ratios; numeric
/// features get minimum, maximum and missing count. Other feature kinds are left untouched.
/// </summary>
/// <param name="feature">Feature whose information is (re)computed.</param>
/// <param name="model">Model forwarded to <c>FillDatasetInformation</c> when requested.</param>
/// <param name="instances">Instances to scan.</param>
/// <param name="fillDatasetInformation">
/// When true, <c>FillDatasetInformation</c> is invoked after the feature information is set.
/// </param>
public static void LoadFeatureInformation(Feature feature, InstanceModel model, IEnumerable<Instance> instances, bool fillDatasetInformation = false)
{
    var categorical = feature as CategoricalFeature;
    if (categorical != null)
    {
        int len = categorical.Values.Length;

        // Read every instance's value once instead of re-enumerating the
        // source sequence for each category.
        var values = instances.Select(x => x[feature]).ToArray();

        double[] valuesCount = new double[len];
        for (int i = 0; i < len; i++)
        {
            valuesCount[i] = values.Count(v => v == i && !FeatureValue.IsMissing(v));
        }

        var valuesmissing = values.Count(FeatureValue.IsMissing);

        // Loop-invariant aggregates, hoisted out of the per-element projections.
        double total = valuesCount.Sum();
        // Min() throws on an empty sequence; with no categories the projection
        // below never uses this value, so any placeholder is fine.
        double smallest = len > 0 ? valuesCount.Min() : 0;

        // Plain double division: a zero total or zero minimum yields NaN/Infinity
        // entries rather than an exception.
        var valueProbability = valuesCount.Select(x => x / total).ToArray();
        var ratio = valuesCount.Select(x => x / smallest).ToArray();

        feature.FeatureInformation = new NominalFeatureInformation()
        {
            Distribution = valuesCount,
            MissingValueCount = valuesmissing,
            ValueProbability = valueProbability,
            Ratio = ratio,
            Feature = feature,
        };
    }
    else if (feature is NumericFeature)
    {
        // Single pass over the source; missing count is derived from the two lengths.
        var values = instances.Select(x => x[feature]).ToArray();
        var nonMissingValues = values.Where(v => !FeatureValue.IsMissing(v)).ToArray();
        var valuesmissing = values.Length - nonMissingValues.Length;

        // Both bounds default to 0 when every value is missing.
        double max = 0;
        double min = 0;
        if (nonMissingValues.Length > 0)
        {
            max = nonMissingValues.Max();
            min = nonMissingValues.Min();
        }

        feature.FeatureInformation = new NumericFeatureInformation
        {
            MissingValueCount = valuesmissing,
            MaxValue = max,
            MinValue = min,
            Feature = feature,
        };
    }

    if (fillDatasetInformation)
    {
        FillDatasetInformation(model, instances);
    }
}
/// <summary>
/// Tests whether <paramref name="instance"/> satisfies this item: its value for
/// <c>Feature</c> equals <c>Value</c>. A missing value is treated as a match.
/// </summary>
/// <param name="instance">Instance to test.</param>
/// <returns>True when the value is missing or equal to <c>Value</c>; otherwise false.</returns>
public override bool IsMatch(Instance instance)
{
    double current = instance[Feature];
    return FeatureValue.IsMissing(current) || current == Value;
}
/// <summary>
/// Routes <paramref name="instance"/> to one of two branches depending on whether
/// its value for <c>Feature</c> equals <c>Value</c> (compared as integers).
/// </summary>
/// <param name="instance">Instance to route.</param>
/// <returns>
/// <c>{ 1, 0 }</c> when the value equals <c>Value</c>, <c>{ 0, 1 }</c> otherwise,
/// or null when the value is missing.
/// </returns>
/// <exception cref="InvalidOperationException">Thrown when <c>Feature</c> is not nominal.</exception>
public double[] Select(Instance instance)
{
    if (Feature.FeatureType != FeatureType.Nominal)
    {
        throw new InvalidOperationException("Cannot use value and complement on non-nominal data");
    }

    var raw = instance[Feature];
    if (FeatureValue.IsMissing(raw))
    {
        return null;
    }

    if ((int)raw == (int)Value)
    {
        return new double[] { 1, 0 };
    }

    return new double[] { 0, 1 };
}
/// <summary>
/// Routes <paramref name="instance"/> to one of two branches by comparing its value
/// for <c>Feature</c> against <c>CutPoint</c>.
/// </summary>
/// <param name="instance">Instance to route.</param>
/// <returns>
/// <c>{ 1, 0 }</c> when the value is less than or equal to <c>CutPoint</c>,
/// <c>{ 0, 1 }</c> otherwise, or null when the value is missing.
/// </returns>
/// <exception cref="InvalidOperationException">Thrown when <c>Feature</c> is nominal.</exception>
public double[] Select(Instance instance)
{
    if (Feature.FeatureType == FeatureType.Nominal)
    {
        throw new InvalidOperationException("Cannot use cutpoint on nominal data");
    }

    var raw = instance[Feature];
    if (FeatureValue.IsMissing(raw))
    {
        return null;
    }

    if (raw <= CutPoint)
    {
        return new double[] { 1, 0 };
    }

    return new double[] { 0, 1 };
}
/// <summary>
/// Routes <paramref name="instance"/> to one of two branches by comparing the result of
/// <c>VectorHelper.ScalarProjection</c> over <c>Features</c> and <c>Weights</c>
/// against <c>CutPoint</c>.
/// </summary>
/// <param name="instance">Instance to route.</param>
/// <returns>
/// <c>{ 1, 0 }</c> when the projection is less than or equal to <c>CutPoint</c>,
/// <c>{ 0, 1 }</c> otherwise, or null when any of the features has a missing value.
/// </returns>
/// <exception cref="InvalidOperationException">Thrown when any of <c>Features</c> is nominal.</exception>
public double[] Select(Instance instance)
{
    bool hasNominal = Features.Any(candidate => candidate.FeatureType == FeatureType.Nominal);
    if (hasNominal)
    {
        throw new InvalidOperationException("Cannot use cutpoint on nominal data");
    }

    bool hasMissing = Features.Any(candidate => FeatureValue.IsMissing(instance[candidate]));
    if (hasMissing)
    {
        return null;
    }

    var projection = VectorHelper.ScalarProjection(instance, Features, Weights);
    if (projection <= CutPoint)
    {
        return new double[] { 1, 0 };
    }

    return new double[] { 0, 1 };
}
/// <summary>
/// Prepares the iterator for a nominal feature against a nominal class feature.
/// Accumulates, for each distinct non-missing feature value, the per-class sum of
/// the doubles paired with the instances, together with the overall per-class totals.
/// </summary>
/// <param name="feature">Feature to iterate over; must be nominal.</param>
/// <param name="instances">Pairs of an instance and the amount it contributes to the distributions.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>Model</c> is null, the class feature is not nominal, or
/// <paramref name="feature"/> is not nominal.
/// </exception>
public void Initialize(Feature feature, IEnumerable<Tuple<Instance, double>> instances)
{
    // Stored before validation, so it is replaced even when a check below throws.
    _instances = instances;

    if (Model == null)
    {
        throw new InvalidOperationException("Model is null");
    }

    if (ClassFeature.FeatureType != FeatureType.Nominal)
    {
        throw new InvalidOperationException("Cannot use this iterator on non-nominal class");
    }

    NominalFeature nominalClass = (NominalFeature)ClassFeature;

    if (feature.FeatureType != FeatureType.Nominal)
    {
        throw new InvalidOperationException("Cannot use this iterator on non-nominal feature");
    }

    _numClasses = nominalClass.Values.Length;
    _feature = feature;
    _perValueDistribution = new Dictionary<double, double[]>();
    _totalDistribution = new double[_numClasses];

    foreach (var pair in _instances)
    {
        double featureValue = pair.Item1[feature];
        if (!FeatureValue.IsMissing(featureValue))
        {
            // Create the per-class bucket the first time a feature value is seen.
            double[] classWeights;
            if (!_perValueDistribution.TryGetValue(featureValue, out classWeights))
            {
                classWeights = new double[_numClasses];
                _perValueDistribution.Add(featureValue, classWeights);
            }

            // The class value is used directly as an index; it is not checked for missing here.
            int classIdx = (int)pair.Item1[ClassFeature];
            classWeights[classIdx] += pair.Item2;
            _totalDistribution[classIdx] += pair.Item2;
        }
    }

    CurrentDistribution = new double[2][];
    _valuesCount = _perValueDistribution.Count;
    existingValues = _perValueDistribution.Keys.ToArray();
    _iteratingTwoValues = _valuesCount == 2;
    _valueIndex = -1;
    _twoValuesIterated = false;
    _initialized = true;
}
/// <summary>
/// Computes the weighted sum of the instance's values over <paramref name="Features"/>,
/// multiplying each value by its entry in <paramref name="Weights"/>.
/// </summary>
/// <param name="instance">Instance whose values are combined.</param>
/// <param name="Features">Features taking part in the sum.</param>
/// <param name="Weights">Weight for each feature; looked up with the indexer, so every feature needs an entry.</param>
/// <returns>
/// The weighted sum, or <c>double.NaN</c> when <c>FeatureValue.IsMissing</c> reports
/// the instance's values for <paramref name="Features"/> as missing.
/// </returns>
public static double ScalarProjection(Instance instance, Feature[] Features, IDictionary<Feature, double> Weights)
{
    if (FeatureValue.IsMissing(instance[Features]))
    {
        return double.NaN;
    }

    double projection = 0;
    for (int i = 0; i < Features.Length; i++)
    {
        Feature current = Features[i];
        projection += Weights[current] * instance[current];
    }

    return projection;
}
/// <summary>
/// Builds the class distribution of <paramref name="source"/>: for each class index,
/// the sum of the doubles paired with the instances of that class. Instances whose
/// class value is missing are skipped.
/// </summary>
/// <param name="source">Pairs of an instance and the amount it contributes.</param>
/// <param name="model">Not used by this method.</param>
/// <param name="classFeature">Class feature; must be nominal.</param>
/// <returns>An array with one entry per value of <paramref name="classFeature"/>.</returns>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="classFeature"/> is not nominal.</exception>
private double[] FindDistribution(IEnumerable<Tuple<Instance, double>> source, InstanceModel model, Feature classFeature)
{
    if (classFeature.FeatureType != FeatureType.Nominal)
    {
        throw new InvalidOperationException("Cannot find distribution for non-nominal class");
    }

    int classCount = ((NominalFeature)classFeature).Values.Length;
    double[] distribution = new double[classCount];

    foreach (var pair in source)
    {
        var classValue = pair.Item1[classFeature];
        if (FeatureValue.IsMissing(classValue))
        {
            continue;
        }

        distribution[(int)classValue] += pair.Item2;
    }

    return distribution;
}
/// <summary>
/// Routes <paramref name="instance"/> to the branch whose position in <c>Values</c>
/// matches the instance's value for <c>Feature</c> (compared as an integer).
/// </summary>
/// <param name="instance">Instance to route.</param>
/// <returns>
/// An array of length <c>ChildrenCount</c> with 1 at the matching position and 0
/// elsewhere, or null when the value is missing or not present in <c>Values</c>.
/// </returns>
/// <exception cref="InvalidOperationException">Thrown when <c>Feature</c> is not nominal.</exception>
public double[] Select(Instance instance)
{
    if (Feature.FeatureType != FeatureType.Nominal)
    {
        throw new InvalidOperationException("Cannot use multiple values on non-nominal data");
    }

    var raw = instance[Feature];
    if (FeatureValue.IsMissing(raw))
    {
        return null;
    }

    int value = (int)raw;
    int branch = Values.IndexOf(candidate => candidate == value);
    if (branch != -1)
    {
        double[] membership = new double[ChildrenCount];
        membership[branch] = 1;
        return membership;
    }

    return null;
}
/// <summary>
/// Builds a <c>DatasetInformation</c> for <paramref name="model"/> and assigns it to
/// <c>model.DatasetInformation</c>. It collects every feature's
/// <c>FeatureInformation</c>, the number of instances with at least one missing
/// feature value, and the sum of the per-feature missing value counts.
/// </summary>
/// <param name="model">Model whose features are inspected and whose dataset information is replaced.</param>
/// <param name="instances">Instances scanned for missing values.</param>
public static void FillDatasetInformation(InstanceModel model, IEnumerable<Instance> instances)
{
    // An instance counts as incomplete as soon as one model feature is missing.
    int incompleteCount = instances.Count(
        instance => model.Features.Any(feature => FeatureValue.IsMissing(instance[feature])));

    var featureInformations = model.Features
        .Select(feature => feature.FeatureInformation)
        .ToArray();

    // Relies on every feature already having its FeatureInformation populated.
    var totalMissing = model.Features.Sum(feature => feature.FeatureInformation.MissingValueCount);

    model.DatasetInformation = new DatasetInformation
    {
        FeatureInformations = featureInformations,
        ObjectsWithIncompleteData = incompleteCount,
        GlobalAbscenseInformation = totalMissing,
    };
}