/// <summary>
/// Prepares the iterator to work on <paramref name="feature"/> over the given weighted instances.
/// Instances whose value for the feature is missing are dropped and the remainder is stored
/// sorted by that feature's value.
/// </summary>
/// <param name="feature">Feature to iterate over; must be of type Integer or Double.</param>
/// <param name="instances">Instances paired with a double (second tuple item).</param>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>Model</c> is null or when the feature is not numeric.
/// </exception>
public void Initialize(Feature feature, IEnumerable<Tuple<Instance, double>> instances)
        {
            // The instance source is stored before any validation takes place.
            _instances = instances;

            if (Model == null)
            {
                throw new InvalidOperationException("Model is null");
            }

            bool isNumeric = feature.FeatureType == FeatureType.Integer
                             || feature.FeatureType == FeatureType.Double;
            if (!isNumeric)
            {
                throw new InvalidOperationException("Cannot use this iterator on non-numeric feature");
            }

            _feature = feature;
            CurrentDistribution = new double[4][];

            var withKnownValue = _instances.Where(pair => !FeatureValue.IsMissing(pair.Item1[feature]));
            _sorted = withKnownValue.OrderBy(pair => pair.Item1[feature]).ToArray();

            // With nothing to iterate, stop here: the initialized flag and the
            // best-result flag are deliberately left as they were.
            if (_sorted.Length != 0)
            {
                CurrentDistribution[0] = new double[5];
                _initialized = true;
                bestResultWasFound = false;
            }
        }
示例#2
0
        /// <summary>
        /// Prepares the iterator to work on an ordered <paramref name="feature"/> over the given
        /// weighted instances. Instances with a missing value for the feature are dropped, the rest
        /// are sorted by the feature value, and <c>CurrentDistribution</c> is reset to an empty
        /// distribution (index 0) and the distribution of all sorted instances (index 1).
        /// </summary>
        /// <param name="feature">Feature to iterate over; its <c>IsOrdered</c> must be true.</param>
        /// <param name="instances">Instances paired with a double (second tuple item).</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <c>Model</c> is null, when the class feature is not nominal, or when
        /// <paramref name="feature"/> is not ordered.
        /// </exception>
        public void Initialize(Feature feature, IEnumerable<Tuple<Instance, double>> instances)
        {
            _instances = instances;

            if (Model == null)
            {
                throw new InvalidOperationException("Model is null");
            }
            if (ClassFeature.FeatureType != FeatureType.Nominal)
            {
                throw new InvalidOperationException("Cannot use this iterator on non-nominal class");
            }
            if (!feature.IsOrdered)
            {
                throw new InvalidOperationException("Cannot use this iterator on non-ordered feature");
            }

            // Direct cast instead of `as`: a mismatch between FeatureType and the runtime type
            // surfaces as an InvalidCastException here rather than a NullReferenceException later.
            var nominalClass = (NominalFeature)ClassFeature;

            // Flag the iterator as initialized only once the arguments have been validated, so a
            // rejected call does not leave it marked as ready.
            _initialized = true;

            _feature            = feature;
            CurrentDistribution = new double[2][];

            sorted = _instances
                     .Where(x => !FeatureValue.IsMissing(x.Item1[feature]))
                     .OrderBy(x => x.Item1[feature])
                     .ToArray();

            CurrentDistribution[0] = new double[nominalClass.Values.Length];
            CurrentDistribution[1] = sorted.FindDistribution(ClassFeature);

            // Nothing to iterate: leave the cursor state untouched.
            if (sorted.Length == 0)
            {
                return;
            }

            _currentIndex   = -1;
            _lastClassValue = FindNextClass(0);
        }
示例#3
0
        /// <summary>
        /// Computes summary statistics of <paramref name="feature"/> over <paramref name="instances"/>
        /// and stores them in <c>feature.FeatureInformation</c>.
        /// For a <c>CategoricalFeature</c>: per-value counts, missing-value count, per-value
        /// probability (count / total count) and per-value ratio (count / smallest count).
        /// For a <c>NumericFeature</c>: missing-value count plus minimum and maximum of the
        /// non-missing values (both 0 when there are none).
        /// Features of any other kind are left untouched.
        /// </summary>
        /// <param name="feature">Feature whose information is computed.</param>
        /// <param name="model">Model passed on to <c>FillDatasetInformation</c> when requested.</param>
        /// <param name="instances">Instances to scan; enumerated at most once by this method.</param>
        /// <param name="fillDatasetInformation">
        /// When true, <c>FillDatasetInformation</c> is called after the feature has been processed.
        /// </param>
        public static void LoadFeatureInformation(Feature feature, InstanceModel model, IEnumerable<Instance> instances, bool fillDatasetInformation = false)
        {
            var  categorical = feature as CategoricalFeature;
            bool isNumeric   = categorical == null && feature is NumericFeature;

            // Nothing to compute and nothing to fill: the instances are never looked at.
            if (categorical == null && !isNumeric && !fillDatasetInformation)
            {
                return;
            }

            // Materialize once so a lazy sequence is not re-evaluated for every statistic.
            IList<Instance> materialized = instances as IList<Instance> ?? instances.ToList();

            if (categorical != null)
            {
                int      len           = categorical.Values.Length;
                double[] valuesCount   = new double[len];
                int      valuesmissing = 0;

                // Single pass: tally missing values and the occurrences of each value index.
                foreach (var instance in materialized)
                {
                    double value = instance[feature];
                    if (FeatureValue.IsMissing(value))
                    {
                        valuesmissing++;
                        continue;
                    }

                    // Only values that are exactly one of the indices 0..len-1 are counted,
                    // matching an equality test against each index.
                    if (value >= 0 && value < len)
                    {
                        int index = (int)value;
                        if (value == index)
                        {
                            valuesCount[index]++;
                        }
                    }
                }

                // Loop invariants hoisted out of the projections below. Min() throws on an
                // empty array, so it is only evaluated when there is at least one value.
                double total    = valuesCount.Sum();
                double smallest = len > 0 ? valuesCount.Min() : 0;

                var valueProbability = valuesCount.Select(x => x / total).ToArray();
                var ratio            = valuesCount.Select(x => x / smallest).ToArray();

                feature.FeatureInformation = new NominalFeatureInformation()
                {
                    Distribution      = valuesCount,
                    MissingValueCount = valuesmissing,
                    ValueProbability  = valueProbability,
                    Ratio             = ratio,
                    Feature           = feature,
                };
            }
            else if (isNumeric)
            {
                double[] nonMissingValues = materialized
                                            .Select(x => x[feature])
                                            .Where(v => !FeatureValue.IsMissing(v))
                                            .ToArray();
                int    valuesmissing = materialized.Count - nonMissingValues.Length;
                double max           = 0;
                double min           = 0;

                if (nonMissingValues.Length > 0)
                {
                    max = nonMissingValues.Max();
                    min = nonMissingValues.Min();
                }

                feature.FeatureInformation = new NumericFeatureInformation
                {
                    MissingValueCount = valuesmissing,
                    MaxValue          = max,
                    MinValue          = min,
                    Feature           = feature,
                };
            }

            if (fillDatasetInformation)
            {
                FillDatasetInformation(model, materialized);
            }
        }
示例#4
0
        /// <summary>
        /// Tells whether <paramref name="instance"/> satisfies this item: true when the instance's
        /// value for <c>Feature</c> is missing, or when it equals <c>Value</c>.
        /// </summary>
        /// <param name="instance">Instance to test.</param>
        /// <returns>True on a missing value or an exact match; false otherwise.</returns>
        public override bool IsMatch(Instance instance)
        {
            double current = instance[Feature];

            // A missing value counts as a match.
            return FeatureValue.IsMissing(current) || current == Value;
        }
示例#5
0
 /// <summary>
 /// Routes <paramref name="instance"/> to one of two children depending on whether its value
 /// for <c>Feature</c>, truncated to an integer, equals <c>Value</c> truncated to an integer.
 /// </summary>
 /// <param name="instance">Instance to route.</param>
 /// <returns>
 /// <c>{ 1, 0 }</c> when the values are equal, <c>{ 0, 1 }</c> otherwise, or null when the
 /// instance's value is missing.
 /// </returns>
 /// <exception cref="InvalidOperationException">Thrown when <c>Feature</c> is not nominal.</exception>
 public double[] Select(Instance instance)
 {
     if (Feature.FeatureType != FeatureType.Nominal)
     {
         throw new InvalidOperationException("Cannot use value and complement on non-nominal data");
     }

     double observed = instance[Feature];
     if (FeatureValue.IsMissing(observed))
     {
         return null;
     }

     if ((int)observed == (int)Value)
     {
         return new double[] { 1, 0 };
     }

     return new double[] { 0, 1 };
 }
 /// <summary>
 /// Routes <paramref name="instance"/> to one of two children by comparing its value for
 /// <c>Feature</c> against <c>CutPoint</c>.
 /// </summary>
 /// <param name="instance">Instance to route.</param>
 /// <returns>
 /// <c>{ 1, 0 }</c> when the value is less than or equal to <c>CutPoint</c>, <c>{ 0, 1 }</c>
 /// otherwise, or null when the value is missing.
 /// </returns>
 /// <exception cref="InvalidOperationException">Thrown when <c>Feature</c> is nominal.</exception>
 public double[] Select(Instance instance)
 {
     if (Feature.FeatureType == FeatureType.Nominal)
     {
         throw new InvalidOperationException("Cannot use cutpoint on nominal data");
     }

     double observed = instance[Feature];
     if (FeatureValue.IsMissing(observed))
     {
         return null;
     }

     bool goesFirst = observed <= CutPoint;
     if (goesFirst)
     {
         return new double[] { 1, 0 };
     }

     return new double[] { 0, 1 };
 }
示例#7
0
 /// <summary>
 /// Routes <paramref name="instance"/> to one of two children by comparing the result of
 /// <c>VectorHelper.ScalarProjection</c> over <c>Features</c> and <c>Weights</c> against
 /// <c>CutPoint</c>.
 /// </summary>
 /// <param name="instance">Instance to route.</param>
 /// <returns>
 /// <c>{ 1, 0 }</c> when the projection is less than or equal to <c>CutPoint</c>,
 /// <c>{ 0, 1 }</c> otherwise, or null when any of the features is missing in the instance.
 /// </returns>
 /// <exception cref="InvalidOperationException">Thrown when any feature is nominal.</exception>
 public double[] Select(Instance instance)
 {
     bool noneNominal = Features.All(f => f.FeatureType != FeatureType.Nominal);
     if (!noneNominal)
     {
         throw new InvalidOperationException("Cannot use cutpoint on nominal data");
     }

     bool allPresent = Features.All(f => !FeatureValue.IsMissing(instance[f]));
     if (!allPresent)
     {
         return null;
     }

     double projection = VectorHelper.ScalarProjection(instance, Features, Weights);
     if (projection <= CutPoint)
     {
         return new double[] { 1, 0 };
     }

     return new double[] { 0, 1 };
 }
示例#8
0
        /// <summary>
        /// Prepares the iterator to work on a nominal <paramref name="feature"/>. Builds, for every
        /// distinct feature value present in the instances, the weighted class distribution
        /// (the second tuple item is added to the slot of the instance's class), as well as the
        /// overall class distribution, then resets the iteration state.
        /// </summary>
        /// <param name="feature">Feature to iterate over; must be nominal.</param>
        /// <param name="instances">Instances paired with the weight accumulated per class.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <c>Model</c> is null, or when the class feature or
        /// <paramref name="feature"/> is not nominal.
        /// </exception>
        public void Initialize(Feature feature, IEnumerable<Tuple<Instance, double>> instances)
        {
            _instances = instances;
            if (Model == null)
            {
                throw new InvalidOperationException("Model is null");
            }
            if (ClassFeature.FeatureType != FeatureType.Nominal)
            {
                throw new InvalidOperationException("Cannot use this iterator on non-nominal class");
            }
            NominalFeature classFeature = (NominalFeature)ClassFeature;

            if (feature.FeatureType != FeatureType.Nominal)
            {
                throw new InvalidOperationException("Cannot use this iterator on non-nominal feature");
            }
            _numClasses = classFeature.Values.Length;
            _feature    = feature;

            _perValueDistribution = new Dictionary<double, double[]>();
            _totalDistribution    = new double[_numClasses];
            foreach (var instance in _instances)
            {
                double value = instance.Item1[feature];
                if (FeatureValue.IsMissing(value))
                {
                    continue;
                }

                // An instance without a class cannot contribute to a class distribution, and
                // casting a missing value to int would produce a meaningless array index.
                double classValue = instance.Item1[ClassFeature];
                if (FeatureValue.IsMissing(classValue))
                {
                    continue;
                }

                double[] current;
                if (!_perValueDistribution.TryGetValue(value, out current))
                {
                    current = new double[_numClasses];
                    _perValueDistribution.Add(value, current);
                }

                int classIdx = (int)classValue;
                current[classIdx] += instance.Item2;

                _totalDistribution[classIdx] += instance.Item2;
            }

            CurrentDistribution = new double[2][];

            _valuesCount   = _perValueDistribution.Count;
            existingValues = _perValueDistribution.Keys.ToArray();

            _iteratingTwoValues = (_valuesCount == 2);
            _valueIndex         = -1;
            _twoValuesIterated  = false;
            _initialized        = true;
        }
示例#9
0
        /// <summary>
        /// Computes the weighted sum of the instance's values over <paramref name="Features"/>:
        /// the sum of <c>Weights[feature] * instance[feature]</c> for each feature, in array order.
        /// </summary>
        /// <param name="instance">Instance supplying the feature values.</param>
        /// <param name="Features">Features taking part in the projection.</param>
        /// <param name="Weights">Weight of each feature; every feature must be a key.</param>
        /// <returns>
        /// The weighted sum, or <c>double.NaN</c> when <c>FeatureValue.IsMissing</c> reports the
        /// instance's values for the features as missing.
        /// </returns>
        public static double ScalarProjection(Instance instance, Feature[] Features, IDictionary<Feature, double> Weights)
        {
            bool hasMissing = FeatureValue.IsMissing(instance[Features]);
            if (hasMissing)
            {
                return double.NaN;
            }

            double accumulated = 0;
            for (int i = 0; i < Features.Length; i++)
            {
                Feature current = Features[i];
                accumulated += Weights[current] * instance[current];
            }

            return accumulated;
        }
示例#10
0
 /// <summary>
 /// Builds the weighted class distribution of <paramref name="source"/>: for every instance
 /// whose class value is not missing, the second tuple item is added to the slot indexed by
 /// the class value truncated to an integer.
 /// </summary>
 /// <param name="source">Instances paired with the weight to accumulate.</param>
 /// <param name="model">Not used by this method.</param>
 /// <param name="classFeature">Class feature; must be nominal.</param>
 /// <returns>An array with one accumulated weight per class value.</returns>
 /// <exception cref="InvalidOperationException">Thrown when the class feature is not nominal.</exception>
 private double[] FindDistribution(IEnumerable<Tuple<Instance, double>> source, InstanceModel model,
                                   Feature classFeature)
 {
     if (classFeature.FeatureType != FeatureType.Nominal)
     {
         throw new InvalidOperationException("Cannot find distribution for non-nominal class");
     }

     NominalFeature nominalClass = (NominalFeature)classFeature;
     double[] distribution = new double[nominalClass.Values.Length];

     foreach (var weighted in source)
     {
         double classValue = weighted.Item1[classFeature];
         if (FeatureValue.IsMissing(classValue))
         {
             // Instances without a class do not contribute.
             continue;
         }

         distribution[(int)classValue] += weighted.Item2;
     }

     return distribution;
 }
        /// <summary>
        /// Routes <paramref name="instance"/> to the child whose position in <c>Values</c> matches
        /// the instance's value for <c>Feature</c> (truncated to an integer).
        /// </summary>
        /// <param name="instance">Instance to route.</param>
        /// <returns>
        /// An array of length <c>ChildrenCount</c> holding 1 at the matching position and 0
        /// elsewhere, or null when the value is missing or is not found in <c>Values</c>.
        /// </returns>
        /// <exception cref="InvalidOperationException">Thrown when <c>Feature</c> is not nominal.</exception>
        public double[] Select(Instance instance)
        {
            if (Feature.FeatureType != FeatureType.Nominal)
            {
                throw new InvalidOperationException("Cannot use multiple values on non-nominal data");
            }

            double observed = instance[Feature];
            if (FeatureValue.IsMissing(observed))
            {
                return null;
            }

            int value    = (int)observed;
            int position = Values.IndexOf(candidate => candidate == value);

            if (position != -1)
            {
                double[] selection = new double[ChildrenCount];
                selection[position] = 1;
                return selection;
            }

            // The value is not handled by any child.
            return null;
        }
示例#12
0
        /// <summary>
        /// Builds a <c>DatasetInformation</c> for <paramref name="model"/> and assigns it to
        /// <c>model.DatasetInformation</c>. It gathers the <c>FeatureInformation</c> of every
        /// model feature, the number of instances with at least one missing feature value, and
        /// the sum of the features' <c>MissingValueCount</c>.
        /// </summary>
        /// <param name="model">Model whose features are inspected and whose information is set.</param>
        /// <param name="instances">Instances scanned for missing values.</param>
        public static void FillDatasetInformation(InstanceModel model, IEnumerable<Instance> instances)
        {
            var summary = new DatasetInformation();

            // An instance is incomplete as soon as one model feature is missing in it.
            int incompleteCount = instances
                                  .Where(candidate => model.Features.Any(f => FeatureValue.IsMissing(candidate[f])))
                                  .Count();

            var perFeature = model.Features.Select(f => f.FeatureInformation).ToArray();

            summary.FeatureInformations       = perFeature;
            summary.ObjectsWithIncompleteData = incompleteCount;
            summary.GlobalAbscenseInformation = model.Features.Sum(f => f.FeatureInformation.MissingValueCount);

            model.DatasetInformation = summary;
        }