/// <summary>
/// Computes the replacement value for <paramref name="node"/> and the impact of substituting the node
/// with a constant of that value in a clone of the classification model.
/// </summary>
/// <param name="model">The model; cast to <c>ISymbolicClassificationModel</c>.</param>
/// <param name="node">The tree node to be replaced in the cloned model.</param>
/// <param name="problemData">The problem data; cast to <c>IClassificationProblemData</c>.</param>
/// <param name="rows">The rows used for all evaluations.</param>
/// <param name="impactValue">Baseline quality minus the quality after the replacement.</param>
/// <param name="replacementValue">The constant value that takes the place of the node.</param>
/// <param name="newQualityForImpactsCalculation">Accuracy of the modified clone (0.0 if the calculator reports an error).</param>
/// <param name="qualityForImpactsCalculation">Baseline quality; computed here when NaN is passed.</param>
public override void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node,
      IDataAnalysisProblemData problemData, IEnumerable<int> rows, out double impactValue, out double replacementValue, out double newQualityForImpactsCalculation,
      double qualityForImpactsCalculation = Double.NaN) {
      var symbolicModel = (ISymbolicClassificationModel)model;
      var data = (IClassificationProblemData)problemData;

      // Only compute the baseline quality when the caller did not supply one.
      if (double.IsNaN(qualityForImpactsCalculation)) {
        qualityForImpactsCalculation = CalculateQualityForImpacts(symbolicModel, data, rows);
      }

      replacementValue = CalculateReplacementValue(symbolicModel, node, data, rows);
      var replacementNode = new ConstantTreeNode(new Constant()) { Value = replacementValue };

      // Clone model and node with the same cloner so the node clone belongs to the cloned model.
      var cloner = new Cloner();
      var modelCopy = cloner.Clone(symbolicModel);
      var nodeCopy = (ISymbolicExpressionTreeNode)cloner.GetClone(node);

      // Swap the cloned node for the constant at the same position below its parent.
      var parentCopy = nodeCopy.Parent;
      int position = parentCopy.IndexOfSubtree(nodeCopy);
      parentCopy.RemoveSubtree(position);
      parentCopy.InsertSubtree(position, replacementNode);

      var dataset = data.Dataset;
      OnlineCalculatorError errorState;
      double accuracy = OnlineAccuracyCalculator.Calculate(
        dataset.GetDoubleValues(data.TargetVariable, rows),
        modelCopy.GetEstimatedClassValues(dataset, rows),
        out errorState);

      // A calculator error is treated as zero quality.
      newQualityForImpactsCalculation = errorState == OnlineCalculatorError.None ? accuracy : 0.0;
      impactValue = qualityForImpactsCalculation - newQualityForImpactsCalculation;
    }
 /// <summary>
 /// Rebuilds <c>Weights</c> for the allowed input variables of <paramref name="problemData"/>.
 /// A variable that already appears in the current element names gets the value of <c>GetWeight</c>;
 /// every other variable gets 1.
 /// </summary>
 public void AdaptToProblemData(IDataAnalysisProblemData problemData)
 {
     var adaptedValues = problemData.AllowedInputVariables
         .Select(name => Weights.ElementNames.Contains(name) ? GetWeight(name) : 1)
         .ToArray();

     var adapted = new DoubleArray(adaptedValues);
     adapted.ElementNames = problemData.AllowedInputVariables;
     Weights = adapted;
 }
Пример #3
0
        /// <summary>
        /// Writes the dataset of <paramref name="problemData"/> into <paramref name="datasetWorksheet"/>:
        /// the variable names go into worksheet row 1 and the data rows follow from worksheet row 2 on.
        /// </summary>
        protected void WriteDatasetToExcel(ExcelWorksheet datasetWorksheet, IDataAnalysisProblemData problemData)
        {
            // Remark: the performance of EPPlus drops dramatically if the data is not written
            // row-wise (from left to right) due to the internal indices used.
            IDataset dataset = problemData.Dataset;
            var columnNames = dataset.VariableNames.ToList();
            var numericColumns = new HashSet<string>(dataset.DoubleVariables);

            // Header: one cell per variable name in worksheet row 1.
            for (int index = 0; index < columnNames.Count; index++)
            {
                datasetWorksheet.Cells[1, index + 1].Value = columnNames[index];
            }

            for (int dataRow = 0; dataRow < dataset.Rows; dataRow++)
            {
                int sheetRow = dataRow + 2;
                for (int index = 0; index < columnNames.Count; index++)
                {
                    string columnName = columnNames[index];
                    if (numericColumns.Contains(columnName))
                    {
                        // Double variables are written as numbers.
                        datasetWorksheet.Cells[sheetRow, index + 1].Value = dataset.GetDoubleValue(columnName, dataRow);
                        continue;
                    }
                    datasetWorksheet.Cells[sheetRow, index + 1].Value = dataset.GetValue(dataRow, index);
                }
            }
        }
 /// <summary>
 /// Creates a symbolic classification model for <paramref name="tree"/> via the configured model creator
 /// and recalculates its model parameters on the training indices of the problem data.
 /// </summary>
 protected override ISymbolicDataAnalysisModel CreateModel(ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IDataAnalysisProblemData problemData, DoubleLimit estimationLimits) {
   var modelCreator = ModelCreatorParameter.ActualValue;
   var classificationModel = modelCreator.CreateSymbolicClassificationModel(tree, interpreter, estimationLimits.Lower, estimationLimits.Upper);

   var classificationData = (IClassificationProblemData)problemData;
   classificationModel.RecalculateModelParameters(classificationData, classificationData.TrainingIndices);
   return classificationModel;
 }
        /// <summary>
        /// Adopts the checked state of the input variables from <paramref name="problemData"/> and resets the
        /// partitions so that the training partition is empty and the test partition spans all dataset rows.
        /// </summary>
        /// <param name="problemData">The problem data whose properties are adopted; must be a <c>DataAnalysisProblemData</c>.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="problemData"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when <paramref name="problemData"/> is not a <c>DataAnalysisProblemData</c>.</exception>
        /// <exception cref="InvalidOperationException">Thrown when the compatibility check of <paramref name="problemData"/> fails.</exception>
        public virtual void AdjustProblemDataProperties(IDataAnalysisProblemData problemData)
        {
            // Guard first: without it a null argument would raise a NullReferenceException
            // while the type name for the error message below is looked up.
            if (problemData == null)
            {
                throw new ArgumentNullException("problemData", "The provided problemData is null.");
            }

            DataAnalysisProblemData data = problemData as DataAnalysisProblemData;

            if (data == null)
            {
                throw new ArgumentException("The problem data is not a data analysis problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
            }

            string errorMessage;

            if (!data.IsProblemDataCompatible(this, out errorMessage))
            {
                throw new InvalidOperationException(errorMessage);
            }

            // An input variable stays checked only if the other problem data has a variable
            // with the same value and that variable is checked there.
            foreach (var inputVariable in InputVariables)
            {
                var variable = data.InputVariables.FirstOrDefault(i => i.Value == inputVariable.Value);
                InputVariables.SetItemCheckedState(inputVariable, variable != null && data.InputVariables.ItemChecked(variable));
            }

            // Empty training partition; the whole dataset becomes the test partition.
            TrainingPartition.Start = TrainingPartition.End = 0;
            TestPartition.Start     = 0;
            TestPartition.End       = Dataset.Rows;
        }
        /// <summary>
        /// Adopts target variable, class names, positive class and classification penalties from
        /// <paramref name="problemData"/> after the base adjustment has run.
        /// </summary>
        /// <param name="problemData">The problem data whose properties are adopted; must be a <c>ClassificationProblemData</c>.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="problemData"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when <paramref name="problemData"/> is not a <c>ClassificationProblemData</c>.</exception>
        public override void AdjustProblemDataProperties(IDataAnalysisProblemData problemData)
        {
            if (problemData == null)
            {
                throw new ArgumentNullException("problemData", "The provided problemData is null.");
            }
            ClassificationProblemData classificationProblemData = problemData as ClassificationProblemData;

            if (classificationProblemData == null)
            {
                throw new ArgumentException("The problem data is not a classification problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
            }

            base.AdjustProblemDataProperties(problemData);
            TargetVariable = classificationProblemData.TargetVariable;

            // Materialize the class names once; Count()/ElementAt(i) inside the loop would
            // re-enumerate the sequence on every iteration.
            var classNames = classificationProblemData.ClassNames.ToList();
            for (int i = 0; i < classNames.Count; i++)
            {
                ClassNamesParameter.Value[i, 0] = classNames[i];
            }

            PositiveClass = classificationProblemData.PositiveClass;

            // Copy the penalty for every ordered pair of class values.
            for (int i = 0; i < Classes; i++)
            {
                for (int j = 0; j < Classes; j++)
                {
                    ClassificationPenaltiesParameter.Value[i, j] = classificationProblemData.GetClassificationPenalty(ClassValuesCache[i], ClassValuesCache[j]);
                }
            }
        }
        /// <summary>
        /// Checks that every allowed input variable of this instance is among the input variables
        /// of <paramref name="problemData"/>.
        /// </summary>
        /// <param name="problemData">The problem data to check against.</param>
        /// <param name="errorMessage">One line per missing input variable, or an empty string when compatible.</param>
        /// <returns>True when no allowed input variable is missing; otherwise false.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="problemData"/> is null.</exception>
        protected virtual bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage)
        {
            errorMessage = string.Empty;
            if (problemData == null)
            {
                throw new ArgumentNullException("problemData", "The provided problemData is null.");
            }

            // Collect the names offered by the other problem data for constant-time lookups.
            var offeredNames = new HashSet<string>(problemData.InputVariables.Select(x => x.Value));
            var report = new StringBuilder();

            foreach (var missingName in AllowedInputVariables.Where(name => !offeredNames.Contains(name)))
            {
                report.AppendLine("Input variable '" + missingName + "' is not present in the new problem data.");
            }

            if (report.Length == 0)
            {
                return true;
            }

            errorMessage = report.ToString();
            return false;
        }
Пример #8
0
 /// <summary>
 /// Opens a preprocessing context for <paramref name="problemData"/> in the main form. The context source is
 /// the algorithm found by <c>GetMostOuterContent</c>, else the problem, else the problem data itself.
 /// </summary>
 public void Start(IDataAnalysisProblemData problemData, IContentView currentView) {
   IAlgorithm outerAlgorithm;
   IDataAnalysisProblem outerProblem;
   GetMostOuterContent(currentView as Control, out outerAlgorithm, out outerProblem);

   // Pick the first non-null candidate as the source of the context.
   IItem source = outerAlgorithm;
   if (source == null) {
     source = outerProblem;
   }
   if (source == null) {
     source = problemData as IItem;
   }

   var preprocessingContext = new PreprocessingContext(problemData, source);
   MainFormManager.MainForm.ShowContent(preprocessingContext);
 }
        /// <summary>
        /// Copies variable names, column values and the training/test partitions from <paramref name="problemData"/>.
        /// </summary>
        /// <param name="problemData">The problem data to import; its dataset is cast to <c>Dataset</c>.</param>
        /// <exception cref="ArgumentException">Thrown when a column is neither double, string nor DateTime.</exception>
        public void Import(IDataAnalysisProblemData problemData)
        {
            var sourceDataset = (Dataset)problemData.Dataset;

            variableNames  = new List<string>(problemData.Dataset.VariableNames);
            variableValues = new List<IList>();

            // Columns are appended in the order of the variable names.
            foreach (var name in problemData.Dataset.VariableNames)
            {
                IList column;
                if (sourceDataset.VariableHasType<double>(name))
                {
                    column = sourceDataset.GetDoubleValues(name).ToList();
                }
                else if (sourceDataset.VariableHasType<string>(name))
                {
                    column = sourceDataset.GetStringValues(name).ToList();
                }
                else if (sourceDataset.VariableHasType<DateTime>(name))
                {
                    column = sourceDataset.GetDateTimeValues(name).ToList();
                }
                else
                {
                    throw new ArgumentException("The datatype of column " + name + " must be of type double, string or DateTime");
                }
                variableValues.Add(column);
            }

            var training = problemData.TrainingPartition;
            TrainingPartition = new IntRange(training.Start, training.End);
            var test = problemData.TestPartition;
            TestPartition = new IntRange(test.Start, test.End);
        }
Пример #10
0
    /// <summary>
    /// Creates new problem data of the same kind as <paramref name="oldProblemData"/> from the preprocessed data,
    /// applies the training/test partitions and carries over the checked state of the input variables.
    /// </summary>
    /// <returns>The new problem data, or null when the preprocessed data has no rows or no columns.</returns>
    /// <exception cref="NotImplementedException">Thrown when the type of <paramref name="oldProblemData"/> is not supported.</exception>
    public IDataAnalysisProblemData CreateProblemData(IDataAnalysisProblemData oldProblemData) {
      if (context.Data.Rows == 0 || context.Data.Columns == 0) {
        return null;
      }

      // The order of the type checks is kept as is; the first matching type wins.
      IDataAnalysisProblemData created;
      if (oldProblemData is TimeSeriesPrognosisProblemData) {
        created = CreateTimeSeriesPrognosisData((TimeSeriesPrognosisProblemData)oldProblemData);
      } else if (oldProblemData is RegressionProblemData) {
        created = CreateRegressionData((RegressionProblemData)oldProblemData);
      } else if (oldProblemData is ClassificationProblemData) {
        created = CreateClassificationData((ClassificationProblemData)oldProblemData);
      } else if (oldProblemData is ClusteringProblemData) {
        created = CreateClusteringData((ClusteringProblemData)oldProblemData);
      } else {
        throw new NotImplementedException("The type of the DataAnalysisProblemData is not supported.");
      }

      SetTrainingAndTestPartition(created);

      // A variable is checked only if the old problem data has a variable with the same value that is checked.
      var oldVariablesByValue = oldProblemData.InputVariables.ToDictionary(x => x.Value, x => x);
      foreach (var candidate in created.InputVariables) {
        bool keepChecked = false;
        if (oldVariablesByValue.ContainsKey(candidate.Value)) {
          keepChecked = oldProblemData.InputVariables.ItemChecked(oldVariablesByValue[candidate.Value]);
        }
        created.InputVariables.SetItemCheckedState(candidate, keepChecked);
      }

      return created;
    }
Пример #11
0
 /// <summary>
 /// Checks compatibility by delegating to <c>IsDatasetCompatible</c> with the dataset of <paramref name="problemData"/>.
 /// </summary>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="problemData"/> is null.</exception>
 public override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage)
 {
     if (problemData != null)
     {
         return IsDatasetCompatible(problemData.Dataset, out errorMessage);
     }

     throw new ArgumentNullException("problemData", "The provided problemData is null.");
 }
    /// <summary>
    /// Bundles dataset, calculator, partition and the indices returned by <c>GetRelevantIndices</c>
    /// into a <c>BackgroundWorkerInfo</c> and passes it to <c>StartCalculation</c>.
    /// </summary>
    public void CalculateElements(IDataAnalysisProblemData problemData, IDependencyCalculator calc, string partition, bool ignoreMissingValues) {
      var relevantIndices = GetRelevantIndices(problemData, partition);

      var workerInfo = new BackgroundWorkerInfo();
      workerInfo.Dataset = problemData.Dataset;
      workerInfo.Calculator = calc;
      workerInfo.Partition = partition;
      workerInfo.Indices = relevantIndices;
      workerInfo.IgnoreMissingValues = ignoreMissingValues;

      StartCalculation(workerInfo);
    }
 /// <summary>
 /// Imports <paramref name="problemData"/> into the original (unfiltered) data.
 /// </summary>
 /// <exception cref="InvalidOperationException">Thrown while the data is filtered.</exception>
 public void Import(IDataAnalysisProblemData problemData)
 {
     if (!IsFiltered)
     {
         originalData.Import(problemData);
         return;
     }

     throw new InvalidOperationException("Import not possible while data is filtered");
 }
 /// <summary>
 /// Returns the training indices, the test indices, or all row indices of the dataset,
 /// depending on which partition constant <paramref name="partition"/> equals.
 /// </summary>
 protected static IEnumerable<int> GetRelevantIndices(IDataAnalysisProblemData problemData, string partition) {
   if (partition.Equals(AbstractFeatureCorrelationView.TRAININGSAMPLES)) {
     return problemData.TrainingIndices;
   }
   if (partition.Equals(AbstractFeatureCorrelationView.TESTSAMPLES)) {
     return problemData.TestIndices;
   }
   // Any other partition value selects every row of the dataset.
   return Enumerable.Range(0, problemData.Dataset.Rows);
 }
        /// <summary>
        /// Copies start and end of the training and test partitions from the preprocessing data
        /// of the context to <paramref name="problemData"/>.
        /// </summary>
        private void SetTrainingAndTestPartition(IDataAnalysisProblemData problemData)
        {
            var source = context.Data;

            var trainingTarget = problemData.TrainingPartition;
            trainingTarget.Start = source.TrainingPartition.Start;
            trainingTarget.End = source.TrainingPartition.End;

            var testTarget = problemData.TestPartition;
            testTarget.Start = source.TestPartition.Start;
            testTarget.End = source.TestPartition.End;
        }
    /// <summary>
    /// Creates a solution holding <paramref name="model"/> and <paramref name="problemData"/> as results
    /// and subscribes to the Changed event of the problem data.
    /// </summary>
    public DataAnalysisSolution(IDataAnalysisModel model, IDataAnalysisProblemData problemData) {
      name = ItemName;
      description = ItemDescription;

      var modelResult = new Result(ModelResultName, "The data analysis model.", model);
      Add(modelResult);
      var problemDataResult = new Result(ProblemDataResultName, "The data analysis problem data.", problemData);
      Add(problemDataResult);

      problemData.Changed += ProblemData_Changed;
    }
Пример #17
0
 /// <summary>
 /// Creates a context named "Data Preprocessing" and imports <paramref name="problemData"/>
 /// together with the optional <paramref name="source"/>.
 /// </summary>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="problemData"/> is null.</exception>
 public PreprocessingContext(IDataAnalysisProblemData problemData, IItem source = null)
     : base("Data Preprocessing")
 {
     if (problemData == null) throw new ArgumentNullException("problemData");

     Import(problemData, source);
 }
Пример #18
0
        /// <summary>
        /// Starts a calculation for the rows selected by <paramref name="partition"/>; the request is
        /// described by a <c>BackgroundWorkerInfo</c> handed to <c>StartCalculation</c>.
        /// </summary>
        public void CalculateElements(IDataAnalysisProblemData problemData, IDependencyCalculator calc, string partition, bool ignoreMissingValues)
        {
            var selectedRows = GetRelevantIndices(problemData, partition);

            var request = new BackgroundWorkerInfo();
            request.Dataset = problemData.Dataset;
            request.Calculator = calc;
            request.Partition = partition;
            request.Indices = selectedRows;
            request.IgnoreMissingValues = ignoreMissingValues;

            StartCalculation(request);
        }
    /// <summary>
    /// Creates a context whose data is a filtered view over transactional preprocessing data built from
    /// <paramref name="dataAnalysisProblemData"/>, and remembers the problem data, algorithm and problem.
    /// </summary>
    public PreprocessingContext(IDataAnalysisProblemData dataAnalysisProblemData, IAlgorithm algorithm, IDataAnalysisProblem problem) {
      Data = new FilteredPreprocessingData(new TransactionalPreprocessingData(dataAnalysisProblemData));

      ProblemData = dataAnalysisProblemData;
      Algorithm = algorithm;
      Problem = problem;

      // The creator receives this context.
      creator = new ProblemDataCreator(this);
    }
 /// <summary>
 /// Fills the <c>weights</c> field for the allowed input variables of <paramref name="problemData"/>.
 /// If none of the weights has an element name, the weights are taken as they are;
 /// otherwise each variable's weight is obtained through <c>GetWeight</c>.
 /// </summary>
 /// <exception cref="ArgumentException">Thrown when the number of weights differs from the number of allowed input variables.</exception>
 public void Initialize(IDataAnalysisProblemData problemData)
 {
     bool countMatches = Weights.Length == problemData.AllowedInputVariables.Count();
     if (!countMatches)
     {
         throw new ArgumentException("Number of Weights does not match the number of input variables");
     }

     bool noElementNames = Weights.ElementNames.All(elementName => string.IsNullOrEmpty(elementName));
     if (noElementNames)
     {
         weights = Weights.ToArray();
     }
     else
     {
         weights = problemData.AllowedInputVariables.Select(GetWeight).ToArray();
     }
 }
Пример #21
0
        /// <summary>
        /// Shows a new preprocessing context for <paramref name="problemData"/>. Its source is the first
        /// non-null of: the outer algorithm, the outer problem, the problem data as an item.
        /// </summary>
        public void Start(IDataAnalysisProblemData problemData, IContentView currentView)
        {
            IAlgorithm foundAlgorithm;
            IDataAnalysisProblem foundProblem;
            GetMostOuterContent(currentView as Control, out foundAlgorithm, out foundProblem);

            IItem contextSource;
            if (foundAlgorithm != null)
            {
                contextSource = foundAlgorithm;
            }
            else if (foundProblem != null)
            {
                contextSource = foundProblem;
            }
            else
            {
                contextSource = problemData as IItem;
            }

            MainFormManager.MainForm.ShowContent(new PreprocessingContext(problemData, contextSource));
        }
        /// <summary>
        /// Initializes name and description, adds the model and the problem data as results and
        /// registers <c>ProblemData_Changed</c> on the problem data's Changed event.
        /// </summary>
        public DataAnalysisSolution(IDataAnalysisModel model, IDataAnalysisProblemData problemData)
        {
            name = ItemName;
            description = ItemDescription;

            Add(new Result(ModelResultName, "The data analysis model.", model));
            Add(new Result(ProblemDataResultName, "The data analysis problem data.", problemData));

            problemData.Changed += ProblemData_Changed;
        }
    /// <summary>
    /// Starts a time-frame calculation for <paramref name="variable"/> unless an already supplied
    /// correlation matrix has more entries in its second dimension than <paramref name="frames"/>.
    /// </summary>
    /// <returns>True if a calculation was started; otherwise false.</returns>
    public bool CalculateTimeframeElements(IDataAnalysisProblemData problemData, IDependencyCalculator calc, string partition, string variable, int frames, double[,] correlation = null) {
      bool alreadyCovered = correlation != null && correlation.GetLength(1) > frames;
      if (alreadyCovered) {
        return false;
      }

      var relevantIndices = GetRelevantIndices(problemData, partition);

      var workerInfo = new BackgroundWorkerInfo();
      workerInfo.Dataset = problemData.Dataset;
      workerInfo.Calculator = calc;
      workerInfo.Partition = partition;
      workerInfo.Indices = relevantIndices;
      workerInfo.Variable = variable;
      workerInfo.Frames = frames;
      workerInfo.AlreadyCalculated = correlation;

      StartCalculation(workerInfo);
      return true;
    }
Пример #24
0
        /// <summary>
        /// Wraps <paramref name="dataAnalysisProblemData"/> in transactional preprocessing data behind a filtered view,
        /// stores the problem data, algorithm and problem, and creates the problem data creator for this context.
        /// </summary>
        public PreprocessingContext(IDataAnalysisProblemData dataAnalysisProblemData, IAlgorithm algorithm, IDataAnalysisProblem problem)
        {
            var transactional = new TransactionalPreprocessingData(dataAnalysisProblemData);
            var filtered = new FilteredPreprocessingData(transactional);
            Data = filtered;

            ProblemData = dataAnalysisProblemData;
            Algorithm = algorithm;
            Problem = problem;

            creator = new ProblemDataCreator(this);
        }
        /// <summary>
        /// Creates preprocessing data named "Preprocessing Data" with empty transformation and selection
        /// collections, imports <paramref name="problemData"/> and then registers the event handler.
        /// </summary>
        protected PreprocessingData(IDataAnalysisProblemData problemData)
        {
            Name = "Preprocessing Data";

            // Start with empty collections before the import runs.
            transformations = new List<ITransformation>();
            selection = new Dictionary<int, IList<int>>();

            Import(problemData);
            RegisterEventHandler();
        }
Пример #26
0
        /// <summary>
        /// Lets the user pick a file, loads problem data from it (directly or through a contained problem),
        /// clones the optimizer of the active view, applies the problem data to the clone and to its
        /// nested algorithms, and shows the clone.
        /// </summary>
        /// <exception cref="ArgumentException">Thrown when the file contains neither a problem nor problem data.</exception>
        public override void Execute()
        {
            using (var dialog = new OpenFileDialog())
            {
                dialog.Title = "Open Problem file";
                dialog.FileName = "Item";
                dialog.Multiselect = false;
                dialog.DefaultExt = "hl";
                dialog.Filter = "HeuristicLab Files|*.hl|All Files|*.*";

                if (dialog.ShowDialog() != DialogResult.OK) return;

                // The file may hold a whole problem or just problem data.
                var content = ContentManager.Load(dialog.FileName);
                var problem = content as IDataAnalysisProblem;
                IDataAnalysisProblemData problemData = problem != null
                    ? problem.ProblemData
                    : content as IDataAnalysisProblemData;
                if (problemData == null)
                {
                    throw new ArgumentException("The specified file does not contain a HeuristicLab problem.");
                }

                var activeView = (IContentView)MainFormManager.MainForm.ActiveView;
                var currentOptimizer = (IOptimizer)activeView.Content;
                var clonedOptimizer = (IOptimizer)currentOptimizer.Clone();

                var topLevelAlgorithm = clonedOptimizer as IAlgorithm;
                if (topLevelAlgorithm != null)
                {
                    ChangeProblemData(topLevelAlgorithm, problemData);
                }

                // Every nested algorithm receives its own clone of the problem data.
                foreach (var nestedAlgorithm in clonedOptimizer.NestedOptimizers.OfType<IAlgorithm>())
                {
                    ChangeProblemData(nestedAlgorithm, (IDataAnalysisProblemData)problemData.Clone());
                    if (problem == null) continue;
                    nestedAlgorithm.Name += " " + problem.Name;
                }

                MainFormManager.MainForm.ShowContent(clonedOptimizer);
            }
        }
Пример #27
0
 /// <summary>
 /// Imports <paramref name="problemData"/> into this context: sets the source (falling back to the problem data),
 /// derives the name from a named source, rebuilds the preprocessing data and calls <c>OnReset</c>.
 /// </summary>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="problemData"/> is null.</exception>
 /// <exception cref="ArgumentException">Thrown when the problem data extracted from <paramref name="source"/> differs from <paramref name="problemData"/>.</exception>
 public void Import(IDataAnalysisProblemData problemData, IItem source = null) {
   if (problemData == null) {
     throw new ArgumentNullException("problemData");
   }

   bool sourceMismatch = source != null && ExtractProblemData(source) != problemData;
   if (sourceMismatch) {
     throw new ArgumentException("The ProblemData extracted from the Source is different than the given ProblemData.");
   }

   Source = source ?? problemData;

   var named = Source as INamedItem;
   if (named != null) {
     Name = "Preprocessing " + named.Name;
   }

   var transactional = new TransactionalPreprocessingData(problemData);
   Data = new FilteredPreprocessingData(transactional);
   OnReset();
   // Reset GUI:
   // - OnContentChanged for PreprocessingView!
   // event? task(async import)?
 }
Пример #28
0
        /// <summary>
        /// Checks compatibility of <paramref name="problemData"/> by forwarding to the overload
        /// that takes an <c>IClassificationProblemData</c>.
        /// </summary>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="problemData"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when <paramref name="problemData"/> is not classification problem data.</exception>
        public override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage)
        {
            if (problemData == null)
            {
                throw new ArgumentNullException("problemData", "The provided problemData is null.");
            }

            var classificationProblemData = problemData as IClassificationProblemData;
            if (classificationProblemData != null)
            {
                return IsProblemDataCompatible(classificationProblemData, out errorMessage);
            }

            throw new ArgumentException("The problem data is not compatible with this classification model. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
        }
Пример #29
0
        /// <summary>
        /// Starts a time-frame calculation unless <paramref name="correlation"/> is given and its second
        /// dimension is already larger than <paramref name="frames"/>.
        /// </summary>
        /// <returns>True if any calculation takes place; otherwise false.</returns>
        public bool CalculateTimeframeElements(IDataAnalysisProblemData problemData, IDependencyCalculator calc, string partition, string variable, int frames, double[,] correlation = null)
        {
            if (correlation != null)
            {
                if (correlation.GetLength(1) > frames)
                {
                    return false;
                }
            }

            var selectedRows = GetRelevantIndices(problemData, partition);

            var request = new BackgroundWorkerInfo();
            request.Dataset = problemData.Dataset;
            request.Calculator = calc;
            request.Partition = partition;
            request.Indices = selectedRows;
            request.Variable = variable;
            request.Frames = frames;
            request.AlreadyCalculated = correlation;

            StartCalculation(request);
            return true;
        }
        /// <summary>
        /// Validates that <paramref name="problemData"/> is a <c>RegressionProblemData</c> and then runs the base adjustment.
        /// </summary>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="problemData"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when <paramref name="problemData"/> is not a <c>RegressionProblemData</c>.</exception>
        public override void AdjustProblemDataProperties(IDataAnalysisProblemData problemData)
        {
            if (problemData == null)
            {
                throw new ArgumentNullException("problemData", "The provided problemData is null.");
            }

            bool isRegressionData = problemData is RegressionProblemData;
            if (!isRegressionData)
            {
                throw new ArgumentException("The problem data is not a regression problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
            }

            base.AdjustProblemDataProperties(problemData);
        }
    /// <summary>
    /// Linearly scales the symbolic expression tree: the scaling parameters alpha and beta are calculated from
    /// the tree's values and the values of <paramref name="targetVariable"/> on the training indices, and the
    /// tree is changed to compute <c>tree * beta + alpha</c>. Nothing is changed when the calculator reports an error.
    /// </summary>
    /// <param name="problemData">Provides the dataset and the training indices.</param>
    /// <param name="targetVariable">Name of the variable whose values are the scaling target.</param>
    /// <exception cref="ArgumentException">Thrown when target and estimated values differ in length (and the calculator is not in an error state).</exception>
    protected void Scale(IDataAnalysisProblemData problemData, string targetVariable) {
      var dataset = problemData.Dataset;
      var rows = problemData.TrainingIndices;
      var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, dataset, rows);
      var targetValues = dataset.GetDoubleValues(targetVariable, rows);

      var linearScalingCalculator = new OnlineLinearScalingParameterCalculator();
      bool targetHasNext = false;
      bool estimatedHasNext = false;
      // Dispose both enumerators once the lock-step walk is over.
      using (var targetValuesEnumerator = targetValues.GetEnumerator())
      using (var estimatedValuesEnumerator = estimatedValues.GetEnumerator()) {
        // The non-short-circuit '&' advances both enumerators on every step. The last MoveNext results
        // are kept because asking again after the loop would miss a length difference of exactly one.
        while ((targetHasNext = targetValuesEnumerator.MoveNext()) & (estimatedHasNext = estimatedValuesEnumerator.MoveNext())) {
          double target = targetValuesEnumerator.Current;
          double estimated = estimatedValuesEnumerator.Current;
          // NaN and infinite estimates are skipped.
          if (!double.IsNaN(estimated) && !double.IsInfinity(estimated))
            linearScalingCalculator.Add(estimated, target);
        }
      }
      // After the loop at least one sequence is exhausted; if the other one is not, the lengths differ.
      if (linearScalingCalculator.ErrorState == OnlineCalculatorError.None && (targetHasNext || estimatedHasNext))
        throw new ArgumentException("Number of elements in target and estimated values enumeration do not match.");

      double alpha = linearScalingCalculator.Alpha;
      double beta = linearScalingCalculator.Beta;
      if (linearScalingCalculator.ErrorState != OnlineCalculatorError.None) return;

      ConstantTreeNode alphaTreeNode = null;
      ConstantTreeNode betaTreeNode = null;
      // check if model has been scaled previously by analyzing the structure of the tree:
      // Addition(Multiplication(<branch>, Constant), Constant)
      var startNode = SymbolicExpressionTree.Root.GetSubtree(0);
      if (startNode.GetSubtree(0).Symbol is Addition) {
        var addNode = startNode.GetSubtree(0);
        if (addNode.SubtreeCount == 2 && addNode.GetSubtree(0).Symbol is Multiplication && addNode.GetSubtree(1).Symbol is Constant) {
          alphaTreeNode = addNode.GetSubtree(1) as ConstantTreeNode;
          var mulNode = addNode.GetSubtree(0);
          if (mulNode.SubtreeCount == 2 && mulNode.GetSubtree(1).Symbol is Constant) {
            betaTreeNode = mulNode.GetSubtree(1) as ConstantTreeNode;
          }
        }
      }
      // if tree structure matches the structure necessary for linear scaling then reuse the existing tree nodes
      if (alphaTreeNode != null && betaTreeNode != null) {
        // (x * b0 + a0) * beta + alpha == x * (b0 * beta) + (a0 * beta + alpha)
        betaTreeNode.Value *= beta;
        alphaTreeNode.Value *= beta;
        alphaTreeNode.Value += alpha;
      } else {
        // otherwise wrap the main branch into new product and sum nodes
        var mainBranch = startNode.GetSubtree(0);
        startNode.RemoveSubtree(0);
        var scaledMainBranch = MakeSum(MakeProduct(mainBranch, beta), alpha);
        startNode.AddSubtree(scaledMainBranch);
      }
    }
Пример #32
0
        /// <summary>
        /// Handles a copy drop: extracts problem data from the dropped object (problem data, a problem, or a
        /// value parameter), clones it, adjusts it to the current content's problem data and assigns it to the content.
        /// Invalid-operation and argument errors are shown in an error dialog.
        /// </summary>
        protected override void itemsListView_DragDrop(object sender, DragEventArgs e)
        {
            if (e.Effect != DragDropEffects.Copy) return;

            var dropData = e.Data.GetData(HeuristicLab.Common.Constants.DragDropDataFormat);

            // Try the supported drop types in order of precedence.
            IDataAnalysisProblemData problemData = dropData as IDataAnalysisProblemData;
            if (problemData == null)
            {
                var droppedProblem = dropData as IDataAnalysisProblem;
                if (droppedProblem != null)
                {
                    problemData = droppedProblem.ProblemData;
                }
                else
                {
                    var droppedParameter = dropData as IValueParameter;
                    if (droppedParameter != null)
                    {
                        problemData = droppedParameter.Value as DataAnalysisProblemData;
                    }
                }
            }
            if (problemData == null) return;

            // Work on a clone so the dropped object itself is not modified.
            problemData = (IDataAnalysisProblemData)problemData.Clone();

            try
            {
                problemData.AdjustProblemDataProperties(Content.ProblemData);
                Content.ProblemData = problemData;

                bool alreadyMarked = Content.Name.EndsWith(" with changed problemData");
                if (!alreadyMarked)
                {
                    Content.Name += " with changed problemData";
                }
                Content.Filename = string.Empty;
                MainFormManager.GetMainForm<HeuristicLab.MainForm.WindowsForms.MainForm>().UpdateTitle();
            }
            catch (InvalidOperationException invalidOperationException)
            {
                ErrorHandling.ShowErrorDialog(this, invalidOperationException);
            }
            catch (ArgumentException argumentException)
            {
                ErrorHandling.ShowErrorDialog(this, argumentException);
            }
        }
Пример #33
0
        /// <summary>
        /// Returns the target variable of regression or classification problem data (regression is checked first).
        /// </summary>
        /// <exception cref="ArgumentException">Thrown when <paramref name="problemData"/> is neither of the two kinds.</exception>
        private static string GetTargetVariableName(IDataAnalysisProblemData problemData)
        {
            var asRegression = problemData as IRegressionProblemData;
            if (asRegression != null)
            {
                return asRegression.TargetVariable;
            }

            var asClassification = problemData as IClassificationProblemData;
            if (asClassification != null)
            {
                return asClassification.TargetVariable;
            }

            throw new ArgumentException("Problem data is neither regression or classification problem data.");
        }
        /// <summary>
        /// Requires <paramref name="problemData"/> to be regression problem data and then returns the result of the base check.
        /// </summary>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="problemData"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when <paramref name="problemData"/> is not an <c>IRegressionProblemData</c>.</exception>
        protected override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage)
        {
            if (problemData == null)
            {
                throw new ArgumentNullException("problemData", "The provided problemData is null.");
            }

            if (!(problemData is IRegressionProblemData))
            {
                throw new ArgumentException("The problem data is not a regression problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
            }

            return base.IsProblemDataCompatible(problemData, out errorMessage);
        }
Пример #35
0
        /// <summary>
        /// Generates <paramref name="numberOfFolds"/> folds over the training indices of <paramref name="problemData"/>.
        /// For regression data the indices are optionally put in random order; for classification data
        /// shuffling switches to stratified fold generation.
        /// </summary>
        /// <exception cref="ArgumentException">Thrown when the problem data is neither regression nor classification data.</exception>
        public static IEnumerable<IEnumerable<int>> GenerateFolds(IDataAnalysisProblemData problemData, int numberOfFolds, bool shuffleFolds = false)
        {
            var random = new MersenneTwister((uint)Environment.TickCount);

            if (problemData is IRegressionProblemData)
            {
                IEnumerable<int> trainingIndices;
                if (shuffleFolds)
                {
                    // Ordering by a random key yields a random order of the indices.
                    trainingIndices = problemData.TrainingIndices.OrderBy(x => random.Next());
                }
                else
                {
                    trainingIndices = problemData.TrainingIndices;
                }
                return GenerateFolds(trainingIndices, problemData.TrainingPartition.Size, numberOfFolds);
            }

            if (problemData is IClassificationProblemData)
            {
                // when shuffle is enabled do stratified folds generation, some folds may have zero elements
                // otherwise, generate folds normally
                if (shuffleFolds)
                {
                    return GenerateFoldsStratified(problemData as IClassificationProblemData, numberOfFolds, random);
                }
                return GenerateFolds(problemData.TrainingIndices, problemData.TrainingPartition.Size, numberOfFolds);
            }

            throw new ArgumentException("Problem data is neither regression or classification problem data.");
        }
        /// <summary>
        /// Creates a surrogate that stores everything needed to recalculate the random forest model
        /// (seed, a clone of the training data, class values, nTrees, r, m); the actual model is
        /// produced lazily through <c>RecalculateModel</c>.
        /// </summary>
        public RandomForestModelSurrogate(string targetVariable, IDataAnalysisProblemData originalTrainingData,
                                          int seed, int nTrees, double r, double m, double[] classValues = null)
            : base(targetVariable)
        {
            name = ItemName;
            description = ItemDescription;

            // data which is necessary for recalculation of the model
            this.seed = seed;
            // a clone is stored, not the instance passed in
            this.originalTrainingData = (IDataAnalysisProblemData)originalTrainingData.Clone();
            this.classValues = classValues;
            this.nTrees = nTrees;
            this.r = r;
            this.m = m;

            // the model is only built when it is first requested
            actualModel = new Lazy<IRandomForestModel>(() => RecalculateModel());
        }
Пример #37
0
 // Random forest models can only be created through the static factory methods
 // CreateRegressionModel and CreateClassificationModel.
 /// <summary>
 /// Stores the given decision forest together with the data needed to recalculate it
 /// (seed, a clone of the training data, class values, nTrees, r, m).
 /// </summary>
 private RandomForestModel(string targetVariable, alglib.decisionforest randomForest,
                           int seed, IDataAnalysisProblemData originalTrainingData,
                           int nTrees, double r, double m, double[] classValues = null)
     : base(targetVariable)
 {
     name = ItemName;
     description = ItemDescription;

     // the model itself
     this.randomForest = randomForest;

     // data which is necessary for recalculation of the model
     this.seed = seed;
     // a clone is stored, not the instance passed in
     this.originalTrainingData = (IDataAnalysisProblemData)originalTrainingData.Clone();
     this.classValues = classValues;
     this.nTrees = nTrees;
     this.r = r;
     this.m = m;
 }
Пример #38
0
        /// <summary>
        /// Loads problem data from a file chosen by the user (the file may hold problem data, a problem or a
        /// solution), adjusts it to the problem data of a clone of the current content, assigns it to the clone
        /// and shows the clone. Invalid-operation and argument errors are shown in an error dialog.
        /// </summary>
        protected virtual void loadProblemDataButton_Click(object sender, EventArgs e)
        {
            if (loadProblemDataFileDialog.ShowDialog(this) != DialogResult.OK) return;

            try
            {
                object hlFile = XmlParser.Deserialize(loadProblemDataFileDialog.FileName);

                // Try the supported file contents in order of precedence.
                IDataAnalysisProblemData problemData = hlFile as IDataAnalysisProblemData;
                if (problemData == null)
                {
                    var loadedProblem = hlFile as IDataAnalysisProblem;
                    if (loadedProblem != null)
                    {
                        problemData = loadedProblem.ProblemData;
                    }
                    else
                    {
                        var loadedSolution = hlFile as IDataAnalysisSolution;
                        if (loadedSolution != null)
                        {
                            problemData = loadedSolution.ProblemData;
                        }
                    }
                }

                if (problemData == null)
                {
                    throw new InvalidOperationException("The chosen HeuristicLab file does not contain a ProblemData, Problem, or DataAnalysisSolution.");
                }

                // The current content stays untouched; a clone receives the loaded problem data.
                var clonedSolution = (IDataAnalysisSolution)Content.Clone();
                problemData.AdjustProblemDataProperties(clonedSolution.ProblemData);
                clonedSolution.ProblemData = problemData;

                bool alreadyMarked = clonedSolution.Name.EndsWith(" with loaded problemData");
                if (!alreadyMarked)
                {
                    clonedSolution.Name += " with loaded problemData";
                }
                MainFormManager.MainForm.ShowContent(clonedSolution);
            }
            catch (InvalidOperationException invalidOperationException)
            {
                ErrorHandling.ShowErrorDialog(this, invalidOperationException);
            }
            catch (ArgumentException argumentException)
            {
                ErrorHandling.ShowErrorDialog(this, argumentException);
            }
        }
        /// <summary>
        /// Returns the row indices of the requested partition of the problem data.
        /// </summary>
        /// <param name="problemData">The problem data that provides the indices.</param>
        /// <param name="partition">
        /// The partition name; anything other than the training or test samples constant
        /// of <c>AbstractFeatureCorrelationView</c> selects all rows of the dataset.
        /// </param>
        /// <returns>The training indices, the test indices, or all row indices.</returns>
        protected static IEnumerable <int> GetRelevantIndices(IDataAnalysisProblemData problemData, string partition)
        {
            if (partition.Equals(AbstractFeatureCorrelationView.TRAININGSAMPLES))
                return problemData.TrainingIndices;

            if (partition.Equals(AbstractFeatureCorrelationView.TESTSAMPLES))
                return problemData.TestIndices;

            // every other partition name means "all samples"
            return Enumerable.Range(0, problemData.Dataset.Rows);
        }
Пример #40
0
        /// <summary>
        /// Sets the given problem data as the actual value of the algorithm's problem data
        /// parameter, provided the algorithm's problem is a data analysis problem.
        /// </summary>
        /// <param name="algorithm">The algorithm whose problem is updated.</param>
        /// <param name="problemData">The problem data to assign.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="algorithm"/> or <paramref name="problemData"/> is null.
        /// </exception>
        private void ChangeProblemData(IAlgorithm algorithm, IDataAnalysisProblemData problemData)
        {
            if (algorithm == null) throw new ArgumentNullException("algorithm");
            if (problemData == null) throw new ArgumentNullException("problemData");

            var dataAnalysisProblem = algorithm.Problem as IDataAnalysisProblem;

            // algorithms working on other problem types are silently left unchanged
            if (dataAnalysisProblem == null) return;

            dataAnalysisProblem.ProblemDataParameter.ActualValue = problemData;
        }
        /// <summary>
        /// Builds one (training, test) pair of scaled SVM problems per fold.
        /// </summary>
        /// <remarks>
        /// For fold i the test problem is built from fold i and the training problem from all
        /// other folds. The range transform is computed from the unscaled training problem
        /// and applied to both problems.
        /// </remarks>
        /// <param name="problemData">Source of the dataset and the allowed input variables.</param>
        /// <param name="numberOfFolds">Number of folds and therefore of returned pairs.</param>
        /// <param name="shuffleFolds">Passed through to the fold generation.</param>
        /// <returns>An array with <paramref name="numberOfFolds"/> (training, test) tuples.</returns>
        private static Tuple <svm_problem, svm_problem>[] GenerateSvmPartitions(IDataAnalysisProblemData problemData, int numberOfFolds, bool shuffleFolds = true)
        {
            var folds = GenerateFolds(problemData, numberOfFolds, shuffleFolds).ToList();
            var targetVariable = GetTargetVariableName(problemData);
            var result = new Tuple <svm_problem, svm_problem> [numberOfFolds];

            for (int foldIndex = 0; foldIndex < numberOfFolds; foldIndex++)
            {
                // copy for the lambda below to avoid the "access to modified closure" warning
                int heldOutFold = foldIndex;
                var trainingRows = folds.Where((fold, j) => j != heldOutFold).SelectMany(fold => fold);
                var testRows = folds[foldIndex];

                var unscaledTraining = CreateSvmProblem(problemData.Dataset, targetVariable, problemData.AllowedInputVariables, trainingRows);
                var scaling = RangeTransform.Compute(unscaledTraining);

                var unscaledTest = CreateSvmProblem(problemData.Dataset, targetVariable, problemData.AllowedInputVariables, testRows);
                var scaledTest = scaling.Scale(unscaledTest);
                var scaledTraining = scaling.Scale(unscaledTraining);

                result[foldIndex] = new Tuple <svm_problem, svm_problem>(scaledTraining, scaledTest);
            }
            return result;
        }
Пример #42
0
    /// <summary>
    /// Copy constructor: creates a new alglib forest that shares the tree data of
    /// <paramref name="original"/> and takes over the settings needed to rebuild the model.
    /// </summary>
    /// <param name="original">The model to copy from.</param>
    /// <param name="cloner">The cloner used for the base class and the training data.</param>
    private RandomForestModel(RandomForestModel original, Cloner cloner)
      : base(original, cloner) {
      randomForest = new alglib.decisionforest();
      var sourceForest = original.randomForest.innerobj;

      randomForest.innerobj.nvars = sourceForest.nvars;
      randomForest.innerobj.nclasses = sourceForest.nclasses;
      randomForest.innerobj.ntrees = sourceForest.ntrees;
      randomForest.innerobj.bufsize = sourceForest.bufsize;
      // the trees array (double[]) is shared, not copied; this assumes alglib treats it as immutable
      randomForest.innerobj.trees = sourceForest.trees;

      // allowedInputVariables and classValues are immutable, so sharing the references is sufficient
      allowedInputVariables = original.allowedInputVariables;
      classValues = original.classValues;

      // data which is necessary to rebuild the model
      originalTrainingData = cloner.Clone(original.originalTrainingData);
      seed = original.seed;
      nTrees = original.nTrees;
      r = original.r;
      m = original.m;
    }
 /// <summary>
 /// Calculates the impact value and the replacement value of a tree node in a single call.
 /// </summary>
 /// <param name="model">The symbolic model to evaluate.</param>
 /// <param name="node">The tree node whose impact and replacement value are calculated.</param>
 /// <param name="problemData">The problem data used for the evaluation.</param>
 /// <param name="rows">The dataset rows used for the evaluation.</param>
 /// <param name="impactValue">Receives the impact value of <paramref name="node"/>.</param>
 /// <param name="replacementValue">Receives the replacement value of <paramref name="node"/>.</param>
 /// <param name="newQualityForImpactsCalculation">Receives the quality obtained during the impact calculation.</param>
 /// <param name="qualityForImpactsCalculation">
 /// Reference quality; defaults to NaN.
 /// NOTE(review): the classification override computes the quality itself when NaN is passed
 /// and reports the impact as reference quality minus new quality - confirm that every
 /// override follows the same convention.
 /// </param>
 public abstract void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, out double impactValue, out double replacementValue, out double newQualityForImpactsCalculation, double qualityForImpactsCalculation = double.NaN);
 /// <summary>
 /// Calculates the impact value of a tree node of the given model.
 /// </summary>
 /// <param name="model">The symbolic model to evaluate.</param>
 /// <param name="node">The tree node whose impact is calculated.</param>
 /// <param name="problemData">The problem data used for the evaluation.</param>
 /// <param name="rows">The dataset rows used for the evaluation.</param>
 /// <param name="qualityForImpactsCalculation">
 /// Reference quality; defaults to NaN (presumably meaning "not yet calculated" - confirm against the overrides).
 /// </param>
 /// <returns>The impact value of <paramref name="node"/>.</returns>
 public abstract double CalculateImpactValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, double qualityForImpactsCalculation = double.NaN);
 /// <summary>
 /// Calculates the replacement value of a tree node of the given model.
 /// </summary>
 /// <param name="model">The symbolic model that contains the expression tree.</param>
 /// <param name="node">The tree node for which a replacement value is calculated.</param>
 /// <param name="problemData">The problem data that provides the dataset.</param>
 /// <param name="rows">The dataset rows used for the calculation.</param>
 /// <returns>
 /// The replacement value.
 /// NOTE(review): the classification override of CalculateImpactAndReplacementValues uses this
 /// value for a constant node that takes the place of <paramref name="node"/> - confirm for other overrides.
 /// </returns>
 public abstract double CalculateReplacementValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows);
Пример #46
0
 /// <summary>
 /// Exports the problem data by creating new problem data.
 /// </summary>
 /// <param name="source">Not used by this implementation.</param>
 /// <returns>The result of <c>CreateNewProblemData</c>.</returns>
 private IDataAnalysisProblemData ExportProblemData(IDataAnalysisProblemData source) {
   var exportedProblemData = CreateNewProblemData();
   return exportedProblemData;
 }
Пример #47
0
 /// <summary>
 /// Creates a preprocessing context named "Data Preprocessing" and imports the given problem data.
 /// </summary>
 /// <param name="problemData">The problem data to import; must not be null.</param>
 /// <param name="source">Optional item that is passed on to the import.</param>
 /// <exception cref="ArgumentNullException"><paramref name="problemData"/> is null.</exception>
 public PreprocessingContext(IDataAnalysisProblemData problemData, IItem source = null)
   : base("Data Preprocessing") {
   if (problemData == null) {
     throw new ArgumentNullException("problemData");
   }

   Import(problemData, source);
 }
 /// <summary>
 /// Creates a symbolic regression model for the target variable of the given problem data.
 /// </summary>
 /// <param name="tree">The expression tree of the model.</param>
 /// <param name="interpreter">The interpreter used to evaluate the tree.</param>
 /// <param name="problemData">Must be regression problem data; provides the target variable.</param>
 /// <param name="estimationLimits">Lower and upper limit passed to the model.</param>
 protected override ISymbolicDataAnalysisModel CreateModel(ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IDataAnalysisProblemData problemData, DoubleLimit estimationLimits) {
   var targetVariable = ((IRegressionProblemData)problemData).TargetVariable;
   var lowerLimit = estimationLimits.Lower;
   var upperLimit = estimationLimits.Upper;

   return new SymbolicRegressionModel(targetVariable, tree, interpreter, lowerLimit, upperLimit);
 }
 /// <summary>
 /// Creates a symbolic regression model from the tree, the interpreter and the estimation limits.
 /// </summary>
 /// <param name="tree">The expression tree of the model.</param>
 /// <param name="interpreter">The interpreter used to evaluate the tree.</param>
 /// <param name="problemData">Not used by this implementation.</param>
 /// <param name="estimationLimits">Lower and upper limit passed to the model.</param>
 protected override ISymbolicDataAnalysisModel CreateModel(ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IDataAnalysisProblemData problemData, DoubleLimit estimationLimits) {
   var lowerLimit = estimationLimits.Lower;
   var upperLimit = estimationLimits.Upper;

   return new SymbolicRegressionModel(tree, interpreter, lowerLimit, upperLimit);
 }
Пример #50
0
    /// <summary>
    /// Checks whether the given regression problem data can be used with this instance.
    /// </summary>
    /// <param name="problemData">The problem data to check; must be regression problem data.</param>
    /// <param name="errorMessage">Receives the collected incompatibility messages.</param>
    /// <returns>
    /// False if the target variable is missing in <paramref name="problemData"/>;
    /// otherwise the result of the base class check.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="problemData"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="problemData"/> is not regression problem data.</exception>
    protected override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
      if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
      if (!(problemData is IRegressionProblemData)) {
        throw new ArgumentException("The problem data is not a regression problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
      }

      bool baseCompatible = base.IsProblemDataCompatible(problemData, out errorMessage);

      // the target variable has to be among the variables of the new problem data
      bool targetMissing = problemData.InputVariables.All(v => v.Value != TargetVariable);
      if (!targetMissing) return baseCompatible;

      // put the target variable message in front of the messages of the base check
      errorMessage = string.Format("The target variable {0} is not present in the new problem data.", TargetVariable)
                     + Environment.NewLine + errorMessage;
      return false;
    }
    /// <summary>
    /// Checks whether the given classification problem data can be used with this instance.
    /// </summary>
    /// <param name="problemData">The problem data to check; must be classification problem data.</param>
    /// <param name="errorMessage">Receives the collected incompatibility messages.</param>
    /// <returns>
    /// False if the target variable is missing, the class values differ, the positive class
    /// differs, or the base class check fails; otherwise true.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="problemData"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="problemData"/> is not classification problem data.</exception>
    protected override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
      if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");

      var classificationProblemData = problemData as IClassificationProblemData;
      if (classificationProblemData == null) {
        throw new ArgumentException("The problem data is no classification problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
      }

      bool compatible = base.IsProblemDataCompatible(classificationProblemData, out errorMessage);

      // a missing target variable ends the check immediately; the remaining checks depend on it
      bool targetMissing = classificationProblemData.InputVariables.All(v => v.Value != TargetVariable);
      if (targetMissing) {
        errorMessage = string.Format("The target variable {0} is not present in the new problem data.", TargetVariable)
                       + Environment.NewLine + errorMessage;
        return false;
      }

      // compare the sorted distinct target values with the known class values
      var providedClassValues = classificationProblemData.Dataset.GetDoubleValues(TargetVariable).Distinct().OrderBy(x => x);
      if (!providedClassValues.SequenceEqual(ClassValues)) {
        errorMessage += "The class values differ in the provided classification problem data.";
        compatible = false;
      }

      var providedPositiveClass = classificationProblemData.PositiveClass;
      if (providedPositiveClass != PositiveClass) {
        errorMessage += "The positive class differs in the provided classification problem data.";
        compatible = false;
      }

      return compatible;
    }
 /// <summary>
 /// Calculates the impact value of a node by delegating to
 /// <c>CalculateImpactAndReplacementValues</c> and discarding the other results.
 /// </summary>
 /// <returns>The impact value of <paramref name="node"/>.</returns>
 public override double CalculateImpactValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, double qualityForImpactsCalculation = double.NaN) {
   double impact;
   // required by the called signature, but not needed by this method
   double ignoredReplacementValue;
   double ignoredNewQuality;

   CalculateImpactAndReplacementValues(model, node, problemData, rows,
     out impact, out ignoredReplacementValue, out ignoredNewQuality, qualityForImpactsCalculation);

   return impact;
 }
    /// <summary>
    /// Calculates the replacement value of a node for a symbolic classification model.
    /// </summary>
    /// <param name="model">Must be a symbolic classification model; provides tree and interpreter.</param>
    /// <param name="node">The tree node for which a replacement value is calculated.</param>
    /// <param name="problemData">Must be classification problem data; provides the dataset.</param>
    /// <param name="rows">The dataset rows used for the calculation.</param>
    /// <returns>The replacement value calculated by the tree-based overload.</returns>
    public override double CalculateReplacementValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows) {
      var symbolicModel = (ISymbolicClassificationModel)model;
      var classificationData = (IClassificationProblemData)problemData;

      var tree = symbolicModel.SymbolicExpressionTree;
      var interpreter = symbolicModel.Interpreter;
      var dataset = classificationData.Dataset;

      return CalculateReplacementValue(node, tree, interpreter, dataset, rows);
    }
    /// <summary>
    /// Takes over the properties of the given time series prognosis problem data.
    /// </summary>
    /// <param name="problemData">The problem data to adjust to; must be a <c>TimeSeriesPrognosisProblemData</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="problemData"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="problemData"/> is not time series problem data.</exception>
    public override void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) {
      // without this guard a null argument fails with a NullReferenceException while the
      // type-mismatch message below is built
      if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");

      TimeSeriesPrognosisProblemData timeSeriesProblemData = problemData as TimeSeriesPrognosisProblemData;
      if (timeSeriesProblemData == null)
        throw new ArgumentException("The problem data is not a timeseries problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");

      // captured before the base call; presumably the base implementation changes the partitions - confirm
      var trainingDataStart = TrainingIndices.First();

      base.AdjustProblemDataProperties(problemData);

      // the test partition starts where the training data started before the adjustment
      TestPartition.Start = trainingDataStart;

      TrainingHorizon = timeSeriesProblemData.TrainingHorizon;
      TestHorizon = timeSeriesProblemData.TestHorizon;
    }
Пример #55
0
    /// <summary>
    /// Takes over the properties of the given trading problem data: the valid price change
    /// variables, the selected price change variable and the transaction costs.
    /// </summary>
    /// <param name="problemData">The problem data to adjust to; must be trading <c>ProblemData</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="problemData"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="problemData"/> is not trading problem data.</exception>
    /// <exception cref="InvalidOperationException">The given problem data is not compatible with this instance.</exception>
    public override void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) {
      // without this guard a null argument fails with a NullReferenceException while the
      // type-mismatch message below is built
      if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");

      var data = problemData as ProblemData;
      if (data == null) throw new ArgumentException("The problem data is not a problem data set for trading. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");

      string errorMessage;
      if (!data.IsProblemDataCompatible(this, out errorMessage)) {
        throw new InvalidOperationException(errorMessage);
      }

      base.AdjustProblemDataProperties(data);

      // start with all current valid values as deletion candidates; values that also occur
      // in the given problem data are kept, values that are missing here are added
      var toDelete = PriceChangeVariableParameter.ValidValues.ToList();
      foreach (var entry in data.PriceChangeVariableParameter.ValidValues) {
        // RemoveAll reports how many candidates matched; zero means the value is new
        if (toDelete.RemoveAll(x => x.Value == entry.Value) == 0) {
          PriceChangeVariableParameter.ValidValues.Add(new StringValue(entry.Value));
        }
      }

      // select the new value before the obsolete valid values are removed
      PriceChangeVariableParameter.Value =
        PriceChangeVariableParameter.ValidValues.Single(v => v.Value == data.PriceChangeVariable);

      foreach (var varToDelete in toDelete) PriceChangeVariableParameter.ValidValues.Remove(varToDelete);

      TransactionCostsParameter.Value.Value = data.TransactionCosts;

      OnChanged();
    }
Пример #56
0
    /// <summary>
    /// Takes over the properties of the given regression problem data, including its target variable.
    /// </summary>
    /// <param name="problemData">The problem data to adjust to; must be a <c>RegressionProblemData</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="problemData"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="problemData"/> is not regression problem data.</exception>
    public override void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) {
      if (problemData == null) {
        throw new ArgumentNullException("problemData", "The provided problemData is null.");
      }

      var sourceData = problemData as RegressionProblemData;
      if (sourceData == null) {
        var message = "The problem data is not a regression problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.";
        throw new ArgumentException(message, "problemData");
      }

      base.AdjustProblemDataProperties(problemData);

      // set after the base call, in the same order as before
      TargetVariable = sourceData.TargetVariable;
    }
    /// <summary>
    /// Writes the dataset of the problem data to the worksheet: variable names in the first
    /// row, followed by one worksheet row per dataset row.
    /// </summary>
    /// <param name="datasetWorksheet">The worksheet that receives the data.</param>
    /// <param name="problemData">The problem data whose dataset is written.</param>
    protected void WriteDatasetToExcel(ExcelWorksheet datasetWorksheet, IDataAnalysisProblemData problemData) {
      // Remark: the performance of EPPlus drops dramatically if the data is not written
      // row wise (from left to right) due to the internal indices used.
      IDataset dataset = problemData.Dataset;
      var columnNames = dataset.VariableNames.ToList();
      var doubleColumnNames = new HashSet<string>(dataset.DoubleVariables);
      int columnCount = columnNames.Count;

      // header row (worksheet indices are 1-based)
      for (int headerIndex = 0; headerIndex < columnCount; headerIndex++)
        datasetWorksheet.Cells[1, headerIndex + 1].Value = columnNames[headerIndex];

      // data rows start in worksheet row 2
      for (int row = 0; row < dataset.Rows; row++) {
        int sheetRow = row + 2;
        for (int col = 0; col < columnCount; col++) {
          string columnName = columnNames[col];
          int sheetColumn = col + 1;

          // double variables are written as numbers, everything else via GetValue
          if (doubleColumnNames.Contains(columnName))
            datasetWorksheet.Cells[sheetRow, sheetColumn].Value = dataset.GetDoubleValue(columnName, row);
          else
            datasetWorksheet.Cells[sheetRow, sheetColumn].Value = dataset.GetValue(row, col);
        }
      }
    }
    /// <summary>
    /// Takes over the checked state of the input variables from the given problem data and
    /// resets the partitions so that the training partition is empty and the test partition
    /// spans all rows of the dataset.
    /// </summary>
    /// <param name="problemData">The problem data to adjust to; must be a <c>DataAnalysisProblemData</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="problemData"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="problemData"/> is not data analysis problem data.</exception>
    /// <exception cref="InvalidOperationException">The given problem data is not compatible with this instance.</exception>
    public virtual void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) {
      // without this guard a null argument fails with a NullReferenceException while the
      // type-mismatch message below is built
      if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");

      DataAnalysisProblemData data = problemData as DataAnalysisProblemData;
      if (data == null) throw new ArgumentException("The problem data is not a data analysis problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");

      string errorMessage;
      if (!data.IsProblemDataCompatible(this, out errorMessage)) {
        throw new InvalidOperationException(errorMessage);
      }

      // an input variable stays checked only if the given problem data has a checked
      // variable with the same name
      foreach (var inputVariable in InputVariables) {
        var variable = data.InputVariables.FirstOrDefault(i => i.Value == inputVariable.Value);
        InputVariables.SetItemCheckedState(inputVariable, variable != null && data.InputVariables.ItemChecked(variable));
      }

      TrainingPartition.Start = TrainingPartition.End = 0;
      TestPartition.Start = 0;
      TestPartition.End = Dataset.Rows;
    }
 /// <summary>
 /// Creates the symbolic data analysis model for the given expression tree.
 /// </summary>
 /// <param name="tree">The expression tree of the model.</param>
 /// <param name="interpreter">The interpreter used to evaluate the tree.</param>
 /// <param name="problemData">The problem data the model is created for.</param>
 /// <param name="estimationLimits">The limits for the estimated values of the model.</param>
 /// <returns>The created model.</returns>
 protected abstract ISymbolicDataAnalysisModel CreateModel(ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IDataAnalysisProblemData problemData, DoubleLimit estimationLimits);
    /// <summary>
    /// Takes over the properties of the given classification problem data: target variable,
    /// class names, positive class and classification penalties.
    /// </summary>
    /// <param name="problemData">The problem data to adjust to; must be a <c>ClassificationProblemData</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="problemData"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="problemData"/> is not classification problem data.</exception>
    public override void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) {
      if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
      ClassificationProblemData classificationProblemData = problemData as ClassificationProblemData;
      if (classificationProblemData == null)
        throw new ArgumentException("The problem data is not a classification problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");

      base.AdjustProblemDataProperties(problemData);
      TargetVariable = classificationProblemData.TargetVariable;

      // enumerate the class names once; Count() in the loop condition together with
      // ElementAt(i) would walk the sequence again in every iteration
      var classNames = classificationProblemData.ClassNames.ToList();
      for (int i = 0; i < classNames.Count; i++)
        ClassNamesParameter.Value[i, 0] = classNames[i];

      PositiveClass = classificationProblemData.PositiveClass;

      // copy the complete penalty matrix, looked up by class value
      for (int i = 0; i < Classes; i++) {
        for (int j = 0; j < Classes; j++) {
          ClassificationPenaltiesParameter.Value[i, j] = classificationProblemData.GetClassificationPenalty(ClassValuesCache[i], ClassValuesCache[j]);
        }
      }
    }