/// <summary>
/// Compiles <paramref name="tree"/> into an instruction array, attaches the matching dataset
/// column to every variable-reading instruction and wraps the result in an
/// <see cref="InterpreterState"/>.
/// </summary>
/// <param name="tree">Tree handed to <c>SymbolicExpressionTreeCompiler.Compile</c>.</param>
/// <param name="dataset">Dataset whose double columns are looked up by variable name.</param>
/// <returns>
/// A new state built from the compiled instructions and the argument stack size, which is the
/// sum of <c>nArguments + 1</c> over all <c>Call</c> instructions.
/// </returns>
private static InterpreterState PrepareInterpreterState(ISymbolicExpressionTree tree, IDataset dataset) {
  Instruction[] instructions = SymbolicExpressionTreeCompiler.Compile(tree, OpCodes.MapSymbolToOpCode);
  int argStackSize = 0;

  for (int k = 0; k < instructions.Length; k++) {
    Instruction current = instructions[k];

    // Call instructions only contribute to the required argument stack size.
    if (current.opCode == OpCodes.Call) {
      argStackSize += current.nArguments + 1;
      continue;
    }

    // The three variable-reading opcodes all cache the column of the variable they refer to.
    if (current.opCode == OpCodes.Variable) {
      var node = (VariableTreeNode)current.dynamicNode;
      current.data = dataset.GetReadOnlyDoubleValues(node.VariableName);
    } else if (current.opCode == OpCodes.LagVariable) {
      var node = (LaggedVariableTreeNode)current.dynamicNode;
      current.data = dataset.GetReadOnlyDoubleValues(node.VariableName);
    } else if (current.opCode == OpCodes.VariableCondition) {
      var node = (VariableConditionTreeNode)current.dynamicNode;
      current.data = dataset.GetReadOnlyDoubleValues(node.VariableName);
    }
  }

  return new InterpreterState(instructions, argStackSize);
}
/// <summary>
/// Pre-processes a linear instruction sequence before evaluation: constants get their value
/// stored, variable-reading instructions get their dataset column attached, and
/// TimeLag/Integral/Derivative instructions get a nested <see cref="InterpreterState"/>.
/// </summary>
/// <param name="code">Instructions to prepare; they are modified in place.</param>
/// <param name="dataset">Dataset whose double columns are looked up by variable name.</param>
public static void PrepareInstructions(LinearInstruction[] code, IDataset dataset) {
  for (int index = 0; index < code.Length; index++) {
    var current = code[index];

    switch (current.opCode) {
      case OpCodes.Constant:
        // The value is stored right here, so the evaluation phase can skip this instruction.
        current.value = ((ConstantTreeNode)current.dynamicNode).Value;
        current.skip = true;
        break;

      case OpCodes.Variable:
        current.data = dataset.GetReadOnlyDoubleValues(((VariableTreeNode)current.dynamicNode).VariableName);
        break;

      case OpCodes.LagVariable:
        current.data = dataset.GetReadOnlyDoubleValues(((LaggedVariableTreeNode)current.dynamicNode).VariableName);
        break;

      case OpCodes.VariableCondition:
        current.data = dataset.GetReadOnlyDoubleValues(((VariableConditionTreeNode)current.dynamicNode).VariableName);
        break;

      case OpCodes.TimeLag:
      case OpCodes.Integral:
      case OpCodes.Derivative: {
          // These instructions carry their own interpreter state over the prefix sequence that
          // starts at this instruction; every element of that sequence except the first is
          // flagged as skipped.
          var prefix = GetPrefixSequence(code, index);
          current.data = new InterpreterState(prefix, 0);
          for (int k = 1; k != prefix.Length; ++k) {
            prefix[k].skip = true;
          }
          break;
        }
    }
  }
}
/// <summary>
/// Produces one multi-step forecast per (row, horizon) pair.
/// </summary>
/// <remarks>
/// Each forecast value is <c>Constant</c> plus the sum over <c>i = 1..TimeOffset</c> of
/// <c>Phi[i - 1]</c> times the value <c>i</c> steps back; that value is an earlier forecast
/// of the same prognosis when available, otherwise the target value from the dataset.
/// The sequence is evaluated lazily.
/// </remarks>
/// <param name="dataset">Dataset providing the values of <c>TargetVariable</c>.</param>
/// <param name="rows">Start rows, paired element-wise with <paramref name="horizons"/>.</param>
/// <param name="horizons">Number of steps to forecast for the corresponding row.</param>
/// <returns>
/// For each pair a sequence of <c>horizon</c> values; rows with <c>row - TimeOffset &lt; 0</c>
/// yield <c>horizon</c> NaN values instead.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown during enumeration when <paramref name="rows"/> and <paramref name="horizons"/>
/// do not contain the same number of elements.
/// </exception>
public IEnumerable<IEnumerable<double>> GetPrognosedValues(IDataset dataset, IEnumerable<int> rows, IEnumerable<int> horizons) {
  // Both enumerators are disposed when iteration finishes or the caller stops early.
  using (var rowsEnumerator = rows.GetEnumerator())
  using (var horizonsEnumerator = horizons.GetEnumerator()) {
    var targetValues = dataset.GetReadOnlyDoubleValues(TargetVariable);

    // produce a n-step forecast for all rows
    while (true) {
      // Advance both enumerators unconditionally and compare the results directly. Checking
      // with another MoveNext() after the loop would miss a length difference of exactly one,
      // because the surplus element has already been consumed at that point.
      bool hasRow = rowsEnumerator.MoveNext();
      bool hasHorizon = horizonsEnumerator.MoveNext();
      if (hasRow != hasHorizon) {
        throw new ArgumentException("Number of elements in rows and horizon enumerations doesn't match.");
      }
      if (!hasRow) {
        yield break;
      }

      int row = rowsEnumerator.Current;
      int horizon = horizonsEnumerator.Current;

      // Not enough history before this row to fill all lags.
      if (row - TimeOffset < 0) {
        yield return Enumerable.Repeat(double.NaN, horizon);
        continue;
      }

      double[] prognosis = new double[horizon];
      for (int h = 0; h < horizon; h++) {
        double estimatedValue = 0.0;
        for (int i = 1; i <= TimeOffset; i++) {
          int offset = h - i;
          // offset >= 0: the lagged value is an earlier step of this prognosis;
          // otherwise it lies before the start row and is read from the dataset.
          if (offset >= 0) {
            estimatedValue += prognosis[offset] * Phi[i - 1];
          } else {
            estimatedValue += targetValues[row + offset] * Phi[i - 1];
          }
        }
        estimatedValue += Constant;
        prognosis[h] = estimatedValue;
      }
      yield return prognosis;
    }
  }
}
/// <summary>
/// Evaluates <paramref name="tree"/> for the given rows by emitting a dynamic method for it
/// and invoking the resulting delegate once per row.
/// </summary>
/// <remarks>
/// This is an iterator: the interval-arithmetic check, the counter increment and the
/// compilation all run when the returned sequence is first enumerated, not at call time.
/// </remarks>
/// <param name="tree">Tree to compile.</param>
/// <param name="dataset">Dataset supplying the double columns passed to the compiled function.</param>
/// <param name="rows">Row indices to evaluate.</param>
/// <returns>One value per element of <paramref name="rows"/>, in order.</returns>
/// <exception cref="NotSupportedException">
/// Thrown when <c>CheckExpressionsWithIntervalArithmetic.Value</c> is set.
/// </exception>
public IEnumerable<double> GetSymbolicExpressionTreeValues(ISymbolicExpressionTree tree, IDataset dataset, IEnumerable<int> rows) {
  if (CheckExpressionsWithIntervalArithmetic.Value) {
    throw new NotSupportedException("Interval arithmetic is not yet supported in the symbolic data analysis interpreter.");
  }

  // increment the evaluated solutions counter
  EvaluatedSolutions.Value++;

  var interpreterState = PrepareInterpreterState(tree, dataset);

  // Emitted method signature: double TestFun(int, IList<double>[])
  var parameterTypes = new Type[] { typeof(int), typeof(IList<double>[]) };
  var dynamicMethod = new DynamicMethod(
    "TestFun",
    typeof(double),
    parameterTypes,
    typeof(SymbolicDataAnalysisExpressionTreeILEmittingInterpreter).Module);

  ILGenerator generator = dynamicMethod.GetILGenerator();
  CompileInstructions(generator, interpreterState, dataset);
  generator.Emit(System.Reflection.Emit.OpCodes.Conv_R8);
  generator.Emit(System.Reflection.Emit.OpCodes.Ret);

  var compiled = (CompiledFunction)dynamicMethod.CreateDelegate(typeof(CompiledFunction));

  IList<double>[] columns = dataset.DoubleVariables.Select(name => dataset.GetReadOnlyDoubleValues(name)).ToArray();
  foreach (var rowIndex in rows) {
    yield return compiled(rowIndex, columns);
  }
}
/// <summary>
/// Returns a lazily evaluated prediction for each requested row.
/// </summary>
/// <param name="ds">Dataset supplying the columns referenced by the tree nodes.</param>
/// <param name="rows">Row indices to predict.</param>
/// <returns>The result of <c>GetPredictionForRow</c> for each row, in order.</returns>
public IEnumerable<double> GetEstimatedValues(IDataset ds, IEnumerable<int> rows) {
  // Resolve the column of every node's variable once up front so that the per-row
  // evaluation does not have to look columns up by name.
  var columnsByNode = new ReadOnlyCollection<double>[tree.Length];
  for (int nodeIndex = 0; nodeIndex < tree.Length; nodeIndex++) {
    var variableName = tree[nodeIndex].VarName;
    if (variableName == TreeNode.NO_VARIABLE) {
      continue; // nodes without a variable keep a null entry
    }
    columnsByNode[nodeIndex] = ds.GetReadOnlyDoubleValues(variableName);
  }

  return rows.Select(row => GetPredictionForRow(tree, columnsByNode, 0, row));
}
/// <summary>
/// Flattens the expression below the tree's start node into an array of batch instructions
/// in breadth-first order.
/// </summary>
/// <param name="tree">Tree to compile; compilation starts at <c>Root.GetSubtree(0).GetSubtree(0)</c>.</param>
/// <param name="dataset">Dataset that variable columns are copied from when not yet cached.</param>
/// <param name="opCodeMapper">Maps a tree node to its opcode.</param>
/// <returns>One instruction per node; <c>childIndex</c> is the array index of the node's first child.</returns>
/// <exception cref="ArgumentException">Thrown when a node has more than <see cref="ushort.MaxValue"/> subtrees.</exception>
private BatchInstruction[] Compile(ISymbolicExpressionTree tree, IDataset dataset, Func<ISymbolicExpressionTreeNode, byte> opCodeMapper) {
  var root = tree.Root.GetSubtree(0).GetSubtree(0);
  var code = new BatchInstruction[root.GetLength()];
  if (root.SubtreeCount > ushort.MaxValue) {
    throw new ArgumentException("Number of subtrees is too big (>65.535)");
  }

  // c: index at which the children of the current node start; i: index of the current node.
  int c = 1, i = 0;
  foreach (var node in root.IterateNodesBreadth()) {
    if (node.SubtreeCount > ushort.MaxValue) {
      throw new ArgumentException("Number of subtrees is too big (>65.535)");
    }

    code[i] = new BatchInstruction {
      opcode = opCodeMapper(node),
      narg = (ushort)node.SubtreeCount,
      buf = new double[BATCHSIZE],
      childIndex = c
    };

    if (node is VariableTreeNode variable) {
      code[i].weight = variable.Weight;
      // Single lookup instead of ContainsKey followed by the indexer; a column that is
      // not cached yet is copied from the dataset and remembered for later nodes.
      if (!cachedData.TryGetValue(variable.VariableName, out var column)) {
        column = dataset.GetReadOnlyDoubleValues(variable.VariableName).ToArray();
        cachedData[variable.VariableName] = column;
      }
      code[i].data = column;
    } else if (node is ConstantTreeNode constant) {
      code[i].value = constant.Value;
      // Pre-fill the whole buffer with the constant's value.
      var fill = code[i].value;
      var buffer = code[i].buf;
      for (int j = 0; j < BATCHSIZE; ++j) {
        buffer[j] = fill;
      }
    }

    c += node.SubtreeCount;
    ++i;
  }
  return code;
}
/// <summary>
/// Compiles <paramref name="tree"/> via <c>CompileTree</c> and returns a lazily evaluated
/// sequence that applies the compiled function to each requested row.
/// </summary>
/// <param name="tree">Tree to compile.</param>
/// <param name="dataset">Dataset supplying the double columns passed to the compiled function.</param>
/// <param name="rows">Row indices to evaluate.</param>
/// <returns>One value per element of <paramref name="rows"/>, in order.</returns>
/// <exception cref="NotSupportedException">
/// Thrown when <c>CheckExpressionsWithIntervalArithmetic</c> is set.
/// </exception>
public IEnumerable<double> GetSymbolicExpressionTreeValues(ISymbolicExpressionTree tree, IDataset dataset, IEnumerable<int> rows) {
  if (CheckExpressionsWithIntervalArithmetic) {
    throw new NotSupportedException("Interval arithmetic is not yet supported in the symbolic data analysis interpreter.");
  }

  // increment the evaluated solutions counter
  lock (syncRoot) {
    EvaluatedSolutions++;
  }

  // Gather all double columns first, then compile (same order as the lookups happen at call time).
  var doubleColumns = dataset.DoubleVariables
    .Select(name => (IList<double>)dataset.GetReadOnlyDoubleValues(name))
    .ToArray();
  var evaluate = CompileTree(tree, dataset);

  return rows.Select(row => evaluate(row, doubleColumns));
}
/// <summary>
/// Produces one multi-step forecast per (row, horizon) pair.
/// </summary>
/// <remarks>
/// Each forecast value is <c>Constant</c> plus the sum over <c>i = 1..TimeOffset</c> of
/// <c>Phi[i - 1]</c> times the value <c>i</c> steps back; that value is an earlier forecast
/// of the same prognosis when available, otherwise the target value from the dataset.
/// The sequence is evaluated lazily.
/// </remarks>
/// <param name="dataset">Dataset providing the values of <c>TargetVariable</c>.</param>
/// <param name="rows">Start rows, paired element-wise with <paramref name="horizons"/>.</param>
/// <param name="horizons">Number of steps to forecast for the corresponding row.</param>
/// <returns>
/// For each pair a sequence of <c>horizon</c> values; rows with <c>row - TimeOffset &lt; 0</c>
/// yield <c>horizon</c> NaN values instead.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown during enumeration when <paramref name="rows"/> and <paramref name="horizons"/>
/// do not contain the same number of elements.
/// </exception>
public IEnumerable<IEnumerable<double>> GetPrognosedValues(IDataset dataset, IEnumerable<int> rows, IEnumerable<int> horizons) {
  // Both enumerators are disposed when iteration finishes or the caller stops early.
  using (var rowsEnumerator = rows.GetEnumerator())
  using (var horizonsEnumerator = horizons.GetEnumerator()) {
    var targetValues = dataset.GetReadOnlyDoubleValues(TargetVariable);

    // produce a n-step forecast for all rows
    while (true) {
      // Advance both enumerators unconditionally and compare the results directly. Checking
      // with another MoveNext() after the loop would miss a length difference of exactly one,
      // because the surplus element has already been consumed at that point.
      bool hasRow = rowsEnumerator.MoveNext();
      bool hasHorizon = horizonsEnumerator.MoveNext();
      if (hasRow != hasHorizon) {
        throw new ArgumentException("Number of elements in rows and horizon enumerations doesn't match.");
      }
      if (!hasRow) {
        yield break;
      }

      int row = rowsEnumerator.Current;
      int horizon = horizonsEnumerator.Current;

      // Not enough history before this row to fill all lags.
      if (row - TimeOffset < 0) {
        yield return Enumerable.Repeat(double.NaN, horizon);
        continue;
      }

      double[] prognosis = new double[horizon];
      for (int h = 0; h < horizon; h++) {
        double estimatedValue = 0.0;
        for (int i = 1; i <= TimeOffset; i++) {
          int offset = h - i;
          // offset >= 0: the lagged value is an earlier step of this prognosis;
          // otherwise it lies before the start row and is read from the dataset.
          if (offset >= 0) {
            estimatedValue += prognosis[offset] * Phi[i - 1];
          } else {
            estimatedValue += targetValues[row + offset] * Phi[i - 1];
          }
        }
        estimatedValue += Constant;
        prognosis[h] = estimatedValue;
      }
      yield return prognosis;
    }
  }
}
/// <summary>
/// Returns a lazily evaluated prediction for each requested row.
/// </summary>
/// <param name="ds">Dataset supplying the columns referenced by the tree nodes.</param>
/// <param name="rows">Row indices to predict.</param>
/// <returns>The result of <c>GetPredictionForRow</c> for each row, in order.</returns>
public override IEnumerable<double> GetEstimatedValues(IDataset ds, IEnumerable<int> rows) {
  // Resolve the column of every node's variable once up front so that the per-row
  // evaluation does not have to look columns up by name.
  var columnsByNode = new ReadOnlyCollection<double>[tree.Length];
  for (int nodeIndex = 0; nodeIndex < tree.Length; nodeIndex++) {
    var variableName = tree[nodeIndex].VarName;
    bool usesVariable = variableName != TreeNode.NO_VARIABLE;

    // tree models also support calculating estimations if not all variables used for
    // training are available in the dataset; such nodes keep a null entry
    if (usesVariable && ds.ColumnNames.Contains(variableName)) {
      columnsByNode[nodeIndex] = ds.GetReadOnlyDoubleValues(variableName);
    }
  }

  return rows.Select(row => GetPredictionForRow(tree, columnsByNode, 0, row));
}
/// <summary>
/// Lazily computes a one-step estimate for each requested row from the preceding
/// <c>TimeOffset</c> target values.
/// </summary>
/// <param name="dataset">Dataset providing the values of <c>TargetVariable</c>.</param>
/// <param name="rows">Row indices to estimate.</param>
/// <returns>
/// For each row, <c>Constant</c> plus the sum over <c>i = 1..TimeOffset</c> of
/// <c>target[row - i] * Phi[i - 1]</c>; NaN for rows with <c>row - TimeOffset &lt; 0</c>.
/// </returns>
public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
  var targetValues = dataset.GetReadOnlyDoubleValues(TargetVariable);

  foreach (int row in rows) {
    // Not enough history before this row to fill all lags.
    if (row - TimeOffset < 0) {
      yield return double.NaN;
      continue;
    }

    double weightedSum = 0.0;
    for (int lag = 1; lag <= TimeOffset; lag++) {
      weightedSum += targetValues[row - lag] * Phi[lag - 1];
    }
    weightedSum += Constant;

    yield return weightedSum;
  }
}
/// <summary>
/// Overwrites rows <c>0, 1, 2, ...</c> of <paramref name="ds"/> with rows drawn from
/// <paramref name="sourceDs"/> and then sets the target column of each written row to the
/// corresponding entry of <paramref name="curTarget"/>.
/// </summary>
/// <param name="rand">Random number generator passed to the sampling helper.</param>
/// <param name="ds">Dataset that receives the sampled rows.</param>
/// <param name="rRows">Sample size passed to <c>SampleRandomWithoutRepetition</c>.</param>
/// <param name="sourceDs">Dataset the row values are read from.</param>
/// <param name="curTarget">Target values, indexed by source row.</param>
/// <param name="targetVarName">Name of the target variable in <paramref name="ds"/>.</param>
/// <param name="trainingIndices">Candidate source row indices.</param>
private void SampleTrainingData(MersenneTwister rand, ModifiableDataset ds, int rRows, IDataset sourceDs, double[] curTarget, string targetVarName, IEnumerable<int> trainingIndices) {
  var selectedRows = trainingIndices.SampleRandomWithoutRepetition(rand, rRows).ToArray();
  object[] srcRow = new object[ds.Columns];
  var varNames = ds.DoubleVariables.ToArray();

  // Nothing selected: do not touch the source dataset at all.
  if (selectedRows.Length == 0) {
    return;
  }

  // The source columns do not depend on the row, so resolve them once instead of once per
  // row and column.
  // NOTE(review): assumes sourceDs is not modified by the ds.ReplaceRow / ds.SetVariableValue
  // calls below (i.e. it is a different dataset than ds) - confirm against callers.
  var sourceColumns = Enumerable.Range(0, srcRow.Length)
    .Select(c => sourceDs.GetReadOnlyDoubleValues(varNames[c]))
    .ToArray();

  int t = 0;
  foreach (var r in selectedRows) {
    // take all values from the original dataset
    for (int c = 0; c < srcRow.Length; c++) {
      srcRow[c] = sourceColumns[c][r];
    }
    ds.ReplaceRow(t, srcRow);

    // but use the updated target values
    ds.SetVariableValue(curTarget[r], targetVarName, t);
    t++;
  }
}
/// <summary>
/// Evaluates <paramref name="tree"/> for the given rows by emitting a dynamic method for it
/// and invoking the resulting delegate once per row.
/// </summary>
/// <remarks>
/// This is an iterator: the interval-arithmetic check, the counter increment and the
/// compilation all run when the returned sequence is first enumerated, not at call time.
/// </remarks>
/// <param name="tree">Tree to compile.</param>
/// <param name="dataset">Dataset supplying the double columns passed to the compiled function.</param>
/// <param name="rows">Row indices to evaluate.</param>
/// <returns>One value per element of <paramref name="rows"/>, in order.</returns>
/// <exception cref="NotSupportedException">
/// Thrown when <c>CheckExpressionsWithIntervalArithmetic.Value</c> is set.
/// </exception>
public IEnumerable<double> GetSymbolicExpressionTreeValues(ISymbolicExpressionTree tree, IDataset dataset, IEnumerable<int> rows) {
  if (CheckExpressionsWithIntervalArithmetic.Value) {
    throw new NotSupportedException("Interval arithmetic is not yet supported in the symbolic data analysis interpreter.");
  }

  // increment the evaluated solutions counter
  EvaluatedSolutions.Value++;

  var interpreterState = PrepareInterpreterState(tree, dataset);

  // Emitted method signature: double TestFun(int, IList<double>[])
  var parameterTypes = new Type[] { typeof(int), typeof(IList<double>[]) };
  var dynamicMethod = new DynamicMethod(
    "TestFun",
    typeof(double),
    parameterTypes,
    typeof(SymbolicDataAnalysisExpressionTreeILEmittingInterpreter).Module);

  ILGenerator generator = dynamicMethod.GetILGenerator();
  CompileInstructions(generator, interpreterState, dataset);
  generator.Emit(System.Reflection.Emit.OpCodes.Conv_R8);
  generator.Emit(System.Reflection.Emit.OpCodes.Ret);

  var compiled = (CompiledFunction)dynamicMethod.CreateDelegate(typeof(CompiledFunction));

  IList<double>[] columns = dataset.DoubleVariables.Select(name => dataset.GetReadOnlyDoubleValues(name)).ToArray();
  foreach (var rowIndex in rows) {
    yield return compiled(rowIndex, columns);
  }
}
/// <summary>
/// Overwrites rows <c>0, 1, 2, ...</c> of <paramref name="ds"/> with rows drawn from
/// <paramref name="sourceDs"/> and then sets the target column of each written row to the
/// corresponding entry of <paramref name="curTarget"/>.
/// </summary>
/// <param name="rand">Random number generator passed to the sampling helper.</param>
/// <param name="ds">Dataset that receives the sampled rows.</param>
/// <param name="rRows">Sample size passed to <c>SampleRandomWithoutRepetition</c>.</param>
/// <param name="sourceDs">Dataset the row values are read from.</param>
/// <param name="curTarget">Target values, indexed by source row.</param>
/// <param name="targetVarName">Name of the target variable in <paramref name="ds"/>.</param>
/// <param name="trainingIndices">Candidate source row indices.</param>
private void SampleTrainingData(MersenneTwister rand, ModifiableDataset ds, int rRows, IDataset sourceDs, double[] curTarget, string targetVarName, IEnumerable<int> trainingIndices) {
  var selectedRows = trainingIndices.SampleRandomWithoutRepetition(rand, rRows).ToArray();
  object[] srcRow = new object[ds.Columns];
  var varNames = ds.DoubleVariables.ToArray();

  // Nothing selected: do not touch the source dataset at all.
  if (selectedRows.Length == 0) {
    return;
  }

  // The source columns do not depend on the row, so resolve them once instead of once per
  // row and column.
  // NOTE(review): assumes sourceDs is not modified by the ds.ReplaceRow / ds.SetVariableValue
  // calls below (i.e. it is a different dataset than ds) - confirm against callers.
  var sourceColumns = Enumerable.Range(0, srcRow.Length)
    .Select(c => sourceDs.GetReadOnlyDoubleValues(varNames[c]))
    .ToArray();

  int t = 0;
  foreach (var r in selectedRows) {
    // take all values from the original dataset
    for (int c = 0; c < srcRow.Length; c++) {
      srcRow[c] = sourceColumns[c][r];
    }
    ds.ReplaceRow(t, srcRow);

    // but use the updated target values
    ds.SetVariableValue(curTarget[r], targetVarName, t);
    t++;
  }
}
/// <summary>
/// Pre-processes a linear instruction sequence before evaluation: constants get their value
/// stored, variable-reading instructions get their dataset column attached, and
/// TimeLag/Integral/Derivative instructions get a nested <see cref="InterpreterState"/>.
/// </summary>
/// <param name="code">Instructions to prepare; they are modified in place.</param>
/// <param name="dataset">Dataset whose double columns are looked up by variable name.</param>
public static void PrepareInstructions(LinearInstruction[] code, IDataset dataset) {
  for (int index = 0; index < code.Length; index++) {
    var current = code[index];

    switch (current.opCode) {
      case OpCodes.Constant:
        // The value is stored right here, so the evaluation phase can skip this instruction.
        current.value = ((ConstantTreeNode)current.dynamicNode).Value;
        current.skip = true;
        break;

      case OpCodes.Variable:
        current.data = dataset.GetReadOnlyDoubleValues(((VariableTreeNode)current.dynamicNode).VariableName);
        break;

      case OpCodes.LagVariable:
        current.data = dataset.GetReadOnlyDoubleValues(((LaggedVariableTreeNode)current.dynamicNode).VariableName);
        break;

      case OpCodes.VariableCondition:
        current.data = dataset.GetReadOnlyDoubleValues(((VariableConditionTreeNode)current.dynamicNode).VariableName);
        break;

      case OpCodes.TimeLag:
      case OpCodes.Integral:
      case OpCodes.Derivative: {
          // These instructions carry their own interpreter state over the prefix sequence that
          // starts at this instruction; every element of that sequence except the first is
          // flagged as skipped.
          var prefix = GetPrefixSequence(code, index);
          current.data = new InterpreterState(prefix, 0);
          for (int k = 1; k != prefix.Length; ++k) {
            prefix[k].skip = true;
          }
          break;
        }
    }
  }
}
/// <summary>
/// Compiles <paramref name="tree"/> into an instruction array, attaches the matching dataset
/// column to every variable-reading instruction and wraps the result in an
/// <see cref="InterpreterState"/>.
/// </summary>
/// <param name="tree">Tree handed to <c>SymbolicExpressionTreeCompiler.Compile</c>.</param>
/// <param name="dataset">Dataset whose double columns are looked up by variable name.</param>
/// <returns>
/// A new state built from the compiled instructions and the argument stack size, which is the
/// sum of <c>nArguments + 1</c> over all <c>Call</c> instructions.
/// </returns>
private static InterpreterState PrepareInterpreterState(ISymbolicExpressionTree tree, IDataset dataset) {
  Instruction[] instructions = SymbolicExpressionTreeCompiler.Compile(tree, OpCodes.MapSymbolToOpCode);
  int argStackSize = 0;

  for (int k = 0; k < instructions.Length; k++) {
    Instruction current = instructions[k];

    // Call instructions only contribute to the required argument stack size.
    if (current.opCode == OpCodes.Call) {
      argStackSize += current.nArguments + 1;
      continue;
    }

    // The three variable-reading opcodes all cache the column of the variable they refer to.
    if (current.opCode == OpCodes.Variable) {
      var node = (VariableTreeNode)current.dynamicNode;
      current.data = dataset.GetReadOnlyDoubleValues(node.VariableName);
    } else if (current.opCode == OpCodes.LagVariable) {
      var node = (LaggedVariableTreeNode)current.dynamicNode;
      current.data = dataset.GetReadOnlyDoubleValues(node.VariableName);
    } else if (current.opCode == OpCodes.VariableCondition) {
      var node = (VariableConditionTreeNode)current.dynamicNode;
      current.data = dataset.GetReadOnlyDoubleValues(node.VariableName);
    }
  }

  return new InterpreterState(instructions, argStackSize);
}
/// <summary>
/// Returns a lazily evaluated prediction for each requested row.
/// </summary>
/// <param name="ds">Dataset supplying the columns referenced by the tree nodes.</param>
/// <param name="rows">Row indices to predict.</param>
/// <returns>The result of <c>GetPredictionForRow</c> for each row, in order.</returns>
public override IEnumerable<double> GetEstimatedValues(IDataset ds, IEnumerable<int> rows) {
  // Resolve the column of every node's variable once up front so that the per-row
  // evaluation does not have to look columns up by name.
  var columnsByNode = new ReadOnlyCollection<double>[tree.Length];
  for (int nodeIndex = 0; nodeIndex < tree.Length; nodeIndex++) {
    var variableName = tree[nodeIndex].VarName;
    bool usesVariable = variableName != TreeNode.NO_VARIABLE;

    // tree models also support calculating estimations if not all variables used for
    // training are available in the dataset; such nodes keep a null entry
    if (usesVariable && ds.ColumnNames.Contains(variableName)) {
      columnsByNode[nodeIndex] = ds.GetReadOnlyDoubleValues(variableName);
    }
  }

  return rows.Select(row => GetPredictionForRow(tree, columnsByNode, 0, row));
}
/// <summary>
/// Lazily computes a one-step estimate for each requested row from the preceding
/// <c>TimeOffset</c> target values.
/// </summary>
/// <param name="dataset">Dataset providing the values of <c>TargetVariable</c>.</param>
/// <param name="rows">Row indices to estimate.</param>
/// <returns>
/// For each row, <c>Constant</c> plus the sum over <c>i = 1..TimeOffset</c> of
/// <c>target[row - i] * Phi[i - 1]</c>; NaN for rows with <c>row - TimeOffset &lt; 0</c>.
/// </returns>
public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
  var targetValues = dataset.GetReadOnlyDoubleValues(TargetVariable);

  foreach (int row in rows) {
    // Not enough history before this row to fill all lags.
    if (row - TimeOffset < 0) {
      yield return double.NaN;
      continue;
    }

    double weightedSum = 0.0;
    for (int lag = 1; lag <= TimeOffset; lag++) {
      weightedSum += targetValues[row - lag] * Phi[lag - 1];
    }
    weightedSum += Constant;

    yield return weightedSum;
  }
}
/// <summary>
/// Returns a lazily evaluated prediction for each requested row.
/// </summary>
/// <param name="ds">Dataset supplying the columns referenced by the tree nodes.</param>
/// <param name="rows">Row indices to predict.</param>
/// <returns>The result of <c>GetPredictionForRow</c> for each row, in order.</returns>
public IEnumerable<double> GetEstimatedValues(IDataset ds, IEnumerable<int> rows) {
  // Resolve the column of every node's variable once up front so that the per-row
  // evaluation does not have to look columns up by name.
  var columnsByNode = new ReadOnlyCollection<double>[tree.Length];
  for (int nodeIndex = 0; nodeIndex < tree.Length; nodeIndex++) {
    var variableName = tree[nodeIndex].VarName;
    if (variableName == TreeNode.NO_VARIABLE) {
      continue; // nodes without a variable keep a null entry
    }
    columnsByNode[nodeIndex] = ds.GetReadOnlyDoubleValues(variableName);
  }

  return rows.Select(row => GetPredictionForRow(tree, columnsByNode, 0, row));
}