/// <summary>
/// Verifies that calling SetIndex a second time starts a new group of rows,
/// so each (Year, Name) index pair keeps its own vector and scalar values.
/// </summary>
public void TestChangeIndex()
{
    var table = new IndexedDataTable(new string[] { "Year", "Name" });

    table.SetIndex(new object[] { 2000, "Name1" });
    table.SetValues("A", new double[] { 1, 2, 3, 4 });  // vector
    table.Set("B", 1234);                               // scalar

    table.SetIndex(new object[] { 2001, "Name2" });
    table.SetValues("A", new double[] { 5, 6, 7, 8 });  // vector
    table.Set("B", 5678);                               // scalar

    // Build the expected flattened rows: four rows per index group.
    var expectedRows = new List<object[]>();
    for (int a = 1; a <= 4; a++)
        expectedRows.Add(new object[] { 2000, "Name1", a, 1234 });
    for (int a = 5; a <= 8; a++)
        expectedRows.Add(new object[] { 2001, "Name2", a, 5678 });

    var expected = Utilities.CreateTable(new string[] { "Year", "Name", "A", "B" }, expectedRows);
    Assert.IsTrue(expected.IsSame(table.ToTable()));
}
/// <summary>Main run method for performing our calculations and storing data.</summary>
public void Run()
{
    // If the target table has not been modified during the simulation run, don't do anything.
    if (dataStore?.Writer != null && !dataStore.Writer.TablesModified.Contains(TableName))
    {
        return;
    }

    // Fail fast with a descriptive error when the configured source table is missing.
    if (string.IsNullOrWhiteSpace(TableName))
    {
        throw new Exception(string.Format("Error in probability model {0}: TableName is null", Name));
    }
    else if (!dataStore.Reader.TableNames.Contains(TableName))
    {
        throw new Exception(string.Format("Error in probability model {0}: table '{1}' does not exist in the database.", Name, TableName));
    }

    DataTable simulationData = dataStore.Reader.GetData(TableName, fieldNames: dataStore.Reader.ColumnNames(TableName));
    if (simulationData != null)
    {
        // Group the simulation data by FieldToSplitOn; one output group per distinct key value.
        IndexedDataTable simData = new IndexedDataTable(simulationData, new string[] { FieldToSplitOn });
        IndexedDataTable probabilityData = new IndexedDataTable(new string[] { FieldToSplitOn });
        foreach (var group in simData.Groups())
        {
            object keyValue = group.IndexValues[0];

            // Add in our key column
            probabilityData.SetIndex(new object[] { keyValue });
            probabilityData.Set<object>(FieldToSplitOn, keyValue);

            // Add in all other numeric columns.
            bool haveWrittenProbabilityColumn = false;
            foreach (DataColumn column in simulationData.Columns)
            {
                if (column.DataType == typeof(double))
                {
                    // Sort ascending so each value lines up with its exceedence probability below.
                    var values = group.Get<double>(column.ColumnName).ToList();
                    values.Sort();
                    if (!haveWrittenProbabilityColumn)
                    {
                        // Add in the probability column. Written once per group, sized from
                        // the first numeric column's row count — assumes all numeric columns
                        // in a group have the same length.
                        double[] probabilityValues = MathUtilities.ProbabilityDistribution(values.Count, this.Exceedence);
                        probabilityData.SetValues("Probability", probabilityValues);
                        haveWrittenProbabilityColumn = true;
                    }
                    probabilityData.SetValues(column.ColumnName, values);
                }
            }
        }

        // Write the stats data to the DataStore
        DataTable t = probabilityData.ToTable();
        t.TableName = this.Name;
        dataStore.Writer.WriteTable(t);
    }
}
/// <summary>
/// Ensures Groups() yields one group per distinct index, in insertion order,
/// and that each group's vector column contains only that group's values.
/// </summary>
public void TestIterateThroughGroups()
{
    IndexedDataTable indexedTable = new IndexedDataTable(new string[] { "Year", "Name" });
    indexedTable.SetIndex(new object[] { 2000, "Name1" });
    indexedTable.SetValues("A", new double[] { 1, 2, 3, 4 }); // vector
    indexedTable.Set("B", 1234); // scalar
    indexedTable.SetIndex(new object[] { 2001, "Name2" });
    indexedTable.SetValues("A", new double[] { 5, 6, 7, 8 }); // vector
    indexedTable.Set("B", 5678); // scalar

    int i = 1;
    foreach (var group in indexedTable.Groups())
    {
        var a = group.Get<double>("A");
        // Assert.AreEqual takes (expected, actual); the original call had the
        // arguments reversed, which produces misleading failure messages.
        if (i == 1)
        {
            Assert.AreEqual(new double[] { 1, 2, 3, 4 }, a);
        }
        else
        {
            Assert.AreEqual(new double[] { 5, 6, 7, 8 }, a);
        }
        i++;
    }
}
/// <summary>
/// Loads the embedded Northwind XML resource into both the untyped and typed
/// data sets, then wraps the relevant tables in indexed views.
/// </summary>
public Form1()
{
    InitializeComponent();

    // Load the untyped data set from the embedded resource.
    using (var xmlStream = Assembly.GetExecutingAssembly().GetManifestResourceStream("LiveLinqToDataSet.Northwind.xml"))
    {
        System.Diagnostics.Debug.Assert(xmlStream != null);
        XmlReaderSettings settings = new XmlReaderSettings { IgnoreWhitespace = true };
        // XmlReader is IDisposable; the original leaked it. Dispose it along with the stream.
        using (XmlReader reader = XmlReader.Create(xmlStream, settings))
        {
            untypedDataSet.ReadXml(reader);
        }
    }

    // Load the typed data set from the same resource.
    using (var xmlStream = Assembly.GetExecutingAssembly().GetManifestResourceStream("LiveLinqToDataSet.Northwind.xml"))
    {
        System.Diagnostics.Debug.Assert(xmlStream != null);
        XmlReaderSettings settings = new XmlReaderSettings { IgnoreWhitespace = true };
        using (XmlReader reader = XmlReader.Create(xmlStream, settings))
        {
            foreach (DataColumn col in typedDataSet.Tables["Orders"].Columns)
            {
                col.ColumnMapping = MappingType.Attribute; // to make our XML file more compact
            }
            typedDataSet.Namespace = ""; // need this because our XML file does not have xmlns defined
            typedDataSet.ReadXml(reader);
        }
    }

    untypedCustomers = untypedDataSet.Tables["Customers"].AsIndexed();
    untypedOrders = untypedDataSet.Tables["Orders"].AsIndexed();
    customers = typedDataSet.Customers.AsIndexed();
    orders = typedDataSet.Orders.AsIndexed();
}
/// <summary>
/// Checks that Get&lt;T&gt;() returns only the current index group's values,
/// with scalar values replicated across the group's rows.
/// </summary>
public void TestGetColumn()
{
    IndexedDataTable indexedTable = new IndexedDataTable(new string[] { "Year", "Name" });
    indexedTable.SetIndex(new object[] { 2000, "Name1" });
    indexedTable.SetValues("A", new double[] { 1, 2, 3, 4 }); // vector
    indexedTable.Set("B", 1234); // scalar
    indexedTable.SetIndex(new object[] { 2001, "Name2" });
    indexedTable.SetValues("A", new double[] { 5, 6, 7, 8 }); // vector
    indexedTable.Set("B", 5678); // scalar

    // Re-index the flattened table on Year only.
    IndexedDataTable indexedTable2 = new IndexedDataTable(indexedTable.ToTable(), new string[] { "Year" });

    // Assert.AreEqual takes (expected, actual); the original calls had the
    // arguments reversed, which produces misleading failure messages.
    indexedTable2.SetIndex(new object[] { 2000 });
    var a = indexedTable2.Get<double>("A");
    Assert.AreEqual(new double[] { 1, 2, 3, 4 }, a);
    var names = indexedTable2.Get<string>("Name");
    Assert.AreEqual(new string[] { "Name1", "Name1", "Name1", "Name1" }, names);

    indexedTable2.SetIndex(new object[] { 2001 });
    var b = indexedTable2.Get<int>("B");
    Assert.AreEqual(new int[] { 5678, 5678, 5678, 5678 }, b);
}
/// <summary>Main run method for performing our post simulation calculations</summary>
public void Run()
{
    // Note - we seem to be assuming that the predicted data table is called Report.
    // If the predicted table has not been modified during the most recent simulations run, don't do anything.
    if (dataStore?.Writer != null && !dataStore.Writer.TablesModified.Contains("Report"))
    {
        return;
    }

    string sql = "SELECT * FROM [Report]";
    DataTable predictedData = dataStore.Reader.GetDataUsingSql(sql);
    if (predictedData != null)
    {
        IndexedDataTable predictedDataIndexed = new IndexedDataTable(predictedData, null);

        // Quoted, comma-separated name lists for splicing into the R script below.
        string outputNames = StringUtilities.Build(Outputs, ",", "\"", "\"");
        string inputNames = StringUtilities.Build(Inputs, ",", "\"", "\"");

        string anovaVariableValuesFileName = GetTempFileName("anovaVariableValues", ".csv");

        // Write variables file
        using (var writer = new StreamWriter(anovaVariableValuesFileName))
            DataTableUtilities.DataTableToText(predictedDataIndexed.ToTable(), 0, ",", true, writer, excelFriendly: true);

        // R script: fits an ANOVA model per output variable, then derives
        // first-order and total-order variance contributions for each input.
        // {{ and }} are literal braces escaped for string.Format.
        string script = string.Format(
            "inputs <- c({0})" + Environment.NewLine +
            "inputs <- inputs[inputs != \"\"]" + Environment.NewLine +
            "outputs <- c({1})" + Environment.NewLine +
            "outputs <- outputs[outputs != \"\"]" + Environment.NewLine +
            "factorial_data <- read.csv(\"{2}\")" + Environment.NewLine +
            "indices <- data.frame(matrix(ncol = 4, nrow = 0))" + Environment.NewLine +
            "colnames(indices) <- c(\"Input\", \"Output\", \"FirstOrder\", \"TotalOrder\")" + Environment.NewLine +
            "for (output in outputs){{" + Environment.NewLine +
            " data <- factorial_data[, names(factorial_data) %in% inputs | names(factorial_data) == output]" + Environment.NewLine +
            " data[, names(data) %in% inputs] <- lapply(data[, names(data) %in% inputs], factor)" + Environment.NewLine +
            " output_mean <- mean(data[[output]])" + Environment.NewLine +
            " TSS <- sum((data[[output]] - output_mean)^2)" + Environment.NewLine +
            " anova_model <- aov(data[[output]] ~ (.)^1000, data = data[, names(data) %in% inputs])" + Environment.NewLine +
            " SSi <- summary(anova_model)[[1]][2]" + Environment.NewLine +
            " variance_contributions <- SSi / TSS" + Environment.NewLine +
            " parameter_names <- trimws(rownames(SSi), which = \"both\")" + Environment.NewLine +
            " all_results <- data.frame(parameter_names, variance_contributions, row.names = NULL)" + Environment.NewLine +
            " names(all_results) <- list(\"input\", \"% of variance\") " + Environment.NewLine +
            " for (input in inputs){{" + Environment.NewLine +
            " first <- all_results[all_results$input == input, colnames(all_results) == \"% of variance\"]" + Environment.NewLine +
            " total <- sum(all_results[grepl(input, all_results$input), colnames(all_results) == \"% of variance\"])" + Environment.NewLine +
            " result <- data.frame(Input=c(input), Output=c(output), FirstOrder=c(first), TotalOrder=c(total))" + Environment.NewLine +
            " indices <- rbind(indices, result)" + Environment.NewLine +
            " }}" + Environment.NewLine +
            "}}" + Environment.NewLine +
            "write.table(indices, sep=\",\", row.names=FALSE)" + Environment.NewLine
            , inputNames, outputNames, anovaVariableValuesFileName.Replace("\\", "/"));

        // Execute the script and persist the resulting indices table.
        DataTable results = RunR(script);
        results.TableName = Name + "Statistics";
        dataStore.Writer.WriteTable(results);
    }
}
/// <summary>Cloning constructor used by the deep-clone infrastructure.</summary>
/// <param name="original">The instance to copy.</param>
/// <param name="cloner">Cloner that tracks already-cloned objects so shared references stay shared.</param>
protected IncrementalLinearSolver(IncrementalLinearSolver original, Cloner cloner) : base(original, cloner)
{
    problemTypeParam = cloner.Clone(original.problemTypeParam);
    qualityUpdateIntervalParam = cloner.Clone(original.qualityUpdateIntervalParam);
    // qualityPerClock may legitimately be null on the original (presumably created
    // lazily elsewhere — confirm); only clone it when present.
    if (original.qualityPerClock != null)
    {
        qualityPerClock = cloner.Clone(original.qualityPerClock);
    }
}
/// <summary>
/// Sets a scalar before any vector on an un-indexed table and compares the
/// flattened output against the expected text resource.
/// </summary>
public void TestNoIndexSetScalarThenSetVector()
{
    IndexedDataTable indexedTable = new IndexedDataTable(null);
    indexedTable.Set("A", 1234); // scalar
    indexedTable.SetValues("B", new double[] { 1, 2, 3, 4 }); // vector

    string expected = ReflectionUtilities.GetResourceAsString("UnitTests.APSIMShared.TestNoIndexSetScalarThenSetVector.Expected.txt");
    // Assert.AreEqual takes (expected, actual); the original call had them reversed.
    Assert.AreEqual(expected, Utilities.TableToString(indexedTable.ToTable()));
}
/// <summary>
/// Sets a vector column first and then a scalar, verifying the scalar is
/// replicated across all rows; compares against the expected text resource.
/// </summary>
public void TestSetVectorThenSetScalar()
{
    IndexedDataTable indexedTable = new IndexedDataTable(new string[] { "Year", "Name" });
    indexedTable.SetIndex(new object[] { 2000, "Name1" });
    indexedTable.SetValues("A", new double[] { 1, 2, 3, 4 }); // vector
    indexedTable.Set("B", 1234); // scalar

    string expected = ReflectionUtilities.GetResourceAsString("UnitTests.APSIMShared.IndexedDataTableTests.TestSetVectorThenScalar.Expected.txt");
    // Assert.AreEqual takes (expected, actual); the original call had them reversed.
    Assert.AreEqual(expected, Utilities.TableToString(indexedTable.ToTable()));
}
/// <summary>
/// Sets a scalar before any vector on an un-indexed table and checks the
/// flattened textual output.
/// </summary>
public void TestNoIndexSetScalarThenSetVector()
{
    IndexedDataTable indexedTable = new IndexedDataTable(null);
    indexedTable.Set("A", 1234); // scalar
    indexedTable.SetValues("B", new double[] { 1, 2, 3, 4 }); // vector

    // Assert.AreEqual takes (expected, actual); the original call had them reversed.
    Assert.AreEqual(" A, B\r\n" +
                    "1234,1.000\r\n" +
                    "1234,2.000\r\n" +
                    "1234,3.000\r\n" +
                    "1234,4.000\r\n",
                    Utilities.TableToString(indexedTable.ToTable()));
}
/// <summary>
/// Sets a scalar first and then a vector under a (Year, Name) index and
/// checks the flattened textual output.
/// </summary>
public void TestSetScalarThenSetVector()
{
    IndexedDataTable indexedTable = new IndexedDataTable(new string[] { "Year", "Name" });
    indexedTable.SetIndex(new object[] { 2000, "Name1" });
    indexedTable.Set("A", 1234); // scalar
    indexedTable.SetValues("B", new double[] { 1, 2, 3, 4 }); // vector

    // Assert.AreEqual takes (expected, actual); the original call had them reversed.
    Assert.AreEqual("Year, Name, A, B\r\n" +
                    "2000,Name1,1234,1.000\r\n" +
                    "2000,Name1,1234,2.000\r\n" +
                    "2000,Name1,1234,3.000\r\n" +
                    "2000,Name1,1234,4.000\r\n",
                    Utilities.TableToString(indexedTable.ToTable()));
}
/// <summary>
/// The main run method called to fill tables in the specified DataStore.
/// </summary>
/// <param name="dataStore">The DataStore to work with</param>
public void Run(IStorageReader dataStore)
{
    // Remove any results from a previous run before writing fresh ones.
    dataStore.DeleteDataInTable(this.Name);

    DataTable simulationData = dataStore.GetData(TableName, fieldNames: dataStore.GetTableColumns(TableName));
    if (simulationData != null)
    {
        // Group the simulation data by FieldToSplitOn; one output group per distinct key value.
        IndexedDataTable simData = new IndexedDataTable(simulationData, new string[] { FieldToSplitOn });
        IndexedDataTable probabilityData = new IndexedDataTable(new string[] { FieldToSplitOn });
        foreach (var group in simData.Groups())
        {
            object keyValue = group.IndexValues[0];

            // Add in our key column
            probabilityData.SetIndex(new object[] { keyValue });
            probabilityData.Set<object>(FieldToSplitOn, keyValue);

            // Add in all other numeric columns.
            bool haveWrittenProbabilityColumn = false;
            foreach (DataColumn column in simulationData.Columns)
            {
                if (column.DataType == typeof(double))
                {
                    // Sort ascending so each value lines up with its exceedence probability below.
                    var values = group.Get<double>(column.ColumnName).ToList();
                    values.Sort();
                    if (!haveWrittenProbabilityColumn)
                    {
                        // Add in the probability column. Written once per group, sized from
                        // the first numeric column's row count.
                        double[] probabilityValues = MathUtilities.ProbabilityDistribution(values.Count, this.Exceedence);
                        probabilityData.SetValues("Probability", probabilityValues);
                        haveWrittenProbabilityColumn = true;
                    }
                    probabilityData.SetValues(column.ColumnName, values);
                }
            }
        }

        // Write the stats data to the DataStore
        DataTable t = probabilityData.ToTable();
        t.TableName = this.Name;
        dataStore.WriteTable(t);
    }
}
/// <summary>
/// On a table with no index columns, a scalar set before any vector must be
/// replicated across every row the vector later creates.
/// </summary>
public void TestNoIndexSetScalarThenSetVector()
{
    var table = new IndexedDataTable(null);
    table.Set("A", 1234);                              // scalar first
    table.SetValues("B", new double[] { 1, 2, 3, 4 }); // then vector

    // One expected row per vector element, scalar repeated in each.
    var expectedRows = new List<object[]>();
    for (int b = 1; b <= 4; b++)
        expectedRows.Add(new object[] { 1234, b });

    var expected = Utilities.CreateTable(new string[] { "A", "B" }, expectedRows);
    Assert.IsTrue(expected.IsSame(table.ToTable()));
}
/// <summary>
/// A scalar set after a vector must be replicated across the rows the vector
/// already created under the current (Year, Name) index.
/// </summary>
public void TestSetVectorThenSetScalar()
{
    var table = new IndexedDataTable(new string[] { "Year", "Name" });
    table.SetIndex(new object[] { 2000, "Name1" });
    table.SetValues("A", new double[] { 1, 2, 3, 4 }); // vector first
    table.Set("B", 1234);                              // then scalar

    // One expected row per vector element, index and scalar repeated in each.
    var expectedRows = new List<object[]>();
    for (int a = 1; a <= 4; a++)
        expectedRows.Add(new object[] { 2000, "Name1", a, 1234 });

    var expected = Utilities.CreateTable(new string[] { "Year", "Name", "A", "B" }, expectedRows);
    Assert.IsTrue(expected.IsSame(table.ToTable()));
}
/// <summary>
/// Round-trips an IndexedDataTable&lt;int&gt; through ProtoBuf serialization and
/// verifies all row values survive unchanged.
/// </summary>
public void TestIndexedDataTable()
{
    var table = new IndexedDataTable<int>("test", "test description");
    var row = new IndexedDataRow<int>("test row");
    for (int i = 1; i <= 3; i++)
        row.Values.Add(Tuple.Create(i, (double)i));
    table.Rows.Add(row);

    var serializer = new ProtoBufSerializer();
    serializer.Serialize(table, tempFile);
    var roundTripped = (IndexedDataTable<int>)serializer.Deserialize(tempFile);

    for (int i = 0; i < 3; i++)
        Assert.AreEqual(table.Rows["test row"].Values[i], roundTripped.Rows["test row"].Values[i]);
}
/// <summary>
/// Builds a chart table of train/test NMSE (left axis) and the number of
/// non-zero coefficients (second axis) over the lambda path.
/// </summary>
/// <param name="coeff">Coefficient matrix: one row per lambda step, one column per coefficient.</param>
/// <param name="lambda">Lambda value at each step of the path.</param>
/// <param name="trainNMSE">Training NMSE at each lambda step.</param>
/// <param name="testNMSE">Test NMSE at each lambda step.</param>
private static IndexedDataTable<double> NMSEGraph(double[,] coeff, double[] lambda, double[] trainNMSE, double[] testNMSE)
{
    var errorTable = new IndexedDataTable<double>("NMSE", "Path of NMSE values over different lambda values");
    var numNonZeroCoeffs = new int[lambda.Length];

    // Axis ranges are fixed manually below rather than auto-scaled.
    errorTable.VisualProperties.YAxisMaximumAuto = false;
    errorTable.VisualProperties.YAxisMinimumAuto = false;
    errorTable.VisualProperties.XAxisMaximumAuto = false;
    errorTable.VisualProperties.XAxisMinimumAuto = false;

    // Count the non-zero coefficients at each lambda step (row i of coeff).
    for (int i = 0; i < coeff.GetLength(0); i++)
    {
        for (int j = 0; j < coeff.GetLength(1); j++)
        {
            if (!coeff[i, j].IsAlmost(0.0))
            {
                numNonZeroCoeffs[i]++;
            }
        }
    }

    // NMSE is normalized, so the y-axis is pinned to [0, 1].
    errorTable.VisualProperties.YAxisMinimumFixedValue = 0;
    errorTable.VisualProperties.YAxisMaximumFixedValue = 1.0;
    errorTable.VisualProperties.XAxisLogScale = true;
    errorTable.VisualProperties.XAxisTitle = "Lambda";
    errorTable.VisualProperties.YAxisTitle = "Normalized mean of squared errors (NMSE)";
    errorTable.VisualProperties.SecondYAxisTitle = "Number of variables";

    errorTable.Rows.Add(new IndexedDataRow<double>("NMSE (train)", "Path of NMSE values over different lambda values", lambda.Zip(trainNMSE, (l, v) => Tuple.Create(l, v))));
    errorTable.Rows.Add(new IndexedDataRow<double>("NMSE (test)", "Path of NMSE values over different lambda values", lambda.Zip(testNMSE, (l, v) => Tuple.Create(l, v))));
    errorTable.Rows.Add(new IndexedDataRow<double>("Number of variables", "The number of non-zero coefficients for each step in the path", lambda.Zip(numNonZeroCoeffs, (l, v) => Tuple.Create(l, (double)v))));

    if (lambda.Length > 2)
    {
        // NOTE(review): appears to assume lambda is sorted descending, so Last()
        // is the smallest value (axis minimum) and Skip(1).First() the largest
        // plotted value (axis maximum) — confirm against the caller.
        errorTable.VisualProperties.XAxisMinimumFixedValue = Math.Pow(10, Math.Floor(Math.Log10(lambda.Last())));
        errorTable.VisualProperties.XAxisMaximumFixedValue = Math.Pow(10, Math.Ceiling(Math.Log10(lambda.Skip(1).First())));
    }

    errorTable.Rows["NMSE (train)"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
    errorTable.Rows["NMSE (test)"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
    errorTable.Rows["Number of variables"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
    errorTable.Rows["Number of variables"].VisualProperties.SecondYAxis = true;
    return (errorTable);
}
/// <summary>
/// Verifies that changing the index mid-stream starts a new group of rows in
/// the flattened textual output.
/// </summary>
public void TestChangeIndex()
{
    IndexedDataTable indexedTable = new IndexedDataTable(new string[] { "Year", "Name" });
    indexedTable.SetIndex(new object[] { 2000, "Name1" });
    indexedTable.SetValues("A", new double[] { 1, 2, 3, 4 }); // vector
    indexedTable.Set("B", 1234); // scalar
    indexedTable.SetIndex(new object[] { 2001, "Name2" });
    indexedTable.SetValues("A", new double[] { 5, 6, 7, 8 }); // vector
    indexedTable.Set("B", 5678); // scalar

    // Assert.AreEqual takes (expected, actual); the original call had them reversed.
    Assert.AreEqual("Year, Name, A, B\r\n" +
                    "2000,Name1,1.000,1234\r\n" +
                    "2000,Name1,2.000,1234\r\n" +
                    "2000,Name1,3.000,1234\r\n" +
                    "2000,Name1,4.000,1234\r\n" +
                    "2001,Name2,5.000,5678\r\n" +
                    "2001,Name2,6.000,5678\r\n" +
                    "2001,Name2,7.000,5678\r\n" +
                    "2001,Name2,8.000,5678\r\n",
                    Utilities.TableToString(indexedTable.ToTable()));
}
/// <summary>
/// Builds a chart table of standardized coefficient paths over the lambda
/// path, plus a second-axis row showing the number of non-zero coefficients.
/// </summary>
/// <param name="coeff">Coefficient matrix: one row per lambda step, one column per variable/factor-level.</param>
/// <param name="lambda">Lambda value at each step of the path.</param>
/// <param name="allowedVars">Variable names to include in the graph.</param>
/// <param name="ds">Dataset used to look up variable types and values.</param>
/// <param name="showOnlyRelevantBasisFuncs">NOTE(review): currently unused in this body — confirm intent.</param>
private static IndexedDataTable<double> CoefficientGraph(double[,] coeff, double[] lambda, IEnumerable<string> allowedVars, IDataset ds, bool showOnlyRelevantBasisFuncs = true)
{
    var coeffTable = new IndexedDataTable<double>("Coefficients", "The paths of standarized coefficient values over different lambda values");
    coeffTable.VisualProperties.YAxisMaximumAuto = false;
    coeffTable.VisualProperties.YAxisMinimumAuto = false;
    coeffTable.VisualProperties.XAxisMaximumAuto = false;
    coeffTable.VisualProperties.XAxisMinimumAuto = false;
    coeffTable.VisualProperties.XAxisLogScale = true;
    coeffTable.VisualProperties.XAxisTitle = "Lambda";
    coeffTable.VisualProperties.YAxisTitle = "Coefficients";
    coeffTable.VisualProperties.SecondYAxisTitle = "Number of variables";

    var nLambdas = lambda.Length;
    var nCoeff = coeff.GetLength(1);
    var dataRows = new IndexedDataRow<double>[nCoeff];
    var numNonZeroCoeffs = new int[nLambdas];

    var doubleVariables = allowedVars.Where(ds.VariableHasType<double>);
    var factorVariableNames = allowedVars.Where(ds.VariableHasType<string>);
    var factorVariablesAndValues = ds.GetFactorVariableValues(factorVariableNames, Enumerable.Range(0, ds.Rows)); //must consider all factor values (in train and test set)

    // Count the non-zero coefficients at each lambda step (row i of coeff).
    for (int i = 0; i < coeff.GetLength(0); i++)
    {
        for (int j = 0; j < coeff.GetLength(1); j++)
        {
            if (!coeff[i, j].IsAlmost(0.0))
            {
                numNonZeroCoeffs[i]++;
            }
        }
    }
    {
        // i indexes coefficient columns; factor-level columns come first,
        // then double variables — the column order in coeff must match.
        int i = 0;
        foreach (var factorVariableAndValues in factorVariablesAndValues)
        {
            foreach (var factorValue in factorVariableAndValues.Value)
            {
                // Standardize by the std dev of the binary (one-hot) indicator for this factor level.
                double sigma = ds.GetStringValues(factorVariableAndValues.Key)
                               .Select(s => s == factorValue ? 1.0 : 0.0)
                               .StandardDeviation(); // calc std dev of binary indicator
                var path = Enumerable.Range(0, nLambdas).Select(r => Tuple.Create(lambda[r], coeff[r, i] * sigma)).ToArray();
                dataRows[i] = new IndexedDataRow<double>(factorVariableAndValues.Key + "=" + factorValue, factorVariableAndValues.Key + "=" + factorValue, path);
                i++;
            }
        }
        foreach (var doubleVariable in doubleVariables)
        {
            // Standardize by the variable's own std dev.
            double sigma = ds.GetDoubleValues(doubleVariable).StandardDeviation();
            var path = Enumerable.Range(0, nLambdas).Select(r => Tuple.Create(lambda[r], coeff[r, i] * sigma)).ToArray();
            dataRows[i] = new IndexedDataRow<double>(doubleVariable, doubleVariable, path);
            i++;
        }
        // add to coeffTable by total weight (larger area under the curve => more important);
        foreach (var r in dataRows.OrderByDescending(r => r.Values.Select(t => t.Item2).Sum(x => Math.Abs(x))))
        {
            coeffTable.Rows.Add(r);
        }
    }

    if (lambda.Length > 2)
    {
        // NOTE(review): appears to assume lambda is sorted descending — confirm against the caller.
        coeffTable.VisualProperties.XAxisMinimumFixedValue = Math.Pow(10, Math.Floor(Math.Log10(lambda.Last())));
        coeffTable.VisualProperties.XAxisMaximumFixedValue = Math.Pow(10, Math.Ceiling(Math.Log10(lambda.Skip(1).First())));
    }

    coeffTable.Rows.Add(new IndexedDataRow<double>("Number of variables", "The number of non-zero coefficients for each step in the path", lambda.Zip(numNonZeroCoeffs, (l, v) => Tuple.Create(l, (double)v))));
    coeffTable.Rows["Number of variables"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
    coeffTable.Rows["Number of variables"].VisualProperties.SecondYAxis = true;
    return (coeffTable);
}
/// <summary>Main run method for performing our post simulation calculations</summary>
/// <param name="dataStore">The data store.</param>
public void Run(IDataStore dataStore)
{
    DataTable predictedData = dataStore.Reader.GetData("Report", filter: "SimulationName LIKE '" + Name + "%'", orderBy: "SimulationID");
    if (predictedData != null)
    {
        // Determine how many years we have per simulation
        DataView view = new DataView(predictedData);
        view.RowFilter = "SimulationName='" + Name + "Simulation1'";
        Years = DataTableUtilities.GetColumnAsIntegers(view, "Clock.Today.Year");

        // Create a table of all predicted values. Columns that are constant
        // within a year are treated as descriptive; varying columns become
        // per-year variable columns named "<column><year>".
        DataTable predictedValues = new DataTable();
        List<string> descriptiveColumnNames = new List<string>();
        List<string> variableNames = new List<string>();
        foreach (double year in Years)
        {
            view.RowFilter = "Clock.Today.Year=" + year;
            foreach (DataColumn predictedColumn in view.Table.Columns)
            {
                if (predictedColumn.DataType == typeof(double))
                {
                    double[] valuesForYear = DataTableUtilities.GetColumnAsDoubles(view, predictedColumn.ColumnName);
                    if (valuesForYear.Distinct().Count() == 1)
                    {
                        if (!descriptiveColumnNames.Contains(predictedColumn.ColumnName))
                        {
                            descriptiveColumnNames.Add(predictedColumn.ColumnName);
                        }
                    }
                    else
                    {
                        DataTableUtilities.AddColumn(predictedValues, predictedColumn.ColumnName + year, valuesForYear);
                        if (!variableNames.Contains(predictedColumn.ColumnName))
                        {
                            variableNames.Add(predictedColumn.ColumnName);
                        }
                    }
                }
            }
        }

        // Run R
        DataTable eeDataRaw;
        DataTable statsDataRaw;
        RunRPostSimulation(predictedValues, out eeDataRaw, out statsDataRaw);

        // Get ee data from R and store in ee table.
        // EE data from R looks like:
        // "ResidueWt", "FASW", "CN2", "Cona", "variable","path"
        // - 22.971008269563,0.00950570342209862,-0.00379987333757356,56.7587080430652,"FallowEvaporation1996",1
        // - 25.790599484188, 0.0170777988614538, -0.0265991133629069,58.0240658644712,"FallowEvaporation1996",2
        // - 26.113599477728, 0.0113851992409871, 0.0113996200126667,57.9689677010766,"FallowEvaporation1996",3
        // - 33.284199334316, 0.0323193916349732, -0.334388853704853,60.5376820772641,"FallowEvaporation1996",4
        DataView eeView = new DataView(eeDataRaw);
        IndexedDataTable eeTableKey = new IndexedDataTable(new string[] { "Parameter", "Year" });

        // Create a path variable.
        var pathValues = Enumerable.Range(1, NumPaths).ToArray();

        foreach (var parameter in Parameters)
        {
            foreach (DataColumn column in predictedValues.Columns)
            {
                eeView.RowFilter = "variable = '" + column.ColumnName + "'";
                if (eeView.Count != NumPaths)
                {
                    throw new Exception("Found only " + eeView.Count + " paths for variable " + column.ColumnName + " in ee table");
                }
                // Column names end in a 4-digit year, e.g. "FallowEvaporation1996".
                int year = Convert.ToInt32(column.ColumnName.Substring(column.ColumnName.Length - 4));
                string variableName = column.ColumnName.Substring(0, column.ColumnName.Length - 4);
                eeTableKey.SetIndex(new object[] { parameter.Name, year });

                // Store the running mean of |elementary effects| per path (MuStar).
                List<double> values = DataTableUtilities.GetColumnAsDoubles(eeView, parameter.Name).ToList();
                for (int i = 0; i < values.Count; i++)
                {
                    values[i] = Math.Abs(values[i]);
                }
                var runningMean = MathUtilities.RunningAverage(values);
                eeTableKey.SetValues("Path", pathValues);
                eeTableKey.SetValues(variableName + ".MuStar", runningMean);
            }
        }
        DataTable eeTable = eeTableKey.ToTable();
        eeTable.TableName = Name + "PathAnalysis";

        // Get stats data from R and store in MuStar table.
        // Stats data coming back from R looks like:
        // "mu", "mustar", "sigma", "param","variable"
        // -30.7331368183818, 30.7331368183818, 5.42917964248002,"ResidueWt","FallowEvaporation1996"
        // -0.0731299918470997,0.105740687296631,0.450848277601353, "FASW","FallowEvaporation1996"
        // -0.83061431285624,0.839772007599748, 1.75541097254145, "CN2","FallowEvaporation1996"
        // 62.6942591520838, 62.6942591520838, 5.22778043503867, "Cona","FallowEvaporation1996"
        // -17.286285468283, 19.4018404625051, 24.1361388348929,"ResidueWt","FallowRunoff1996"
        // 8.09850688306722, 8.09852589447407, 15.1988107373113, "FASW","FallowRunoff1996"
        // 18.6196168461051, 18.6196168461051, 15.1496277765849, "CN2","FallowRunoff1996"
        // -7.12794888887507, 7.12794888887507, 5.54014788597839, "Cona","FallowRunoff1996"
        IndexedDataTable tableKey = new IndexedDataTable(new string[2] { "Parameter", "Year" });
        foreach (DataRow row in statsDataRaw.Rows)
        {
            string variable = row["variable"].ToString();
            int year = Convert.ToInt32(variable.Substring(variable.Length - 4));
            variable = variable.Substring(0, variable.Length - 4);
            tableKey.SetIndex(new object[] { row["param"], year });

            tableKey.Set(variable + ".Mu", row["mu"]);
            tableKey.Set(variable + ".MuStar", row["mustar"]);
            tableKey.Set(variable + ".Sigma", row["sigma"]);

            // Need to bring in the descriptive values.
            view.RowFilter = "Clock.Today.Year=" + year;
            foreach (var descriptiveColumnName in descriptiveColumnNames)
            {
                var values = DataTableUtilities.GetColumnAsStrings(view, descriptiveColumnName);
                if (values.Distinct().Count() == 1)
                {
                    tableKey.Set(descriptiveColumnName, view[0][descriptiveColumnName]);
                }
            }
        }
        DataTable muStarTable = tableKey.ToTable();
        muStarTable.TableName = Name + "Statistics";

        dataStore.Writer.WriteTable(eeTable);
        dataStore.Writer.WriteTable(muStarTable);
    }
}
/// <summary>Main run method for performing our post simulation calculations</summary>
public void Run()
{
    // If the predicted table has not been modified, don't do anything.
    // This can happen if other simulations were run but the Morris model was not.
    if (dataStore?.Writer != null && !dataStore.Writer.TablesModified.Contains(TableName))
    {
        return;
    }
    DataTable predictedData = dataStore.Reader.GetData(TableName);
    if (predictedData != null)
    {
        // Determine how many aggregation values we have per simulation
        DataView view = new DataView(predictedData);
        view.RowFilter = "SimulationName='" + Name + "Simulation1'";
        AggregationValues = DataTableUtilities.GetColumnAsStrings(view, AggregationVariableName);

        // Create a table of all predicted values. Columns that are constant
        // within an aggregation value are treated as descriptive; varying
        // columns become per-value variable columns named "<column>_<value>".
        DataTable predictedValues = new DataTable();
        List<string> descriptiveColumnNames = new List<string>();
        List<string> variableNames = new List<string>();
        foreach (string aggregationValue in AggregationValues)
        {
            string value = aggregationValue;
            // Normalize date-like aggregation values so the row filter matches.
            if (DateTime.TryParse(value, out DateTime date))
            {
                value = date.ToString("yyyy-MM-dd");
            }
            view.RowFilter = $"{AggregationVariableName}='{value}'";
            foreach (DataColumn predictedColumn in view.Table.Columns)
            {
                if (predictedColumn.DataType == typeof(double))
                {
                    double[] values = DataTableUtilities.GetColumnAsDoubles(view, predictedColumn.ColumnName);
                    if (values.Distinct().Count() == 1)
                    {
                        if (!descriptiveColumnNames.Contains(predictedColumn.ColumnName))
                        {
                            descriptiveColumnNames.Add(predictedColumn.ColumnName);
                        }
                    }
                    else
                    {
                        DataTableUtilities.AddColumn(predictedValues, predictedColumn.ColumnName + "_" + value, values);
                        if (!variableNames.Contains(predictedColumn.ColumnName))
                        {
                            variableNames.Add(predictedColumn.ColumnName);
                        }
                    }
                }
            }
        }

        // Run R
        DataTable eeDataRaw;
        DataTable statsDataRaw;
        RunRPostSimulation(predictedValues, out eeDataRaw, out statsDataRaw);

        // Get ee data from R and store in ee table.
        // EE data from R looks like:
        // "ResidueWt", "FASW", "CN2", "Cona", "variable","path"
        // - 22.971008269563,0.00950570342209862,-0.00379987333757356,56.7587080430652,"FallowEvaporation1996",1
        // - 25.790599484188, 0.0170777988614538, -0.0265991133629069,58.0240658644712,"FallowEvaporation1996",2
        // - 26.113599477728, 0.0113851992409871, 0.0113996200126667,57.9689677010766,"FallowEvaporation1996",3
        // - 33.284199334316, 0.0323193916349732, -0.334388853704853,60.5376820772641,"FallowEvaporation1996",4
        DataView eeView = new DataView(eeDataRaw);
        IndexedDataTable eeTableKey = new IndexedDataTable(new string[] { "Parameter", AggregationVariableName });

        // Create a path variable.
        var pathValues = Enumerable.Range(1, NumPaths).ToArray();

        foreach (var parameter in Parameters)
        {
            foreach (DataColumn column in predictedValues.Columns)
            {
                eeView.RowFilter = "variable = '" + column.ColumnName + "'";
                if (eeView.Count != NumPaths)
                {
                    throw new Exception("Found only " + eeView.Count + " paths for variable " + column.ColumnName + " in ee table");
                }
                // Split "<variable>_<aggregationValue>" back into its parts.
                string aggregationValue = StringUtilities.GetAfter(column.ColumnName, "_");
                string variableName = StringUtilities.RemoveAfter(column.ColumnName, '_');
                eeTableKey.SetIndex(new object[] { parameter.Name, aggregationValue });

                // Store the running mean of |elementary effects| per path (MuStar).
                List<double> values = DataTableUtilities.GetColumnAsDoubles(eeView, parameter.Name).ToList();
                for (int i = 0; i < values.Count; i++)
                {
                    values[i] = Math.Abs(values[i]);
                }
                var runningMean = MathUtilities.RunningAverage(values);
                eeTableKey.SetValues("Path", pathValues);
                eeTableKey.SetValues(variableName + ".MuStar", runningMean);
            }
        }
        DataTable eeTable = eeTableKey.ToTable();
        eeTable.TableName = Name + "PathAnalysis";

        // Get stats data from R and store in MuStar table.
        // Stats data coming back from R looks like:
        // "mu", "mustar", "sigma", "param","variable"
        // -30.7331368183818, 30.7331368183818, 5.42917964248002,"ResidueWt","FallowEvaporation1996"
        // -0.0731299918470997,0.105740687296631,0.450848277601353, "FASW","FallowEvaporation1996"
        // -0.83061431285624,0.839772007599748, 1.75541097254145, "CN2","FallowEvaporation1996"
        // 62.6942591520838, 62.6942591520838, 5.22778043503867, "Cona","FallowEvaporation1996"
        // -17.286285468283, 19.4018404625051, 24.1361388348929,"ResidueWt","FallowRunoff1996"
        // 8.09850688306722, 8.09852589447407, 15.1988107373113, "FASW","FallowRunoff1996"
        // 18.6196168461051, 18.6196168461051, 15.1496277765849, "CN2","FallowRunoff1996"
        // -7.12794888887507, 7.12794888887507, 5.54014788597839, "Cona","FallowRunoff1996"
        IndexedDataTable tableKey = new IndexedDataTable(new string[2] { "Parameter", AggregationVariableName });
        foreach (DataRow row in statsDataRaw.Rows)
        {
            string variable = row["variable"].ToString();
            string aggregationValue = StringUtilities.GetAfter(variable, "_");
            variable = StringUtilities.RemoveAfter(variable, '_');
            tableKey.SetIndex(new object[] { row["param"], aggregationValue });

            tableKey.Set(variable + ".Mu", row["mu"]);
            tableKey.Set(variable + ".MuStar", row["mustar"]);
            tableKey.Set(variable + ".Sigma", row["sigma"]);

            // Need to bring in the descriptive values.
            view.RowFilter = $"{AggregationVariableName}='{aggregationValue}'";
            foreach (var descriptiveColumnName in descriptiveColumnNames)
            {
                var values = DataTableUtilities.GetColumnAsStrings(view, descriptiveColumnName);
                if (values.Distinct().Count() == 1)
                {
                    tableKey.Set(descriptiveColumnName, view[0][descriptiveColumnName]);
                }
            }
        }
        DataTable muStarTable = tableKey.ToTable();
        muStarTable.TableName = Name + "Statistics";
        dataStore.Writer.WriteTable(eeTable);
        dataStore.Writer.WriteTable(muStarTable);
    }
}
/// <summary>Main run method for performing our calculations and storing data.</summary>
/// <remarks>
/// Reads the "Report" table for this model's simulations, runs a Sobol sensitivity
/// analysis in R (package 'sensitivity', sobolSalt) once per reported year, and writes
/// the combined first-order/total indices to a "(Name)Statistics" table in the datastore.
/// Throws an aggregated <see cref="Exception"/> if R failed for any year.
/// </remarks>
public void Run()
{
    // If the target table has not been modified during the simulation run, don't do anything.
    if (dataStore?.Writer != null && !dataStore.Writer.TablesModified.Contains("Report"))
        return;

    DataTable predictedData = dataStore.Reader.GetData("Report", filter: "SimulationName LIKE '" + Name + "%'", orderBy: "SimulationID");
    if (predictedData != null)
    {
        IndexedDataTable variableValues = new IndexedDataTable(null);

        // Determine how many years we have per simulation by filtering on the first simulation.
        DataView view = new DataView(predictedData);
        view.RowFilter = "SimulationName='" + Name + "Simulation1'";
        var Years = DataTableUtilities.GetColumnAsIntegers(view, "Clock.Today.Year");

        // Create a results table. Only index by year when there is more than one year.
        IndexedDataTable results;
        if (Years.Count() > 1)
            results = new IndexedDataTable(new string[] { "Year" });
        else
            results = new IndexedDataTable(null);

        // Loop through all years and perform analysis on each.
        List<string> errorsFromR = new List<string>();
        foreach (double year in Years)
        {
            view.RowFilter = "Clock.Today.Year=" + year;

            // Collect every numeric predicted column that actually varies across paths;
            // constant columns carry no sensitivity information and would break the analysis.
            foreach (DataColumn predictedColumn in predictedData.Columns)
            {
                if (predictedColumn.DataType == typeof(double))
                {
                    var values = DataTableUtilities.GetColumnAsDoubles(view, predictedColumn.ColumnName);
                    if (values.Distinct().Count() > 1)
                        variableValues.SetValues(predictedColumn.ColumnName, values);
                }
            }

            string paramNames = StringUtilities.Build(Parameters.Select(p => p.Name), ",", "\"", "\"");
            string sobolx1FileName = GetTempFileName("sobolx1", ".csv");
            string sobolx2FileName = GetTempFileName("sobolx2", ".csv");
            string sobolVariableValuesFileName = GetTempFileName("sobolvariableValues", ".csv");

            // Write variables file
            using (var writer = new StreamWriter(sobolVariableValuesFileName))
                DataTableUtilities.DataTableToText(variableValues.ToTable(), 0, ",", true, writer, excelFriendly: false, decimalFormatString: "F6");

            // Write X1
            using (var writer = new StreamWriter(sobolx1FileName))
                DataTableUtilities.DataTableToText(X1, 0, ",", true, writer, excelFriendly: false, decimalFormatString: "F6");

            // Write X2
            using (var writer = new StreamWriter(sobolx2FileName))
                DataTableUtilities.DataTableToText(X2, 0, ",", true, writer, excelFriendly: false, decimalFormatString: "F6");

            // Build the R script. {{ / }} are escaped braces for string.Format; the
            // $-interpolated lines are resolved before string.Format sees the template.
            string script = string.Format(
                $".libPaths(c('{R.PackagesDirectory}', .libPaths()))" + Environment.NewLine +
                $"library('boot', lib.loc = '{R.PackagesDirectory}')" + Environment.NewLine +
                $"library('sensitivity', lib.loc = '{R.PackagesDirectory}')" + Environment.NewLine +
                "params <- c({0})" + Environment.NewLine +
                "n <- {1}" + Environment.NewLine +
                "nparams <- {2}" + Environment.NewLine +
                "X1 <- read.csv(\"{3}\")" + Environment.NewLine +
                "X2 <- read.csv(\"{4}\")" + Environment.NewLine +
                "sa <- sobolSalt(model = NULL, X1, X2, scheme=\"A\", nboot = 100)" + Environment.NewLine +
                "variableValues = read.csv(\"{5}\")" + Environment.NewLine +
                "for (columnName in colnames(variableValues))" + Environment.NewLine +
                "{{" + Environment.NewLine +
                " sa$y <- variableValues[[columnName]]" + Environment.NewLine +
                " tell(sa)" + Environment.NewLine +
                " sa$S$Parameter <- params" + Environment.NewLine +
                " sa$T$Parameter <- params" + Environment.NewLine +
                " sa$S$ColumnName <- columnName" + Environment.NewLine +
                " sa$T$ColumnName <- columnName" + Environment.NewLine +
                " sa$S$Indices <- \"FirstOrder\"" + Environment.NewLine +
                " sa$T$Indices <- \"Total\"" + Environment.NewLine +
                " if (!exists(\"allData\"))" + Environment.NewLine +
                " allData <- rbind(sa$S, sa$T)" + Environment.NewLine +
                " else" + Environment.NewLine +
                " allData <- rbind(allData, sa$S, sa$T)" + Environment.NewLine +
                "}}" + Environment.NewLine +
                "write.table(allData, sep=\",\", row.names=FALSE)" + Environment.NewLine
                , paramNames, NumPaths, Parameters.Count,
                sobolx1FileName.Replace("\\", "/"),
                // BUG FIX: argument {4} previously passed sobolx1FileName again, so R
                // read X1 twice and the X2 file written above was never used.
                sobolx2FileName.Replace("\\", "/"),
                sobolVariableValuesFileName.Replace("\\", "/"));

            DataTable resultsForYear = null;
            try
            {
                resultsForYear = RunR(script);

                // Put output from R into results table.
                if (Years.Count() > 1)
                    results.SetIndex(new object[] { year.ToString() });

                foreach (DataColumn col in resultsForYear.Columns)
                {
                    if (col.DataType == typeof(string))
                        results.SetValues(col.ColumnName, DataTableUtilities.GetColumnAsStrings(resultsForYear, col.ColumnName));
                    else
                        results.SetValues(col.ColumnName, DataTableUtilities.GetColumnAsDoubles(resultsForYear, col.ColumnName));
                }
            }
            catch (Exception err)
            {
                // Collect per-year errors so one bad year doesn't abort the remaining years.
                string msg = err.Message;
                if (Years.Count() > 1)
                    msg = "Year " + year + ": " + msg;
                errorsFromR.Add(msg);
            }
        }

        var resultsRawTable = results.ToTable();
        resultsRawTable.TableName = Name + "Statistics";
        dataStore.Writer.WriteTable(resultsRawTable);

        if (errorsFromR.Count > 0)
        {
            string msg = StringUtilities.BuildString(errorsFromR.ToArray(), Environment.NewLine);
            throw new Exception(msg);
        }
    }
}
/// <summary>
/// Runs elastic-net linear regression over the problem data and adds two chart tables to
/// <c>Results</c>: "Coefficients" (standardized coefficient paths over lambda, plus a
/// non-zero-coefficient count on a second Y axis) and "NMSE" (train/test error paths).
/// </summary>
private void CreateSolutionPath() {
  double[] lambda;
  double[] trainNMSE;
  double[] testNMSE;
  double[,] coeff;
  double[] intercept;
  RunElasticNetLinearRegression(Problem.ProblemData, Penality, out lambda, out trainNMSE, out testNMSE, out coeff, out intercept);

  // Coefficient-path chart: log-scale lambda on X, standardized coefficients on Y,
  // variable count on the secondary Y axis.
  var coeffTable = new IndexedDataTable<double>("Coefficients", "The paths of standarized coefficient values over different lambda values");
  coeffTable.VisualProperties.YAxisMaximumAuto = false;
  coeffTable.VisualProperties.YAxisMinimumAuto = false;
  coeffTable.VisualProperties.XAxisMaximumAuto = false;
  coeffTable.VisualProperties.XAxisMinimumAuto = false;

  coeffTable.VisualProperties.XAxisLogScale = true;
  coeffTable.VisualProperties.XAxisTitle = "Lambda";
  coeffTable.VisualProperties.YAxisTitle = "Coefficients";
  coeffTable.VisualProperties.SecondYAxisTitle = "Number of variables";

  var nLambdas = lambda.Length;
  var nCoeff = coeff.GetLength(1);
  var dataRows = new IndexedDataRow<double>[nCoeff];
  var allowedVars = Problem.ProblemData.AllowedInputVariables.ToArray();
  var numNonZeroCoeffs = new int[nLambdas];

  var ds = Problem.ProblemData.Dataset;
  var doubleVariables = allowedVars.Where(ds.VariableHasType<double>);
  var factorVariableNames = allowedVars.Where(ds.VariableHasType<string>);
  var factorVariablesAndValues = ds.GetFactorVariableValues(factorVariableNames, Enumerable.Range(0, ds.Rows)); // must consider all factor values (in train and test set)
  {
    // NOTE: `i` is the column index into `coeff` and is shared across BOTH loops below;
    // the factor-value columns come first, then the double variables, matching the
    // column order produced by RunElasticNetLinearRegression.
    int i = 0;
    foreach (var factorVariableAndValues in factorVariablesAndValues) {
      foreach (var factorValue in factorVariableAndValues.Value) {
        // Standardize by the std dev of the one-hot (binary indicator) encoding of this factor value.
        double sigma = ds.GetStringValues(factorVariableAndValues.Key)
          .Select(s => s == factorValue ? 1.0 : 0.0)
          .StandardDeviation(); // calc std dev of binary indicator
        var path = Enumerable.Range(0, nLambdas).Select(r => Tuple.Create(lambda[r], coeff[r, i] * sigma)).ToArray();
        dataRows[i] = new IndexedDataRow<double>(factorVariableAndValues.Key + "=" + factorValue, factorVariableAndValues.Key + "=" + factorValue, path);
        i++;
      }
    }
    foreach (var doubleVariable in doubleVariables) {
      double sigma = ds.GetDoubleValues(doubleVariable).StandardDeviation();
      var path = Enumerable.Range(0, nLambdas).Select(r => Tuple.Create(lambda[r], coeff[r, i] * sigma)).ToArray();
      dataRows[i] = new IndexedDataRow<double>(doubleVariable, doubleVariable, path);
      i++;
    }
    // add to coeffTable by total weight (larger area under the curve => more important);
    foreach (var r in dataRows.OrderByDescending(r => r.Values.Select(t => t.Item2).Sum(x => Math.Abs(x)))) {
      coeffTable.Rows.Add(r);
    }
  }

  // Count non-zero coefficients per lambda step (model size along the path).
  for (int i = 0; i < coeff.GetLength(0); i++) {
    for (int j = 0; j < coeff.GetLength(1); j++) {
      if (!coeff[i, j].IsAlmost(0.0)) {
        numNonZeroCoeffs[i]++;
      }
    }
  }
  if (lambda.Length > 2) {
    // Fix the X range to whole powers of ten; assumes lambda[0] may be extreme,
    // so the upper bound uses lambda.Skip(1).First() — TODO confirm lambda ordering.
    coeffTable.VisualProperties.XAxisMinimumFixedValue = Math.Pow(10, Math.Floor(Math.Log10(lambda.Last())));
    coeffTable.VisualProperties.XAxisMaximumFixedValue = Math.Pow(10, Math.Ceiling(Math.Log10(lambda.Skip(1).First())));
  }
  coeffTable.Rows.Add(new IndexedDataRow<double>("Number of variables", "The number of non-zero coefficients for each step in the path", lambda.Zip(numNonZeroCoeffs, (l, v) => Tuple.Create(l, (double)v))));
  coeffTable.Rows["Number of variables"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
  coeffTable.Rows["Number of variables"].VisualProperties.SecondYAxis = true;

  Results.Add(new Result(coeffTable.Name, coeffTable.Description, coeffTable));

  // NMSE chart: train/test normalized mean squared error over the same lambda path.
  var errorTable = new IndexedDataTable<double>("NMSE", "Path of NMSE values over different lambda values");
  errorTable.VisualProperties.YAxisMaximumAuto = false;
  errorTable.VisualProperties.YAxisMinimumAuto = false;
  errorTable.VisualProperties.XAxisMaximumAuto = false;
  errorTable.VisualProperties.XAxisMinimumAuto = false;

  // NMSE is normalized, so a fixed [0, 1] Y range is meaningful.
  errorTable.VisualProperties.YAxisMinimumFixedValue = 0;
  errorTable.VisualProperties.YAxisMaximumFixedValue = 1.0;
  errorTable.VisualProperties.XAxisLogScale = true;
  errorTable.VisualProperties.XAxisTitle = "Lambda";
  errorTable.VisualProperties.YAxisTitle = "Normalized mean of squared errors (NMSE)";
  errorTable.VisualProperties.SecondYAxisTitle = "Number of variables";
  errorTable.Rows.Add(new IndexedDataRow<double>("NMSE (train)", "Path of NMSE values over different lambda values", lambda.Zip(trainNMSE, (l, v) => Tuple.Create(l, v))));
  errorTable.Rows.Add(new IndexedDataRow<double>("NMSE (test)", "Path of NMSE values over different lambda values", lambda.Zip(testNMSE, (l, v) => Tuple.Create(l, v))));
  errorTable.Rows.Add(new IndexedDataRow<double>("Number of variables", "The number of non-zero coefficients for each step in the path", lambda.Zip(numNonZeroCoeffs, (l, v) => Tuple.Create(l, (double)v))));
  if (lambda.Length > 2) {
    errorTable.VisualProperties.XAxisMinimumFixedValue = Math.Pow(10, Math.Floor(Math.Log10(lambda.Last())));
    errorTable.VisualProperties.XAxisMaximumFixedValue = Math.Pow(10, Math.Ceiling(Math.Log10(lambda.Skip(1).First())));
  }
  errorTable.Rows["NMSE (train)"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
  errorTable.Rows["NMSE (test)"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
  errorTable.Rows["Number of variables"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
  errorTable.Rows["Number of variables"].VisualProperties.SecondYAxis = true;
  Results.Add(new Result(errorTable.Name, errorTable.Description, errorTable));
}
/// <summary>
/// Form constructor: loads the Northwind sample data from an embedded XML resource,
/// builds optional lookup indexes, creates live views over the Customers/Orders tables,
/// and binds two live query views to the form's grids.
/// </summary>
public Form1() {
  InitializeComponent();

  // Filling the data set with data
  string dataPath = string.Empty;
  foreach (DataColumn col in typedDataSet.Tables["Orders"].Columns) {
    col.ColumnMapping = MappingType.Attribute; // to make our XML file more compact
  }
  typedDataSet.Namespace = ""; // need this because our XML file does not have xmlns defined
  using (var xmlStream = Assembly.GetExecutingAssembly().GetManifestResourceStream("LiveViewsDataSet.Northwind.xml")) {
    System.Diagnostics.Debug.Assert(xmlStream != null);
    XmlReaderSettings settings = new XmlReaderSettings { IgnoreWhitespace = true };
    XmlReader reader = XmlReader.Create(xmlStream, settings);
    typedDataSet.ReadXml(reader);
  }

  // Creating indexes. It is optional, only for performance optimization.
  // If you delete this, the functionality will be the same and slowdown will be noticeable only on large data sets.
  // If you don't need to create indexes, you can use typedDataSet.Customers.AsLive() below instead of customers.AsLive(), so you don't need the
  // IndexedDataTable<> class, can proceed directly to View<NorthwindDataSet.CustomersRow>.
  IndexedDataTable<NorthwindDataSet.CustomersRow> customers = typedDataSet.Customers.AsIndexed();
  IndexedDataTable<NorthwindDataSet.OrdersRow> orders = typedDataSet.Orders.AsIndexed();
  customers.Indexes.Add(c => c.CustomerID, true); // unique index on the customer key
  orders.Indexes.Add(o => o.CustomerID);
  orders.Indexes.Add(o => o.ShipCity);
  //...end of creating indexes

  // Make the tables live:
  customersView = typedDataSet.Customers.AsLive();
  ordersView = typedDataSet.Orders.AsLive().AsUpdatable(); // AsUpdatable() makes it possible for
  // the user to change data directly in the grid showing the join view below.
  // Order fields can be changed by the user there, Customer fields can't be changed by the user
  // Specifying this argument is needed only if you need to change data directly in the view instead
  // of changing the base data, and then only for query operators with more than one argument such as Join.

  // Creating a live view over the base data
  IListSource view = (from o in ordersView
                      where o.ShipCity == "London" || o.ShipCity == "Colchester"
                      select new { OrderID = o.OrderID, CustomerID = o.CustomerID, ShipCity = o.ShipCity }).AsDynamic();
  ShowViewInGrid(view, dataGridView1);

  // Creating another live view over the base data
  customerOrderView = from c in customersView
                      join o in ordersView on c.CustomerID equals o.CustomerID
                      where o.ShipCity == "London" || o.ShipCity == "Colchester"
                      // Using a user-defined class like CustomerOrder is not mandatory. Anonymous class could be used instead as in the
                      // query above, but in that case we could not assign it to a variable like customerOrderView, defined outside
                      // of the scope of this method, because anonymous classes are only available in local scope.
                      select new CustomerOrder { CustomerID = c.CustomerID, City = c.City, OrderID = o.OrderID, ShipCity = o.ShipCity };
  ShowViewInGrid(customerOrderView, dataGridView2);

  // See the readme.txt file in the project folder for a description of the live view functionality you can try in this sample
}