// Verifies, for every schema, that each input which references a validation table uses a key
// that is one of the INPUT columns of that table. All problems are collected and reported together.
public void testInputTables()
{
    HashSet<String> errors = new HashSet<String>();

    foreach (String schemaId in _STAGING.getSchemaIds())
    {
        StagingSchema schema = _STAGING.getSchema(schemaId);

        foreach (StagingSchemaInput input in schema.getInputs())
        {
            // inputs without a validation table have nothing to check
            if (input.getTable() == null)
            {
                continue;
            }

            StagingTable table = _STAGING.getTable(input.getTable());
            if (table == null)
            {
                // report the missing table instead of failing with a NullReferenceException
                errors.Add("Input key " + schemaId + ":" + input.getKey() + " references unknown validation table " + input.getTable());
                continue;
            }

            // collect the keys of all INPUT columns of the validation table
            HashSet<String> inputKeys = new HashSet<String>();
            foreach (StagingColumnDefinition def in table.getColumnDefinitions())
            {
                if (ColumnType.INPUT == def.getType())
                {
                    inputKeys.Add(def.getKey());
                }
            }

            // make sure the input key matches an input column of the table
            if (!inputKeys.Contains(input.getKey()))
            {
                // HashSet.ToString() only yields the type name, so the keys are joined explicitly
                errors.Add("Input key " + schemaId + ":" + input.getKey() + " does not match validation table " + table.getId() + ": [" + String.Join(", ", inputKeys) + "]");
            }
        }
    }

    assertNoErrors(errors, "input values and their assocated validation tables");
}
// Verifies that, within each schema, no two mappings share the same id.
public void testMappingIdUniqueness()
{
    HashSet<String> errors = new HashSet<String>();

    foreach (String schemaId in _STAGING.getSchemaIds())
    {
        StagingSchema schema = _STAGING.getSchema(schemaId);

        List<IMapping> mappings = schema.getMappings();
        if (mappings == null)
        {
            continue;
        }

        // ids seen so far in this schema
        HashSet<String> ids = new HashSet<String>();
        foreach (StagingMapping mapping in mappings)
        {
            // HashSet.Add returns false when the id is already present
            if (!ids.Add(mapping.getId()))
            {
                errors.Add("The mapping id " + schemaId + ":" + mapping.getId() + " is duplicated. This should never happen");
            }
        }
    }

    // describe what this test actually checks (the old text was copied from the input table test)
    assertNoErrors(errors, "mapping ids");
}
// Returns the set of output keys a schema can produce.
// If the schema has an output map, its keys are returned as-is; the mappings and the context are
// not consulted in that case. Otherwise the outputs of every mapping (as reported by the
// mapping-level getOutputs overload for the given context) are merged into a single set.
// @param schema a StagingSchema
// @param context a context of values passed on to the mapping-level getOutputs overload
// @return a Set of unique output keys
public HashSet<String> getOutputs(StagingSchema schema, Dictionary<String, String> context)
{
    HashSet<String> outputs = new HashSet<String>();

    // when outputs are defined on the schema they are exactly what is returned
    if (schema.getOutputMap() != null)
    {
        foreach (KeyValuePair<String, IOutput> entry in schema.getOutputMap())
        {
            outputs.Add(entry.Key);
        }

        return outputs;
    }

    // no outputs defined: derive the possible outputs from the mappings.
    // (No filtering against the output map is needed here: this point is only reached when
    // the schema has no output map.)
    if (schema.getMappings() != null)
    {
        foreach (StagingMapping mapping in schema.getMappings())
        {
            outputs.UnionWith(getOutputs(mapping, context));
        }
    }

    return outputs;
}
// Looks up the schema stored in _schemas under the given id.
// @param id schema identifier
// @return the matching schema, or null when no schema is stored under that id
public override IDefinition getDefinition(String id)
{
    StagingSchema schema;
    if (!_schemas.TryGetValue(id, out schema))
    {
        return null;
    }

    return schema;
}
// Initializes the schema, stores it in _schemas under its id (replacing any schema already
// stored under that id) and then adds every key currently in _schemas to _SchemaKeys.
// @param schema the schema to add
public void addSchema(StagingSchema schema)
{
    initSchema(schema);

    String schemaId = schema.getId();
    _schemas[schemaId] = schema;

    // all keys are re-added, not just the new one
    foreach (string key in _schemas.Keys)
    {
        _SchemaKeys.Add(key);
    }
}
// Builds a StagingSchema from the supplied settings and asks it to create its staging tables.
private static void CreateStagingTables(
    string schemaName,
    string connectionString,
    int commandTimeoutSecs,
    int maxDegreeOfParallelism,
    Pipelines pipelineOptions)
{
    // note the constructor takes the connection string before the schema name
    StagingSchema stagingSchema = new StagingSchema(
        connectionString,
        schemaName,
        commandTimeoutSecs,
        maxDegreeOfParallelism,
        pipelineOptions);

    stagingSchema.CreateStagingTables();
}
// Checks that the inputs reported for the "prostate" schema depend on the lookup context:
// "clin_t" is expected for histology 8000 and not expected for histology 8120 (site C619).
public void testLookupInputs()
{
    StagingSchema prostate = _STAGING.getSchema("prostate");

    // histology 8000: clin_t must be one of the inputs
    TnmSchemaLookup lookup = new TnmSchemaLookup("C619", "8000");
    bool hasClinT = _STAGING.getInputs(prostate, lookup.getInputs()).Contains("clin_t");
    Assert.IsTrue(hasClinT);

    // histology 8120: clin_t must not be one of the inputs
    lookup = new TnmSchemaLookup("C619", "8120");
    hasClinT = _STAGING.getInputs(prostate, lookup.getInputs()).Contains("clin_t");
    Assert.IsFalse(hasClinT);
}
// Checks that no input of any schema declares a unit or a non-zero number of decimal places.
public void testBasicInputs()
{
    foreach (String schemaId in _STAGING.getSchemaIds())
    {
        foreach (StagingSchemaInput input in _STAGING.getSchema(schemaId).getInputs())
        {
            Assert.IsNull(input.getUnit(), "No schemas should have units");

            bool noDecimalPlaces = input.getDecimalPlaces() == 0;
            Assert.IsTrue(noDecimalPlaces, "No schemas should have decimal places");
        }
    }
}
// Returns the identifiers of the tables involved in the specified schema.
// When the schema exists and reports a non-null set, that set is returned directly (not a copy).
// @param schemaId schema identifier
// @return a Set of table identifiers; a new empty set if the schema is not found or reports no tables
public HashSet<String> getInvolvedTables(String schemaId)
{
    StagingSchema schema = getSchema(schemaId);

    if (schema == null || schema.getInvolvedTables() == null)
    {
        return new HashSet<String>();
    }

    return schema.getInvolvedTables();
}
// Checks the validity of a single input field of a schema against the supplied context. The
// context is expected to hold the value for this key plus any other values the validation table
// needs. False is returned when the schema or the input does not exist, or when the context is
// null or empty. An input without a validation table is always considered valid.
// @param schemaId schema identifier
// @param key input key
// @param context Map of keys/values to validate against
// @return true if the input has no validation table or the trimmed context matches a row of it
public bool isContextValid(String schemaId, String key, Dictionary<String, String> context)
{
    StagingSchema schema = getSchema(schemaId);
    if (schema == null)
    {
        return false;
    }

    // find the input definition for the key
    IInput input;
    bool found = schema.getInputMap().TryGetValue(key, out input);
    if (!found || input == null)
    {
        return false;
    }

    // a missing context can never be valid
    if (context == null || context.Count == 0)
    {
        return false;
    }

    // matching is done on trimmed values; null values are treated as empty strings
    Dictionary<String, String> trimmed = new Dictionary<String, String>(20, StringComparer.Ordinal);
    foreach (KeyValuePair<String, String> pair in context)
    {
        trimmed[pair.Key] = (pair.Value ?? "").Trim();
    }

    // without a validation table there is nothing to test against
    if (input.getTable() == null)
    {
        return true;
    }

    ITable table = getTable(input.getTable());
    if (table == null)
    {
        return false;
    }

    return DecisionEngineFuncs.matchTable(table, trimmed) != null;
}
// Finds the schemas whose schema selection table matches the lookup (site, histology and an
// optional discriminator).
// An empty list is returned when a supplied site or histology is not valid, when a discriminator
// is supplied without both a non-empty site and a non-empty histology, or when neither site nor
// histology is supplied.
// @param lookup schema lookup input
// @return a list of matching StagingSchema objects (possibly empty)
private List<StagingSchema> getSchemas(SchemaLookup lookup)
{
    List<StagingSchema> matches = new List<StagingSchema>(5);

    String site = lookup.getInput(StagingData.PRIMARY_SITE_KEY);
    String histology = lookup.getInput(StagingData.HISTOLOGY_KEY);
    bool hasDiscriminator = lookup.hasDiscriminator();

    // a supplied value that fails validation can never match a schema
    bool siteInvalid = site != null && !isValidSite(site);
    if (siteInvalid || (histology != null && !isValidHistology(histology)))
    {
        return matches;
    }

    // a discriminator is only usable together with both site and histology
    if (hasDiscriminator && (String.IsNullOrEmpty(site) || String.IsNullOrEmpty(histology)))
    {
        return matches;
    }

    // at least one of site or histology must be supplied
    if (site == null && histology == null)
    {
        return matches;
    }

    // match each schema's selection table using only the keys supplied in the lookup
    foreach (String schemaId in getSchemaIds())
    {
        StagingSchema schema = (StagingSchema)getDefinition(schemaId);
        if (schema.getSchemaSelectionTable() == null)
        {
            continue;
        }

        StagingTable table = (StagingTable)getTable(schema.getSchemaSelectionTable());
        if (table == null)
        {
            continue;
        }

        if (DecisionEngineFuncs.matchTable(table, lookup.getInputs(), lookup.getKeys()) != null)
        {
            matches.Add(schema);
        }
    }

    return matches;
}
// Resets the temporary staging schema: creates its tables when any are missing,
// otherwise truncates the existing ones.
private void ClearTempStage()
{
    StagingSchema tempStage = new StagingSchema(
        AdminConnectionString,
        StagingSchema.TemporaryStagingSchemaName,
        Timeouts.AdminDatabase,
        _configuration.MaxDegreeOfParallelism,
        _configuration.Pipelines);

    if (!tempStage.AllTablesExist())
    {
        tempStage.CreateStagingTables();
        return;
    }

    tempStage.TruncateStagingTables();
}
// Checks, for every schema, that each input key returned by _STAGING.getInputs(schema) is
// present in the schema's input map. All violations are collected and reported together.
public void verifyInputs()
{
    HashSet<String> errors = new HashSet<String>();

    foreach (String schemaId in _STAGING.getSchemaIds())
    {
        StagingSchema schema = _STAGING.getSchema(schemaId);

        foreach (String input in _STAGING.getInputs(schema))
        {
            bool declared = schema.getInputMap().ContainsKey(input);
            if (declared)
            {
                continue;
            }

            errors.Add("Error processing schema " + schema.getId() + ": Table input '" + input + "' not in master list of inputs");
        }
    }

    assertNoErrors(errors, "input values");
}
// Runs two checks against the temporary staging schema: the schema's own integrity check,
// followed by the consolidation key check.
private void CheckStageIntegrity()
{
    // step 1: integrity check (per the original note: foreign key values must have an entry in the primary table)
    StagingSchema tempStage = new StagingSchema(
        AdminConnectionString,
        StagingSchema.TemporaryStagingSchemaName,
        Timeouts.AdminDatabase,
        _configuration.MaxDegreeOfParallelism,
        _configuration.Pipelines);
    tempStage.CheckIntegrity();

    // step 2: consolidation key check (per the original note: key values must not be blank)
    ConsolidationKeyCheck.Execute(
        AdminConnectionString,
        Timeouts.AdminDatabase,
        _configuration.MaxDegreeOfParallelism,
        _configuration.Consolidation,
        StagingSchema.TemporaryStagingSchemaName);
}
// Returns the set of input keys that could be used by a schema for the given context.
// The result is made up of the INPUT columns of the schema selection table (when that table can
// be found) plus the inputs reported for each mapping by the mapping-level getInputs overload.
// A single "excluded inputs" set is shared across all mappings, so the mappings are processed in
// order. Keys listed in CONTEXT_KEYS are always removed since they never need to be supplied.
// All inputs returned from this method should be in the schema input list; otherwise there is a
// problem with the schema.
// @param schema a StagingSchema
// @param context a context of values passed on to the mapping-level getInputs overload
// @return a Set of unique input keys
public HashSet<String> getInputs(StagingSchema schema, Dictionary<String, String> context)
{
    HashSet<String> result = new HashSet<String>();

    // inputs used for schema selection
    String selectionTableId = schema.getSchemaSelectionTable();
    StagingTable selectionTable = selectionTableId != null ? getTable(selectionTableId) : null;
    if (selectionTable != null)
    {
        foreach (StagingColumnDefinition column in selectionTable.getColumnDefinitions())
        {
            if (ColumnType.INPUT == column.getType())
            {
                result.Add(column.getKey());
            }
        }
    }

    // inputs used by the mappings
    if (schema.getMappings() != null)
    {
        // shared across mappings and handed to every mapping-level call
        HashSet<String> excludedInputs = new HashSet<String>();

        foreach (StagingMapping mapping in schema.getMappings())
        {
            result.UnionWith(getInputs(mapping, context, excludedInputs));
        }
    }

    // context variables never have to be supplied by the caller
    result.ExceptWith(CONTEXT_KEYS);

    return result;
}
// Constructor: loads all tables and schemas of an algorithm version from JSON files on disk and
// then sets up the id lists and caches.
// The files are expected under <basedir>Algorithms\<algorithm in lower case>\<version>\tables and
// ...\schemas; each directory contains an ids.txt listing the file names (without ".json").
// <basedir> is the current directory when it contains an "Algorithms" folder, otherwise a
// "Resources" folder located three (or four, when the current directory contains "x64") levels up.
// @param algorithm algorithm
// @param version version
// Throws InvalidOperationException (wrapping the original IOException) when reading fails.
protected StagingFileDataProvider(String algorithm, String version) : base()
{
    _algorithm = algorithm;
    _version = version;

    String basedir = System.IO.Directory.GetCurrentDirectory() + "\\";
    if (!Directory.Exists(basedir + "Algorithms\\"))
    {
        basedir = System.IO.Directory.GetCurrentDirectory() + "\\..\\..\\..\\";
        if (System.IO.Directory.GetCurrentDirectory().IndexOf("x64") >= 0)
        {
            basedir += "\\..\\";
        }
        basedir += "Resources\\";
    }

    // the directory name must not depend on the current culture (e.g. Turkish casing of 'I')
    String versionDir = basedir + "Algorithms\\" + algorithm.ToLowerInvariant() + "\\" + version;
    String directory = "";

    // loop over all tables and load them into Map
    try
    {
        directory = versionDir + "\\tables";
        foreach (String file in readLines(directory + "\\ids.txt"))
        {
            if (file.Length == 0)
            {
                continue;
            }

            StagingTable table;
            using (TextReader reader = getStagingInputStream(directory + "\\" + file + ".json"))
            {
                Newtonsoft.Json.JsonSerializer serializer = new Newtonsoft.Json.JsonSerializer();
                table = (StagingTable)serializer.Deserialize(reader, typeof(StagingTable));
            }

            initTable(table);
            _tables[table.getId()] = table;
        }
    }
    catch (IOException e)
    {
        // keep the original exception as inner exception so the stack trace is not lost
        throw new System.InvalidOperationException("IOException reading tables: " + e.Message, e);
    }

    // loop over all schemas and load them into Map
    try
    {
        directory = versionDir + "\\schemas";
        foreach (String file in readLines(directory + "\\ids.txt"))
        {
            if (file.Length == 0)
            {
                continue;
            }

            StagingSchema schema;
            using (TextReader reader = getStagingInputStream(directory + "\\" + file + ".json"))
            {
                Newtonsoft.Json.JsonSerializer serializer = new Newtonsoft.Json.JsonSerializer();
                schema = (StagingSchema)serializer.Deserialize(reader, typeof(StagingSchema));
            }

            initSchema(schema);
            _schemas[schema.getId()] = schema;
        }
    }
    catch (IOException e)
    {
        // keep the original exception as inner exception so the stack trace is not lost
        throw new System.InvalidOperationException("IOException reading schemas: " + e.Message, e);
    }

    GenerateSchemaIds();
    GenerateTableIds();

    // finally, initialize any caches now that everything else has been set up
    invalidateCache();
}
// Convenience overload: returns the output keys of a schema without supplying a context.
// Delegates to getOutputs(schema, context) with a null context.
// @param schema a StagingSchema
// @return a Set of unique output keys
public HashSet<String> getOutputs(StagingSchema schema)
{
    return getOutputs(schema, null);
}
// Checks the externally loaded "testing" algorithm (version 99.99): the number of schemas and
// tables, the metadata of one schema and one table, the tables involved in the "urethra" schema,
// and the result of staging one sample case.
public void testExternalLoad()
{
    const String expectedAlgorithm = "testing";
    const String expectedVersion = "99.99";

    // provider level information
    Assert.AreEqual(expectedAlgorithm, _STAGING.getAlgorithm());
    Assert.AreEqual(expectedVersion, _STAGING.getVersion());
    Assert.AreEqual(1, _STAGING.getSchemaIds().Count);
    Assert.AreEqual(62, _STAGING.getTableIds().Count);

    // schema
    StagingSchema schema = _STAGING.getSchema("urethra");
    Assert.IsNotNull(schema);
    Assert.AreEqual(expectedAlgorithm, schema.getAlgorithm());
    Assert.AreEqual(expectedVersion, schema.getVersion());

    // table
    StagingTable table = _STAGING.getTable("ajcc_descriptor_codes");
    Assert.IsNotNull(table);
    Assert.AreEqual(expectedAlgorithm, table.getAlgorithm());
    Assert.AreEqual(expectedVersion, table.getVersion());
    Assert.AreEqual(6, table.getTableRows().Count);

    // involved tables
    HashSet<String> involved = _STAGING.getInvolvedTables("urethra");
    Assert.AreEqual(62, involved.Count);
    Assert.IsTrue(involved.Contains("mets_eval_ipa"));

    // sample case: key/value pairs supplied as input
    String[,] inputs =
    {
        { "site", "C680" },
        { "hist", "8000" },
        { "behavior", "3" },
        { "grade", "9" },
        { "year_dx", "2013" },
        { "cs_input_version_original", "020550" },
        { "extension", "100" },
        { "extension_eval", "9" },
        { "nodes", "100" },
        { "nodes_eval", "9" },
        { "mets", "10" },
        { "mets_eval", "9" }
    };

    StagingData data = new StagingData();
    for (int i = 0; i < inputs.GetLength(0); i++)
    {
        data.setInput(inputs[i, 0], inputs[i, 1]);
    }

    // perform the staging
    _STAGING.stage(data);

    Assert.AreEqual(StagingData.Result.STAGED, data.getResult());
    Assert.AreEqual("urethra", data.getSchemaId());
    Assert.AreEqual(0, data.getErrors().Count);
    Assert.AreEqual(37, data.getPath().Count);

    // check output
    Assert.AreEqual("129", data.getOutput("schema_number"));
    Assert.AreEqual("020550", data.getOutput("csver_derived"));

    // AJCC 6
    Assert.AreEqual("70", data.getOutput("stor_ajcc6_stage"));

    // AJCC 7
    Assert.AreEqual("700", data.getOutput("stor_ajcc7_stage"));

    // Summary Stage
    Assert.AreEqual("7", data.getOutput("stor_ss77"));
    Assert.AreEqual("7", data.getOutput("stor_ss2000"));
}
// Stages the passed case.
// Schema id, output, errors and path of the data object are reset first. Staging stops early, with
// the corresponding result set on the data object, when site or histology is missing, when the
// lookup does not yield exactly one schema, when an input key is not defined in the schema, or
// when the year of diagnosis is not valid for the schema. Otherwise the decision engine processes
// the case and its output, errors and path are copied onto the data object.
// @param data the case to stage; all input values are read from it and results are written to it
// @return the same StagingData with output values filled in
public StagingData stage(StagingData data)
{
    // start from a clean slate: schema/output/errors/path
    data.setSchemaId(null);
    data.setOutput(new Dictionary<String, String>(100, StringComparer.Ordinal));
    data.setErrors(new List<Error>(100));
    data.setPath(new List<String>(100));

    // site and histology are mandatory
    bool siteMissing = data.getInput(StagingData.PRIMARY_SITE_KEY) == null;
    if (siteMissing || data.getInput(StagingData.HISTOLOGY_KEY) == null)
    {
        data.setResult(StagingData.Result.FAILED_MISSING_SITE_OR_HISTOLOGY);
        return data;
    }

    // exactly one schema must match
    List<StagingSchema> schemas = lookupSchema(new SchemaLookup(data.getInput()));
    if (schemas.Count == 0)
    {
        data.setResult(StagingData.Result.FAILED_NO_MATCHING_SCHEMA);
        return data;
    }
    if (schemas.Count != 1)
    {
        data.setResult(StagingData.Result.FAILED_MULITPLE_MATCHING_SCHEMAS);
        return data;
    }

    StagingSchema schema = schemas[0];
    data.setSchemaId(schema.getId());

    // work on a copy of the input
    Dictionary<String, String> context = new Dictionary<String, String>(data.getInput(), StringComparer.Ordinal);

    // every supplied input must be defined in the schema; all unknown keys are reported
    foreach (KeyValuePair<String, String> supplied in context)
    {
        if (schema.getInputMap().ContainsKey(supplied.Key))
        {
            continue;
        }

        data.addError(new Error.ErrorBuilder(Error.Type.UNKNOWN_INPUT).message("Unknown input key supplied: " + supplied.Key).key(supplied.Key).build());
    }
    if (data.getErrors().Count > 0)
    {
        data.setResult(StagingData.Result.FAILED_INVALID_INPUT);
        return data;
    }

    // add context variables
    addContextKeys(context);

    // the year of DX must be valid for this schema
    if (!isContextValid(schema.getId(), StagingData.YEAR_DX_KEY, context))
    {
        data.setResult(StagingData.Result.FAILED_INVALID_YEAR_DX);
        return data;
    }

    // perform the staging
    Result result = _engine.process(schema.getId(), context);

    // remove the context variables
    removeContextKeys(context);

    // translate the engine result type into the staging result
    bool inputFailed = Result.Type.FAILED_INPUT == result.getType();
    data.setResult(inputFailed ? StagingData.Result.FAILED_INVALID_INPUT : StagingData.Result.STAGED);

    // remove the original input keys from the context, and also every key defined as an input of
    // the selected schema (this covers inputs that were set with a default value)
    foreach (KeyValuePair<String, String> original in data.getInput())
    {
        context.Remove(original.Key);
    }
    foreach (StagingSchemaInput input in schema.getInputs())
    {
        context.Remove(input.getKey());
    }

    // copy the results to the data card
    data.setOutput(result.getContext());
    data.setErrors(result.getErrors());
    data.setPath(result.getPath());

    return data;
}
// Initializes the data provider from a zip archive.
// Every ".json" entry whose full name starts with "tables" is loaded as a StagingTable and every
// ".json" entry whose full name starts with "schemas" is loaded as a StagingSchema; other entries
// are ignored. All loaded tables and schemas must report the same single algorithm and the same
// single version, which become the algorithm and version of this provider.
// @param inStream stream containing the zip archive
// Throws InvalidOperationException when zero or more than one algorithm or version is found.
private void init(Stream inStream)
{
    HashSet<String> algorithms = new HashSet<String>();
    HashSet<String> versions = new HashSet<String>();

    using (ZipArchive archive = new ZipArchive(inStream, ZipArchiveMode.Read))
    {
        foreach (ZipArchiveEntry entry in archive.Entries)
        {
            // skip entries without a file name and anything that is not a .json file;
            // archive paths are identifiers, so they are compared ordinally
            if (entry.Name.Length == 0 || !entry.Name.EndsWith(".json", StringComparison.Ordinal))
            {
                continue;
            }

            if (entry.FullName.StartsWith("tables", StringComparison.Ordinal))
            {
                String json = extractEntry(entry);
                StagingTable table = Newtonsoft.Json.JsonConvert.DeserializeObject<StagingTable>(json);

                if (DebugSettings.DEBUG_LOADED_TABLES)
                {
                    Debug.WriteLine("Table: ");
                    Debug.WriteLine(table.GetDebugString(" "));
                }

                initTable(table);

                algorithms.Add(table.getAlgorithm());
                versions.Add(table.getVersion());

                _tables[table.getId()] = table;
            }
            else if (entry.FullName.StartsWith("schemas", StringComparison.Ordinal))
            {
                String json = extractEntry(entry);
                StagingSchema schema = Newtonsoft.Json.JsonConvert.DeserializeObject<StagingSchema>(json);

                if (DebugSettings.DEBUG_LOADED_SCHEMAS)
                {
                    Debug.WriteLine("Schema: ");
                    Debug.WriteLine(schema.GetDebugString(" "));
                }

                initSchema(schema);

                algorithms.Add(schema.getAlgorithm());
                versions.Add(schema.getVersion());

                _schemas[schema.getId()] = schema;
            }
        }
    }

    // verify that all the algorithm names and versions are consistent
    if (algorithms.Count != 1)
    {
        throw new System.InvalidOperationException("Error initializing provider; only a single algorithm should be included in file");
    }
    if (versions.Count != 1)
    {
        throw new System.InvalidOperationException("Error initializing provider; only a single version should be included in file");
    }

    // each set holds exactly one element at this point
    foreach (String algorithm in algorithms)
    {
        _algorithm = algorithm;
    }
    foreach (String version in versions)
    {
        _version = version;
    }

    GenerateSchemaIds();
    GenerateTableIds();

    // finally, initialize any caches now that everything else has been set up
    invalidateCache();
}
// Initializes a schema: validates it and builds its key-based lookup maps.
// - the schema must have a schema selection table
// - inputs (if any) are indexed by key into the input map; every input needs a key
// - outputs (if any) are indexed by key into the output map; every output needs a key
// - no mapping may set, in its initial context, a key that is also defined as an input
// When two inputs (or outputs) share a key, the later one replaces the earlier one in the map.
// @param schema schema entity
// @return the same schema entity, initialized
// Throws InvalidOperationException when one of the rules above is violated.
public static StagingSchema initSchema(StagingSchema schema)
{
    // a schema selection table is mandatory
    if (schema.getSchemaSelectionTable() == null)
    {
        throw new System.InvalidOperationException("Schemas must have a schema selection table.");
    }

    // index the inputs by key for faster lookup
    if (schema.getInputs() != null)
    {
        Dictionary<String, IInput> inputsByKey = new Dictionary<String, IInput>();
        foreach (StagingSchemaInput input in schema.getInputs())
        {
            String inputKey = input.getKey();
            if (inputKey == null)
            {
                throw new System.InvalidOperationException("All input definitions must have a 'key' defined.");
            }

            inputsByKey[inputKey] = input;
        }

        schema.setInputMap(inputsByKey);
    }

    // index the outputs by key for faster lookup
    if (schema.getOutputs() != null)
    {
        Dictionary<String, IOutput> outputsByKey = new Dictionary<String, IOutput>();
        foreach (StagingSchemaOutput output in schema.getOutputs())
        {
            String outputKey = output.getKey();
            if (outputKey == null)
            {
                throw new System.InvalidOperationException("All output definitions must have a 'key' defined.");
            }

            outputsByKey[outputKey] = output;
        }

        schema.setOutputMap(outputsByKey);
    }

    // a mapping's initial context must not set a value for an input field
    if (schema.getMappings() == null)
    {
        return schema;
    }

    foreach (StagingMapping mapping in schema.getMappings())
    {
        if (mapping.getInitialContext() == null)
        {
            continue;
        }

        foreach (StagingKeyValue kv in mapping.getInitialContext())
        {
            if (schema.getInputMap().ContainsKey(kv.getKey()))
            {
                throw new System.InvalidOperationException("The key '" + kv.getKey() + "' is defined in an initial context, but that is not allowed since it is also defined as an input.");
            }
        }
    }

    return schema;
}