/// <summary>
/// Builds a <see cref="MetaTable"/> from <paramref name="sourceTable"/>, following the execution mode
/// currently set on the task score.
/// </summary>
/// <param name="sourceTable">Source table that provides the schema and the entries.</param>
/// <param name="task">Task supplying the table description, the property dictionary and the execution mode.</param>
/// <param name="logger">Not used by this method.</param>
/// <returns>
/// The constructed table for the Application and Validation modes; <c>null</c> for any other mode.
/// </returns>
public MetaTable Construct(SourceTable sourceTable, TableExtractionTask task, ITextRender logger)
{
    ExtractionTaskEngineMode mode = task.score.executionMode;
    bool isApplication = mode == ExtractionTaskEngineMode.Application;
    bool isValidation = mode == ExtractionTaskEngineMode.Validation;

    // Only these two modes produce a table.
    if (!isApplication && !isValidation)
    {
        return null;
    }

    MetaTable result = new MetaTable(task.tableDescription);
    result.SetSchema(sourceTable);

    if (isApplication)
    {
        // Application: overlay the property metadata stored with the task, then load the entries.
        result.ApplySchema(task.PropertyDictionary.items.Select(x => x.Meta));
        result.SetEntries(sourceTable);
    }
    else
    {
        // Validation: load entries linked to their source, then refine the schema from the content.
        result.SetEntriesAndLinkToSource(sourceTable);
        result.RefineSchema(sourceContentAnalysis);
    }

    result.Comment = "Constructed by " + GetType().Name;
    return result;
}
/// <summary>
/// Creates a validation result bound to the given task.
/// </summary>
/// <param name="_task">Task this result belongs to.</param>
public TaskValidationResult(TableExtractionTask _task)
{
    // Only the task reference is stored. Creating a new score for the task and
    // back-linking it to this result is currently disabled:
    //   task.score = new TableExtractionTaskScore();
    //   task.score.ValidationResult = this;
    this.task = _task;
}
/// <summary>
/// Registers a freshly constructed meta table with the current score entry of the task.
/// </summary>
/// <param name="metaTable">Table to register; it is added as given, without a null check.</param>
/// <param name="task">Task whose current score entry receives the table.</param>
/// <param name="logger">Not used by this method.</param>
public void AfterConstruction(MetaTable metaTable, TableExtractionTask task, ITextRender logger)
{
    ExtractionTaskEngineMode mode = task.score.executionMode;

    // Validation and Application are handled identically; every other mode is a no-op.
    // (A disabled code path used to derive task.tableDescription here in Application mode.)
    bool registersTable = mode == ExtractionTaskEngineMode.Validation
                          || mode == ExtractionTaskEngineMode.Application;

    if (registersTable)
    {
        task.score.CurrentEntry().metaTable.Add(metaTable);
    }
}
/// <summary>
/// Constructs a meta table by delegating to the universal meta table constructor.
/// </summary>
/// <param name="sourceTable">The source table.</param>
/// <param name="task">The task.</param>
/// <returns>Whatever the universal constructor returns for the given table and task.</returns>
public virtual MetaTable Construct(SourceTable sourceTable, TableExtractionTask task)
{
    // The instance-level output is handed over as the logger argument.
    return universalMetaTableConstructor.Construct(sourceTable, task, output);
}
/// <summary>
/// Produces the meta table description for a source table, depending on the task's execution mode.
/// </summary>
/// <param name="sourceTable">Source table to describe (only read in Training mode).</param>
/// <param name="task">Task providing the execution mode, the name and the stored table description.</param>
/// <param name="logger">Not used by this method.</param>
/// <returns>
/// A new vertical-format description built from the analysed source table in Training mode;
/// <c>null</c> in every other mode (the stored <c>task.tableDescription</c> is only checked, not returned).
/// </returns>
/// <exception cref="InvalidOperationException">
/// The mode is not Training and the task has no table description set.
/// </exception>
public MetaTableDescription ConstructDescription(SourceTable sourceTable, TableExtractionTask task, ITextRender logger)
{
    MetaTableDescription metaDescription = null;

    // Fetched up front, before the mode is evaluated; its content is dumped to disk below.
    builderForText reporter = task.score.CurrentEntry().reporter;

    switch (task.score.executionMode)
    {
        case ExtractionTaskEngineMode.Training:
            SourceTableDescription sourceDesc = sourceContentAnalysis.GetDescription(sourceTable);
            metaDescription = new imbSCI.DataExtraction.MetaTables.Descriptors.MetaTableDescription(sourceDesc, imbSCI.DataExtraction.MetaTables.Descriptors.MetaTableFormatType.vertical);
            break;

        case ExtractionTaskEngineMode.Validation:
        case ExtractionTaskEngineMode.Application:
        default:
            if (task.tableDescription == null)
            {
                // A specific exception type instead of the bare System.Exception; the message is unchanged
                // and existing catch (Exception) handlers keep working.
                throw new InvalidOperationException("Task [" + task.name + "] has no table description set.");
            }
            break;
    }

    if (folder != null)
    {
        // Diagnostic dump of the current reporter content, one file per task and execution mode.
        String sp = folder.pathFor("UMTC_Construct_" + task.name + "_" + task.score.executionMode.toString() + ".txt");
        File.WriteAllText(sp, reporter.GetContent());
    }

    return metaDescription;
}
/// <summary>
/// Creates a single task from the tasks of all items and selects its multi-node policy
/// from the features of the first item.
/// </summary>
/// <returns>
/// The merged task. Its <c>multiNodePolicy</c> is only assigned when the first item carries a
/// horizontal or vertical orientation feature among its stable/variable features.
/// </returns>
public TableExtractionTask GetMergedTask()
{
    TableExtractionTask merged = new TableExtractionTask(items.Select(x => x.Task));

    // Only features flagged as stable or variable take part in the orientation decision.
    List<SourceTableCase> relevantFeatures = items.First().Features
        .Where(feature => feature.HasFlag(SourceTableCase.stable) || feature.HasFlag(SourceTableCase.variable))
        .ToList();

    // Horizontal orientation takes precedence over vertical.
    if (relevantFeatures.Contains(SourceTableCase.horizontalOrientation))
    {
        merged.multiNodePolicy = Analyzers.Data.TaskMultiNodePolicy.AsSingleTableRows;
        return merged;
    }

    if (relevantFeatures.Contains(SourceTableCase.verticalOrientation))
    {
        merged.multiNodePolicy = Analyzers.Data.TaskMultiNodePolicy.AsSignleTableColumns;
    }

    return merged;
}
//public override MetaTableSchema GetTableSchema()
//{
//    return null;
//}

/// <summary>
/// Constructs the meta table for the given source table by deferring entirely to the base implementation.
/// </summary>
/// <param name="sourceTable">The source table.</param>
/// <param name="task">The task.</param>
/// <returns>The result of the base <c>Construct</c> call.</returns>
public override MetaTable Construct(SourceTable sourceTable, TableExtractionTask task)
{
    // A disabled, template-specific implementation used to follow the base call. It treated every
    // source column holding a single unique value as a property-name column, collected the remaining
    // columns as data, created one entry per source row (the first data column supplied the entry ID)
    // and finally refined the schema. It fell back to the base implementation when
    // UseUniversalConstructors was set or when no property-name column was found.
    return base.Construct(sourceTable, task);
}
/// <summary>
/// Aggregates a set of source tables and records in <c>Features</c> which size and orientation
/// traits the set exhibits.
/// </summary>
/// <param name="sources">Source tables to aggregate.</param>
/// <param name="extractor">Extractor whose content analysis produces the per-table descriptions.</param>
/// <param name="task">Task the aggregation belongs to; also provides the aggregation name.</param>
public SourceTableAggregation(List<SourceTable> sources, IHtmlExtractor extractor, TableExtractionTask task)
{
    // With no sources nothing is initialized - name and Task stay unset as well.
    if (sources.isNullOrEmpty())
    {
        return;
    }

    aggregatedDescriptions = sources.Select(x => extractor.sourceContentAnalysis.GetDescription(x)).CompileSourceDescription();
    aggregatedAsRows = sources.Merge(false, true);
    aggregatedAsColumns = sources.Merge(true, true);

    var heights = aggregatedDescriptions.sourceHeight;
    var widths = aggregatedDescriptions.sourceWidth;

    // Height traits: only evaluated when every source has more than one row.
    if (heights.Minimum > 1)
    {
        Features.Add(heights.Range == 0 ? SourceTableCase.stableHeight : SourceTableCase.variableHeight);

        if (aggregatedAsRows.Height == 1)
        {
            Features.Add(SourceTableCase.staticContent | SourceTableCase.vertically);
            Features.Add(SourceTableCase.horizontalOrientation);
        }
    }

    // Width traits: only evaluated when every source has more than one column.
    // NOTE(review): this checks aggregatedAsRows.Width; aggregatedAsColumns is computed above but
    // never consulted in this constructor - confirm that is intended.
    if (widths.Minimum > 1)
    {
        Features.Add(widths.Range == 0 ? SourceTableCase.stableWidth : SourceTableCase.variableWidth);

        if (aggregatedAsRows.Width == 1)
        {
            Features.Add(SourceTableCase.staticContent | SourceTableCase.horizontally);
            Features.Add(SourceTableCase.verticalOrientation);
        }
    }

    // A stable dimension combined with a variable one decides the orientation.
    if (Features.ContainsAll(SourceTableCase.stableWidth, SourceTableCase.variableHeight))
    {
        Features.Add(SourceTableCase.verticalOrientation);
    }

    if (Features.ContainsAll(SourceTableCase.variableWidth, SourceTableCase.stableHeight))
    {
        Features.Add(SourceTableCase.horizontalOrientation);
    }

    // Fallback, applied only when no feature carries the orientation flag yet.
    bool orientationKnown = Features.Any(x => x.HasFlag(SourceTableCase.orientation));
    if (!orientationKnown)
    {
        if (heights.Minimum > 1 && aggregatedAsRows.Height == 1)
        {
            Features.Add(SourceTableCase.horizontalOrientation);
        }

        if (widths.Minimum > 1 && aggregatedAsRows.Width == 1)
        {
            Features.Add(SourceTableCase.verticalOrientation);
        }
    }

    name = task.name;
    Task = task;
}
//public override MetaTableSchema GetTableSchema()
//{
//    return null;
//}

/// <summary>
/// Constructs a meta table from the content cells of the source table.
/// </summary>
/// <remarks>
/// When <c>UseUniversalConstructors</c> is set the base implementation is used. Otherwise a source
/// wider than two columns becomes a vertical table with fixed ID / Term / Value properties and one
/// entry per row, while a narrower source becomes a horizontal table in which every row defines one
/// property and all row values together form a single entry.
/// </remarks>
/// <param name="sourceTable">The source table.</param>
/// <param name="task">The task; only forwarded to the base implementation.</param>
/// <returns>The constructed meta table.</returns>
public override MetaTable Construct(SourceTable sourceTable, TableExtractionTask task)
{
    if (UseUniversalConstructors)
    {
        return base.Construct(sourceTable, task);
    }

    MetaTable result = new MetaTable(GetTableDescription());
    var contentRows = sourceTable.GetContentCells();

    bool isMultiEntryList = sourceTable.Width > 2;

    if (!isMultiEntryList)
    {
        result.description.format = MetaTableFormatType.horizontal;

        // NOTE(review): this dictionary is only written to. Its Add call throws on a repeated
        // (or null) property name, which is its sole observable effect.
        Dictionary<string, MetaTableProperty> propertiesByName = new Dictionary<string, MetaTableProperty>();
        List<string> entryValues = new List<string>();

        foreach (var row in contentRows)
        {
            string name = row[PropertyX].Value;
            string value = row[ValueX].Value;
            entryValues.Add(value);

            // The content type of the property is derived from its single value.
            var valueInfo = sourceContentAnalysis.DetermineContentType(value, true);

            var property = result.properties.Add(name);
            property.ContentType = valueInfo.type;
            propertiesByName.Add(name, property);

            RefinedPropertyStats stats = new RefinedPropertyStats();
            stats.Assign(valueInfo);
            stats.Compute();
            stats.Deploy(property);
        }

        // All collected values form the one and only entry of the table.
        result.entries.CreateEntry(entryValues, true);
        return result;
    }

    result.description.format = MetaTableFormatType.vertical;

    // Fixed three-property schema, mapped onto the configured source column indexes.
    result.properties.Add("ID").index = EntryID;
    result.properties.Add("Term").index = PropertyX;
    result.properties.Add("Value").index = ValueX;

    foreach (var row in contentRows)
    {
        result.entries.CreateEntry(row, true);
    }

    return result;
}
/// <summary>
/// Compiles a meta table description for the task from a set of source table descriptions.
/// </summary>
/// <remarks>
/// The source descriptions are aggregated first. The row or column test aggregation that qualifies
/// best as the property UID is selected (preferred, then suitable, then acceptable; rows are tried
/// before columns on each level). The opposite slice becomes the entry UID, provided it holds
/// distinct values. Slice types still undefined afterwards are derived from the width/height ranges.
/// </remarks>
/// <param name="task">Task the description is created for; only its name is used.</param>
/// <param name="sourceDescriptions">Descriptions of the source tables to aggregate.</param>
/// <returns>The compiled description.</returns>
public static MetaTableDescription CompileDescription(this TableExtractionTask task, List<SourceTableDescription> sourceDescriptions)
{
    SourceTableDescriptionAggregation aggregated = sourceDescriptions.CompileSourceDescription();

    MetaTableDescription description = new imbSCI.DataExtraction.MetaTables.Descriptors.MetaTableDescription(new SourceTableDescription(), imbSCI.DataExtraction.MetaTables.Descriptors.MetaTableFormatType.unknown);
    description.Comment = "Created by for " + task.name;

    var rowTest = aggregated.rowTestAggregation;
    var columnTest = aggregated.columnTestAggregation;

    // Property UID: best qualifying slice, rows before columns on every quality level.
    SourceTableSliceTestAggregation propertyUid = null;
    if (rowTest.IsPreferedAsPropertyUID)
    {
        propertyUid = rowTest;
    }
    else if (columnTest.IsPreferedAsPropertyUID)
    {
        propertyUid = columnTest;
    }
    else if (rowTest.IsSuitableAsUID)
    {
        propertyUid = rowTest;
    }
    else if (columnTest.IsSuitableAsUID)
    {
        propertyUid = columnTest;
    }
    else if (rowTest.IsAcceptableAsPropertyUID)
    {
        propertyUid = rowTest;
    }
    else if (columnTest.IsAcceptableAsPropertyUID)
    {
        propertyUid = columnTest;
    }

    // Entry UID: the slice opposite to the property UID, if there is one.
    SourceTableSliceTestAggregation entryUid = null;
    if (propertyUid == null)
    {
        description.index_propertyID = -1;
    }
    else if (rowTest == propertyUid)
    {
        entryUid = columnTest;
    }
    else if (columnTest == propertyUid)
    {
        entryUid = rowTest;
    }

    // An entry UID candidate without distinct values is rejected.
    if (entryUid != null && !entryUid.IsDistinctValue)
    {
        entryUid = null;
        description.index_entryID = -1;
    }

    if (entryUid != null)
    {
        description.entrySource = entryUid.format;
        description.index_entryID = 0;
    }

    if (propertyUid != null)
    {
        description.propertySource = propertyUid.format;
        description.index_propertyID = 0;
    }

    // No property slice selected: decide by comparing the width and height ranges.
    if (description.propertySource == SourceTableSliceType.undefined)
    {
        var widthRange = aggregated.sourceWidth.Range;
        var heightRange = aggregated.sourceHeight.Range;

        if (widthRange <= heightRange)
        {
            description.propertySource = SourceTableSliceType.column;
        }
        else if (widthRange > heightRange)
        {
            description.propertySource = SourceTableSliceType.row;
        }
    }

    var propertySource = description.propertySource;
    bool propertiesInColumn = propertySource == SourceTableSliceType.column;
    bool propertiesInRow = propertySource == SourceTableSliceType.row;

    // Entries default to the slice type opposite to the properties.
    if (description.entrySource == SourceTableSliceType.undefined)
    {
        if (propertiesInColumn)
        {
            description.entrySource = SourceTableSliceType.row;
        }
        else if (propertiesInRow)
        {
            description.entrySource = SourceTableSliceType.column;
        }
    }

    // Table format follows the property slice; the value zone origin is reset to (0, 0) either way.
    if (propertiesInColumn)
    {
        description.sourceDescription.valueZone.y = 0;
        description.sourceDescription.valueZone.x = 0;
        description.format = MetaTableFormatType.vertical;
    }
    else if (propertiesInRow)
    {
        description.format = MetaTableFormatType.horizontal;
        description.sourceDescription.valueZone.x = 0;
        description.sourceDescription.valueZone.y = 0;
    }

    return description;
}
/// <summary>
/// Prepares the extraction chains according to the task's multi-node policy and constructs
/// the meta table for each resulting chain.
/// </summary>
/// <remarks>
/// For the single-table policies all chain sources are merged (as rows or as columns) into one
/// chain that replaces the content of <paramref name="output"/>. For every resulting chain the
/// description is constructed; outside Training mode the meta table is constructed as well,
/// annotated with the task's extra info, passed to <c>AfterConstruction</c> and stored on the chain.
/// </remarks>
/// <param name="task">The task.</param>
/// <param name="output">Chains to process; modified in place.</param>
public void PrepareAndConstruct(TableExtractionTask task, List<TableExtractionChain> output)
{
    switch (task.multiNodePolicy)
    {
        case TaskMultiNodePolicy.AsSingleTableRows:
        case TaskMultiNodePolicy.AsSignleTableColumns:
            var sources = output.Select(x => x.source).ToList();

            // Nothing to merge when there are no chains; indexing the first source would throw.
            if (sources.Count > 0)
            {
                output.Clear();

                SourceTable merged = sources[0];
                for (int i = 1; i < sources.Count; i++)
                {
                    if (task.multiNodePolicy == TaskMultiNodePolicy.AsSingleTableRows)
                    {
                        merged = merged.MergeAsRows(sources[i]);
                    }
                    else
                    {
                        merged = merged.MergeAsColumns(sources[i]);
                    }
                }

                TableExtractionChain chain = new TableExtractionChain()
                {
                    source = merged,
                    name = task.resultTableNamePrefix
                };

                output.Add(chain);
            }
            break;

        case TaskMultiNodePolicy.AsSeparatedTables:
        default:
            // Chains are processed one by one, as they are.
            break;
    }

    var mode = task.score.executionMode;

    for (int i2 = 0; i2 < output.Count; i2++)
    {
        // The returned description is not used here; the call is kept for its own effects.
        ConstructDescription(output[i2].source, task);

        if (mode == ExtractionTaskEngineMode.Training)
        {
            continue;
        }

        var metaTable = Construct(output[i2].source, task);

        if (metaTable != null)
        {
            metaTable.ExtraInfoEntries.Merge(task.ExtraInfoEntries);
            metaTable.ExtraInfoEntries.Add(MetaTable.EXTRAINFOENTRYKEY_TASKNAME, task.name);
            metaTable.ExtraInfoEntries.Add(MetaTable.EXTRAINFOENTRYKEY_EXTRACTORNAME, task.ExtractorName);

            // Kept inside the null guard: Construct may return null, and the comment check
            // must not dereference it.
            if (mode == ExtractionTaskEngineMode.Validation && metaTable.Comment.isNullOrEmpty())
            {
                metaTable.Comment = "Constructed by " + GetType().Name;
            }
        }

        // As before, AfterConstruction and the chain receive the table even when it is null.
        AfterConstruction(metaTable, task);
        output[i2].meta = metaTable;
    }
}
/// <summary>
/// Post-construction hook; forwards the constructed table to the universal meta table constructor.
/// </summary>
/// <param name="metaTable">The constructed meta table, passed on unchanged.</param>
/// <param name="task">The task.</param>
public virtual void AfterConstruction(MetaTable metaTable, TableExtractionTask task)
{
    // The instance-level output is handed over as the logger argument.
    var constructor = universalMetaTableConstructor;
    constructor.AfterConstruction(metaTable, task, output);
}