/// <summary>
        /// Builds a <see cref="MetaTable"/> from <paramref name="sourceTable"/>, following the execution mode of <paramref name="task"/>.
        /// </summary>
        /// <remarks>
        /// In <c>Application</c> mode the schema is set from the source table, the <c>Meta</c> values of the
        /// task's <c>PropertyDictionary</c> items are passed to <c>ApplySchema</c>, and the entries are set.
        /// In <c>Validation</c> mode the entries are set and linked to the source, after which the schema is
        /// refined with <c>sourceContentAnalysis</c>.
        /// </remarks>
        /// <param name="sourceTable">Source table used for both the schema and the entries.</param>
        /// <param name="task">Extraction task supplying the table description, the execution mode and the property dictionary.</param>
        /// <param name="logger">Not used by this method.</param>
        /// <returns>
        /// The constructed table, or <c>null</c> when the execution mode is neither <c>Application</c> nor <c>Validation</c>.
        /// </returns>
        public MetaTable Construct(SourceTable sourceTable, TableExtractionTask task, ITextRender logger)
        {
            var mode = task.score.executionMode;

            bool isApplication = mode == ExtractionTaskEngineMode.Application;
            bool isValidation  = mode == ExtractionTaskEngineMode.Validation;

            // Every other mode produces no table at all.
            if (!isApplication && !isValidation)
            {
                return null;
            }

            // Both supported modes start from the task's description and the source schema.
            MetaTable result = new MetaTable(task.tableDescription);
            result.SetSchema(sourceTable);

            if (isApplication)
            {
                result.ApplySchema(task.PropertyDictionary.items.Select(x => x.Meta));
                result.SetEntries(sourceTable);
            }
            else
            {
                result.SetEntriesAndLinkToSource(sourceTable);
                result.RefineSchema(sourceContentAnalysis);
            }

            result.Comment = "Constructed by " + GetType().Name;

            return result;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Creates a validation result bound to the given extraction task.
        /// </summary>
        /// <remarks>
        /// Only the task reference is stored; the task's score is neither created nor modified here.
        /// </remarks>
        /// <param name="_task">The task this result refers to.</param>
        public TaskValidationResult(TableExtractionTask _task)
        {
            task = _task;
        }
        /// <summary>
        /// Records a constructed table on the current score entry of <paramref name="task"/>.
        /// </summary>
        /// <remarks>
        /// The table is added through <c>task.score.CurrentEntry().metaTable.Add</c> when the execution mode is
        /// <c>Validation</c> or <c>Application</c>. In any other mode the method does nothing.
        /// </remarks>
        /// <param name="metaTable">The table to record; it is passed on as-is, without a null check.</param>
        /// <param name="task">The task whose score receives the table.</param>
        /// <param name="logger">Not used by this method.</param>
        public void AfterConstruction(MetaTable metaTable, TableExtractionTask task, ITextRender logger)
        {
            var mode = task.score.executionMode;

            bool shouldRecord = mode == ExtractionTaskEngineMode.Validation
                                || mode == ExtractionTaskEngineMode.Application;

            if (!shouldRecord)
            {
                return;
            }

            task.score.CurrentEntry().metaTable.Add(metaTable);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Constructs a meta table for <paramref name="sourceTable"/> by delegating to
        /// <c>universalMetaTableConstructor.Construct</c>, passing <c>output</c> as its third argument.
        /// </summary>
        /// <param name="sourceTable">The source table to convert.</param>
        /// <param name="task">The extraction task the table belongs to.</param>
        /// <returns>Whatever the universal constructor returns for the given table and task.</returns>
        public virtual MetaTable Construct(SourceTable sourceTable, TableExtractionTask task)
        {
            return universalMetaTableConstructor.Construct(sourceTable, task, output);
        }
        /// <summary>
        /// Builds a meta table description for <paramref name="sourceTable"/> in <c>Training</c> mode, or verifies
        /// that <paramref name="task"/> already has a table description in every other mode.
        /// </summary>
        /// <remarks>
        /// In <c>Training</c> mode the description is created from <c>sourceContentAnalysis.GetDescription(sourceTable)</c>
        /// with the vertical format; it is returned to the caller but not stored on the task.
        /// In all other modes nothing is built and the method returns <c>null</c> after the check.
        /// When <c>folder</c> is set, the content of the current score entry's reporter is written to a text file
        /// in that folder, named after the task and the execution mode.
        /// </remarks>
        /// <param name="sourceTable">The source table to describe.</param>
        /// <param name="task">The extraction task; supplies the execution mode, the reporter and the existing description.</param>
        /// <param name="logger">Not used by this method.</param>
        /// <returns>The new description in <c>Training</c> mode; otherwise <c>null</c>.</returns>
        /// <exception cref="InvalidOperationException">
        /// The execution mode is not <c>Training</c> and <c>task.tableDescription</c> is <c>null</c>.
        /// </exception>
        public MetaTableDescription ConstructDescription(SourceTable sourceTable, TableExtractionTask task, ITextRender logger)
        {
            MetaTableDescription metaDescription = null;

            // Fetched up front, exactly as before, so the current score entry is resolved before any mode-specific work.
            builderForText reporter = task.score.CurrentEntry().reporter;

            switch (task.score.executionMode)
            {
            case ExtractionTaskEngineMode.Training:

                SourceTableDescription sourceDesc = sourceContentAnalysis.GetDescription(sourceTable);

                metaDescription = new imbSCI.DataExtraction.MetaTables.Descriptors.MetaTableDescription(sourceDesc, imbSCI.DataExtraction.MetaTables.Descriptors.MetaTableFormatType.vertical);

                break;

            case ExtractionTaskEngineMode.Validation:
            default:
            case ExtractionTaskEngineMode.Application:

                if (task.tableDescription == null)
                {
                    // A specific exception type instead of the bare System.Exception; handlers that
                    // catch Exception keep working because InvalidOperationException derives from it.
                    throw new InvalidOperationException("Task [" + task.name + "] has no table description set.");
                }

                break;
            }

            if (folder != null)
            {
                String sp = folder.pathFor("UMTC_Construct_" + task.name + "_" + task.score.executionMode.toString() + ".txt");
                File.WriteAllText(sp, reporter.GetContent());
            }

            return metaDescription;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Creates a single task from the tasks of all <c>items</c> and picks its multi-node policy from the
        /// features of the first item.
        /// </summary>
        /// <remarks>
        /// Only features of the first item that carry the <c>stable</c> or <c>variable</c> flag are considered.
        /// If <c>horizontalOrientation</c> is among them the policy becomes <c>AsSingleTableRows</c>; otherwise,
        /// if <c>verticalOrientation</c> is among them, it becomes <c>AsSignleTableColumns</c>. When neither is
        /// present the policy is left as the task constructor set it.
        /// NOTE(review): whether the orientation members include the <c>stable</c>/<c>variable</c> bits is not
        /// visible here; if they do not, neither branch can match. Confirm against <c>SourceTableCase</c>.
        /// </remarks>
        /// <returns>The merged task.</returns>
        public TableExtractionTask GetMergedTask()
        {
            TableExtractionTask merged = new TableExtractionTask(items.Select(x => x.Task));

            var candidateFeatures = items.First().Features
                                    .Where(feature => feature.HasFlag(SourceTableCase.stable) || feature.HasFlag(SourceTableCase.variable))
                                    .ToList();

            if (candidateFeatures.Contains(SourceTableCase.horizontalOrientation))
            {
                merged.multiNodePolicy = Analyzers.Data.TaskMultiNodePolicy.AsSingleTableRows;
                return merged;
            }

            if (candidateFeatures.Contains(SourceTableCase.verticalOrientation))
            {
                merged.multiNodePolicy = Analyzers.Data.TaskMultiNodePolicy.AsSignleTableColumns;
            }

            return merged;
        }
        //public override MetaTableSchema GetTableSchema()
        //{
        //    return null;
        //}



        /// <summary>
        /// Constructs the meta table for <paramref name="sourceTable"/> by delegating unconditionally to the
        /// base implementation.
        /// </summary>
        /// <remarks>
        /// This override used to carry a disabled (commented-out) alternative that treated source columns
        /// holding a single unique value as property names and built entries from the remaining columns.
        /// None of it was ever executed, so it has been removed from the body.
        /// </remarks>
        /// <param name="sourceTable">The source table to convert.</param>
        /// <param name="task">The extraction task the table belongs to.</param>
        /// <returns>The result of the base <c>Construct</c> call.</returns>
        public override MetaTable Construct(SourceTable sourceTable, TableExtractionTask task)
        {
            MetaTable constructed = base.Construct(sourceTable, task);

            return constructed;
        }
        /// <summary>
        /// Aggregates the source tables collected for one task and records the structural features they share.
        /// </summary>
        /// <remarks>
        /// When <paramref name="sources"/> is null or empty the constructor returns at once without assigning
        /// anything, including <c>name</c> and <c>Task</c>.
        /// Otherwise it compiles the aggregated descriptions, merges the sources both ways, and adds size
        /// (stable/variable height and width), static-content and orientation features to <c>Features</c>.
        /// </remarks>
        /// <param name="sources">Source tables to aggregate.</param>
        /// <param name="extractor">Extractor whose <c>sourceContentAnalysis</c> describes each source table.</param>
        /// <param name="task">The task the sources belong to; also supplies the aggregation name.</param>
        public SourceTableAggregation(List<SourceTable> sources, IHtmlExtractor extractor, TableExtractionTask task)
        {
            if (sources.isNullOrEmpty())
            {
                return;
            }

            aggregatedDescriptions = sources.Select(x => extractor.sourceContentAnalysis.GetDescription(x)).CompileSourceDescription();
            aggregatedAsRows       = sources.Merge(false, true);
            aggregatedAsColumns    = sources.Merge(true, true);

            var heightStats = aggregatedDescriptions.sourceHeight;
            var widthStats  = aggregatedDescriptions.sourceWidth;

            // Height: stable when the range is zero, variable otherwise.
            if (heightStats.Minimum > 1)
            {
                Features.Add(heightStats.Range == 0 ? SourceTableCase.stableHeight : SourceTableCase.variableHeight);

                if (aggregatedAsRows.Height == 1)
                {
                    Features.Add(SourceTableCase.staticContent | SourceTableCase.vertically);
                    Features.Add(SourceTableCase.horizontalOrientation);
                }
            }

            // Width: same scheme as for height.
            if (widthStats.Minimum > 1)
            {
                Features.Add(widthStats.Range == 0 ? SourceTableCase.stableWidth : SourceTableCase.variableWidth);

                // NOTE(review): this inspects aggregatedAsRows, while aggregatedAsColumns is assigned above
                // but never read in this constructor - confirm which merge is intended here.
                if (aggregatedAsRows.Width == 1)
                {
                    Features.Add(SourceTableCase.staticContent | SourceTableCase.horizontally);
                    Features.Add(SourceTableCase.verticalOrientation);
                }
            }

            // Orientation inferred from which dimension is stable and which one varies.
            if (Features.ContainsAll(SourceTableCase.stableWidth, SourceTableCase.variableHeight))
            {
                Features.Add(SourceTableCase.verticalOrientation);
            }
            if (Features.ContainsAll(SourceTableCase.variableWidth, SourceTableCase.stableHeight))
            {
                Features.Add(SourceTableCase.horizontalOrientation);
            }

            bool orientationKnown = Features.Any(x => x.HasFlag(SourceTableCase.orientation));

            if (!orientationKnown)
            {
                // NOTE(review): both conditions below repeat checks already made above, so this fallback
                // looks redundant unless the orientation members lack the `orientation` flag - confirm.
                if (heightStats.Minimum > 1 && aggregatedAsRows.Height == 1)
                {
                    Features.Add(SourceTableCase.horizontalOrientation);
                }

                if (widthStats.Minimum > 1 && aggregatedAsRows.Width == 1)
                {
                    Features.Add(SourceTableCase.verticalOrientation);
                }
            }

            name = task.name;
            Task = task;
        }
Exemplo n.º 9
0
        //public override MetaTableSchema GetTableSchema()
        //{
        //    return null;
        //    //throw new NotImplementedException();
        //}

        /// <summary>
        /// Constructs a meta table from <paramref name="sourceTable"/>, either through the base implementation
        /// or through this class's own property/value handling.
        /// </summary>
        /// <remarks>
        /// When <c>UseUniversalConstructors</c> is set, the base implementation is used.
        /// Otherwise a source table wider than two columns is read as a vertical list with the fixed properties
        /// "ID", "Term" and "Value", one entry per content row. A narrower table is read as a horizontal
        /// property/value sheet: each content row defines one property and all values form a single entry.
        /// </remarks>
        /// <param name="sourceTable">The source table to convert.</param>
        /// <param name="task">The extraction task; only forwarded to the base implementation.</param>
        /// <returns>The constructed meta table.</returns>
        public override MetaTable Construct(SourceTable sourceTable, TableExtractionTask task)
        {
            if (UseUniversalConstructors)
            {
                return base.Construct(sourceTable, task);
            }

            MetaTable result = new MetaTable(GetTableDescription());

            var contentRows = sourceTable.GetContentCells();

            if (sourceTable.Width <= 2)
            {
                // Property/value sheet: one property per row, a single entry made of all values.
                result.description.format = MetaTableFormatType.horizontal;

                // Only ever populated; kept because Dictionary.Add throws on a repeated property name.
                var propertiesByName = new Dictionary<String, MetaTableProperty>();
                var collectedValues  = new List<String>();

                foreach (var row in contentRows)
                {
                    String propertyName  = row[PropertyX].Value;
                    String propertyValue = row[ValueX].Value;
                    collectedValues.Add(propertyValue);

                    var valueInfo    = sourceContentAnalysis.DetermineContentType(propertyValue, true);
                    var metaProperty = result.properties.Add(propertyName);
                    metaProperty.ContentType = valueInfo.type;
                    propertiesByName.Add(propertyName, metaProperty);

                    RefinedPropertyStats stats = new RefinedPropertyStats();
                    stats.Assign(valueInfo);
                    stats.Compute();
                    stats.Deploy(metaProperty);
                }

                result.entries.CreateEntry(collectedValues, true);

                return result;
            }

            // Multi-entry list: fixed ID / Term / Value properties, one entry per content row.
            result.description.format = MetaTableFormatType.vertical;

            var idProperty = result.properties.Add("ID");
            idProperty.index = EntryID;

            var termProperty = result.properties.Add("Term");
            termProperty.index = PropertyX;

            var valueProperty = result.properties.Add("Value");
            valueProperty.index = ValueX;

            foreach (var row in contentRows)
            {
                result.entries.CreateEntry(row, true);
            }

            return result;
        }
        /// <summary>
        /// Derives a <see cref="MetaTableDescription"/> for <paramref name="task"/> from the aggregated tests of
        /// the given source table descriptions.
        /// </summary>
        /// <remarks>
        /// The slice (row or column) used as property UID is picked by priority: preferred as property UID,
        /// then suitable as UID, then acceptable as property UID - rows are tested before columns at each level.
        /// The opposite slice becomes the entry UID, but only when its values are distinct.
        /// If no property source results from that, it is chosen by comparing the width and height ranges,
        /// and an undefined entry source is set to the opposite of the property source.
        /// Finally the format (vertical for column, horizontal for row) and a zeroed value zone are applied.
        /// </remarks>
        /// <param name="task">The task the description is compiled for; only its name is read.</param>
        /// <param name="sourceDescriptions">Descriptions of the source tables to aggregate.</param>
        /// <returns>The compiled description; never <c>null</c>.</returns>
        public static MetaTableDescription CompileDescription(this TableExtractionTask task, List<SourceTableDescription> sourceDescriptions)
        {
            SourceTableDescriptionAggregation aggregation = sourceDescriptions.CompileSourceDescription();

            MetaTableDescription description = new MetaTableDescription(new SourceTableDescription(), MetaTableFormatType.unknown);
            description.Comment = "Created by for " + task.name;

            SourceTableSliceTestAggregation rowTest    = aggregation.rowTestAggregation;
            SourceTableSliceTestAggregation columnTest = aggregation.columnTestAggregation;

            // Priority list; the first matching test wins, rows before columns at each level.
            SourceTableSliceTestAggregation propertyUid =
                rowTest.IsPreferedAsPropertyUID ? rowTest
                : columnTest.IsPreferedAsPropertyUID ? columnTest
                : rowTest.IsSuitableAsUID ? rowTest
                : columnTest.IsSuitableAsUID ? columnTest
                : rowTest.IsAcceptableAsPropertyUID ? rowTest
                : columnTest.IsAcceptableAsPropertyUID ? columnTest
                : null;

            // The entry UID candidate is the slice that was not taken for the property UID.
            SourceTableSliceTestAggregation entryUid = null;

            if (propertyUid == null)
            {
                description.index_propertyID = -1;
            }
            else if (rowTest == propertyUid)
            {
                entryUid = columnTest;
            }
            else if (columnTest == propertyUid)
            {
                entryUid = rowTest;
            }

            if (entryUid != null)
            {
                if (entryUid.IsDistinctValue)
                {
                    description.entrySource   = entryUid.format;
                    description.index_entryID = 0;
                }
                else
                {
                    // Values are not distinct, so the candidate cannot identify entries.
                    description.index_entryID = -1;
                }
            }

            if (propertyUid != null)
            {
                description.propertySource   = propertyUid.format;
                description.index_propertyID = 0;
            }

            if (description.propertySource == SourceTableSliceType.undefined)
            {
                var widthRange  = aggregation.sourceWidth.Range;
                var heightRange = aggregation.sourceHeight.Range;

                if (widthRange <= heightRange)
                {
                    description.propertySource = SourceTableSliceType.column;
                }
                else if (widthRange > heightRange)
                {
                    // Second comparison kept on purpose: if the two values are not comparable
                    // (e.g. NaN, should Range be floating point) the source stays undefined.
                    description.propertySource = SourceTableSliceType.row;
                }
            }

            var propertySource = description.propertySource;

            if (description.entrySource == SourceTableSliceType.undefined)
            {
                if (propertySource == SourceTableSliceType.column)
                {
                    description.entrySource = SourceTableSliceType.row;
                }
                else if (propertySource == SourceTableSliceType.row)
                {
                    description.entrySource = SourceTableSliceType.column;
                }
            }

            if (propertySource == SourceTableSliceType.column)
            {
                description.sourceDescription.valueZone.y = 0;
                description.sourceDescription.valueZone.x = 0;
                description.format = MetaTableFormatType.vertical;
            }
            else if (propertySource == SourceTableSliceType.row)
            {
                description.format = MetaTableFormatType.horizontal;
                description.sourceDescription.valueZone.x = 0;
                description.sourceDescription.valueZone.y = 0;
            }

            return description;
        }
Exemplo n.º 11
0
        /// <summary>
        /// Prepares the extraction chains according to the task's multi-node policy and constructs a meta table
        /// for each remaining chain.
        /// </summary>
        /// <remarks>
        /// For the <c>AsSingleTableRows</c> and <c>AsSignleTableColumns</c> policies all chain sources are merged
        /// (as rows or as columns) into one source table, and <paramref name="output"/> is replaced by a single
        /// chain holding the merged table. An empty <paramref name="output"/> is left untouched.
        /// For every other policy the chains are processed as they are.
        /// <c>ConstructDescription</c> is then called for each chain; its return value is not used here.
        /// Outside <c>Training</c> mode a meta table is constructed, tagged with the task's extra info, the task
        /// name and the extractor name, passed to <c>AfterConstruction</c> and stored in the chain's <c>meta</c>.
        /// A <c>null</c> table returned by <c>Construct</c> is still passed on and stored.
        /// </remarks>
        /// <param name="task">The extraction task being executed.</param>
        /// <param name="output">The chains to process; modified in place when a merge policy is in effect.</param>
        public void PrepareAndConstruct(TableExtractionTask task, List<TableExtractionChain> output)
        {
            switch (task.multiNodePolicy)
            {
            case TaskMultiNodePolicy.AsSingleTableRows:
            case TaskMultiNodePolicy.AsSignleTableColumns:

                // Nothing to merge: indexing the first source below would otherwise throw.
                if (output.Count == 0)
                {
                    break;
                }

                var st = output.Select(x => x.source).ToList();
                output.Clear();

                SourceTable final = st[0];
                for (int i = 1; i < st.Count; i++)
                {
                    if (task.multiNodePolicy == TaskMultiNodePolicy.AsSingleTableRows)
                    {
                        final = final.MergeAsRows(st[i]);
                    }
                    else
                    {
                        final = final.MergeAsColumns(st[i]);
                    }
                }

                TableExtractionChain chain = new TableExtractionChain()
                {
                    source = final,
                    name   = task.resultTableNamePrefix
                };

                output.Add(chain);

                break;

            default:
            case TaskMultiNodePolicy.AsSeparatedTables:
                // Chains are kept as they are: one table per chain.
                break;
            }

            var mode = task.score.executionMode;

            for (int i2 = 0; i2 < output.Count; i2++)
            {
                // Called in every mode; the returned description is not needed here.
                ConstructDescription(output[i2].source, task);

                if (mode == ExtractionTaskEngineMode.Training)
                {
                    continue;
                }

                var metaTable = Construct(output[i2].source, task);

                if (metaTable != null)
                {
                    metaTable.ExtraInfoEntries.Merge(task.ExtraInfoEntries);
                    metaTable.ExtraInfoEntries.Add(MetaTable.EXTRAINFOENTRYKEY_TASKNAME, task.name);
                    metaTable.ExtraInfoEntries.Add(MetaTable.EXTRAINFOENTRYKEY_EXTRACTORNAME, task.ExtractorName);

                    // Kept inside the null check: the comment can only be set on an existing table.
                    if (mode == ExtractionTaskEngineMode.Validation && metaTable.Comment.isNullOrEmpty())
                    {
                        metaTable.Comment = "Constructed by " + GetType().Name;
                    }
                }

                AfterConstruction(metaTable, task);
                output[i2].meta = metaTable;
            }
        }
Exemplo n.º 12
0
 /// <summary>
 /// Post-construction hook: forwards the constructed table and the task to
 /// <c>universalMetaTableConstructor.AfterConstruction</c>, passing <c>output</c> as the third argument.
 /// </summary>
 /// <param name="metaTable">The table that was just constructed; forwarded without a null check.</param>
 /// <param name="task">The extraction task the table belongs to.</param>
 public virtual void AfterConstruction(MetaTable metaTable, TableExtractionTask task)
 {
     universalMetaTableConstructor.AfterConstruction(metaTable, task, output);
 }