/// <summary>
        /// Builds a new table from the content rows of <paramref name="source"/> after they have been
        /// passed through <c>TakeDistinct</c>.
        /// </summary>
        /// <param name="source">The table whose content cells are read.</param>
        /// <returns>
        /// A table as wide as the longest remaining row and as tall as the number of remaining rows,
        /// or <c>null</c> when no rows remain. Cells beyond the end of a shorter row are never set.
        /// </returns>
        public static SourceTable GetDistinctRows(this SourceTable source)
        {
            var rows = source.GetContentCells(false);
            rows = TakeDistinct(rows);

            if (!rows.Any())
            {
                return null;
            }

            // Width comes from the longest row, height from the number of rows.
            var result = new SourceTable(rows.Max(r => r.Count), rows.Count);

            for (int y = 0; y < result.Height; y++)
            {
                var cells = rows[y];

                // Rows may be shorter than the table; stop at whichever bound is hit first.
                for (int x = 0; x < result.Width && x < cells.Count; x++)
                {
                    result.SetCell(x, y, cells[x]);
                }
            }

            return result;
        }
示例#2
0
        /// <summary>
        /// Remembers <paramref name="source"/> as the linked source table and creates one entry for each
        /// cell list returned by <c>GetContentCells</c>, starting at <c>description.EntrySkipCount</c>
        /// and leaving out the list at <c>description.index_propertyID</c>.
        /// </summary>
        /// <param name="source">The source table providing the content cells.</param>
        public void SetEntriesAndLinkToSource(SourceTable source)
        {
            Int32 firstIndex = description.EntrySkipCount;
            Boolean isVertical = description.format == MetaTableFormatType.vertical;
            List<List<SourceTableCell>> cellLists = source.GetContentCells(isVertical);

            sourceTable = source;

            for (int index = firstIndex; index < cellLists.Count; index++)
            {
                // The list at the property-ID index is not turned into an entry.
                if (index == description.index_propertyID)
                {
                    continue;
                }

                entries.CreateEntry(cellLists[index], true);
            }
        }
 /// <summary>
 /// Gets the linked nodes for the content cells of <paramref name="source"/> by forwarding to the
 /// <c>GetLinkedNodes</c> overload available on the cell collection.
 /// </summary>
 /// <param name="source">The table whose content cells are inspected.</param>
 /// <param name="nodeEvaluation">Node predicate, passed through unchanged.</param>
 /// <param name="allChildren">Flag passed through unchanged; defaults to <c>true</c>.</param>
 /// <returns>The list produced by the cell-collection overload.</returns>
 public static List<HtmlNode> GetLinkedNodes(this SourceTable source, Func<HtmlNode, Boolean> nodeEvaluation, Boolean allChildren = true)
 {
     var contentCells = source.GetContentCells();

     return contentCells.GetLinkedNodes(nodeEvaluation, allChildren);
 }
        //public override MetaTableSchema GetTableSchema()
        //{
        //    return null;
        //    //throw new NotImplementedException();
        //}

        /// <summary>
        /// Builds a <see cref="MetaTable"/> from the content cells of the given source table.
        /// </summary>
        /// <remarks>
        /// When <c>UseUniversalConstructors</c> is set, the call is delegated to the base implementation.
        /// Otherwise the layout is chosen from the source table width:
        /// a table wider than two columns is marked vertical, receives the properties "ID", "Term" and
        /// "Value" (indexed by <c>EntryID</c>, <c>PropertyX</c> and <c>ValueX</c>) and gets one entry per row;
        /// any other table is marked horizontal, each row contributes one property (name read at
        /// <c>PropertyX</c>, value read at <c>ValueX</c>) and a single entry is created from all values.
        /// </remarks>
        /// <param name="sourceTable">The source table whose content cells are read.</param>
        /// <param name="task">The extraction task; only forwarded to the base implementation.</param>
        /// <returns>The constructed meta table.</returns>
        public override MetaTable Construct(SourceTable sourceTable, TableExtractionTask task)
        {
            if (UseUniversalConstructors)
            {
                return base.Construct(sourceTable, task);
            }

            MetaTable table = new MetaTable(GetTableDescription());

            var rows = sourceTable.GetContentCells();

            // More than two columns means every row is an entry of its own.
            Boolean isMultiEntryList = sourceTable.Width > 2;

            if (isMultiEntryList)
            {
                table.description.format = MetaTableFormatType.vertical;

                var entryIdProperty = table.properties.Add("ID");
                entryIdProperty.index = EntryID;

                var entryTermProperty = table.properties.Add("Term");
                entryTermProperty.index = PropertyX;

                var entryValueProperty = table.properties.Add("Value");
                entryValueProperty.index = ValueX;

                foreach (var row in rows)
                {
                    table.entries.CreateEntry(row, true);
                }
            }
            else
            {
                table.description.format = MetaTableFormatType.horizontal;

                List<String> propertyValues = new List<string>();

                foreach (var row in rows)
                {
                    String propertyName = row[PropertyX].Value;
                    String propertyValue = row[ValueX].Value;
                    propertyValues.Add(propertyValue);

                    // Properties are registered directly on the table; how a repeated name is
                    // treated is decided by table.properties.Add, not by a local lookup that
                    // would throw on the second occurrence.
                    var vInfo = sourceContentAnalysis.DetermineContentType(propertyValue, true);
                    var metaProperty = table.properties.Add(propertyName);
                    metaProperty.ContentType = vInfo.type;

                    RefinedPropertyStats pStats = new RefinedPropertyStats();
                    pStats.Assign(vInfo);
                    pStats.Compute();
                    pStats.Deploy(metaProperty);
                }

                // All collected values together form the single entry of a horizontal table.
                table.entries.CreateEntry(propertyValues, true);
            }

            return table;
        }