Beispiel #1
0
 /// <summary>
 /// Determines whether <paramref name="source"/> can stand in for this column.
 /// The two columns must agree on name, kind, item type and key flag, and every
 /// annotation column declared here must have a same-named, compatible counterpart
 /// among the annotations of <paramref name="source"/>.
 /// </summary>
 /// <param name="source">The candidate column; its <c>IsValid</c> flag is checked up front.</param>
 /// <returns><c>true</c> when all of the checks above pass; otherwise <c>false</c>.</returns>
 internal bool IsCompatibleWith(Column source)
 {
     Contracts.Check(source.IsValid, nameof(source));

     // Scalar properties first, short-circuiting in the order: name, kind, item type, key flag.
     bool sameShape = Name == source.Name
         && Kind == source.Kind
         && ItemType.Equals(source.ItemType)
         && IsKey == source.IsKey;
     if (!sameShape)
     {
         return false;
     }

     // Each annotation required by this column must be present on the source and be
     // compatible itself (checked recursively). Extra annotations on the source are not inspected.
     foreach (var required in Annotations)
     {
         bool satisfied = source.Annotations.TryFindColumn(required.Name, out var provided)
             && required.IsCompatibleWith(provided);
         if (!satisfied)
         {
             return false;
         }
     }

     return true;
 }
Beispiel #2
0
 /// <summary>
 /// Determines whether <paramref name="source"/> can stand in for this column.
 /// The two columns must agree on name, kind, item type and key flag, and every
 /// metadata column declared here must have a same-named, compatible counterpart
 /// in the metadata of <paramref name="source"/>.
 /// </summary>
 /// <param name="source">The candidate column; its <c>IsValid</c> flag is checked up front.</param>
 /// <returns><c>true</c> when all of the checks above pass; otherwise <c>false</c>.</returns>
 internal bool IsCompatibleWith(Column source)
 {
     Contracts.Check(source.IsValid, nameof(source));

     // Any mismatch in the scalar properties rules the source out immediately.
     // Evaluation order (name, kind, item type, key flag) is kept via short-circuiting.
     if (Name != source.Name
         || Kind != source.Kind
         || !ItemType.Equals(source.ItemType)
         || IsKey != source.IsKey)
     {
         return false;
     }

     // Every metadata column this column declares must exist on the source and be
     // compatible itself (checked recursively). Extra metadata on the source is not inspected.
     foreach (var expected in Metadata)
     {
         if (!source.Metadata.TryFindColumn(expected.Name, out var actual)
             || !expected.IsCompatibleWith(actual))
         {
             return false;
         }
     }

     return true;
 }
Beispiel #3
0
        /// <summary>
        /// Compares two data view types for equality.
        /// </summary>
        /// <param name="type1">First type to compare.</param>
        /// <param name="type2">Second type to compare.</param>
        /// <param name="exactTypes">
        /// When <c>true</c>, only <c>type1.Equals(type2)</c> counts as a match. When <c>false</c>,
        /// two vector types are also treated as equal if their item types are equal and their
        /// sizes match.
        /// </param>
        /// <returns><c>true</c> if the types are considered equal under the chosen mode.</returns>
        protected bool EqualTypes(DataViewType type1, DataViewType type2, bool exactTypes)
        {
            Contracts.AssertValue(type1);
            Contracts.AssertValue(type2);

            if (type1.Equals(type2))
            {
                return true;
            }

            // Relaxed mode: fall back to a structural comparison of vector types.
            if (!exactTypes && type1 is VectorDataViewType leftVector && type2 is VectorDataViewType rightVector)
            {
                return leftVector.ItemType.Equals(rightVector.ItemType) && leftVector.Size == rightVector.Size;
            }

            return false;
        }
Beispiel #4
0
        /// <summary>
        /// Builds one <see cref="ColumnCodec"/> for each requested schema column, in the same
        /// order as <paramref name="colIndices"/>.
        /// </summary>
        /// <param name="schema">Schema whose columns are being looked up.</param>
        /// <param name="colIndices">
        /// Indices into <paramref name="schema"/> of the columns to activate. May be null or empty,
        /// in which case an empty array is returned.
        /// </param>
        /// <returns>An array with one entry per element of <paramref name="colIndices"/>.</returns>
        /// <remarks>
        /// Throws the exception produced by <c>_host.Except</c> when the codec factory cannot
        /// supply a codec for a requested column's type.
        /// </remarks>
        private ColumnCodec[] GetActiveColumns(DataViewSchema schema, int[] colIndices)
        {
            _host.AssertValue(schema);
            _host.AssertValueOrNull(colIndices);

            int count = Utils.Size(colIndices);
            ColumnCodec[] activeSourceColumns = new ColumnCodec[count];
            if (count == 0)
            {
                return activeSourceColumns;
            }

            for (int c = 0; c < colIndices.Length; ++c)
            {
                // c is a position within colIndices; srcIndex is the actual schema column index.
                int srcIndex = colIndices[c];
                var column = schema[srcIndex];
                DataViewType type = column.Type;

                if (!_factory.TryGetCodec(type, out IValueCodec codec))
                {
                    // Report the column that was actually requested. Indexing the schema with c
                    // would name an unrelated column (or go out of range) whenever
                    // colIndices[c] != c.
                    throw _host.Except("Could not get codec for requested column {0} of type {1}", column.Name, type);
                }
                _host.Assert(type.Equals(codec.Type));
                activeSourceColumns[c] = new ColumnCodec(srcIndex, codec);
            }
            return activeSourceColumns;
        }
Beispiel #5
0
            private BoundColumn MakeColumn(DataViewSchema inputSchema, int iinfo)
            {
                Contracts.AssertValue(inputSchema);
                Contracts.Assert(0 <= iinfo && iinfo < _parent._columns.Length);

                DataViewType itemType = null;

                int[] sources = new int[_parent._columns[iinfo].Sources.Count];
                // Go through the columns, and establish the following:
                // - indices of input columns in the input schema. Throw if they are not there.
                // - output type. Throw if the types of inputs are not the same.
                // - how many slots are there in the output vector (or variable). Denoted by totalSize.
                // - total size of CategoricalSlotRanges metadata, if present. Denoted by catCount.
                // - whether the column is normalized.
                //      It is true when ALL inputs are normalized (and of numeric type).
                // - whether the column has slot names.
                //      It is true if ANY input is a scalar, or has slot names.
                // - whether the column has categorical slot ranges.
                //      It is true if ANY input has this metadata.
                int  totalSize       = 0;
                int  catCount        = 0;
                bool isNormalized    = true;
                bool hasSlotNames    = false;
                bool hasCategoricals = false;

                for (int i = 0; i < _parent._columns[iinfo].Sources.Count; i++)
                {
                    var(srcName, srcAlias) = _parent._columns[iinfo].Sources[i];
                    if (!inputSchema.TryGetColumnIndex(srcName, out int srcCol))
                    {
                        throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", srcName);
                    }
                    sources[i] = srcCol;

                    var        curType       = inputSchema[srcCol].Type;
                    VectorType curVectorType = curType as VectorType;

                    DataViewType currentItemType   = curVectorType?.ItemType ?? curType;
                    int          currentValueCount = curVectorType?.Size ?? 1;

                    if (itemType == null)
                    {
                        itemType  = currentItemType;
                        totalSize = currentValueCount;
                    }
                    else if (currentItemType.Equals(itemType))
                    {
                        // If any one input is variable length, then the output is variable length.
                        if (totalSize == 0 || currentValueCount == 0)
                        {
                            totalSize = 0;
                        }
                        else
                        {
                            totalSize += currentValueCount;
                        }
                    }
                    else
                    {
                        throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", srcName, itemType.ToString(), curType.ToString());
                    }

                    if (isNormalized && !inputSchema[srcCol].IsNormalized())
                    {
                        isNormalized = false;
                    }

                    if (AnnotationUtils.TryGetCategoricalFeatureIndices(inputSchema, srcCol, out int[] typeCat))