Ejemplo n.º 1
0
        /// <summary>
        /// Tells whether the column at index <paramref name="col"/> is hidden, that is, whether
        /// looking up its own name in the schema does not lead back to the same index.
        /// </summary>
        /// <param name="schema">Schema to inspect (validated with <c>Contracts.CheckValue</c>).</param>
        /// <param name="col">Index of the column to test.</param>
        /// <returns>
        /// True when the name lookup fails or resolves to an index other than <paramref name="col"/>.
        /// </returns>
        public static bool IsHidden(this ISchema schema, int col)
        {
            Contracts.CheckValue(schema, nameof(schema));

            int resolved;
            bool found = schema.TryGetColumnIndex(schema.GetColumnName(col), out resolved);

            // The name was just read from the schema, so the lookup is expected to succeed.
            Contracts.Assert(found, "Why did TryGetColumnIndex return false?");

            if (!found)
            {
                return true;
            }
            return resolved != col;
        }
        /// <summary>
        /// Builds a textual description of a schema, one <c>name:type:position</c> entry per column,
        /// entries being joined with <paramref name="sep"/>.
        /// </summary>
        /// <param name="schema">schema to display</param>
        /// <param name="sep">separator inserted between two column entries</param>
        /// <param name="vectorVec">if true, every column is written with its full type
        /// (for example <c>Vec&lt;R4,2&gt;</c>) and a single position; if false, a vector is written
        /// as its item kind followed by the range of positions it spans (for example <c>:R4:5-6</c>)
        /// and a contiguous key is written as <c>kind[min-max]</c></param>
        /// <param name="keepHidden">if true, hidden columns are listed as well</param>
        /// <returns>schema as a string</returns>
        /// <exception cref="System.NotSupportedException">presumably raised (through
        /// <c>Contracts.ExceptNotSupp</c>) when a vector has more than one dimension and
        /// <paramref name="vectorVec"/> is false; exact exception type to be confirmed</exception>
        public static string ToString(ISchema schema, string sep = "; ", bool vectorVec = true, bool keepHidden = false)
        {
            Contracts.CheckValue(schema, nameof(schema));

            var builder = new StringBuilder();

            // Number of extra positions consumed so far by vectors expanded into a range.
            int lag = 0;

            for (int i = 0; i < schema.ColumnCount; ++i)
            {
                if (!keepHidden && schema.IsHidden(i))
                {
                    continue;
                }
                if (builder.Length > 0)
                {
                    builder.Append(sep);
                }

                string name = schema.GetColumnName(i);
                var t = schema.GetColumnType(i);
                string type;
                string si;

                if (vectorVec || (!t.IsVector && !t.IsKey))
                {
                    // Full type name, spaces removed so the entry stays compact.
                    type = t.ToString().Replace(" ", "");
                    si   = (i + lag).ToString();
                }
                else if (t.IsVector)
                {
                    if (t.AsVector.DimCount != 1)
                    {
                        throw Contracts.ExceptNotSupp("Only vector with one dimension are supported.");
                    }

                    // NOTE(review): a dimension of 0 would produce an inverted range and decrease
                    // lag; confirm that only fixed-size vectors are expected here.
                    int dim = t.AsVector.GetDim(0);
                    type = t.ItemType.RawKind.ToString();
                    si   = string.Format("{0}-{1}", i + lag, i + lag + dim - 1);
                    lag += dim - 1;
                }
                else if (t.IsKey && t.AsKey.Contiguous)
                {
                    var k = t.AsKey;
                    type = k.Count > 0
                                ? string.Format("{0}[{1}-{2}]", k.RawKind, k.Min, k.Min + (ulong)k.Count - 1)
                                : string.Format("{0}[{1}-{2}]", k.RawKind, k.Min, "*");

                    // A key occupies a single position, shifted like any other column by the
                    // positions taken by previously expanded vectors.
                    si = (i + lag).ToString();
                }
                else
                {
                    throw Contracts.ExceptNotImpl(string.Format("Unable to process type '{0}'.", t));
                }

                builder.AppendFormat("{0}:{1}:{2}", name, type, si);
            }
            return builder.ToString();
        }
        /// <summary>
        /// Returns the name of the column at index <paramref name="col"/>. The index is first
        /// translated with <c>MapColumnIndex</c>; the name then comes either from the input
        /// schema or from <c>GetColumnNameCore</c>, depending on what the mapping reports.
        /// </summary>
        /// <param name="col">Column index, which must satisfy 0 &lt;= col &lt; ColumnCount.</param>
        /// <returns>The column name.</returns>
        public string GetColumnName(int col)
        {
            Contracts.CheckParam(0 <= col && col < ColumnCount, nameof(col));

            bool fromInput;
            int mapped = MapColumnIndex(out fromInput, col);

            return fromInput ? Input.GetColumnName(mapped) : GetColumnNameCore(mapped);
        }
        /// <summary>
        /// Computes how the columns of <paramref name="input"/> and the added columns described by
        /// <paramref name="names"/> are laid out in a combined column list.
        /// </summary>
        /// <param name="input">Input schema whose columns are kept.</param>
        /// <param name="names">Names of the added columns; the position in this array is the "iinfo".</param>
        /// <param name="colMap">
        /// On return, one entry per combined column (input.ColumnCount + names.Length entries):
        /// a non-negative value is an input column index, a negative value is the bitwise
        /// complement (~) of an iinfo.
        /// </param>
        /// <param name="mapIinfoToCol">
        /// On return, for each iinfo, the index in <paramref name="colMap"/> where that added column sits.
        /// </param>
        private static void ComputeColumnMapping(ISchema input, string[] names, out int[] colMap, out int[] mapIinfoToCol)
        {
            // To compute the column mapping information, first populate:
            // * colMap[src] with the ~ of the iinfo that hides src (zero for none).
            // * mapIinfoToCol[iinfo] with the ~ of the source column that iinfo hides (zero for none).
            // The complement of a non-negative index is always negative, so zero is free to mean "none".
            colMap        = new int[input.ColumnCount + names.Length];
            mapIinfoToCol = new int[names.Length];
            for (int iinfo = 0; iinfo < names.Length; iinfo++)
            {
                var name = names[iinfo];
                int colHidden;
                if (input.TryGetColumnIndex(name, out colHidden))
                {
                    // Sanity checks on what the input schema returned.
                    Contracts.Check(0 <= colHidden && colHidden < input.ColumnCount);
                    var str = input.GetColumnName(colHidden);
                    Contracts.Check(str == name);
                    // The source column must not already be hidden by another iinfo.
                    Contracts.Check(colMap[colHidden] == 0);
                    mapIinfoToCol[iinfo] = ~colHidden;
                    colMap[colHidden]    = ~iinfo;
                }
            }

            // Now back-fill the column mapping.
            // colMap is filled from its last slot towards slot zero; colDst is the next free slot + 1.
            int colDst = colMap.Length;

            // First, the added columns that hide nothing take the last slots. Walking iinfo
            // backwards while filling backwards keeps them in their original relative order.
            for (int iinfo = names.Length; --iinfo >= 0;)
            {
                Contracts.Assert(mapIinfoToCol[iinfo] <= 0);
                if (mapIinfoToCol[iinfo] == 0)
                {
                    colMap[--colDst]     = ~iinfo;
                    mapIinfoToCol[iinfo] = colDst;
                }
            }
            // Then the source columns, last to first. A source column hidden by an iinfo gets that
            // iinfo placed in the slot immediately after it.
            for (int colSrc = input.ColumnCount; --colSrc >= 0;)
            {
                Contracts.Assert(colMap[colSrc] <= 0);
                if (colMap[colSrc] < 0)
                {
                    // Two slots are needed: one for the hiding iinfo, one for the source column.
                    Contracts.Assert(colDst > 1);
                    int iinfo = ~colMap[colSrc];
                    Contracts.Assert(0 <= iinfo && iinfo < names.Length);
                    Contracts.Assert(mapIinfoToCol[iinfo] == ~colSrc);
                    colMap[--colDst]     = ~iinfo;
                    mapIinfoToCol[iinfo] = colDst;
                }
                Contracts.Assert(colDst > 0);
                // colMap[colSrc] has already been read above, so overwriting a slot here is safe.
                colMap[--colDst] = colSrc;
            }
            // Every slot must have been assigned exactly once.
            Contracts.Assert(colDst == 0);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Declares the PFA input type from the columns of <paramref name="schema"/>. Hidden
        /// columns, columns listed in <paramref name="toDrop"/> and columns for which
        /// <c>PfaTypeOrNullForColumn</c> returns null are left out. Each kept column is
        /// registered in <c>_nameToVarName</c>.
        /// </summary>
        /// <param name="schema">Schema providing the candidate input columns.</param>
        /// <param name="toDrop">Names of the columns to exclude.</param>
        private void SetInput(ISchema schema, HashSet <string> toDrop)
        {
            var recordType = new JObject
            {
                ["type"] = "record",
                ["name"] = "DataInput"
            };
            var fields         = new JArray();
            var usedFieldNames = new HashSet<string>();

            for (int col = 0; col < schema.ColumnCount; ++col)
            {
                if (schema.IsHidden(col))
                {
                    continue;
                }

                string columnName = schema.GetColumnName(col);
                if (toDrop.Contains(columnName))
                {
                    continue;
                }

                JToken pfaType = PfaTypeOrNullForColumn(schema, col);
                if (pfaType == null)
                {
                    continue;
                }

                // Derive a field name that does not clash with the ones already declared.
                string fieldName = ModelUtils.CreateNameCore(columnName, usedFieldNames.Contains);
                usedFieldNames.Add(fieldName);

                fields.Add(new JObject
                {
                    ["name"] = fieldName,
                    ["type"] = pfaType
                });
                _nameToVarName.Add(columnName, "input." + fieldName);
            }

            _host.Assert(_nameToVarName.Count == fields.Count);
            _host.Assert(_nameToVarName.Count == usedFieldNames.Count);
            recordType["fields"] = fields;
            _host.Check(fields.Count >= 1, "Schema produced no inputs for the PFA conversion.");

            if (fields.Count != 1)
            {
                Pfa.InputType = recordType;
                return;
            }

            // If there's only one, don't bother forming a record: the input is the field itself.
            Pfa.InputType = ((JObject)fields[0])["type"];
            _nameToVarName[_nameToVarName.Keys.First()] = "input";
        }
        /// <summary>
        /// Converts every column of <paramref name="schema"/> into a text loader column
        /// description whose source is the single position equal to the column index.
        /// </summary>
        /// <param name="schema">Schema to convert.</param>
        /// <returns>One <c>Data.TextLoaderColumn</c> per schema column, in schema order.</returns>
        public static Data.TextLoaderColumn[] ToColumnArgArray(ISchema schema)
        {
            var columns = new Data.TextLoaderColumn[schema.ColumnCount];
            int count   = columns.Length;

            for (int col = 0; col < count; ++col)
            {
                var column = new Data.TextLoaderColumn();
                column.Name   = schema.GetColumnName(col);
                column.Type   = DataKind2DataDataKind(schema.GetColumnType(col).RawKind);
                column.Source = new[] { new Data.TextLoaderRange(col) };
                columns[col]  = column;
            }
            return columns;
        }
        /// <summary>
        /// Decides whether column <paramref name="i"/> should be added: either its score column
        /// set id metadata equals <paramref name="scoreSet"/>, or its name is one of
        /// <paramref name="extraColumns"/>.
        /// </summary>
        /// <param name="schema">Schema holding the column.</param>
        /// <param name="i">Index of the column to examine.</param>
        /// <param name="extraColumns">Additional column names to accept; may be null.</param>
        /// <param name="scoreSet">Score column set id to match.</param>
        /// <returns>True if the column should be added, false otherwise.</returns>
        private static bool ShouldAddColumn(ISchema schema, int i, string[] extraColumns, uint scoreSet)
        {
            uint columnScoreSet = 0;
            bool hasScoreSet = schema.TryGetMetadata(
                MetadataUtils.ScoreColumnSetIdType.AsPrimitive, MetadataUtils.Kinds.ScoreColumnSetId, i, ref columnScoreSet);

            if (hasScoreSet && columnScoreSet == scoreSet)
            {
                return true;
            }

            var columnName = schema.GetColumnName(i);
            return extraColumns != null && Array.Exists(extraColumns, columnName.Equals);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Builds a schema shape from a fully defined schema. Hidden columns are skipped; every
        /// other column is described together with a shape built from its metadata types.
        /// </summary>
        /// <param name="schema">Schema to convert (validated with <c>Contracts.CheckValue</c>).</param>
        /// <returns>The shape describing the visible columns of <paramref name="schema"/>.</returns>
        public static SchemaShape Create(ISchema schema)
        {
            Contracts.CheckValue(schema, nameof(schema));

            var columns = new List<Column>();
            for (int iCol = 0; iCol < schema.ColumnCount; iCol++)
            {
                if (schema.IsHidden(iCol))
                {
                    continue;
                }

                // Describe the metadata of the column first.
                var metaColumns = new List<Column>();
                foreach (var entry in schema.GetMetadataTypes(iCol))
                {
                    GetColumnArgs(entry.Value, out var metaVecKind, out var metaItemType, out var metaIsKey);
                    metaColumns.Add(new Column(entry.Key, metaVecKind, metaItemType, metaIsKey));
                }
                // The shared empty shape is used when the column has no metadata.
                var metadata = metaColumns.Count > 0 ? new SchemaShape(metaColumns) : _empty;

                // Then describe the column itself.
                GetColumnArgs(schema.GetColumnType(iCol), out var vecKind, out var itemType, out var isKey);
                cols_add:
                columns.Add(new Column(schema.GetColumnName(iCol), vecKind, itemType, isKey, metadata));
            }
            return new SchemaShape(columns);
        }