Example #1
0
        /// <summary>
        /// Scans the columns of <paramref name="schema"/> and returns the largest value stored under
        /// the metadata kind <paramref name="metadataKind"/>.
        /// Only columns whose metadata type for that kind is a key type with raw kind U4 are examined.
        /// </summary>
        /// <param name="schema">The schema whose columns are scanned.</param>
        /// <param name="colMax">Receives the index of the first column holding the largest value, or -1 when
        /// no examined column holds a value greater than zero.</param>
        /// <param name="metadataKind">The metadata kind to read from each column.</param>
        /// <param name="filterFunc">Optional predicate called with the schema and a column index; a column is
        /// skipped when it returns false. When null, no column is filtered out.</param>
        /// <returns>The largest metadata value found, or zero when <paramref name="colMax"/> is -1.</returns>
        public static uint GetMaxMetadataKind(this Schema schema, out int colMax, string metadataKind, Func<Schema, int, bool> filterFunc = null)
        {
            colMax = -1;
            uint best = 0;

            for (int i = 0; i < schema.Count; i++)
            {
                var metaType = schema.GetMetadataTypeOrNull(metadataKind, i);
                bool isU4Key = metaType != null && metaType.IsKey && metaType.RawKind == DataKind.U4;

                // The filter is only consulted for columns that carry metadata of the expected type.
                if (isU4Key && (filterFunc == null || filterFunc(schema, i)))
                {
                    uint current = 0;
                    schema.GetMetadata(metadataKind, i, ref current);

                    // Strict comparison: on ties the earliest column is kept.
                    if (current > best)
                    {
                        best = current;
                        colMax = i;
                    }
                }
            }

            return best;
        }
Example #2
0
 /// <summary>
 /// Returns the type of the metadata of the given kind for a column, as reported by the input schema.
 /// For a pivot column, null is returned unless the metadata kind is one that should be preserved.
 /// </summary>
 /// <param name="kind">The metadata kind.</param>
 /// <param name="col">The column index; must satisfy 0 &lt;= col &lt; ColumnCount.</param>
 /// <returns>The metadata type, or null.</returns>
 public ColumnType GetMetadataTypeOrNull(string kind, int col)
 {
     _ectx.Check(0 <= col && col < ColumnCount);

     bool suppressed = IsPivot(col) && !ShouldPreserveMetadata(kind);
     return suppressed ? null : _inputSchema.GetMetadataTypeOrNull(kind, col);
 }
Example #3
0
        /// <summary>
        /// Fetches the metadata of the given kind for a column, but only when the metadata's type
        /// equals the expected <paramref name="type"/>.
        /// </summary>
        /// <typeparam name="T">The raw type of the metadata; should match the raw type of <paramref name="type"/>.</typeparam>
        /// <param name="schema">The schema to query; must not be null.</param>
        /// <param name="type">The expected metadata type; must not be null.</param>
        /// <param name="kind">The metadata kind.</param>
        /// <param name="col">The column index.</param>
        /// <param name="value">Receives the metadata value on success; left untouched otherwise.</param>
        /// <returns>True if metadata of the expected type exists and was fetched, false otherwise.</returns>
        public static bool TryGetMetadata<T>(this Schema schema, PrimitiveType type, string kind, int col, ref T value)
        {
            Contracts.CheckValue(schema, nameof(schema));
            Contracts.CheckValue(type, nameof(type));

            // A missing metadata entry yields a null type, which never equals the expected type.
            bool typeMatches = type.Equals(schema.GetMetadataTypeOrNull(kind, col));
            if (typeMatches)
            {
                schema.GetMetadata(kind, col, ref value);
            }

            return typeMatches;
        }
Example #4
0
        /// <summary>
        /// Returns <c>true</c> if <paramref name="keyCount"/> is non-zero and the column has KeyValues
        /// metadata whose type is a vector of size <paramref name="keyCount"/> with text items.
        /// </summary>
        /// <param name="schema">The schema to query.</param>
        /// <param name="col">The column index.</param>
        /// <param name="keyCount">The expected number of key values.</param>
        public static bool HasKeyValues(this Schema schema, int col, int keyCount)
        {
            if (keyCount == 0)
            {
                return false;
            }

            var metaType = schema.GetMetadataTypeOrNull(Kinds.KeyValues, col);
            if (metaType == null || !metaType.IsVector)
            {
                return false;
            }

            return metaType.VectorSize == keyCount && metaType.ItemType.IsText;
        }
Example #5
0
        /// <summary>
        /// Returns <c>true</c> if all of the following hold:
        ///  * <paramref name="vectorSize"/> is non-zero
        ///  * the specified column has SlotNames metadata
        ///  * the metadata type is a vector of size <paramref name="vectorSize"/> with text items
        /// </summary>
        /// <param name="schema">The schema to query.</param>
        /// <param name="col">The column index.</param>
        /// <param name="vectorSize">The expected number of slot names.</param>
        public static bool HasSlotNames(this Schema schema, int col, int vectorSize)
        {
            // A size of zero never has slot names; the schema is not consulted in that case.
            if (vectorSize == 0)
            {
                return false;
            }

            var slotNamesType = schema.GetMetadataTypeOrNull(Kinds.SlotNames, col);
            if (slotNamesType == null)
            {
                return false;
            }

            bool sizeMatches = slotNamesType.IsVector && slotNamesType.VectorSize == vectorSize;
            return sizeMatches && slotNamesType.ItemType.IsText;
        }
Example #6
0
        /// <summary>
        /// Tries to read and validate the categorical slot ranges of a vector column.
        /// The categoricalFeatures is a vector of the indices of categorical feature slots.
        /// This vector should always have an even number of elements, and the elements should be parsed in
        /// groups of two consecutive numbers, each group being an inclusive [start, end] range.
        /// So if its value is the range of numbers: 0,2,3,4,8,9
        /// look at it as [0,2],[3,4],[8,9].
        /// The way to interpret that is: features with indices 0, 1, and 2 are one categorical.
        /// Features with indices 3 and 4 are another categorical, and features 8 and 9 a third.
        /// Features 5, 6 and 7 don't appear there, so they are not categoricals.
        /// </summary>
        /// <param name="schema">The schema to query; must not be null.</param>
        /// <param name="colIndex">The column index; must be non-negative.</param>
        /// <param name="categoricalFeatures">Receives a copy of the validated range vector on success,
        /// null otherwise.</param>
        /// <returns>
        /// True if the column is a vector of known positive size, carries CategoricalSlotRanges metadata
        /// of raw type VBuffer&lt;int&gt;, and every range is well formed: start &lt;= end, start greater than
        /// the previous range's end, and both bounds below the column's slot count. False otherwise.
        /// </returns>
        public static bool TryGetCategoricalFeatureIndices(Schema schema, int colIndex, out int[] categoricalFeatures)
        {
            Contracts.CheckValue(schema, nameof(schema));
            Contracts.Check(colIndex >= 0, nameof(colIndex));

            categoricalFeatures = null;
            if (!(schema.GetColumnType(colIndex) is VectorType vecType && vecType.Size > 0))
            {
                return false;
            }

            var type = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.CategoricalSlotRanges, colIndex);
            if (type?.RawType != typeof(VBuffer<int>))
            {
                return false;
            }

            VBuffer<int> catIndices = default(VBuffer<int>);
            schema.GetMetadata(MetadataUtils.Kinds.CategoricalSlotRanges, colIndex, ref catIndices);
            VBufferUtils.Densify(ref catIndices);

            int columnSlotsCount = vecType.Size;

            // There can be at most one range per slot. The number of ranges (Length / 2) is compared
            // against the slot count instead of comparing Length against columnSlotsCount * 2, because
            // that product wraps around for slot counts above int.MaxValue / 2 and would wrongly reject
            // valid metadata. Length is known to be even by the time the division is evaluated.
            if (catIndices.Length <= 0 || catIndices.Length % 2 != 0 || catIndices.Length / 2 > columnSlotsCount)
            {
                return false;
            }

            var catIndicesValues = catIndices.GetValues();
            int previousEndIndex = -1;
            for (int i = 0; i < catIndicesValues.Length; i += 2)
            {
                int start = catIndicesValues[i];
                int end = catIndicesValues[i + 1];

                // Ranges must be ordered, non-overlapping and inside the column. Starting
                // previousEndIndex at -1 also rejects a negative first start index.
                if (start > end ||
                    start <= previousEndIndex ||
                    start >= columnSlotsCount ||
                    end >= columnSlotsCount)
                {
                    return false;
                }

                previousEndIndex = end;
            }

            categoricalFeatures = catIndicesValues.ToArray();
            return true;
        }
Example #7
0
 /// <summary>
 /// Lazily enumerates the indices of the columns whose metadata of kind
 /// <paramref name="metadataKind"/> is of text type and equals <paramref name="value"/>.
 /// Columns without that metadata, or whose metadata is not text, are skipped.
 /// </summary>
 /// <param name="schema">The schema whose columns are scanned.</param>
 /// <param name="metadataKind">The metadata kind to read from each column.</param>
 /// <param name="value">The text value to match.</param>
 /// <returns>The matching column indices, in increasing order.</returns>
 public static IEnumerable<int> GetColumnSet(this Schema schema, string metadataKind, string value)
 {
     for (int i = 0; i < schema.Count; i++)
     {
         var metaType = schema.GetMetadataTypeOrNull(metadataKind, i);
         if (metaType == null || !metaType.IsText)
         {
             continue;
         }

         var text = default(ReadOnlyMemory<char>);
         schema.GetMetadata(metadataKind, i, ref text);
         if (ReadOnlyMemoryUtils.EqualsStr(value, text))
         {
             yield return i;
         }
     }
 }
Example #8
0
 /// <summary>
 /// Lazily enumerates the indices of the columns whose metadata of kind
 /// <paramref name="metadataKind"/> equals <paramref name="value"/>.
 /// Only columns whose metadata type is a key type with raw kind U4 are considered.
 /// </summary>
 /// <param name="schema">The schema whose columns are scanned.</param>
 /// <param name="metadataKind">The metadata kind to read from each column.</param>
 /// <param name="value">The value to match.</param>
 /// <returns>The matching column indices, in increasing order.</returns>
 public static IEnumerable<int> GetColumnSet(this Schema schema, string metadataKind, uint value)
 {
     for (int i = 0; i < schema.Count; i++)
     {
         var metaType = schema.GetMetadataTypeOrNull(metadataKind, i);
         bool isU4Key = metaType != null && metaType.IsKey && metaType.RawKind == DataKind.U4;
         if (!isU4Key)
         {
             continue;
         }

         uint current = 0;
         schema.GetMetadata(metadataKind, i, ref current);
         if (current == value)
         {
             yield return i;
         }
     }
 }
Example #9
0
        /// <summary>
        /// Writes a textual description of <paramref name="schema"/> to <paramref name="writer"/>:
        /// a header line with the column count, then one line per column with its name and type,
        /// suffixed with " (T)" when <paramref name="tschema"/> reports a slot type for the column.
        /// Depending on <paramref name="args"/>, each column line is followed either by its metadata
        /// (ShowMetadataValues / ShowMetadataTypes) or by its slot names (ShowSlots).
        /// </summary>
        /// <param name="writer">The destination writer; must not be null.</param>
        /// <param name="args">The display options; must not be null.</param>
        /// <param name="schema">The schema to describe; must not be null.</param>
        /// <param name="tschema">Optional transpose schema; may be null.</param>
        private static void PrintSchema(TextWriter writer, Arguments args, Schema schema, ITransposeSchema tschema)
        {
            Contracts.AssertValue(writer);
            Contracts.AssertValue(args);
            Contracts.AssertValue(schema);
            Contracts.AssertValueOrNull(tschema);
#if !CORECLR
            if (args.ShowJson)
            {
                writer.WriteLine("Json Schema not supported.");
                return;
            }
#endif
            int columnCount = schema.ColumnCount;
            var indented = new IndentedTextWriter(writer, "  ");

            indented.WriteLine("{0} columns:", columnCount);
            using (indented.Nest())
            {
                // Declared outside the loop so the same buffer is passed to every GetMetadata call.
                var slotNames = default(VBuffer<ReadOnlyMemory<char>>);

                for (int col = 0; col < columnCount; col++)
                {
                    var name = schema.GetColumnName(col);
                    var type = schema.GetColumnType(col);
                    var slotType = tschema?.GetSlotType(col);
                    string transposedMark = slotType == null ? "" : " (T)";
                    indented.WriteLine("{0}: {1}{2}", name, type, transposedMark);

                    // Metadata display takes precedence over slot-name display.
                    bool showValues = args.ShowMetadataValues;
                    if (showValues || args.ShowMetadataTypes)
                    {
                        ShowMetadata(indented, schema, col, showValues);
                        continue;
                    }

                    // Slot names are only listed on request and only for vectors of known size.
                    if (!args.ShowSlots || !type.IsKnownSizeVector)
                    {
                        continue;
                    }

                    ColumnType slotNamesType = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, col);
                    if (slotNamesType == null)
                    {
                        continue;
                    }
                    if (slotNamesType.VectorSize != type.VectorSize || !slotNamesType.ItemType.IsText)
                    {
                        Contracts.Assert(false, "Unexpected slot names type");
                        continue;
                    }

                    schema.GetMetadata(MetadataUtils.Kinds.SlotNames, col, ref slotNames);
                    if (slotNames.Length != type.VectorSize)
                    {
                        Contracts.Assert(false, "Unexpected length of slot names vector");
                        continue;
                    }

                    using (indented.Nest())
                    {
                        // In verbose mode every slot is listed; otherwise empty names are omitted.
                        bool verbose = args.Verbose ?? false;
                        foreach (var entry in slotNames.Items(all: verbose))
                        {
                            if (verbose || !entry.Value.IsEmpty)
                            {
                                indented.WriteLine("{0}:{1}", entry.Key, entry.Value);
                            }
                        }
                    }
                }
            }
        }
Example #10
0
 /// <summary>
 /// Returns the type of the metadata of the given kind for a column by forwarding the request
 /// to the input, using the input column index that <c>Sources</c> maps this column to.
 /// </summary>
 /// <param name="kind">The metadata kind; must be non-empty.</param>
 /// <param name="col">The column index; must satisfy 0 &lt;= col &lt; ColumnCount.</param>
 /// <returns>Whatever the input reports for the mapped column: the metadata type, or null.</returns>
 public ColumnType GetMetadataTypeOrNull(string kind, int col)
 {
     Contracts.CheckNonEmpty(kind, nameof(kind));
     Contracts.CheckParam(0 <= col && col < ColumnCount, nameof(col));

     var sourceCol = Sources[col];
     return _input.GetMetadataTypeOrNull(kind, sourceCol);
 }