/// <summary>
/// Tells whether the column at index <paramref name="col"/> is hidden in <paramref name="schema"/>.
/// The column is reported as hidden when looking its own name up in the schema
/// does not lead back to the same index.
/// </summary>
/// <param name="schema">Schema to inspect; must not be null.</param>
/// <param name="col">Index of the column to test.</param>
/// <returns>True if the name lookup fails or resolves to a different index than <paramref name="col"/>.</returns>
public static bool IsHidden(this ISchema schema, int col)
{
    Contracts.CheckValue(schema, nameof(schema));

    int resolvedIndex;
    string columnName = schema.GetColumnName(col);
    bool found = schema.TryGetColumnIndex(columnName, out resolvedIndex);
    // A name that was just read from the schema is expected to be resolvable.
    Contracts.Assert(found, "Why did TryGetColumnIndex return false?");

    if (!found)
    {
        return true;
    }
    return resolvedIndex != col;
}
/// <summary>
/// Builds a textual description of a schema. Every displayed column is rendered as
/// <c>name:type:index</c> and the columns are joined with <paramref name="sep"/>.
/// </summary>
/// <param name="schema">Schema to describe; must not be null.</param>
/// <param name="sep">Separator inserted between two column descriptions.</param>
/// <param name="vectorVec">If true, every column type is printed as returned by its
/// <c>ToString()</c> with spaces removed (for instance <c>Vec&lt;R4,2&gt;</c>).
/// If false, a one-dimensional vector column is printed as its item kind followed by the range
/// of indices it spans (for instance <c>:R4:5-6</c>), and a contiguous key column is printed as
/// <c>kind[min-max]</c>, or <c>kind[min-*]</c> when its count is not positive.</param>
/// <param name="keepHidden">If true, hidden columns are described as well; otherwise they are skipped.</param>
/// <returns>The schema as a string.</returns>
public static string ToString(ISchema schema, string sep = "; ", bool vectorVec = true, bool keepHidden = false)
{
    Contracts.CheckValue(schema, nameof(schema));

    var builder = new StringBuilder();

    // Number of extra indices consumed so far by vector columns that were expanded into a range.
    int lag = 0;
    for (int i = 0; i < schema.ColumnCount; ++i)
    {
        if (!keepHidden && schema.IsHidden(i))
        {
            continue;
        }
        if (builder.Length > 0)
        {
            builder.Append(sep);
        }

        string name = schema.GetColumnName(i);
        var t = schema.GetColumnType(i);
        string type;
        string si;
        if (vectorVec || (!t.IsVector && !t.IsKey))
        {
            type = t.ToString().Replace(" ", "");
            si = (i + lag).ToString();
        }
        else if (t.IsVector)
        {
            var vec = t.AsVector;
            if (vec.DimCount != 1)
            {
                throw Contracts.ExceptNotSupp("Only vector with one dimension are supported.");
            }
            int dim = vec.GetDim(0);
            type = t.ItemType.RawKind.ToString();
            si = string.Format("{0}-{1}", i + lag, i + lag + dim - 1);
            // The vector occupies 'dim' indices instead of one: shift every following column.
            lag += dim - 1;
        }
        else if (t.IsKey && t.AsKey.Contiguous)
        {
            var k = t.AsKey;
            type = k.Count > 0
                ? string.Format("{0}[{1}-{2}]", k.RawKind, k.Min, k.Min + (ulong)k.Count - 1)
                : string.Format("{0}[{1}-{2}]", k.RawKind, k.Min, "*");
            // Apply the same shift as the other branches so that a key column placed
            // after an expanded vector does not reuse an index of that vector.
            si = (i + lag).ToString();
        }
        else
        {
            throw Contracts.ExceptNotImpl(string.Format("Unable to process type '{0}'.", t));
        }

        builder.Append(name).Append(':').Append(type).Append(':').Append(si);
    }
    return builder.ToString();
}
/// <summary>
/// Returns the name of the column at index <paramref name="col"/>.
/// The index is first translated with <c>MapColumnIndex</c>; a column coming from the input
/// is named by <c>Input</c>, any other column by <c>GetColumnNameCore</c>.
/// </summary>
/// <param name="col">Column index, expected in <c>[0, ColumnCount)</c>.</param>
/// <returns>The column name.</returns>
public string GetColumnName(int col)
{
    Contracts.CheckParam(0 <= col && col < ColumnCount, nameof(col));

    bool fromInput;
    int mapped = MapColumnIndex(out fromInput, col);
    return fromInput ? Input.GetColumnName(mapped) : GetColumnNameCore(mapped);
}
/// <summary>
/// Computes the layout of the output columns obtained by combining the columns of
/// <paramref name="input"/> with the added columns called <paramref name="names"/>.
/// </summary>
/// <param name="input">Source schema.</param>
/// <param name="names">Names of the added columns, indexed by iinfo.</param>
/// <param name="colMap">On return, one entry per output column: a non-negative value is a source
/// column index, a negative value is the bitwise complement (~) of an iinfo.</param>
/// <param name="mapIinfoToCol">On return, the output column index of every added column.</param>
private static void ComputeColumnMapping(ISchema input, string[] names, out int[] colMap, out int[] mapIinfoToCol)
{
    int srcCount = input.ColumnCount;
    int addedCount = names.Length;

    colMap = new int[srcCount + addedCount];
    mapIinfoToCol = new int[addedCount];

    // Pass 1: record which source column each added column hides, using ~ so that
    // zero keeps meaning "none":
    //  * colMap[src] receives the ~ of the iinfo hiding src.
    //  * mapIinfoToCol[iinfo] receives the ~ of the source column hidden by iinfo.
    for (int slot = 0; slot < addedCount; slot++)
    {
        string wanted = names[slot];
        int hiddenCol;
        if (!input.TryGetColumnIndex(wanted, out hiddenCol))
        {
            continue;
        }

        Contracts.Check(0 <= hiddenCol && hiddenCol < srcCount);
        Contracts.Check(input.GetColumnName(hiddenCol) == wanted);
        Contracts.Check(colMap[hiddenCol] == 0);
        mapIinfoToCol[slot] = ~hiddenCol;
        colMap[hiddenCol] = ~slot;
    }

    // Pass 2: fill the final mapping from the last output column down to the first.
    int next = colMap.Length;

    // Added columns hiding nothing take the trailing positions.
    for (int slot = addedCount - 1; slot >= 0; slot--)
    {
        Contracts.Assert(mapIinfoToCol[slot] <= 0);
        if (mapIinfoToCol[slot] == 0)
        {
            next--;
            colMap[next] = ~slot;
            mapIinfoToCol[slot] = next;
        }
    }

    // Source columns follow; an added column that hides one is placed right after it.
    for (int src = srcCount - 1; src >= 0; src--)
    {
        // Read the pass-1 marker before anything below can overwrite this entry.
        int marker = colMap[src];
        Contracts.Assert(marker <= 0);
        if (marker < 0)
        {
            Contracts.Assert(next > 1);
            int hider = ~marker;
            Contracts.Assert(0 <= hider && hider < addedCount);
            Contracts.Assert(mapIinfoToCol[hider] == ~src);
            next--;
            colMap[next] = ~hider;
            mapIinfoToCol[hider] = next;
        }

        Contracts.Assert(next > 0);
        next--;
        colMap[next] = src;
    }

    Contracts.Assert(next == 0);
}
/// <summary>
/// Declares the PFA input type from <paramref name="schema"/> and registers, for every kept column,
/// the variable name under which it is reachable.
/// Hidden columns, columns listed in <paramref name="toDrop"/> and columns for which
/// <c>PfaTypeOrNullForColumn</c> returns null are skipped.
/// </summary>
/// <param name="schema">Schema describing the input columns.</param>
/// <param name="toDrop">Names of the columns to leave out.</param>
private void SetInput(ISchema schema, HashSet<string> toDrop)
{
    var record = new JObject
    {
        ["type"] = "record",
        ["name"] = "DataInput"
    };
    var declaredFields = new JArray();
    var usedFieldNames = new HashSet<string>();

    for (int col = 0; col < schema.ColumnCount; ++col)
    {
        if (schema.IsHidden(col))
        {
            continue;
        }

        string columnName = schema.GetColumnName(col);
        if (toDrop.Contains(columnName))
        {
            continue;
        }

        JToken columnType = PfaTypeOrNullForColumn(schema, col);
        if (columnType == null)
        {
            continue;
        }

        // Derive a field name that is not already taken by a previous column.
        string fieldName = ModelUtils.CreateNameCore(columnName, usedFieldNames.Contains);
        usedFieldNames.Add(fieldName);

        declaredFields.Add(new JObject
        {
            ["name"] = fieldName,
            ["type"] = columnType
        });
        _nameToVarName.Add(columnName, "input." + fieldName);
    }

    _host.Assert(_nameToVarName.Count == declaredFields.Count);
    _host.Assert(_nameToVarName.Count == usedFieldNames.Count);
    record["fields"] = declaredFields;
    _host.Check(declaredFields.Count >= 1, "Schema produced no inputs for the PFA conversion.");

    if (declaredFields.Count != 1)
    {
        Pfa.InputType = record;
        return;
    }

    // If there's only one, don't bother forming a record: the input is the field itself.
    var onlyField = (JObject)declaredFields[0];
    Pfa.InputType = onlyField["type"];
    string onlyColumn = _nameToVarName.Keys.First();
    _nameToVarName[onlyColumn] = "input";
}
/// <summary>
/// Converts a schema into an array of text loader column descriptions, one per schema column.
/// Column <c>i</c> keeps its name, gets its kind converted with <c>DataKind2DataDataKind</c>
/// and reads from the single source range built from <c>i</c>.
/// </summary>
/// <param name="schema">Schema to convert.</param>
/// <returns>An array with as many entries as the schema has columns.</returns>
public static Data.TextLoaderColumn[] ToColumnArgArray(ISchema schema)
{
    var columns = new Data.TextLoaderColumn[schema.ColumnCount];

    int index = 0;
    while (index < columns.Length)
    {
        var column = new Data.TextLoaderColumn();
        column.Name = schema.GetColumnName(index);
        column.Type = DataKind2DataDataKind(schema.GetColumnType(index).RawKind);
        column.Source = new[] { new Data.TextLoaderRange(index) };

        columns[index] = column;
        ++index;
    }

    return columns;
}
/// <summary>
/// Decides whether column <paramref name="i"/> of <paramref name="schema"/> must be added.
/// A column is added when its score column set id metadata equals <paramref name="scoreSet"/>,
/// or when its name is one of <paramref name="extraColumns"/>.
/// </summary>
/// <param name="schema">Schema holding the column.</param>
/// <param name="i">Index of the column.</param>
/// <param name="extraColumns">Names of columns to add regardless of their score set; may be null.</param>
/// <param name="scoreSet">Score column set id to match.</param>
/// <returns>True if the column must be added.</returns>
private static bool ShouldAddColumn(ISchema schema, int i, string[] extraColumns, uint scoreSet)
{
    uint columnScoreSet = 0;
    bool hasScoreSet = schema.TryGetMetadata(
        MetadataUtils.ScoreColumnSetIdType.AsPrimitive,
        MetadataUtils.Kinds.ScoreColumnSetId,
        i,
        ref columnScoreSet);
    if (hasScoreSet && columnScoreSet == scoreSet)
    {
        return true;
    }

    // Not part of the requested score set: fall back on the explicit list of names.
    string name = schema.GetColumnName(i);
    return extraColumns != null && Array.Exists(extraColumns, name.Equals);
}
/// <summary>
/// Builds a schema shape from a fully defined schema.
/// Hidden columns are left out; every other column is described together with
/// the shape of its metadata.
/// </summary>
/// <param name="schema">Schema to convert; must not be null.</param>
/// <returns>The shape made of the non-hidden columns of <paramref name="schema"/>.</returns>
public static SchemaShape Create(ISchema schema)
{
    Contracts.CheckValue(schema, nameof(schema));

    var shapeColumns = new List<Column>();
    for (int index = 0; index < schema.ColumnCount; index++)
    {
        if (schema.IsHidden(index))
        {
            continue;
        }

        // Describe the metadata of the column first.
        var metadataColumns = new List<Column>();
        foreach (var entry in schema.GetMetadataTypes(index))
        {
            GetColumnArgs(entry.Value, out var metaVecKind, out var metaItemType, out var metaIsKey);
            metadataColumns.Add(new Column(entry.Key, metaVecKind, metaItemType, metaIsKey));
        }

        SchemaShape metadataShape;
        if (metadataColumns.Count > 0)
        {
            metadataShape = new SchemaShape(metadataColumns);
        }
        else
        {
            metadataShape = _empty;
        }

        // Then describe the column itself and attach the metadata shape to it.
        GetColumnArgs(schema.GetColumnType(index), out var vecKind, out var itemType, out var isKey);
        shapeColumns.Add(new Column(schema.GetColumnName(index), vecKind, itemType, isKey, metadataShape));
    }

    return new SchemaShape(shapeColumns);
}