/// <summary>
/// Rebuilds a <see cref="Bindings"/> instance from its serialized form.
/// </summary>
/// <param name="ctx">Model load context whose reader supplies the serialized data.</param>
/// <param name="input">Schema against which the source and languages columns are bound.</param>
/// <param name="ch">Channel used for the assertions and decode checks.</param>
/// <returns>Bindings built from the deserialized group infos, column infos and output names.</returns>
public static Bindings Create(ModelLoadContext ctx, ISchema input, IChannel ch)
{
    Contracts.AssertValue(ch);
    ch.AssertValue(ctx);

    // *** Binary format ***
    // int: count of group column infos (i.e., count of source columns)
    // For each group column info
    //     int: the tokenizer language
    //     int: the id of source column name
    //     int: the id of languages column name
    //     bool: whether the types output is required
    //     For each column info that belongs to this group column info
    //     (either one column info for tokens or two for tokens and types)
    //         int: the id of the column name

    int groupCount = ctx.Reader.ReadInt32();
    ch.CheckDecode(groupCount > 0);

    var groupInfos = new ColGroupInfo[groupCount];
    var columnInfos = new List<ColInfo>();
    var outputNames = new List<string>();

    for (int g = 0; g < groupCount; g++)
    {
        // The language is stored as a raw int; it must map to a defined Language member.
        int langCode = ctx.Reader.ReadInt32();
        bool isKnownLanguage = Enum.IsDefined(typeof(Language), langCode);
        ch.CheckDecode(isKnownLanguage);

        // Source column: bound with a predicate that requires a text item type.
        string sourceName = ctx.LoadNonEmptyString();
        int sourceIndex;
        ColumnType sourceType;
        Bind(input, sourceName, t => t.ItemType.IsText, SrcTypeName, out sourceIndex, out sourceType, false);

        // Languages column is optional; -1 marks "no languages column".
        string langColName = ctx.LoadStringOrNull();
        int langColIndex = -1;
        if (langColName != null)
        {
            ColumnType langColType;
            Bind(input, langColName, t => t.IsText, LangTypeName, out langColIndex, out langColType, false);
        }

        bool wantsTypes = ctx.Reader.ReadBoolByte();
        groupInfos[g] = new ColGroupInfo(
            (Language)langCode, sourceIndex, sourceName, sourceType, langColIndex, langColName, wantsTypes);

        // Every group has a tokens output column ...
        columnInfos.Add(new ColInfo(g));
        outputNames.Add(ctx.LoadNonEmptyString());

        // ... and a second, types output column only when it was requested.
        if (wantsTypes)
        {
            columnInfos.Add(new ColInfo(g, isTypes: true));
            outputNames.Add(ctx.LoadNonEmptyString());
        }
    }

    return new Bindings(groupInfos, columnInfos.ToArray(), input, false, outputNames.ToArray());
}
/// <summary>
/// Reads a codec specification followed by a length-prefixed buffer from the model
/// stream, and decodes that buffer into <paramref name="values"/>.
/// </summary>
/// <param name="ch">Channel used for the argument and decode checks.</param>
/// <param name="ctx">Model load context positioned at the serialized data.</param>
/// <param name="factory">Factory used to read the codec from the stream.</param>
/// <param name="values">Reset to its default value and then filled by the codec reader.</param>
private static void Load(IChannel ch, ModelLoadContext ctx, CodecFactory factory, ref VBuffer<ReadOnlyMemory<char>> values)
{
    Contracts.AssertValue(ch);
    ch.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel(GetVersionInfo());

    // *** Binary format ***
    // Codec parameterization: a codec parameterization that should be a ReadOnlyMemory codec
    // int: n, the number of bytes used to write the values
    // byte[n]: the values, as encoded using the codec

    // The codec is obtained through the factory but read from the stream itself,
    // because codecs can potentially be versioned based on their parameterization.
    // Reading it *could* fail if an old version attempts to read a new version;
    // that is also why the codec specification is written alongside the data.
    IValueCodec loadedCodec;
    bool codecRead = factory.TryReadCodec(ctx.Reader.BaseStream, out loadedCodec);
    if (!codecRead)
    {
        throw ch.ExceptDecode();
    }

    ch.AssertValue(loadedCodec);

    // The codec must describe a vector whose items are text.
    var codecType = loadedCodec.Type;
    ch.CheckDecode(codecType.IsVector);
    ch.CheckDecode(codecType.ItemType.IsText);
    var vectorTextCodec = (IValueCodec<VBuffer<ReadOnlyMemory<char>>>)loadedCodec;

    int byteCount = ctx.Reader.ReadInt32();
    ch.CheckDecode(byteCount >= 0);

    using (var payload = new SubsetStream(ctx.Reader.BaseStream, byteCount))
    {
        // NOTE(review): the second argument (1) is presumably the number of items
        // to read -- confirm against the codec API.
        using (var valueReader = vectorTextCodec.OpenReader(payload, 1))
        {
            valueReader.MoveNext();
            values = default(VBuffer<ReadOnlyMemory<char>>);
            valueReader.Get(ref values);
        }

        // After decoding, one more ReadByte must return -1, i.e. nothing may be
        // left unread in the declared buffer.
        int trailing = payload.ReadByte();
        ch.CheckDecode(trailing == -1);
    }
}