コード例 #1
0
            /// <summary>
            /// Reads the serialized column-group description from <paramref name="ctx"/> and builds the
            /// corresponding <see cref="Bindings"/> over <paramref name="input"/>.
            /// </summary>
            /// <param name="ctx">Model load context whose reader and string loaders supply the data.</param>
            /// <param name="input">Schema passed to <c>Bind</c> for every loaded column name.</param>
            /// <param name="ch">Channel used for assertions and decode checks.</param>
            /// <returns>A new <see cref="Bindings"/> built from the loaded groups, column infos and names.</returns>
            public static Bindings Create(ModelLoadContext ctx, ISchema input, IChannel ch)
            {
                Contracts.AssertValue(ch);
                ch.AssertValue(ctx);

                // *** Binary format ***
                // int: number of group column infos (one per source column)
                // Per group column info:
                //     int: the tokenizer language
                //     int: the id of source column name
                //     int: the id of languages column name
                //     bool: whether the types output is required
                //     Per column info owned by the group
                //     (one for tokens, plus a second one for types when required):
                //          int: the id of the column name

                int groupCount = ctx.Reader.ReadInt32();
                ch.CheckDecode(groupCount > 0);

                var groupInfos = new ColGroupInfo[groupCount];
                var colInfos = new List<ColInfo>();
                var colNames = new List<string>();

                for (int iGroup = 0; iGroup < groupCount; iGroup++)
                {
                    // The language is stored as a raw int and must correspond to a defined Language member.
                    int rawLanguage = ctx.Reader.ReadInt32();
                    ch.CheckDecode(Enum.IsDefined(typeof(Language), rawLanguage));

                    // Source column: bound with a predicate requiring a text item type.
                    string sourceName = ctx.LoadNonEmptyString();
                    int sourceIndex;
                    ColumnType sourceType;
                    Bind(input, sourceName, t => t.ItemType.IsText, SrcTypeName, out sourceIndex, out sourceType, false);

                    // Optional languages column: a null name leaves the index at -1.
                    string languagesName = ctx.LoadStringOrNull();
                    int languagesIndex = -1;
                    if (languagesName != null)
                    {
                        ColumnType languagesType;
                        Bind(input, languagesName, t => t.IsText, LangTypeName, out languagesIndex, out languagesType, false);
                    }

                    bool wantsTypes = ctx.Reader.ReadBoolByte();
                    groupInfos[iGroup] = new ColGroupInfo(
                        (Language)rawLanguage, sourceIndex, sourceName, sourceType, languagesIndex, languagesName, wantsTypes);

                    // Every group contributes a tokens column; a types column follows only when requested.
                    colInfos.Add(new ColInfo(iGroup));
                    colNames.Add(ctx.LoadNonEmptyString());
                    if (!wantsTypes)
                        continue;

                    colInfos.Add(new ColInfo(iGroup, isTypes: true));
                    colNames.Add(ctx.LoadNonEmptyString());
                }

                return new Bindings(groupInfos, colInfos.ToArray(), input, false, colNames.ToArray());
            }
コード例 #2
0
        /// <summary>
        /// Reads a vector of text values from <paramref name="ctx"/> into <paramref name="values"/>,
        /// using a codec whose specification is itself read from the stream.
        /// </summary>
        /// <param name="ch">Channel used for argument and decode checks.</param>
        /// <param name="ctx">Model load context positioned at the serialized values.</param>
        /// <param name="factory">Factory asked to read the codec from the stream.</param>
        /// <param name="values">Receives the decoded vector; it is reset to its default before being filled.</param>
        private static void Load(IChannel ch, ModelLoadContext ctx, CodecFactory factory, ref VBuffer<ReadOnlyMemory<char>> values)
        {
            Contracts.AssertValue(ch);
            ch.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel(GetVersionInfo());

            // *** Binary format ***
            // Codec parameterization: expected to describe a ReadOnlyMemory codec
            // int: n, the byte count of the encoded values
            // byte[n]: the values, as encoded by that codec

            // The codec is obtained through the factory but from the stream contents, because
            // a codec may be versioned according to its parameterization. Per the original
            // author's note, a failure here *could* occur when an older version tries to read
            // data written by a newer one; persisting the codec specification is what allows
            // that situation to be recognized.
            IValueCodec loadedCodec;
            bool codecRead = factory.TryReadCodec(ctx.Reader.BaseStream, out loadedCodec);
            if (!codecRead)
                throw ch.ExceptDecode();

            ch.AssertValue(loadedCodec);
            bool isTextVector = loadedCodec.Type.IsVector && loadedCodec.Type.ItemType.IsText;
            ch.CheckDecode(isTextVector);
            var vectorCodec = (IValueCodec<VBuffer<ReadOnlyMemory<char>>>)loadedCodec;

            int byteCount = ctx.Reader.ReadInt32();
            ch.CheckDecode(byteCount >= 0);

            using (var payload = new SubsetStream(ctx.Reader.BaseStream, byteCount))
            {
                // Exactly one item (the whole vector) is read from the payload.
                using (var valueReader = vectorCodec.OpenReader(payload, 1))
                {
                    valueReader.MoveNext();
                    values = default(VBuffer<ReadOnlyMemory<char>>);
                    valueReader.Get(ref values);
                }

                // After the value reader is disposed the payload must be fully consumed.
                int trailing = payload.ReadByte();
                ch.CheckDecode(trailing == -1);
            }
        }