Beispiel #1
0
        /// <summary>
        /// Removes every term-map entry keyed under the value that <c>Find</c> returns for
        /// <paramref name="arg"/>, then stores the underlying top value for <paramref name="arg"/>.
        /// </summary>
        /// <param name="arg">Symbolic value whose entries are eliminated.</param>
        public void EliminateAll(SymValue arg)
        {
            SymValue resolved = Find(arg);

            // The update bookkeeping enumerates the entries still present in TermMap,
            // so it has to run before they are removed.
            AddEliminateAllUpdate(resolved);
            TermMap = TermMap.RemoveAll(resolved);

            this[arg] = this.UnderlyingTopValue;
        }
Beispiel #2
0
 /// <summary>
 /// Registers one <c>EliminateEdgeUpdate</c> for each function keyed under
 /// <paramref name="from"/> in <c>TermMap</c>. Nothing is registered unless
 /// <c>IsOldSymbol(from)</c> is true.
 /// </summary>
 /// <param name="from">Source symbol whose outgoing edges are being eliminated.</param>
 private void AddEliminateAllUpdate(SymValue from)
 {
     if (IsOldSymbol(from))
     {
         foreach (TFunc edge in TermMap.Keys2(from))
         {
             AddUpdate(new EliminateEdgeUpdate<TFunc, TADomain>(from, edge));
         }
     }
 }
Beispiel #3
0
        /// <summary>
        /// Removes the <paramref name="function"/> entry keyed under the value that <c>Find</c>
        /// returns for <paramref name="arg"/>. When the removal leaves the term map unchanged,
        /// no update is recorded.
        /// </summary>
        /// <param name="function">Function (edge label) to remove.</param>
        /// <param name="arg">Symbolic value the edge starts from.</param>
        public void Eliminate(TFunc function, SymValue arg)
        {
            SymValue resolved = Find(arg);
            DoubleImmutableMap<SymValue, TFunc, SymValue> remaining = TermMap.Remove(resolved, function);

            // An unchanged map means there was no such edge to eliminate.
            bool unchanged = remaining == TermMap;
            if (!unchanged)
            {
                TermMap = remaining;
                AddEliminateEdgeUpdate(resolved, function);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Exports the term map of column <paramref name="iinfo"/> as an ONNX "LabelEncoder" node.
        /// </summary>
        /// <returns>
        /// <c>false</c> when the source item type is not text (nothing is emitted); otherwise <c>true</c>.
        /// </returns>
        protected override bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string srcVariableName, string dstVariableName)
        {
            // Only text-typed columns are exported.
            if (!info.TypeSrc.ItemType.IsText)
            {
                return false;
            }

            var textMap = (TermMap<DvText>)_termMap[iinfo].Map;
            var vocabulary = default(VBuffer<DvText>);
            textMap.GetTerms(ref vocabulary);

            const string opType = "LabelEncoder";
            var node = ctx.CreateNode(opType, srcVariableName, dstVariableName, ctx.GetNodeName(opType));

            // The node carries the term list plus the defaults for both encoding directions.
            node.AddAttribute("classes_strings", vocabulary.DenseValues());
            node.AddAttribute("default_int64", -1);
            node.AddAttribute("default_string", DvText.Empty);

            return true;
        }
        /// <summary>
        /// Exports the term map of column <paramref name="iinfo"/> as an ONNX "LabelEncoder" node.
        /// </summary>
        /// <returns>
        /// <c>false</c> when the source item type is not text (nothing is emitted); otherwise <c>true</c>.
        /// </returns>
        protected override bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string srcVariableName, string dstVariableName)
        {
            // Only text-typed columns are exported.
            if (!info.TypeSrc.ItemType.IsText)
            {
                return false;
            }

            var textMap = (TermMap<DvText>)_termMap[iinfo].Map;
            var vocabulary = default(VBuffer<DvText>);
            textMap.GetTerms(ref vocabulary);

            const string opType = "LabelEncoder";
            var node = ctx.CreateNode(opType, srcVariableName, dstVariableName, ctx.GetNodeName(opType));

            node.AddAttribute("classes_strings", vocabulary.DenseValues());
            node.AddAttribute("default_int64", -1);

            // Workaround: default_string should be an empty string, but a bug in Lotus raises a
            // validation error for an empty default_string, so a single space is used instead.
            node.AddAttribute("default_string", " ");

            return true;
        }
        /// <summary>
        /// Re-apply constructor: reuses the term maps of <paramref name="transform"/> on
        /// <paramref name="newSource"/>, re-binding each map to the corresponding column.
        /// Throws when a column's source item type differs from the item type of its term map.
        /// </summary>
        private TermTransform(IHostEnvironment env, TermTransform transform, IDataView newSource)
            : base(env, RegistrationName, transform, newSource, TestIsKnownDataKind)
        {
            Host.AssertNonEmpty(Infos);
            Host.Assert(Infos.Length == transform.Infos.Length);

            _textMetadata = transform._textMetadata;
            _termMap = new BoundTermMap[Infos.Length];

            for (int col = 0; col < Infos.Length; ++col)
            {
                TermMap trained = transform._termMap[col].Map;
                var sourceItemType = Infos[col].TypeSrc.ItemType;

                // A column with the same name may now carry a different type; that cannot be re-applied.
                if (!trained.ItemType.Equals(sourceItemType))
                {
                    throw Host.Except(
                              "For column '{0}', term map was trained on items of type '{1}' but being applied to type '{2}'",
                              Infos[col].Name, trained.ItemType, sourceItemType);
                }

                _termMap[col] = trained.Bind(this, col);
            }

            _types = ComputeTypesAndMetadata();
        }
        /// <summary>
        /// Builds the PFA expression that maps the source token of column <paramref name="iinfo"/>
        /// through its term map. The map is declared as a "TermMap" cell from term text to index;
        /// a term absent from the cell maps to -1.
        /// </summary>
        /// <returns>
        /// <c>null</c> when the source item type is not text; otherwise the lookup expression
        /// (an "a.map" call over a generated helper function for vector columns).
        /// </returns>
        protected override JToken SaveAsPfaCore(BoundPfaContext ctx, int iinfo, ColInfo info, JToken srcToken)
        {
            Contracts.AssertValue(ctx);
            Contracts.Assert(0 <= iinfo && iinfo < Infos.Length);
            Contracts.Assert(Infos[iinfo] == info);
            Contracts.AssertValue(srcToken);
            Contracts.Assert(CanSavePfa);

            // Only text-typed columns are exported.
            if (!info.TypeSrc.ItemType.IsText)
            {
                return null;
            }

            var textMap = (TermMap<DvText>)_termMap[iinfo].Map;
            var vocabulary = default(VBuffer<DvText>);
            textMap.GetTerms(ref vocabulary);

            // JSON object keyed by term text, holding the term's index.
            var termToIndex = new JObject();
            foreach (var entry in vocabulary.Items())
            {
                termToIndex[entry.Value.ToString()] = entry.Key;
            }

            string cellName = ctx.DeclareCell(
                "TermMap", PfaUtils.Type.Map(PfaUtils.Type.Int), termToIndex);
            JObject cellRef = PfaUtils.Cell(cellName);

            if (!info.TypeSrc.IsVector)
            {
                // Scalar column: look the token up directly.
                return PfaUtils.If(
                    PfaUtils.Call("map.containsKey", cellRef, srcToken),
                    PfaUtils.Index(cellRef, srcToken),
                    -1);
            }

            // Vector column: declare a per-term lookup function and map it over the array.
            var funcName = ctx.GetFreeFunctionName("mapTerm");
            ctx.Pfa.AddFunc(
                funcName,
                new JArray(PfaUtils.Param("term", PfaUtils.Type.String)),
                PfaUtils.Type.Int,
                PfaUtils.If(
                    PfaUtils.Call("map.containsKey", cellRef, "term"),
                    PfaUtils.Index(cellRef, "term"),
                    -1));

            return PfaUtils.Call("a.map", srcToken, PfaUtils.FuncRef("u." + funcName));
        }
Beispiel #8
0
        /// <summary>
        /// Accesses the target of the <paramref name="function"/> edge leaving <paramref name="source"/>.
        /// The getter creates the edge with a fresh symbol when the term map has no entry for it;
        /// the setter (re)points the edge at the assigned value. Both record an edge update
        /// whenever they write to the term map.
        /// </summary>
        private SymValue this[SymValue source, TFunc function]
        {
            get
            {
                source = Find(source);
                SymValue existing = TermMap[source, function];

                if (existing == null)
                {
                    // No such edge yet: allocate a target and register the term that produced it.
                    SymValue fresh = FreshSymbol();
                    TermMap = TermMap.Add(source, function, fresh);
                    EqualTermsMap = EqualTermsMap.Add(
                        fresh,
                        Sequence<SymGraphTerm<TFunc>>.Cons(new SymGraphTerm<TFunc>(function, source), null));
                    AddEdgeUpdate(source, function);
                    return fresh;
                }

                return Find(existing);
            }
            set
            {
                source = Find(source);
                value = Find(value);

                TermMap = TermMap.Add(source, function, value);

                Sequence<SymGraphTerm<TFunc>> known = EqualTermsMap[value];

                // Skip the prepend only when the head of the list is already this exact term.
                bool prependTerm = known.IsEmpty()
                                   || !known.Head.Function.Equals(function)
                                   || known.Head.Args[0] != source;
                if (prependTerm)
                {
                    EqualTermsMap = EqualTermsMap.Add(value, known.Cons(new SymGraphTerm<TFunc>(function, source)));
                }

                AddEdgeUpdate(source, function);
            }
        }
        /// <summary>
        /// Deserialization constructor: reads the per-column text-metadata flags, loads the
        /// term maps from the "Vocabulary" sub-model and binds each map to its column.
        /// Throws a decode exception when the sub-model is missing.
        /// </summary>
        private TermTransform(IHost host, ModelLoadContext ctx, IDataView input)
            : base(host, ctx, input, TestIsKnownDataKind)
        {
            Host.AssertValue(ctx);

            // *** Binary format ***
            // for each term map:
            //   bool(byte): whether this column should present key value metadata as text

            int cinfo = Infos.Length;
            Host.Assert(cinfo > 0);

            // Models written before non-text types were supported store no flags; the
            // default-initialized array is used as-is since those columns are all text.
            _textMetadata = ctx.Header.ModelVerWritten >= VerNonTextTypesSupported
                ? ctx.Reader.ReadBoolArray(cinfo)
                : new bool[cinfo];

            const string dir = "Vocabulary";
            var loadedMaps = new TermMap[cinfo];

            bool found = ctx.TryProcessSubModel(dir,
                c =>
                {
                    // *** Binary format ***
                    // int: number of term maps (should equal number of columns)
                    // for each term map:
                    //   byte: code identifying the term map type (0 text, 1 codec)
                    //   <data>: type specific format, see TermMap save/load methods

                    Host.CheckValue(c, nameof(ctx));
                    c.CheckAtModel(GetTermManagerVersionInfo());

                    int cmap = c.Reader.ReadInt32();
                    Host.CheckDecode(cmap == cinfo);

                    // Older managers only ever stored text term maps.
                    bool typedMaps = c.Header.ModelVerWritten >= VerManagerNonTextTypesSupported;
                    for (int i = 0; i < cinfo; ++i)
                    {
                        if (typedMaps)
                        {
                            loadedMaps[i] = TermMap.Load(c, host, this);
                        }
                        else
                        {
                            loadedMaps[i] = TermMap.TextImpl.Create(c, host);
                        }
                    }
                });

#pragma warning disable TLC_NoMessagesForLoadContext // Vaguely useful.
            if (!found)
            {
                throw Host.ExceptDecode("Missing {0} model", dir);
            }
#pragma warning restore TLC_NoMessagesForLoadContext

            _termMap = new BoundTermMap[cinfo];
            for (int i = 0; i < cinfo; ++i)
            {
                _termMap[i] = loadedMaps[i].Bind(this, i);
            }

            _types = ComputeTypesAndMetadata();
        }
        /// <summary>
        /// This builds the <see cref="TermMap"/> instances per column.
        /// For each column the map comes from, in order of preference: an explicit term list
        /// (column-level, falling back to the one in <paramref name="args"/>), the terms data file
        /// named in <paramref name="args"/>, or training over <paramref name="trainingData"/>.
        /// </summary>
        /// <param name="env">Host environment; used to start the progress channel.</param>
        /// <param name="ch">Channel used for assertions, warnings and user-argument errors.</param>
        /// <param name="infos">Column infos; one term map is produced per entry.</param>
        /// <param name="args">Transform-level arguments providing defaults for every column.</param>
        /// <param name="column">Per-column arguments, indexed in parallel with <paramref name="infos"/>.</param>
        /// <param name="trainingData">Data to scan for the columns that need training.</param>
        /// <returns>An array with one term map per entry of <paramref name="infos"/>.</returns>
        private static TermMap[] Train(IHostEnvironment env, IChannel ch, ColInfo[] infos,
                                       ArgumentsBase args, ColumnBase[] column, IDataView trainingData)
        {
            Contracts.AssertValue(env);
            env.AssertValue(ch);
            ch.AssertValue(infos);
            ch.AssertValue(args);
            ch.AssertValue(column);
            ch.AssertValue(trainingData);

            // An explicit term list takes precedence over any data-file source; warn when both are given.
            if ((args.Term != null || !string.IsNullOrEmpty(args.Terms)) &&
                (!string.IsNullOrWhiteSpace(args.DataFile) || args.Loader.IsGood() ||
                 !string.IsNullOrWhiteSpace(args.TermsColumn)))
            {
                ch.Warning("Explicit term list specified. Data file arguments will be ignored");
            }

            if (!Enum.IsDefined(typeof(SortOrder), args.Sort))
            {
                throw ch.ExceptUserArg(nameof(args.Sort), "Undefined sorting criteria '{0}' detected", args.Sort);
            }

            // Term map loaded from the data file; created lazily and shared by every column that uses the file.
            TermMap termsFromFile = null;
            var     termMap       = new TermMap[infos.Length];

            // lims: per-column maximum number of terms, only set for columns that are trained.
            // trainsNeeded: number of columns to train; toTrain: set of source column indices they read.
            int[]         lims         = new int[infos.Length];
            int           trainsNeeded = 0;
            HashSet <int> toTrain      = null;

            for (int iinfo = 0; iinfo < infos.Length; iinfo++)
            {
                // First check whether we have a terms argument, and handle it appropriately.
                var terms      = new DvText(column[iinfo].Terms);
                var termsArray = column[iinfo].Term;
                if (!terms.HasChars && termsArray == null)
                {
                    // Nothing specified at column level: fall back to the transform-level terms.
                    terms      = new DvText(args.Terms);
                    termsArray = args.Term;
                }

                terms = terms.Trim();
                if (terms.HasChars || (termsArray != null && termsArray.Length > 0))
                {
                    // We have terms! Pass it in.
                    var sortOrder = column[iinfo].Sort ?? args.Sort;
                    if (!Enum.IsDefined(typeof(SortOrder), sortOrder))
                    {
                        throw ch.ExceptUserArg(nameof(args.Sort), "Undefined sorting criteria '{0}' detected for column '{1}'", sortOrder, infos[iinfo].Name);
                    }

                    var bldr = Builder.Create(infos[iinfo].TypeSrc, sortOrder);
                    // The single-string form wins over the array form when both are present.
                    if (terms.HasChars)
                    {
                        bldr.ParseAddTermArg(ref terms, ch);
                    }
                    else
                    {
                        bldr.ParseAddTermArg(termsArray, ch);
                    }
                    termMap[iinfo] = bldr.Finish();
                }
                else if (!string.IsNullOrWhiteSpace(args.DataFile))
                {
                    // First column using this file.
                    if (termsFromFile == null)
                    {
                        var bldr = Builder.Create(infos[iinfo].TypeSrc, column[iinfo].Sort ?? args.Sort);
                        termsFromFile = CreateFileTermMap(env, ch, args, bldr);
                    }
                    if (!termsFromFile.ItemType.Equals(infos[iinfo].TypeSrc.ItemType))
                    {
                        // We have no current plans to support re-interpretation based on different column
                        // type, not only because it's unclear what realistic customer use-cases for such
                        // a complicated feature would be, and also because it's difficult to see how we
                        // can logically reconcile "reinterpretation" for different types with the resulting
                        // data view having an actual type.
                        throw ch.ExceptUserArg(nameof(args.DataFile), "Data file terms loaded as type '{0}' but mismatches column '{1}' item type '{2}'",
                                               termsFromFile.ItemType, infos[iinfo].Name, infos[iinfo].TypeSrc.ItemType);
                    }
                    termMap[iinfo] = termsFromFile;
                }
                else
                {
                    // Auto train this column. Leave the term map null for now, but set the lim appropriately.
                    lims[iinfo] = column[iinfo].MaxNumTerms ?? args.MaxNumTerms;
                    ch.CheckUserArg(lims[iinfo] > 0, nameof(Column.MaxNumTerms), "Must be positive");
                    Utils.Add(ref toTrain, infos[iinfo].Source);
                    ++trainsNeeded;
                }
            }

            // toTrain is a set, so it may hold fewer entries than there are columns to train.
            ch.Assert((Utils.Size(toTrain) == 0) == (trainsNeeded == 0));
            ch.Assert(Utils.Size(toTrain) <= trainsNeeded);
            if (trainsNeeded > 0)
            {
                // trainerInfo[k] is the index into infos/termMap of the column handled by trainer[k].
                Trainer[] trainer     = new Trainer[trainsNeeded];
                int[]     trainerInfo = new int[trainsNeeded];
                // Open the cursor, then instantiate the trainers.
                int itrainer;
                using (var cursor = trainingData.GetRowCursor(toTrain.Contains))
                    using (var pch = env.StartProgressChannel("Building term dictionary"))
                    {
                        long   rowCur   = 0;
                        double rowCount = trainingData.GetRowCount(true) ?? double.NaN;
                        var    header   = new ProgressHeader(new[] { "Total Terms" }, new[] { "examples" });

                        itrainer = 0;
                        for (int iinfo = 0; iinfo < infos.Length; ++iinfo)
                        {
                            // Columns already resolved from explicit terms or the data file need no trainer.
                            if (termMap[iinfo] != null)
                            {
                                continue;
                            }
                            var bldr = Builder.Create(infos[iinfo].TypeSrc, column[iinfo].Sort ?? args.Sort);
                            trainerInfo[itrainer] = iinfo;
                            trainer[itrainer++]   = Trainer.Create(cursor, infos[iinfo].Source, false, lims[iinfo], bldr);
                        }
                        ch.Assert(itrainer == trainer.Length);
                        pch.SetHeader(header,
                                      e =>
                        {
                            e.SetProgress(0, rowCur, rowCount);
                            // Purely feedback for the user. That the other thread might be
                            // working in the background is not a problem.
                            e.SetMetric(0, trainer.Sum(t => t.Count));
                        });

                        // The [0,tmin) trainers are finished.
                        int tmin = 0;
                        // We might exit early if all trainers reach their maximum.
                        while (tmin < trainer.Length && cursor.MoveNext())
                        {
                            rowCur++;
                            for (int t = tmin; t < trainer.Length; ++t)
                            {
                                // A trainer whose ProcessRow returns false is swapped into the finished
                                // prefix, together with its trainerInfo entry so the two arrays stay aligned.
                                if (!trainer[t].ProcessRow())
                                {
                                    Utils.Swap(ref trainerInfo[t], ref trainerInfo[tmin]);
                                    Utils.Swap(ref trainer[t], ref trainer[tmin++]);
                                }
                            }
                        }

                        pch.Checkpoint(trainer.Sum(t => t.Count), rowCur);
                    }
                // Turn each trainer into the final term map of its column.
                for (itrainer = 0; itrainer < trainer.Length; ++itrainer)
                {
                    int iinfo = trainerInfo[itrainer];
                    ch.Assert(termMap[iinfo] == null);
                    if (trainer[itrainer].Count == 0)
                    {
                        ch.Warning("Term map for output column '{0}' contains no entries.", infos[iinfo].Name);
                    }
                    termMap[iinfo] = trainer[itrainer].Finish();
                    // Allow the intermediate structures in the trainer and builder to be released as we iterate
                    // over the columns, as the Finish operation can potentially result in the allocation of
                    // additional structures.
                    trainer[itrainer] = null;
                }
                ch.Assert(termMap.All(tm => tm != null));
                ch.Assert(termMap.Zip(infos, (tm, info) => tm.ItemType.Equals(info.TypeSrc.ItemType)).All(x => x));
            }

            return(termMap);
        }
Beispiel #11
0
        /// <summary>
        /// Writes a textual dump of the graph to <paramref name="tw"/>: the graph id and last symbol id,
        /// the edges leaving the constant root, every edge and multi-edge term reachable from those
        /// targets, and finally the non-top abstract value of each visited symbol.
        /// </summary>
        /// <param name="tw">Writer that receives the dump.</param>
        public void Dump(TextWriter tw)
        {
            var visited  = new HashSet<SymValue>();
            var workList = new WorkList<SymValue>();
            IImmutableMap<SymValue, int> triggers = ImmutableIntKeyMap<SymValue, int>.Empty(SymValue.GetUniqueKey);

            tw.WriteLine("EGraphId: {0}", this.egraph_id);
            tw.WriteLine("LastSymbolId: {0}", LastSymbolId);

            // Seed the traversal with the targets of the edges leaving the constant root.
            foreach (TFunc function in TermMap.Keys2(this.const_root))
            {
                SymValue sv = this[this.const_root, function];
                tw.WriteLine("{0} = {1}", function, sv);
                workList.Add(sv);
            }

            while (!workList.IsEmpty())
            {
                SymValue sv = workList.Pull();

                // Each symbol is expanded only once.
                if (!visited.Add(sv))
                {
                    continue;
                }

                foreach (TFunc function in TermMap.Keys2(sv))
                {
                    SymValue target = this[sv, function];

                    // Prints "function(source) = target". The original format string ended with a
                    // stray ')' that produced unbalanced output.
                    tw.WriteLine("{0}({2}) = {1}", function, target, sv);
                    workList.Add(target);
                }

                foreach (var edge in MultiEdgeMap.Keys2(sv))
                {
                    foreach (SymValue target in MultiEdgeMap[sv, edge].AsEnumerable())
                    {
                        // A multi-edge term is printed only when UpdateTrigger reports it as ready.
                        if (!UpdateTrigger(target, edge, ref triggers))
                        {
                            continue;
                        }

                        SymGraphTerm<TFunc> term = EqualMultiTermsMap[target];
                        if (term.Args != null)
                        {
                            tw.WriteLine("{0}({1}) = {2}",
                                         term.Function,
                                         term.Args.ToString(", "), target);
                            workList.Add(target);
                        }
                    }
                }
            }

            tw.WriteLine("**Abstract value map");
            foreach (SymValue sv in visited)
            {
                TADomain abstractValue = this[sv];
                if (!abstractValue.IsTop)
                {
                    tw.WriteLine("{0} -> {1}", sv, abstractValue);
                }
            }
        }
Beispiel #12
0
 /// <summary>
 /// Enumerates the functions keyed in <c>TermMap</c> under the value that <c>Find</c>
 /// returns for <paramref name="sv"/>.
 /// </summary>
 /// <param name="sv">Symbolic value to inspect.</param>
 public IEnumerable<TFunc> Functions(SymValue sv)
 {
     var resolved = Find(sv);

     return TermMap.Keys2(resolved);
 }
Beispiel #13
0
 /// <summary>
 /// Removes the <paramref name="function"/> entry keyed under the constant root from
 /// <c>TermMap</c> and records the corresponding edge-elimination update.
 /// The update is recorded whether or not the removal changed the map.
 /// </summary>
 /// <param name="function">Function (edge label) to remove from the constant root.</param>
 public void Eliminate(TFunc function)
 {
     var root = this.const_root;

     TermMap = TermMap.Remove(root, function);
     AddEliminateEdgeUpdate(root, function);
 }