/// <summary>
/// Drops every term-map entry keyed by the symbol that <c>Find</c> resolves
/// <paramref name="arg"/> to, and resets the value stored for <paramref name="arg"/>
/// to <c>UnderlyingTopValue</c>.
/// </summary>
/// <param name="arg">Symbol whose outgoing edges are eliminated.</param>
public void EliminateAll(SymValue arg)
{
    SymValue resolved = Find(arg);

    // The updates are recorded before the removal because the helper enumerates
    // the entries still present in TermMap.
    AddEliminateAllUpdate(resolved);
    TermMap = TermMap.RemoveAll(resolved);

    this[arg] = this.UnderlyingTopValue;
}
/// <summary>
/// Records one <c>EliminateEdgeUpdate</c> per function currently keyed under
/// <paramref name="from"/> in the term map. Does nothing unless
/// <c>IsOldSymbol(from)</c> holds.
/// </summary>
/// <param name="from">Source symbol whose edges are about to be removed.</param>
private void AddEliminateAllUpdate(SymValue from)
{
    if (IsOldSymbol(from))
    {
        foreach (TFunc edge in TermMap.Keys2(from))
        {
            var update = new EliminateEdgeUpdate<TFunc, TADomain>(from, edge);
            AddUpdate(update);
        }
    }
}
/// <summary>
/// Removes the edge labelled <paramref name="function"/> from the symbol that
/// <c>Find</c> resolves <paramref name="arg"/> to. An update is recorded only when
/// the removal actually produced a different term map.
/// </summary>
/// <param name="function">Edge label to remove.</param>
/// <param name="arg">Symbol whose edge is removed.</param>
public void Eliminate(TFunc function, SymValue arg)
{
    SymValue source = Find(arg);
    DoubleImmutableMap<SymValue, TFunc, SymValue> reduced = TermMap.Remove(source, function);

    bool nothingRemoved = reduced == TermMap;
    if (nothingRemoved)
    {
        return;
    }

    TermMap = reduced;
    AddEliminateEdgeUpdate(source, function);
}
/// <summary>
/// Emits a "LabelEncoder" node for column <paramref name="iinfo"/> when its source
/// item type is text.
/// </summary>
/// <returns><c>true</c> if a node was created; <c>false</c> for non-text item types.</returns>
protected override bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string srcVariableName, string dstVariableName)
{
    if (info.TypeSrc.ItemType.IsText)
    {
        // Pull the term list out of the bound map for this column.
        var textMap = (TermMap<DvText>)_termMap[iinfo].Map;
        var terms = default(VBuffer<DvText>);
        textMap.GetTerms(ref terms);

        string opType = "LabelEncoder";
        var node = ctx.CreateNode(opType, srcVariableName, dstVariableName, ctx.GetNodeName(opType));

        node.AddAttribute("classes_strings", terms.DenseValues());
        node.AddAttribute("default_int64", -1);
        node.AddAttribute("default_string", DvText.Empty);
        return true;
    }

    return false;
}
/// <summary>
/// Emits a "LabelEncoder" node for column <paramref name="iinfo"/> when its source
/// item type is text.
/// </summary>
/// <returns><c>true</c> if a node was created; <c>false</c> for non-text item types.</returns>
protected override bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string srcVariableName, string dstVariableName)
{
    if (info.TypeSrc.ItemType.IsText)
    {
        // Pull the term list out of the bound map for this column.
        var textMap = (TermMap<DvText>)_termMap[iinfo].Map;
        var terms = default(VBuffer<DvText>);
        textMap.GetTerms(ref terms);

        string opType = "LabelEncoder";
        var node = ctx.CreateNode(opType, srcVariableName, dstVariableName, ctx.GetNodeName(opType));

        node.AddAttribute("classes_strings", terms.DenseValues());
        node.AddAttribute("default_int64", -1);

        // default_string should be the empty string, but a bug in Lotus raises a
        // validation error for an empty default_string. A single space is used as
        // a workaround.
        node.AddAttribute("default_string", " ");
        return true;
    }

    return false;
}
/// <summary>
/// Re-apply constructor: binds the term maps of an existing <paramref name="transform"/>
/// to the columns of <paramref name="newSource"/>.
/// </summary>
private TermTransform(IHostEnvironment env, TermTransform transform, IDataView newSource)
    : base(env, RegistrationName, transform, newSource, TestIsKnownDataKind)
{
    Host.AssertNonEmpty(Infos);
    Host.Assert(Infos.Length == transform.Infos.Length);

    _textMetadata = transform._textMetadata;
    _termMap = new BoundTermMap[Infos.Length];

    for (int col = 0; col < Infos.Length; ++col)
    {
        TermMap trained = transform._termMap[col].Map;
        bool sameItemType = trained.ItemType.Equals(Infos[col].TypeSrc.ItemType);
        if (!sameItemType)
        {
            // A column with the same name exists in the new source, but its item type differs.
            throw Host.Except(
                      "For column '{0}', term map was trained on items of type '{1}' but being applied to type '{2}'",
                      Infos[col].Name, trained.ItemType, Infos[col].TypeSrc.ItemType);
        }

        _termMap[col] = trained.Bind(this, col);
    }

    _types = ComputeTypesAndMetadata();
}
/// <summary>
/// Builds the PFA expression that maps the source token of a text column to its term
/// index. The terms are declared as a "TermMap" cell, and a lookup that misses yields -1.
/// </summary>
/// <returns>
/// The PFA token for the mapped column, or <c>null</c> when the source item type is not text.
/// </returns>
protected override JToken SaveAsPfaCore(BoundPfaContext ctx, int iinfo, ColInfo info, JToken srcToken)
{
    Contracts.AssertValue(ctx);
    Contracts.Assert(0 <= iinfo && iinfo < Infos.Length);
    Contracts.Assert(Infos[iinfo] == info);
    Contracts.AssertValue(srcToken);
    Contracts.Assert(CanSavePfa);

    if (!info.TypeSrc.ItemType.IsText)
    {
        return null;
    }

    var textMap = (TermMap<DvText>)_termMap[iinfo].Map;
    var terms = default(VBuffer<DvText>);
    textMap.GetTerms(ref terms);

    // JSON object keyed by term text, holding the term's index.
    var termToIndex = new JObject();
    foreach (var pair in terms.Items())
    {
        termToIndex[pair.Value.ToString()] = pair.Key;
    }

    string cellName = ctx.DeclareCell("TermMap", PfaUtils.Type.Map(PfaUtils.Type.Int), termToIndex);
    JObject cell = PfaUtils.Cell(cellName);

    if (!info.TypeSrc.IsVector)
    {
        // Scalar source: a single guarded lookup on the source token.
        return PfaUtils.If(PfaUtils.Call("map.containsKey", cell, srcToken), PfaUtils.Index(cell, srcToken), -1);
    }

    // Vector source: declare a helper function doing the guarded lookup and map it
    // over the source array.
    var funcName = ctx.GetFreeFunctionName("mapTerm");
    ctx.Pfa.AddFunc(
        funcName,
        new JArray(PfaUtils.Param("term", PfaUtils.Type.String)),
        PfaUtils.Type.Int,
        PfaUtils.If(PfaUtils.Call("map.containsKey", cell, "term"), PfaUtils.Index(cell, "term"), -1));
    var funcRef = PfaUtils.FuncRef("u." + funcName);
    return PfaUtils.Call("a.map", srcToken, funcRef);
}
/// <summary>
/// Accesses the target of the edge labelled <paramref name="function"/> leaving
/// <paramref name="source"/>. The getter creates a fresh symbol and edge when none is
/// stored. The setter stores the edge and records the term for the target if it is not
/// already at the head of the target's term sequence.
/// </summary>
private SymValue this[SymValue source, TFunc function]
{
    get
    {
        source = Find(source);
        SymValue stored = TermMap[source, function];

        if (stored == null)
        {
            // No edge yet: allocate a symbol, store the edge and its term, and log the update.
            SymValue fresh = FreshSymbol();
            TermMap = TermMap.Add(source, function, fresh);
            EqualTermsMap = EqualTermsMap.Add(
                fresh,
                Sequence<SymGraphTerm<TFunc>>.Cons(new SymGraphTerm<TFunc>(function, source), null));
            AddEdgeUpdate(source, function);
            return fresh;
        }

        return Find(stored);
    }

    set
    {
        source = Find(source);
        value = Find(value);
        TermMap = TermMap.Add(source, function, value);

        Sequence<SymGraphTerm<TFunc>> known = EqualTermsMap[value];

        // Skip the insertion only when the head term is already (function, source).
        bool headDiffers = known.IsEmpty()
                           || !known.Head.Function.Equals(function)
                           || known.Head.Args[0] != source;
        if (headDiffers)
        {
            EqualTermsMap = EqualTermsMap.Add(value, known.Cons(new SymGraphTerm<TFunc>(function, source)));
        }

        AddEdgeUpdate(source, function);
    }
}
/// <summary>
/// Model-load constructor: reads the per-column text-metadata flags and the
/// "Vocabulary" sub-model holding one term map per column, then binds each map.
/// </summary>
private TermTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, ctx, input, TestIsKnownDataKind)
{
    Host.AssertValue(ctx);

    // *** Binary format ***
    // for each term map:
    //   bool(byte): whether this column should present key value metadata as text

    int columnCount = Infos.Length;
    Host.Assert(columnCount > 0);

    if (ctx.Header.ModelVerWritten < VerNonTextTypesSupported)
    {
        // Nothing to read for these versions: every column is text, so the
        // flags can stay at their default.
        _textMetadata = new bool[columnCount];
    }
    else
    {
        _textMetadata = ctx.Reader.ReadBoolArray(columnCount);
    }

    const string dir = "Vocabulary";
    TermMap[] loaded = new TermMap[columnCount];
    bool found = ctx.TryProcessSubModel(dir,
        c =>
        {
            // *** Binary format ***
            // int: number of term maps (should equal number of columns)
            // for each term map:
            //   byte: code identifying the term map type (0 text, 1 codec)
            //   <data>: type specific format, see TermMap save/load methods

            Host.CheckValue(c, nameof(ctx));
            c.CheckAtModel(GetTermManagerVersionInfo());

            int mapCount = c.Reader.ReadInt32();
            Host.CheckDecode(mapCount == columnCount);

            bool typedMaps = c.Header.ModelVerWritten >= VerManagerNonTextTypesSupported;
            for (int i = 0; i < columnCount; ++i)
            {
                if (typedMaps)
                {
                    loaded[i] = TermMap.Load(c, host, this);
                }
                else
                {
                    loaded[i] = TermMap.TextImpl.Create(c, host);
                }
            }
        });
#pragma warning disable TLC_NoMessagesForLoadContext // Vaguely useful.
    if (!found)
    {
        throw Host.ExceptDecode("Missing {0} model", dir);
    }
#pragma warning restore TLC_NoMessagesForLoadContext

    _termMap = new BoundTermMap[columnCount];
    for (int i = 0; i < columnCount; ++i)
    {
        _termMap[i] = loaded[i].Bind(this, i);
    }

    _types = ComputeTypesAndMetadata();
}
/// <summary>
/// This builds the <see cref="TermMap"/> instances per column. A column gets its map
/// from an explicit term list, from the shared data-file map, or by training over
/// <paramref name="trainingData"/>, in that order of preference.
/// </summary>
/// <returns>One term map per entry of <paramref name="infos"/>.</returns>
private static TermMap[] Train(IHostEnvironment env, IChannel ch, ColInfo[] infos, ArgumentsBase args, ColumnBase[] column, IDataView trainingData)
{
    Contracts.AssertValue(env);
    env.AssertValue(ch);
    ch.AssertValue(infos);
    ch.AssertValue(args);
    ch.AssertValue(column);
    ch.AssertValue(trainingData);

    bool explicitTermsGiven = args.Term != null || !string.IsNullOrEmpty(args.Terms);
    if (explicitTermsGiven &&
        (!string.IsNullOrWhiteSpace(args.DataFile) || args.Loader.IsGood() || !string.IsNullOrWhiteSpace(args.TermsColumn)))
    {
        ch.Warning("Explicit term list specified. Data file arguments will be ignored");
    }

    if (!Enum.IsDefined(typeof(SortOrder), args.Sort))
    {
        throw ch.ExceptUserArg(nameof(args.Sort), "Undefined sorting criteria '{0}' detected", args.Sort);
    }

    TermMap termsFromFile = null;
    var termMap = new TermMap[infos.Length];
    int[] maxTerms = new int[infos.Length];
    int trainsNeeded = 0;
    HashSet<int> toTrain = null;

    for (int iinfo = 0; iinfo < infos.Length; iinfo++)
    {
        // Column-level terms take precedence; fall back to the transform-level ones.
        var terms = new DvText(column[iinfo].Terms);
        var termsArray = column[iinfo].Term;
        if (!terms.HasChars && termsArray == null)
        {
            terms = new DvText(args.Terms);
            termsArray = args.Term;
        }

        terms = terms.Trim();

        if (terms.HasChars || (termsArray != null && termsArray.Length > 0))
        {
            // An explicit term list is available: build the map directly from it.
            var sortOrder = column[iinfo].Sort ?? args.Sort;
            if (!Enum.IsDefined(typeof(SortOrder), sortOrder))
            {
                throw ch.ExceptUserArg(nameof(args.Sort), "Undefined sorting criteria '{0}' detected for column '{1}'", sortOrder, infos[iinfo].Name);
            }

            var explicitBuilder = Builder.Create(infos[iinfo].TypeSrc, sortOrder);
            if (terms.HasChars)
            {
                explicitBuilder.ParseAddTermArg(ref terms, ch);
            }
            else
            {
                explicitBuilder.ParseAddTermArg(termsArray, ch);
            }

            termMap[iinfo] = explicitBuilder.Finish();
            continue;
        }

        if (!string.IsNullOrWhiteSpace(args.DataFile))
        {
            // The first column that uses the file loads it; later columns reuse the map.
            if (termsFromFile == null)
            {
                var fileBuilder = Builder.Create(infos[iinfo].TypeSrc, column[iinfo].Sort ?? args.Sort);
                termsFromFile = CreateFileTermMap(env, ch, args, fileBuilder);
            }

            if (!termsFromFile.ItemType.Equals(infos[iinfo].TypeSrc.ItemType))
            {
                // Re-interpreting the file terms for a different column type is not
                // supported: realistic use cases for so complicated a feature are unclear,
                // and it is hard to reconcile "reinterpretation" across types with the
                // resulting data view having an actual type.
                throw ch.ExceptUserArg(nameof(args.DataFile), "Data file terms loaded as type '{0}' but mismatches column '{1}' item type '{2}'",
                                       termsFromFile.ItemType, infos[iinfo].Name, infos[iinfo].TypeSrc.ItemType);
            }

            termMap[iinfo] = termsFromFile;
            continue;
        }

        // Auto-trained column: its map stays null for now, only the term limit is recorded.
        maxTerms[iinfo] = column[iinfo].MaxNumTerms ?? args.MaxNumTerms;
        ch.CheckUserArg(maxTerms[iinfo] > 0, nameof(Column.MaxNumTerms), "Must be positive");
        Utils.Add(ref toTrain, infos[iinfo].Source);
        ++trainsNeeded;
    }

    ch.Assert((Utils.Size(toTrain) == 0) == (trainsNeeded == 0));
    ch.Assert(Utils.Size(toTrain) <= trainsNeeded);

    if (trainsNeeded > 0)
    {
        Trainer[] trainer = new Trainer[trainsNeeded];
        int[] trainerInfo = new int[trainsNeeded];

        // Open the cursor first, then create one trainer per column still lacking a map.
        int itrainer;
        using (var cursor = trainingData.GetRowCursor(toTrain.Contains))
        using (var pch = env.StartProgressChannel("Building term dictionary"))
        {
            long rowCur = 0;
            double rowCount = trainingData.GetRowCount(true) ?? double.NaN;
            var header = new ProgressHeader(new[] { "Total Terms" }, new[] { "examples" });

            itrainer = 0;
            for (int iinfo = 0; iinfo < infos.Length; ++iinfo)
            {
                if (termMap[iinfo] == null)
                {
                    var trainBuilder = Builder.Create(infos[iinfo].TypeSrc, column[iinfo].Sort ?? args.Sort);
                    trainerInfo[itrainer] = iinfo;
                    trainer[itrainer] = Trainer.Create(cursor, infos[iinfo].Source, false, maxTerms[iinfo], trainBuilder);
                    itrainer++;
                }
            }

            ch.Assert(itrainer == trainer.Length);

            pch.SetHeader(header,
                e =>
                {
                    e.SetProgress(0, rowCur, rowCount);
                    // This is only user feedback, so it does not matter that another
                    // thread may be working in the background.
                    e.SetMetric(0, trainer.Sum(t => t.Count));
                });

            // Trainers in [0, tmin) are finished.
            int tmin = 0;

            // The loop may stop early once every trainer has reached its maximum.
            while (tmin < trainer.Length && cursor.MoveNext())
            {
                rowCur++;
                for (int t = tmin; t < trainer.Length; ++t)
                {
                    if (!trainer[t].ProcessRow())
                    {
                        // Move the finished trainer into the [0, tmin) prefix.
                        Utils.Swap(ref trainerInfo[t], ref trainerInfo[tmin]);
                        Utils.Swap(ref trainer[t], ref trainer[tmin++]);
                    }
                }
            }

            pch.Checkpoint(trainer.Sum(t => t.Count), rowCur);
        }

        for (itrainer = 0; itrainer < trainer.Length; ++itrainer)
        {
            int iinfo = trainerInfo[itrainer];
            ch.Assert(termMap[iinfo] == null);
            if (trainer[itrainer].Count == 0)
            {
                ch.Warning("Term map for output column '{0}' contains no entries.", infos[iinfo].Name);
            }

            termMap[iinfo] = trainer[itrainer].Finish();

            // Drop the reference so the trainer's and builder's intermediate structures
            // can be released while iterating, since Finish may itself allocate
            // additional structures.
            trainer[itrainer] = null;
        }

        ch.Assert(termMap.All(tm => tm != null));
        ch.Assert(termMap.Zip(infos, (tm, info) => tm.ItemType.Equals(info.TypeSrc.ItemType)).All(x => x));
    }

    return termMap;
}
/// <summary>
/// Writes a textual dump of this graph to <paramref name="tw"/>: the graph id, the last
/// symbol id, the edges leaving the constant root, every edge and multi-edge term reached
/// from there through a work list, and finally the non-top abstract value of each
/// visited symbol.
/// </summary>
/// <param name="tw">Writer that receives the dump.</param>
public void Dump(TextWriter tw)
{
    var set = new HashSet<SymValue>();
    var workList = new WorkList<SymValue>();
    IImmutableMap<SymValue, int> triggers = ImmutableIntKeyMap<SymValue, int>.Empty(SymValue.GetUniqueKey);

    tw.WriteLine("EGraphId: {0}", this.egraph_id);
    tw.WriteLine("LastSymbolId: {0}", LastSymbolId);

    // Seed the work list with the targets of the edges leaving the constant root.
    foreach (TFunc function in TermMap.Keys2(this.const_root))
    {
        SymValue sv = this[this.const_root, function];
        tw.WriteLine("{0} = {1}", function, sv);
        workList.Add(sv);
    }

    while (!workList.IsEmpty())
    {
        SymValue sv = workList.Pull();
        if (!set.Add(sv))
        {
            // Already visited.
            continue;
        }

        foreach (TFunc function in TermMap.Keys2(sv))
        {
            SymValue target = this[sv, function];
            // Fixed: the format string used to end with a stray ')' and printed
            // "f(sv) = target)".
            tw.WriteLine("{0}({2}) = {1}", function, target, sv);
            workList.Add(target);
        }

        foreach (var edge in MultiEdgeMap.Keys2(sv))
        {
            foreach (SymValue target in MultiEdgeMap[sv, edge].AsEnumerable())
            {
                if (!UpdateTrigger(target, edge, ref triggers))
                {
                    continue;
                }

                SymGraphTerm<TFunc> term = EqualMultiTermsMap[target];
                if (term.Args != null)
                {
                    tw.WriteLine("{0}({1}) = {2}", term.Function, term.Args.ToString(", "), target);
                    workList.Add(target);
                }
            }
        }
    }

    tw.WriteLine("**Abstract value map");
    foreach (SymValue sv in set)
    {
        TADomain abstractValue = this[sv];
        if (!abstractValue.IsTop)
        {
            tw.WriteLine("{0} -> {1}", sv, abstractValue);
        }
    }
}
/// <summary>
/// Returns the functions keyed in the term map under the symbol that <c>Find</c>
/// resolves <paramref name="sv"/> to.
/// </summary>
public IEnumerable<TFunc> Functions(SymValue sv)
{
    SymValue resolved = Find(sv);
    return TermMap.Keys2(resolved);
}
/// <summary>
/// Removes the edge labelled <paramref name="function"/> from the constant root and
/// records the corresponding eliminate-edge update. The update is recorded whether or
/// not the term map actually contained the edge.
/// </summary>
/// <param name="function">Edge label to remove from the constant root.</param>
public void Eliminate(TFunc function)
{
    var root = this.const_root;

    TermMap = TermMap.Remove(root, function);
    AddEliminateEdgeUpdate(root, function);
}