예제 #1
0
        /// <summary>
        /// Builds the PFA expression that tokenizes the source value for column <paramref name="iinfo"/>.
        /// The expression joins a vector input into one string, rewrites every secondary separator
        /// to the first one, splits on that separator, and filters the resulting array.
        /// </summary>
        /// <param name="ctx">The bound PFA context; used to obtain the filter function.</param>
        /// <param name="iinfo">Index of the column being exported; must index into <c>Infos</c>.</param>
        /// <param name="info">The column info; asserted to equal <c>Infos[iinfo]</c>.</param>
        /// <param name="srcToken">The PFA token producing the source value.</param>
        /// <returns>The PFA token producing the filtered array of tokens.</returns>
        protected override JToken SaveAsPfaCore(BoundPfaContext ctx, int iinfo, ColInfo info, JToken srcToken)
        {
            Contracts.AssertValue(ctx);
            Contracts.Assert(0 <= iinfo && iinfo < Infos.Length);
            Contracts.Assert(Infos[iinfo] == info);
            Contracts.AssertValue(srcToken);
            Contracts.Assert(CanSavePfa);

            var separators = _exes[iinfo].Separators;
            var primarySep = PfaUtils.String("" + separators[0]);

            JToken expr = srcToken;

            // A vector input is first glued into a single string using the primary separator.
            if (info.TypeSrc.IsVector)
            {
                expr = PfaUtils.Call("s.join", expr, primarySep);
            }

            // PFA makes a single split far easier than several, so every additional
            // separator is rewritten to the primary one up front (a.flatMap would have
            // been an alternative). The loop is a no-op when there is only one separator.
            for (int sepIndex = 1; sepIndex < separators.Length; sepIndex++)
            {
                var secondarySep = PfaUtils.String("" + separators[sepIndex]);
                expr = PfaUtils.Call("s.replaceall", expr, secondarySep, primarySep);
            }

            expr = PfaUtils.Call("s.split", expr, primarySep);

            // Unlike the TLC word tokenizer, PFA's split can yield empty strings,
            // so the split result is passed through a filter that drops them.
            var nonEmptyFilter = PfaUtils.FuncRef(ctx.Pfa.EnsureHasChars());
            return PfaUtils.Call("a.filter", expr, nonEmptyFilter);
        }
예제 #2
0
        /// <summary>
        /// Builds the PFA expression that maps text terms to their integer values for column
        /// <paramref name="iinfo"/>. The term map is declared as a PFA cell named from "TermMap";
        /// a term that is not a key of that cell maps to -1.
        /// </summary>
        /// <param name="ctx">The bound PFA context; receives the cell and, for vectors, a helper function.</param>
        /// <param name="iinfo">Index of the column being exported; must index into <c>Infos</c>.</param>
        /// <param name="info">The column info; asserted to equal <c>Infos[iinfo]</c>.</param>
        /// <param name="srcToken">The PFA token producing the source value.</param>
        /// <returns>
        /// The PFA token producing the mapped value(s), or <c>null</c> when the source item type is not text.
        /// </returns>
        protected override JToken SaveAsPfaCore(BoundPfaContext ctx, int iinfo, ColInfo info, JToken srcToken)
        {
            Contracts.AssertValue(ctx);
            Contracts.Assert(0 <= iinfo && iinfo < Infos.Length);
            Contracts.Assert(Infos[iinfo] == info);
            Contracts.AssertValue(srcToken);
            Contracts.Assert(CanSavePfa);

            // Only text item types are handled here; anything else yields null.
            if (!info.TypeSrc.ItemType.IsText)
            {
                return null;
            }

            var textMap = (TermMap<DvText>)_termMap[iinfo].Map;
            var termBuffer = default(VBuffer<DvText>);
            textMap.GetTerms(ref termBuffer);

            // Invert the (index, term) items into a JSON object keyed by the term text.
            var termToIndex = new JObject();
            foreach (var entry in termBuffer.Items())
            {
                termToIndex[entry.Value.ToString()] = entry.Key;
            }

            string cellName = ctx.DeclareCell("TermMap", PfaUtils.Type.Map(PfaUtils.Type.Int), termToIndex);
            JObject lookupCell = PfaUtils.Cell(cellName);

            // Scalar input: look the source value up directly, falling back to -1.
            if (!info.TypeSrc.IsVector)
            {
                return PfaUtils.If(
                    PfaUtils.Call("map.containsKey", lookupCell, srcToken),
                    PfaUtils.Index(lookupCell, srcToken),
                    -1);
            }

            // Vector input: register a per-term lookup function and map it over the array.
            var mapperName = ctx.GetFreeFunctionName("mapTerm");
            var mapperParams = new JArray(PfaUtils.Param("term", PfaUtils.Type.String));
            var mapperBody = PfaUtils.If(
                PfaUtils.Call("map.containsKey", lookupCell, "term"),
                PfaUtils.Index(lookupCell, "term"),
                -1);
            ctx.Pfa.AddFunc(mapperName, mapperParams, PfaUtils.Type.Int, mapperBody);

            return PfaUtils.Call("a.map", srcToken, PfaUtils.FuncRef("u." + mapperName));
        }
        /// <summary>
        /// Builds the PFA expression that expands key values into arrays of doubles for column
        /// <paramref name="iinfo"/>. Three shapes are produced: a direct fanout for scalar input,
        /// a flat-mapped fanout when the column is flagged in <c>_concat</c>, and otherwise a fold
        /// that increments slots of an initially all-zero array.
        /// </summary>
        /// <param name="ctx">The bound PFA context; may receive the shared "keyToVecUpdate" function.</param>
        /// <param name="iinfo">Index of the column being exported; must index into <c>Infos</c>.</param>
        /// <param name="info">The column info; asserted to equal <c>Infos[iinfo]</c>.</param>
        /// <param name="srcToken">The PFA token producing the source value.</param>
        /// <returns>The PFA token producing the resulting array of doubles.</returns>
        protected override JToken SaveAsPfaCore(BoundPfaContext ctx, int iinfo, ColInfo info, JToken srcToken)
        {
            Contracts.AssertValue(ctx);
            Contracts.Assert(0 <= iinfo && iinfo < Infos.Length);
            Contracts.Assert(Infos[iinfo] == info);
            Contracts.AssertValue(srcToken);
            Contracts.Assert(CanSavePfa);

            int keyCount = info.TypeSrc.ItemType.KeyCount;
            Host.Assert(keyCount > 0);

            // Scalar input: the fanout function alone does the job.
            if (!info.TypeSrc.IsVector)
            {
                return PfaUtils.Call("cast.fanoutDouble", srcToken, 0, keyCount, false);
            }

            JToken arrType = PfaUtils.Type.Array(PfaUtils.Type.Double);

            // Concatenation: fan out every key and append the per-key arrays together.
            if (_concat[iinfo])
            {
                var perKeyParams = new JArray()
                {
                    PfaUtils.Param("k", PfaUtils.Type.Int)
                };
                var perKeyFanout = PfaUtils.Call("cast.fanoutDouble", "k", 0, keyCount, false);
                var perKeyFunc = PfaContext.CreateFuncBlock(perKeyParams, arrType, perKeyFanout);

                return PfaUtils.Call("a.flatMap", srcToken, perKeyFunc);
            }

            // Bag: the most useful case and also the most involved. Start from an
            // all-zero array and bump the slot of each key while folding over the input.
            const string updateFuncName = "keyToVecUpdate";

            // The update function is registered only if the context does not already contain it.
            if (!ctx.Pfa.ContainsFunc(updateFuncName))
            {
                // Element updater: v -> v + 1.
                var incrementParams = new JArray()
                {
                    PfaUtils.Param("v", PfaUtils.Type.Double)
                };
                var incrementFunc = PfaContext.CreateFuncBlock(
                    incrementParams, PfaUtils.Type.Double, PfaUtils.Call("+", "v", 1));

                // (a, i) -> a with slot i incremented when i >= 0; otherwise a unchanged.
                var updateParams = new JArray(
                    PfaUtils.Param("a", arrType),
                    PfaUtils.Param("i", PfaUtils.Type.Int));
                var indexIsValid = PfaUtils.Call(">=", "i", 0);
                var incrementedArray = PfaUtils.Index("a", "i").AddReturn("to", incrementFunc);

                ctx.Pfa.AddFunc(updateFuncName, updateParams, arrType,
                    PfaUtils.If(indexIsValid, incrementedArray, "a"));
            }

            // Fanning out -1 produces the all-zero array used as the fold's initial value.
            var zeroVector = PfaUtils.Call("cast.fanoutDouble", -1, 0, keyCount, false);
            var updateFuncRef = PfaUtils.FuncRef("u." + updateFuncName);

            return PfaUtils.Call("a.fold", srcToken, zeroVector, updateFuncRef);
        }