예제 #1
0
파일: tagging.cs 프로젝트: slyzius/draft
        /// <summary>
        /// Builds the count and smoothed-ratio lookup tables from the supplied vouchers.
        /// </summary>
        /// <param name="inVouchers">Raw vouchers; they are passed through <c>ftm.ReadUniqueVouchers</c> before anything is counted.</param>
        /// <param name="ftm">Feature manager; stored in <c>_ftm</c> and used to read vouchers and features and for the size of <c>_featureById</c>.</param>
        public void Initialize(Voucher[] inVouchers, FeatureManager ftm)
        {
            _ftm = ftm;

            var uniqueVouchers = ftm.ReadUniqueVouchers(inVouchers);
            _totalVouchers = uniqueVouchers.Length;

            // Number of vouchers per category (tag name).
            _categCounts = uniqueVouchers
                .GroupBy(item => item.TagName)
                .ToDictionary(perTag => perTag.Key, perTag => perTag.Count());

            // Feature-per-category likelihood.
            // Step 1: flatten to one (category, feature id) pair per feature read from each voucher.
            var categoryFeaturePairs =
                from tagGroup in uniqueVouchers.GroupBy(item => item.TagName)
                from voucher in tagGroup
                from feature in _ftm.ReadFeatures(voucher.OcrFeatures.ToList())
                select new Tuple<string, int>(tagGroup.Key, feature.Item1);

            // Step 2: per category, map each feature id to
            //   (occurrences of the feature in the category + 1) /
            //   (all feature occurrences in the category + number of entries in _ftm._featureById).
            // The inner dictionary is keyed by the raw feature id; it is not resolved through _featureById.
            _featureLikellihood = categoryFeaturePairs
                .GroupBy(pair => pair.Item1)
                .ToDictionary(
                    perCategory => perCategory.Key,
                    perCategory =>
                    {
                        int occurrencesInCategory = perCategory.Count();
                        return perCategory
                            .GroupBy(pair => pair.Item2)
                            .ToDictionary(
                                perFeature => perFeature.Key,
                                perFeature => ((double)perFeature.Count() + 1) / (occurrencesInCategory + _ftm._featureById.Count));
                    });

            // User-per-category likelihood; original author's note: p(categ,user) = p(categ|user)*p(user).
            // A "user" here is a distinct OrganizationId.
            int totalUsers = uniqueVouchers
                .Select(item => item.OrganizationId)
                .Distinct()
                .Count();

            // Per user: (vouchers of the user + 1) / (total vouchers + number of users).
            _userPrior = uniqueVouchers
                .GroupBy(item => item.OrganizationId)
                .ToDictionary(
                    perUser => perUser.Key,
                    perUser => ((double)perUser.Count() + 1) / (_totalVouchers + totalUsers));

            // Per user and category:
            //   (vouchers of the user in the category + 1) / (vouchers in the category + number of users).
            _categUserLikellihood = uniqueVouchers
                .GroupBy(item => item.OrganizationId)
                .ToDictionary(
                    perUser => perUser.Key,
                    perUser => perUser
                        .GroupBy(item => item.TagName)
                        .ToDictionary(
                            perTag => perTag.Key,
                            perTag => ((double)perTag.Count() + 1) / (_categCounts[perTag.Key] + totalUsers)));
        }