Esempio n. 1
0
        /// <summary>
        /// Runs the base setup and then applies the BPRFM-specific setup parameters
        /// ("numGroups", "Normalize" and "ingoreFeatures") to the wrapped recommender.
        /// </summary>
        /// <exception cref="WrapRecException">
        /// Thrown when the wrapped recommender instance is not a <c>BPRFM</c>.
        /// </exception>
        public override void Setup()
        {
            base.Setup();

            var bprFm = MmlRecommenderInstance as BPRFM;

            if (bprFm == null)
            {
                throw new WrapRecException("Expect ml-class 'BPRFM' for 'MmlBprfmRecommender'");
            }

            // The weighted variant requires "numGroups"; a missing key fails here.
            var weighted = MmlRecommenderInstance as WeightedBPRFM;

            if (weighted != null)
            {
                weighted.NumGroups = int.Parse(SetupParameters["numGroups"]);
            }

            string normalize;

            if (SetupParameters.TryGetValue("Normalize", out normalize))
            {
                bprFm.Normalize = bool.Parse(normalize);
            }

            // The parameter key is spelled "ingoreFeatures" in existing configurations.
            string ignoreFeatures;

            if (SetupParameters.TryGetValue("ingoreFeatures", out ignoreFeatures))
            {
                bprFm.IgnoreFeaturesOnPrediction = bool.Parse(ignoreFeatures);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Loads the split's data container if necessary, resolves the output path and
        /// context names from the setup parameters, and initializes the id mappers and
        /// the per-user / global item id lists.
        /// </summary>
        public override void Setup()
        {
            var container = Split.Container;

            if (!container.IsLoaded)
            {
                Logger.Current.Info("Loading DataContainer '{0}'...", container.Id);
                container.Load();
            }

            OutputPath = Path.Combine(ExperimentManager.ResultsFolder, SetupParameters["outputPath"]);

            // "contextNames" is an optional comma-separated list.
            string contextNames;
            ContextNames = SetupParameters.TryGetValue("contextNames", out contextNames)
                           ? contextNames.Split(',').ToList()
                           : new List <string>();

            UserMapper    = new Mapping();
            ContextMapper = new Mapping();

            // Register a placeholder first in each mapper so id 0 is burned.
            UserMapper.ToInternalID("burn");
            ContextMapper.ToInternalID("burn");

            // For every user, the ids of the items that user gave feedback on.
            UserItems = container.Users.Values.ToDictionary(
                user => user.Id,
                user => user.Feedbacks.Select(feedback => feedback.Item.Id).ToList());

            AllItems = container.Items.Values.Select(item => item.Id).ToList();
        }
Esempio n. 3
0
        /// <summary>
        /// Normalizes the libFM-related setup parameters and builds the argument list
        /// from them, then sets the default data type and feature builder.
        /// </summary>
        public override void Setup()
        {
            // "dim" and "regular" are written with '-' separators; convert them to ','.
            foreach (var name in new string[] { "dim", "regular" })
            {
                string raw;

                if (SetupParameters.TryGetValue(name, out raw))
                {
                    SetupParameters[name] = raw.Replace('-', ',');
                }
            }

            if (!SetupParameters.ContainsKey("libFmPath"))
            {
                SetupParameters["libFmPath"] = "libfm.net.exe";
            }

            // Every parameter except the executable path becomes a "-key", "value" pair.
            var arguments = new List <string>();

            foreach (var parameter in SetupParameters)
            {
                if (parameter.Key.ToLower() == "libfmpath")
                {
                    continue;
                }

                arguments.Add("-" + parameter.Key);
                arguments.Add(parameter.Value);
            }

            LibFmArguments = arguments;

            // Without an explicit "save_model" parameter the default model file is used.
            if (!SetupParameters.ContainsKey("save_model"))
            {
                LibFmArguments.Add("-save_model train.model");
            }

            // default data type
            DataType = DataType.Ratings;

            FeatureBuilder = new FmFeatureBuilder();
        }
Esempio n. 4
0
        /// <summary>
        /// Builds the CSV configuration from the setup parameters and, when a "header"
        /// parameter is given, the column-type and column-index lookups.
        /// </summary>
        /// <remarks>
        /// "hasHeader" defaults to true and is only disabled by the exact value "false".
        /// "delimiter" defaults to "," and the two-character sequence "\t" is translated
        /// to a tab. "header" is a comma-separated list of <c>name</c> or <c>name:type</c>
        /// entries; entries without a type get the type "d".
        /// </remarks>
        public override void Setup()
        {
            bool   hasHeader = !(SetupParameters.ContainsKey("hasHeader") && SetupParameters["hasHeader"] == "false");
            string delimiter = SetupParameters.ContainsKey("delimiter") ? SetupParameters["delimiter"].Replace("\\t", "\t") : ",";

            CsvConfig = new CsvConfiguration()
            {
                Delimiter       = delimiter,
                HasHeaderRecord = hasHeader
            };

            if (SetupParameters.ContainsKey("header"))
            {
                // Each entry is split once into [name] or [name, type, ...].
                var columns = SetupParameters["header"].Split(',')
                              .Select(h => h.Split(':'))
                              .ToList();

                Header = columns.ToDictionary(parts => parts[0],
                                              parts => parts.Length == 1 ? "d" : parts[1]);

                // Column position in the header list, keyed by column name.
                int i = 0;
                FieldIndices = columns.ToDictionary(parts => parts[0], parts => i++);
            }
        }
        /// <summary>
        /// Runs the base setup and creates the prediction writer table when a
        /// "predictionFile" setup parameter is present.
        /// </summary>
        public override void Setup()
        {
            base.Setup();

            // Nothing more to prepare unless predictions should be written out.
            if (!SetupParameters.ContainsKey("predictionFile"))
            {
                return;
            }

            _predictionWriter = new MultiKeyDictionary <int, int, StreamWriter>();
        }
Esempio n. 6
0
        /// <summary>
        /// Reads the delimiter, the source result files and the output file from the
        /// setup parameters; file names are resolved against the results folder.
        /// </summary>
        public override void Setup()
        {
            // Tab is the default; a literal "\t" in the configuration means a tab too.
            string configuredDelimiter;
            Delimiter = SetupParameters.TryGetValue("delimiter", out configuredDelimiter)
                        ? configuredDelimiter.Replace("\\t", "\t")
                        : "\t";

            var resultsFolder = ExperimentManager.ResultsFolder;
            var sourceNames   = SetupParameters["sourceFiles"].Split(',');

            ResultFiles = sourceNames
                          .Select(name => Path.Combine(resultsFolder, name))
                          .ToArray();

            OutputFile = Path.Combine(ExperimentManager.ResultsFolder, SetupParameters["outputFile"]);
        }
Esempio n. 7
0
        /// <summary>
        /// Reads the saved model file and fills <c>_w</c> (the value on the second line
        /// followed by the values from the fourth line up to the next '#' line) and
        /// <c>_v</c> (one space-separated float array per remaining line).
        /// </summary>
        /// <remarks>
        /// The file name comes from the "save_model" setup parameter (quotes removed)
        /// and defaults to "train.model". Numbers are parsed with the invariant culture
        /// so that a '.' decimal separator is read correctly whatever the current
        /// thread culture is.
        /// </remarks>
        private void LoadFmModel()
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            string modelFile = SetupParameters.ContainsKey("save_model") ?
                               SetupParameters["save_model"] : "train.model";

            modelFile = modelFile.Replace("\"", "");

            var lines = File.ReadAllLines(modelFile);

            // TODO: if dim is 0,0,x w0 and w would be 0 and the format of the file would be different
            float w0 = float.Parse(lines.Skip(1).First(), invariant);

            // Line 0 and line 2 are skipped (presumably '#' section headers - confirm
            // against the model writer); the weights run until the next '#' line.
            _w = new float[] { w0 }.Concat(
                lines.Skip(3)
                .TakeWhile(l => !l.StartsWith("#", System.StringComparison.Ordinal))
                .Select(l => float.Parse(l, invariant))).ToList();

            // _w.Count weight entries (w0 included) plus three more lines precede the
            // first factor row.
            _v = lines.Skip(_w.Count + 3)
                 .Select(l => l.Split(' ').Select(v => float.Parse(v, invariant)).ToArray()).ToList();
        }
Esempio n. 8
0
        /// <summary>
        /// Resolves the paths of the source and the two part readers from the
        /// configuration's "reader" elements and reads the split ratio, header flag and
        /// shuffle flag.
        /// </summary>
        public override void Setup()
        {
            var readerElements = ExperimentManager.ConfigRoot.Descendants("reader");

            // Reader id -> reader path, both with experiment parameters injected.
            var readerPaths = readerElements.ToDictionary(
                reader => reader.Attribute("id").Value.Inject(ExperimentManager.Parameters),
                reader => reader.Attribute("path").Value.Inject(ExperimentManager.Parameters));

            Part1Path  = readerPaths[SetupParameters["part1Reader"]];
            Part2Path  = readerPaths[SetupParameters["part2Reader"]];
            SourcePath = readerPaths[SetupParameters["sourceReader"]];

            Part1Ratio = float.Parse(SetupParameters["part1Ratio"]);

            // Exactly one reader element must match the source reader id.
            var sourceReader = readerElements.Single(
                reader => reader.Attribute("id").Value.Inject(ExperimentManager.Parameters) == SetupParameters["sourceReader"]);
            var hasHeaderAttr = sourceReader.Attribute("hasHeader");

            // Both flags default to true and are only disabled by the exact value "false".
            HasHeader = !(hasHeaderAttr != null && hasHeaderAttr.Value == "false");

            Shuffle = !(SetupParameters.ContainsKey("shuffle") && SetupParameters["shuffle"] == "false");
        }
        /// <summary>
        /// Runs the base setup and then applies the FM-specific setup parameters
        /// ("numGroups" and "Normalize") to the wrapped recommender.
        /// </summary>
        /// <exception cref="WrapRecException">
        /// Thrown when the wrapped recommender instance is not an <c>FM</c>.
        /// </exception>
        public override void Setup()
        {
            base.Setup();

            var fm = MmlRecommenderInstance as FM;

            if (fm == null)
            {
                throw new WrapRecException("Expect ml-class 'FM' for 'MmlFmRecommender'");
            }

            // The weighted variant requires "numGroups"; a missing key fails here.
            var weighted = MmlRecommenderInstance as WFM;

            if (weighted != null)
            {
                weighted.NumGroups = int.Parse(SetupParameters["numGroups"]);
            }

            string normalize;

            if (SetupParameters.TryGetValue("Normalize", out normalize))
            {
                fm.Normalize = bool.Parse(normalize);
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Prepares everything this instance depends on: loads the data container and
        /// sets up the evaluation context if not done yet, sets up the model, and
        /// subscribes to model iterations when "multiEval" is configured.
        /// </summary>
        public virtual void Setup()
        {
            var container = Split.Container;

            if (!container.IsLoaded)
            {
                Logger.Current.Info("Loading DataContainer '{0}'...", container.Id);
                container.Load();
            }

            var evaluationContext = EvaluationContext;

            if (!evaluationContext.IsSetuped)
            {
                Logger.Current.Info("Setuping evaluation context '{0}'...", evaluationContext.Id);
                evaluationContext.Setup();
            }

            // The model is set up unconditionally.
            Logger.Current.Info("Setuping model '{0}'...", Model.Id);
            Model.Setup();

            string multiEval;

            if (SetupParameters.TryGetValue("multiEval", out multiEval))
            {
                MultiEval       = int.Parse(multiEval);
                Model.Iterated += ModelIterated;
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Reads the evaluator configuration from the setup parameters: candidate item
        /// and user modes (with their optional files), cut-offs, candidate counts, the
        /// relevance predicate and the optional per-user metrics writer table.
        /// </summary>
        /// <remarks>
        /// "cutOffs" is required. "candidateItemsMode" defaults to TRAINING and
        /// "candidateUsersMode" to TEST. "numCandidates" defaults to a single
        /// <c>int.MaxValue</c> entry; the literal "max" in the list also maps to
        /// <c>int.MaxValue</c>.
        /// </remarks>
        /// <exception cref="WrapRecException">
        /// Thrown when a candidate mode is EXPLICIT but the matching file parameter is
        /// missing.
        /// </exception>
        public override void Setup()
        {
            // candidate items
            if (!SetupParameters.ContainsKey("candidateItemsMode"))
            {
                CandidateItemsMode = CandidateItems.TRAINING;
            }
            else
            {
                CandidateItemsMode = (CandidateItems)Enum.Parse(typeof(CandidateItems), SetupParameters["candidateItemsMode"], true);
            }

            if (SetupParameters.ContainsKey("candidateItemsFile"))
            {
                CandidateItemsFile = SetupParameters["candidateItemsFile"];
            }
            else if (CandidateItemsMode == CandidateItems.EXPLICIT)
            {
                throw new WrapRecException("Expect a 'candidateItemsFile' for the mode 'explicit'!");
            }

            // candidate users
            if (!SetupParameters.ContainsKey("candidateUsersMode"))
            {
                CandidateUsersMode = CandidateItems.TEST;
            }
            else
            {
                CandidateUsersMode = (CandidateItems)Enum.Parse(typeof(CandidateItems), SetupParameters["candidateUsersMode"], true);
            }

            if (SetupParameters.ContainsKey("candidateUsersFile"))
            {
                CandidateUsersFile = SetupParameters["candidateUsersFile"];
            }
            else if (CandidateUsersMode == CandidateItems.EXPLICIT)
            {
                throw new WrapRecException("Expect a 'candidateUsersFile' for the mode 'explicit!'");
            }

            CutOffs = SetupParameters["cutOffs"].Split(',').Select(c => int.Parse(c)).ToArray();

            // candidate counts
            if (!SetupParameters.ContainsKey("numCandidates"))
            {
                NumCandidates = new int[] { int.MaxValue };
            }
            else
            {
                NumCandidates = SetupParameters["numCandidates"].Split(',')
                                .Select(n => (n == "max") ? int.MaxValue : int.Parse(n))
                                .ToArray();
            }

            _maxNumCandidates = NumCandidates.Max();

            // With relevantItems=all, train feedback counts as relevant in addition to test feedback.
            if (SetupParameters.ContainsKey("relevantItems") && SetupParameters["relevantItems"].ToLower() == "all")
            {
                _isSliceRelevant = f => f.SliceType == FeedbackSlice.TRAIN || f.SliceType == FeedbackSlice.TEST;
            }
            else
            {
                _isSliceRelevant = f => f.SliceType == FeedbackSlice.TEST;
            }

            if (SetupParameters.ContainsKey("userMetricsFile"))
            {
                _perUserMetrics = new MultiKeyDictionary <int, int, StreamWriter>();
            }
        }
Esempio n. 12
0
        /// <summary>
        /// Sets up this split according to its <c>Type</c>: a STATIC split selects the
        /// train/test feedbacks by slice type, a DYNAMIC split defines one sub-split per
        /// configured train ratio, and a CROSSVALIDATION split defines one sub-split per
        /// fold. Sub-split types and already set up splits return early.
        /// </summary>
        /// <remarks>
        /// Optional setup parameters: "trainRatios" (comma-separated floats, default
        /// <c>{ 1 }</c>) and "numFolds" (default 5).
        /// </remarks>
        public override void Setup()
        {
            base.Setup();

            // Sub-splits are created (with their train/test data) by the parent split
            // below, so there is nothing left to do for them or for a split that is
            // already set up.
            if (IsSetup || Type == SplitType.CROSSVALIDATION_SUBSPLIT || Type == SplitType.DYNAMIC_SUBSPLIT)
            {
                IsSetup = true;
                return;
            }

            // Defaults, overridden by the setup parameters when present
            float[] trainRatios = { 1f };
            int     numFolds    = 5;

            if (SetupParameters.ContainsKey("trainRatios"))
            {
                trainRatios = SetupParameters["trainRatios"].Split(',').Select(tr => float.Parse(tr)).ToArray();
            }

            if (SetupParameters.ContainsKey("numFolds"))
            {
                numFolds = int.Parse(SetupParameters["numFolds"]);
            }

            if (Type == SplitType.STATIC)
            {
                // Train and test are the feedbacks already labelled with the matching slice type.
                _train = Container.Feedbacks.Where(f => f.SliceType == FeedbackSlice.TRAIN);
                _test  = Container.Feedbacks.Where(f => f.SliceType == FeedbackSlice.TEST);
            }
            else if (Type == SplitType.DYNAMIC)
            {
                // NOTE(review): Shuffle() is a project extension; presumably lazy, given
                // the comments below about not requiring a loaded container - confirm.
                var feedbacks = Container.Feedbacks.Shuffle();

                // Select is deferred: the sub-splits are only constructed when SubSplits
                // is enumerated (and again on every further enumeration, unless the
                // SubSplits setter materializes the sequence - TODO confirm).
                SubSplits = trainRatios.Select(tr =>
                {
                    // the trainCount wont be calculated until the enumerator is being used
                    // So container is not required to be loaded in advanced
                    var trainCount = new Lazy <int>(() => Convert.ToInt32(Container.Feedbacks.Count * tr));

                    // Take/Skip are given the Lazy<int> itself; these overloads are not
                    // part of System.Linq, so they presumably come from project extensions.
                    var train = feedbacks.Take(trainCount);
                    var test  = feedbacks.Skip(trainCount);

                    var ss = new FeedbackSimpleSplit(train, test)
                    {
                        Id              = Id + "-" + tr.ToString(),
                        Type            = SplitType.DYNAMIC_SUBSPLIT,
                        Container       = this.Container,
                        SetupParameters = this.SetupParameters
                    };
                    ss.Setup();
                    return(ss);
                });
            }
            else if (Type == SplitType.CROSSVALIDATION)
            {
                var feedbacks = Container.Feedbacks.Shuffle();
                // here all parameters of Take and Skip functions are calculated with lazyLoading
                // The SubSplits are formed when the enumeration is being started
                var foldCount = new Lazy <int>(() => (int)((1f / numFolds) * Container.Feedbacks.Count));

                SubSplits = Enumerable.Range(0, numFolds)
                            .Select(i =>
                {
                    // Assuming the delegate-based Take/Skip overloads behave like the
                    // standard ones: fold i tests on the foldCount elements starting at
                    // (numFolds - i - 1) * foldCount and trains on everything before that
                    // block plus the i * foldCount elements directly after it. Elements
                    // beyond numFolds * foldCount are then in neither part.
                    var train = feedbacks.Take(() => (numFolds - i - 1) * foldCount.Value)
                                .Concat(feedbacks.Skip(() => (numFolds - i) * foldCount.Value)
                                        .Take(() => i * foldCount.Value));
                    var test = feedbacks.Skip(() => (numFolds - i - 1) * foldCount.Value)
                               .Take(foldCount);
                    var ss = new FeedbackSimpleSplit(train, test)
                    {
                        Id              = this.Id + "-fold" + (i + 1),
                        Type            = SplitType.CROSSVALIDATION_SUBSPLIT,
                        Container       = this.Container,
                        SetupParameters = this.SetupParameters
                    };
                    ss.Setup();
                    return(ss);
                });
            }
            IsSetup = true;
        }
Esempio n. 13
0
        // Placeholder reported for a statistic that is undefined (e.g. for an empty set).
        private const string StatisticNotAvailable = "NA";

        /// <summary>
        /// Computes (once) and returns descriptive statistics of this split: feedback,
        /// user and item counts for train and test, percentages, sparsity, the number of
        /// test users/items not seen in train, min/max/average feedback counts per user
        /// and per item, and the configured attribute parameters.
        /// </summary>
        /// <remarks>
        /// The result is cached in <c>_statistics</c> only after it has been fully
        /// built, so a failure while computing never leaves a partial result behind.
        /// Statistics that are undefined because train or test is empty are reported as
        /// "NA" instead of throwing.
        /// </remarks>
        /// <returns>The statistics as name/value pairs.</returns>
        public Dictionary <string, string> GetStatistics()
        {
            if (_statistics != null)
            {
                return(_statistics);
            }

            Logger.Current.Info("Calculating split '{0}' statistics...", Id);

            // Feedback count per user id / item id, separately for train and test.
            var trainUsers = new Dictionary <string, int>();
            var trainItems = new Dictionary <string, int>();

            var testUsers = new Dictionary <string, int>();
            var testItems = new Dictionary <string, int>();

            int trainCount = 0, testCount = 0;

            foreach (Feedback f in Train)
            {
                trainCount++;
                IncrementFeedbackCount(trainUsers, f.User.Id);
                IncrementFeedbackCount(trainItems, f.Item.Id);
            }

            foreach (Feedback f in Test)
            {
                testCount++;
                IncrementFeedbackCount(testUsers, f.User.Id);
                IncrementFeedbackCount(testItems, f.Item.Id);
            }

            // Built locally and published at the very end.
            var statistics = new Dictionary <string, string>();

            int totalCount = trainCount + testCount;

            statistics.Add("splitId", Id);

            statistics.Add("train", trainCount.ToString());
            statistics.Add("test", testCount.ToString());
            statistics.Add("%train", FormatShare(trainCount, totalCount));
            statistics.Add("%test", FormatShare(testCount, totalCount));

            statistics.Add("trainUsers", trainUsers.Count.ToString());
            statistics.Add("trainItems", trainItems.Count.ToString());
            statistics.Add("sparsityTrain", FormatSparsity(trainUsers.Count, trainItems.Count, trainCount));

            statistics.Add("testUsers", testUsers.Count.ToString());
            statistics.Add("testItems", testItems.Count.ToString());
            statistics.Add("sparsityTest", FormatSparsity(testUsers.Count, testItems.Count, testCount));

            statistics.Add("newTestUsers", testUsers.Keys.Except(trainUsers.Keys).Count().ToString());
            statistics.Add("newTestItems", testItems.Keys.Except(trainItems.Keys).Count().ToString());

            AddFeedbackCountSummary(statistics, "trUsr", trainUsers.Values);
            AddFeedbackCountSummary(statistics, "teUsr", testUsers.Values);
            AddFeedbackCountSummary(statistics, "trItm", trainItems.Values);
            AddFeedbackCountSummary(statistics, "teItm", testItems.Values);

            statistics.Add("feedbackAttrs", SetupParameters.ContainsKey("feedbackAttributes") ? SetupParameters["feedbackAttributes"] : StatisticNotAvailable);
            statistics.Add("userAttrs", SetupParameters.ContainsKey("userAttributes") ? SetupParameters["userAttributes"] : StatisticNotAvailable);
            statistics.Add("itemAttrs", SetupParameters.ContainsKey("itemAttributes") ? SetupParameters["itemAttributes"] : StatisticNotAvailable);

            _statistics = statistics;

            return(_statistics);
        }

        // Adds one to the counter stored under key, starting at 1 for a new key.
        private static void IncrementFeedbackCount(Dictionary <string, int> counts, string key)
        {
            int current;

            counts.TryGetValue(key, out current);
            counts[key] = current + 1;
        }

        // Formats part/total as a percentage with two decimals, or "NA" when total is 0.
        private static string FormatShare(int part, int total)
        {
            if (total == 0)
            {
                return(StatisticNotAvailable);
            }

            float percentage = 100f * ((float)part / total);

            return(string.Format("{0:0.00}", percentage));
        }

        // Formats the percentage of empty cells in the user x item matrix, or "NA" when the matrix is empty.
        private static string FormatSparsity(int userCount, int itemCount, int feedbackCount)
        {
            // long arithmetic: users * items can exceed the int range.
            long matrixCount = (long)userCount * itemCount;

            if (matrixCount == 0)
            {
                return(StatisticNotAvailable);
            }

            double sparsity = (double)100L * (matrixCount - feedbackCount) / matrixCount;

            return(string.Format("{0:0.00}", sparsity));
        }

        // Adds the <prefix>MinFb / <prefix>MaxFb / <prefix>AvgFb entries for the given feedback counts ("NA" when there are none).
        private static void AddFeedbackCountSummary(Dictionary <string, string> statistics, string keyPrefix, ICollection <int> counts)
        {
            bool hasCounts = counts.Count > 0;

            statistics.Add(keyPrefix + "MinFb", hasCounts ? counts.Min().ToString() : StatisticNotAvailable);
            statistics.Add(keyPrefix + "MaxFb", hasCounts ? counts.Max().ToString() : StatisticNotAvailable);
            statistics.Add(keyPrefix + "AvgFb", hasCounts ? string.Format("{0:0.00}", counts.Average()) : StatisticNotAvailable);
        }