public override void Setup()
{
    base.Setup();

    // The wrapped MyMediaLite recommender must be a BPRFM (or a subclass of it).
    if (!(MmlRecommenderInstance is BPRFM))
        throw new WrapRecException("Expect ml-class 'BPRFM' for 'MmlBprfmRecommender'");

    var bprFm = (BPRFM)MmlRecommenderInstance;

    // The weighted variant additionally requires the number of feature groups.
    var weighted = MmlRecommenderInstance as WeightedBPRFM;
    if (weighted != null)
        weighted.NumGroups = int.Parse(SetupParameters["numGroups"]);

    string normalize;
    if (SetupParameters.TryGetValue("Normalize", out normalize))
        bprFm.Normalize = bool.Parse(normalize);

    // NOTE(review): the config key is spelled 'ingoreFeatures' (sic) — kept as-is
    // because existing experiment files use that spelling.
    string ignoreFeatures;
    if (SetupParameters.TryGetValue("ingoreFeatures", out ignoreFeatures))
        bprFm.IgnoreFeaturesOnPrediction = bool.Parse(ignoreFeatures);
}
public override void Setup()
{
    // Make sure the underlying data container is loaded before reading from it.
    if (!Split.Container.IsLoaded)
    {
        Logger.Current.Info("Loading DataContainer '{0}'...", Split.Container.Id);
        Split.Container.Load();
    }

    OutputPath = Path.Combine(ExperimentManager.ResultsFolder, SetupParameters["outputPath"]);

    // Optional comma-separated list of context dimension names; empty when absent.
    ContextNames = SetupParameters.ContainsKey("contextNames")
        ? SetupParameters["contextNames"].Split(',').ToList()
        : new List<string>();

    UserMapper = new Mapping();
    ContextMapper = new Mapping();

    // burn id 0
    UserMapper.ToInternalID("burn");
    ContextMapper.ToInternalID("burn");

    // Per-user list of consumed item ids, plus the catalogue of all item ids.
    UserItems = Split.Container.Users.Values
        .ToDictionary(u => u.Id, u => u.Feedbacks.Select(f => f.Item.Id).ToList());
    AllItems = Split.Container.Items.Values.Select(i => i.Id).ToList();
}
public override void Setup()
{
    // libFM expects comma-separated multi-value options, but ',' is already the
    // list delimiter in the experiment config, so '-' is used there and
    // translated back here.
    if (SetupParameters.ContainsKey("dim"))
        SetupParameters["dim"] = SetupParameters["dim"].Replace('-', ',');

    if (SetupParameters.ContainsKey("regular"))
        SetupParameters["regular"] = SetupParameters["regular"].Replace('-', ',');

    if (!SetupParameters.ContainsKey("libFmPath"))
        SetupParameters["libFmPath"] = "libfm.net.exe";

    // Build the command line as alternating "-key", "value" tokens;
    // 'libFmPath' itself is not a libFM option, so it is excluded.
    LibFmArguments = SetupParameters.Where(kv => kv.Key.ToLower() != "libfmpath")
        .ToDictionary(kv => "-" + kv.Key, kv => kv.Value)
        .SelectMany(kv => new string[] { kv.Key, kv.Value }).ToList();

    // Fix: append the default save_model option as a separate "-key", "value"
    // pair, consistent with the token pairing built above (the original
    // appended the single combined token "-save_model train.model").
    if (!SetupParameters.ContainsKey("save_model"))
    {
        LibFmArguments.Add("-save_model");
        LibFmArguments.Add("train.model");
    }

    // default data type
    DataType = DataType.Ratings;
    FeatureBuilder = new FmFeatureBuilder();
}
public override void Setup()
{
    // A header row is assumed unless explicitly disabled with hasHeader="false".
    bool hasHeader = !(SetupParameters.ContainsKey("hasHeader")
        && SetupParameters["hasHeader"] == "false");

    // Default delimiter is ','; a literal "\t" in the config is unescaped to a real tab.
    string delimiter = ",";
    if (SetupParameters.ContainsKey("delimiter"))
        delimiter = SetupParameters["delimiter"].Replace("\\t", "\t");

    CsvConfig = new CsvConfiguration()
    {
        Delimiter = delimiter,
        HasHeaderRecord = hasHeader
    };

    if (SetupParameters.ContainsKey("header"))
    {
        // Each entry is "name" or "name:type"; a missing type defaults to "d".
        Header = SetupParameters["header"].Split(',')
            .Select(h =>
            {
                var parts = h.Split(':');
                string type = parts.Length > 1 ? parts[1] : "d";
                return new { Header = parts[0], Type = type };
            })
            .ToDictionary(h => h.Header, h => h.Type);

        // Map each column name to its zero-based position in the header.
        int position = 0;
        FieldIndices = SetupParameters["header"].Split(',')
            .ToDictionary(h => h.Split(':')[0], h => position++);
    }
}
public override void Setup()
{
    base.Setup();

    // Only allocate the (int, int)-keyed writer map when prediction output is requested.
    if (SetupParameters.ContainsKey("predictionFile"))
        _predictionWriter = new MultiKeyDictionary<int, int, StreamWriter>();
}
public override void Setup()
{
    // Default to tab; a literal "\t" written in the config is unescaped to a real tab.
    Delimiter = SetupParameters.ContainsKey("delimiter")
        ? SetupParameters["delimiter"].Replace("\\t", "\t")
        : "\t";

    // Source result files and the combined output file all live under the results folder.
    ResultFiles = SetupParameters["sourceFiles"].Split(',')
        .Select(name => Path.Combine(ExperimentManager.ResultsFolder, name))
        .ToArray();

    OutputFile = Path.Combine(ExperimentManager.ResultsFolder, SetupParameters["outputFile"]);
}
private void LoadFmModel()
{
    // Model file written by libFM via -save_model; default matches the fallback
    // used in Setup. Surrounding quotes from the config are stripped.
    string modelFile = SetupParameters.ContainsKey("save_model")
        ? SetupParameters["save_model"] : "train.model";
    modelFile = modelFile.Replace("\"", "");

    var lines = File.ReadAllLines(modelFile);

    // TODO: if dim is 0,0,x w0 and w would be 0 and the format of the file would be different
    // Fix: parse with the invariant culture — the model file uses '.' as the
    // decimal separator, so locale-dependent float.Parse would fail (or parse
    // wrongly) under comma-decimal OS locales.
    float w0 = float.Parse(lines.Skip(1).First(),
        System.Globalization.CultureInfo.InvariantCulture);

    // _w = [w0, unary weights...]; the unary weights run until the next '#' header line.
    _w = new float[] { w0 }.Concat(
        lines.Skip(3).TakeWhile(l => !l.StartsWith("#"))
            .Select(l => float.Parse(l, System.Globalization.CultureInfo.InvariantCulture)))
        .ToList();

    // Remaining lines hold the pairwise factors, one space-separated vector per line.
    _v = lines.Skip(_w.Count + 3)
        .Select(l => l.Split(' ')
            .Select(v => float.Parse(v, System.Globalization.CultureInfo.InvariantCulture))
            .ToArray())
        .ToList();
}
public override void Setup()
{
    // Resolve reader ids to file paths; both attributes may contain experiment
    // parameters that must be injected first.
    var readerPaths = ExperimentManager.ConfigRoot.Descendants("reader")
        .ToDictionary(
            el => el.Attribute("id").Value.Inject(ExperimentManager.Parameters),
            el => el.Attribute("path").Value.Inject(ExperimentManager.Parameters));

    Part1Path = readerPaths[SetupParameters["part1Reader"]];
    Part2Path = readerPaths[SetupParameters["part2Reader"]];
    SourcePath = readerPaths[SetupParameters["sourceReader"]];

    // Fix: parse with the invariant culture so a ratio like "0.8" is read
    // correctly regardless of the OS locale's decimal separator.
    Part1Ratio = float.Parse(SetupParameters["part1Ratio"],
        System.Globalization.CultureInfo.InvariantCulture);

    // The source reader's optional hasHeader attribute: a header is assumed
    // unless it is explicitly set to "false".
    var hasHeaderAttr = ExperimentManager.ConfigRoot.Descendants("reader")
        .Single(el => el.Attribute("id").Value.Inject(ExperimentManager.Parameters)
            == SetupParameters["sourceReader"])
        .Attribute("hasHeader");
    HasHeader = !(hasHeaderAttr != null && hasHeaderAttr.Value == "false");

    // Shuffle by default; only shuffle="false" disables it.
    Shuffle = !(SetupParameters.ContainsKey("shuffle") && SetupParameters["shuffle"] == "false");
}
public override void Setup()
{
    base.Setup();

    // The wrapped MyMediaLite recommender must be an FM (or a subclass of it).
    if (!(MmlRecommenderInstance is FM))
        throw new WrapRecException("Expect ml-class 'FM' for 'MmlFmRecommender'");

    // The weighted variant additionally requires the number of feature groups.
    var weighted = MmlRecommenderInstance as WFM;
    if (weighted != null)
        weighted.NumGroups = int.Parse(SetupParameters["numGroups"]);

    string normalize;
    if (SetupParameters.TryGetValue("Normalize", out normalize))
        ((FM)MmlRecommenderInstance).Normalize = bool.Parse(normalize);
}
public virtual void Setup()
{
    // Load the data container if nobody has done so yet.
    if (!Split.Container.IsLoaded)
    {
        Logger.Current.Info("Loading DataContainer '{0}'...", Split.Container.Id);
        Split.Container.Load();
    }

    // Likewise set up the evaluation context exactly once.
    if (!EvaluationContext.IsSetuped)
    {
        Logger.Current.Info("Setuping evaluation context '{0}'...", EvaluationContext.Id);
        EvaluationContext.Setup();
    }

    Logger.Current.Info("Setuping model '{0}'...", Model.Id);
    Model.Setup();

    // When multiEval is given, hook the model's Iterated event.
    // NOTE(review): presumably ModelIterated evaluates every MultiEval
    // iterations — confirm against the handler's implementation.
    if (SetupParameters.ContainsKey("multiEval"))
    {
        MultiEval = int.Parse(SetupParameters["multiEval"]);
        Model.Iterated += ModelIterated;
    }
}
public override void Setup()
{
    // candidate items: default TRAINING when the mode is not configured.
    CandidateItemsMode = !SetupParameters.ContainsKey("candidateItemsMode")
        ? CandidateItems.TRAINING
        : (CandidateItems)Enum.Parse(typeof(CandidateItems), SetupParameters["candidateItemsMode"], true);

    if (SetupParameters.ContainsKey("candidateItemsFile"))
        CandidateItemsFile = SetupParameters["candidateItemsFile"];
    else if (CandidateItemsMode == CandidateItems.EXPLICIT)
        throw new WrapRecException("Expect a 'candidateItemsFile' for the mode 'explicit'!");

    // candidate users: default TEST when the mode is not configured.
    CandidateUsersMode = !SetupParameters.ContainsKey("candidateUsersMode")
        ? CandidateItems.TEST
        : (CandidateItems)Enum.Parse(typeof(CandidateItems), SetupParameters["candidateUsersMode"], true);

    if (SetupParameters.ContainsKey("candidateUsersFile"))
        CandidateUsersFile = SetupParameters["candidateUsersFile"];
    else if (CandidateUsersMode == CandidateItems.EXPLICIT)
        // Fix: the closing quote was misplaced ("'explicit!'"); now consistent
        // with the candidateItemsFile message above.
        throw new WrapRecException("Expect a 'candidateUsersFile' for the mode 'explicit'!");

    CutOffs = SetupParameters["cutOffs"].Split(',').Select(c => int.Parse(c)).ToArray();

    // numCandidates: "max" (or an absent key) means all items are candidates.
    if (!SetupParameters.ContainsKey("numCandidates"))
        NumCandidates = new int[] { int.MaxValue };
    else
        NumCandidates = SetupParameters["numCandidates"].Split(',')
            .Select(n => n == "max" ? int.MaxValue : int.Parse(n)).ToArray();

    _maxNumCandidates = NumCandidates.Max();

    // relevantItems=all treats train feedback as relevant too; default is test only.
    if (SetupParameters.ContainsKey("relevantItems") && SetupParameters["relevantItems"].ToLower() == "all")
        _isSliceRelevant = f => f.SliceType == FeedbackSlice.TRAIN || f.SliceType == FeedbackSlice.TEST;
    else
        _isSliceRelevant = f => f.SliceType == FeedbackSlice.TEST;

    // Only allocate the per-user metrics writer map when an output file is requested.
    if (SetupParameters.ContainsKey("userMetricsFile"))
        _perUserMetrics = new MultiKeyDictionary<int, int, StreamWriter>();
}
// Sets up this split: for STATIC splits the train/test slices are taken from the
// container's pre-sliced feedback; DYNAMIC and CROSSVALIDATION splits build lazy
// sub-splits over a shuffled feedback sequence. All Take/Skip bounds are computed
// lazily so the container does not need to be loaded before enumeration starts.
public override void Setup()
{
    base.Setup();

    // the CrossValidationSplit is already setuped by its parent split
    if (IsSetup || Type == SplitType.CROSSVALIDATION_SUBSPLIT || Type == SplitType.DYNAMIC_SUBSPLIT)
    {
        IsSetup = true;
        return;
    }

    // Setuping splits
    // Defaults: a single split containing all feedback, and 5 folds for cross-validation.
    float[] trainRatios = { 1f };
    int numFolds = 5;

    if (SetupParameters.ContainsKey("trainRatios"))
    {
        trainRatios = SetupParameters["trainRatios"].Split(',').Select(tr => float.Parse(tr)).ToArray();
    }

    if (SetupParameters.ContainsKey("numFolds"))
    {
        numFolds = int.Parse(SetupParameters["numFolds"]);
    }

    if (Type == SplitType.STATIC)
    {
        // The container already labels each feedback as TRAIN or TEST.
        _train = Container.Feedbacks.Where(f => f.SliceType == FeedbackSlice.TRAIN);
        _test = Container.Feedbacks.Where(f => f.SliceType == FeedbackSlice.TEST);
    }
    else if (Type == SplitType.DYNAMIC)
    {
        // One sub-split per requested train ratio, all over the same shuffle.
        var feedbacks = Container.Feedbacks.Shuffle();
        SubSplits = trainRatios.Select(tr =>
        {
            // the trainCount wont be calculated until the enumerator is being used
            // So container is not required to be loaded in advanced
            var trainCount = new Lazy<int>(() => Convert.ToInt32(Container.Feedbacks.Count * tr));
            var train = feedbacks.Take(trainCount);
            var test = feedbacks.Skip(trainCount);
            var ss = new FeedbackSimpleSplit(train, test)
            {
                Id = Id + "-" + tr.ToString(),
                Type = SplitType.DYNAMIC_SUBSPLIT,
                Container = this.Container,
                SetupParameters = this.SetupParameters
            };
            ss.Setup();
            return (ss);
        });
    }
    else if (Type == SplitType.CROSSVALIDATION)
    {
        var feedbacks = Container.Feedbacks.Shuffle();
        // here all parameters of Take and Skip functions are calculated with lazyLoading
        // The SubSplits are formed when the enumeration is being started
        var foldCount = new Lazy<int>(() => (int)((1f / numFolds) * Container.Feedbacks.Count));
        SubSplits = Enumerable.Range(0, numFolds)
            .Select(i =>
            {
                // Fold i's test slice is one foldCount-sized window; train is
                // everything before it concatenated with everything after it.
                var train = feedbacks.Take(() => (numFolds - i - 1) * foldCount.Value)
                    .Concat(feedbacks.Skip(() => (numFolds - i) * foldCount.Value)
                    .Take(() => i * foldCount.Value));
                var test = feedbacks.Skip(() => (numFolds - i - 1) * foldCount.Value)
                    .Take(foldCount);
                var ss = new FeedbackSimpleSplit(train, test)
                {
                    Id = this.Id + "-fold" + (i + 1),
                    Type = SplitType.CROSSVALIDATION_SUBSPLIT,
                    Container = this.Container,
                    SetupParameters = this.SetupParameters
                };
                ss.Setup();
                return (ss);
            });
    }

    IsSetup = true;
}
// Computes (and caches) descriptive statistics of this split: sizes, user/item
// counts, sparsity, cold-start counts, and min/max/avg feedback per user/item.
// Fixes the old TODO: an empty train or test set no longer throws from
// Min()/Max()/Average() — those entries report "NA" instead.
public Dictionary<string, string> GetStatistics()
{
    if (_statistics != null)
    {
        return (_statistics);
    }

    Logger.Current.Info("Calculating split '{0}' statistics...", Id);

    var trainUsers = new Dictionary<string, int>();
    var trainItems = new Dictionary<string, int>();
    var testUsers = new Dictionary<string, int>();
    var testItems = new Dictionary<string, int>();

    int trainCount = CountFeedbacks(Train, trainUsers, trainItems);
    int testCount = CountFeedbacks(Test, testUsers, testItems);

    _statistics = new Dictionary<string, string>();

    int totalCount = trainCount + testCount;
    float percTrain = 100f * ((float)trainCount / totalCount);
    float percTest = 100f * ((float)testCount / totalCount);

    // Sparsity = percentage of the user x item matrix with no feedback.
    long trainMatrixCount = (long)trainUsers.Count * trainItems.Count;
    double sparsityTrain = (double)100L * (trainMatrixCount - trainCount) / trainMatrixCount;

    long testMatrixCount = (long)testUsers.Count * testItems.Count;
    double sparsityTest = (double)100L * (testMatrixCount - testCount) / testMatrixCount;

    _statistics.Add("splitId", Id);
    _statistics.Add("train", trainCount.ToString());
    _statistics.Add("test", testCount.ToString());
    _statistics.Add("%train", string.Format("{0:0.00}", percTrain));
    _statistics.Add("%test", string.Format("{0:0.00}", percTest));
    _statistics.Add("trainUsers", trainUsers.Count.ToString());
    _statistics.Add("trainItems", trainItems.Count.ToString());
    _statistics.Add("sparsityTrain", string.Format("{0:0.00}", sparsityTrain));
    _statistics.Add("testUsers", testUsers.Count.ToString());
    _statistics.Add("testItems", testItems.Count.ToString());
    _statistics.Add("sparsityTest", string.Format("{0:0.00}", sparsityTest));

    // Cold-start: users/items that appear in test but never in train.
    _statistics.Add("newTestUsers", testUsers.Keys.Except(trainUsers.Keys).Count().ToString());
    _statistics.Add("newTestItems", testItems.Keys.Except(trainItems.Keys).Count().ToString());

    AddCountStats(_statistics, "trUsr", trainUsers);
    AddCountStats(_statistics, "teUsr", testUsers);
    AddCountStats(_statistics, "trItm", trainItems);
    AddCountStats(_statistics, "teItm", testItems);

    _statistics.Add("feedbackAttrs",
        SetupParameters.ContainsKey("feedbackAttributes") ? SetupParameters["feedbackAttributes"] : "NA");
    _statistics.Add("userAttrs",
        SetupParameters.ContainsKey("userAttributes") ? SetupParameters["userAttributes"] : "NA");
    _statistics.Add("itemAttrs",
        SetupParameters.ContainsKey("itemAttributes") ? SetupParameters["itemAttributes"] : "NA");

    return (_statistics);
}

// Enumerates feedbacks once, accumulating per-user and per-item counts;
// returns the total number of feedbacks seen.
private static int CountFeedbacks(IEnumerable<Feedback> feedbacks,
    Dictionary<string, int> userCounts, Dictionary<string, int> itemCounts)
{
    int count = 0;
    foreach (Feedback f in feedbacks)
    {
        count++;
        Increment(userCounts, f.User.Id);
        Increment(itemCounts, f.Item.Id);
    }
    return count;
}

// Increments counts[key], initializing to 1 on first sight (single lookup).
private static void Increment(Dictionary<string, int> counts, string key)
{
    int current;
    counts[key] = counts.TryGetValue(key, out current) ? current + 1 : 1;
}

// Adds "<prefix>MinFb"/"<prefix>MaxFb"/"<prefix>AvgFb"; "NA" when there are no
// entries (previously this threw InvalidOperationException on an empty set).
private static void AddCountStats(Dictionary<string, string> stats, string prefix,
    Dictionary<string, int> counts)
{
    bool any = counts.Count > 0;
    stats.Add(prefix + "MinFb", any ? counts.Values.Min().ToString() : "NA");
    stats.Add(prefix + "MaxFb", any ? counts.Values.Max().ToString() : "NA");
    stats.Add(prefix + "AvgFb", any ? string.Format("{0:0.00}", counts.Values.Average()) : "NA");
}