예제 #1
0
        public DaySplitter(UserBehaviorDatabase db, int daysForTesting)
        {
            int splitDay = db.UserActions.Max(x => x.Day) - daysForTesting;

            TrainingDB = db.Clone();
            TrainingDB.UserActions.RemoveAll(x => x.Day > splitDay);

            TestingDB = db.Clone();
            TestingDB.UserActions.RemoveAll(x => x.Day <= splitDay);

            // Remove any user-article pairs from the testing set that are also in the training set
            TestingDB.UserActions.RemoveAll(test => TrainingDB.UserActions.Any(train => train.UserID == test.UserID && train.ArticleID == test.ArticleID));
        }
예제 #2
0
 public UserBehaviorTransformer(UserBehaviorDatabase database)
 {
     db = database;
 }
예제 #3
0
        public UserBehaviorDatabase LoadUserBehaviorDatabase(string file)
        {
            UserBehaviorDatabase db = new UserBehaviorDatabase();

            List <string> lines = File.ReadAllLines(file).ToList();

            // Get the indexes of each data section
            int tagsIndex        = lines.FindIndex(x => x.StartsWith(HeaderTags));
            int articlesIndex    = lines.FindIndex(x => x.StartsWith(HeaderArticles));
            int usersIndex       = lines.FindIndex(x => x.StartsWith(HeaderUsers));
            int userActionsIndex = lines.FindIndex(x => x.StartsWith(HeaderUserActions));

            // Parse out the tags
            for (int i = tagsIndex; i < articlesIndex; i++)
            {
                if (!lines[i].Trim().StartsWith("#") && lines[i].Trim().Length > 0)
                {
                    foreach (string s in lines[i].Split(','))
                    {
                        db.Tags.Add(new Tag(s.Trim()));
                    }
                }
            }

            // Parse out the articles
            for (int i = articlesIndex; i < usersIndex; i++)
            {
                if (!lines[i].Trim().StartsWith("#") && lines[i].Trim().Length > 0)
                {
                    string[]   cols = lines[i].Split(',');
                    List <Tag> tags = new List <Tag>();

                    for (int c = 2; c < cols.Length; c++)
                    {
                        tags.Add(new Tag(cols[c].Trim()));
                    }

                    db.Articles.Add(new Article(int.Parse(cols[0].Trim()), cols[1].Trim(), tags));
                }
            }

            // Parse out the users
            for (int i = usersIndex; i < userActionsIndex; i++)
            {
                if (!lines[i].Trim().StartsWith("#") && lines[i].Trim().Length > 0)
                {
                    string[] cols = lines[i].Split(',');
                    db.Users.Add(new User(int.Parse(cols[0].Trim()), cols[1].Trim()));
                }
            }

            // Parse out the user actions
            for (int i = userActionsIndex; i < lines.Count; i++)
            {
                if (!lines[i].Trim().StartsWith("#") && lines[i].Trim().Length > 0)
                {
                    string[] cols = lines[i].Split(',');
                    db.UserActions.Add(new UserAction(int.Parse(cols[0].Trim()), cols[1].Trim(), int.Parse(cols[2].Trim()), cols[3].Trim(), int.Parse(cols[4].Trim()), cols[5].Trim()));
                }
            }

            return(db);
        }