/// <summary>
/// Main algorithm to return author search results from the database. Whole-query matches are preferred
/// (names starting with the query first, then names containing it). For queries with more than one word
/// these are followed by authors matching individual query words, ordered by the number of word matches.
/// </summary>
/// <param name="db">Database context</param>
/// <param name="query">Current query; <c>null</c> yields an empty result</param>
/// <returns>Matching authors without duplicates, best matches first</returns>
public static IEnumerable<Author> SearchAuthor(BookRecommenderContext db, string query)
{
    if (query == null)
    {
        return Enumerable.Empty<Author>();
    }

    // Lower-case once; the value is captured by the query predicates below.
    var loweredQuery = query.ToLower();

    // First try to find the exact query within author names.
    var startsWithFound = db.Authors.Where(a => a.NameEn.ToLower().StartsWith(loweredQuery));
    var containsFound = db.Authors.Where(a => a.NameEn.ToLower().Contains(loweredQuery));

    // Every prefix match is also a "contains" match, so the concatenation must be de-duplicated.
    // The composition is done in memory (AsEnumerable) so that the prefix-first ordering is
    // defined by this method; it stays lazily evaluated like the original query.
    var wholeQueryFound = startsWithFound.AsEnumerable()
        .Concat(containsFound.AsEnumerable())
        .Distinct();

    // Drop empty tokens: a trailing or doubled space would otherwise produce an empty word,
    // and Contains("") matches every author.
    var splitQuery = loweredQuery.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    // If the query is at most one word, the whole-query result is all there is.
    if (splitQuery.Length <= 1)
    {
        return wholeQueryFound;
    }

    // Then find authors matching even a single query word and count how many words each one matches.
    var countDictionary = new Dictionary<Author, int>();
    foreach (var queryWord in splitQuery)
    {
        var authors = db.Authors.Where(a => a.NameEn.ToLower().Contains(queryWord));
        foreach (var author in authors)
        {
            int value;
            // TryGetValue leaves value at 0 when the author has not been seen yet.
            countDictionary.TryGetValue(author, out value);
            countDictionary[author] = value + 1;
        }
    }

    var orderedResult = countDictionary
        .OrderByDescending(pair => pair.Value)
        .Select(pair => pair.Key);

    // Combine so that whole-query matches are preferred over single-word matches.
    return wholeQueryFound.Concat(orderedResult).Distinct();
}
/// <summary>
/// Method which takes IEnumerable with the data and calls the assigned line action on every line of the data.
/// The data needs to fit inside the line action.
/// This method is meant to be used when mining from command line.
/// On any exception the whole operation (querying, processing, saving) is retried after a 10 second pause,
/// repeating until it succeeds.
/// </summary>
/// <param name="data">Enumerable with the data lines</param>
/// <param name="lineAction">Action which will be executed on each line</param>
protected void UpdateDatabase<T>(IEnumerable<T> data, LineAction<T> lineAction)
{
    // Write out the delegate name
    System.Console.WriteLine(lineAction.GetMethodInfo().Name);

    // Declared outside the retry loop so the attempt number keeps increasing across retries.
    var numberOfTries = 1;
    bool successfull = false;
    do
    {
        try
        {
            // Execute query -- enumerate the source once per attempt
            System.Console.WriteLine("Querying Endpoind");
            var listData = data.ToList();

            System.Console.WriteLine("Updating database");
            using (var db = new BookRecommenderContext())
            {
                // Create new console progress counter
                using (var counter = new Counter(listData.Count))
                {
                    // Apply the line action to every retrieved line
                    foreach (var line in listData)
                    {
                        lineAction(line, db);
                        counter.Update();
                    }
                }

                System.Console.WriteLine("Saving database");
                db.SaveChanges();
                successfull = true;
            }
        }
        catch (Exception ex)
        {
            // Report the failure; the loop below waits and then starts the next attempt
            numberOfTries++;
            System.Console.WriteLine(ex.ToString());
            System.Console.WriteLine("Try again, attempt number " + numberOfTries);
        }

        // If something went wrong, wait 10 sec and then try again
        if (!successfull)
        {
            System.Threading.Tasks.Task.Delay(10000).Wait();
        }
    } while (!successfull);
}
/// <summary>
/// Runs every cache initializer (authors, genres, characters, tags, names) against the given context
/// and prints the number of author cache entries and the elapsed time to the console.
/// </summary>
/// <param name="db">Database context the initializers read from</param>
public void Initialize(BookRecommenderContext db)
{
    var timer = new Stopwatch();
    timer.Start();

    InitializeAuthors(db);
    InitializeGenres(db);
    InitializeCharacters(db);
    InitializeTags(db);
    InitializeNames(db);

    timer.Stop();

    System.Console.WriteLine($"Spreading cache entries: {SimilarBooksByAuthor.Keys.Count}#");
    System.Console.WriteLine($"Spreading cache init took: {timer.ElapsedMilliseconds}ms");
}
/// <summary>
/// Lightweight search used for the dynamically loaded suggestions shown while the user is typing.
/// Book and author names starting with the query come first, followed by names merely containing it.
/// </summary>
/// <param name="db">Database context</param>
/// <param name="query">Current query</param>
/// <param name="howManyTop">Maximum number of suggestions to return</param>
/// <returns>Distinct suggestions, at most <paramref name="howManyTop"/> of them</returns>
public static List<string> Autocomplete(BookRecommenderContext db, string query, int howManyTop)
{
    var suggestions = new List<string>();
    if (string.IsNullOrEmpty(query))
    {
        return suggestions;
    }

    // Prefix matches: books first, then authors.
    suggestions.AddRange(
        db.Books
            .Where(book => book.NameEn.ToLower().StartsWith(query.ToLower()))
            .Select(book => book.NameEn)
            .Take(howManyTop));
    suggestions.AddRange(
        db.Authors
            .Where(author => author.NameEn.ToLower().StartsWith(query.ToLower()))
            .Select(author => author.NameEn)
            .Take(howManyTop));

    // Substring matches: books first, then authors.
    suggestions.AddRange(
        db.Books
            .Where(book => book.NameEn.ToLower().Contains(query.ToLower()))
            .Select(book => book.NameEn)
            .Take(howManyTop));
    suggestions.AddRange(
        db.Authors
            .Where(author => author.NameEn.ToLower().Contains(query.ToLower()))
            .Select(author => author.NameEn)
            .Take(howManyTop));

    // Substring matches repeat the prefix matches, so de-duplicate before cutting to size.
    return suggestions.Distinct().Take(howManyTop).ToList();
}
/// <summary>
/// Loads all book/genre pairs and builds two lookup tables from them:
/// <c>BooksGenres</c> (book id to its genre ids) and <c>GenresBooks</c> (genre id to its book ids).
/// Prints the combined number of stored ids and a memory estimate to the console.
/// </summary>
/// <remarks>
/// An earlier version precomputed a single book-to-related-books table (SimilarBooksByGenre);
/// it was replaced by the two tables built here.
/// </remarks>
/// <param name="db">Database context</param>
public void InitializeGenres(BookRecommenderContext db)
{
    // Fetch only the two id columns of the relation.
    var pairs = db.BooksGenres
        .Select(row => new { row.BookId, row.GenreId })
        .ToList();

    var genresByBook = pairs
        .GroupBy(pair => pair.BookId, pair => pair.GenreId)
        .ToDictionary(group => group.Key, group => group.ToList());

    var booksByGenre = pairs
        .GroupBy(pair => pair.GenreId, pair => pair.BookId)
        .ToDictionary(group => group.Key, group => group.ToList());

    BooksGenres = genresByBook;
    GenresBooks = booksByGenre;

    // Number of ids held by both tables together.
    var total = genresByBook.Values.Sum(ids => ids.Count)
              + booksByGenre.Values.Sum(ids => ids.Count);

    // Estimate uses 4 per entry -- presumably the byte size of one id; verify against the id type.
    var aproxMemMB = total * 4d / 1024 / 1024;
    System.Console.WriteLine($"Genres total hops available(2 tables): {total}, MB: {aproxMemMB}");
}
/// <summary>
/// Materializes the data, runs the line action on every line and saves the changes, reporting progress
/// through the supplied mining state. This overload is meant to be used when mining from the web interface.
/// The data needs to fit inside the line action.
/// Unlike the command-line overload, a failure is not retried: the exception message is stored in the
/// mining state and the state is set to Error.
/// </summary>
/// <param name="data">Enumerable with the data lines</param>
/// <param name="lineAction">Action which will be executed on each line</param>
/// <param name="miningState">Mining state used to monitor the operation; when <c>null</c> the
/// command-line overload is used instead</param>
protected void UpdateDatabase<T>(IEnumerable<T> data, LineAction<T> lineAction, MiningState miningState)
{
    // Fall back to the deprecated command-line mining when there is nothing to report to.
    if (miningState == null)
    {
        UpdateDatabase<T>(data, lineAction);
        return;
    }

    try
    {
        miningState.CurrentState = MiningStateType.RunningQueryingEndpoint;
        var rows = data.ToList();

        miningState.CurrentState = MiningStateType.Running;
        using (var db = new BookRecommenderContext())
        {
            // Process each line and publish "processed/total" after every one.
            for (var index = 0; index < rows.Count; index++)
            {
                lineAction(rows[index], db);
                miningState.Message = $"{index + 1}/{rows.Count}";
            }

            miningState.CurrentState = MiningStateType.RunningSavingToDatabase;
            db.SaveChanges();

            // On success the message carries the completion timestamp.
            miningState.CurrentState = MiningStateType.Completed;
            miningState.Message = DateTime.Now.ToString();
        }
    }
    catch (Exception ex)
    {
        // Surface the failure to whoever is watching the mining state.
        miningState.CurrentState = MiningStateType.Error;
        miningState.Message = ex.Message;
    }
}
/// <summary>
/// Builds <c>SimilarBooksByAuthor</c>: for every book, the list of books written by any of its authors.
/// The list is a plain concatenation per author, so the book itself and books sharing several authors
/// appear repeatedly. Prints the total number of list entries and a memory estimate to the console.
/// </summary>
/// <param name="db">Database context</param>
public void InitializeAuthors(BookRecommenderContext db)
{
    // Fetch only the two id columns of the book/author relation.
    var links = db.BooksAuthors
        .Select(row => new { row.BookId, row.AuthorId })
        .ToList();

    // Author id -> ids of the books by that author.
    var booksByAuthor = links
        .GroupBy(link => link.AuthorId, link => link.BookId)
        .ToDictionary(group => group.Key, group => group.ToList());

    // Book id -> books reachable through each of its authors.
    var similar = links
        .GroupBy(link => link.BookId, link => link.AuthorId)
        .ToDictionary(
            group => group.Key,
            group => group.SelectMany(authorId => booksByAuthor[authorId]).ToList());

    SimilarBooksByAuthor = similar;

    var total = similar.Values.Sum(books => books.Count);
    var aproxMemMB = total * 4d / 1024 / 1024;
    System.Console.WriteLine($"Authors total hops available: {total}, MB: {aproxMemMB}");
}
/// <summary>
/// Builds <c>SimilarBooksByTags</c>: for every book, the list of books carrying any of its tags.
/// The list is a plain concatenation per tag, so the book itself and books sharing several tags
/// appear repeatedly. Prints the total number of list entries and a memory estimate to the console.
/// </summary>
/// <param name="db">Database context</param>
public void InitializeTags(BookRecommenderContext db)
{
    // Fetch only the two id columns of the book/tag relation.
    var links = db.Tags
        .Select(row => new { row.BookId, row.TagId })
        .ToList();

    // Tag id -> ids of the books carrying that tag.
    var booksByTag = links
        .GroupBy(link => link.TagId, link => link.BookId)
        .ToDictionary(group => group.Key, group => group.ToList());

    // Book id -> books reachable through each of its tags.
    var similar = links
        .GroupBy(link => link.BookId, link => link.TagId)
        .ToDictionary(
            group => group.Key,
            group => group.SelectMany(tagId => booksByTag[tagId]).ToList());

    SimilarBooksByTags = similar;

    var total = similar.Values.Sum(books => books.Count);
    var aproxMemMB = total * 4d / 1024 / 1024;
    System.Console.WriteLine($"Tags total hops available: {total}, MB: {aproxMemMB}");
}
/// <summary>
/// Builds <c>SimilarBooksByCharacters</c>: for every book, the list of books featuring any of its characters.
/// The list is a plain concatenation per character, so the book itself and books sharing several
/// characters appear repeatedly. Prints the total number of list entries and a memory estimate to the console.
/// </summary>
/// <param name="db">Database context</param>
public void InitializeCharacters(BookRecommenderContext db)
{
    // Fetch only the two id columns of the book/character relation.
    var links = db.BooksCharacters
        .Select(row => new { row.BookId, row.CharacterId })
        .ToList();

    // Character id -> ids of the books featuring that character.
    var booksByCharacter = links
        .GroupBy(link => link.CharacterId, link => link.BookId)
        .ToDictionary(group => group.Key, group => group.ToList());

    // Book id -> books reachable through each of its characters.
    var similar = links
        .GroupBy(link => link.BookId, link => link.CharacterId)
        .ToDictionary(
            group => group.Key,
            group => group.SelectMany(characterId => booksByCharacter[characterId]).ToList());

    SimilarBooksByCharacters = similar;

    var total = similar.Values.Sum(books => books.Count);
    var aproxMemMB = total * 4d / 1024 / 1024;
    System.Console.WriteLine($"Characters total hops available: {total}, MB: {aproxMemMB}");
}
/// <summary>
/// Builds <c>BooksNames</c>: a lookup from book id to the value returned by the book's <c>GetNameEn()</c>.
/// </summary>
/// <param name="db">Database context</param>
public void InitializeNames(BookRecommenderContext db)
{
    // All books are loaded into memory first; GetNameEn() is then called on the loaded objects.
    BooksNames = db.Books
        .ToList()
        .ToDictionary(book => book.BookId, book => book.GetNameEn());
}