/// <summary>
/// Takes a batch of up to 10000 comments that are not yet flagged as
/// <c>splitted</c>, extracts the links and words from each message, rebuilds the
/// CommentLinks / CommentWords rows for those comments and finally flags the
/// comments as splitted.
/// </summary>
/// <remarks>
/// Existing CommentWords / CommentLinks rows for the batch are deleted on
/// background tasks while the splitting runs; the method waits for both deletes
/// before inserting the new rows. When there is nothing to process the method
/// returns without touching the database any further.
/// </remarks>
public static void SplitWords()
{
    using (var db = new FacebookDebatEntities())
    {
        log.Info("Finding un-splitted comments");
        var comments = db.Comments.Where(x => !x.splitted).Take(10000).ToList();

        // Nothing to do. Bailing out here also avoids emitting "... IN ()" below,
        // which is not valid SQL, and skips building the word/link caches for nothing.
        if (comments.Count == 0)
        {
            log.Info("No un-splitted comments found");
            return;
        }

        // Comma-separated id list used in the raw SQL statements below. The ids are
        // integers read from the database, so inlining them into SQL is not an
        // injection vector.
        var commentIds = string.Join(",", comments.Select(x => x.id.ToString()));

        // Clear out any previous split results for this batch in the background
        // while the (CPU-bound) splitting below is running.
        var deleteWordsTask = Task.Factory.StartNew(() =>
        {
            DatabaseTools.ExecuteNonQuery(string.Format("DELETE FROM CommentWords WHERE comment_id IN ({0})", commentIds));
            log.Info("Finished deleting words");
        });
        var deleteLinksTask = Task.Factory.StartNew(() =>
        {
            DatabaseTools.ExecuteNonQuery(string.Format("DELETE FROM CommentLinks WHERE comment_id IN ({0})", commentIds));
            log.Info("Finished deleting links");
        });

        // (comment id, word/link text) pairs; resolved to word/link ids after the sync.
        var commentWords = new List<Tuple<int, string>>();
        var commentLinks = new List<Tuple<int, string>>();

        // NOTE(review): UpdateOrInsertBuilder is defined elsewhere; it appears to
        // collect entities keyed by the given selector so that unknown ones can be
        // inserted by SyncDatabase later - confirm against its implementation.
        log.Info("Building comment cache");
        var wordCache = new UpdateOrInsertBuilder<Word>(db.Words, x => x.word1, (x, y) => false);
        log.Info("Building link cache");
        var linkCache = new UpdateOrInsertBuilder<Link>(db.Links, x => x.url, (x, y) => false);

        log.Info("Splitting");
        foreach (var comment in comments)
        {
            // Get links
            String commentWithoutLinks;
            var links = Tools.StripLinks(comment.message, out commentWithoutLinks);
            foreach (var link in links)
            {
                linkCache.Process(link, () => new Link() { url = link });
                commentLinks.Add(Tuple.Create(comment.id, link));
            }

            // Get words from link-stripped comment
            var words = Tools.SplitWords(commentWithoutLinks.ToLower()).Where(x => !string.IsNullOrEmpty(x));
            foreach (var word in words)
            {
                if (word.Length >= 100) // Longest possible word in DB
                {
                    log.Warn("Ignoring " + word);
                    continue;
                }

                if (word.Any(x => char.IsDigit(x) || x == '_')) // no words with underscores or digits
                {
                    continue;
                }

                if (word.Length < 2) // single-character words are skipped
                {
                    continue;
                }

                wordCache.Process(word, () => new Word() { word1 = word });
                commentWords.Add(Tuple.Create(comment.id, word));
            }
        }

        // The old rows must be gone before the new ones are inserted.
        log.Info("Waiting for delete-tasks");
        Task.WaitAll(deleteLinksTask, deleteWordsTask);

        wordCache.SyncDatabase(2000, "dbo.Words", "id", (word) => new { id = (int?)null, word = word.word1 });
        linkCache.SyncDatabase(2000, "dbo.Links", "id", (link) => new { id = (int?)null, url = link.url });

        // Resolve text -> id using a fresh context, as the original code did
        // (presumably so that rows just written by SyncDatabase are read back from
        // the database - confirm). The context is now disposed instead of leaked.
        using (var lookupDb = new FacebookDebatEntities())
        {
            var linkTranslator = lookupDb.Links.ToDictionary(x => x.url, x => x.id);
            DatabaseTools.ChunkInsert("dbo.CommentLinks", 10000, commentLinks.Select(x => new { id = (int?)null, comment_id = x.Item1, link_id = linkTranslator[x.Item2] }));

            var wordTranslator = lookupDb.Words.ToDictionary(x => x.word1, x => x.id);
            DatabaseTools.ChunkInsert("dbo.CommentWords", 20000, commentWords.Select(x => new { id = (int?)null, comment_id = x.Item1, word_id = wordTranslator[x.Item2] }));
        }

        log.Info("Marking splitted");
        DatabaseTools.ExecuteNonQuery(string.Format("update dbo.Comments set splitted = 1 where id in ({0})", commentIds));
    }
}