/// <summary>
/// Console entry point. Loads the raw questions from disk, builds a <c>TagServer</c> from the
/// serialised data, logs how long start-up took and how much memory is in use, and then runs the
/// exclusion bit map sanity checks once for every <c>QueryType</c> value.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    Logger.LogStartupMessage("IsServerGC: {0}, LatencyMode: {1}", GCSettings.IsServerGC, GCSettings.LatencyMode);

    var dataFolder = @"C:\Users\warma11\Downloads\__GitHub__\StackOverflowTagServer\BinaryData\";
    var questionsFile = @"Questions-NEW.bin";

    // Time only the load + construction of the TagServer.
    var startupStopwatch = Stopwatch.StartNew();
    var rawQuestions = TagServer.GetRawQuestionsFromDisk(dataFolder, questionsFile);
    //TagServer tagServer = TagServer.CreateFromScratchAndSaveToDisk(rawQuestions, intermediateFilesFolder: dataFolder);
    TagServer tagServer = TagServer.CreateFromSerialisedData(rawQuestions, intermediateFilesFolder: dataFolder); //, deserialiseBitMapsIndexes: false);
    startupStopwatch.Stop();
    var startupElapsed = startupStopwatch.Elapsed;

    // Force a full collection first, so the memory figure logged below is taken after a gen-2 GC.
    GC.Collect(2, GCCollectionMode.Forced);
    var totalBytes = GC.GetTotalMemory(true);
    var totalMemoryMB = totalBytes / 1024.0 / 1024.0;
    var totalMemoryGB = totalMemoryMB / 1024.0;
    Logger.LogStartupMessage("Took {0} ({1,6:N2} ms), in total to complete Startup - Using {2:N2} MB ({3:N2} GB) of memory in TOTAL",
                             startupElapsed, startupElapsed.TotalMilliseconds, totalMemoryMB, totalMemoryGB);

    //PrintQuestionStats(rawQuestions);
    //PrintTagStats(tagServer.AllTags);

    // Sanity check: for every query type, build the exclusion bit map for the expanded
    // exclusion list, validate it, then run the index queries and the comparison queries.
    var leppieTags = Utils.GetLeppieTagsFromResource();
    NGrams nGrams = WildcardProcessor.CreateNGrams(tagServer.AllTags);
    var excludedTags = WildcardProcessor.ExpandTagsNGrams(tagServer.AllTags, leppieTags, nGrams, printLoggingMessages: true);

    var allQueryTypes = (QueryType[])Enum.GetValues(typeof(QueryType));
    foreach (QueryType queryType in allQueryTypes)
    {
        var exclusionBitMap = tagServer.CreateBitMapIndexForExcludedTags(excludedTags, queryType, printLoggingMessages: true);
        tagServer.ValidateExclusionBitMap(exclusionBitMap, excludedTags, queryType);

        TestBitMapIndexQueries(tagServer, excludedTags, exclusionBitMap, queryType);
        RunComparisonQueries(tagServer, excludedTags, exclusionBitMap, queryType);
    }

    return;

#if false // code that currently isn't run, because of the early "return" statement above
    foreach (QueryType type in (QueryType[])Enum.GetValues(typeof(QueryType)))
    {
        using (Utils.SetConsoleColour(ConsoleColor.Green))
            Logger.Log("Creating Bit Map index for \"{0}\"", type);

        //var expandedTags = WildcardProcessor.ExpandTagsNGrams(tagServer.AllTags, leppieTags, nGrams, printLoggingMessages: true);
        //var expandedTags = WildcardProcessor.ExpandTagsNGrams(tagServer.AllTags, new List<string>(new[] { "*c#*" }), nGrams, printLoggingMessages: true);
        var expandedTags = WildcardProcessor.ExpandTagsNGrams(tagServer.AllTags, new List<string>(new[] { "*c#*", "*java*" }), nGrams, printLoggingMessages: true);
        var bitMap = tagServer.CreateBitMapIndexForExcludedTags(expandedTags, type, printLoggingMessages: true);

        //using (Utils.SetConsoleColour(ConsoleColor.DarkGreen))
        //    Logger.Log(bitMap.ToDebugString(printEveryLiteralWord: false));

        //tagServer.ValidateExclusionBitMap(bitMap, expandedTags, type);
    }

    return;

    var expandedTagsNGrams = WildcardProcessor.ExpandTagsNGrams(tagServer.AllTags, leppieTags, nGrams, printLoggingMessages: true);
    //var expandedTagsNGrams = WildcardProcessor.ExpandTagsNGrams(tagServer.AllTags, new List<string>(new[] { "*c#*" }), nGrams, printLoggingMessages: true);
    var queryTypeToTest = QueryType.AnswerCount;
    var bitMapIndex = tagServer.CreateBitMapIndexForExcludedTags(expandedTagsNGrams, queryTypeToTest, printLoggingMessages: true);

    tagServer.ValidateExclusionBitMap(bitMapIndex, expandedTagsNGrams, queryTypeToTest);

    TestBitMapIndexQueries(tagServer, expandedTagsNGrams, bitMapIndex, queryTypeToTest);

    // Get some interesting stats on Leppie's Tag (how many qu's the cover/exclude, etc)
    //GetLeppieTagInfo(rawQuestions, tagServer.AllTags, leppieTags, leppieExpandedTags);

    return;

    // TODO currently it takes too long to create the Bit Map Index (expanding the wildcards to tag is fast though)
    // Either the Bit Map has to be cached OR we need to find a faster way of populating it
    TestWildcards(tagServer, nGrams, "*c#*");
    //TestWildcards(tagServer, nGrams, "*c#", "c#*");
    //TestWildcards(tagServer, nGrams, "*c#");
    //TestWildcards(tagServer, nGrams, "c#*");
    //TestWildcards(tagServer, nGrams, "c#-2.0");
    TestWildcards(tagServer, nGrams, leppieTags.ToArray());
    //TestWildcards(tagServer, nGrams, "*"); // INCLUDE all Tags

    //RunExclusionQueryTests(tagServer, leppieExpandedTags, runsPerLoop: 10);

    //RunSimpleQueries(tagServer);

    //Logger.LogStartupMessage("Finished, press <ENTER> to exit");
    //Console.ReadLine();
#endif
}
/// <summary>
/// Expands the supplied tags/wildcards via N-Grams and logs the time taken, the wildcards found,
/// their search expansions and the resulting tags. It then logs the differences against the
/// Contains/StartsWith/EndsWith expansion and finally builds the exclusion bit map index for
/// <c>QueryType.AnswerCount</c>.
/// </summary>
/// <param name="tagServer">Supplies <c>AllTags</c> and creates the bit map index.</param>
/// <param name="nGrams">N-Gram lookup handed to <c>WildcardProcessor.ExpandTagsNGrams</c>.</param>
/// <param name="tagsToExpandInput">
/// Tags and/or wildcard patterns (entries containing '*'). A single "*" entry is a special case:
/// the bit map index is built from every key in <c>tagServer.AllTags</c> and nothing else is done.
/// </param>
private static void TestWildcards(TagServer tagServer, NGrams nGrams, params string[] tagsToExpandInput)
{
    // Lists longer than this are summarised by count rather than printed in full.
    const int MaxItemsToPrint = 50;

    var tagsToExpand = tagsToExpandInput.ToList();

    if (tagsToExpand.Count == 1 && tagsToExpand[0] == "*")
    {
        // Special case: exclude ALL tags. The message has no format placeholders, so no arguments are passed.
        using (Utils.SetConsoleColour(ConsoleColor.Green))
            Logger.Log("\nTestWildcards: special case, using ALL Tags");

        // The returned index is not needed here; the call is made for the work/logging it performs.
        tagServer.CreateBitMapIndexForExcludedTags(new CLR.HashSet<string>(tagServer.AllTags.Keys), QueryType.AnswerCount, printLoggingMessages: true);
        return;
    }

    // Compute the wildcard subset once; it is used by several of the log messages below.
    var wildcards = tagsToExpand.Where(t => t.Contains('*')).ToList();
    var regularTagCount = tagsToExpand.Count - wildcards.Count;

    using (Utils.SetConsoleColour(ConsoleColor.Green))
        Logger.Log("\nTestWildcards: {0}\n", String.Join(", ", wildcards));

    var timer = Stopwatch.StartNew();
    var expandedTagsNGrams = WildcardProcessor.ExpandTagsNGrams(tagServer.AllTags, tagsToExpand, nGrams, printLoggingMessages: true);
    timer.Stop();

    using (Utils.SetConsoleColour(ConsoleColor.DarkYellow))
    {
        Logger.LogStartupMessage("Took {0,6:N2} ms ({1}) to expanded Wildcards to {2,2:N0} tags (using N-Grams, with N={3})",
                                 timer.Elapsed.TotalMilliseconds, timer.Elapsed, expandedTagsNGrams.Count, WildcardProcessor.N);
    }

    Logger.LogStartupMessage("There are {0:N0} wildcards in the list and {1:N0} regular tags (i.e. with no '*' in them)",
                             wildcards.Count, regularTagCount);

    if (wildcards.Count > MaxItemsToPrint)
    {
        Logger.LogStartupMessage("Wildcards: TOO MANY TO PRINT (there are {0:N0} wildcards)", wildcards.Count);
    }
    else
    {
        Logger.LogStartupMessage("Wildcards: [{0}]", String.Join(", ", wildcards));
    }

    // One line per wildcard, showing the searches that WildcardProcessor derives from it.
    var expansions = wildcards
        .Select(w => String.Format("{0} -> {1}", w, String.Join(", ", WildcardProcessor.CreateSearches(w))))
        .ToList();
    if (expansions.Count > MaxItemsToPrint)
    {
        Logger.LogStartupMessage("Expansions: TOO MANY TO PRINT (there are {0:N0} expansions)", expansions.Count);
    }
    else
    {
        Logger.LogStartupMessage("Expansions:\n {0}", String.Join("\n ", expansions));
    }

    if (expandedTagsNGrams.Count > MaxItemsToPrint)
    {
        Logger.LogStartupMessage("Results: TOO MANY TO PRINT (there are {0:N0} results)", expandedTagsNGrams.Count);
    }
    else
    {
        Logger.LogStartupMessage("Results: [{0}]", String.Join(", ", expandedTagsNGrams));
    }

    // Cross-check the N-Grams result against the Contains/StartsWith/EndsWith expansion.
    var expandTagsContains = WildcardProcessor.ExpandTagsContainsStartsWithEndsWith(tagServer.AllTags, tagsToExpand);

    // Tag text is passed as an argument (not concatenated into the format string) so that it is
    // never interpreted as format placeholders.
    Logger.LogStartupMessage("\nIn Contains but not in NGrams: {0}", string.Join(", ", expandTagsContains.Except(expandedTagsNGrams)));
    Logger.LogStartupMessage("\nIn NGrams but not in Contains: {0}", string.Join(", ", expandedTagsNGrams.Except(expandTagsContains)));
    Logger.LogStartupMessage();

    // Only the AnswerCount index is built; the returned index itself is not used further.
    tagServer.CreateBitMapIndexForExcludedTags(expandedTagsNGrams, QueryType.AnswerCount, printLoggingMessages: true);
    //tagServer.CreateBitMapIndexForExcludedTags(expandedTagsNGrams, QueryType.CreationDate, printLoggingMessages: true);
    //tagServer.CreateBitMapIndexForExcludedTags(expandedTagsNGrams, QueryType.LastActivityDate, printLoggingMessages: true);
    //tagServer.CreateBitMapIndexForExcludedTags(expandedTagsNGrams, QueryType.Score, printLoggingMessages: true);
    //tagServer.CreateBitMapIndexForExcludedTags(expandedTagsNGrams, QueryType.ViewCount, printLoggingMessages: true);
}
/// <summary>
/// Expands <paramref name="tagsToExpand"/> with four different strategies
/// (Contains/StartsWith/EndsWith, Regex, Trie and N-Grams), timing each one. Logs the number of
/// tags each strategy produced, how long it took, and the set differences between the Contains
/// result and each of the other results.
/// </summary>
/// <param name="allTags">The full tag lookup that wildcards are expanded against.</param>
/// <param name="trie">Trie passed to <c>WildcardProcessor.ExpandTagsTrie</c>.</param>
/// <param name="nGrams">N-Gram lookup passed to <c>WildcardProcessor.ExpandTagsNGrams</c>.</param>
/// <param name="tagsToExpand">The raw tags/wildcards to expand.</param>
/// <returns>The result of the N-Grams expansion.</returns>
private static HashSet ProcessTagsForFastLookup(TagLookup allTags, Trie<int> trie, NGrams nGrams, List<string> tagsToExpand)
{
    var expandTagsContainsTimer = Stopwatch.StartNew();
    var expandTagsContains = WildcardProcessor.ExpandTagsContainsStartsWithEndsWith(allTags, tagsToExpand);
    expandTagsContainsTimer.Stop();

    //var expandTagsVBTimer = Stopwatch.StartNew();
    //var expandedTagsVB = WildcardProcessor.ExpandTagsVisualBasic(allTags, tagsToExpand);
    //expandTagsVBTimer.Stop();

    var expandTagsRegexTimer = Stopwatch.StartNew();
    var expandedTagsRegex = WildcardProcessor.ExpandTagsRegex(allTags, tagsToExpand);
    expandTagsRegexTimer.Stop();

    var expandTagsTrieTimer = Stopwatch.StartNew();
    var expandedTagsTrie = WildcardProcessor.ExpandTagsTrie(allTags, tagsToExpand, trie);
    expandTagsTrieTimer.Stop();

    var expandedTagsNGramsTimer = Stopwatch.StartNew();
    var expandedTagsNGrams = WildcardProcessor.ExpandTagsNGrams(allTags, tagsToExpand, nGrams, printLoggingMessages: true);
    // Stop the N-Grams stopwatch itself. Previously the (already stopped) Regex stopwatch was
    // stopped here, so the N-Grams stopwatch kept running until its value was read for logging.
    expandedTagsNGramsTimer.Stop();

    Logger.LogStartupMessage("\nThere are {0:N0} tags in total", allTags.Count);
    Logger.LogStartupMessage("There are {0:N0} tags/wildcards (raw) BEFORE expansion", tagsToExpand.Count);

    Logger.LogStartupMessage("\nExpanded to {0,4:N0} tags (Contains), took {1,8:N2} ms ({2})",
                             expandTagsContains.Count, expandTagsContainsTimer.Elapsed.TotalMilliseconds, expandTagsContainsTimer.Elapsed);
    //Logger.LogStartupMessage("Expanded to {0,4:N0} tags (VB), took {1,8:N2} ms ({2})",
    //                         expandedTagsVB.Count, expandTagsVBTimer.Elapsed.TotalMilliseconds, expandTagsVBTimer.Elapsed);
    Logger.LogStartupMessage("Expanded to {0,4:N0} tags (Regex), took {1,8:N2} ms ({2})",
                             expandedTagsRegex.Count, expandTagsRegexTimer.Elapsed.TotalMilliseconds, expandTagsRegexTimer.Elapsed);
    Logger.LogStartupMessage("Expanded to {0,4:N0} tags (Trie), took {1,8:N2} ms ({2})",
                             expandedTagsTrie.Count, expandTagsTrieTimer.Elapsed.TotalMilliseconds, expandTagsTrieTimer.Elapsed);
    Logger.LogStartupMessage("Expanded to {0,4:N0} tags (N-Grams), took {1,8:N2} ms ({2})",
                             expandedTagsNGrams.Count, expandedTagsNGramsTimer.Elapsed.TotalMilliseconds, expandedTagsNGramsTimer.Elapsed);

    // Differences between the Contains result and each other strategy. Tag text is passed as a
    // format argument (not concatenated into the format string) so that it is never interpreted
    // as format placeholders.
    Logger.LogStartupMessage("\nIn Contains but not in Regex: {0}", string.Join(", ", expandTagsContains.Except(expandedTagsRegex)));
    Logger.LogStartupMessage("\nIn Regex but not in Contains: {0}", string.Join(", ", expandedTagsRegex.Except(expandTagsContains)));

    //Logger.LogStartupMessage("\nIn Contains but not in VB: " + string.Join(", ", expandTagsContains.Except(expandedTagsVB)));
    //Logger.LogStartupMessage("\nIn VB but not in Contains: " + string.Join(", ", expandedTagsVB.Except(expandTagsContains)));

    Logger.LogStartupMessage("\nIn Contains but not in Trie: {0}", string.Join(", ", expandTagsContains.Except(expandedTagsTrie)));
    Logger.LogStartupMessage("\nIn Trie but not in Contains: {0}", string.Join(", ", expandedTagsTrie.Except(expandTagsContains)));

    Logger.LogStartupMessage("\nIn Contains but not in NGrams: {0}", string.Join(", ", expandTagsContains.Except(expandedTagsNGrams)));
    Logger.LogStartupMessage("\nIn NGrams but not in Contains: {0}", string.Join(", ", expandedTagsNGrams.Except(expandTagsContains)));
    Logger.LogStartupMessage();

    // The N-Grams result is the one handed back to the caller.
    var expandedTags = expandedTagsNGrams;
    //Logger.LogStartupMessage(string.Join(", ", expandedTags));

    // Any expanded tag that is not a key of "allTags" indicates an error in the expansion, so report it.
    var extra = expandedTags.Except(allTags.Keys).ToList();
    if (extra.Count > 0)
    {
        Logger.LogStartupMessage("\nExtra Tags: {0}\n", string.Join(", ", extra));
    }

    return expandedTags;
}