/// <summary>
/// Logs summary statistics about the "Leppie" tag list: how many entries are wildcards,
/// how many tags they expand to, and how many tags/questions remain once the expanded
/// tags are removed. Each "remaining" line also prints the expected total so a mismatch
/// is visible in the log.
/// </summary>
/// <param name="rawQuestions">All questions; only their <c>Tags</c> are inspected.</param>
/// <param name="allTags">Lookup whose keys are every known tag.</param>
/// <param name="leppieTags">The un-expanded tag list (entries may contain '*').</param>
/// <param name="leppieExpandedTags">The tag set produced by expanding <paramref name="leppieTags"/>.</param>
private static void GetLeppieTagInfo(List<Question> rawQuestions, TagLookup allTags, List<string> leppieTags, HashSet leppieExpandedTags)
{
    Logger.Log("\nThere are {0:N0} questions and {1:N0} tags in total", rawQuestions.Count, allTags.Count);

    var wildcardCount = leppieTags.Count(t => t.Contains('*'));
    Logger.Log("Leppie list of {0:N0} tags contains {1:N0} that are wildcards", leppieTags.Count, wildcardCount);
    Logger.Log("Leppie {0:N0} tags with wildcards expand to {1:N0} tags in total", leppieTags.Count, leppieExpandedTags.Count);

    // Everything in allTags that is NOT one of the expanded tags.
    var remainingTags = new CLR.HashSet<string>(allTags.Keys);
    remainingTags.ExceptWith(leppieExpandedTags);

    var combinedTagCount = remainingTags.Count + leppieExpandedTags.Count;
    Logger.LogStartupMessage(
        "There are {0:N0} tags remaining, {0:N0} + {1:N0} = {2:N0} (Expected: {3:N0})",
        remainingTags.Count,
        leppieExpandedTags.Count,
        combinedTagCount,
        allTags.Count);

    Logger.LogStartupMessage("Sanity checking excluded/included tags and questions...");

    // A question is "excluded" when at least one of its tags is an expanded tag ...
    var excludedQuestions = rawQuestions.Count(question => question.Tags.Any(t => leppieExpandedTags.Contains(t)));
    // ... and "included" when none of its tags falls outside the remaining set.
    var includedQuestions = rawQuestions.Count(question => !question.Tags.Any(t => !remainingTags.Contains(t)));

    Logger.Log(
        "{0:N0} EXCLUDED tags cover {1:N0} questions (out of {2:N0})",
        leppieExpandedTags.Count,
        excludedQuestions,
        rawQuestions.Count);

    var combinedQuestionCount = includedQuestions + excludedQuestions;
    Logger.Log(
        "{0:N0} remaining tags cover {1:N0} questions, {2:N0} + {3:N0} = {4:N0} (Expected: {5:N0})",
        remainingTags.Count,
        includedQuestions,
        includedQuestions,
        excludedQuestions,
        combinedQuestionCount,
        rawQuestions.Count);
    Logger.Log();
}
/// <summary>
/// Runs one BitMap-index query and logs (in red) any results that fail validation.
/// When both <paramref name="tagsToExclude"/> and <paramref name="exclusionBitMap"/> are
/// supplied, also logs results that carry a tag which should have been excluded.
/// </summary>
/// <param name="tagServer">Server used to run the query and validate its results.</param>
/// <param name="queryInfo">Describes the query to run.</param>
/// <param name="tagsToExclude">Optional set of excluded tags used for the second check.</param>
/// <param name="exclusionBitMap">Optional exclusion BitMap passed through to the query.</param>
private static void TestBitMapIndexAndValidateResults(TagServer tagServer, QueryInfo queryInfo, CLR.HashSet<string> tagsToExclude = null, EwahCompressedBitArray exclusionBitMap = null)
{
    var queryOutcome = tagServer.ComparisionQueryBitMapIndex(queryInfo, exclusionBitMap, printLoggingMessages: true);

    // Check 1: results that are invalid for this query.
    var invalidResults = tagServer.GetInvalidResults(queryOutcome.Questions, queryInfo);
    if (invalidResults.Any())
    {
        using (Utils.SetConsoleColour(ConsoleColor.Red))
        {
            Logger.Log("ERROR Running \"{0}\" Query, {1} (out of {2}) results were invalid",
                       queryInfo.Operator, invalidResults.Count, queryOutcome.Questions.Count);
        }

        foreach (var invalid in invalidResults)
        {
            Logger.Log(" {0,8}: {1}", invalid.Id, String.Join(", ", invalid.Tags));
        }

        Logger.Log();
    }

    // Check 2 only applies when exclusions were actually in play.
    if (tagsToExclude == null || exclusionBitMap == null)
    {
        return;
    }

    var leaked = tagServer.GetShouldHaveBeenExcludedResults(queryOutcome.Questions, queryInfo, tagsToExclude);
    if (!leaked.Any())
    {
        return;
    }

    using (Utils.SetConsoleColour(ConsoleColor.Red))
    {
        var distinctQuestionCount = leaked.Select(s => s.Item1.Id).Distinct().Count();
        Logger.Log("ERROR Running \"{0}\" Query, {1} (out of {2}) questions should have been excluded",
                   queryInfo.Operator, distinctQuestionCount, queryOutcome.Questions.Count);
    }

    foreach (var entry in leaked)
    {
        Logger.Log(" {0,8}: {1} -> {2}", entry.Item1.Id, String.Join(", ", entry.Item1.Tags), string.Join(", ", entry.Item2));
    }

    Logger.Log();
}
/// <summary>
/// Cross-checks an exclusion BitMap against the questions' tags and logs every mismatch.
/// The checks below treat a SET bit as "this question is excluded":
///  1) every position that is NOT set must hold a question with no tag in
///     <paramref name="expandedTagsNGrams"/>;
///  2) every position that IS set must hold a question with at least one such tag.
/// </summary>
/// <param name="bitMapIndex">The exclusion BitMap to validate (it is cloned, not modified).</param>
/// <param name="expandedTagsNGrams">The set of excluded tags.</param>
/// <param name="queryType">Selects which question ordering the bit positions refer to.</param>
internal void ValidateExclusionBitMap(EwahCompressedBitArray bitMapIndex, CLR.HashSet<string> expandedTagsNGrams, QueryType queryType)
{
    var questionLookup = GetTagByQueryLookup(queryType)[TagServer.ALL_TAGS_KEY];
    var shouldHaveBeenExcluded = new List<Tuple<Question, string>>();

    // Work on an inverted copy so the caller's BitMap is left untouched.
    var invertedBitMap = (EwahCompressedBitArray)bitMapIndex.Clone();
    invertedBitMap.Not();

    var unsetPositions = invertedBitMap.GetPositions();
    foreach (var position in unsetPositions)
    {
        var question = questions[questionLookup[position]];
        foreach (var offendingTag in question.Tags.Where(t => expandedTagsNGrams.Contains(t)))
        {
            // One entry per offending tag, so a question can appear more than once.
            shouldHaveBeenExcluded.Add(Tuple.Create(question, offendingTag));
        }

        // Sometimes the validation locks up my laptop, this *seems* to make a difference?!
        Thread.Yield();
    }

    using (Utils.SetConsoleColour(ConsoleColor.Blue))
    {
        Logger.Log("Validating Exclusion Bit Map, checked {0:N0} positions for INVALID tags", unsetPositions.Count);
    }

    if (shouldHaveBeenExcluded.Any())
    {
        using (Utils.SetConsoleColour(ConsoleColor.Red))
        {
            var distinctQuestionCount = shouldHaveBeenExcluded.Select(i => i.Item1.Id).Distinct().Count();
            Logger.Log("ERROR Validating Exclusion Bit Map, {0:N0} questions should have been excluded", distinctQuestionCount);
        }

        foreach (var entry in shouldHaveBeenExcluded)
        {
            Logger.Log(" {0,8}: {1} -> {2}", entry.Item1.Id, String.Join(", ", entry.Item1.Tags), entry.Item2);
        }
    }

    var setPositions = bitMapIndex.GetPositions();
    foreach (var position in setPositions)
    {
        var question = questions[questionLookup[position]];
        var hasExcludedTag = question.Tags.Any(t => expandedTagsNGrams.Contains(t));
        if (hasExcludedTag)
        {
            continue;
        }

        using (Utils.SetConsoleColour(ConsoleColor.Red))
        {
            Logger.Log("ERROR {0,8}: {1} -> didn't contain ANY excluded tags", question.Id, String.Join(", ", question.Tags));
        }
    }

    using (Utils.SetConsoleColour(ConsoleColor.Blue))
    {
        Logger.Log("Validating Exclusion Bit Map, checked {0:N0} positions for EXPECTED tags", setPositions.Count);
    }

    Logger.Log();
}
/// <summary>
/// Runs a two-tag boolean query ("AND", "AND-NOT", "OR", "OR-NOT") using the hand-written
/// (non-LINQ) implementations, records the elapsed time via <c>Results.AddData</c> and logs
/// the counters gathered while the query ran.
/// </summary>
/// <param name="info">Tags, operator, query type, page size and skip count.</param>
/// <param name="tagsToExclude">Optional set of tags; passed through to the query implementation.</param>
/// <returns>The page of questions plus the "BaseQueryCounter", "ItemsSkipped" and "ExcludedCounter" values.</returns>
/// <exception cref="InvalidOperationException">The operator is not one of the four supported values.</exception>
internal QueryResult QueryNoLINQ(QueryInfo info, CLR.HashSet<string> tagsToExclude = null)
{
    var stopwatch = Stopwatch.StartNew();

    TagByQueryLookup lookup = GetTagByQueryLookup(info.Type);
    ThrowIfInvalidParameters(info.Tag, info.PageSize, lookup);
    ThrowIfInvalidParameters(info.OtherTag, info.PageSize, lookup);

    ComplexQueryResult outcome = null;
    switch (info.Operator)
    {
        case "AND":
            outcome = AndQuery(lookup[info.Tag], lookup[info.OtherTag], info.PageSize, info.Skip, tagsToExclude);
            break;

        case "AND-NOT":
            outcome = AndNotQuery(lookup[info.Tag], lookup[info.OtherTag], info.PageSize, info.Skip, tagsToExclude);
            break;

        case "OR":
            outcome = OrQuery(lookup[info.Tag], lookup[info.OtherTag], info.PageSize, info.Skip, tagsToExclude);
            break;

        case "OR-NOT": //"i.e. .net+or+jquery-"
            outcome = OrNotQuery(lookup[info.Tag], lookup[info.OtherTag], lookup[TagServer.ALL_TAGS_KEY], info.PageSize, info.Skip, tagsToExclude);
            break;

        // TODO Work out what a "NOT" query really means, at the moment it's the same as "AND-NOT"?!
        //case "NOT":
        //    outcome = NotQuery(lookup[info.Tag], lookup[info.OtherTag], info.PageSize, info.Skip, tagsToExclude);
        //    break;

        default:
            throw new InvalidOperationException(string.Format("Invalid operator specified: {0}", info.Operator ?? "<NULL>"));
    }

    stopwatch.Stop();
    var elapsed = stopwatch.Elapsed;

    Results.AddData(elapsed.TotalMilliseconds.ToString("#.##"));
    Logger.Log(
        "NO LINQ Boolean Query: \"{0}\" {1} \"{2}\", pageSize = {3:N0}, skip = {4:N0}, took {5} ({6:N2} ms) NO LINQ",
        info.Tag, info.Operator, info.OtherTag, info.PageSize, info.Skip, elapsed, elapsed.TotalMilliseconds);

    string excludedTagsText;
    if (tagsToExclude != null)
    {
        excludedTagsText = tagsToExclude.Count.ToString("N0");
    }
    else
    {
        excludedTagsText = "NO";
    }

    Logger.Log(
        "Got {0:N0} results in total, baseQueryCounter = {1:N0}, itemsSkipped = {2:N0}, excludedCounter = {3:N0} ({4} tags to be excluded)",
        outcome.Results.Count, outcome.BaseQueryCounter, outcome.ItemsSkipped, outcome.ExcludedCounter, excludedTagsText);

    var counters = new Dictionary<string, int>();
    counters.Add("BaseQueryCounter", outcome.BaseQueryCounter);
    counters.Add("ItemsSkipped", outcome.ItemsSkipped);
    counters.Add("ExcludedCounter", outcome.ExcludedCounter);

    return new QueryResult { Questions = outcome.Results, Counters = counters };
}
/// <summary>
/// Wraps <paramref name="query"/> in a lazy filter that drops every question id whose
/// question carries at least one tag in <paramref name="tagsToExclude"/>.
/// </summary>
/// <param name="query">Sequence of indexes into <c>questions</c>.</param>
/// <param name="tagsToExclude">Tags that disqualify a question.</param>
/// <param name="exclusionCounter">Incremented once for each id that is filtered out, as the sequence is enumerated.</param>
/// <returns>The filtered (deferred) sequence.</returns>
private IEnumerable<int> AddExclusionsToQuery(IEnumerable<int> query, CLR.HashSet<string> tagsToExclude, CounterWrapper exclusionCounter)
{
    return query.Where(questionId =>
    {
        var hasExcludedTag = questions[questionId].Tags.Any(tag => tagsToExclude.Contains(tag));
        if (hasExcludedTag)
        {
            // The counter only moves when the sequence is actually enumerated.
            exclusionCounter.Counter++;
        }

        return !hasExcludedTag;
    });
}
/// <summary>
/// Runs the AND / OR / AND-NOT / OR-NOT comparison queries for the ".net" and "jquery"
/// tags with exclusions applied, using a page size of 25.
/// </summary>
/// <param name="tagServer">Server the queries are run against.</param>
/// <param name="tagsToExclude">Set of excluded tags passed to the queries.</param>
/// <param name="exclusionBitMap">Exclusion BitMap passed to the BitMap-index queries.</param>
/// <param name="queryTypeToTest">Query type (ordering) to test.</param>
private static void RunComparisonQueries(TagServer tagServer, CLR.HashSet<string> tagsToExclude, EwahCompressedBitArray exclusionBitMap, QueryType queryTypeToTest)
{
    const int pageSize = 25;

    // The scenarios below are disabled. Their tag variables are kept with them (rather than
    // as live locals) because the small-tag lookup enumerates AllTags and First() throws
    // when no tag has <= 200 questions - which would abort this method for a value that is
    // not used by the only active scenario.
    //var smallTag = tagServer.AllTags.Where(t => t.Value <= 200).First().Key;
    //string largeTag = ".net";

    // LARGE 1st Tag, SMALL 2nd Tag
    //RunAndOrNotComparisionQueries(tagServer, tag1: largeTag, tag2: smallTag, pageSize: pageSize);

    // SMALL 1st Tag, LARGE 2nd Tag
    //RunAndOrNotComparisionQueries(tagServer, tag1: smallTag, tag2: largeTag, pageSize: pageSize);

    // 2 large tags (probably the worst case)
    //RunAndOrNotComparisionQueries(tagServer, "c#", "jquery", pageSize);
    //RunAndOrNotComparisionQueries(tagServer, ".net", "jquery", pageSize);

    // Now run the same tests, but with "Exclusions" applied to the queries
    RunAndOrNotComparisionQueries(tagServer, ".net", "jquery", pageSize, queryTypeToTest, tagsToExclude, exclusionBitMap);
}
/// <summary>
/// For each operator ("OR", "OR-NOT", "AND", "AND-NOT") runs a fixed set of tag pairings
/// through <c>TestBitMapIndexAndValidateResults</c>, first without and then with the
/// exclusion BitMap index.
/// </summary>
/// <param name="tagServer">Server the queries are run against.</param>
/// <param name="tagsToExclude">Excluded tags, used only in the "with exclusions" pass.</param>
/// <param name="exclusionBitMapIndex">Exclusion BitMap, used only in the "with exclusions" pass.</param>
/// <param name="queryTypeToTest">Query type (ordering) to test.</param>
private static void TestBitMapIndexQueries(TagServer tagServer, CLR.HashSet<string> tagsToExclude, EwahCompressedBitArray exclusionBitMapIndex, QueryType queryTypeToTest)
{
    // The pairings do not depend on the operator, so build them once.
    var pairings = new[]
    {
        Tuple.Create("c#", "java"),
        Tuple.Create("c#", "jquery"),
        Tuple.Create("c#", "javascript"),
        Tuple.Create("c#", ".net-3.5"), // large -> small
        Tuple.Create(".net-3.5", "c#"), // small -> large
    };
    var operators = new[] { "OR", "OR-NOT", "AND", "AND-NOT" };

    for (var i = 0; i < operators.Length; i++)
    {
        var op = operators[i];

        // Pass 1: WITHOUT the exclusion Bit Map Index
        using (Utils.SetConsoleColour(ConsoleColor.Green))
        {
            Logger.Log("Running \"{0}\" Queries", op);
        }

        foreach (var pair in pairings)
        {
            var plainQuery = new QueryInfo { Tag = pair.Item1, OtherTag = pair.Item2, Type = queryTypeToTest, Operator = op };
            TestBitMapIndexAndValidateResults(tagServer, plainQuery);
        }

        // Pass 2: WITH the exclusion Bit Map Index
        using (Utils.SetConsoleColour(ConsoleColor.Green))
        {
            Logger.Log("Running \"{0}\" Queries and using an Exclusion Bit Map Index", op);
        }

        foreach (var pair in pairings)
        {
            var exclusionQuery = new QueryInfo { Tag = pair.Item1, OtherTag = pair.Item2, Type = queryTypeToTest, Operator = op };
            TestBitMapIndexAndValidateResults(tagServer, exclusionQuery, tagsToExclude: tagsToExclude, exclusionBitMap: exclusionBitMapIndex);
        }
    }
}
/// <summary>
/// Builds a BitMap index for the given excluded tags by delegating to
/// <c>bitMapIndexHandler.CreateBitMapIndexForExcludedTags</c>; all arguments are passed through unchanged.
/// </summary>
/// <param name="tagsToExclude">Set of tags to exclude.</param>
/// <param name="queryType">Query type forwarded to the handler.</param>
/// <param name="printLoggingMessages">Forwarded to the handler; defaults to false.</param>
/// <returns>The BitMap returned by the handler.</returns>
public EwahCompressedBitArray CreateBitMapIndexForExcludedTags(CLR.HashSet <string> tagsToExclude, QueryType queryType, bool printLoggingMessages = false) { return(bitMapIndexHandler.CreateBitMapIndexForExcludedTags(tagsToExclude, queryType, printLoggingMessages)); }
/// <summary>
/// Runs a comparison query by delegating to <c>complexQueryProcessor.QueryNoLINQ</c>.
/// </summary>
/// <param name="info">Query description, forwarded unchanged.</param>
/// <param name="tagsToExclude">Optional set of tags to exclude, forwarded unchanged (may be null).</param>
/// <returns>The result returned by the query processor.</returns>
public QueryResult ComparisonQueryNoLINQ(QueryInfo info, CLR.HashSet <string> tagsToExclude = null) { return(complexQueryProcessor.QueryNoLINQ(info, tagsToExclude)); }
/// <summary>
/// Benchmarks the "AND", "OR", "AND-NOT" and "OR-NOT" queries for a pair of tags, in both
/// tag orders and over a range of skip counts, running each through the NoLINQ, LINQ and
/// BitMap-index implementations. One results file is created per operator.
/// The BitMap results are additionally validated; any invalid results, or results that
/// should have been excluded, are reported in red.
/// </summary>
/// <param name="tagServer">Server the queries are run against.</param>
/// <param name="tag1">First tag.</param>
/// <param name="tag2">Second tag.</param>
/// <param name="pageSize">Page size used for every query.</param>
/// <param name="queryTypeToTest">Query type (ordering) to test.</param>
/// <param name="tagsToExclude">Optional set of excluded tags.</param>
/// <param name="exclusionBitMap">Optional exclusion BitMap for the BitMap-index queries.</param>
private static void RunAndOrNotComparisionQueries(TagServer tagServer, string tag1, string tag2, int pageSize, QueryType queryTypeToTest, CLR.HashSet<string> tagsToExclude = null, EwahCompressedBitArray exclusionBitMap = null)
{
    using (Utils.SetConsoleColour(ConsoleColor.Green))
    {
        Logger.LogStartupMessage("\nComparison queries:\n\t\"{0}\" has {1:N0} questions\n\t\"{2}\" has {3:N0} questions",
                                 tag1, tagServer.AllTags[tag1], tag2, tagServer.AllTags[tag2]);
    }

    var queries = new[] { "AND", "OR", "AND-NOT", "OR-NOT" };
    var skipCounts = new[] { 0, 100, 250, 500, 1000, 2000, 4000, 8000 };
    foreach (var query in queries)
    {
        Results.CreateNewFile(string.Format("Results-{0}{1}-{2}-{3}-{4}-{5}.csv",
                                            (tagsToExclude != null && exclusionBitMap != null) ? "With-Exclusions-" : "",
                                            DateTime.Now.ToString("yyyy-MM-dd @ HH-mm-ss"),
                                            tag1, query, tag2, queryTypeToTest));
        Results.AddHeaders("Skip Count",
                           String.Format("Regular {0} {1} {2}", tag1, query, tag2),
                           String.Format("LINQ {0} {1} {2}", tag1, query, tag2),
                           String.Format("BitMap {0} {1} {2}", tag1, query, tag2),
                           String.Format("Regular {0} {1} {2}", tag2, query, tag1),
                           String.Format("LINQ {0} {1} {2}", tag2, query, tag1),
                           String.Format("BitMap {0} {1} {2}", tag2, query, tag1));

        using (Utils.SetConsoleColour(ConsoleColor.Yellow))
        {
            Logger.LogStartupMessage("\n{0} Comparison queries: {1} {0} {2}\n", query, tag1, tag2);
        }

        foreach (var skipCount in skipCounts)
        {
            Results.AddData(skipCount.ToString());

            // Run the query both ways round, i.e. "c# AND-NOT jquery" as well as "jquery AND-NOT c#")
            foreach (var tagPair in new[] { Tuple.Create(tag1, tag2), Tuple.Create(tag2, tag1) })
            {
                var info = new QueryInfo
                {
                    Type = queryTypeToTest,
                    Tag = tagPair.Item1,
                    OtherTag = tagPair.Item2,
                    Operator = query,
                    PageSize = pageSize,
                    Skip = skipCount
                };

                // The Regular/LINQ results are only needed by the (disabled) list comparisons
                // below, but the calls themselves must still run so their timings are recorded.
                Console.ForegroundColor = ConsoleColor.DarkGreen;
                var resultRegular = tagServer.ComparisonQueryNoLINQ(info, tagsToExclude);
                var resultLINQ = tagServer.ComparisonQuery(info, tagsToExclude);
                //using (Utils.SetConsoleColour(ConsoleColor.Gray));
                //    Utils.CompareLists(resultRegular.Questions, "Regular", resultLINQ.Questions, "LINQ");

                Console.ForegroundColor = ConsoleColor.Cyan;
                var resultBitMap = tagServer.ComparisionQueryBitMapIndex(info, exclusionBitMap, printLoggingMessages: true);

                // Previously this validation result was computed and then dropped, so invalid
                // BitMap results were never surfaced. Report it like the exclusion check below.
                var invalidResults = tagServer.GetInvalidResults(resultBitMap.Questions, info);
                if (invalidResults.Count > 0)
                {
                    using (Utils.SetConsoleColour(ConsoleColor.Red))
                    {
                        Logger.LogStartupMessage("ERROR: invalidResults contains {0} items", invalidResults.Count);
                    }
                }

                var shouldHaveBeenExcludedResults = tagServer.GetShouldHaveBeenExcludedResults(resultBitMap.Questions, info, tagsToExclude);
                if (shouldHaveBeenExcludedResults.Count > 0)
                {
                    using (Utils.SetConsoleColour(ConsoleColor.Red))
                    {
                        Logger.LogStartupMessage("ERROR: shouldHaveBeenExcludedResults contains {0} items", shouldHaveBeenExcludedResults.Count);
                    }
                }

                using (Utils.SetConsoleColour(ConsoleColor.Red))
                {
                    // See the TODO comments in ComplexQueryProcessor.cs for an explanation of this issue
                    if (query == "OR" || query == "OR-NOT")
                    {
                        Logger.LogStartupMessage("It is EXPECTED that {0} queries won't match when comparing \"Regular\" v. \"BitMap\"", query);
                    }
                }
                //using (Utils.SetConsoleColour(ConsoleColor.Gray));
                //    Utils.CompareLists(resultRegular.Questions, "Regular", resultBitMap.Questions, "BitMap");
            }

            Console.ResetColor();
            Results.StartNewRow();
        }

        Results.CloseFile();
    }
}
/// <summary>
/// "OR-NOT" query: walks <paramref name="tag1Ids"/> and <paramref name="allTagIds"/> in
/// lock-step (one id from each per iteration, stopping when the shorter array is used up)
/// and collects questions whose id is not in <paramref name="tag2Ids"/> and has not been
/// seen already, until the page is full.
/// </summary>
/// <param name="tag1Ids">Question ids for the first tag.</param>
/// <param name="tag2Ids">Question ids for the negated tag.</param>
/// <param name="allTagIds">Question ids for all tags.</param>
/// <param name="pageSize">Maximum number of results to return.</param>
/// <param name="skip">Number of matching items to skip before results are collected.</param>
/// <param name="tagsToExclude">Optional set of tags; a question with any of them is counted as excluded and dropped.</param>
/// <returns>The results plus the base-query, skipped and excluded counters.</returns>
ComplexQueryResult OrNotQuery(int[] tag1Ids, int[] tag2Ids, int[] allTagIds, int pageSize, int skip, CLR.HashSet<string> tagsToExclude = null)
{
    var page = new List<Question>(pageSize);
    var outcome = new ComplexQueryResult { Results = page, BaseQueryCounter = 0, ItemsSkipped = 0, ExcludedCounter = 0 };

    // TODO this has a small bug, we can get items out of order as we pull them thru in pairs
    // if t2 has several items that are larger than t1, t1 will still come out first!!
    // So algorithm needs to be:
    //  1) pull the LARGEST value (from t1 or t2)
    //  2) process this item
    //  3) repeat 1) again
    var negatedIds = cache.Value.GetCachedHashSet(tag2Ids);
    var alreadySeen = secondCache.Value.GetCachedHashSet();

    // Only positions present in BOTH arrays are visited.
    var pairCount = Math.Min(tag1Ids.Length, allTagIds.Length);
    for (var index = 0; index < pairCount; index++)
    {
        if (page.Count >= pageSize)
        {
            break;
        }

        // --- candidate from tag1Ids ---
        var fromTag1 = tag1Ids[index];
        outcome.BaseQueryCounter++;
        if (tagsToExclude != null && questions[fromTag1].Tags.Any(t => tagsToExclude.Contains(t)))
        {
            outcome.ExcludedCounter++;
        }
        else if (!negatedIds.Contains(fromTag1) && alreadySeen.Add(fromTag1))
        {
            if (outcome.ItemsSkipped < skip)
            {
                outcome.ItemsSkipped++;
            }
            else
            {
                page.Add(questions[fromTag1]);
            }
        }

        if (page.Count >= pageSize)
        {
            break;
        }

        // --- candidate from allTagIds ---
        // TODO should we be doing this here as well!!?!?!
        //baseQueryCounter++;
        var fromAll = allTagIds[index];
        if (tagsToExclude != null && questions[fromAll].Tags.Any(t => tagsToExclude.Contains(t)))
        {
            outcome.ExcludedCounter++;
        }
        else if (!negatedIds.Contains(fromAll) && alreadySeen.Add(fromAll))
        {
            if (outcome.ItemsSkipped < skip)
            {
                outcome.ItemsSkipped++;
            }
            else
            {
                page.Add(questions[fromAll]);
            }
        }
    }

    return outcome;
}
/// <summary>
/// "AND-NOT" query: collects questions from <paramref name="tag1Ids"/> whose id is not in
/// <paramref name="tag2Ids"/>, until the page is full.
/// </summary>
/// <param name="tag1Ids">Question ids for the first tag.</param>
/// <param name="tag2Ids">Question ids for the negated tag.</param>
/// <param name="pageSize">Maximum number of results to return.</param>
/// <param name="skip">Number of matching items to skip before results are collected.</param>
/// <param name="tagsToExclude">Optional set of tags; a question with any of them is counted as excluded and dropped.</param>
/// <returns>The results plus the base-query, skipped and excluded counters.</returns>
ComplexQueryResult AndNotQuery(int[] tag1Ids, int[] tag2Ids, int pageSize, int skip, CLR.HashSet<string> tagsToExclude = null)
{
    var page = new List<Question>(pageSize);
    var outcome = new ComplexQueryResult { Results = page, BaseQueryCounter = 0, ItemsSkipped = 0, ExcludedCounter = 0 };

    // Same trick as Except():
    // https://github.com/ungood/EduLinq/blob/master/Edulinq/Except.cs#L26-L40
    // The set starts out holding tag2Ids; Add() succeeds only for ids that are neither
    // in tag2Ids nor already yielded.
    var rejectedOrSeen = cache.Value.GetCachedHashSet(tag2Ids);

    for (var index = 0; index < tag1Ids.Length && page.Count < pageSize; index++)
    {
        var questionId = tag1Ids[index];
        outcome.BaseQueryCounter++;

        if (tagsToExclude != null && questions[questionId].Tags.Any(t => tagsToExclude.Contains(t)))
        {
            outcome.ExcludedCounter++;
            continue;
        }

        if (!rejectedOrSeen.Add(questionId))
        {
            continue;
        }

        if (outcome.ItemsSkipped < skip)
        {
            outcome.ItemsSkipped++;
            continue;
        }

        page.Add(questions[questionId]);
    }

    return outcome;
}
/// <summary>
/// Runs a two-tag boolean query ("AND", "AND-NOT", "OR", "OR-NOT") using LINQ operators,
/// records the elapsed time via <c>Results.AddData</c> and logs how many ids were pulled
/// from each tag's list.
/// </summary>
/// <param name="info">Tags, operator, query type, page size and skip count.</param>
/// <param name="tagsToExclude">Optional set of tags; questions with any of them are filtered out and counted.</param>
/// <returns>The page of questions plus the "TagCounter", "OtherTagCounter" and "ExclusionCounter" values.</returns>
/// <exception cref="InvalidOperationException">The operator is not one of the four supported values.</exception>
internal QueryResult Query(QueryInfo info, CLR.HashSet<string> tagsToExclude = null)
{
    var timer = Stopwatch.StartNew();
    TagByQueryLookup queryInfo = GetTagByQueryLookup(info.Type);
    ThrowIfInvalidParameters(info.Tag, info.PageSize, queryInfo);
    ThrowIfInvalidParameters(info.OtherTag, info.PageSize, queryInfo);

    // These counters are bumped from inside the deferred Select() calls, so they only
    // reflect how many ids were actually pulled once the query is enumerated below.
    var tagCounter = 0;
    var otherTagCounter = 0;
    var exclusionCounter = new CounterWrapper(initialValue: 0);
    IEnumerable<int> tag1Query = queryInfo[info.Tag].Select(t => { tagCounter++; return t; });
    IEnumerable<int> tag2Query = queryInfo[info.OtherTag].Select(t => { otherTagCounter++; return t; });

    IEnumerable<int> query;
    switch (info.Operator)
    {
        case "AND":
            query = tag1Query.Intersect(tag2Query);
            break;

        case "AND-NOT":
            query = tag1Query.Except(tag2Query);
            break;

        case "OR":
            // TODO this has a small bug, we can get items out of order as we pull them thru in pairs
            // if t2 has several items that are larger than t1, t1 will still come out first!!
            // So algorithm needs to be:
            //  1) pull the LARGEST value (from t1 or t2)
            //  2) process this item
            //  3) repeat 1) again
            query = tag1Query.Zip(tag2Query, (t1, t2) => new[] { t1, t2 })
                             .SelectMany(item => item)
                             .Distinct();
            break;

        case "OR-NOT": //"i.e. .net+or+jquery-"
            // TODO this has a small bug, we can get items out of order as we pull them thru in pairs
            // if t2 has several items that are larger than t1, t1 will still come out first!!
            // So algorithm needs to be:
            //  1) pull the LARGEST value (from t1 or t2)
            //  2) process this item
            //  3) repeat 1) again
            query = tag1Query.Zip(queryInfo[TagServer.ALL_TAGS_KEY], (t1, t2) => new[] { t1, t2 })
                             .SelectMany(item => item)
                             .Except(tag2Query)
                             .Distinct();
            break;

        // TODO Work out what a "NOT" query really means, at the moment it's the same as "AND-NOT"?!
        //case "NOT":
        //    query = tag1Query.Except(tag2Query);
        //    break;

        default:
            throw new InvalidOperationException(string.Format("Invalid operator specified: {0}", info.Operator ?? "<NULL>"));
    }

    // Every supported operator applies exclusions the same way, so do it once here
    // (the query is still deferred at this point).
    if (tagsToExclude != null)
    {
        query = AddExclusionsToQuery(query, tagsToExclude, exclusionCounter);
    }

    var results = query.Skip(info.Skip)
                       .Take(info.PageSize)
                       .Select(i => questions[i])
                       .ToList();
    timer.Stop();

    Results.AddData(timer.Elapsed.TotalMilliseconds.ToString("#.##"));
    Logger.Log("REGULAR Boolean Query: \"{0}\" {1} \"{2}\", pageSize = {3:N0}, skip = {4:N0}, took {5} ({6:N2} ms) REGULAR",
               info.Tag, info.Operator, info.OtherTag, info.PageSize, info.Skip, timer.Elapsed, timer.Elapsed.TotalMilliseconds);
    // Fixed: the tag2 counter used to be formatted with index {1}, so it repeated the tag1
    // value and otherTagCounter was never shown; the result count also had an empty format spec.
    Logger.Log("Got {0:N0} results in total, tag1 QueryCounter = {1:N0}, tag2 QueryCounter = {2:N0}",
               results.Count, tagCounter, otherTagCounter);
    //PrintResults(results, string.Format("{0} {1} {2}", info.Tag, info.Operator, info.OtherTag), info.Type);

    return new QueryResult
    {
        Questions = results,
        Counters = new Dictionary<string, int>
        {
            { "TagCounter", tagCounter },
            { "OtherTagCounter", otherTagCounter },
            { "ExclusionCounter", exclusionCounter.Counter }
        }
    };
}
/// <summary>
/// Builds a compressed BitMap over the "all tags" question list for
/// <paramref name="queryType"/>, setting a bit at every position whose question id belongs
/// to at least one of <paramref name="tagsToExclude"/>. The map is then sized to
/// <c>questions.Count</c> bits and shrunk.
/// </summary>
/// <remarks>
/// NOTE(review): an earlier comment in this method said the final map should be set where a
/// question CAN be used (so it can be ANDed with results). The code as written sets bits for
/// EXCLUDED questions and never inverts the map - confirm that consumers expect this polarity.
/// </remarks>
/// <param name="tagsToExclude">Tags whose questions are marked in the map.</param>
/// <param name="queryType">Selects which tag lookup (ordering) the positions refer to.</param>
/// <param name="printLoggingMessages">When true, logs timings and sizes for each phase.</param>
/// <returns>The populated BitMap.</returns>
internal EwahCompressedBitArray CreateBitMapIndexForExcludedTags(CLR.HashSet<string> tagsToExclude, QueryType queryType, bool printLoggingMessages = false)
{
    var totalTimer = Stopwatch.StartNew();
    var lookup = GetTagByQueryLookup(queryType);

    // Phase 1: gather the ids of every question that has an excluded tag.
    var collectTimer = Stopwatch.StartNew();
    var excludedIds = cache.Value.GetCachedHashSet();
    foreach (var excludedTag in tagsToExclude)
    {
        var idsForTag = lookup[excludedTag];
        foreach (var questionId in idsForTag)
        {
            excludedIds.Add(questionId);
        }
    }
    collectTimer.Stop();

    // Phase 2: set one bit per excluded question, by position in the "all tags" list.
    var allQuestions = lookup[TagServer.ALL_TAGS_KEY];
    var setTimer = Stopwatch.StartNew();
    var bitMap = new EwahCompressedBitArray();
    for (var position = 0; position < allQuestions.Length; position++)
    {
        if (!excludedIds.Contains(allQuestions[position]))
        {
            continue;
        }

        // Set a bit where you CAN'T use a question
        var bitWasSet = bitMap.SetOptimised(position);
        if (bitWasSet == false)
        {
            Logger.LogStartupMessage("Error, unable to set bit {0:N0} (SizeInBits = {1:N0})", position, bitMap.SizeInBits);
        }
    }
    setTimer.Stop();

    // Phase 3: pad to the full question count and compact.
    var tidyTimer = Stopwatch.StartNew();
    bitMap.SetSizeInBits(questions.Count, defaultvalue: false);
    bitMap.Shrink();
    tidyTimer.Stop();

    totalTimer.Stop();

    if (!printLoggingMessages)
    {
        return bitMap;
    }

    Logger.LogStartupMessage("Took {0} ({1,6:N0} ms) to collect {2:N0} Question Ids from {3:N0} Tags",
                             collectTimer.Elapsed, collectTimer.ElapsedMilliseconds, excludedIds.Count, tagsToExclude.Count);
    Logger.LogStartupMessage("Took {0} ({1,6:N0} ms) to set {2:N0} bits",
                             setTimer.Elapsed, setTimer.ElapsedMilliseconds, bitMap.GetCardinality());
    Logger.LogStartupMessage("Took {0} ({1,6:N0} ms) to tidy-up the Bit Map (SetSizeInBits(..) and Shrink()), Size={2:N0} bytes ({3:N2} MB)",
                             tidyTimer.Elapsed, tidyTimer.ElapsedMilliseconds, bitMap.SizeInBytes, bitMap.SizeInBytes / 1024.0 / 1024.0);

    using (Utils.SetConsoleColour(ConsoleColor.DarkYellow))
    {
        Logger.LogStartupMessage("Took {0} ({1,6:N0} ms) in TOTAL, made BitMap from {2:N0} Tags ({3:N0} Qu Ids), Cardinality={4:N0} ({5:N0})\n",
                                 totalTimer.Elapsed,
                                 totalTimer.ElapsedMilliseconds,
                                 tagsToExclude.Count,
                                 excludedIds.Count,
                                 bitMap.GetCardinality(),
                                 (ulong)questions.Count - bitMap.GetCardinality());
    }

    return bitMap;
}
/// <summary>
/// Finds results that should have been excluded by delegating to
/// <c>validator.GetShouldHaveBeenExcludedResults</c>; all arguments are passed through unchanged.
/// </summary>
/// <param name="results">Query results to check.</param>
/// <param name="queryInfo">The query that produced the results.</param>
/// <param name="tagsToExclude">Set of excluded tags.</param>
/// <returns>The list returned by the validator.</returns>
public List <Tuple <Question, List <string> > > GetShouldHaveBeenExcludedResults(List <Question> results, QueryInfo queryInfo, CLR.HashSet <string> tagsToExclude) { return(validator.GetShouldHaveBeenExcludedResults(results, queryInfo, tagsToExclude)); }
/// <summary>
/// Validates an exclusion BitMap by delegating to <c>validator.ValidateExclusionBitMap</c>;
/// all arguments are passed through unchanged.
/// </summary>
/// <param name="bitMapIndex">The exclusion BitMap to validate.</param>
/// <param name="expandedTagsNGrams">The set of excluded tags.</param>
/// <param name="queryType">Query type forwarded to the validator.</param>
internal void ValidateExclusionBitMap(EwahCompressedBitArray bitMapIndex, CLR.HashSet <string> expandedTagsNGrams, QueryType queryType) { validator.ValidateExclusionBitMap(bitMapIndex, expandedTagsNGrams, queryType); }
/// <summary>
/// Returns every result that carries at least one tag from <paramref name="tagsToExclude"/>,
/// paired with the list of its offending tags.
/// </summary>
/// <param name="results">Query results to check.</param>
/// <param name="queryInfo">Not read by this method.</param>
/// <param name="tagsToExclude">Set of excluded tags; when null an empty list is returned.</param>
/// <returns>One (question, offending tags) tuple per offending result, in result order.</returns>
internal List<Tuple<Question, List<string>>> GetShouldHaveBeenExcludedResults(List<Question> results, QueryInfo queryInfo, CLR.HashSet<string> tagsToExclude)
{
    var offenders = new List<Tuple<Question, List<string>>>();
    if (tagsToExclude == null)
    {
        // Nothing is excluded, so nothing can have leaked through.
        return offenders;
    }

    foreach (var question in results)
    {
        var offendingTags = question.Tags.Where(tag => tagsToExclude.Contains(tag)).ToList();
        if (offendingTags.Count == 0)
        {
            continue;
        }

        offenders.Add(Tuple.Create(question, offendingTags));
    }

    return offenders;
}