/// <summary>
/// Logs summary statistics about the "Leppie" tag list: how many of its entries are wildcards,
/// how many tags the list expands to, how many tags remain once the expanded tags are removed,
/// and how many questions are touched / untouched by the expanded tags.
/// </summary>
/// <param name="rawQuestions">All questions to sanity-check against.</param>
/// <param name="allTags">Lookup whose keys are every known tag.</param>
/// <param name="leppieTags">The tag list as supplied (entries containing '*' are counted as wildcards).</param>
/// <param name="leppieExpandedTags">The tag set after wildcard expansion.</param>
private static void GetLeppieTagInfo(List <Question> rawQuestions, TagLookup allTags, List <string> leppieTags, HashSet leppieExpandedTags)
        {
            Logger.Log("\nThere are {0:N0} questions and {1:N0} tags in total", rawQuestions.Count, allTags.Count);

            var wildcardCount = leppieTags.Count(t => t.Contains('*'));
            Logger.Log("Leppie list of {0:N0} tags contains {1:N0} that are wildcards", leppieTags.Count, wildcardCount);
            Logger.Log("Leppie {0:N0} tags with wildcards expand to {1:N0} tags in total", leppieTags.Count, leppieExpandedTags.Count);

            // Everything that is NOT in the expanded list
            var tagsLeftOver = new CLR.HashSet <string>(allTags.Keys);
            tagsLeftOver.ExceptWith(leppieExpandedTags);

            var tagTotal = tagsLeftOver.Count + leppieExpandedTags.Count;
            Logger.LogStartupMessage("There are {0:N0} tags remaining, {0:N0} + {1:N0} = {2:N0} (Expected: {3:N0})",
                                     tagsLeftOver.Count, leppieExpandedTags.Count, tagTotal, allTags.Count);

            Logger.LogStartupMessage("Sanity checking excluded/included tags and questions...");

            // A question counts as excluded when ANY of its tags is in the expanded list,
            // and as included when NONE of its tags falls outside the left-over set.
            var questionsExcluded = rawQuestions.Count(q => q.Tags.Any(tag => leppieExpandedTags.Contains(tag)));
            var questionsIncluded = rawQuestions.Count(q => !q.Tags.Any(tag => !tagsLeftOver.Contains(tag)));
            var questionTotal     = questionsIncluded + questionsExcluded;

            Logger.Log("{0:N0} EXCLUDED tags cover {1:N0} questions (out of {2:N0})",
                       leppieExpandedTags.Count, questionsExcluded, rawQuestions.Count);
            Logger.Log("{0:N0} remaining tags cover {1:N0} questions, {2:N0} + {3:N0} = {4:N0} (Expected: {5:N0})",
                       tagsLeftOver.Count, questionsIncluded,
                       questionsIncluded, questionsExcluded,
                       questionTotal, rawQuestions.Count);
            Logger.Log();
        }
        /// <summary>
        /// Runs a single Bit Map Index query and logs (in red) any results that are invalid for the query
        /// and, when both exclusion arguments are supplied, any results that carry an excluded tag.
        /// </summary>
        /// <param name="tagServer">Server used to run the query and to validate its results.</param>
        /// <param name="queryInfo">The query to execute.</param>
        /// <param name="tagsToExclude">Optional excluded tags; the exclusion check only runs when this AND <paramref name="exclusionBitMap"/> are non-null.</param>
        /// <param name="exclusionBitMap">Optional exclusion bit map handed to the query.</param>
        private static void TestBitMapIndexAndValidateResults(TagServer tagServer, QueryInfo queryInfo,
                                                              CLR.HashSet <string> tagsToExclude     = null,
                                                              EwahCompressedBitArray exclusionBitMap = null)
        {
            var result         = tagServer.ComparisionQueryBitMapIndex(queryInfo, exclusionBitMap, printLoggingMessages: true);
            var invalidResults = tagServer.GetInvalidResults(result.Questions, queryInfo);

            if (invalidResults.Any())
            {
                using (Utils.SetConsoleColour(ConsoleColor.Red))
                {
                    Logger.Log("ERROR Running \"{0}\" Query, {1} (out of {2}) results were invalid",
                               queryInfo.Operator, invalidResults.Count, result.Questions.Count);
                }

                foreach (var invalid in invalidResults)
                {
                    Logger.Log("  {0,8}: {1}", invalid.Id, string.Join(", ", invalid.Tags));
                }
                Logger.Log();
            }

            // The exclusion check is only meaningful when the query actually ran with exclusions
            if (tagsToExclude == null || exclusionBitMap == null)
            {
                return;
            }

            var notExcluded = tagServer.GetShouldHaveBeenExcludedResults(result.Questions, queryInfo, tagsToExclude);
            if (!notExcluded.Any())
            {
                return;
            }

            using (Utils.SetConsoleColour(ConsoleColor.Red))
            {
                var distinctQuestions = notExcluded.Select(s => s.Item1.Id).Distinct().Count();
                Logger.Log("ERROR Running \"{0}\" Query, {1} (out of {2}) questions should have been excluded",
                           queryInfo.Operator, distinctQuestions, result.Questions.Count);
            }

            foreach (var entry in notExcluded)
            {
                Logger.Log("  {0,8}: {1} -> {2}", entry.Item1.Id, string.Join(", ", entry.Item1.Tags), string.Join(", ", entry.Item2));
            }
            Logger.Log();
        }
Esempio n. 3
0
        /// <summary>
        /// Cross-checks an exclusion bit map against the set of excluded tags and logs any discrepancy:
        /// every position that is NOT set must belong to a question without excluded tags, and every
        /// position that IS set must belong to a question carrying at least one excluded tag.
        /// </summary>
        /// <param name="bitMapIndex">The exclusion bit map to validate (it is cloned, not modified).</param>
        /// <param name="expandedTagsNGrams">The fully expanded set of excluded tags.</param>
        /// <param name="queryType">Selects which question ordering the bit positions refer to.</param>
        internal void ValidateExclusionBitMap(EwahCompressedBitArray bitMapIndex, CLR.HashSet <string> expandedTagsNGrams, QueryType queryType)
        {
            // NOTE(review): the checks below treat a SET bit as "this question is excluded". An older comment
            // here claimed the opposite (set == usable) - confirm against the code that builds the bit map.
            var positionToQuestionId = GetTagByQueryLookup(queryType)[TagServer.ALL_TAGS_KEY];
            var wronglyIncluded      = new List <Tuple <Question, string> >();

            var inverted = (EwahCompressedBitArray)bitMapIndex.Clone();
            inverted.Not();
            var unsetPositions = inverted.GetPositions();

            // Pass 1: un-set positions must not reference a question with an excluded tag
            foreach (var position in unsetPositions)
            {
                var question = questions[positionToQuestionId[position]];
                wronglyIncluded.AddRange(question.Tags
                                                 .Where(tag => expandedTagsNGrams.Contains(tag))
                                                 .Select(tag => Tuple.Create(question, tag)));

                // Sometimes the validation locks up my laptop, this *seems* to make a difference?!
                Thread.Yield();
            }

            using (Utils.SetConsoleColour(ConsoleColor.Blue))
            {
                Logger.Log("Validating Exclusion Bit Map, checked {0:N0} positions for INVALID tags", unsetPositions.Count);
            }

            if (wronglyIncluded.Any())
            {
                using (Utils.SetConsoleColour(ConsoleColor.Red))
                {
                    Logger.Log("ERROR Validating Exclusion Bit Map, {0:N0} questions should have been excluded",
                               wronglyIncluded.Select(i => i.Item1.Id).Distinct().Count());
                }

                foreach (var entry in wronglyIncluded)
                {
                    Logger.Log("  {0,8}: {1} -> {2}", entry.Item1.Id, string.Join(", ", entry.Item1.Tags), entry.Item2);
                }
            }

            // Pass 2: set positions must reference a question with at least one excluded tag
            var setPositions = bitMapIndex.GetPositions();
            foreach (var position in setPositions)
            {
                var question = questions[positionToQuestionId[position]];
                if (!question.Tags.Any(tag => expandedTagsNGrams.Contains(tag)))
                {
                    using (Utils.SetConsoleColour(ConsoleColor.Red))
                    {
                        Logger.Log("ERROR {0,8}: {1} -> didn't contain ANY excluded tags", question.Id, string.Join(", ", question.Tags));
                    }
                }
            }

            using (Utils.SetConsoleColour(ConsoleColor.Blue))
            {
                Logger.Log("Validating Exclusion Bit Map, checked {0:N0} positions for EXPECTED tags", setPositions.Count);
            }

            Logger.Log();
        }
Esempio n. 4
0
        /// <summary>
        /// Runs a two-tag boolean query ("AND", "AND-NOT", "OR" or "OR-NOT") using the hand-written
        /// (non-LINQ) query methods, records the elapsed time via <c>Results.AddData</c> and logs a summary.
        /// </summary>
        /// <param name="info">Tags, operator, query type, page size and skip count of the query.</param>
        /// <param name="tagsToExclude">Optional tags; passed through to the individual query methods.</param>
        /// <returns>The page of questions plus the "BaseQueryCounter", "ItemsSkipped" and "ExcludedCounter" counters.</returns>
        /// <exception cref="InvalidOperationException">The operator is null or not one of the supported values.</exception>
        internal QueryResult QueryNoLINQ(QueryInfo info, CLR.HashSet <string> tagsToExclude = null)
        {
            var stopwatch = Stopwatch.StartNew();
            TagByQueryLookup lookup = GetTagByQueryLookup(info.Type);

            ThrowIfInvalidParameters(info.Tag, info.PageSize, lookup);
            ThrowIfInvalidParameters(info.OtherTag, info.PageSize, lookup);

            ComplexQueryResult outcome;
            var op = info.Operator;

            if (op == "AND")
            {
                outcome = AndQuery(lookup[info.Tag], lookup[info.OtherTag], info.PageSize, info.Skip, tagsToExclude);
            }
            else if (op == "AND-NOT")
            {
                outcome = AndNotQuery(lookup[info.Tag], lookup[info.OtherTag], info.PageSize, info.Skip, tagsToExclude);
            }
            else if (op == "OR")
            {
                outcome = OrQuery(lookup[info.Tag], lookup[info.OtherTag], info.PageSize, info.Skip, tagsToExclude);
            }
            else if (op == "OR-NOT")     //"i.e. .net+or+jquery-"
            {
                outcome = OrNotQuery(lookup[info.Tag], lookup[info.OtherTag], lookup[TagServer.ALL_TAGS_KEY], info.PageSize, info.Skip, tagsToExclude);
            }
            else
            {
                // TODO Work out what a "NOT" query really means, at the moment it would be the same as "AND-NOT"?!
                throw new InvalidOperationException(string.Format("Invalid operator specified: {0}", op ?? "<NULL>"));
            }

            stopwatch.Stop();
            var elapsed = stopwatch.Elapsed;

            Results.AddData(elapsed.TotalMilliseconds.ToString("#.##"));

            Logger.Log("NO LINQ  Boolean Query: \"{0}\" {1} \"{2}\", pageSize = {3:N0}, skip = {4:N0}, took {5} ({6:N2} ms) NO LINQ",
                       info.Tag, info.Operator, info.OtherTag, info.PageSize, info.Skip, elapsed, elapsed.TotalMilliseconds);

            var excludedTagsText = tagsToExclude == null ? "NO" : tagsToExclude.Count.ToString("N0");
            Logger.Log("Got {0:N0} results in total, baseQueryCounter = {1:N0}, itemsSkipped = {2:N0}, excludedCounter = {3:N0} ({4} tags to be excluded)",
                       outcome.Results.Count, outcome.BaseQueryCounter, outcome.ItemsSkipped,
                       outcome.ExcludedCounter, excludedTagsText);

            var counters = new Dictionary <string, int>
            {
                { "BaseQueryCounter", outcome.BaseQueryCounter },
                { "ItemsSkipped", outcome.ItemsSkipped },
                { "ExcludedCounter", outcome.ExcludedCounter }
            };

            return new QueryResult { Questions = outcome.Results, Counters = counters };
        }
Esempio n. 5
0
 /// <summary>
 /// Appends a lazy filter to <paramref name="query"/> that drops every question id whose question
 /// carries at least one excluded tag, bumping <paramref name="exclusionCounter"/> for each dropped id.
 /// </summary>
 /// <param name="query">Sequence of question ids to filter.</param>
 /// <param name="tagsToExclude">Tags that disqualify a question.</param>
 /// <param name="exclusionCounter">Incremented as ids are rejected (i.e. while the result is enumerated).</param>
 /// <returns>The filtered, still deferred, sequence.</returns>
 private IEnumerable <int> AddExclusionsToQuery(IEnumerable <int> query, CLR.HashSet <string> tagsToExclude, CounterWrapper exclusionCounter)
 {
     return query.Where(id =>
     {
         var hasExcludedTag = questions[id].Tags.Any(tag => tagsToExclude.Contains(tag));
         if (hasExcludedTag)
         {
             exclusionCounter.Counter++;
         }
         return !hasExcludedTag;
     });
 }
        /// <summary>
        /// Runs the AND / OR / AND-NOT / OR-NOT comparison queries for the ".net" and "jquery" tags
        /// with the supplied exclusions applied.
        /// </summary>
        /// <param name="tagServer">Server the queries are executed against.</param>
        /// <param name="tagsToExclude">Tags to exclude, forwarded to the comparison queries.</param>
        /// <param name="exclusionBitMap">Exclusion bit map, forwarded to the comparison queries.</param>
        /// <param name="queryTypeToTest">Query type (ordering) to test.</param>
        private static void RunComparisonQueries(TagServer tagServer, CLR.HashSet <string> tagsToExclude, EwahCompressedBitArray exclusionBitMap, QueryType queryTypeToTest)
        {
            const int pageSize = 25;

            // Earlier revisions also looked up a "small" tag (<= 200 questions) via AllTags...First() for
            // large-vs-small scenarios. Those scenarios are disabled, so the lookup has been removed:
            // it was unused and threw InvalidOperationException when no such tag existed.

            // Two large tags (noted in earlier revisions as probably the worst case), with "Exclusions" applied
            RunAndOrNotComparisionQueries(tagServer, ".net", "jquery", pageSize, queryTypeToTest, tagsToExclude, exclusionBitMap);
        }
        /// <summary>
        /// For each boolean operator, runs a fixed set of tag pairings through the Bit Map Index query,
        /// first without and then with the exclusion bit map, validating the results of every run.
        /// </summary>
        /// <param name="tagServer">Server the queries are executed against.</param>
        /// <param name="tagsToExclude">Excluded tags used to validate the second round of queries.</param>
        /// <param name="exclusionBitMapIndex">Exclusion bit map applied in the second round of queries.</param>
        /// <param name="queryTypeToTest">Query type (ordering) to test.</param>
        private static void TestBitMapIndexQueries(TagServer tagServer, CLR.HashSet <string> tagsToExclude, EwahCompressedBitArray exclusionBitMapIndex, QueryType queryTypeToTest)
        {
            var operators = new[] { "OR", "OR-NOT", "AND", "AND-NOT" };
            var pairings  = new[]
            {
                Tuple.Create("c#", "java"),
                Tuple.Create("c#", "jquery"),
                Tuple.Create("c#", "javascript"),
                Tuple.Create("c#", ".net-3.5"), // large -> small
                Tuple.Create(".net-3.5", "c#"), // small -> large
            };

            foreach (var op in operators)
            {
                // Round 1: WITHOUT the exclusion Bit Map Index
                using (Utils.SetConsoleColour(ConsoleColor.Green))
                {
                    Logger.Log("Running \"{0}\" Queries", op);
                }

                foreach (var pair in pairings)
                {
                    var plainQuery = new QueryInfo { Tag = pair.Item1, OtherTag = pair.Item2, Type = queryTypeToTest, Operator = op };
                    TestBitMapIndexAndValidateResults(tagServer, plainQuery);
                }

                // Round 2: WITH the exclusion Bit Map Index
                using (Utils.SetConsoleColour(ConsoleColor.Green))
                {
                    Logger.Log("Running \"{0}\" Queries and using an Exclusion Bit Map Index", op);
                }

                foreach (var pair in pairings)
                {
                    var excludingQuery = new QueryInfo { Tag = pair.Item1, OtherTag = pair.Item2, Type = queryTypeToTest, Operator = op };
                    TestBitMapIndexAndValidateResults(tagServer, excludingQuery,
                                                      tagsToExclude: tagsToExclude,
                                                      exclusionBitMap: exclusionBitMapIndex);
                }
            }
        }
Esempio n. 8
0
 /// <summary>
 /// Builds the exclusion bit map for the given tags by delegating to the bit map index handler.
 /// </summary>
 /// <param name="tagsToExclude">Tags whose questions are to be excluded.</param>
 /// <param name="queryType">Query type (ordering) the bit map is built for.</param>
 /// <param name="printLoggingMessages">Forwarded to the handler; defaults to false.</param>
 /// <returns>The bit map produced by the handler.</returns>
 public EwahCompressedBitArray CreateBitMapIndexForExcludedTags(CLR.HashSet <string> tagsToExclude, QueryType queryType, bool printLoggingMessages = false)
 {
     var exclusionBitMap = bitMapIndexHandler.CreateBitMapIndexForExcludedTags(tagsToExclude, queryType, printLoggingMessages);
     return exclusionBitMap;
 }
Esempio n. 9
0
 /// <summary>
 /// Runs a boolean comparison query through the non-LINQ implementation of the complex query processor.
 /// </summary>
 /// <param name="info">The query to execute.</param>
 /// <param name="tagsToExclude">Optional tags to exclude; forwarded unchanged.</param>
 /// <returns>The result returned by the query processor.</returns>
 public QueryResult ComparisonQueryNoLINQ(QueryInfo info, CLR.HashSet <string> tagsToExclude = null)
 {
     var queryResult = complexQueryProcessor.QueryNoLINQ(info, tagsToExclude);
     return queryResult;
 }
Esempio n. 10
0
        /// <summary>
        /// Benchmarks the "AND", "OR", "AND-NOT" and "OR-NOT" queries for a pair of tags (in both orders)
        /// across a range of skip counts, running the Regular (no LINQ), LINQ and Bit Map implementations.
        /// One results file is created per operator, and the Bit Map results are validated after each run.
        /// </summary>
        /// <param name="tagServer">Server the queries are executed against.</param>
        /// <param name="tag1">First tag; must be a key of <c>tagServer.AllTags</c>.</param>
        /// <param name="tag2">Second tag; must be a key of <c>tagServer.AllTags</c>.</param>
        /// <param name="pageSize">Page size used for every query.</param>
        /// <param name="queryTypeToTest">Query type (ordering) to test.</param>
        /// <param name="tagsToExclude">Optional excluded tags for the Regular/LINQ queries and for validation.</param>
        /// <param name="exclusionBitMap">Optional exclusion bit map for the Bit Map queries.</param>
        private static void RunAndOrNotComparisionQueries(TagServer tagServer, string tag1, string tag2, int pageSize, QueryType queryTypeToTest,
                                                          CLR.HashSet <string> tagsToExclude = null, EwahCompressedBitArray exclusionBitMap = null)
        {
            using (Utils.SetConsoleColour(ConsoleColor.Green))
            {
                Logger.LogStartupMessage("\nComparison queries:\n\t\"{0}\" has {1:N0} questions\n\t\"{2}\" has {3:N0} questions",
                                         tag1, tagServer.AllTags[tag1], tag2, tagServer.AllTags[tag2]);
            }

            var queries    = new[] { "AND", "OR", "AND-NOT", "OR-NOT" };
            var skipCounts = new[] { 0, 100, 250, 500, 1000, 2000, 4000, 8000 };

            foreach (var query in queries)
            {
                Results.CreateNewFile(string.Format("Results-{0}{1}-{2}-{3}-{4}-{5}.csv",
                                                    (tagsToExclude != null && exclusionBitMap != null) ? "With-Exclusions-" : "",
                                                    DateTime.Now.ToString("yyyy-MM-dd @ HH-mm-ss"), tag1, query, tag2, queryTypeToTest));
                Results.AddHeaders("Skip Count",
                                   String.Format("Regular {0} {1} {2}", tag1, query, tag2),
                                   String.Format("LINQ {0} {1} {2}", tag1, query, tag2),
                                   String.Format("BitMap {0} {1} {2}", tag1, query, tag2),
                                   String.Format("Regular {0} {1} {2}", tag2, query, tag1),
                                   String.Format("LINQ {0} {1} {2}", tag2, query, tag1),
                                   String.Format("BitMap {0} {1} {2}", tag2, query, tag1));

                using (Utils.SetConsoleColour(ConsoleColor.Yellow))
                {
                    Logger.LogStartupMessage("\n{0} Comparison queries: {1} {0} {2}\n", query, tag1, tag2);
                }

                foreach (var skipCount in skipCounts)
                {
                    Results.AddData(skipCount.ToString());

                    // Run the query both ways round, i.e. "c# AND-NOT jquery" as well as "jquery AND-NOT c#")
                    foreach (var tagPair in new[] { Tuple.Create(tag1, tag2), Tuple.Create(tag2, tag1) })
                    {
                        var info = new QueryInfo
                        {
                            Type     = queryTypeToTest,
                            Tag      = tagPair.Item1,
                            OtherTag = tagPair.Item2,
                            Operator = query,
                            PageSize = pageSize,
                            Skip     = skipCount
                        };

                        // The Regular and LINQ results are not compared at present (see the disabled calls below),
                        // but both queries must still run so that they can be timed.
                        Console.ForegroundColor = ConsoleColor.DarkGreen;
                        var resultRegular = tagServer.ComparisonQueryNoLINQ(info, tagsToExclude);
                        var resultLINQ    = tagServer.ComparisonQuery(info, tagsToExclude);

                        //using (Utils.SetConsoleColour(ConsoleColor.Gray));
                        //    Utils.CompareLists(resultRegular.Questions, "Regular", resultLINQ.Questions, "LINQ");

                        Console.ForegroundColor = ConsoleColor.Cyan;
                        var resultBitMap = tagServer.ComparisionQueryBitMapIndex(info, exclusionBitMap, printLoggingMessages: true);

                        // Previously the invalid results were computed and then silently dropped; report them
                        var invalidResults = tagServer.GetInvalidResults(resultBitMap.Questions, info);
                        if (invalidResults.Count > 0)
                        {
                            using (Utils.SetConsoleColour(ConsoleColor.Red))
                            {
                                Logger.LogStartupMessage("ERROR: invalidResults contains {0} items", invalidResults.Count);
                            }
                        }

                        var shouldHaveBeenExcludedResults = tagServer.GetShouldHaveBeenExcludedResults(resultBitMap.Questions, info, tagsToExclude);
                        if (shouldHaveBeenExcludedResults.Count > 0)
                        {
                            using (Utils.SetConsoleColour(ConsoleColor.Red))
                            {
                                Logger.LogStartupMessage("ERROR: shouldHaveBeenExcludedResults contains {0} items", shouldHaveBeenExcludedResults.Count);
                            }
                        }

                        using (Utils.SetConsoleColour(ConsoleColor.Red))
                        {
                            // See the TODO comments in ComplexQueryProcessor.cs for an explanation of this issue
                            if (query == "OR" || query == "OR-NOT")
                            {
                                Logger.LogStartupMessage("It is EXPECTED that {0} queries won't match when comparing \"Regular\" v. \"BitMap\"", query);
                            }
                        }

                        //using (Utils.SetConsoleColour(ConsoleColor.Gray));
                        //    Utils.CompareLists(resultRegular.Questions, "Regular", resultBitMap.Questions, "BitMap");
                    }

                    Console.ResetColor();
                    Results.StartNewRow();
                }

                Results.CloseFile();
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Hand-written "OR-NOT" query: walks <paramref name="tag1Ids"/> and <paramref name="allTagIds"/> in lock-step,
        /// emitting every id (from either sequence) that is not in <paramref name="tag2Ids"/>, has not been emitted
        /// before and, when exclusions are supplied, does not carry an excluded tag.
        /// </summary>
        /// <param name="tag1Ids">Question ids for the first tag.</param>
        /// <param name="tag2Ids">Question ids for the tag that must NOT be present.</param>
        /// <param name="allTagIds">Question ids for all tags.</param>
        /// <param name="pageSize">Maximum number of results to return.</param>
        /// <param name="skip">Number of matching items to skip before collecting results.</param>
        /// <param name="tagsToExclude">Optional tags; questions carrying any of them are counted as excluded.</param>
        /// <returns>The collected page plus the base-query, skipped and excluded counters.</returns>
        ComplexQueryResult OrNotQuery(int[] tag1Ids, int[] tag2Ids, int [] allTagIds, int pageSize, int skip, CLR.HashSet <string> tagsToExclude = null)
        {
            var queryResult = new ComplexQueryResult
            {
                Results          = new List <Question>(pageSize),
                BaseQueryCounter = 0,
                ItemsSkipped     = 0,
                ExcludedCounter  = 0
            };

            // TODO this has a small bug, we can get items out of order as we pull them thru in pairs
            // if t2 has several items that are larger than t1, t1 will still come out first!!
            // So algorithm needs to be:
            //  1) pull the LARGEST value (from t1 or t2)
            //  2) process this item
            //  3) repeat 1) again
            var idsToReject = cache.Value.GetCachedHashSet(tag2Ids);
            var alreadySeen = secondCache.Value.GetCachedHashSet();

            using (IEnumerator <int> firstTag = tag1Ids.AsEnumerable().GetEnumerator())
            using (IEnumerator <int> everyTag = allTagIds.AsEnumerable().GetEnumerator())
            {
                // The walk ends as soon as EITHER sequence runs out
                while (firstTag.MoveNext() && everyTag.MoveNext())
                {
                    if (queryResult.Results.Count >= pageSize)
                    {
                        break;
                    }

                    queryResult.BaseQueryCounter++;

                    // --- item from the first tag ---
                    var firstId = firstTag.Current;
                    if (tagsToExclude != null && questions[firstId].Tags.Any(t => tagsToExclude.Contains(t)))
                    {
                        queryResult.ExcludedCounter++;
                    }
                    else if (!idsToReject.Contains(firstId) && alreadySeen.Add(firstId))
                    {
                        if (queryResult.ItemsSkipped < skip)
                        {
                            queryResult.ItemsSkipped++;
                        }
                        else
                        {
                            queryResult.Results.Add(questions[firstId]);
                        }
                    }

                    if (queryResult.Results.Count >= pageSize)
                    {
                        break;
                    }
                    // TODO should we be doing this here as well!!?!?!
                    //baseQueryCounter++;

                    // --- item from the "all tags" list ---
                    var otherId = everyTag.Current;
                    if (tagsToExclude != null && questions[otherId].Tags.Any(t => tagsToExclude.Contains(t)))
                    {
                        queryResult.ExcludedCounter++;
                    }
                    else if (!idsToReject.Contains(otherId) && alreadySeen.Add(otherId))
                    {
                        if (queryResult.ItemsSkipped < skip)
                        {
                            queryResult.ItemsSkipped++;
                        }
                        else
                        {
                            queryResult.Results.Add(questions[otherId]);
                        }
                    }
                }
            }

            return queryResult;
        }
Esempio n. 12
0
        /// <summary>
        /// Hand-written "AND-NOT" query: returns the ids from <paramref name="tag1Ids"/> that are not in
        /// <paramref name="tag2Ids"/> (each id at most once), honouring the skip count, the page size
        /// and the optional tag exclusions.
        /// </summary>
        /// <param name="tag1Ids">Question ids for the tag that must be present.</param>
        /// <param name="tag2Ids">Question ids for the tag that must NOT be present.</param>
        /// <param name="pageSize">Maximum number of results to return.</param>
        /// <param name="skip">Number of matching items to skip before collecting results.</param>
        /// <param name="tagsToExclude">Optional tags; questions carrying any of them are counted as excluded.</param>
        /// <returns>The collected page plus the base-query, skipped and excluded counters.</returns>
        ComplexQueryResult AndNotQuery(int[] tag1Ids, int[] tag2Ids, int pageSize, int skip, CLR.HashSet <string> tagsToExclude = null)
        {
            var queryResult = new ComplexQueryResult
            {
                Results          = new List <Question>(pageSize),
                BaseQueryCounter = 0,
                ItemsSkipped     = 0,
                ExcludedCounter  = 0
            };

            // Same trick as https://github.com/ungood/EduLinq/blob/master/Edulinq/Except.cs#L26-L40
            // the set starts out holding tag2's ids, so Add(..) only succeeds for ids that are
            // neither in tag2 nor already returned.
            var rejectOrSeen = cache.Value.GetCachedHashSet(tag2Ids);

            for (var index = 0; index < tag1Ids.Length && queryResult.Results.Count < pageSize; index++)
            {
                var id = tag1Ids[index];
                queryResult.BaseQueryCounter++;

                if (tagsToExclude != null && questions[id].Tags.Any(t => tagsToExclude.Contains(t)))
                {
                    queryResult.ExcludedCounter++;
                    continue;
                }

                if (!rejectOrSeen.Add(id))
                {
                    continue;
                }

                if (queryResult.ItemsSkipped < skip)
                {
                    queryResult.ItemsSkipped++;
                }
                else
                {
                    queryResult.Results.Add(questions[id]);
                }
            }

            return queryResult;
        }
Esempio n. 13
0
        /// <summary>
        /// LINQ implementation of a two-tag boolean query ("AND", "AND-NOT", "OR" or "OR-NOT"). Builds a deferred
        /// query over the question ids of both tags, optionally filters out questions carrying excluded tags,
        /// then applies Skip/Take, records the elapsed time via <c>Results.AddData</c> and logs a summary.
        /// </summary>
        /// <param name="info">Tags, operator, query type, page size and skip count of the query.</param>
        /// <param name="tagsToExclude">Optional tags; questions carrying any of them are filtered out.</param>
        /// <returns>The page of questions plus the "TagCounter", "OtherTagCounter" and "ExclusionCounter" counters.</returns>
        /// <exception cref="InvalidOperationException">The operator is null or not one of the supported values.</exception>
        internal QueryResult Query(QueryInfo info, CLR.HashSet <string> tagsToExclude = null)
        {
            var timer = Stopwatch.StartNew();
            TagByQueryLookup queryInfo = GetTagByQueryLookup(info.Type);

            ThrowIfInvalidParameters(info.Tag, info.PageSize, queryInfo);
            ThrowIfInvalidParameters(info.OtherTag, info.PageSize, queryInfo);

            // The counters are bumped lazily, i.e. only for the items actually pulled through the query
            var tagCounter              = 0;
            var otherTagCounter         = 0;
            var exclusionCounter        = new CounterWrapper(initialValue: 0);
            IEnumerable <int> tag1Query = queryInfo[info.Tag].Select(t => { tagCounter++; return t; });
            IEnumerable <int> tag2Query = queryInfo[info.OtherTag].Select(t => { otherTagCounter++; return t; });
            IEnumerable <int> query;

            switch (info.Operator)
            {
            case "AND":
                query = tag1Query.Intersect(tag2Query);
                break;

            case "AND-NOT":
                query = tag1Query.Except(tag2Query);
                break;

            case "OR":
                // TODO this has a small bug, we can get items out of order as we pull them thru in pairs
                // if t2 has several items that are larger than t1, t1 will still come out first!!
                // So algorithm needs to be:
                //  1) pull the LARGEST value (from t1 or t2)
                //  2) process this item
                //  3) repeat 1) again
                query = tag1Query.Zip(tag2Query, (t1, t2) => new[] { t1, t2 })
                        .SelectMany(item => item)
                        .Distinct();
                break;

            case "OR-NOT":     //"i.e. .net+or+jquery-"
                // TODO same ordering bug as the "OR" case above (items are pulled through in pairs)
                query = tag1Query.Zip(queryInfo[TagServer.ALL_TAGS_KEY], (t1, t2) => new[] { t1, t2 })
                        .SelectMany(item => item)
                        .Except(tag2Query)
                        .Distinct();
                break;

            // TODO Work out what a "NOT" query really means, at the moment it's the same as "AND-NOT"?!
            //case "NOT":
            //    query = tag1Query.Except(tag2Query);
            //    break;

            default:
                throw new InvalidOperationException(string.Format("Invalid operator specified: {0}", info.Operator ?? "<NULL>"));
            }

            // Every supported operator applies the exclusions in the same way, so do it once here
            if (tagsToExclude != null)
            {
                query = AddExclusionsToQuery(query, tagsToExclude, exclusionCounter);
            }

            var results = query.Skip(info.Skip)
                          .Take(info.PageSize)
                          .Select(i => questions[i])
                          .ToList();

            timer.Stop();

            Results.AddData(timer.Elapsed.TotalMilliseconds.ToString("#.##"));

            Logger.Log("REGULAR  Boolean Query: \"{0}\" {1} \"{2}\", pageSize = {3:N0}, skip = {4:N0}, took {5} ({6:N2} ms) REGULAR",
                       info.Tag, info.Operator, info.OtherTag, info.PageSize, info.Skip, timer.Elapsed, timer.Elapsed.TotalMilliseconds);
            // Fixed: the tag2 counter used placeholder {1}, so the tag1 value was printed twice
            Logger.Log("Got {0:} results in total, tag1 QueryCounter = {1:N0}, tag2 QueryCounter = {2:N0}",
                       results.Count, tagCounter, otherTagCounter);

            //PrintResults(results, string.Format("{0} {1} {2}", info.Tag, info.Operator, info.OtherTag), info.Type);

            return new QueryResult
            {
                Questions = results,
                Counters = new Dictionary <string, int>
                {
                    { "TagCounter", tagCounter },
                    { "OtherTagCounter", otherTagCounter },
                    { "ExclusionCounter", exclusionCounter.Counter }
                }
            };
        }
        /// <summary>
        /// Builds a compressed bit map with one bit per position in the "all tags" question list for the given
        /// query type; a bit is set for every position whose question id belongs to one of the excluded tags.
        /// Optionally logs how long each phase took.
        /// </summary>
        /// <param name="tagsToExclude">Tags whose questions get a set bit; each must be a key of the lookup for <paramref name="queryType"/>.</param>
        /// <param name="queryType">Query type (ordering) the bit positions refer to.</param>
        /// <param name="printLoggingMessages">When true, timing and size details are logged.</param>
        /// <returns>The bit map, sized to the number of questions and shrunk.</returns>
        internal EwahCompressedBitArray CreateBitMapIndexForExcludedTags(CLR.HashSet <string> tagsToExclude, QueryType queryType, bool printLoggingMessages = false)
        {
            var totalTimer = Stopwatch.StartNew();

            // Phase 1: gather the ids of every question that carries an excluded tag
            var lookup      = GetTagByQueryLookup(queryType);
            var gatherTimer = Stopwatch.StartNew();
            var excludedIds = cache.Value.GetCachedHashSet();

            foreach (var excludedTag in tagsToExclude)
            {
                foreach (var questionId in lookup[excludedTag])
                {
                    excludedIds.Add(questionId);
                }
            }
            gatherTimer.Stop();

            // Phase 2: set a bit at each position whose question CAN'T be used.
            // NOTE(review): an earlier comment here said the finished bit map should be set where a question
            // CAN be used (so it could be ANDed onto results); this method sets bits for EXCLUDED questions
            // and does not invert the map - confirm which convention the consumers expect.
            var orderedIds = lookup[TagServer.ALL_TAGS_KEY];
            var fillTimer  = Stopwatch.StartNew();
            var bitMap     = new EwahCompressedBitArray();

            for (var position = 0; position < orderedIds.Length; position++)
            {
                if (!excludedIds.Contains(orderedIds[position]))
                {
                    continue;
                }

                if (!bitMap.SetOptimised(position))
                {
                    Logger.LogStartupMessage("Error, unable to set bit {0:N0} (SizeInBits = {1:N0})", position, bitMap.SizeInBits);
                }
            }
            fillTimer.Stop();

            // Phase 3: pad the bit map out to the full question count and release spare capacity
            var finishTimer = Stopwatch.StartNew();
            bitMap.SetSizeInBits(questions.Count, defaultvalue: false);
            bitMap.Shrink();
            finishTimer.Stop();

            totalTimer.Stop();

            if (!printLoggingMessages)
            {
                return bitMap;
            }

            var cardinality = bitMap.GetCardinality();

            Logger.LogStartupMessage("Took {0} ({1,6:N0} ms) to collect {2:N0} Question Ids from {3:N0} Tags",
                                     gatherTimer.Elapsed, gatherTimer.ElapsedMilliseconds, excludedIds.Count, tagsToExclude.Count);
            Logger.LogStartupMessage("Took {0} ({1,6:N0} ms) to set {2:N0} bits",
                                     fillTimer.Elapsed, fillTimer.ElapsedMilliseconds, cardinality);
            Logger.LogStartupMessage("Took {0} ({1,6:N0} ms) to tidy-up the Bit Map (SetSizeInBits(..) and Shrink()), Size={2:N0} bytes ({3:N2} MB)",
                                     finishTimer.Elapsed, finishTimer.ElapsedMilliseconds, bitMap.SizeInBytes, bitMap.SizeInBytes / 1024.0 / 1024.0);

            using (Utils.SetConsoleColour(ConsoleColor.DarkYellow))
            {
                Logger.LogStartupMessage("Took {0} ({1,6:N0} ms) in TOTAL, made BitMap from {2:N0} Tags ({3:N0} Qu Ids), Cardinality={4:N0} ({5:N0})\n",
                                         totalTimer.Elapsed, totalTimer.ElapsedMilliseconds,
                                         tagsToExclude.Count,
                                         excludedIds.Count,
                                         cardinality,
                                         (ulong)questions.Count - cardinality);
            }

            return bitMap;
        }
Esempio n. 15
0
 /// <summary>
 /// Finds the results that carry an excluded tag, by delegating to the validator.
 /// </summary>
 /// <param name="results">Query results to check.</param>
 /// <param name="queryInfo">The query that produced the results; forwarded unchanged.</param>
 /// <param name="tagsToExclude">The excluded tags; forwarded unchanged.</param>
 /// <returns>The list returned by the validator (each question paired with a list of tags).</returns>
 public List <Tuple <Question, List <string> > > GetShouldHaveBeenExcludedResults(List <Question> results, QueryInfo queryInfo, CLR.HashSet <string> tagsToExclude)
 {
     var offenders = validator.GetShouldHaveBeenExcludedResults(results, queryInfo, tagsToExclude);
     return offenders;
 }
Esempio n. 16
0
 /// <summary>
 /// Validates an exclusion bit map against the expanded excluded tags, by delegating to the validator.
 /// </summary>
 /// <param name="bitMapIndex">The exclusion bit map to validate.</param>
 /// <param name="expandedTagsNGrams">The expanded set of excluded tags.</param>
 /// <param name="queryType">Query type (ordering) the bit map was built for.</param>
 internal void ValidateExclusionBitMap(EwahCompressedBitArray bitMapIndex, CLR.HashSet <string> expandedTagsNGrams, QueryType queryType)
 {
     validator.ValidateExclusionBitMap(bitMapIndex: bitMapIndex, expandedTagsNGrams: expandedTagsNGrams, queryType: queryType);
 }
Esempio n. 17
0
        /// <summary>
        /// Finds every result that carries at least one excluded tag.
        /// </summary>
        /// <param name="results">Query results to check.</param>
        /// <param name="queryInfo">The query that produced the results (not used by this check).</param>
        /// <param name="tagsToExclude">The excluded tags; when null nothing can be wrong and an empty list is returned.</param>
        /// <returns>One entry per offending question, paired with the excluded tags found on it (in tag order).</returns>
        internal List <Tuple <Question, List <string> > > GetShouldHaveBeenExcludedResults(List <Question> results, QueryInfo queryInfo, CLR.HashSet <string> tagsToExclude)
        {
            var offenders = new List <Tuple <Question, List <string> > >();

            if (tagsToExclude != null)
            {
                foreach (var question in results)
                {
                    var excludedTagsFound = question.Tags.Where(tag => tagsToExclude.Contains(tag)).ToList();
                    if (excludedTagsFound.Count != 0)
                    {
                        offenders.Add(Tuple.Create(question, excludedTagsFound));
                    }
                }
            }

            return offenders;
        }