コード例 #1
0
        /// <summary>
        /// Sanity-checks an exclusion bitmap against the expanded set of excluded tags and logs the outcome.
        /// Two passes are made:
        ///   1) every position that is NOT set in <paramref name="bitMapIndex"/> must map to a question
        ///      that carries none of the tags in <paramref name="expandedTagsNGrams"/>;
        ///   2) every position that IS set must map to a question carrying at least one such tag.
        /// Violations are only logged; nothing is thrown or returned.
        /// </summary>
        /// <param name="bitMapIndex">The bitmap being validated. It is cloned before being inverted, so the caller's instance is not negated.</param>
        /// <param name="expandedTagsNGrams">The set of tags that mark a question as excluded.</param>
        /// <param name="queryType">Selects which tag-by-query lookup is used to translate bit positions into question ids.</param>
        internal void ValidateExclusionBitMap(EwahCompressedBitArray bitMapIndex, CLR.HashSet <string> expandedTagsNGrams, QueryType queryType)
        {
            // NOTE(review): the checks below treat a SET bit as "this question carries an excluded tag".
            // Confirm that this matches the polarity used where the bitmap is built.
            var allTagsLookup = GetTagByQueryLookup(queryType)[TagServer.ALL_TAGS_KEY];

            // Pass 1: work on an inverted copy so that we can enumerate the positions that are NOT set.
            var invertedBitMap = (EwahCompressedBitArray)bitMapIndex.Clone();
            invertedBitMap.Not();
            var unsetPositions = invertedBitMap.GetPositions();

            var violations = new List <Tuple <Question, string> >();
            foreach (var unsetPosition in unsetPositions)
            {
                var candidate = questions[allTagsLookup[unsetPosition]];
                foreach (var tag in candidate.Tags)
                {
                    if (!expandedTagsNGrams.Contains(tag))
                    {
                        continue;
                    }

                    // One entry per offending tag, so a question can appear more than once.
                    violations.Add(Tuple.Create(candidate, tag));
                }

                // Sometimes the validation locks up my laptop, this *seems* to make a difference?!
                Thread.Yield();
            }

            using (Utils.SetConsoleColour(ConsoleColor.Blue))
            {
                Logger.Log("Validating Exclusion Bit Map, checked {0:N0} positions for INVALID tags", unsetPositions.Count);
            }

            if (violations.Count > 0)
            {
                using (Utils.SetConsoleColour(ConsoleColor.Red))
                {
                    // Report distinct questions, not (question, tag) pairs.
                    var distinctQuestionCount = violations.Select(v => v.Item1.Id).Distinct().Count();
                    Logger.Log("ERROR Validating Exclusion Bit Map, {0:N0} questions should have been excluded",
                               distinctQuestionCount);
                }

                foreach (var violation in violations)
                {
                    var offender = violation.Item1;
                    Logger.Log("  {0,8}: {1} -> {2}", offender.Id, String.Join(", ", offender.Tags), violation.Item2);
                }
            }

            // Pass 2: every position that IS set must belong to a question with at least one excluded tag.
            var setPositions = bitMapIndex.GetPositions();
            foreach (var setPosition in setPositions)
            {
                var candidate = questions[allTagsLookup[setPosition]];
                if (candidate.Tags.Any(tag => expandedTagsNGrams.Contains(tag)))
                {
                    continue;
                }

                using (Utils.SetConsoleColour(ConsoleColor.Red))
                {
                    Logger.Log("ERROR {0,8}: {1} -> didn't contain ANY excluded tags", candidate.Id, String.Join(", ", candidate.Tags));
                }
            }

            using (Utils.SetConsoleColour(ConsoleColor.Blue))
            {
                Logger.Log("Validating Exclusion Bit Map, checked {0:N0} positions for EXPECTED tags", setPositions.Count);
            }

            Logger.Log();
        }
コード例 #2
0
        /// <summary>
        /// Finds the questions in <paramref name="results"/> that carry at least one tag from
        /// <paramref name="tagsToExclude"/>, i.e. results that should not have been returned.
        /// </summary>
        /// <param name="results">The query results to inspect.</param>
        /// <param name="queryInfo">Not read by this method.</param>
        /// <param name="tagsToExclude">The excluded tags; when <c>null</c> no checking is done.</param>
        /// <returns>
        /// One tuple per offending question, paired with the list of its tags that are in
        /// <paramref name="tagsToExclude"/>. Empty when there are no offenders or no exclusion set.
        /// </returns>
        internal List <Tuple <Question, List <string> > > GetShouldHaveBeenExcludedResults(List <Question> results, QueryInfo queryInfo, CLR.HashSet <string> tagsToExclude)
        {
            var offenders = new List <Tuple <Question, List <string> > >();

            // Without an exclusion set nothing can be in violation.
            if (tagsToExclude != null)
            {
                foreach (var question in results)
                {
                    var matchedTags = new List <string>();
                    foreach (var tag in question.Tags)
                    {
                        if (!tagsToExclude.Contains(tag))
                        {
                            continue;
                        }
                        matchedTags.Add(tag);
                    }

                    if (matchedTags.Count == 0)
                    {
                        continue;
                    }
                    offenders.Add(Tuple.Create(question, matchedTags));
                }
            }

            return offenders;
        }
コード例 #3
0
 /// <summary>
 /// Wraps <paramref name="query"/> in a lazy filter that drops every question id whose question
 /// carries at least one tag from <paramref name="tagsToExclude"/>.
 /// </summary>
 /// <param name="query">The question ids to filter.</param>
 /// <param name="tagsToExclude">The excluded tags; dereferenced without a null check.</param>
 /// <param name="exclusionCounter">Its <c>Counter</c> is incremented once for each id that gets dropped.</param>
 /// <returns>
 /// A deferred sequence. The counter is only updated while the sequence is being enumerated,
 /// and is updated again if the sequence is enumerated more than once.
 /// </returns>
 private IEnumerable <int> AddExclusionsToQuery(IEnumerable <int> query, CLR.HashSet <string> tagsToExclude, CounterWrapper exclusionCounter)
 {
     return query.Where(questionId =>
     {
         var carriesExcludedTag = questions[questionId].Tags.Any(tag => tagsToExclude.Contains(tag));
         if (carriesExcludedTag)
         {
             exclusionCounter.Counter++;
         }
         return !carriesExcludedTag;
     });
 }
コード例 #4
0
        /// <summary>
        /// Produces one page of the questions listed in <paramref name="tag1Ids"/> that pass the
        /// "not in tag 2" filter and carry no tag from <paramref name="tagsToExclude"/>.
        /// </summary>
        /// <param name="tag1Ids">Question ids to walk, in order.</param>
        /// <param name="tag2Ids">Ids handed to the hash set cache to obtain the "not" set.</param>
        /// <param name="pageSize">Maximum number of questions to return.</param>
        /// <param name="skip">Number of matching questions to pass over before results are collected.</param>
        /// <param name="tagsToExclude">Optional excluded tags; <c>null</c> disables exclusion checking.</param>
        /// <returns>
        /// The page of results together with counters for ids examined, ids dropped by exclusion,
        /// and matches skipped for paging.
        /// </returns>
        ComplexQueryResult AndNotQuery(int[] tag1Ids, int[] tag2Ids, int pageSize, int skip, CLR.HashSet <string> tagsToExclude = null)
        {
            var page = new ComplexQueryResult
            {
                Results          = new List <Question>(pageSize),
                BaseQueryCounter = 0,
                ItemsSkipped     = 0,
                ExcludedCounter  = 0
            };

            // Same approach as https://github.com/ungood/EduLinq/blob/master/Edulinq/Except.cs#L26-L40
            // The set is presumably pre-populated from tag2Ids by the cache - TODO confirm.
            var rejectedOrSeenIds = cache.Value.GetCachedHashSet(tag2Ids);

            foreach (var questionId in tag1Ids)
            {
                if (page.Results.Count >= pageSize)
                {
                    break;
                }

                page.BaseQueryCounter++;

                var isExcluded = tagsToExclude != null &&
                                 questions[questionId].Tags.Any(tag => tagsToExclude.Contains(tag));
                if (isExcluded)
                {
                    page.ExcludedCounter++;
                    continue;
                }

                // Add() doubles as the membership test: it is expected to return false for an id that
                // is already in the set, which also filters repeats within tag1Ids. Note that this
                // mutates the set handed out by the cache.
                if (!rejectedOrSeenIds.Add(questionId))
                {
                    continue;
                }

                if (page.ItemsSkipped < skip)
                {
                    page.ItemsSkipped++;
                    continue;
                }

                page.Results.Add(questions[questionId]);
            }

            return page;
        }
コード例 #5
0
        /// <summary>
        /// Logs summary statistics about the "Leppie" exclusion tag list: how many entries are wildcards,
        /// how many tags the list expands to, how many tags remain once those are removed, and how many
        /// questions fall on either side. The "Expected" figures in the output let the totals be eyeballed.
        /// </summary>
        /// <param name="rawQuestions">All questions.</param>
        /// <param name="allTags">Lookup whose keys are all known tags.</param>
        /// <param name="leppieTags">The exclusion list as supplied; entries containing '*' are counted as wildcards.</param>
        /// <param name="leppieExpandedTags">The exclusion list after expansion.</param>
        private static void GetLeppieTagInfo(List <Question> rawQuestions, TagLookup allTags, List <string> leppieTags, HashSet leppieExpandedTags)
        {
            Logger.Log("\nThere are {0:N0} questions and {1:N0} tags in total", rawQuestions.Count, allTags.Count);

            var leppieTagCount   = leppieTags.Count;
            var wildcardTagCount = leppieTags.Count(tag => tag.Contains('*'));
            Logger.Log("Leppie list of {0:N0} tags contains {1:N0} that are wildcards", leppieTagCount, wildcardTagCount);
            Logger.Log("Leppie {0:N0} tags with wildcards expand to {1:N0} tags in total", leppieTagCount, leppieExpandedTags.Count);

            // Everything in allTags that is not part of the expanded exclusion list.
            var remainingTagsHashSet = new CLR.HashSet <string>(allTags.Keys);
            remainingTagsHashSet.ExceptWith(leppieExpandedTags);

            var remainingTagCount = remainingTagsHashSet.Count;
            var expandedTagCount  = leppieExpandedTags.Count;
            Logger.LogStartupMessage("There are {0:N0} tags remaining, {0:N0} + {1:N0} = {2:N0} (Expected: {3:N0})",
                                     remainingTagCount, expandedTagCount,
                                     remainingTagCount + expandedTagCount, allTags.Count);

            Logger.LogStartupMessage("Sanity checking excluded/included tags and questions...");

            // A question is "excluded" when ANY of its tags is in the expanded list, and "included"
            // when ALL of its tags are in the remaining set. The two are counted independently.
            var excludedQuestionCounter = 0;
            var includedQuestionCounter = 0;
            foreach (var question in rawQuestions)
            {
                if (question.Tags.Any(tag => leppieExpandedTags.Contains(tag)))
                {
                    excludedQuestionCounter++;
                }
                if (question.Tags.All(tag => remainingTagsHashSet.Contains(tag)))
                {
                    includedQuestionCounter++;
                }
            }

            Logger.Log("{0:N0} EXCLUDED tags cover {1:N0} questions (out of {2:N0})",
                       leppieExpandedTags.Count, excludedQuestionCounter, rawQuestions.Count);
            Logger.Log(
                "{0:N0} remaining tags cover {1:N0} questions, {2:N0} + {3:N0} = {4:N0} (Expected: {5:N0})",
                remainingTagsHashSet.Count, includedQuestionCounter,
                includedQuestionCounter, excludedQuestionCounter,
                includedQuestionCounter + excludedQuestionCounter, rawQuestions.Count);
            Logger.Log();
        }
コード例 #6
0
        /// <summary>
        /// Produces one page of questions by walking <paramref name="tag1Ids"/> and
        /// <paramref name="allTagIds"/> in lock-step, one id from each per iteration. An id is kept when
        /// its question carries no tag from <paramref name="tagsToExclude"/>, it is not in the hash set
        /// obtained for <paramref name="tag2Ids"/>, and it has not been kept before in this call.
        /// </summary>
        /// <remarks>
        /// The walk ends as soon as either array runs out or the page is full.
        /// <c>BaseQueryCounter</c> is incremented once per pair, not once per id.
        /// </remarks>
        /// <param name="tag1Ids">First stream of question ids.</param>
        /// <param name="tag2Ids">Ids handed to the hash set cache to obtain the "not" set.</param>
        /// <param name="allTagIds">Second stream of question ids.</param>
        /// <param name="pageSize">Maximum number of questions to return.</param>
        /// <param name="skip">Number of matching questions to pass over before results are collected.</param>
        /// <param name="tagsToExclude">Optional excluded tags; <c>null</c> disables exclusion checking.</param>
        /// <returns>The page of results together with the examined / excluded / skipped counters.</returns>
        ComplexQueryResult OrNotQuery(int[] tag1Ids, int[] tag2Ids, int [] allTagIds, int pageSize, int skip, CLR.HashSet <string> tagsToExclude = null)
        {
            var page = new ComplexQueryResult
            {
                Results          = new List <Question>(pageSize),
                BaseQueryCounter = 0,
                ItemsSkipped     = 0,
                ExcludedCounter  = 0
            };

            // TODO this has a small bug, we can get items out of order as we pull them thru in pairs
            // if t2 has several items that are larger than t1, t1 will still come out first!!
            // So algorithm needs to be:
            //  1) pull the LARGEST value (from t1 or t2)
            //  2) process this item
            //  3) repeat 1) again
            var idsToReject   = cache.Value.GetCachedHashSet(tag2Ids);
            var idsAlreadyHit = secondCache.Value.GetCachedHashSet();

            using (IEnumerator <int> tag1Cursor = tag1Ids.AsEnumerable().GetEnumerator())
            using (IEnumerator <int> allTagsCursor = allTagIds.AsEnumerable().GetEnumerator())
            {
                while (tag1Cursor.MoveNext() && allTagsCursor.MoveNext())
                {
                    if (page.Results.Count >= pageSize)
                    {
                        break;
                    }

                    page.BaseQueryCounter++;

                    // First half of the pair: the id from tag1Ids.
                    var tag1Id = tag1Cursor.Current;
                    if (tagsToExclude != null && questions[tag1Id].Tags.Any(tag => tagsToExclude.Contains(tag)))
                    {
                        page.ExcludedCounter++;
                    }
                    else if (!idsToReject.Contains(tag1Id) && idsAlreadyHit.Add(tag1Id))
                    {
                        if (page.ItemsSkipped < skip)
                        {
                            page.ItemsSkipped++;
                        }
                        else
                        {
                            page.Results.Add(questions[tag1Id]);
                        }
                    }

                    // The first half may have just filled the page.
                    if (page.Results.Count >= pageSize)
                    {
                        break;
                    }
                    // TODO should BaseQueryCounter be incremented here as well!!?!?!

                    // Second half of the pair: the id from allTagIds.
                    var allTagsId = allTagsCursor.Current;
                    if (tagsToExclude != null && questions[allTagsId].Tags.Any(tag => tagsToExclude.Contains(tag)))
                    {
                        page.ExcludedCounter++;
                    }
                    else if (!idsToReject.Contains(allTagsId) && idsAlreadyHit.Add(allTagsId))
                    {
                        if (page.ItemsSkipped < skip)
                        {
                            page.ItemsSkipped++;
                        }
                        else
                        {
                            page.Results.Add(questions[allTagsId]);
                        }
                    }
                }
            }

            return page;
        }