Пример #1
0
        /// <summary>
        /// Runs a two-tag boolean query ("AND", "AND-NOT", "OR", "OR-NOT") using lazily evaluated
        /// LINQ set operations and returns a single page of matching questions.
        /// </summary>
        /// <param name="info">
        /// Supplies the query type, the two tags, the operator and the paging values
        /// (<c>Skip</c> / <c>PageSize</c>).
        /// </param>
        /// <param name="tagsToExclude">
        /// Optional set of tags passed to <c>AddExclusionsToQuery</c>; when <c>null</c> no
        /// exclusion filter is added to the query.
        /// </param>
        /// <returns>
        /// The page of questions plus the "TagCounter", "OtherTagCounter" and "ExclusionCounter"
        /// values gathered while the query was enumerated.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <c>info.Operator</c> is not one of the supported operators (including <c>null</c>).
        /// </exception>
        internal QueryResult Query(QueryInfo info, CLR.HashSet<string> tagsToExclude = null)
        {
            var timer = Stopwatch.StartNew();
            TagByQueryLookup queryInfo = GetTagByQueryLookup(info.Type);

            ThrowIfInvalidParameters(info.Tag, info.PageSize, queryInfo);
            ThrowIfInvalidParameters(info.OtherTag, info.PageSize, queryInfo);

            // The counters are bumped from inside the lazy Select projections, so they only hold
            // meaningful values once the query has been enumerated by ToList() further down.
            var tagCounter       = 0;
            var otherTagCounter  = 0;
            var exclusionCounter = new CounterWrapper(initialValue: 0);
            IEnumerable<int> tag1Query = queryInfo[info.Tag].Select(t => { tagCounter++; return t; });
            IEnumerable<int> tag2Query = queryInfo[info.OtherTag].Select(t => { otherTagCounter++; return t; });
            IEnumerable<int> query;

            switch (info.Operator)
            {
            case "AND":
                query = tag1Query.Intersect(tag2Query);
                break;

            case "AND-NOT":
                query = tag1Query.Except(tag2Query);
                break;

            case "OR":
                // TODO this has a small bug, we can get items out of order as we pull them thru in pairs
                // if t2 has several items that are larger than t1, t1 will still come out first!!
                // So algorithm needs to be:
                //  1) pull the LARGEST value (from t1 or t2)
                //  2) process this item
                //  3) repeat 1) again
                // NOTE: Enumerable.Zip also stops as soon as EITHER sequence runs out, so the tail
                // of the longer sequence is never returned by this implementation.
                query = tag1Query.Zip(tag2Query, (t1, t2) => new[] { t1, t2 })
                        .SelectMany(pair => pair)
                        .Distinct();
                break;

            case "OR-NOT":     // i.e. ".net+or+jquery-"
                // TODO same ordering bug (and the same Zip truncation) as the "OR" case above:
                // items are pulled through in pairs rather than always taking the LARGEST value next.
                query = tag1Query.Zip(queryInfo[TagServer.ALL_TAGS_KEY], (t1, t2) => new[] { t1, t2 })
                        .SelectMany(pair => pair)
                        .Except(tag2Query)
                        .Distinct();
                break;

            // TODO Work out what a "NOT" query really means, at the moment it would be the same
            // as "AND-NOT" (i.e. tag1Query.Except(tag2Query)), so it is not offered as an operator.

            default:
                throw new InvalidOperationException(string.Format("Invalid operator specified: {0}", info.Operator ?? "<NULL>"));
            }

            // Every supported operator applies the exclusions in exactly the same way, so it is done
            // once here; an unsupported operator has already thrown before reaching this point.
            if (tagsToExclude != null)
            {
                query = AddExclusionsToQuery(query, tagsToExclude, exclusionCounter);
            }

            // Materialising the page is what actually drives the lazy pipeline (and the counters).
            var results = query.Skip(info.Skip)
                          .Take(info.PageSize)
                          .Select(i => questions[i])
                          .ToList();

            timer.Stop();

            Results.AddData(timer.Elapsed.TotalMilliseconds.ToString("#.##"));

            Logger.Log("REGULAR  Boolean Query: \"{0}\" {1} \"{2}\", pageSize = {3:N0}, skip = {4:N0}, took {5} ({6:N2} ms) REGULAR",
                       info.Tag, info.Operator, info.OtherTag, info.PageSize, info.Skip, timer.Elapsed, timer.Elapsed.TotalMilliseconds);
            // Fixed: the tag2 counter previously used placeholder {1}, so it printed tagCounter twice.
            Logger.Log("Got {0:} results in total, tag1 QueryCounter = {1:N0}, tag2 QueryCounter = {2:N0}",
                       results.Count, tagCounter, otherTagCounter);

            //PrintResults(results, string.Format("{0} {1} {2}", info.Tag, info.Operator, info.OtherTag), info.Type);

            return new QueryResult
            {
                Questions = results,
                Counters = new Dictionary<string, int>
                {
                    { "TagCounter", tagCounter },
                    { "OtherTagCounter", otherTagCounter },
                    { "ExclusionCounter", exclusionCounter.Counter }
                }
            };
        }
Пример #2
0
        /// <summary>
        /// Runs a two-tag boolean query ("AND", "AND-NOT", "OR", "OR-NOT") by combining the
        /// per-tag compressed bitmaps, optionally applies an exclusion bitmap, and returns a
        /// single page of matching questions.
        /// </summary>
        /// <param name="info">
        /// Supplies the query type, the two tags, the operator and the paging values
        /// (<c>Skip</c> / <c>PageSize</c>).
        /// </param>
        /// <param name="exclusionBitMap">
        /// Optional bitmap combined into the result with <c>AndNot</c>; when <c>null</c> no
        /// exclusions are applied.
        /// </param>
        /// <param name="printLoggingMessages">
        /// When <c>true</c>, timing (and cardinality) diagnostics are written via <c>Logger</c>.
        /// </param>
        /// <returns>A <c>QueryResult</c> whose <c>Questions</c> holds the requested page; <c>Counters</c> is not populated.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <c>info.Operator</c> is not one of the supported operators (including <c>null</c>).
        /// </exception>
        internal QueryResult Query(QueryInfo info, EwahCompressedBitArray exclusionBitMap = null, bool printLoggingMessages = false)
        {
            var bitMap         = GetTagByQueryBitMapLookup(info.Type);
            var questionLookup = GetTagByQueryLookup(info.Type)[TagServer.ALL_TAGS_KEY];

            // Calculating the Cardinality can be (is?) expensive, we don't want to do it in Queries unless we really need to!?
            bool calculateCardinality = true; // false

            if (printLoggingMessages)
            {
                Logger.Log("Tag \"{0}\" is in {1:N0} Questions, Tag \"{2}\" is in {3:N0} Questions",
                           info.Tag, allTags[info.Tag], info.OtherTag, allTags[info.OtherTag]);
            }

            //PrintResults(Enumerable.Range(0, questionLookup.Length), qu => questionLookup[qu], TagServer.ALL_TAGS_KEY, info.Type);
            //PrintResults(bitMap[info.Tag], qu => questionLookup[qu], info.Tag, info.Type);
            //PrintResults(bitMap[info.OtherTag], qu => questionLookup[qu], info.OtherTag, info.Type);

            var timer      = Stopwatch.StartNew();
            var tag1BitMap = bitMap[info.Tag];
            var tag2BitMap = bitMap[info.OtherTag];

            // No placeholder instance is allocated up front: every case assigns the result and
            // the default case throws, so the variable is definitely assigned after the switch.
            EwahCompressedBitArray bitMapResult;

            switch (info.Operator)
            {
            case "AND":
                bitMapResult = tag1BitMap.And(tag2BitMap);
                break;

            case "AND-NOT":
                bitMapResult = tag1BitMap.AndNot(tag2BitMap);
                break;

            case "OR":
                bitMapResult = tag1BitMap.Or(tag2BitMap);
                break;

            case "OR-NOT":     // i.e. ".net+or+jquery-"
                bitMapResult = tag1BitMap.OrNot(tag2BitMap);
                break;

            // TODO Work out what a "NOT" query really means, the LINQ version was
            // "result = tag1Query.Except(tag2Query)" (which is the same as AND-NOT?!)

            default:
                throw new InvalidOperationException(string.Format("Invalid operator specified: {0}", info.Operator ?? "<NULL>"));
            }

            if (exclusionBitMap != null)
            {
                // Only pay for the "before" cardinality when it is actually going to be logged.
                ulong cardinalityBeforeExclusions = 0;
                if (printLoggingMessages && calculateCardinality)
                {
                    cardinalityBeforeExclusions = bitMapResult.GetCardinality();
                }

                // The Exclusion BitMap is Set (i.e. 1) in places where you CAN use the question, i.e. it's NOT excluded
                // That way we can efficiently apply the exclusions by ANDing this BitMap to the previous results
                // NOTE(review): the two lines above describe an AND, but the call below is AndNot, so
                // either this comment or the operation is out of date - confirm against the code that
                // builds exclusionBitMap before changing either.
                var exclusionTimer = Stopwatch.StartNew();
                bitMapResult = bitMapResult.AndNot(exclusionBitMap);
                exclusionTimer.Stop();

                if (printLoggingMessages)
                {
                    if (calculateCardinality)
                    {
                        Logger.Log("Took {0,5:N2} ms to apply exclusion BitMap (Cardinality={1:N0}), Results Cardinality: Before={2:N0}, After={3:N0}",
                                   exclusionTimer.Elapsed.TotalMilliseconds, exclusionBitMap.GetCardinality(), cardinalityBeforeExclusions, bitMapResult.GetCardinality());
                    }
                    else
                    {
                        Logger.Log("Took {0,5:N2} ms to apply exclusion BitMap", exclusionTimer.Elapsed.TotalMilliseconds);
                    }
                }
            }

            // Each value produced by enumerating the bitmap is used as an index into questionLookup,
            // which in turn yields the index into questions.
            var resultCollectionTimer = Stopwatch.StartNew();
            var result = bitMapResult.Skip(info.Skip)
                         .Take(info.PageSize)
                         .Select(i => questions[questionLookup[i]])
                         .ToList();

            resultCollectionTimer.Stop();
            if (printLoggingMessages)
            {
                Logger.Log("Took {0,5:N2} ms to collect the results", resultCollectionTimer.Elapsed.TotalMilliseconds);
            }

            timer.Stop();

            Results.AddData(timer.Elapsed.TotalMilliseconds.ToString("#.##"));

            if (printLoggingMessages)
            {
                using (Utils.SetConsoleColour(ConsoleColor.DarkYellow))
                {
                    if (calculateCardinality)
                    {
                        Logger.Log("Took {0,5:N2} ms in TOTAL to calculate \"{1} {2} {3}\", Got {4} results, (Result Cardinality={5:N0})",
                                   timer.Elapsed.TotalMilliseconds, info.Tag, info.Operator, info.OtherTag, result.Count, bitMapResult.GetCardinality());
                    }
                    else
                    {
                        Logger.Log("Took {0,5:N2} ms in TOTAL to calculate \"{1} {2} {3}\", Got {4} results",
                                   timer.Elapsed.TotalMilliseconds, info.Tag, info.Operator, info.OtherTag, result.Count);
                    }
                }
                //PrintResults(bitMapResult, qu => questionLookup[qu], string.Format("{0} {1} {2}", info.Tag, info.Operator, info.OtherTag), info.Type);
                Logger.Log();
            }

            return new QueryResult
            {
                Questions = result,
                // TODO see if we can get meaningful numbers here, WITHOUT calling GetCardinality() (because it's expensive)
                //Counters = new Dictionary<string, int>
                //{
                //    { "TagCounter", tagCounter },
                //    { "OtherTagCounter", otherTagCounter },
                //    { "ExclusionCounter", exclusionCounter.Counter }
                //}
            };
        }