/// <summary>
        /// Builds the autocompletion list for a prefix from the queries already stored in that
        /// prefix's bucket, then records the full query in the same bucket.
        /// </summary>
        /// <remarks>
        /// The candidates are read before <paramref name="fullQuery"/> is added to the bucket.
        /// A bucket is created (and stored in the index) the first time a prefix is seen.
        /// </remarks>
        /// <param name="queryTime">Time of the query; not used by this implementation.</param>
        /// <param name="partialQuery">The prefix typed so far; used as the key into the prefix index.</param>
        /// <param name="fullQuery">The complete query; added to the prefix's bucket on every call.</param>
        /// <returns>
        /// The list produced by <c>CreateAutoCompletionList</c> for the bucket's entries, or an empty
        /// <c>AutoCompletionList</c> when the bucket holds no entries yet.
        /// </returns>
        protected override AutoCompletionList AutoCompleteQuery(DateTime queryTime, string partialQuery, string fullQuery)
        {
            // Single lookup for the prefix's bucket; create and register it on first sight
            NonTemporalBucket<BaseIndexEntry> prefixBucket;
            if (!_indexEntries.TryGetValue(partialQuery, out prefixBucket))
            {
                prefixBucket = new NonTemporalBucket<BaseIndexEntry>(_qMaxSum, _qMaxFrequency);
                _indexEntries[partialQuery] = prefixBucket;
            }

            IEnumerable<BaseIndexEntry> allPrefixEntries = prefixBucket.AllBucketQueries;

            // Build the autocompletions before the new query is indexed; an empty bucket yields
            // an empty list. Any() stops at the first entry instead of counting the whole bucket.
            AutoCompletionList autoCompletionListOutput = allPrefixEntries.Any()
                ? CreateAutoCompletionList(allPrefixEntries)
                : new AutoCompletionList();

            // Add the new query to the index (done on every path)
            prefixBucket.AddQuery(fullQuery, this);

            // Return the autocompletion list ready to be sent off for evaluation
            return autoCompletionListOutput;
        }
        // Example no. 2 (separate listing: another AutoCompleteQuery override)
        /// <summary>
        /// Builds the autocompletion list for a prefix using per-prefix non-temporal buckets (NTBs)
        /// and an online-trained prediction, then records the full query in the buckets.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. Increment the global query counter.
        /// 2. Create the prefix's NTBs (chained, overall, base, training) if they do not exist yet.
        /// 3. When the number of queries observed for the prefix hits the training threshold, apply
        ///    the pending training package (if any), build a new one and reset the observed count.
        /// 4. Score the entries of the overall NTB and build the autocompletion list.
        /// 5. Increment the observed count and add <paramref name="fullQuery"/> to the first chained
        ///    NTB and to the overall, base and training NTBs.
        /// </remarks>
        /// <param name="queryTime">Time of the query; not used by this implementation.</param>
        /// <param name="partialQuery">The prefix typed so far; keys all per-prefix state.</param>
        /// <param name="fullQuery">The complete query; added to the buckets after the list is built.</param>
        /// <returns>The list produced by <c>CreateAutoCompletionList</c> from the scored entries.</returns>
        protected override AutoCompletionList AutoCompleteQuery(DateTime queryTime, string partialQuery, string fullQuery)
        {
            // Increment the current query count
            _currentQueryCount++;

            // Determine whether the multiple NTBs for a prefix need to be created first
            #region Setup NTBs
            if (!_ntbsCreated.Contains(partialQuery))
            {
                for (int i = 0; i < _multipleNs.Length; i++)
                {
                    // Create the i-th chained bucket for the prefix
                    _ntbs[i][partialQuery] = new NonTemporalBucket<BaseIndexEntry>(_multipleNs[i], _multipleMaxSingleQueryN[i]);

                    // Chain the buckets: a query removed from bucket i-1 is passed to bucket i's AddQueryEvent
                    if (i > 0)
                    {
                        _ntbs[i - 1][partialQuery].OnQueryRemovedFromNTB += _ntbs[i][partialQuery].AddQueryEvent;
                    }
                }

                // Create overall NTB
                _overallNtb[partialQuery] = new NonTemporalBucket<BaseIndexEntry>(_totalQueriesAcrossAllNTBs, _totalQueriesAcrossAllNTBs);

                // Create base NTB
                _baseNtb[partialQuery] = new NonTemporalBucket<BaseIndexEntry>(_baseNtbSize, _baseNtbSize);

                // Track that the NTBs have been created for the prefix
                _ntbsCreated.Add(partialQuery);
            }

            // Create the training NTB, sized for the training horizon (in queries)
            if (!_trainingNtb.ContainsKey(partialQuery))
            {
                _trainingNtb[partialQuery] = new NonTemporalBucket<BaseIndexEntry>(_trainAfterNQueriesForPrefix, _trainAfterNQueriesForPrefix);
            }
            #endregion

            // Deal with the online training
            #region Handle training package building and use
            if (!_queriesObservedForPrefix.ContainsKey(partialQuery))
            {
                _queriesObservedForPrefix[partialQuery] = 0;
            }

            bool prefixHasStartedTraining = _prefixHasStartedTraining.Contains(partialQuery);

            // The first package is due when the observed count equals _totalQueriesAcrossAllNTBs
            // (biggest NTB is full); later packages are due every _trainAfterNQueriesForPrefix queries
            bool trainingIsDue = prefixHasStartedTraining
                ? _queriesObservedForPrefix[partialQuery] == _trainAfterNQueriesForPrefix
                : _queriesObservedForPrefix[partialQuery] == _totalQueriesAcrossAllNTBs;

            if (trainingIsDue)
            {
                // Apply the package built at the previous training point, if there is one
                TrainingPackage trainingPackage;
                if (_prefixTrainingPackages.TryGetValue(partialQuery, out trainingPackage))
                {
                    trainingPackage.QueryCountAtTrain = _currentQueryCount;

                    // Set each packaged query's target likelihood to its frequency in the training NTB;
                    // queries with zero frequency keep whatever target they already had
                    NonTemporalBucket<BaseIndexEntry> trainingBucket = _trainingNtb[partialQuery];
                    foreach (FeaturePackage queryFeaturePackage in trainingPackage.TrainingPackageQueries)
                    {
                        double queryCount = trainingBucket.GetQueryFrequency(queryFeaturePackage.Query);
                        if (queryCount > 0)
                        {
                            queryFeaturePackage.TargetLikelihood = queryCount;
                        }
                    }

                    // Use the training package for the model
                    UseTrainingPackage(trainingPackage);
                    _firstTrainingHasHappened = true;
                    _trainCount++;
                }

                // Build the next package from the existing queries; it is flagged as the first one
                // when the prefix had not started training before this call
                _prefixTrainingPackages[partialQuery] = BuildTrainingPackage(partialQuery, !prefixHasStartedTraining);

                _prefixHasStartedTraining.Add(partialQuery); // Mark the prefix as started training

                // Reset the queries observed for the prefix, ready for the next training package
                _queriesObservedForPrefix[partialQuery] = 0;
            }
            #endregion

            NonTemporalBucket<BaseIndexEntry> overallNTB = _overallNtb[partialQuery];
            NonTemporalBucket<BaseIndexEntry> baseNTB = _baseNtb[partialQuery];

            // Predictions are only used once the overall NTB is full; nothing in the loop below adds
            // to the bucket, so the check is evaluated once up front
            bool overallBucketIsFull = overallNTB.TotalQueriesInBucket == overallNTB.QMaxSum;

            // The prefix entries for output
            List<BaseIndexEntry> outputPrefixEntries = new List<BaseIndexEntry>();

            // Compute scores on the auto-completions held in the overall NTB
            foreach (BaseIndexEntry prefixEntry in overallNTB.AllBucketQueries)
            {
                if (prefixEntry.QueryLogFrequency < 2)
                {
                    continue; // Ignore low frequency completions - increases speed and removes junk
                }

                BaseIndexEntry outputIndexEntry = new BaseIndexEntry();
                outputIndexEntry.Query = prefixEntry.Query;

                if (overallBucketIsFull)
                {
                    // Rank by predicted likelihood, offset by 2.0 and rounded to 5 decimals
                    outputIndexEntry.QueryLogFrequency = Math.Round(2.0 + MakePrediction(prefixEntry, partialQuery), 5);
                }
                else
                {
                    // Rank by the query's frequency in the base NTB
                    // TODO (from original): consider using prefixEntry.QueryLogFrequency instead
                    outputIndexEntry.QueryLogFrequency = baseNTB.GetQueryFrequency(outputIndexEntry.Query);
                    if (outputIndexEntry.QueryLogFrequency < 2)
                    {
                        continue;
                    }
                }

                outputPrefixEntries.Add(outputIndexEntry);
            }

            // Create and rank the autocompletions
            AutoCompletionList autoCompletionListOutput = CreateAutoCompletionList(outputPrefixEntries);

            // Increment the queries observed with the prefix (for knowing when to train);
            // the counter was initialised in the training region above, so the key is present
            _queriesObservedForPrefix[partialQuery] += 1;

            // Add the new query to the NTBs
            _ntbs[0][partialQuery].AddQuery(fullQuery, null);
            overallNTB.AddQuery(fullQuery, null);
            baseNTB.AddQuery(fullQuery, null);

            // Add the query to the training NTB (its frequencies become the training targets)
            _trainingNtb[partialQuery].AddQuery(fullQuery, null);

            // Return the autocompletion list ready to be sent off for evaluation
            return autoCompletionListOutput;
        }