/// <summary>
/// Autocompletes a partial query from the bucket stored for that prefix, then records the
/// full query in the same bucket.
/// </summary>
/// <param name="queryTime">Time of the query. Not used by this implementation.</param>
/// <param name="partialQuery">The prefix typed so far; used as the key into the prefix index.</param>
/// <param name="fullQuery">The complete query, added to the prefix bucket once the suggestions have been produced.</param>
/// <returns>
/// A new, empty <c>AutoCompletionList</c> when the prefix bucket had no entries; otherwise the
/// list produced by <c>CreateAutoCompletionList</c> for the bucket's entries.
/// </returns>
protected override AutoCompletionList AutoCompleteQuery(DateTime queryTime, string partialQuery, string fullQuery)
{
    // Fetch the bucket for the prefix with a single lookup, creating and indexing it on first sight
    NonTemporalBucket<BaseIndexEntry> prefixBucket;
    if (!_indexEntries.TryGetValue(partialQuery, out prefixBucket))
    {
        prefixBucket = new NonTemporalBucket<BaseIndexEntry>(_qMaxSum, _qMaxFrequency);
        _indexEntries[partialQuery] = prefixBucket;
    }

    // Get the entries for the prefix
    IEnumerable<BaseIndexEntry> allPrefixEntries = prefixBucket.AllBucketQueries;

    // Deal with no autocompletions: still record the query, but return an empty list
    if (!allPrefixEntries.Any())
    {
        prefixBucket.AddQuery(fullQuery, this);
        return new AutoCompletionList();
    }

    // Create and rank the autocompletions; this is done before the new query is added to the bucket
    AutoCompletionList autoCompletionListOutput = CreateAutoCompletionList(allPrefixEntries);

    // Add the new query to the index
    prefixBucket.AddQuery(fullQuery, this);

    // Return the autocompletion list ready to be sent off for evaluation
    return autoCompletionListOutput;
}
/// <summary>
/// Autocompletes a partial query from the per-prefix non-temporal buckets (NTBs), ranking by
/// model prediction once the overall NTB for the prefix is full, and then records the full query.
/// </summary>
/// <remarks>
/// Each call, in order: increments the running query count; creates the prefix's NTBs if they do
/// not exist yet; applies the pending training package and builds a new one when the
/// observed-query counter for the prefix reaches the training threshold; scores the entries of
/// the overall NTB; and finally adds <paramref name="fullQuery"/> to the first chained NTB and to
/// the overall, base and training NTBs.
/// </remarks>
/// <param name="queryTime">Time of the query. Not used by this implementation.</param>
/// <param name="partialQuery">The prefix typed so far; used as the key for all per-prefix state.</param>
/// <param name="fullQuery">The complete query, added to the NTBs after the suggestions have been produced.</param>
/// <returns>
/// The list produced by <c>CreateAutoCompletionList</c> from the scored entries. Entries whose
/// frequency is below 2 are left out.
/// </returns>
protected override AutoCompletionList AutoCompleteQuery(DateTime queryTime, string partialQuery, string fullQuery)
{
    // Completions with a frequency below this value are ignored - increases speed and removes junk
    const int MinimumCompletionFrequency = 2;

    // Increment the current query count
    _currentQueryCount++;

    #region Setup NTBs

    // Create the full set of NTBs the first time the prefix is seen
    if (!_ntbsCreated.Contains(partialQuery))
    {
        for (int i = 0; i < _multipleNs.Length; i++)
        {
            NonTemporalBucket<BaseIndexEntry> chainedNtb = new NonTemporalBucket<BaseIndexEntry>(_multipleNs[i], _multipleMaxSingleQueryN[i]);
            _ntbs[i][partialQuery] = chainedNtb;

            // Chain the buckets: NTB i subscribes to the removal event of NTB i-1,
            // so a query removed from one NTB is sent to the next
            if (i > 0)
            {
                _ntbs[i - 1][partialQuery].OnQueryRemovedFromNTB += chainedNtb.AddQueryEvent;
            }
        }

        // Create overall NTB
        _overallNtb[partialQuery] = new NonTemporalBucket<BaseIndexEntry>(_totalQueriesAcrossAllNTBs, _totalQueriesAcrossAllNTBs);

        // Create base NTB
        _baseNtb[partialQuery] = new NonTemporalBucket<BaseIndexEntry>(_baseNtbSize, _baseNtbSize);

        // Track that the NTBs have been created for the prefix
        _ntbsCreated.Add(partialQuery);
    }

    // Create the training NTB, sized for the training horizon (in queries)
    if (!_trainingNtb.ContainsKey(partialQuery))
    {
        _trainingNtb[partialQuery] = new NonTemporalBucket<BaseIndexEntry>(_trainAfterNQueriesForPrefix, _trainAfterNQueriesForPrefix);
    }

    #endregion

    #region Handle training package building and use

    // Make sure the observed-query counter exists; it is read below and incremented at the end
    if (!_queriesObservedForPrefix.ContainsKey(partialQuery))
    {
        _queriesObservedForPrefix[partialQuery] = 0;
    }

    // A prefix that has never trained waits until the biggest NTB is full;
    // afterwards it retrains every _trainAfterNQueriesForPrefix observed queries
    bool hasStartedTraining = _prefixHasStartedTraining.Contains(partialQuery);
    bool trainingIsDue = hasStartedTraining
        ? _queriesObservedForPrefix[partialQuery] == _trainAfterNQueriesForPrefix
        : _queriesObservedForPrefix[partialQuery] == _totalQueriesAcrossAllNTBs;

    if (trainingIsDue)
    {
        // Apply the training package built at the previous training point, if there is one
        TrainingPackage trainingPackage;
        if (_prefixTrainingPackages.TryGetValue(partialQuery, out trainingPackage))
        {
            trainingPackage.QueryCountAtTrain = _currentQueryCount;

            // Set the target likelihood of each packaged query to its frequency in the training NTB.
            // The raw count is used (not normalised); queries with a zero count keep their existing target.
            NonTemporalBucket<BaseIndexEntry> trainingNtb = _trainingNtb[partialQuery];
            foreach (FeaturePackage queryFeaturePackage in trainingPackage.TrainingPackageQueries)
            {
                double queryCount = trainingNtb.GetQueryFrequency(queryFeaturePackage.Query);
                if (queryCount > 0)
                {
                    queryFeaturePackage.TargetLikelihood = queryCount;
                }
            }

            // Use the training package for the model
            UseTrainingPackage(trainingPackage);
            _firstTrainingHasHappened = true;
            _trainCount++;
        }

        // Build the package of existing queries to train with next time;
        // it is flagged as the first package when the prefix had not started training before this call
        _prefixTrainingPackages[partialQuery] = BuildTrainingPackage(partialQuery, !hasStartedTraining);
        _prefixHasStartedTraining.Add(partialQuery);

        // Reset the queries observed for the prefix, ready for the next training package to run
        _queriesObservedForPrefix[partialQuery] = 0;
    }

    #endregion

    NonTemporalBucket<BaseIndexEntry> overallNTB = _overallNtb[partialQuery];
    NonTemporalBucket<BaseIndexEntry> baseNtb = _baseNtb[partialQuery];

    // Predictions are only used once the overall NTB is full. The bucket is not modified
    // while scoring, so this is decided once rather than per entry.
    bool usePrediction = overallNTB.TotalQueriesInBucket == overallNTB.QMaxSum;

    // The prefix entries for output
    List<BaseIndexEntry> outputPrefixEntries = new List<BaseIndexEntry>();

    // Compute scores on the auto-completions
    foreach (BaseIndexEntry prefixEntry in overallNTB.AllBucketQueries)
    {
        if (prefixEntry.QueryLogFrequency < MinimumCompletionFrequency)
        {
            continue;
        }

        BaseIndexEntry outputIndexEntry = new BaseIndexEntry();
        outputIndexEntry.Query = prefixEntry.Query;

        if (usePrediction)
        {
            // Rank by the model's prediction: 2.0 plus the prediction, rounded to 5 decimal places
            outputIndexEntry.QueryLogFrequency = Math.Round(2.0 + MakePrediction(prefixEntry, partialQuery), 5);
        }
        else
        {
            // Overall NTB is not full yet: rank by the query's frequency in the base NTB
            // TODO: change this to prefixEntry.QueryLogFrequency
            outputIndexEntry.QueryLogFrequency = baseNtb.GetQueryFrequency(outputIndexEntry.Query);
            if (outputIndexEntry.QueryLogFrequency < MinimumCompletionFrequency)
            {
                continue;
            }
        }

        outputPrefixEntries.Add(outputIndexEntry);
    }

    // Create and rank the autocompletions
    AutoCompletionList autoCompletionListOutput = CreateAutoCompletionList(outputPrefixEntries);

    // Increment the queries observed with the prefix (for knowing when to train);
    // the counter entry is guaranteed to exist by the initialisation above
    _queriesObservedForPrefix[partialQuery] += 1;

    // Add the new query to the NTBs
    _ntbs[0][partialQuery].AddQuery(fullQuery, null);
    overallNTB.AddQuery(fullQuery, null);
    baseNtb.AddQuery(fullQuery, null);

    // Add the query to the training NTB (its frequencies become the training targets)
    _trainingNtb[partialQuery].AddQuery(fullQuery, null);

    // Return the autocompletion list ready to be sent off for evaluation
    return autoCompletionListOutput;
}