/// <summary>
/// Creates a non-temporal bucket approach with an empty per-key bucket index.
/// </summary>
/// <param name="qMaxFrequency">Stored in <c>_qMaxFrequency</c>. NOTE(review): presumably a bucket's per-query frequency limit - confirm against NonTemporalBucket.</param>
/// <param name="qMaxSum">Stored in <c>_qMaxSum</c>. NOTE(review): presumably a bucket's total frequency limit - confirm against NonTemporalBucket.</param>
/// <param name="autoCompleteAfterNChars">Forwarded unchanged to the base constructor.</param>
/// <param name="evalOutput">Forwarded unchanged to the base constructor.</param>
/// <param name="queryPrefixProfile">Forwarded unchanged to the base constructor.</param>
public NonTemporalBucketApproach(
    int qMaxFrequency,
    int qMaxSum,
    int autoCompleteAfterNChars,
    StandardEvalOutput evalOutput,
    PrefixProfile queryPrefixProfile)
    : base(autoCompleteAfterNChars, evalOutput, queryPrefixProfile)
{
    // The index starts out empty; buckets are added elsewhere in the class.
    _indexEntries = new Dictionary<string, NonTemporalBucket<BaseIndexEntry>>();

    // Bucket limits supplied by the caller.
    _qMaxSum = qMaxSum;
    _qMaxFrequency = qMaxFrequency;
}
/// <summary>
/// Creates the multiple non-overlapping NTB approach backed by an online SGD linear regression model.
/// MultipleNs must be specified in ascending order for the NTBs.
/// </summary>
/// <param name="multipleNs">Size of each NTB, one entry per NTB; must be non-null, non-empty and the same length as <paramref name="multipleMaxSingleQueryN"/>.</param>
/// <param name="multipleMaxSingleQueryN">One entry per NTB, parallel to <paramref name="multipleNs"/>. NOTE(review): presumably the per-query cap inside each NTB - confirm against NonTemporalBucket.</param>
/// <param name="trainAfterNQueriesForPrefix">How many queries to observe between training the ML model (i.e., predict queries in this window of N - OR: 'the prediction horizon'). Note the model won't start training until after (largest NTB size) + (trainAfter parameter) has been reached.</param>
/// <param name="queryLogFile">Not read by this constructor; retained so existing callers keep compiling.</param>
/// <param name="autoCompleteAfterNChars">Forwarded unchanged to the base constructor.</param>
/// <param name="evalOutput">Forwarded unchanged to the base constructor.</param>
/// <param name="queryPrefixProfile">Forwarded unchanged to the base constructor.</param>
/// <param name="baseNtbSize">Stored in <c>_baseNtbSize</c>; defaults to 200.</param>
/// <exception cref="ArgumentNullException">Either array argument is null.</exception>
/// <exception cref="ArgumentException">The arrays differ in length or are empty.</exception>
public noSGDLRMNTB(int[] multipleNs, int[] multipleMaxSingleQueryN, int trainAfterNQueriesForPrefix, string queryLogFile, int autoCompleteAfterNChars, StandardEvalOutput evalOutput, PrefixProfile queryPrefixProfile, int baseNtbSize = 200)
    : base(autoCompleteAfterNChars, evalOutput, queryPrefixProfile)
{
    // Validate up front so bad configuration fails with a descriptive argument
    // exception rather than a NullReferenceException / IndexOutOfRangeException below.
    if (multipleNs == null)
    {
        throw new ArgumentNullException("multipleNs");
    }

    if (multipleMaxSingleQueryN == null)
    {
        throw new ArgumentNullException("multipleMaxSingleQueryN");
    }

    if (multipleNs.Length != multipleMaxSingleQueryN.Length)
    {
        throw new ArgumentException("Must be the same length", "multipleMaxSingleQueryN");
    }

    if (multipleNs.Length == 0)
    {
        // multipleNs[0] is handed to the regression model below, so at least one NTB is required.
        throw new ArgumentException("At least one NTB size must be specified", "multipleNs");
    }

    _trainAfterNQueriesForPrefix = trainAfterNQueriesForPrefix;
    _multipleNs = multipleNs;
    _multipleMaxSingleQueryN = multipleMaxSingleQueryN;

    _ntbs = new Dictionary<string, NonTemporalBucket<BaseIndexEntry>>[multipleNs.Length];
    _trainingNtb = new Dictionary<string, NonTemporalBucket<BaseIndexEntry>>();

    // The model receives the NTB count, the first NTB size and the training interval.
    _lrModel = new OnlineSGDNonOverlappingLinearRegressionModel(multipleNs.Length, _multipleNs[0], _trainAfterNQueriesForPrefix);

    // One (initially empty) bucket dictionary per configured NTB size.
    for (int i = 0; i < multipleNs.Length; i++)
    {
        _ntbs[i] = new Dictionary<string, NonTemporalBucket<BaseIndexEntry>>();
    }

    _ntbsCreated = new HashSet<string>();

    // Calculate the total number of queries stored across all NTBs (max overall NTB capacity)
    _totalQueriesAcrossAllNTBs = _multipleNs.Sum();

    _overallNtb = new Dictionary<string, NonTemporalBucket<BaseIndexEntry>>();
    _baseNtb = new Dictionary<string, NonTemporalBucket<BaseIndexEntry>>();
    _baseNtbSize = baseNtbSize;
}
/// <summary>
/// Builds the run id, the evaluation output and the approach selected by <paramref name="expType"/>,
/// then attaches the collection's one-off queries to the created approach.
/// Terminates the process (exit code 0) when the run's output file already exists (non-debug only)
/// or when <paramref name="expType"/> is not recognised.
/// </summary>
/// <param name="collection">Collection name; prefixes the run id and locates the "-queries.txt" and "-oneoffqueries.txt" files under the data directory.</param>
/// <param name="prefixLength">Used in the output file name and passed on to the eval output and the approach constructor.</param>
/// <param name="expType">One of "bl-a", "bl-w", "ntb" or "sgdlrnomntb".</param>
/// <param name="allArgs">Raw argument list; approach-specific parameters start at index 4. For "sgdlrnomntb", index 7 (base NTB size) is optional and falls back to the approach's own default when absent.</param>
/// <param name="isDebug">When true, the existing-output-file check is skipped and the eval output is created without a file.</param>
public ApproachFactory(string collection, int prefixLength, string expType, string[] allArgs, bool isDebug = false)
{
    // Determine which approach to create
    _runId = collection + '-' + expType;
    if (expType == "bl-w")
    {
        _runId += allArgs[4]; // Append the number of days the window is over
    }

    if (expType == "ntb")
    {
        _runId += allArgs[4] + "-" + allArgs[5]; // Append the non temporal bucket parameters
    }

    if (expType == "sgdlrnomntb")
    {
        // Multiple non-overlapping NTBs with stochastic gradient descent linear regression, args are comma separated.
        // Append the non temporal bucket parameters along with the train-between-queries parameter
        // (format: aol 2 mntb 2006-03-01 500,1000 500,1000 100)
        _runId += allArgs[4] + "-" + allArgs[5] + "-t" + allArgs[6];
    }

    // Single definition of the run's output path (it is needed by both the existence check and the eval output).
    string outputPath = Utilities.DataDirectory + prefixLength + "chars-" + _runId + ".txt";

    // Ensure the run output file doesn't already exist
    if (!isDebug && File.Exists(outputPath))
    {
        Console.WriteLine("Output file already exists, exiting.");
        Environment.Exit(0); // Exit now
    }

    // Setup the approach
    if (!isDebug)
    {
        _evalOutput = new StandardEvalOutput(new FileInfo(outputPath), _runId, prefixLength);
    }
    else
    {
        _evalOutput = new StandardEvalOutput(null, _runId, prefixLength, true); // Debug for event-based output rather than file output
    }

    if (expType == "bl-a")
    {
        _approach = new BaselineAllQueryLog<BaseIndexEntry>(prefixLength, _evalOutput, null);
    }
    else if (expType == "bl-w")
    {
        _approach = new BaselineWindowQueryLog<BaseIndexEntry>(Convert.ToInt32(allArgs[4]), prefixLength, _evalOutput, null);
    }
    else if (expType == "ntb")
    {
        _approach = new NonTemporalBucketApproach(Convert.ToInt32(allArgs[4]), Convert.ToInt32(allArgs[5]), prefixLength, _evalOutput, null);
    }
    else if (expType == "sgdlrnomntb")
    {
        int[] ntbSizes = allArgs[4].Split(',').Select(s => int.Parse(s)).ToArray();
        int[] maxSingleQueryNs = allArgs[5].Split(',').Select(s => int.Parse(s)).ToArray();
        int trainAfterNQueries = Convert.ToInt32(allArgs[6]);
        string queryLogFile = Utilities.DataDirectory + collection + "-queries.txt";

        // The documented invocation format stops at index 6, so the base NTB size at
        // index 7 is optional: only pass it when supplied, otherwise let the
        // constructor's default apply instead of failing with IndexOutOfRangeException.
        if (allArgs.Length > 7)
        {
            _approach = new noSGDLRMNTB(ntbSizes, maxSingleQueryNs, trainAfterNQueries, queryLogFile, prefixLength, _evalOutput, null, Convert.ToInt32(allArgs[7]));
        }
        else
        {
            _approach = new noSGDLRMNTB(ntbSizes, maxSingleQueryNs, trainAfterNQueries, queryLogFile, prefixLength, _evalOutput, null);
        }
    }
    else
    {
        Console.WriteLine("Invalid experiment type, must be bl-a, bl-w, ntb or sgdlrnomntb.");
        Environment.Exit(0);
    }

    // Load the one-off queries for optimisation in some cases
    OneOffQueries ofq = new OneOffQueries(new FileInfo(Utilities.DataDirectory + collection + "-oneoffqueries.txt"));
    _approach.OneOffQueries = ofq;
}
/// <summary>
/// Stores the shared approach settings and creates and starts the evaluation thread pool.
/// </summary>
/// <param name="autoCompleteAfterNChars">Stored in <c>_autoCompleteAfterNChars</c>.</param>
/// <param name="evalOutput">Stored in <c>_evalOutput</c>.</param>
/// <param name="queryPrefixProfile">Stored in <c>_queryPrefixProfile</c>.</param>
public BaseApproach(
    int autoCompleteAfterNChars,
    StandardEvalOutput evalOutput,
    PrefixProfile queryPrefixProfile)
{
    _queryPrefixProfile = queryPrefixProfile;
    _evalOutput = evalOutput;
    _autoCompleteAfterNChars = autoCompleteAfterNChars;

    // Setup and start the threadpool.
    // NOTE(review): 1000 and 6 look like an idle timeout (ms) and a max worker count - confirm against SmartThreadPool's constructor.
    var evalPool = new SmartThreadPool(1000, 6);
    _evalThreadPool = evalPool;
    evalPool.Start();
}