Standard evaluation outputter (outputs in format ready for EQC_Eval tool to run)
 /// <summary>
 /// Creates the approach with an empty per-key bucket index and records the two bucket limits.
 /// </summary>
 /// <param name="qMaxFrequency">Value stored in <c>_qMaxFrequency</c> (presumably the per-query frequency cap of a bucket - confirm against NonTemporalBucket).</param>
 /// <param name="qMaxSum">Value stored in <c>_qMaxSum</c> (presumably the cap on the summed frequencies of a bucket - confirm against NonTemporalBucket).</param>
 /// <param name="autoCompleteAfterNChars">Forwarded unchanged to the base constructor.</param>
 /// <param name="evalOutput">Forwarded unchanged to the base constructor.</param>
 /// <param name="queryPrefixProfile">Forwarded unchanged to the base constructor.</param>
 public NonTemporalBucketApproach(int qMaxFrequency, int qMaxSum, int autoCompleteAfterNChars, StandardEvalOutput evalOutput, PrefixProfile queryPrefixProfile)
     : base(autoCompleteAfterNChars, evalOutput, queryPrefixProfile)
 {
     // Start with no buckets; entries are keyed by string and filled in elsewhere.
     this._indexEntries = new Dictionary<string, NonTemporalBucket<BaseIndexEntry>>();

     // Remember the bucket limits supplied by the caller.
     this._qMaxSum = qMaxSum;
     this._qMaxFrequency = qMaxFrequency;
 }
Esempio n. 2
0
        /// <summary>
        /// Creates the approach that keeps several non-temporal buckets (NTBs) per key and an online SGD linear regression model.
        /// MultipleNs must be specified in ascending order for the NTBs
        /// </summary>
        /// <param name="multipleNs">One size per NTB, in ascending order. Must be non-null, non-empty and the same length as <paramref name="multipleMaxSingleQueryN"/>.</param>
        /// <param name="multipleMaxSingleQueryN">One value per NTB, paired by index with <paramref name="multipleNs"/> (presumably the per-query cap of each NTB - confirm against NonTemporalBucket).</param>
        /// <param name="trainAfterNQueriesForPrefix">How many queries to observe between training the ML model (i.e., predict queries in this window of N - OR: 'the prediction horizon'). Note the model won't start training until after (largest NTB size) + (trainAfter parameter) has been reached</param>
        /// <param name="queryLogFile">Path of the query log. Accepted for interface compatibility; this constructor does not read it.</param>
        /// <param name="autoCompleteAfterNChars">Forwarded unchanged to the base constructor.</param>
        /// <param name="evalOutput">Forwarded unchanged to the base constructor.</param>
        /// <param name="queryPrefixProfile">Forwarded unchanged to the base constructor.</param>
        /// <param name="baseNtbSize">Value stored in <c>_baseNtbSize</c>; defaults to 200.</param>
        /// <exception cref="ArgumentNullException">Either array argument is null.</exception>
        /// <exception cref="ArgumentException">The arrays differ in length or are empty.</exception>
        public noSGDLRMNTB(int[] multipleNs, int[] multipleMaxSingleQueryN, int trainAfterNQueriesForPrefix, string queryLogFile, int autoCompleteAfterNChars, StandardEvalOutput evalOutput, PrefixProfile queryPrefixProfile, int baseNtbSize = 200)
            : base(autoCompleteAfterNChars, evalOutput, queryPrefixProfile)
        {
            // Validate up front so a bad configuration fails with a descriptive argument
            // exception instead of a NullReferenceException / IndexOutOfRangeException below.
            if (multipleNs == null)
                throw new ArgumentNullException("multipleNs");
            if (multipleMaxSingleQueryN == null)
                throw new ArgumentNullException("multipleMaxSingleQueryN");
            if (multipleNs.Length != multipleMaxSingleQueryN.Length)
                throw new ArgumentException("Must be the same length", "multipleMaxSingleQueryN");
            if (multipleNs.Length == 0)
                throw new ArgumentException("At least one NTB size must be specified", "multipleNs");

            _trainAfterNQueriesForPrefix = trainAfterNQueriesForPrefix;
            _multipleNs = multipleNs;
            _multipleMaxSingleQueryN = multipleMaxSingleQueryN;
            _ntbs = new Dictionary<string, NonTemporalBucket<BaseIndexEntry>>[multipleNs.Length];
            _trainingNtb = new Dictionary<string, NonTemporalBucket<BaseIndexEntry>>();

            // The model is given the number of NTBs, the first NTB size and the training interval.
            // NOTE(review): with ascending sizes _multipleNs[0] is the smallest NTB, while the
            // parameter documentation above talks about the largest - confirm which one the model expects.
            _lrModel = new OnlineSGDNonOverlappingLinearRegressionModel(multipleNs.Length, _multipleNs[0], _trainAfterNQueriesForPrefix);

            // One (initially empty) key -> bucket dictionary per configured NTB size
            for (int i = 0; i < multipleNs.Length; i++)
            {
                _ntbs[i] = new Dictionary<string, NonTemporalBucket<BaseIndexEntry>>();
            }

            _ntbsCreated = new HashSet<string>();

            // Calculate the total number of queries stored across all NTBs (max overall NTB capacity)
            _totalQueriesAcrossAllNTBs = _multipleNs.Sum();

            _overallNtb = new Dictionary<string, NonTemporalBucket<BaseIndexEntry>>();

            _baseNtb = new Dictionary<string, NonTemporalBucket<BaseIndexEntry>>();

            _baseNtbSize = baseNtbSize;
        }
        /// <summary>
        /// Builds the run id, the evaluation output and the approach instance for the requested experiment type,
        /// then attaches the collection's one-off queries to the approach.
        /// </summary>
        /// <param name="collection">Collection name; used as the run id prefix and to locate the "-queries.txt" and "-oneoffqueries.txt" files under <c>Utilities.DataDirectory</c>.</param>
        /// <param name="prefixLength">Prefix length passed to the approach and the evaluation output; also part of the output file name.</param>
        /// <param name="expType">Experiment type: "bl-a", "bl-w", "ntb" or "sgdlrnomntb". Any other value prints a message and exits the process.</param>
        /// <param name="allArgs">All arguments; experiment-specific parameters are read from index 4 onwards ("bl-w": [4]; "ntb": [4], [5]; "sgdlrnomntb": [4], [5], [6] and optionally [7]).</param>
        /// <param name="isDebug">When true, the existing-output-file check is skipped and the evaluation output is created without a file.</param>
        public ApproachFactory(string collection, int prefixLength, string expType, string[] allArgs, bool isDebug = false)
        {
            // Determine which approach to create
            _runId = collection + '-' + expType;

            if (expType == "bl-w")
                _runId += allArgs[4]; // Append the number of days the window is over

            if (expType == "ntb")
                _runId += allArgs[4] + "-" + allArgs[5]; // Append the non temporal bucket parameters

            if (expType == "sgdlrnomntb") // Multiple non-overlapping NTBs with stochastic gradient descent linear regression, args are comma separated
                _runId += allArgs[4] + "-" + allArgs[5] + "-t" + allArgs[6]; // Append the non temporal bucket parameters along with train between queries last parameter (format: aol 2 mntb 2006-03-01 500,1000 500,1000 100)

            // The run output file name is needed both for the existence check and for the output itself
            string outputPath = Utilities.DataDirectory + prefixLength + "chars-" + _runId + ".txt";

            // Ensure the run output file doesn't already exist
            if (!isDebug && File.Exists(outputPath))
            {
                Console.WriteLine("Output file already exists, exiting.");
                Environment.Exit(0); // Exit now
            }

            // Setup the approach
            if (!isDebug)
                _evalOutput = new StandardEvalOutput(new FileInfo(outputPath), _runId, prefixLength);
            else
                _evalOutput = new StandardEvalOutput(null, _runId, prefixLength, true); // Debug for event-based output rather than file output

            if (expType == "bl-a")
            {
                _approach = new BaselineAllQueryLog<BaseIndexEntry>(prefixLength, _evalOutput, null);
            }
            else if (expType == "bl-w")
            {
                _approach = new BaselineWindowQueryLog<BaseIndexEntry>(Convert.ToInt32(allArgs[4]), prefixLength, _evalOutput, null);
            }
            else if (expType == "ntb")
            {
                _approach = new NonTemporalBucketApproach(Convert.ToInt32(allArgs[4]), Convert.ToInt32(allArgs[5]), prefixLength, _evalOutput, null);
            }
            else if (expType == "sgdlrnomntb")
            {
                int[] multipleNs = allArgs[4].Split(',').Select(s => int.Parse(s)).ToArray();
                int[] multipleMaxSingleQueryN = allArgs[5].Split(',').Select(s => int.Parse(s)).ToArray();
                int trainAfterNQueries = Convert.ToInt32(allArgs[6]);
                string queryLogFile = Utilities.DataDirectory + collection + "-queries.txt";

                // The base NTB size (allArgs[7]) is optional: the documented invocation format above stops
                // at allArgs[6], so only read it when present and otherwise rely on the constructor's default.
                if (allArgs.Length > 7)
                {
                    _approach = new noSGDLRMNTB(
                        multipleNs,
                        multipleMaxSingleQueryN,
                        trainAfterNQueries,
                        queryLogFile,
                        prefixLength,
                        _evalOutput,
                        null,
                        Convert.ToInt32(allArgs[7]));
                }
                else
                {
                    _approach = new noSGDLRMNTB(
                        multipleNs,
                        multipleMaxSingleQueryN,
                        trainAfterNQueries,
                        queryLogFile,
                        prefixLength,
                        _evalOutput,
                        null);
                }
            }
            else
            {
                Console.WriteLine("Invalid experiment type, must be bl-a, bl-w, ntb or sgdlrnomntb.");
                Environment.Exit(0);
            }

            // Load the one-off queries for optimisation in some cases
            OneOffQueries ofq = new OneOffQueries(new FileInfo(Utilities.DataDirectory + collection + "-oneoffqueries.txt"));
            _approach.OneOffQueries = ofq;
        }
Esempio n. 4
0
        /// <summary>
        /// Stores the shared approach settings and creates and starts the thread pool held in <c>_evalThreadPool</c>.
        /// </summary>
        /// <param name="autoCompleteAfterNChars">Value stored in <c>_autoCompleteAfterNChars</c>.</param>
        /// <param name="evalOutput">Evaluation output stored in <c>_evalOutput</c>.</param>
        /// <param name="queryPrefixProfile">Prefix profile stored in <c>_queryPrefixProfile</c>; callers in this file pass null.</param>
        public BaseApproach(int autoCompleteAfterNChars, StandardEvalOutput evalOutput, PrefixProfile queryPrefixProfile)
        {
            this._queryPrefixProfile = queryPrefixProfile;
            this._evalOutput = evalOutput;
            this._autoCompleteAfterNChars = autoCompleteAfterNChars;

            // Setup and start the threadpool
            // NOTE(review): the arguments look like (idle timeout, max worker threads) - confirm against the SmartThreadPool API.
            SmartThreadPool pool = new SmartThreadPool(1000, 6);
            this._evalThreadPool = pool;
            pool.Start();
        }