Contains an entry in the journal
Example #1
0
        /// <summary>
        /// Appends an entry to the end of the journal and increments the entry count.
        /// </summary>
        /// <remarks>
        /// Entries are chained through <c>NextEntry</c>; the new entry becomes the newest entry and, when the
        /// journal is empty, the oldest entry as well.
        /// </remarks>
        /// <param name="entry">The entry to append. Must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="entry"/> is null.</exception>
        public void AddEntry(JournalEntry entry)
        {
            // A null entry would be counted and would overwrite the tail reference with null,
            // making the following append fail; reject it before any state is touched.
            if (entry == null)
            {
                throw new ArgumentNullException("entry");
            }

            if (_oldestEntry == null)
            {
                // No existing entries: the new entry is the head of the chain.
                _oldestEntry = entry;
            }
            else
            {
                // Link the current tail to the new entry.
                _newestEntry.NextEntry = entry;
            }

            // In both cases the new entry becomes the tail.
            _newestEntry = entry;
            _entryCount++;
        }
Example #2
0
        /// <summary>
        /// Removes entries from the oldest end of the journal while their <c>EntryDateTime</c> is strictly earlier
        /// than <paramref name="beforeDateTime"/>, and returns the removed entries.
        /// (THIS METHOD IS *NOT* THREAD-SAFE).
        /// </summary>
        /// <remarks>
        /// The removal is performed eagerly: by the time this method returns, the entries have already been taken
        /// out of the journal and the entry count has been reduced. Scanning stops at the first entry that is not
        /// earlier than the cut-off, so entries behind it are kept regardless of their own date.
        /// </remarks>
        /// <param name="beforeDateTime">Exclusive cut-off; entries dated strictly before it are removed.</param>
        /// <returns>The removed entries in the order they were held in the journal (oldest end first); empty if
        /// nothing was removed.</returns>
        public IEnumerable<JournalEntry> GetEntriesBeforeDateAndDelete(DateTime beforeDateTime)
        {
            List<JournalEntry> removedEntries = new List<JournalEntry>();

            while (_oldestEntry != null && _oldestEntry.EntryDateTime < beforeDateTime)
            {
                removedEntries.Add(_oldestEntry);

                // Advance the head past the entry that was just collected.
                _oldestEntry = _oldestEntry.NextEntry;

                _entryCount--;
            }

            if (_oldestEntry == null)
            {
                // The journal is now empty; drop the tail reference so it does not keep pointing
                // at an entry that has been removed.
                _newestEntry = null;
            }

            return removedEntries;
        }
Example #3
0
        /// <summary>
        /// Reads the tab-separated query log(s) and writes each query (with its timestamp) to the output file,
        /// unless the journal still holds an earlier occurrence of the same user/query pair.
        /// </summary>
        /// <remarks>
        /// Every processed line is added to the journal. After each line, journal entries dated more than
        /// 30 minutes before that line's timestamp are removed and their per-key counts are decremented.
        /// Columns are read as: 0 = user id, 1 = query time, 2 = query text.
        /// </remarks>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Number of entries currently held in the journal for each user/query key.
            Dictionary<string, int> lookupPastQueries = new Dictionary<string, int>();

            Journal journal = new Journal();

            string[] files = new string[] { @"C:\hadoop-cdh4.0\aol-filtered.txt" };

            // The writer is shared by all input files, so it is opened once around the loop and
            // disposed (flushed and closed) when the loop finishes or an exception escapes.
            using (StreamWriter fsOutput = new StreamWriter(new FileStream(@"D:\aol-queries-new.txt", FileMode.Create), Encoding.UTF8))
            {
                foreach (string file in files)
                {
                    using (StreamReader sr = new StreamReader(file))
                    {
                        int counter = 0;
                        string line;

                        while ((line = sr.ReadLine()) != null)
                        {
                            counter++;

                            if (counter % 10000 == 0)
                            {
                                Console.WriteLine("Reached: " + counter.ToString());
                            }

                            string[] row = line.Split('\t');

                            // Blank or truncated lines do not carry all three columns; skip them
                            // instead of failing on the column access below.
                            if (row.Length < 3)
                            {
                                continue;
                            }

                            string anonId = row[0];

                            // Skip the header row.
                            if (anonId == "AnonID")
                            {
                                continue;
                            }

                            string query = row[2].ToLower();

                            // Skip queries that are really URLs.
                            if (query.StartsWith("http") || query.StartsWith("www."))
                            {
                                continue;
                            }

                            string queryTime = row[1];
                            DateTime queryTimeDT = DateTime.Parse(queryTime);

                            // The columns were split on tabs, so a tab can never occur inside either part;
                            // using it as separator keeps keys of different user/query pairs distinct.
                            string lookupKey = anonId + "\t" + query;

                            int pastCount;
                            lookupPastQueries.TryGetValue(lookupKey, out pastCount);

                            // Only emit the query if it is not already present in the journal.
                            if (pastCount == 0 && query != "-")
                            {
                                fsOutput.WriteLine(query + '\t' + queryTime);
                            }

                            // Add to journal; the lookup key is stored so the count can be found on expiry.
                            JournalEntry je = new JournalEntry();
                            je.EntryDateTime = queryTimeDT;
                            je.Query = lookupKey;
                            journal.AddEntry(je);

                            // Increment the counter for the query (pastCount is 0 for an unseen key).
                            lookupPastQueries[lookupKey] = pastCount + 1;

                            // Remove old journal entries and release their counts.
                            foreach (JournalEntry expired in journal.GetEntriesBeforeDateAndDelete(queryTimeDT.AddMinutes(-30)))
                            {
                                int remaining;
                                if (!lookupPastQueries.TryGetValue(expired.Query, out remaining))
                                {
                                    continue;
                                }

                                remaining--;

                                if (remaining > 0)
                                {
                                    lookupPastQueries[expired.Query] = remaining;
                                }
                                else
                                {
                                    // Drop keys with no live journal entries so the dictionary does not
                                    // grow with every distinct user/query pair ever seen.
                                    lookupPastQueries.Remove(expired.Query);
                                }
                            }
                        }
                    }
                }
            }
        }