static void Main(string[] args) { Dictionary<string, int> lookupPastQueries = new Dictionary<string, int>(); Journal journal = new Journal(); StreamWriter fsOutput = new StreamWriter(new FileStream(@"D:\aol-queries-new.txt", FileMode.Create), Encoding.UTF8); string[] files = new string[] { @"C:\hadoop-cdh4.0\aol-filtered.txt" }; foreach (string file in files) { using (StreamReader sr = new StreamReader(file)) { String line = sr.ReadLine(); int counter = 0; while (line != null) { counter++; if (counter % 10000 == 0) Console.WriteLine("Reached: " + counter.ToString()); string[] row = line.Split('\t'); string anonId = row[0]; if (anonId == "AnonID") { line = sr.ReadLine(); continue; } string query = row[2].ToLower(); if (query.StartsWith("http") || query.StartsWith("www.")) { line = sr.ReadLine(); continue; } string queryTime = row[1]; DateTime queryTimeDT = DateTime.Parse(queryTime); string lookupKey = anonId + query; // Lookup int pastCount = 0; lookupPastQueries.TryGetValue(lookupKey, out pastCount); // Ensure query doesn't already exist in the journal if (pastCount == 0) { if (query != "-") fsOutput.WriteLine(query + '\t' + queryTime); } // Add to journal JournalEntry je = new JournalEntry(); je.EntryDateTime = queryTimeDT; je.Query = lookupKey; // Set query as lookup query journal.AddEntry(je); // Increment the counter for the query if (lookupPastQueries.ContainsKey(lookupKey)) lookupPastQueries[lookupKey] += 1; else lookupPastQueries[lookupKey] = 1; // Remove old journal entries foreach (JournalEntry je2 in journal.GetEntriesBeforeDateAndDelete(queryTimeDT.AddMinutes(-30))) { lookupPastQueries[je2.Query] -= 1; } line = sr.ReadLine(); } fsOutput.Close(); } } }