/// <summary>
/// Reads log entries from the database: every entry when <paramref name="ids"/> is null,
/// otherwise only the entries whose IDs appear in the provided list.
/// </summary>
/// <param name="ids">IDs of the entries to fetch, or null to fetch all entries.</param>
/// <returns>Log entries materialized from each database row.</returns>
public IEnumerable<NuixLogEntry> ReadEntries(IEnumerable<long> ids)
{
    string query;
    if (ids == null)
    {
        query = GetEmbeddedSQL("NuixLogReviewer.LogRepository.LogEntrySelect.sqlite");
    }
    else
    {
        // The embedded SQL has a {0} placeholder for the ID list. The values are longs
        // (never user-controlled text), so joining them into the query is injection-safe.
        query = GetEmbeddedSQL("NuixLogReviewer.LogRepository.LogEntrySelectIdList.sqlite");
        string idlist = String.Join(",", ids.Select(id => id.ToString()));
        query = String.Format(query, idlist);
    }

    return ExecuteReader<NuixLogEntry>(query, new Func<SQLiteDataReader, NuixLogEntry>(reader =>
    {
        string content = reader["Content"] as string;
        // "Flags" may be NULL/DBNull in the database; treat that as "no flags" rather
        // than letting Split throw a NullReferenceException.
        string flagsValue = reader["Flags"] as string;
        NuixLogEntry entry = new NuixLogEntry()
        {
            ID = (long)reader["ID"],
            // SQLite integers come back as long; the entry stores an int line number.
            LineNumber = (int)(long)reader["LineNumber"],
            FilePath = reader["FileName"].ToString(),
            // Display name: "<file name>(<file id>)" so identical file names stay distinguishable.
            FileName = Path.GetFileName(reader["FileName"].ToString()) + "(" + ((long)reader["FileID"]).ToString() + ")",
            TimeStamp = DateTime.FromFileTime((long)reader["TimeStamp"]),
            Channel = reader["Channel"].ToString(),
            Elapsed = TimeSpan.FromMilliseconds((long)reader["Elapsed"]),
            Level = String.Intern(reader["Level"].ToString()), // Use interned string: few distinct level values repeated across many rows
            Source = reader["Source"].ToString(),
            Content = content,
            Flags = (flagsValue ?? String.Empty).Split(' ')
        };
        return entry;
    }));
}
/// <summary>
/// Adds a document to the Lucene index, assuming we are currently InWriteMode (see BeginWrite())
/// </summary>
/// <param name="entry">The log entry to index, should already have corresponding DB ID in NuixLogEntry.ID</param>
/// <exception cref="InvalidOperationException">Thrown when the index is not currently in write mode.</exception>
public void IndexLogEntry(NuixLogEntry entry)
{
    // Guard clause: indexing requires an open writer (established by BeginWrite()).
    if (!InWriteMode)
    {
        // InvalidOperationException (derives from Exception, so existing catches still work)
        // is the conventional type for "object is in the wrong state for this call".
        throw new InvalidOperationException("LogSearchIndex is not currently in write mode!");
    }

    // "id" is the only stored field; the rest are index-only search fields.
    NumericField idField = new NumericField("id", Field.Store.YES, true);
    NumericField lineField = new NumericField("line");
    Field channelField = new Field("channel", "", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS);
    Field levelField = new Field("level", "", Field.Store.NO, Field.Index.ANALYZED);
    Field sourceField = new Field("source", "", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS);
    Field contentField = new Field("content", "", Field.Store.NO, Field.Index.ANALYZED);
    // Constant marker field so a "match every document" query has a term to hit.
    Field existsField = new Field("exists", "yes", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS);
    NumericField dateField = new NumericField("date");
    Field flagsField = new Field("flag", "", Field.Store.NO, Field.Index.ANALYZED);

    Document doc = new Document();
    doc.Add(existsField);
    doc.Add(idField);
    doc.Add(lineField);
    doc.Add(channelField);
    doc.Add(levelField);
    doc.Add(sourceField);
    doc.Add(contentField);
    doc.Add(dateField);
    doc.Add(flagsField);

    idField.SetLongValue(entry.ID);
    lineField.SetLongValue(entry.LineNumber);
    channelField.SetValue(entry.Channel);
    levelField.SetValue(entry.Level);
    sourceField.SetValue(entry.Source);
    contentField.SetValue(entry.Content);

    // Encode the date portion of the timestamp as a yyyyMMdd number, arithmetically
    // rather than round-tripping through a formatted string.
    long date = (entry.TimeStamp.Year * 10000) + (entry.TimeStamp.Month * 100) + entry.TimeStamp.Day;
    dateField.SetLongValue(date);

    // Flags are space-joined so each flag becomes its own analyzed term.
    flagsField.SetValue(String.Join(" ", entry.Flags));

    writer.AddDocument(doc);
}
/// <summary>
/// This should only be called by public method LoadlogFiles since it takes care of index drop and rebuild.
/// Runs a pipeline of long-running tasks — reader → classifier → SQLite inserter → N parallel Lucene
/// indexers — connected by blocking collections, with a null entry used as the end-of-stream sentinel.
/// </summary>
/// <param name="logFile">Path to a log file to load.</param>
/// <param name="startingRecordCount">Running record count carried over from previously loaded files.</param>
/// <param name="pb">ProgressBroadcaster which will receive progress updates, can be null.</param>
/// <returns>The updated running record count after this file has been loaded.</returns>
private long LoadLogFile(string logFile, long startingRecordCount, ProgressBroadcaster pb = null)
{
    FileInfo logFileInfo = new FileInfo(logFile);
    if (logFileInfo.Length < 1)
    {
        // Skip 0 length files
        return startingRecordCount;
    }

    int indexingConcurrency = 4;

    // pb is optional (see doc comment), so every use must be null-conditional.
    pb?.BroadcastStatus("Loading from " + logFile);

    // Can be tricky to do batch insert and get each new record's ID, so instead we query database for current
    // highest ID value and increment and assign IDs here rather than letting DB auto increment do the job.
    long nextId = Database.GetHighestLogEntryID();

    NuixLogReader reader = new NuixLogReader(logFile);
    SQLiteBatchInserter batchInserter = Database.CreateBatchInserter(1000);
    batchInserter.Begin(Database.GetEmbeddedSQL("NuixLogReviewer.LogRepository.InsertLogEntry.sqlite"));

    // Used for progress updates. Lock on this dedicated object, never on "this":
    // lock(this) can deadlock against external code that also locks the instance.
    object locker = new object();
    long recordCount = startingRecordCount;

    List<IEntryClassifier> classifiers = getAllClassifiers();

    BlockingCollection<NuixLogEntry> toInsert = new BlockingCollection<NuixLogEntry>();
    BlockingCollection<NuixLogEntry> toClassify = new BlockingCollection<NuixLogEntry>();
    BlockingCollection<NuixLogEntry> toIndex = new BlockingCollection<NuixLogEntry>();

    // ==== Task Dedicated to Pulling Entries from Source ====
    Task readerConsumer = new Task(new Action(() =>
    {
        foreach (var entry in reader)
        {
            toClassify.Add(entry);
        }
        // Signal that was the last one
        toClassify.Add(null);
    }), TaskCreationOptions.LongRunning);

    // ==== Classify Log Entries ====
    Task classificationTask = new Task(new Action(() =>
    {
        while (true)
        {
            NuixLogEntry entry = toClassify.Take();
            if (entry == null)
            {
                break;
            }

            // Give each classifier a chance to look at this entry and provide flag
            // values to be assigned to the entry.
            HashSet<string> flags = new HashSet<string>();
            foreach (var classifier in classifiers)
            {
                var calculatedFlags = classifier.Classify(entry);
                if (calculatedFlags != null)
                {
                    foreach (var calculatedFlag in calculatedFlags)
                    {
                        flags.Add(calculatedFlag.ToLower());
                    }
                }
            }
            entry.Flags = flags;
            toInsert.Add(entry);
        }
        // Signal that was the last one
        toInsert.Add(null);
    }), TaskCreationOptions.LongRunning);

    // ==== Task Dedicated to Inserting to SQLite Database ====
    Task dbConsumer = new Task(new Action(() =>
    {
        DateTime lastProgress = DateTime.Now;
        while (true)
        {
            NuixLogEntry entry = toInsert.Take();
            if (entry == null)
            {
                break;
            }

            nextId++;

            // Push to SQLite database
            entry.ID = nextId;
            batchInserter["@id"] = entry.ID;
            batchInserter["@linenumber"] = entry.LineNumber;
            batchInserter["@filename"] = Database.GetFilenameID(entry.FilePath);
            batchInserter["@timestamp"] = entry.TimeStamp.ToFileTime();
            batchInserter["@channel"] = Database.GetChannelID(entry.Channel);
            batchInserter["@elapsed"] = entry.Elapsed.TotalMilliseconds;
            batchInserter["@level"] = Database.GetLevelID(entry.Level);
            batchInserter["@source"] = Database.GetSourceID(entry.Source);
            batchInserter["@content"] = entry.Content;
            batchInserter["@flags"] = String.Join(" ", entry.Flags);
            batchInserter.Insert();
            recordCount++;

            // Periodically report progress
            if ((DateTime.Now - lastProgress).TotalMilliseconds >= 1000)
            {
                lock (locker)
                {
                    pb?.BroadcastProgress(recordCount);
                }
                lastProgress = DateTime.Now;
            }

            toIndex.Add(entry);
        }

        // Let each indexing task know there are no more to index
        for (int i = 0; i < indexingConcurrency; i++)
        {
            toIndex.Add(null);
        }
    }), TaskCreationOptions.LongRunning);

    // ==== Series of Tasks Dedicated to Adding Entries to Lucene Index ====
    Task[] indexers = new Task[indexingConcurrency];
    for (int i = 0; i < indexingConcurrency; i++)
    {
        Task indexConsumer = new Task(new Action(() =>
        {
            while (true)
            {
                NuixLogEntry entry = toIndex.Take();
                if (entry == null)
                {
                    break;
                }

                // Push to Lucene
                SearchIndex.IndexLogEntry(entry);
            }
            pb?.BroadcastProgress(recordCount);
        }), TaskCreationOptions.LongRunning);
        indexers[i] = indexConsumer;
        indexConsumer.Start();
    }

    readerConsumer.Start();
    classificationTask.Start();
    dbConsumer.Start();

    // Wait for them all to finish up
    Task.WaitAll(readerConsumer, classificationTask, dbConsumer);
    Task.WaitAll(indexers);

    // Report final progress
    pb?.BroadcastProgress(recordCount);

    // Make sure batch inserter flushes any pending inserts
    batchInserter.Complete();
    Database.ReleaseBatchInserter(batchInserter);

    toClassify.Dispose();
    toInsert.Dispose();
    toIndex.Dispose();

    return recordCount;
}