/// <summary>
/// Passes the text of the supplied message event on to the Markov chain trainer.
/// </summary>
/// <param name="message">Event data; only its <c>Text</c> member is read.</param>
private void TrainCallback(IrcMessageEventArgs message) => _markovChainTrainer.Train(message.Text);
/// <summary>
/// Builds a Markov chain from the log file named by <c>parameters.Load</c>, writing
/// progress text to <paramref name="output"/> along the way.
/// </summary>
/// <remarks>
/// The first line of the file is skipped, and only lines longer than 10 characters that
/// contain '&lt;' are kept as messages. When <c>parameters.Sections</c> is greater than 1,
/// only the last 1/Sections of those messages is used, in reverse order. When
/// <c>parameters.Filter</c> is non-empty it is applied as a case-insensitive regular
/// expression, keeping messages whose text or nick matches.
/// </remarks>
/// <param name="parameters">
/// Training settings: <c>Load</c> (file path), <c>Sections</c>, <c>Filter</c> and
/// <c>Reports</c> (how many progress reports to spread over the run; 0 disables them).
/// </param>
/// <param name="output">Receives progress messages; may be <c>null</c> to suppress them.</param>
/// <returns>The trained <see cref="MarkovChainString"/>.</returns>
public static MarkovChainString Train(Config parameters, Action<string> output)
{
    Func<int, string> formatInteger = n => n.ToString("N0");
    Func<float, string> formatFloat = n => n.ToString("0.");

    // Message text is whatever follows the last '>' on the line.
    // NOTE(review): a message that itself contains '>' is reduced to the part after it - confirm this is intended.
    Func<string, string> getMessageText = m => m.Split('>').Last().Trim();

    // The nick is the third space-separated token. A line with fewer tokens yields an
    // empty nick instead of throwing, so one malformed line cannot abort the whole run.
    Func<string, string> getMessageNick = m => m.Split(' ').ElementAtOrDefault(2) ?? string.Empty;

    // The time is the text before the first ']' with any leading '[' removed.
    Func<string, string> getMessageTime = m => m.Split(']').First().TrimStart('[');

    Func<IEnumerable<string>, IEnumerable<string>> removeIrcEvents =
        m => m.Skip(1).Where(message => message.Length > 10 && message.Contains("<"));

    output?.Invoke($"Initializing trainer '{parameters.Load}'");
    var markovChainString = new MarkovChainString();
    var markovChainTrainer = new Trainer(markovChainString);

    output?.Invoke($"Loading '{parameters.Load}'");
    var lines = File.ReadAllLines(parameters.Load);
    var numLinesFormatted = formatInteger(lines.Length);
    output?.Invoke($"Number of lines: {numLinesFormatted}");

    output?.Invoke("Removing IRC events from log");

    // Materialize after every stage: the list is counted and iterated several times, and a
    // deferred query would redo the filtering (including the regex matching) on every pass.
    List<string> messages = removeIrcEvents(lines).ToList();

    if (parameters.Sections > 1)
    {
        output?.Invoke($"Splitting log into {parameters.Sections} parts. Generating text from one part");
        var partSize = messages.Count / (int)parameters.Sections;

        // Enumerable.Reverse is called explicitly because List<T>.Reverse() reverses in place
        // and returns void.
        messages = Enumerable.Reverse(messages).Take(partSize).ToList();
    }

    var numMessagesFormatted = formatInteger(messages.Count);
    output?.Invoke($"Number of messages: {numMessagesFormatted}");

    // A null filter is treated like an empty one; passing null to Regex would throw.
    if (!string.IsNullOrEmpty(parameters.Filter))
    {
        output?.Invoke($"Applying filter '{parameters.Filter}'");

        // Build the regex once instead of once per message.
        var filter = new Regex(parameters.Filter, RegexOptions.IgnoreCase);
        messages = messages
            .Where(message =>
            {
                var text = getMessageText(message);
                var nick = getMessageNick(message);
                return filter.IsMatch(text) || filter.IsMatch(nick);
            })
            .ToList();

        numMessagesFormatted = formatInteger(messages.Count);
        output?.Invoke($"Number of messages: {numMessagesFormatted}");
    }

    var numReports = parameters.Reports;
    var messageCount = messages.Count;

    // Guard both division-by-zero cases: Reports == 0 (no reports are emitted) and fewer
    // messages than reports (the quotient is 0, so report on every message instead).
    var reportsEnabled = numReports != 0;
    var step = reportsEnabled ? messageCount / numReports : 0;
    if (step == 0)
    {
        step = 1;
    }

    output?.Invoke("Processing messages");

    var processed = 0;
    foreach (var message in messages)
    {
        // Decide using the zero-based index, then count this message before reporting,
        // so the first report already shows one message processed.
        var reportDue = reportsEnabled && processed % step == 0;
        processed++;

        if (reportDue)
        {
            var percentage = (float)100 / messageCount * processed;
            var percentageFormatted = formatFloat(percentage);
            var wordCountFormatted = formatInteger(markovChainTrainer.WordCount);
            var sentencesFormatted = formatInteger(markovChainTrainer.SentenceCount);
            var messageTimeFormatted = getMessageTime(message);
            output?.Invoke($"Processed: {percentageFormatted} %, {wordCountFormatted} words, {sentencesFormatted} sentences, logtime {messageTimeFormatted}");
        }

        var messageText = getMessageText(message);
        markovChainTrainer.Train(messageText);
    }

    var uniqueWordsFormatted = formatInteger(markovChainString.Nodes.Count);
    output?.Invoke("Finished training");
    output?.Invoke($"Unique words in brain: {uniqueWordsFormatted}");

    return markovChainString;
}