Exemplo n.º 1
0
 /// <summary>
 /// Forwards the <c>Text</c> of the supplied message event to <c>_markovChainTrainer.Train</c>.
 /// </summary>
 /// <param name="message">Event data whose <c>Text</c> property is handed to the trainer.</param>
 private void TrainCallback(IrcMessageEventArgs message)
     => _markovChainTrainer.Train(message.Text);
Exemplo n.º 2
0
        /// <summary>
        /// Trains a new <see cref="MarkovChainString"/> on the chat lines of the log file named by <c>parameters.Load</c>.
        /// </summary>
        /// <remarks>
        /// Pipeline: read every line of the file; drop the first line and any line that is 10 characters or
        /// shorter or contains no '&lt;'; when <c>parameters.Sections</c> is greater than 1 keep only the last
        /// 1/Sections of the remaining lines (in reverse order); when <c>parameters.Filter</c> is non-empty keep
        /// only lines whose text or nick matches it as a case-insensitive regular expression; finally pass the
        /// text of each surviving line to the trainer, emitting progress through <paramref name="output"/>.
        /// </remarks>
        /// <param name="parameters">Settings read by this method: <c>Load</c>, <c>Sections</c>, <c>Filter</c> and <c>Reports</c>.</param>
        /// <param name="output">Receives human-readable progress messages; may be <c>null</c> to suppress them.</param>
        /// <returns>The chain instance that was given to the trainer.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="parameters"/> is <c>null</c>.</exception>
        public static MarkovChainString Train(Config parameters, Action <string> output)
        {
            if (parameters == null)
            {
                throw new ArgumentNullException(nameof(parameters));
            }

            Func <int, string>    formatInteger  = (n) => n.ToString("N0");
            Func <float, string>  formatFloat    = (n) => n.ToString("0.");
            // Text is whatever follows the last '>' on the line.
            Func <string, string> getMessageText = (m) => m.Split('>').Last().Trim();
            // Nick is the third space-separated token; lines with fewer tokens yield "" instead of throwing.
            Func <string, string> getMessageNick = (m) => m.Split(' ').Skip(2).FirstOrDefault() ?? string.Empty;
            // Time is everything before the first ']' with leading '[' characters removed.
            Func <string, string> getMessageTime = (m) => m.Split(']').First().TrimStart('[');
            Func <IEnumerable <string>, IEnumerable <string> > removeIrcEvents = (m) => m.Skip(1).Where(message => message.Length > 10 && message.Contains("<"));

            output?.Invoke($"Initializing trainer '{parameters.Load}'");
            var markovChainString  = new MarkovChainString();
            var markovChainTrainer = new Trainer(markovChainString);

            output?.Invoke($"Loading '{parameters.Load}'");
            var lines             = File.ReadAllLines(parameters.Load);
            var numLinesFormatted = formatInteger(lines.Length);

            output?.Invoke($"Number of lines: {numLinesFormatted}");

            output?.Invoke("Removing IRC events from log");
            // Materialize once: the sequence is counted and iterated several times below, and a deferred
            // query would re-run every Split/Where on each pass.
            List <string> messages = removeIrcEvents(lines).ToList();

            if (parameters.Sections > 1)
            {
                output?.Invoke($"Splitting log into {parameters.Sections} parts. Generating text from one part");
                // Enumerable.Reverse is called explicitly because List<T>.Reverse() reverses in place and returns void.
                messages = Enumerable.Reverse(messages).Take(messages.Count / (int)parameters.Sections).ToList();
            }
            var numMessagesFormatted = formatInteger(messages.Count);

            output?.Invoke($"Number of messages: {numMessagesFormatted}");

            // A null filter is treated like an empty one rather than being passed to Regex (which would throw).
            if (!string.IsNullOrEmpty(parameters.Filter))
            {
                output?.Invoke($"Applying filter '{parameters.Filter}'");

                // Build the regex once instead of once per message.
                var filter = new Regex(parameters.Filter, RegexOptions.IgnoreCase);
                messages = messages
                           .Where(message => filter.IsMatch(getMessageText(message)) || filter.IsMatch(getMessageNick(message)))
                           .ToList();

                numMessagesFormatted = formatInteger(messages.Count);
                output?.Invoke($"Number of messages: {numMessagesFormatted}");
            }

            var processed    = 0;
            var numReports   = parameters.Reports;
            var messageCount = messages.Count;
            // Clamp to at least 1 so fewer messages than reports cannot produce a zero modulus;
            // a non-positive report count disables progress output instead of dividing by zero.
            var step         = numReports > 0 ? Math.Max(1, messageCount / numReports) : 0;

            output?.Invoke("Processing messages");
            foreach (var message in messages)
            {
                if (step > 0 && processed % step == 0)
                {
                    // Based on messages already trained, so it agrees with the word/sentence counters below.
                    var percentage           = 100f * processed / messageCount;
                    var percentageFormatted  = formatFloat(percentage);
                    var wordCountFormatted   = formatInteger(markovChainTrainer.WordCount);
                    var sentencesFormatted   = formatInteger(markovChainTrainer.SentenceCount);
                    var messageTimeFormatted = getMessageTime(message);

                    output?.Invoke($"Processed: {percentageFormatted} %, {wordCountFormatted} words, {sentencesFormatted} sentences, logtime {messageTimeFormatted}");
                }

                markovChainTrainer.Train(getMessageText(message));
                processed++;
            }

            var uniqueWordsFormatted = formatInteger(markovChainString.Nodes.Count);

            output?.Invoke("Finished training");
            output?.Invoke($"Unique words in brain: {uniqueWordsFormatted}");

            return markovChainString;
        }