Ejemplo n.º 1
0
        /// <summary>
        /// Main method: executes all required steps
        /// </summary>
        /// <param name="args">Command-line arguments</param>
        /// <remarks>
        /// Parses the supplied command-line arguments, passes every file listed in
        /// <c>InputFilenames</c> to <c>Process</c> and finally calls <c>SaveOutput</c>.
        /// The output is written to the file named by <c>OutputFilename</c>, or to
        /// standard output when no output file name is set.
        /// </remarks>
        protected void Execute(string[] args)
        {
            CommandLineParser parser = new CommandLineParser(args, parameters);

            parser.Parse();
            CommandLineParsed(parser);

            foreach (string input in InputFilenames)
            {
                using (Stream inputStream = DataFileTools.OpenInputFile(input))
                {
                    Process(inputStream);
                }
            }

            string outputFilename = OutputFilename;

            // Without an output file name the result goes to standard *output*. The stream
            // handed to SaveOutput must be writable, which the standard input stream is not.
            using (Stream outputStream = String.IsNullOrEmpty(outputFilename)
                ? Console.OpenStandardOutput()
                : new FileStream(outputFilename, FileMode.Create, FileAccess.Write, FileShare.Read))
            {
                SaveOutput(outputStream);
            }
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Parse a space-separated file, reporting every completed row to the given handler
 /// </summary>
 /// <param name="filename">Name of the file to open through <c>DataFileTools.OpenInputFile</c></param>
 /// <param name="processEntryDelegate">Handler subscribed to the parser's <c>RowComplete</c> event</param>
 /// <exception cref="ArgumentNullException"><paramref name="processEntryDelegate"/> is <c>null</c></exception>
 /// <remarks>
 /// While the file is being parsed, <c>fileReader</c> refers to the reader in use; it is
 /// reset to <c>null</c> once parsing has finished normally.
 /// </remarks>
 private void LoadSpaceSeparatedFile(string filename, EventHandler<RowEventArgs> processEntryDelegate)
 {
     if (processEntryDelegate == null)
     {
         throw new ArgumentNullException("processEntryDelegate");
     }

     var input = DataFileTools.OpenInputFile(filename);
     using (var countingReader = new CountingStreamReader(filename, input, Encoding.UTF8))
     {
         // Publish the active reader before any row event can fire
         fileReader = countingReader;

         var parser = new SpaceSeparatedFileParser(countingReader);
         parser.RowComplete += processEntryDelegate;
         parser.Parse();
     }

     fileReader = null;
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Run the analysis over one input file
 /// </summary>
 /// <param name="filename">Filename of the input file</param>
 /// <remarks>
 /// Opens the file, wraps it in an <see cref="XmlReader"/> and lets a
 /// <c>RevisionXmlDumpParser</c> raise the SiteInfoProcessed, PageStart, PageComplete and
 /// RevisionComplete events on this program's handlers. The static members <c>stream</c>,
 /// <c>dump</c>, <c>length</c>, <c>start</c> and <c>stop</c> are assigned along the way.
 /// An <c>AbortException</c> thrown while parsing ends the run early without being propagated.
 /// </remarks>
 private static void Analyze(string filename)
 {
     using (stream = DataFileTools.OpenInputFile(filename))
     {
         // No resolver: external resources referenced by the XML are not fetched
         var readerSettings = new XmlReaderSettings();
         readerSettings.XmlResolver = null;

         using (XmlReader dumpReader = XmlReader.Create(stream, readerSettings))
         {
             dump = new RevisionXmlDumpParser(dumpReader);
             dump.SiteInfoProcessed += SiteInfoProcessed;
             dump.PageStart         += PageStart;
             dump.PageComplete      += PageComplete;
             dump.RevisionComplete  += RevisionComplete;

             length = stream.Length;
             start  = DateTime.Now;

             // Ctrl+C is delivered as ordinary key input from here on instead of interrupting the process
             Console.TreatControlCAsInput = true;

             try
             {
                 dump.Parse();
             }
             catch (AbortException)
             {
                 if (!quiet)
                 {
                     Console.Error.Write("\nAborted");
                 }
             }

             stop = DateTime.Now;
             Console.Error.WriteLine();

             if (!quiet)
             {
                 var elapsed = stop - start;
                 Console.Error.WriteLine(
                     "{0} pages ({1} revisions) by {2} users analyzed in {3:n0} s",
                     pageCount,
                     totalRevisionCount,
                     users.Count,
                     elapsed.TotalSeconds);
             }
         }
     }
 }
        /// <summary>
        /// Parse the specified data
        /// </summary>
        /// <remarks>
        /// Raises <c>OnSiteInfoProcessed</c> once, then reads the pages from the
        /// <c>page</c> table (restricted to <c>PageName</c> when it is set). For every page,
        /// <c>OnPageStart</c> is raised, followed by one <c>OnRevisionComplete</c> per
        /// revision (restricted to the <c>MinDateTime</c>/<c>MaxDateTime</c> range when set)
        /// and finally <c>OnPageComplete</c>.
        /// </remarks>
        /// <exception cref="NotImplementedException">
        /// A revision text is flagged as <c>object</c>, or carries flags without <c>utf-8</c>
        /// </exception>
        public override void Parse()
        {
            // Format used for the timestamp bounds passed to the revision query
            const string timestampFormat = "yyyyMMddHHmmss";

            OnSiteInfoProcessed();

            using (var pageConnection = new MySqlConnection(connectionString))
            {
                pageConnection.Open();

                using (var pageCommand = new MySqlCommand())
                {
                    pageCommand.Connection = pageConnection;

                    // page_is_redirect must be part of the result set because it is read below.
                    // It is listed last so the columns are still read in SELECT order, as
                    // CommandBehavior.SequentialAccess expects.
                    var pageCmdBuilder = new StringBuilder();
                    pageCmdBuilder.Append("SELECT page_id, page_namespace, page_title, page_is_redirect FROM ");
                    pageCmdBuilder.Append(DbTablePrefix);
                    pageCmdBuilder.Append("page");
                    if (!String.IsNullOrEmpty(PageName))
                    {
                        Namespace ns;
                        string    title;
                        Page.ParseTitle(PageName, out ns, out title);
                        pageCmdBuilder.Append(" WHERE page_title=?pagename AND page_namespace=?namespace");
                        pageCommand.Parameters.AddWithValue("namespace", ns);
                        pageCommand.Parameters.AddWithValue("pagename", title);
                    }
                    pageCommand.CommandText = pageCmdBuilder.ToString();

                    using (MySqlDataReader pageReader = pageCommand.ExecuteReader(CommandBehavior.SequentialAccess))
                    {
                        while (pageReader.Read())
                        {
                            int    pageId     = pageReader.GetInt32("page_id");
                            var    ns         = (Namespace)pageReader.GetInt32("page_namespace");
                            string title      = pageReader.GetString("page_title");
                            bool   isRedirect = pageReader.GetBoolean("page_is_redirect");
                            var    page       = new Page(ns, title, pageId, isRedirect);
                            OnPageStart(page);

                            // The revisions are read over a second connection because pageReader
                            // is still open on pageConnection.
                            using (var revisionConnection = new MySqlConnection(connectionString))
                            {
                                revisionConnection.Open();

                                using (var revisionCommand = new MySqlCommand())
                                {
                                    revisionCommand.Connection = revisionConnection;

                                    var revisionCmdBuilder = new StringBuilder();
                                    revisionCmdBuilder.Append("SELECT rev_id, rev_parent_id, rev_timestamp, rev_minor_edit, rev_user, rev_user_text, rev_comment, rev_content_model, rev_content_format, old_text, old_flags FROM ");
                                    revisionCmdBuilder.Append(DbTablePrefix);
                                    revisionCmdBuilder.Append("revision INNER JOIN ");
                                    revisionCmdBuilder.Append(DbTablePrefix);
                                    revisionCmdBuilder.Append("text ON old_id=rev_text_id WHERE rev_page=?pageid");
                                    revisionCommand.Parameters.AddWithValue("pageid", pageId);

                                    // The statement already has a WHERE clause, so the optional
                                    // date bounds are chained with AND.
                                    if (MinDateTime != null)
                                    {
                                        revisionCmdBuilder.Append(" AND rev_timestamp>=?mintimestamp");
                                        revisionCommand.Parameters.AddWithValue("mintimestamp", MinDateTime.Value.ToString(timestampFormat, System.Globalization.CultureInfo.InvariantCulture));
                                    }
                                    if (MaxDateTime != null)
                                    {
                                        revisionCmdBuilder.Append(" AND rev_timestamp<=?maxtimestamp");
                                        revisionCommand.Parameters.AddWithValue("maxtimestamp", MaxDateTime.Value.ToString(timestampFormat, System.Globalization.CultureInfo.InvariantCulture));
                                    }

                                    revisionCommand.CommandText = revisionCmdBuilder.ToString();

                                    using (MySqlDataReader revisionReader = revisionCommand.ExecuteReader(CommandBehavior.SequentialAccess))
                                    {
                                        while (revisionReader.Read())
                                        {
                                            // NOTE(review): the typed getters presumably throw on NULL columns;
                                            // confirm that none of these columns can be NULL in the target schema.
                                            int      revisionId = revisionReader.GetInt32("rev_id");
                                            int      parentId   = revisionReader.GetInt32("rev_parent_id");
                                            DateTime timestamp  = DataFileTools.ParseDateTime(revisionReader.GetString("rev_timestamp"));
                                            bool     minor      = revisionReader.GetBoolean("rev_minor_edit");
                                            int      userId     = revisionReader.GetInt32("rev_user");
                                            string   userText   = revisionReader.GetString("rev_user_text");
                                            string   comment    = revisionReader.GetString("rev_comment");
                                            string   model      = revisionReader.GetString("rev_content_model");
                                            string   format     = revisionReader.GetString("rev_content_format");
                                            string   text       = revisionReader.GetString("old_text");
                                            string   textFlags  = revisionReader.GetString("old_flags");

                                            // A user id of 0 marks an edit made without an account
                                            User contributor;
                                            if (userId == 0)
                                            {
                                                contributor = new AnonymousUser(userText);
                                            }
                                            else
                                            {
                                                contributor = new RegisteredUser(userText, userId);
                                            }

                                            // old_flags is a comma-separated list describing how old_text is stored
                                            if (!String.IsNullOrEmpty(textFlags))
                                            {
                                                string[] flags = textFlags.Split(',');

                                                if (flags.Contains("gzip"))
                                                {
                                                    text = DataFileTools.DecompressGZippedString(text);
                                                }

                                                if (flags.Contains("object"))
                                                {
                                                    throw new NotImplementedException("PHP serialized objects not supported");
                                                }

                                                if (flags.Contains("utf-8"))
                                                {
                                                    // Re-interpret the text's bytes (in the system default encoding) as UTF-8
                                                    byte[] binary = Encoding.Default.GetBytes(text);
                                                    text = new string(Encoding.UTF8.GetChars(binary));
                                                }
                                                else
                                                {
                                                    throw new NotImplementedException("Legacy encoding not supported");
                                                }
                                            }

                                            OnRevisionComplete(new Revision(page, revisionId, parentId, timestamp, minor, contributor, comment, model, format, text));
                                        }
                                    }
                                }
                            }

                            OnPageComplete(page);
                        }
                    }
                }
            }
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Handle the value supplied for this argument by parsing it as a date/time
 /// </summary>
 /// <param name="argName">Name of the argument on the command line</param>
 /// <param name="arg">Value given to the argument (after an equal sign)</param>
 protected override void ParseArgument(string argName, string arg)
 {
     // The base class processes the argument first; the parsed value is stored afterwards
     base.ParseArgument(argName, arg);

     var parsedValue = DataFileTools.ParseDateTime(arg);
     argumentValue   = parsedValue;
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Program entry point
        /// </summary>
        /// <param name="args">Command-line arguments</param>
        /// <remarks>
        /// Leading arguments that start with <c>-</c> are treated as options; <c>--</c> ends
        /// the option list explicitly. Exactly two positional arguments must follow: the
        /// input file name and the output file name. Any exception that escapes processing
        /// is written to standard error and results in a non-zero process exit code.
        /// </remarks>
        private static void Main(string[] args)
        {
            try
            {
                int       i     = 0;
                DateTime? date0 = null;
                try
                {
                    for (i = 0; i < args.Length; ++i)
                    {
                        // Option names are fixed ASCII text, so compare them ordinally
                        // rather than by the rules of the current culture.
                        if (!args[i].StartsWith("-", StringComparison.Ordinal))
                        {
                            break;
                        }
                        if (args[i] == "--")
                        {
                            ++i;
                            break;
                        }
                        else if (args[i] == "--quiet" || args[i] == "-q")
                        {
                            if (quiet)
                            {
                                ArgumentError("Duplicate --quiet argument.");
                            }
                            quiet = true;
                        }
                        else if (args[i] == "--pernamespace")
                        {
                            if (perNamespace)
                            {
                                ArgumentError("Duplicate --pernamespace argument.");
                            }
                            perNamespace = true;
                        }
                        else if (args[i].StartsWith("--date0=", StringComparison.Ordinal))
                        {
                            if (date0 != null)
                            {
                                ArgumentError("Duplicate --date0 argument.");
                            }
                            string arg = args[i].Substring(8);
                            date0 = DataFileTools.ParseDateTime(arg);
                        }
                        else if (args[i].StartsWith("--botlist=", StringComparison.Ordinal))
                        {
                            if (botList != null)
                            {
                                ArgumentError("Duplicate --botlist argument.");
                            }
                            string arg = args[i].Substring(10);
                            botList = LoadBotList(arg);
                        }
                        else if (args[i].StartsWith("--repeatlimit=", StringComparison.Ordinal))
                        {
                            // A non-negative RepeatLimit is taken to mean the option was already given
                            if (RepeatLimit >= 0)
                            {
                                ArgumentError("Duplicate --repeatlimit argument.");
                            }
                            string arg = args[i].Substring(14);
                            RepeatLimit = Int32.Parse(arg);
                            if (RepeatLimit < 0)
                            {
                                ArgumentError("Invalid value of repeat limit");
                            }
                        }
                        else if (args[i] == "-V" || args[i] == "--version")
                        {
                            Console.WriteLine(AboutLine);
                            return;
                        }
                        else if (args[i] == "-h" || args[i] == "-?" || args[i] == "--help")
                        {
                            // Both positional arguments are mandatory (see the count check below)
                            Console.WriteLine("Usage: UserStats [options...] inputfile outputfile");
                            Console.WriteLine("Options:");
                            Console.WriteLine("\t--date0=DATE\tDate of dump creation");
                            Console.WriteLine("\t--botlist=FILENAME\tFile with bot usernames");
                            Console.WriteLine("\t--pernamespace\tOutput per-namespace statistics");
                            Console.WriteLine("\t--repeatlimit=LIMIT\tLimit for repeated edits [minutes]");
                            Console.WriteLine("\t--quiet\tSuppress some output");
                            return;
                        }
                        else
                        {
                            ArgumentError("Unknown argument");
                        }
                    }
                }
                catch (Exception e)
                {
                    // Failures while interpreting an option value are reported as argument errors
                    ArgumentError(e.Message);
                }

                // Exactly two positional arguments must remain: input file and output file
                if (i != args.Length - 2)
                {
                    ArgumentError("");
                }

                // Defaults for options that were not given
                if (date0 == null)
                {
                    date0 = DateTime.Now;
                }
                if (RepeatLimit < 0)
                {
                    RepeatLimit = 20;
                }
                if (botList == null)
                {
                    botList = new HashSet<string>();
                }

                monthAgo      = date0.Value.AddDays(-30);
                inputFilename = args[i];
                string outputFilename = args[i + 1];

                Analyze(inputFilename);
                SaveResults(outputFilename);
            }
            catch (Exception e)
            {
                Console.Error.WriteLine("Error: {0}", e);

                // Let calling scripts see that the run failed
                Environment.ExitCode = 1;
            }
        }