コード例 #1
0
        /// <summary>
        /// Runs the WordPress analysis against a fixed sample URL and shows the detected
        /// theme and plugin names in a message box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private async void buttonRegExText_Click(object sender, EventArgs e)
        {
            // StoryUrlStats.Create already schedules WpAnalyzer.AnalyzeWordPressUrl for the
            // new instance via Task.Run. Calling the analyzer a second time from here would
            // download the page twice and let two threads write ThemeNames/PluginNames on
            // the same object, so wait for the already-running task instead.
            var testStats = StoryUrlStats.Create("http://www.ShitalShah.com");

            // Awaiting (rather than blocking) keeps the UI thread free during the download.
            await testStats.AnalyzeWpTask;

            MessageBox.Show("Themes: {0}, Plugins: {1}".FormatEx(testStats.ThemeNames, testStats.PluginNames));
        }
コード例 #2
0
        /// <summary>
        /// Downloads the page at <c>urlStats.Url</c> and stores, on <paramref name="urlStats"/>,
        /// the distinct first-group captures of the theme and plugin regular expressions as
        /// "/"-delimited strings. A <see cref="WebException"/> during the download is recorded
        /// on the stats object instead of being propagated.
        /// </summary>
        /// <param name="urlStats">Record whose Url is fetched and whose result or error fields are filled in.</param>
        public static void AnalyzeWordPressUrl(StoryUrlStats urlStats)
        {
            // URL has an extension, so it is not treated as a WordPress blog
            if (endsWithExtensionRegEx.IsMatch(urlStats.Url))
            {
                return;
            }

            using (var client = new WebClient())
            {
                try
                {
                    string html = client.DownloadString(urlStats.Url);

                    HashSet<string> themes = CollectFirstGroupValues(wpThemesRegEx, html);
                    HashSet<string> plugins = CollectFirstGroupValues(wpPluginsRegEx, html);

                    urlStats.ThemeNames = themes.ToDelimitedString("/");
                    urlStats.PluginNames = plugins.ToDelimitedString("/");
                }
                catch (WebException fetchFailure)
                {
                    // Keep the failure on the record; other exception types still propagate.
                    urlStats.FecthErrorStatus = fetchFailure.Status.ToString();
                    urlStats.FetchErrorMessage = fetchFailure.Message;
                }
            }
        }

        // Returns the distinct values of capture group 1 over every match of pattern in text.
        private static HashSet<string> CollectFirstGroupValues(Regex pattern, string text)
        {
            var values = new HashSet<string>();
            foreach (Match hit in pattern.Matches(text))
            {
                values.Add(hit.Groups[1].Value);
            }

            return values;
        }
コード例 #3
0
        /// <summary>
        /// Fetches the page behind <c>urlStats.Url</c> and writes the distinct theme and plugin
        /// names found by the theme/plugin regular expressions (capture group 1) back onto
        /// <paramref name="urlStats"/>, each joined with "/". If the download raises a
        /// <see cref="WebException"/>, its status and message are stored on the record instead.
        /// </summary>
        /// <param name="urlStats">Record providing the URL and receiving the results or the fetch error.</param>
        public static void AnalyzeWordPressUrl(StoryUrlStats urlStats)
        {
            var hasFileExtension = endsWithExtensionRegEx.IsMatch(urlStats.Url);
            if (hasFileExtension)
            {
                // URL has extension not WordPress blog
                return;
            }

            using (var webClient = new WebClient())
            {
                try
                {
                    var html = webClient.DownloadString(urlStats.Url);

                    var themeMatches = wpThemesRegEx.Matches(html);
                    var distinctThemes = new HashSet<string>();
                    for (var i = 0; i < themeMatches.Count; i++)
                    {
                        distinctThemes.Add(themeMatches[i].Groups[1].Value);
                    }

                    var pluginMatches = wpPluginsRegEx.Matches(html);
                    var distinctPlugins = new HashSet<string>();
                    for (var i = 0; i < pluginMatches.Count; i++)
                    {
                        distinctPlugins.Add(pluginMatches[i].Groups[1].Value);
                    }

                    urlStats.ThemeNames = distinctThemes.ToDelimitedString("/");
                    urlStats.PluginNames = distinctPlugins.ToDelimitedString("/");
                }
                catch (WebException downloadError)
                {
                    // Only download failures are recorded here; anything else propagates.
                    urlStats.FecthErrorStatus = downloadError.Status.ToString();
                    urlStats.FetchErrorMessage = downloadError.Message;
                }
            }
        }
コード例 #4
0
        /// <summary>
        /// Builds a <see cref="StoryUrlStats"/> for <paramref name="url"/> and immediately
        /// queues its WordPress analysis with <c>Task.Run</c>; the task is exposed through
        /// <c>AnalyzeWpTask</c>.
        /// </summary>
        /// <param name="url">URL stored on the new record and later fetched by the analysis.</param>
        /// <returns>The new record, whose analysis task may still be running.</returns>
        public static StoryUrlStats Create(string url)
        {
            // Min starts at the largest long and max at the smallest
            // (presumably so the first recorded story replaces both - confirm in AddStory).
            var stats = new StoryUrlStats
            {
                Url = url,
                CreatedUnixTimeMin = long.MaxValue,
                CreatedUnixTimeMax = long.MinValue
            };

            // Assigned after construction because the lambda needs to capture the instance.
            stats.AnalyzeWpTask = Task.Run(() => WpAnalyzer.AnalyzeWordPressUrl(stats));
            return stats;
        }
コード例 #5
0
        /// <summary>
        /// Factory for <see cref="StoryUrlStats"/>: stores <paramref name="url"/>, initialises the
        /// created-time bounds, and starts <c>WpAnalyzer.AnalyzeWordPressUrl</c> for the new
        /// record via <c>Task.Run</c>.
        /// </summary>
        /// <param name="url">URL to store on the record and analyse.</param>
        /// <returns>The new record; its <c>AnalyzeWpTask</c> tracks the started analysis.</returns>
        public static StoryUrlStats Create(string url)
        {
            StoryUrlStats created = new StoryUrlStats();
            created.Url = url;

            // Bounds start inverted: min at long.MaxValue, max at long.MinValue.
            created.CreatedUnixTimeMin = long.MaxValue;
            created.CreatedUnixTimeMax = long.MinValue;

            Task analysis = Task.Run(() => { WpAnalyzer.AnalyzeWordPressUrl(created); });
            created.AnalyzeWpTask = analysis;

            return created;
        }
コード例 #6
0
        /// <summary>
        /// Reads the story responses from the file named in <c>textBoxStoriesFilePath</c>, groups
        /// story URLs by host into <see cref="StoryUrlStats"/> records, waits for every record's
        /// WordPress analysis task, and writes one tab-separated line per host that has theme or
        /// plugin names to the file named in <c>textBoxUrlAnalysisFilePath</c>. Finally shows the
        /// total, blank-URL and bad-URL counts.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void buttonAnalyzeUniqueUrls_Click(object sender, EventArgs e)
        {
            var storyUrlsStats = new Dictionary<string, StoryUrlStats>();
            int storyCount = 0, blankUrls = 0, badUrls = 0;

            //Consider stories up to 3 years old for this analysis
            var minUnixDate = DateTime.UtcNow.Subtract(TimeSpan.FromDays(365 * 3)).ToUnixTime();

            //Set when the first story older than the cutoff is seen; all reading stops there
            var reachedCutoff = false;

            foreach (var responseJson in JsonNetUtils.DeserializeSequenceFromJson<JObject>(textBoxStoriesFilePath.Text))
            {
                try
                {
                    foreach (var hitJson in responseJson["hits"])
                    {
                        storyCount++;

                        //Failures are isolated per hit so that one malformed hit does not
                        //discard the remaining hits of the same response
                        try
                        {
                            if (hitJson["created_at_i"].Value<long>() < minUnixDate)
                            {
                                reachedCutoff = true;
                                break;
                            }

                            var urlString = hitJson["url"].ToString();
                            if (string.IsNullOrWhiteSpace(urlString))
                            {
                                blankUrls++;
                                continue;
                            }

                            var uri = new Uri(urlString);
                            var host = uri.Host;

                            //One stats record per host; the first URL seen for the host is the sample
                            var stats = storyUrlsStats.AddOrGetValue(host, () => StoryUrlStats.Create(urlString));
                            stats.AddStory(hitJson);
                        }
                        catch (UriFormatException)
                        {
                            badUrls++;
                        }
                        catch (Exception hitError)
                        {
                            //Still best-effort, but no longer silent
                            System.Diagnostics.Trace.TraceWarning("Skipping malformed story hit: {0}", hitError);
                        }
                    }
                }
                catch (Exception responseError)
                {
                    //For example a response without a usable "hits" collection
                    System.Diagnostics.Trace.TraceWarning("Skipping unreadable stories response: {0}", responseError);
                }

                if (reachedCutoff)
                {
                    break;
                }
            }

            //Blocks until every per-host analysis started by StoryUrlStats.Create has finished
            Task.WaitAll(storyUrlsStats.Values.Select(s => s.AnalyzeWpTask).ToArray());

            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            using (var urlStatsFile = File.CreateText(textBoxUrlAnalysisFilePath.Text))
            {
                foreach (var urlStatsKvp in storyUrlsStats)
                {
                    var hostStats = urlStatsKvp.Value;

                    //Only hosts with at least one detected theme or plugin are written
                    if (string.IsNullOrWhiteSpace(hostStats.ThemeNames) && string.IsNullOrWhiteSpace(hostStats.PluginNames))
                    {
                        continue;
                    }

                    //Tab separated Column Header
                    //Host	URLSample	PointsSum	ThemeNames	PluginNames	StoryCount	CommentsSum	MaxDate	MinDate
                    var values = new string[]
                    {
                        urlStatsKvp.Key, hostStats.Url, hostStats.PointsSum.ToStringInvariant(),
                        hostStats.ThemeNames, hostStats.PluginNames,
                        hostStats.StoryCount.ToStringInvariant(), hostStats.CommentsSum.ToStringInvariant(),
                        //Invariant formatting, consistent with the other numeric columns
                        hostStats.CreatedUnixTimeMax.ToString(invariant), hostStats.CreatedUnixTimeMin.ToString(invariant)
                    };

                    urlStatsFile.WriteLine(values.ToDelimitedString("\t"));
                }
            }

            MessageBox.Show("Total: {0}, Blank: {1}, Bad: {2}".FormatEx(storyCount, blankUrls, badUrls));
        }