/// <summary>
/// Click handler that runs the WordPress analyzer against a fixed sample URL and shows
/// the resulting theme and plugin names in a message box.
/// </summary>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void buttonRegExText_Click(object sender, EventArgs e)
{
    const string sampleUrl = "http://www.ShitalShah.com";

    var sampleStats = StoryUrlStats.Create(sampleUrl);

    // Analyze synchronously so the names are populated before they are displayed.
    // NOTE(review): confirm whether StoryUrlStats.Create already schedules this same
    // analysis in the background; if it does, this explicit call repeats the work.
    WpAnalyzer.AnalyzeWordPressUrl(sampleStats);

    var summary = "Themes: {0}, Plugins: {1}".FormatEx(sampleStats.ThemeNames, sampleStats.PluginNames);
    MessageBox.Show(summary);
}
/// <summary>
/// Downloads the page at <c>urlStats.Url</c> and stores the distinct theme and plugin
/// names matched in its content on <paramref name="urlStats"/>.
/// </summary>
/// <remarks>
/// Nothing is downloaded when the URL matches <c>endsWithExtensionRegEx</c>. A
/// <see cref="WebException"/> is not rethrown; its status and message are stored on
/// <paramref name="urlStats"/> instead.
/// </remarks>
/// <param name="urlStats">Carries the URL to fetch and receives the results.</param>
public static void AnalyzeWordPressUrl(StoryUrlStats urlStats)
{
    var hasExtension = endsWithExtensionRegEx.IsMatch(urlStats.Url);
    if (hasExtension)
    {
        // URL has extension not WordPress blog
        return;
    }

    using (var client = new WebClient())
    {
        try
        {
            var html = client.DownloadString(urlStats.Url);

            var themes = CollectFirstGroupValues(wpThemesRegEx.Matches(html));
            var plugins = CollectFirstGroupValues(wpPluginsRegEx.Matches(html));

            urlStats.ThemeNames = themes.ToDelimitedString("/");
            urlStats.PluginNames = plugins.ToDelimitedString("/");
        }
        catch (WebException downloadError)
        {
            urlStats.FecthErrorStatus = downloadError.Status.ToString();
            urlStats.FetchErrorMessage = downloadError.Message;
        }
    }
}

// Gathers the distinct values of capture group 1 across the supplied matches.
private static HashSet<string> CollectFirstGroupValues(MatchCollection matches)
{
    var values = new HashSet<string>();
    foreach (Match match in matches)
    {
        values.Add(match.Groups[1].Value);
    }

    return values;
}
/// <summary>
/// Fetches the page behind <c>urlStats.Url</c> and fills in the theme and plugin names
/// found by <c>wpThemesRegEx</c> and <c>wpPluginsRegEx</c>.
/// </summary>
/// <remarks>
/// Returns without fetching when the URL matches <c>endsWithExtensionRegEx</c>. When the
/// download raises a <see cref="WebException"/>, its status and message are written to
/// <paramref name="urlStats"/> and the exception is not propagated.
/// </remarks>
/// <param name="urlStats">Supplies the URL and receives the analysis results.</param>
public static void AnalyzeWordPressUrl(StoryUrlStats urlStats)
{
    // URL has extension not WordPress blog
    if (endsWithExtensionRegEx.IsMatch(urlStats.Url))
    {
        return;
    }

    using (var webClient = new WebClient())
    {
        try
        {
            var markup = webClient.DownloadString(urlStats.Url);

            // Capture group 1 of every theme match; the set drops repeats.
            var distinctThemes = new HashSet<string>();
            MatchCollection themeMatches = wpThemesRegEx.Matches(markup);
            for (int i = 0; i < themeMatches.Count; i++)
            {
                distinctThemes.Add(themeMatches[i].Groups[1].Value);
            }

            // Same for plugins.
            var distinctPlugins = new HashSet<string>();
            MatchCollection pluginMatches = wpPluginsRegEx.Matches(markup);
            for (int i = 0; i < pluginMatches.Count; i++)
            {
                distinctPlugins.Add(pluginMatches[i].Groups[1].Value);
            }

            urlStats.ThemeNames = distinctThemes.ToDelimitedString("/");
            urlStats.PluginNames = distinctPlugins.ToDelimitedString("/");
        }
        catch (WebException fetchFailure)
        {
            urlStats.FecthErrorStatus = fetchFailure.Status.ToString();
            urlStats.FetchErrorMessage = fetchFailure.Message;
        }
    }
}
/// <summary>
/// Builds a <c>StoryUrlStats</c> for <paramref name="url"/> and starts its WordPress
/// analysis on a background task.
/// </summary>
/// <param name="url">URL stored on the new instance and passed to the analyzer.</param>
/// <returns>
/// The new instance; <c>AnalyzeWpTask</c> holds the task that was started and may still be
/// running when this method returns.
/// </returns>
public static StoryUrlStats Create(string url)
{
    var created = new StoryUrlStats
    {
        Url = url,
        // Min starts at the largest long and max at the smallest
        // (presumably so the first recorded story replaces both - confirm in AddStory).
        CreatedUnixTimeMin = long.MaxValue,
        CreatedUnixTimeMax = long.MinValue
    };

    created.AnalyzeWpTask = Task.Run(() => WpAnalyzer.AnalyzeWordPressUrl(created));

    return created;
}
/// <summary>
/// Factory for <c>StoryUrlStats</c>: records the URL, initializes the created-time bounds,
/// and queues <c>WpAnalyzer.AnalyzeWordPressUrl</c> for the new instance via
/// <see cref="Task.Run(System.Action)"/>.
/// </summary>
/// <param name="url">URL the statistics are collected for.</param>
/// <returns>The initialized instance, with the queued task exposed as <c>AnalyzeWpTask</c>.</returns>
public static StoryUrlStats Create(string url)
{
    StoryUrlStats instance = new StoryUrlStats();

    instance.Url = url;
    instance.CreatedUnixTimeMin = long.MaxValue;
    instance.CreatedUnixTimeMax = long.MinValue;

    // The analysis is started here and not awaited; callers wait on AnalyzeWpTask.
    var backgroundAnalysis = Task.Run(() => WpAnalyzer.AnalyzeWordPressUrl(instance));
    instance.AnalyzeWpTask = backgroundAnalysis;

    return instance;
}
/// <summary>
/// Click handler that groups the stories in the JSON file named by
/// <c>textBoxStoriesFilePath</c> by URL host, waits for every host's WordPress analysis
/// task, and writes a tab-separated report to the file named by
/// <c>textBoxUrlAnalysisFilePath</c>.
/// </summary>
/// <remarks>
/// Reading stops at the first story older than three years (this presumably relies on the
/// input being ordered newest first - confirm against the file producer). Hosts with neither
/// theme nor plugin names are left out of the report. A response that fails while being
/// read, and any analysis task that faulted, is traced and skipped rather than aborting
/// the run.
/// </remarks>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void buttonAnalyzeUniqueUrls_Click(object sender, EventArgs e)
{
    var storyUrlsStats = new Dictionary<string, StoryUrlStats>();
    int storyCount = 0, blankUrls = 0, badUrls = 0;

    //Consider stories up to 3 years old for this analysis
    var minUnixDate = DateTime.UtcNow.Subtract(TimeSpan.FromDays(365 * 3)).ToUnixTime();

    // Set when a story older than the cutoff is seen; ends both loops (replaces the former goto).
    var reachedCutoff = false;

    foreach (var responseJson in JsonNetUtils.DeserializeSequenceFromJson<JObject>(textBoxStoriesFilePath.Text))
    {
        try
        {
            foreach (var hitJson in responseJson["hits"])
            {
                storyCount++;

                if (hitJson["created_at_i"].Value<long>() < minUnixDate)
                {
                    reachedCutoff = true;
                    break;
                }

                var urlString = hitJson["url"].ToString();
                if (string.IsNullOrWhiteSpace(urlString))
                {
                    blankUrls++;
                    continue;
                }

                try
                {
                    var uri = new Uri(urlString);
                    var host = uri.Host;

                    // One stats object per host; the first URL seen for a host is the one analyzed.
                    var stats = storyUrlsStats.AddOrGetValue(host, () => StoryUrlStats.Create(urlString));
                    stats.AddStory(hitJson);
                }
                catch (UriFormatException)
                {
                    badUrls++;
                }
            }
        }
        catch (Exception ex)
        {
            // Best effort: the rest of this response is skipped, but the failure is no
            // longer silent.
            System.Diagnostics.Trace.WriteLine("Skipping remainder of story response: " + ex);
        }

        if (reachedCutoff)
        {
            break;
        }
    }

    try
    {
        Task.WaitAll(storyUrlsStats.Values.Select(s => s.AnalyzeWpTask).ToArray());
    }
    catch (AggregateException analysisErrors)
    {
        // WaitAll throws only after every task has finished, so the results of the
        // analyses that succeeded are still valid; do not lose the whole report over
        // individual failures.
        foreach (var failure in analysisErrors.Flatten().InnerExceptions)
        {
            System.Diagnostics.Trace.WriteLine("WordPress analysis failed: " + failure);
        }
    }

    using (var urlStatsFile = File.CreateText(textBoxUrlAnalysisFilePath.Text))
    {
        foreach (var urlStatsKvp in storyUrlsStats)
        {
            var hostStats = urlStatsKvp.Value;

            // Only hosts where at least one theme or plugin name was found are reported.
            if (string.IsNullOrWhiteSpace(hostStats.ThemeNames) && string.IsNullOrWhiteSpace(hostStats.PluginNames))
            {
                continue;
            }

            //Tab separated Column Header
            //Host URLSample PointsSum ThemeNames PluginNames StoryCount CommentsSum MaxDate MinDate
            var values = new string[]
            {
                urlStatsKvp.Key,
                hostStats.Url,
                hostStats.PointsSum.ToStringInvariant(),
                hostStats.ThemeNames,
                hostStats.PluginNames,
                hostStats.StoryCount.ToStringInvariant(),
                hostStats.CommentsSum.ToStringInvariant(),
                hostStats.CreatedUnixTimeMax.ToString(),
                hostStats.CreatedUnixTimeMin.ToString()
            };

            urlStatsFile.WriteLine(values.ToDelimitedString("\t"));
        }
    }

    MessageBox.Show("Total: {0}, Blank: {1}, Bad: {2}".FormatEx(storyCount, blankUrls, badUrls));
}