/// <summary>
/// Counts the Wikidata site links of all entities that carry a Wikidata id and belong to one
/// of the currently active entity types, then shows the totals in a separate window.
/// Collision messages collected by the bot are written to <c>edtCollisions</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event arguments (not used).</param>
private void btnCountInterwiki_Click(object sender, EventArgs e)
{
    var entityTypes = CurrentActiveEntityTypes();

    // Materialize the work list once: it is handed to the bot and counted afterwards, and a
    // deferred query would re-run both filters over allEntities for each of those uses.
    var workItems = allEntities
        .Where(x => x.wiki != null && !String.IsNullOrEmpty(x.wiki.wikidata))
        .Where(x => entityTypes.Contains(x.type))
        .ToList();

    StringBuilder collisions = new StringBuilder();
    var siteLinkCount = _bot.CountSiteLinks(workItems, collisions);

    StringBuilder builder = new StringBuilder();
    builder.AppendFormat(CultureInfo.CurrentUICulture, "{0} entities on WikiData", workItems.Count);
    builder.AppendLine();
    foreach (var value in siteLinkCount)
    {
        builder.AppendFormat(CultureInfo.CurrentUICulture, " {0}: {1}", value.Key, value.Value);
        builder.AppendLine();
    }

    edtCollisions.Text = collisions.ToString();

    var formWikiDataEntries = new StringDisplayForm("WikiData language coverage", builder.ToString());
    formWikiDataEntries.Show();
}
/// <summary>
/// Converts the lines of <c>edtText</c> into a list of Muban entity XML elements and shows
/// the result in a separate window.
/// </summary>
/// <remarks>
/// Every non-empty line (after replacing tabs with spaces and trimming) becomes one entity.
/// Geocodes are assigned as <c>baseGeocode * 100 + n</c>, where <c>n</c> is the 1-based index
/// of the non-empty line. If <c>Romanizator</c> is set, its suggestions fill the
/// <c>english</c> attribute.
/// </remarks>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event arguments (not used).</param>
private void btnConvert_Click(object sender, EventArgs e)
{
    var entities = new List<Entity>();
    UInt32 count = 0;
    UInt32 baseGeocode = Convert.ToUInt32(edtGeocode.Value);

    foreach (var line in edtText.Lines)
    {
        var name = line.Replace('\t', ' ').Trim();
        if (String.IsNullOrEmpty(name))
        {
            continue;
        }

        count++;
        if (chkStripBefore.Checked)
        {
            // Drop everything in front of the first occurrence of ThaiLanguageHelper.Ban.
            // NOTE(review): this IndexOf overload is culture-sensitive when Ban is a string;
            // an ordinal search is probably intended - confirm the type of Ban.
            var startPosition = name.IndexOf(ThaiLanguageHelper.Ban);
            if (startPosition >= 0)
            {
                name = name.Substring(startPosition);
            }

            // NOTE(review): "strip after" is only honoured while "strip before" is checked.
            // Kept as in the original - confirm whether the nesting is intentional.
            if (chkStripAfter.Checked)
            {
                name = name.Split(' ').First();
            }
        }

        entities.Add(new Entity
        {
            name = name,
            type = EntityType.Muban,
            geocode = baseGeocode * 100 + count,
        });
    }

    if (Romanizator != null)
    {
        List<RomanizationEntry> dummy;
        var romanizations = Romanizator.FindRomanizationSuggestions(out dummy, entities);
        foreach (var entry in romanizations)
        {
            var entity = entities.First(x => x.geocode == entry.Geocode);
            entity.english = entry.English;
        }
    }

    // The output is XML, so it is formatted culture-independently and the free-text values
    // are escaped; otherwise a name containing &, < or " would yield malformed markup.
    StringBuilder mubanListBuilder = new StringBuilder();
    foreach (var entity in entities)
    {
        mubanListBuilder.AppendFormat(
            CultureInfo.InvariantCulture,
            "<entity type=\"{0}\" geocode=\"{1}\" name=\"{2}\"",
            entity.type,
            entity.geocode,
            System.Security.SecurityElement.Escape(entity.name));
        if (!String.IsNullOrEmpty(entity.english))
        {
            mubanListBuilder.AppendFormat(
                CultureInfo.InvariantCulture,
                " english=\"{0}\"",
                System.Security.SecurityElement.Escape(entity.english));
        }
        mubanListBuilder.AppendLine(" />");
    }

    var form = new StringDisplayForm("Muban", mubanListBuilder.ToString());
    form.Show();
}
/// <summary>
/// Builds a Wikidata coverage report and shows it in a separate window.
/// </summary>
/// <remarks>
/// The report contains, in this order:
/// the number of entities with a Wikidata id per entity type compared with the number of
/// non-obsolete entities of that type; the number of gazette announcements with a Wikidata id;
/// every Wikidata id that is used more than once; and the non-obsolete, Thesaban-compatible
/// entities with <c>tambonSpecified</c> set whose history has no <c>HistoryStatus</c> item.
/// </remarks>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event arguments (not used).</param>
private void btnStatistics_Click(object sender, EventArgs e)
{
    // Materialized once - these results are queried many times below, and deferred queries
    // would re-scan allEntities on every use.
    var entitiesWithWikiData = allEntities
        .Where(x => x.wiki != null && !String.IsNullOrEmpty(x.wiki.wikidata))
        .ToList();
    var linkedEntities = new HashSet<Entity>(entitiesWithWikiData);

    var wikiDataLinks = new List<String>();
    wikiDataLinks.AddRange(entitiesWithWikiData.Select(x => x.wiki.wikidata));

    // TODO: offices (entity.office) are not part of the statistics. An earlier variant also
    // collected the Wikidata ids of offices and reported them grouped by office type.
    // TODO: write to CSV file?

    var fittingEntitiesByType = entitiesWithWikiData
        .GroupBy(y => y.type)
        .OrderBy(z => z.Count())
        .ToList();
    var allEntitiesByType = allEntities
        .Where(x => !x.IsObsolete)
        .GroupBy(y => y.type)
        .ToDictionary(g => g.Key, g => g.ToList());

    // Add an empty group for every expected type that exists but has no Wikidata link yet,
    // so it shows up as "0 of n" in the report.
    foreach (var expectedType in WikiBase.WikiDataItems)
    {
        if (expectedType.Key == EntityType.Country)
        {
            continue;
        }
        if (!allEntitiesByType.ContainsKey(expectedType.Key))
        {
            continue;
        }
        if (!fittingEntitiesByType.Any(x => x.Key == expectedType.Key))
        {
            var emptyEntry = new EntityTypeGrouping<EntityType, Entity>();
            emptyEntry.Key = expectedType.Key;
            fittingEntitiesByType.Add(emptyEntry);
        }
    }

    StringBuilder builder = new StringBuilder();
    foreach (var type in fittingEntitiesByType)
    {
        // The linked entities are not filtered by IsObsolete while the expected ones are, so
        // a type may have linked entities but no non-obsolete entity at all. Treat that as
        // an expected count of zero instead of failing on a missing group.
        List<Entity> fittingAllEntities;
        if (!allEntitiesByType.TryGetValue(type.Key, out fittingAllEntities))
        {
            fittingAllEntities = new List<Entity>();
        }

        var expectedCount = fittingAllEntities.Count;
        var actualCount = type.Count();
        builder.AppendFormat(CultureInfo.CurrentUICulture, "{0}: {1} of {2}", type.Key, actualCount, expectedCount);

        // List the geocodes of the unlinked entities only when fewer than five are missing.
        if (actualCount != expectedCount && expectedCount - actualCount < 5)
        {
            builder.Append(" (");
            foreach (var entry in fittingAllEntities)
            {
                if (!linkedEntities.Contains(entry))
                {
                    builder.AppendFormat(CultureInfo.CurrentUICulture, "{0},", entry.geocode);
                }
            }
            builder.Append(")");
        }
        builder.AppendLine();
    }
    builder.AppendLine();

    var announcementsWithWikiData = GlobalData.AllGazetteAnnouncements.entry
        .Where(x => x.wiki != null && !String.IsNullOrEmpty(x.wiki.wikidata))
        .ToList();
    if (announcementsWithWikiData.Count > 0)
    {
        builder.AppendFormat(CultureInfo.CurrentUICulture, "Announcements: {0}", announcementsWithWikiData.Count);
        builder.AppendLine();
        builder.AppendLine();
    }
    wikiDataLinks.AddRange(announcementsWithWikiData.Select(x => x.wiki.wikidata));

    var duplicateWikiDataLinks = wikiDataLinks
        .GroupBy(x => x)
        .Where(y => y.Count() > 1)
        .ToList();
    if (duplicateWikiDataLinks.Count > 0)
    {
        builder.AppendLine("Duplicate links:");
        foreach (var wikiDataLink in duplicateWikiDataLinks)
        {
            builder.AppendLine(wikiDataLink.Key);
        }
    }

    var noUpgradeHistoryEntry = allEntities
        .Where(x => x.type.IsCompatibleEntityType(EntityType.Thesaban) && x.tambonSpecified && !x.IsObsolete)
        .Where(x => !x.history.Items.Any(item => item is HistoryStatus))
        .ToList();
    noUpgradeHistoryEntry.Sort((x, y) => x.geocode.CompareTo(y.geocode));
    if (noUpgradeHistoryEntry.Count > 0)
    {
        builder.AppendFormat(CultureInfo.CurrentUICulture, "No history ({0}):", noUpgradeHistoryEntry.Count);
        builder.AppendLine();
        foreach (var entity in noUpgradeHistoryEntry)
        {
            builder.AppendFormat(CultureInfo.CurrentUICulture, "{0}: {1}", entity.geocode, entity.english);
            builder.AppendLine();
        }
    }

    var title = String.Format(CultureInfo.CurrentUICulture, "Wikidata coverage ({0})", entitiesWithWikiData.Count);
    var formWikiDataEntries = new StringDisplayForm(title, builder.ToString());
    formWikiDataEntries.Show();
}
/// <summary>
/// Converts the lines of <c>edtText</c> into a list of Muban entity XML elements and shows
/// the result in a separate window.
/// </summary>
/// <remarks>
/// Every non-empty line (after replacing tabs with spaces and trimming) becomes one entity.
/// Geocodes are assigned as <c>baseGeocode * 100 + n</c>, where <c>n</c> is the 1-based index
/// of the non-empty line. If <c>Romanizator</c> is set, its suggestions fill the
/// <c>english</c> attribute.
/// </remarks>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event arguments (not used).</param>
private void btnConvert_Click(object sender, EventArgs e)
{
    var entities = new List<Entity>();
    UInt32 count = 0;
    UInt32 baseGeocode = Convert.ToUInt32(edtGeocode.Value);

    foreach (var line in edtText.Lines)
    {
        var name = line.Replace('\t', ' ').Trim();
        if (String.IsNullOrEmpty(name))
        {
            continue;
        }

        count++;
        if (chkStripBefore.Checked)
        {
            // Drop everything in front of the first occurrence of ThaiLanguageHelper.Ban.
            // NOTE(review): this IndexOf overload is culture-sensitive when Ban is a string;
            // an ordinal search is probably intended - confirm the type of Ban.
            var startPosition = name.IndexOf(ThaiLanguageHelper.Ban);
            if (startPosition >= 0)
            {
                name = name.Substring(startPosition);
            }

            // NOTE(review): "strip after" is only honoured while "strip before" is checked.
            // Kept as in the original - confirm whether the nesting is intentional.
            if (chkStripAfter.Checked)
            {
                name = name.Split(' ').First();
            }
        }

        entities.Add(new Entity
        {
            name = name,
            type = EntityType.Muban,
            geocode = baseGeocode * 100 + count,
        });
    }

    if (Romanizator != null)
    {
        List<RomanizationEntry> dummy;
        var romanizations = Romanizator.FindRomanizationSuggestions(out dummy, entities);
        foreach (var entry in romanizations)
        {
            var entity = entities.First(x => x.geocode == entry.Geocode);
            entity.english = entry.English;
        }
    }

    // The output is XML, so it is formatted culture-independently and the free-text values
    // are escaped; otherwise a name containing &, < or " would yield malformed markup.
    StringBuilder mubanListBuilder = new StringBuilder();
    foreach (var entity in entities)
    {
        mubanListBuilder.AppendFormat(
            CultureInfo.InvariantCulture,
            "<entity type=\"{0}\" geocode=\"{1}\" name=\"{2}\"",
            entity.type,
            entity.geocode,
            System.Security.SecurityElement.Escape(entity.name));
        if (!String.IsNullOrEmpty(entity.english))
        {
            mubanListBuilder.AppendFormat(
                CultureInfo.InvariantCulture,
                " english=\"{0}\"",
                System.Security.SecurityElement.Escape(entity.english));
        }
        mubanListBuilder.AppendLine(" />");
    }

    var form = new StringDisplayForm("Muban", mubanListBuilder.ToString());
    form.Show();
}