/// <summary>
/// Checks that the keywords spreadsheet opens and that the data loaded from it
/// is non-empty, has no duplicated category types or sub-categories, and
/// contains a few known entries (BankingFinance, "Fees", and the "cinemas"
/// keyword under Entertainment / "Movies").
/// </summary>
public void LoadCategoryKeywords_loads_spreadsheet_data()
{
    var spreadsheet = new KeywordsSpreadsheet();
    Assert.NotNull(spreadsheet.Workbook);

    var classifiers = spreadsheet.LoadData();
    classifiers.Should().NotBeEmpty();

    // Category types exposed by the spreadsheet must be unique.
    spreadsheet.CategoryTypes.GetDuplicates().Should().BeEmpty();

    var categoryTypes = classifiers.Select(x => x.CategoryType);
    var subCategoryNames = classifiers.Select(x => x.SubCategory);

    // Every sub-category may appear only once across the loaded rows.
    var repeatedSubCategories = subCategoryNames.GetDuplicates().ToList();
    Assert.Empty(repeatedSubCategories);

    // Spot-check a known category and a known sub-category.
    categoryTypes.Should().Contain(CategoryType.BankingFinance);
    subCategoryNames.Should().Contain("Fees");

    // Exactly one Entertainment / "Movies" row is expected; Single() fails otherwise.
    var movies = classifiers.Single(
        x => x.CategoryType == CategoryType.Entertainment && x.SubCategory == "Movies");

    const string expectedKeyword = "cinemas";
    movies.Keywords.Should().Contain(expectedKeyword.ToLowerInvariant());
}
/// <summary>
/// Walks every classifier loaded from the keywords spreadsheet and, for each
/// one whose sub-category is not yet present in the classifier repository,
/// assigns it a new id, publishes a <c>ClassifierRequest</c> for it and inserts
/// it into the repository. Classifiers whose sub-category already exists are
/// skipped with a debug log entry.
/// </summary>
private void ImportKeywords()
{
    var spreadsheet = new KeywordsSpreadsheet();

    foreach (var item in spreadsheet.LoadData())
    {
        // Only existence matters, so stop at the first match instead of
        // materialising every matching row into a list.
        var alreadyImported = _classifierRepository
            .Find(x => x.SubCategory == item.SubCategory)
            .Any();

        if (alreadyImported)
        {
            Log.Debug("KeywordClassifier skipped {desc}", item.SubCategory);
            continue;
        }

        Util.WaitFor();
        item.Id = Guid.NewGuid();

        // Log the classifier text as a property value rather than as the
        // message template itself: braces inside the text would otherwise be
        // parsed as template tokens. ":l" renders the string without quotes.
        Log.Information("{Classifier:l}", item.ToString());

        // NOTE(review): the request is published before the insert and the
        // result of Publish is not observed here - confirm that ordering and
        // fire-and-forget behaviour are intended.
        _busControl.Publish(new ClassifierRequest { Identifier = item });
        _classifierRepository.Insert(item);
    }
}
/// <summary>
/// Loads all classifiers from the keywords spreadsheet, saves each one through
/// <c>Classifiers.Save</c>, and then adds two company-name keywords (lower-cased)
/// to the "Doctors/Dentist" and "Childcare" sub-category classifiers.
/// </summary>
private void LoadKeywordClassifiers()
{
    var classifiers = new KeywordsSpreadsheet().LoadData().ToList();

    foreach (var classifier in classifiers)
    {
        Classifiers.Save(classifier);
    }

    // NOTE(review): the extra keywords are added after Save has already been
    // called - presumably Save keeps a reference to the same instance; confirm.

    // Single() throws unless exactly one classifier has the given sub-category.
    var doctorsDentist = classifiers.Single(
        x => x.SubCategory == SubCategories.HealthBeauty.DoctorsDentist);
    doctorsDentist.AddKeyword(CompanyNames.StJohnOfGod.ToLowerInvariant());

    var childcare = classifiers.Single(
        x => x.SubCategory == SubCategories.Kids.Childcare);
    childcare.AddKeyword(CompanyNames.Kidz.ToLowerInvariant());
}
/// <summary>
/// Regenerates one CSV file per keyword sub-category in the output folder.
/// All existing files in the folder are deleted first. For every classifier in
/// the keywords spreadsheet, each keyword is lower-cased, stripped of any
/// suburb name or postcode found in the localities spreadsheet, passed through
/// <c>TextUtil.CleanWord</c>, and the distinct, sorted results are written one
/// per row to "&lt;SubCategory&gt;.csv".
/// </summary>
/// <exception cref="ApplicationException">
/// Two classifiers map to the same output file path; the message is that path.
/// </exception>
public static void WriteOutKeywords()
{
    const string folder = @"C:\dev\vita\data\output";

    // Lower-case every locality once up front instead of once per keyword, and
    // turn blank values into null so they can neither throw nor match every word.
    var localities = new LocalitiesSpreadsheet().LoadData()
        .Select(x => new
        {
            Suburb = string.IsNullOrWhiteSpace(x.Suburb) ? null : x.Suburb.ToLowerInvariant(),
            Postcode = string.IsNullOrWhiteSpace(x.Postcode) ? null : x.Postcode.ToLowerInvariant()
        })
        .Where(x => x.Suburb != null || x.Postcode != null)
        .ToList();

    // Make sure the folder exists, then clear it. GetFiles snapshots the
    // listing so files are not deleted while the directory is being enumerated.
    Directory.CreateDirectory(folder);
    foreach (var staleFile in Directory.GetFiles(folder))
    {
        File.Delete(staleFile);
    }

    // The target is a Windows path, where names differing only by case refer to
    // the same file, so duplicates are detected case-insensitively.
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var item in new KeywordsSpreadsheet().LoadData())
    {
        var cleaned = new List<string>();
        var words = item.Keywords
            .Where(x => x != null)
            .Select(x => x.ToLowerInvariant())
            .Distinct();

        foreach (var word in words)
        {
            try
            {
                var mutate = word;

                // A locality applies when the original word contains its suburb
                // or its postcode; both are then removed from the working copy.
                var finds = localities.Where(x =>
                    (x.Suburb != null && word.Contains(x.Suburb)) ||
                    (x.Postcode != null && word.Contains(x.Postcode)));

                foreach (var found in finds)
                {
                    if (found.Suburb != null)
                    {
                        mutate = mutate.Replace(found.Suburb, string.Empty);
                    }

                    if (found.Postcode != null)
                    {
                        mutate = mutate.Replace(found.Postcode, string.Empty);
                    }
                }

                mutate = TextUtil.CleanWord(mutate);

                // Skip keywords that were reduced to nothing so that no blank
                // rows end up in the CSV.
                if (!string.IsNullOrWhiteSpace(mutate))
                {
                    cleaned.Add(mutate);
                }
            }
            catch (Exception e)
            {
                // Best effort: a keyword that fails to clean is logged and skipped.
                Log.Warning(e, "word {w}", word);
            }
        }

        var path = Path.Combine(folder, item.SubCategory + ".csv");

        // HashSet.Add returns false when the path was already written.
        if (!seen.Add(path))
        {
            throw new ApplicationException(path);
        }

        using (TextWriter writer = new StreamWriter(path))
        {
            var csv = new CsvWriter(writer);

            // Lower-case before de-duplicating so rows cannot differ only by case.
            var rows = cleaned
                .Select(x => x.ToLowerInvariant())
                .Distinct()
                .OrderBy(x => x);

            foreach (var row in rows)
            {
                csv.WriteField(row);
                csv.NextRecord();
            }

            writer.Flush();
        }
    }
}