Ejemplo n.º 1
0
        /// <summary>
        /// Loads the keywords spreadsheet and checks the shape of the result: the workbook is
        /// present, the data is non-empty, neither the category types nor the sub-categories
        /// contain duplicates, and a few known entries are present.
        /// </summary>
        public void LoadCategoryKeywords_loads_spreadsheet_data()
        {
            var spreadsheet = new KeywordsSpreadsheet();
            Assert.NotNull(spreadsheet.Workbook);

            var classifiers = spreadsheet.LoadData();
            classifiers.Should().NotBeEmpty();
            spreadsheet.CategoryTypes.GetDuplicates().Should().BeEmpty();

            var categoryTypes = classifiers.Select(c => c.CategoryType);
            var subCategories = classifiers.Select(c => c.SubCategory);

            // Every sub-category may appear only once in the loaded data.
            Assert.Empty(subCategories.GetDuplicates().ToList());

            // Spot-check entries that are expected to exist.
            categoryTypes.Should().Contain(CategoryType.BankingFinance);
            subCategories.Should().Contain("Fees");

            const string expectedKeyword = "cinemas";
            var movies = classifiers.Single(c => c.CategoryType == CategoryType.Entertainment
                                              && c.SubCategory == "Movies");

            movies.Keywords.Should().Contain(expectedKeyword.ToLowerInvariant());
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Imports every item from the keywords spreadsheet whose sub-category is not already in
        /// the classifier repository: assigns it a new id, publishes a
        /// <c>ClassifierRequest</c> for it and inserts it into the repository.
        /// </summary>
        private void ImportKeywords()
        {
            var spreadsheet = new KeywordsSpreadsheet();

            foreach (var item in spreadsheet.LoadData())
            {
                // Skip sub-categories that the repository already holds.
                var existing = _classifierRepository.Find(x => x.SubCategory == item.SubCategory).ToList();
                if (existing.Count > 0)
                {
                    Log.Debug("KeywordClassifier skipped {desc}", item.SubCategory);
                    continue;
                }

                // NOTE(review): Util.WaitFor() is opaque from here - presumably a throttle between items; confirm.
                Util.WaitFor();
                item.Id = Guid.NewGuid();

                // Log the text as a property value, not as the message template: braces inside the
                // item's string form would otherwise be parsed as template tokens. ":l" renders the
                // string without surrounding quotes, so the written text is unchanged.
                Log.Information("{Item:l}", item.ToString());

                // NOTE(review): the result of Publish is discarded; if it returns a Task, failures
                // go unobserved and the insert below does not wait for it - confirm this is intended.
                _busControl.Publish(new ClassifierRequest
                {
                    Identifier = item
                });

                _classifierRepository.Insert(item);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Loads the keyword classifiers from the keywords spreadsheet, saves each one through
        /// <c>Classifiers.Save</c>, then adds a lower-cased company name as an extra keyword to
        /// the "doctors/dentist" and "childcare" sub-category entries.
        /// </summary>
        private void LoadKeywordClassifiers()
        {
            var classifiers = new KeywordsSpreadsheet().LoadData().ToList();

            foreach (var classifier in classifiers)
            {
                Classifiers.Save(classifier);
            }

            // NOTE(review): the extra keywords are added after Save; whether they reach the saved
            // copies depends on how Classifiers.Save stores its argument - confirm.
            var doctorsDentist = classifiers.Single(c => c.SubCategory == SubCategories.HealthBeauty.DoctorsDentist);
            doctorsDentist.AddKeyword(CompanyNames.StJohnOfGod.ToLowerInvariant());

            var childcare = classifiers.Single(c => c.SubCategory == SubCategories.Kids.Childcare);
            childcare.AddKeyword(CompanyNames.Kidz.ToLowerInvariant());
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Writes one CSV file per keyword sub-category into the output folder, after removing
        /// suburb names and postcodes from every keyword.
        /// </summary>
        /// <remarks>
        /// All files already in the output folder are deleted first. Each keyword is lower-cased;
        /// for every locality whose suburb or postcode occurs in the keyword, both values are
        /// removed from it; the result is passed through <c>TextUtil.CleanWord</c>. The distinct
        /// cleaned keywords are written in sorted order, one per record, to
        /// <c>{SubCategory}.csv</c>. A keyword whose processing throws is logged and skipped.
        /// </remarks>
        /// <exception cref="ApplicationException">
        /// Two items map to the same output path; the exception message is that path.
        /// </exception>
        public static void WriteOutKeywords()
        {
            const string folder = @"C:\dev\vita\data\output";

            // Load and lower-case the localities once, instead of re-enumerating the spreadsheet
            // data and re-lowering every value for each keyword. Blank values become null so they
            // are never used for matching or replacing (a null/blank suburb or postcode used to make
            // the match predicate throw, which dropped the keyword via the catch below).
            var localities = new LocalitiesSpreadsheet().LoadData()
                .Select(x => new
                {
                    Suburb   = string.IsNullOrWhiteSpace(x.Suburb) ? null : x.Suburb.ToLowerInvariant(),
                    Postcode = string.IsNullOrWhiteSpace(x.Postcode) ? null : x.Postcode.ToLowerInvariant()
                })
                .ToList();

            // Take the full listing before deleting, so files are not removed while the directory
            // is still being enumerated.
            foreach (var file in Directory.GetFiles(folder))
            {
                File.Delete(file);
            }

            var seen = new HashSet<string>();

            foreach (var item in new KeywordsSpreadsheet().LoadData())
            {
                // Fail before doing any work if another item already claimed this file name.
                var path = Path.Combine(folder, item.SubCategory + ".csv");
                if (!seen.Add(path))
                {
                    throw new ApplicationException(path);
                }

                var cleaned = new List<string>();
                foreach (var word in item.Keywords.Distinct().Select(x => x.ToLowerInvariant()))
                {
                    try
                    {
                        var mutate = word;
                        foreach (var locality in localities)
                        {
                            // A locality applies when the original keyword contains its suburb or its postcode.
                            var suburbHit   = locality.Suburb != null && word.Contains(locality.Suburb);
                            var postcodeHit = locality.Postcode != null && word.Contains(locality.Postcode);
                            if (!suburbHit && !postcodeHit)
                            {
                                continue;
                            }

                            if (locality.Suburb != null)
                            {
                                mutate = mutate.Replace(locality.Suburb, string.Empty);
                            }
                            if (locality.Postcode != null)
                            {
                                mutate = mutate.Replace(locality.Postcode, string.Empty);
                            }
                        }

                        cleaned.Add(TextUtil.CleanWord(mutate));
                    }
                    catch (Exception e)
                    {
                        // Best effort: a keyword that cannot be cleaned is reported and left out.
                        Log.Warning(e, "word {w}", word);
                    }
                }

                using (TextWriter writer = new StreamWriter(path))
                {
                    var csv = new CsvWriter(writer);
                    foreach (var word in cleaned.Distinct().OrderBy(x => x))
                    {
                        csv.WriteField(word.ToLowerInvariant());
                        csv.NextRecord();
                    }

                    writer.Flush();
                }
            }
        }