Пример #1
0
        /// <summary>
        /// Collects the authors extracted from the <c>Author</c> field of every entry in
        /// <c>CatalogData</c> and returns one author per distinct (FirstName, LastName) pair.
        /// </summary>
        /// <remarks>
        /// Entries whose extraction throws <see cref="ExtractorException"/> are logged and skipped.
        /// When several authors share the same name pair, the first one encountered is kept.
        /// </remarks>
        /// <returns>The de-duplicated list of authors.</returns>
        public List <Author> ImportAuthorsList()
        {
            var collected = new List<Author>();

            foreach (var row in CatalogData)
            {
                List<Author> rowAuthors;

                try
                {
                    rowAuthors = AuthorExtractor.Extract(row.Author);
                }
                catch (ExtractorException e)
                {
                    // A row that cannot be parsed must not abort the whole import.
                    _log.Error($"Author Extract Error: [{e.Text}]");
                    continue;
                }

                collected.AddRange(rowAuthors);
            }

            // Group by the name pair and keep the first author of each group.
            var uniqueByName =
                from author in collected
                group author by new { author.FirstName, author.LastName } into sameName
                select sameName.First();

            return uniqueByName.ToList();
        }
Пример #2
0
        /// <summary>
        /// Verifies that extracting authors from the text "???" throws an
        /// <see cref="ExtractorException"/>.
        /// </summary>
        public void Extract_QuestionsMarks_ShouldReturnExtractException()
        {
            // Arrange
            const string unparsableText = "???";

            // Act
            Action extracting = () => AuthorExtractor.Extract(unparsableText);

            // Assert
            // NOTE(review): if this is FluentAssertions, the string below is the "because" reason
            // printed on failure, not an expected exception message - confirm that is intended.
            extracting.Should().Throw <ExtractorException>("Cannot extract data from text");
        }
Пример #3
0
        /// <summary>
        /// Verifies that extracting authors from a null string yields an empty collection.
        /// </summary>
        public void Extract_Null_ShouldReturnEmptyAuthorsList()
        {
            // Arrange
            string missingText = null;

            // Act
            var extractedAuthors = AuthorExtractor.Extract(missingText);

            // Assert
            extractedAuthors.Should().BeEmpty();
        }
Пример #4
0
        /// <summary>
        /// Verifies that the text "inni" alone yields an empty author collection.
        /// </summary>
        public void Extract_Inni_ShouldReturnEmptyAuthorsList()
        {
            // Arrange
            const string othersOnly = "inni";

            // Act
            var extractedAuthors = AuthorExtractor.Extract(othersOnly);

            // Assert
            extractedAuthors.Should().BeEmpty();
        }
Пример #5
0
        /// <summary>
        /// Verifies that the text "'-" (an apostrophe followed by a hyphen) yields an empty
        /// author collection.
        /// </summary>
        public void Extract_PauseWithApostrophe_ShouldReturnEmptyAuthorsList()
        {
            // Arrange
            const string punctuationOnly = "'-";

            // Act
            var extractedAuthors = AuthorExtractor.Extract(punctuationOnly);

            // Assert
            extractedAuthors.Should().BeEmpty();
        }
Пример #6
0
 /// <summary>
 /// Creates an <c>Unfluffer</c>, assigning a new HTML parser and a new instance of each
 /// extractor type to the corresponding member.
 /// </summary>
 public Unfluffer()
 {
     // Parser first, then the extractors, in the original assignment order.
     _htmlParser = new HtmlParser();

     _languageExtractor = new LanguageExtractor();
     _titleExtractor = new TitleExtractor();
     _descriptionExtractor = new DescriptionExtractor();
     _favIconExtractor = new FavIconExtractor();
     _imageExtractor = new ImageExtractor();
     _authorExtractor = new AuthorExtractor();
     _canonicalExtractor = new CanonicalExtractor();
 }
Пример #7
0
        /// <summary>
        /// Builds a <c>Book</c> for every entry in <c>CatalogData</c> by running the field
        /// extractors on it, then passes the book's authors, series, publishing house, storage
        /// place and categories to <c>ImportBookValidator</c> together with the imported
        /// reference lists.
        /// </summary>
        /// <returns>The list of books, in the order of the catalog entries.</returns>
        public List <Book> ImportBooksList()
        {
            // Reference lists handed to the validator for every book.
            var authors = ImportAuthorsList();
            var seriesInfos = ImportSeriesListInfo();
            var publishingHouses = ImportPublishingHousesList();
            var storagePlaces = ImportStoragePlacesList();
            var categories = ImportCategoriesList();

            // One series per distinct name; entries with a null or empty name are dropped.
            var seriesList = seriesInfos
                .GroupBy(info => info.SeriesName)
                .Select(sameName => sameName.First())
                .Where(info => !string.IsNullOrEmpty(info.SeriesName))
                .Select(info => info.ToSeries())
                .ToList();

            var result = new List<Book>();

            foreach (var row in CatalogData)
            {
                // A row carries at most one category; a null extraction result means none.
                var bookCategories = new List<Category>();
                Category extractedCategory = CategoryExtractor.Extract(row.Category);
                if (extractedCategory != null)
                {
                    bookCategories.Add(extractedCategory);
                }

                var seriesInfo = SeriesInfoExtractor.Extract(row.Series);

                // NOTE(review): extraction is not wrapped in try/catch here, unlike in
                // ImportAuthorsList - confirm that an extractor failure should abort the import.
                var book = new Book
                {
                    Id = Guid.NewGuid(),
                    Title = TitleExtractor.Extract(row.Title),
                    Authors = AuthorExtractor.Extract(row.Author),
                    Series = seriesInfo?.ToSeries(),
                    PublishingHouse = PublishingHouseExtractor.Extract(row.PublishingHouse),
                    PublishmentYear = YearExtractor.Extract(row.Year),
                    ISBN = IsbnExtractor.Extract(row.ISBN),
                    Language = LanguageExtractor.Extract(row.Language),
                    StoragePlace = StoragePlaceExtractor.Extract(row.StoragePlace),
                    Comment = CommentExtractor.Extract(row.Comment),
                    Categories = bookCategories,
                    VolumeNumber = seriesInfo?.VolumeNumber
                };

                ImportBookValidator.CheckAuthors(authors, book.Authors);
                ImportBookValidator.CheckSeries(seriesList, book.Series);
                ImportBookValidator.CheckPublishingHouse(publishingHouses, book.PublishingHouse);
                ImportBookValidator.CheckStoragePlace(storagePlaces, book.StoragePlace);
                ImportBookValidator.CheckCategory(categories, book.Categories);

                result.Add(book);
            }

            return result;
        }
Пример #8
0
        /// <summary>
        /// Verifies that a single word containing an apostrophe is returned as one author whose
        /// last name is the whole input and whose first name is empty.
        /// </summary>
        public void Extract_OnlyOneNameWithApostrophe_ShouldReturnAuthorListWithAuthorWithoutName()
        {
            // Arrange
            const string fullName = "O'Rely";

            // Act
            var extractedAuthors = AuthorExtractor.Extract(fullName);

            // Assert
            extractedAuthors.Should().HaveCount(1);

            var onlyAuthor = extractedAuthors[0];
            onlyAuthor.Id.Should().NotBeEmpty();
            onlyAuthor.FirstName.Should().BeEmpty();
            onlyAuthor.LastName.Should().Be(fullName);
        }
Пример #9
0
        /// <summary>
        /// Verifies that "first-name last-name" input is returned as one author with both
        /// parts assigned and a non-empty Id.
        /// </summary>
        public void Extract_SimpleName_ShouldReturnAuthorsListWithOneElement()
        {
            // Arrange
            const string firstName = "Andrzej";
            const string lastName = "Sapkowski";
            string fullName = $"{firstName} {lastName}";

            // Act
            var extractedAuthors = AuthorExtractor.Extract(fullName);

            // Assert
            extractedAuthors.Should().HaveCount(1);

            var onlyAuthor = extractedAuthors[0];
            onlyAuthor.Id.Should().NotBeEmpty();
            onlyAuthor.FirstName.Should().Be(firstName);
            onlyAuthor.LastName.Should().Be(lastName);
        }
Пример #10
0
        /// <summary>
        /// Verifies that when two given names precede the last name, both given names end up
        /// in FirstName and the final word in LastName.
        /// </summary>
        public void Extract_NameWithSecondName_ShouldReturnAuthorsListWithOneElement()
        {
            // Arrange
            const string firstName = "Liliana Elena";
            const string lastName = "Wroska";
            string fullName = $"{firstName} {lastName}";

            // Act
            var extractedAuthors = AuthorExtractor.Extract(fullName);

            // Assert
            extractedAuthors.Should().HaveCount(1);

            var onlyAuthor = extractedAuthors[0];
            onlyAuthor.Id.Should().NotBeEmpty();
            onlyAuthor.FirstName.Should().Be(firstName);
            onlyAuthor.LastName.Should().Be(lastName);
        }
Пример #11
0
        /// <summary>
        /// Verifies that names made of Cyrillic letters are returned unchanged as a single
        /// author's first and last name.
        /// </summary>
        public void Extract_NameWithRussianCharacters_ShouldReturnAuthorsListWithOneElement()
        {
            // Arrange
            const string firstName = "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя";
            const string lastName = "аБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя";
            string fullName = $"{firstName} {lastName}";

            // Act
            var extractedAuthors = AuthorExtractor.Extract(fullName);

            // Assert
            extractedAuthors.Should().HaveCount(1);

            var onlyAuthor = extractedAuthors[0];
            onlyAuthor.Id.Should().NotBeEmpty();
            onlyAuthor.FirstName.Should().Be(firstName);
            onlyAuthor.LastName.Should().Be(lastName);
        }
Пример #12
0
        /// <summary>
        /// Verifies that hyphenated first and last names are each kept whole in a single
        /// extracted author.
        /// </summary>
        public void Extract_AllNamesWithPause_ShouldReturnAuthorsListWithOneElement()
        {
            // Arrange
            const string firstName = "Zygmunt-Karol";
            const string lastName = "Zeydler-Zborowski";
            string fullName = $"{firstName} {lastName}";

            // Act
            var extractedAuthors = AuthorExtractor.Extract(fullName);

            // Assert
            extractedAuthors.Should().HaveCount(1);

            var onlyAuthor = extractedAuthors[0];
            onlyAuthor.Id.Should().NotBeEmpty();
            onlyAuthor.FirstName.Should().Be(firstName);
            onlyAuthor.LastName.Should().Be(lastName);
        }
Пример #13
0
        /// <summary>
        /// Verifies that names containing Polish diacritics are returned unchanged as a single
        /// author's first and last name.
        /// </summary>
        public void Extract_NameWithPolishCharacters_ShouldReturnAuthorsListWithOneElement()
        {
            // Arrange
            const string firstName = "Ąęóśłżźćń";
            const string lastName = "Eąóśłżźćń";
            string fullName = $"{firstName} {lastName}";

            // Act
            var extractedAuthors = AuthorExtractor.Extract(fullName);

            // Assert
            extractedAuthors.Should().HaveCount(1);

            var onlyAuthor = extractedAuthors[0];
            onlyAuthor.Id.Should().NotBeEmpty();
            onlyAuthor.FirstName.Should().Be(firstName);
            onlyAuthor.LastName.Should().Be(lastName);
        }
Пример #14
0
        /// <summary>
        /// Verifies that an initial followed by a dot is accepted as the first name of a single
        /// extracted author.
        /// </summary>
        public void Extract_NameWithOneInitial_ShouldReturnAuthorsListWithOneElement()
        {
            // Arrange
            const string firstName = "B.";
            const string lastName = "Kwiatek";
            string fullName = $"{firstName} {lastName}";

            // Act
            var extractedAuthors = AuthorExtractor.Extract(fullName);

            // Assert
            extractedAuthors.Should().HaveCount(1);

            var onlyAuthor = extractedAuthors[0];
            onlyAuthor.Id.Should().NotBeEmpty();
            onlyAuthor.FirstName.Should().Be(firstName);
            onlyAuthor.LastName.Should().Be(lastName);
        }
Пример #15
0
        /// <summary>
        /// Verifies that first and last names containing apostrophes are each kept whole in a
        /// single extracted author.
        /// </summary>
        public void Extract_AllNamesWithApostrophe_ShouldReturnAuthorsListWithOneElement()
        {
            // Arrange
            const string firstName = "A'manda";
            const string lastName = "O'rely";
            string fullName = $"{firstName} {lastName}";

            // Act
            var extractedAuthors = AuthorExtractor.Extract(fullName);

            // Assert
            extractedAuthors.Should().HaveCount(1);

            var onlyAuthor = extractedAuthors[0];
            onlyAuthor.Id.Should().NotBeEmpty();
            onlyAuthor.FirstName.Should().Be(firstName);
            onlyAuthor.LastName.Should().Be(lastName);
        }
Пример #16
0
        /// <summary>
        /// Verifies that a trailing " i inni" suffix is ignored: only the named author is
        /// returned, with the suffix absent from the last name.
        /// </summary>
        public void Extract_AndOthers_ShouldReturnAuthorsListWithOneElement()
        {
            // Arrange
            const string firstName = "Anna";
            const string lastName = "Kwiatek";
            const string extraText = " i inni";
            string fullName = $"{firstName} {lastName}{extraText}";

            // Act
            var extractedAuthors = AuthorExtractor.Extract(fullName);

            // Assert
            extractedAuthors.Should().HaveCount(1);

            var onlyAuthor = extractedAuthors[0];
            onlyAuthor.Id.Should().NotBeEmpty();
            onlyAuthor.FirstName.Should().Be(firstName);
            onlyAuthor.LastName.Should().Be(lastName);
        }
Пример #17
0
        /// <summary>
        /// Verifies that two names separated by "; " are returned as two authors, in input
        /// order, each with a non-empty Id and the expected first and last name.
        /// </summary>
        public void Extract_TwoAuthors_ShouldReturnAuthorsListWithTwoElement()
        {
            // Arrange
            const string firstNameFirstAuthor = "Anne";
            const string lastNameFirstAuthor = "Plichota";
            const string firstNameSecondAuthor = "Cendrine";
            const string lastNameSecondAuthor = "Wolf";
            string fullName =
                $"{firstNameFirstAuthor} {lastNameFirstAuthor}; {firstNameSecondAuthor} {lastNameSecondAuthor}";

            // Act
            var extractedAuthors = AuthorExtractor.Extract(fullName);

            // Assert
            extractedAuthors.Should().HaveCount(2);

            var first = extractedAuthors[0];
            var second = extractedAuthors[1];

            first.Id.Should().NotBeEmpty();
            first.FirstName.Should().Be(firstNameFirstAuthor);
            first.LastName.Should().Be(lastNameFirstAuthor);

            second.Id.Should().NotBeEmpty();
            second.FirstName.Should().Be(firstNameSecondAuthor);
            second.LastName.Should().Be(lastNameSecondAuthor);
        }
Пример #18
0
        /// <summary>
        /// Runs every section extractor over the given CDA XML document and gathers the results
        /// into a <see cref="CdaDocument"/>.
        /// </summary>
        /// <param name="cdaDocument">The CDA document to read. Must not be null.</param>
        /// <returns>A <see cref="CdaDocument"/> holding the result of each extractor.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="cdaDocument"/> is null. This type derives from
        /// <see cref="ArgumentException"/>, so callers catching that type are unaffected.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The XPath provider returned null for the document's template ID.
        /// </exception>
        public CdaDocument Extract(XmlDocument cdaDocument)
        {
            if (cdaDocument == null)
            {
                // Most specific exception type for a null argument; carries the parameter name.
                throw new ArgumentNullException(nameof(cdaDocument), "'cdaDocument' cannot be null");
            }

            var cdaXmlDocument = new CdaXmlDocument(cdaDocument);

            IDictionary <string, string> documentXPaths = _documentXPathProvider.GetDocumentXPaths(cdaXmlDocument.TemplateId);

            if (documentXPaths == null)
            {
                throw new ArgumentException($"Document with template ID '{cdaXmlDocument.TemplateId}' not supported");
            }

            // Each extractor is built from the template's XPaths and run immediately.
            // The order of the calls below matches the original implementation.
            var author = new AuthorExtractor(documentXPaths).Extract(cdaXmlDocument);
            var medications = new MedicationsExtractor(documentXPaths).Extract(cdaXmlDocument);
            var adverseReactions = new AdverseReactionsExtractor(documentXPaths).Extract(cdaXmlDocument);
            var documentMetadata = new DocumentMetadataExtractor(documentXPaths).Extract(cdaXmlDocument);
            var medicalHistoryData = new MedicalHistoryExtractor(documentXPaths).Extract(cdaXmlDocument);
            var subjectOfCare = new SubjectOfCareExtractor(documentXPaths).Extract(cdaXmlDocument);
            var immunisations = new ImmunisationsExtractor(documentXPaths).Extract(cdaXmlDocument);
            var consumerNote = new ConsumerNoteExtractor(documentXPaths).Extract(cdaXmlDocument);
            var advanceCareInformation = new AdvanceCareInformationExtractor(documentXPaths).Extract(cdaXmlDocument);
            var pbs = new PharmaceuticalBenefitItemExtractor(documentXPaths).Extract(cdaXmlDocument);
            var diagnosticImaging = new DiagnosticImagingExtractor(documentXPaths).Extract(cdaXmlDocument);
            var pathology = new PathologyExtractor(documentXPaths).Extract(cdaXmlDocument);

            return new CdaDocument
            {
                Author                     = author,
                Medications                = medications,
                AdverseReactions           = adverseReactions,
                MedicalHistoryItems        = medicalHistoryData,
                DocumentMetadata           = documentMetadata,
                SubjectOfCare              = subjectOfCare,
                ImmunisationItems          = immunisations,
                ConsumerNote               = consumerNote,
                AdvanceCareInformation     = advanceCareInformation,
                PharmaceuticalBenefitItems = pbs,
                Pathology                  = pathology,
                DiagnosticImaging          = diagnosticImaging
            };
        }