Beispiel #1
0
        /// <summary>
        /// Stores a newly crawled document group in the database.
        /// Builds a new <c>DocumentGroup</c> entity from the crawled model, assigns it a fresh
        /// GUID identifier, attaches the zipped content, delegates document handling to
        /// <c>AddDocuments</c>, saves the entity and finally reports the "Add" operation through
        /// <c>PChangeOperationStatus</c>.
        /// </summary>
        /// <param name="crawledDocumentGroup">The crawled document group to persist.</param>
        private void AddDocumentGroup(DocumentGroupModel crawledDocumentGroup)
        {
            var newDocumentGroup = new InterlexCrawlerEntities.DocumentGroup();

            newDocumentGroup.CrawlerId           = crawledDocumentGroup.CrawlerId;
            newDocumentGroup.Identifier          = Guid.NewGuid().ToString();
            newDocumentGroup.DocumentGroupName   = crawledDocumentGroup.Name;
            newDocumentGroup.DocumentGroupFormat = "application/zip";
            newDocumentGroup.Lang              = crawledDocumentGroup.TwoLetterLanguage;
            newDocumentGroup.Operation         = (int)DocumentGroupModelOperation.Add;

            // The timestamp is machine-readable data, so it is formatted with the invariant culture.
            // With the current culture the ':' separator and even the calendar (and therefore the
            // year) could differ depending on the locale of the machine running the crawler.
            newDocumentGroup.DocumentGroupDate = DateTime.UtcNow.ToString(
                "yyyy-MM-ddTHH:mm:ss",
                System.Globalization.CultureInfo.InvariantCulture);

            // Byte Array Data
            newDocumentGroup.DataContent = Zip.DocumentGroup(crawledDocumentGroup);
            this.AddDocuments(newDocumentGroup, crawledDocumentGroup);

            using (var context = new InterlexCrawlerEntities())
            {
                context.DocumentGroups.Add(newDocumentGroup);
                context.SaveChanges();

                // The status change is reported only after the group has been saved successfully.
                context.PChangeOperationStatus(newDocumentGroup.Identifier, (int)DocumentGroupModelOperation.Add, "DatabaseDocumentManager", null);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Looks up a stored document group by crawler id and group name and projects it,
        /// together with its documents, into a <see cref="DocumentGroupModel"/>.
        /// </summary>
        /// <param name="cralwerId">Id of the crawler that owns the group.</param>
        /// <param name="documentGroupName">Name of the document group to look up.</param>
        /// <returns>
        /// The first matching group with its name, identifier and documents filled in,
        /// or <c>null</c> when no group matches.
        /// </returns>
        private DocumentGroupModel GetDocumentGroupInfo(int cralwerId, string documentGroupName)
        {
            using (var db = new InterlexCrawlerEntities())
            {
                // Stage 1: restrict to the groups of the given crawler with the given name.
                var matchingGroups = db.DocumentGroups
                                     .Include(g => g.Documents)
                                     .Where(g => g.CrawlerId == cralwerId && g.DocumentGroupName == documentGroupName);

                // Stage 2: project the entity (and each of its documents) into the model types.
                var projectedGroups = matchingGroups.Select(g => new DocumentGroupModel
                {
                    Name       = g.DocumentGroupName,
                    Identifier = g.Identifier,
                    Documents  = g.Documents.Select(doc => new DocumentModel
                    {
                        Name      = doc.DocumentName,
                        Md5       = doc.Md5,
                        Operation = (DocumentModelOperation)doc.Operation,
                        Format    = doc.DocumentFormat
                    }).ToList()
                });

                return projectedGroups.FirstOrDefault();
            }
        }
Beispiel #3
0
        /// <summary>
        /// Resolves the id of the crawler registered under the given name.
        /// When no such crawler is found, a new crawler row is inserted and the lookup is repeated.
        /// </summary>
        /// <param name="crawlerName">Name of the crawler to look up or create.</param>
        /// <returns>The id of the existing or newly created crawler.</returns>
        public int GetOrCreateCrawlerId(string crawlerName)
        {
            using (var db = new InterlexCrawlerEntities())
            {
                // The lookup and the insert are performed under the same lock.
                lock (lockObject)
                {
                    var existingId = db.Crawlers
                                     .Where(c => c.CrawlerName == crawlerName)
                                     .Select(c => c.CrawlerId)
                                     .FirstOrDefault();

                    // FirstOrDefault yields 0 when there is no match; any other value is a real id.
                    if (existingId != 0)
                    {
                        return existingId;
                    }

                    db.Crawlers.Add(new InterlexCrawlerEntities.Crawler { CrawlerName = crawlerName });
                    db.SaveChanges();

                    // Repeat the lookup to obtain the id of the row that was just saved.
                    return this.GetOrCreateCrawlerId(crawlerName);
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Walks over every (id, content) pair returned by
        /// <c>GetNewOrUpdatedInterlexEditorToolLazy</c>, wraps each one in a single-document
        /// group and hands it to the document group manager.
        /// A failure for one item is logged with the item's id and does not stop the remaining items.
        /// </summary>
        /// <returns>An already completed task; all work is done synchronously before returning.</returns>
        public override Task StartAsync()
        {
            using (var db = new InterlexCrawlerEntities())
            {
                foreach (var (groupName, json) in db.GetNewOrUpdatedInterlexEditorToolLazy())
                {
                    try
                    {
                        var editorGroup = new DocumentGroupModel
                        {
                            CrawlerId         = this.CrawlerId,
                            Name              = groupName,
                            TwoLetterLanguage = "EU"
                        };

                        // The group carries exactly one document: the content encoded as bytes.
                        editorGroup.Documents.Add(new DocumentModel
                        {
                            Raw    = encoding.GetBytes(json),
                            Name   = "content",
                            Format = "application/json",
                            Url    = "local"
                        });

                        this.DocumentGroupManager.AddOrUpdateDocumentGroup(editorGroup);
                    }
                    catch (Exception ex)
                    {
                        this.Logger.Error($"{groupName}", ex);
                    }
                }
            }

            return Task.CompletedTask;
        }
Beispiel #5
0
        /// <summary>
        /// Applies a crawled document group on top of the group already stored in the database.
        /// The stored group is marked as updated and receives the new zipped content, timestamp and
        /// language. Crawled documents flagged <c>Add</c> are inserted, those flagged <c>Upd</c>
        /// overwrite the metadata of the stored document with the same name, and documents of
        /// <paramref name="documentGroupFromDatabase"/> flagged <c>Del</c> are marked as deleted.
        /// The changes are saved and then reported through <c>PChangeOperationStatus</c>.
        /// </summary>
        /// <param name="crawledDocumentGroup">The freshly crawled group with per-document operations.</param>
        /// <param name="documentGroupFromDatabase">
        /// The previously stored state; its identifier selects the database row and its documents
        /// carry the <c>Del</c> flags.
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// No single stored group has the given identifier, or a crawled document flagged
        /// <c>Upd</c> has no stored counterpart.
        /// </exception>
        private void UpdateDocumentGroup(DocumentGroupModel crawledDocumentGroup, DocumentGroupModel documentGroupFromDatabase)
        {
            using (var context = new InterlexCrawlerEntities())
            {
                var documentGroupDb = (from dg in context.DocumentGroups
                                       where dg.Identifier == documentGroupFromDatabase.Identifier
                                       select dg).Single();

                // Load the documents explicitly so the lookups below work on the in-memory collection.
                context.Entry(documentGroupDb).Collection(x => x.Documents).Load();

                documentGroupDb.Operation         = (int)DocumentGroupModelOperation.Upd;
                documentGroupDb.DataContent       = Zip.DocumentGroup(crawledDocumentGroup);

                // Machine-readable timestamp: format with the invariant culture so the separators
                // and the calendar do not depend on the locale of the machine running the crawler.
                documentGroupDb.DocumentGroupDate = DateTime.UtcNow.ToString(
                    "yyyy-MM-ddTHH:mm:ss",
                    System.Globalization.CultureInfo.InvariantCulture);
                documentGroupDb.Lang = crawledDocumentGroup.TwoLetterLanguage;

                // The order counter advances for every crawled document, whatever its operation,
                // so DocumentOrder reflects the position inside the crawled group.
                var documentOrder = 0;
                foreach (var crawledDocument in crawledDocumentGroup.Documents)
                {
                    if (crawledDocument.Operation == DocumentModelOperation.Add)
                    {
                        string fileLower = crawledDocument.Name.ToLower();
                        var    document  = new InterlexCrawlerEntities.Document();
                        document.DocumentName   = fileLower;
                        document.Identifier     = crawledDocument.Identifier;
                        document.DocumentFormat = crawledDocument.Format;
                        document.Operation      = (int)crawledDocument.Operation;
                        document.DocumentOrder  = documentOrder;
                        document.Url            = crawledDocument.Url;
                        document.Md5            = crawledDocument.Md5;

                        documentGroupDb.Documents.Add(document);
                    }
                    else if (crawledDocument.Operation == DocumentModelOperation.Upd)
                    {
                        // Names are stored lower-cased by the Add branch above, so a crawled name
                        // may differ from the stored one only in case. Prefer the exact match and
                        // fall back to a case-insensitive one.
                        var dbDocument = documentGroupDb.Documents.FirstOrDefault(x => x.DocumentName == crawledDocument.Name)
                                         ?? documentGroupDb.Documents.FirstOrDefault(x => string.Equals(x.DocumentName, crawledDocument.Name, StringComparison.OrdinalIgnoreCase));

                        if (dbDocument == null)
                        {
                            // Fail with a descriptive error instead of a NullReferenceException;
                            // as before, nothing is saved when an update target is missing.
                            throw new InvalidOperationException(
                                $"Document '{crawledDocument.Name}' is marked for update but was not found in document group '{documentGroupDb.Identifier}'.");
                        }

                        dbDocument.Operation      = (int)crawledDocument.Operation;
                        dbDocument.DocumentOrder  = documentOrder;
                        dbDocument.DocumentFormat = crawledDocument.Format;
                        dbDocument.Url            = crawledDocument.Url;
                        dbDocument.Md5            = crawledDocument.Md5;
                    }

                    documentOrder++;
                }

                // Deletions are driven by the flags on the previously stored documents.
                foreach (var documenInfo in documentGroupFromDatabase.Documents)
                {
                    if (documenInfo.Operation == DocumentModelOperation.Del)
                    {
                        var dbDocument = documentGroupDb.Documents.FirstOrDefault(x => x.DocumentName == documenInfo.Name);
                        if (dbDocument != null)
                        {
                            dbDocument.Operation = (int)documenInfo.Operation;
                        }
                    }
                }

                context.SaveChanges();

                // The status change is reported only after the changes have been saved successfully.
                context.PChangeOperationStatus(documentGroupDb.Identifier, (int)DocumentGroupModelOperation.Upd, "DatabaseDocumentManager", null);
            }
        }