/// <summary>
/// Stores a newly crawled document group in the database.
/// Builds a new <c>DocumentGroup</c> entity from the crawled model, fills its data content
/// from <c>Zip.DocumentGroup</c>, passes it to <c>AddDocuments</c>, saves it and then calls
/// <c>PChangeOperationStatus</c> with the Add operation.
/// </summary>
/// <param name="crawledDocumentGroup">The crawled document group to persist.</param>
private void AddDocumentGroup(DocumentGroupModel crawledDocumentGroup)
{
    var newDocumentGroup = new InterlexCrawlerEntities.DocumentGroup();
    newDocumentGroup.CrawlerId = crawledDocumentGroup.CrawlerId;
    // Every added group gets a freshly generated identifier.
    newDocumentGroup.Identifier = Guid.NewGuid().ToString();
    newDocumentGroup.DocumentGroupName = crawledDocumentGroup.Name;
    newDocumentGroup.DocumentGroupFormat = "application/zip";
    newDocumentGroup.Lang = crawledDocumentGroup.TwoLetterLanguage;
    newDocumentGroup.Operation = (int)DocumentGroupModelOperation.Add;

    // Format with the invariant culture: without it the current thread culture decides the
    // calendar and the time separator, so the stored timestamp could vary between machines.
    newDocumentGroup.DocumentGroupDate = DateTime.UtcNow.ToString(
        "yyyy-MM-ddTHH:mm:ss",
        System.Globalization.CultureInfo.InvariantCulture);

    // Byte Array Data
    newDocumentGroup.DataContent = Zip.DocumentGroup(crawledDocumentGroup);

    this.AddDocuments(newDocumentGroup, crawledDocumentGroup);

    using (var context = new InterlexCrawlerEntities())
    {
        context.DocumentGroups.Add(newDocumentGroup);
        context.SaveChanges();

        // The status call runs only after the group has been saved.
        context.PChangeOperationStatus(
            newDocumentGroup.Identifier,
            (int)DocumentGroupModelOperation.Add,
            "DatabaseDocumentManager",
            null);
    }
}
/// <summary>
/// Reads the stored document group that matches the given crawler id and group name and
/// projects it, together with its documents, into a <see cref="DocumentGroupModel"/>.
/// </summary>
/// <param name="cralwerId">Crawler id the group must belong to.</param>
/// <param name="documentGroupName">Name of the document group to look up.</param>
/// <returns>The first matching group, or the default value when nothing matches.</returns>
private DocumentGroupModel GetDocumentGroupInfo(int cralwerId, string documentGroupName)
{
    using (var context = new InterlexCrawlerEntities())
    {
        var matchingGroups =
            from storedGroup in context.DocumentGroups.Include(g => g.Documents)
            where storedGroup.CrawlerId == cralwerId
                && storedGroup.DocumentGroupName == documentGroupName
            select new DocumentGroupModel
            {
                Name = storedGroup.DocumentGroupName,
                Identifier = storedGroup.Identifier,
                Documents = storedGroup.Documents
                    .Select(storedDocument => new DocumentModel
                    {
                        Name = storedDocument.DocumentName,
                        Md5 = storedDocument.Md5,
                        Operation = (DocumentModelOperation)storedDocument.Operation,
                        Format = storedDocument.DocumentFormat
                    })
                    .ToList()
            };

        return matchingGroups.FirstOrDefault();
    }
}
/// <summary>
/// Returns the id of the crawler with the specified name. If the crawler does not exist,
/// it is created in the database first and its id is then looked up and returned.
/// </summary>
/// <param name="crawlerName">Name of the crawler to find or create.</param>
/// <returns>The id of the existing or newly created crawler.</returns>
public int GetOrCreateCrawlerId(string crawlerName)
{
    using (var context = new InterlexCrawlerEntities())
    {
        lock (lockObject)
        {
            var existingId = context.Crawlers
                .Where(c => c.CrawlerName == crawlerName)
                .Select(c => c.CrawlerId)
                .FirstOrDefault();

            // A non-zero id means the crawler is already stored.
            if (existingId != 0)
            {
                return(existingId);
            }

            var createdCrawler = new InterlexCrawlerEntities.Crawler
            {
                CrawlerName = crawlerName
            };
            context.Crawlers.Add(createdCrawler);
            context.SaveChanges();

            // Look the crawler up again now that it has been saved.
            return(this.GetOrCreateCrawlerId(crawlerName));
        }
    }
}
/// <summary>
/// Iterates over the (id, content) pairs returned by
/// <c>GetNewOrUpdatedInterlexEditorToolLazy</c>, wraps each one in a document group holding a
/// single JSON document and hands it to the document group manager. A failure on one item is
/// logged with that item's id and does not stop the loop.
/// </summary>
/// <returns>An already completed task; all work is done synchronously before returning.</returns>
public override Task StartAsync()
{
    using (var context = new InterlexCrawlerEntities())
    {
        foreach (var (id, content) in context.GetNewOrUpdatedInterlexEditorToolLazy())
        {
            try
            {
                var documentGroup = new DocumentGroupModel
                {
                    CrawlerId = this.CrawlerId,
                    Name = id,
                    TwoLetterLanguage = "EU"
                };

                // The group carries exactly one document: the item's content as bytes.
                documentGroup.Documents.Add(new DocumentModel
                {
                    Raw = encoding.GetBytes(content),
                    Name = "content",
                    Format = "application/json",
                    Url = "local"
                });

                this.DocumentGroupManager.AddOrUpdateDocumentGroup(documentGroup);
            }
            catch (Exception e)
            {
                this.Logger.Error($"{id}", e);
            }
        }
    }

    return(Task.CompletedTask);
}
/// <summary>
/// Applies a crawled document group to its existing database row.
/// Refreshes the group's operation, data content, date and language; adds or updates the
/// group's documents according to each crawled document's operation; copies the Del operation
/// onto stored documents flagged for deletion; saves the changes and finally calls
/// <c>PChangeOperationStatus</c> with the Upd operation.
/// </summary>
/// <param name="crawledDocumentGroup">The freshly crawled group whose documents carry Add/Upd operations.</param>
/// <param name="documentGroupFromDatabase">
/// The previously read group info; its identifier selects the row to update and its documents
/// are checked for the Del operation.
/// </param>
/// <exception cref="InvalidOperationException">
/// Thrown when a crawled document is marked Upd but no stored document with that name exists
/// in the group, or when the group identifier does not match exactly one row.
/// </exception>
private void UpdateDocumentGroup(DocumentGroupModel crawledDocumentGroup, DocumentGroupModel documentGroupFromDatabase)
{
    using (var context = new InterlexCrawlerEntities())
    {
        var documentGroupDb = (from dg in context.DocumentGroups
                               where dg.Identifier == documentGroupFromDatabase.Identifier
                               select dg).Single();

        // Load the documents explicitly so the lookups below run against the in-memory collection.
        context.Entry(documentGroupDb).Collection(x => x.Documents).Load();

        documentGroupDb.Operation = (int)DocumentGroupModelOperation.Upd;
        documentGroupDb.DataContent = Zip.DocumentGroup(crawledDocumentGroup);

        // Format with the invariant culture: without it the current thread culture decides the
        // calendar and the time separator, so the stored timestamp could vary between machines.
        documentGroupDb.DocumentGroupDate = DateTime.UtcNow.ToString(
            "yyyy-MM-ddTHH:mm:ss",
            System.Globalization.CultureInfo.InvariantCulture);
        documentGroupDb.Lang = crawledDocumentGroup.TwoLetterLanguage;

        // The order index advances for every crawled document, including ones that are
        // neither added nor updated, so it reflects the position in the crawled list.
        var documentOrder = 0;
        foreach (var crawledDocument in crawledDocumentGroup.Documents)
        {
            if (crawledDocument.Operation == DocumentModelOperation.Add)
            {
                // New documents are stored under their lower-cased name.
                string fileLower = crawledDocument.Name.ToLower();

                var document = new InterlexCrawlerEntities.Document();
                document.DocumentName = fileLower;
                document.Identifier = crawledDocument.Identifier;
                document.DocumentFormat = crawledDocument.Format;
                document.Operation = (int)crawledDocument.Operation;
                document.DocumentOrder = documentOrder;
                document.Url = crawledDocument.Url;
                document.Md5 = crawledDocument.Md5;

                documentGroupDb.Documents.Add(document);
            }
            else if (crawledDocument.Operation == DocumentModelOperation.Upd)
            {
                // Match case-insensitively: stored names were lower-cased when added, while the
                // crawled name may still carry its original casing.
                var dbDocument = documentGroupDb.Documents.FirstOrDefault(
                    x => string.Equals(x.DocumentName, crawledDocument.Name, StringComparison.OrdinalIgnoreCase));

                if (dbDocument == null)
                {
                    // Fail with a descriptive error instead of a NullReferenceException; nothing
                    // has been saved yet, so the whole update is abandoned as before.
                    throw new InvalidOperationException(
                        $"Document '{crawledDocument.Name}' is marked for update but was not found in document group '{documentGroupDb.Identifier}'.");
                }

                dbDocument.Operation = (int)crawledDocument.Operation;
                dbDocument.DocumentOrder = documentOrder;
                dbDocument.DocumentFormat = crawledDocument.Format;
                dbDocument.Url = crawledDocument.Url;
                dbDocument.Md5 = crawledDocument.Md5;
            }

            documentOrder++;
        }

        foreach (var documenInfo in documentGroupFromDatabase.Documents)
        {
            if (documenInfo.Operation == DocumentModelOperation.Del)
            {
                var dbDocument = documentGroupDb.Documents.FirstOrDefault(x => x.DocumentName == documenInfo.Name);

                // A document that is already gone is skipped.
                if (dbDocument != null)
                {
                    dbDocument.Operation = (int)documenInfo.Operation;
                }
            }
        }

        context.SaveChanges();
        context.PChangeOperationStatus(
            documentGroupDb.Identifier,
            (int)DocumentGroupModelOperation.Upd,
            "DatabaseDocumentManager",
            null);
    }
}