/// <summary>
/// Serializes the metadata of a crawled document group to XML and uploads the
/// group's file bytes together with that XML to the web service.
/// </summary>
/// <param name="documentGroupDb">
/// Document group to upload. Its <c>DocumentGroupName</c> is the file name looked up
/// under the "Data" sub-folder of <c>Arguments.DestinationFolder</c>.
/// </param>
/// <returns>
/// <c>true</c> when the service answers "Ok"; <c>false</c> when it answers anything
/// else, or when any exception is thrown (the exception is logged, not rethrown).
/// </returns>
public bool UploadToService(DocumentGroup documentGroupDb)
{
    try
    {
        // Path.Combine instead of hand-concatenated "\\" so the path is built with
        // the platform's directory separator.
        string docPath = Path.Combine(
            Arguments.DestinationFolder,
            "Data",
            documentGroupDb.DocumentGroupName);
        var docArray = File.ReadAllBytes(docPath);

        UploadDocumentGroup uploadDocumentGroup = new UploadDocumentGroup();
        uploadDocumentGroup.Data = docArray;

        // The metadata travels as an XML string next to the raw bytes.
        XmlDocumentGroup xmlDocumentGroup = this.GetNewXmlDocumentGroup(documentGroupDb);
        XmlSerializer serializer = new XmlSerializer(typeof(XmlDocumentGroup));
        using (TextWriter writer = new StringWriter())
        {
            serializer.Serialize(writer, xmlDocumentGroup);
            uploadDocumentGroup.MetaInfo = writer.ToString();
        }

        string msg = this.webService.UploadFile(uploadDocumentGroup);
        if (msg != "Ok")
        {
            // Separator added before "Error:" so the id and the message do not run together.
            CrawlerLog.LogInfo(
                "Service Upload Error DocumentGroupId: " + documentGroupDb.DocumentGroupId
                + " Error: " + msg);
            return false;
        }
    }
    catch (Exception ex)
    {
        // Best-effort upload: any failure is logged and reported as false to the caller.
        CrawlerLog.LogException(ex);
        return false;
    }

    return true;
}
/// <summary>
/// Updates an existing document group from freshly crawled file metadata.
/// For every file the MD5 of its content is computed and stored on the metadata item;
/// a document already in the group is marked as updated when its MD5 differs, and a
/// file with no matching document is added to the group as a new document.
/// </summary>
/// <param name="filesMetaInfo">Metadata of the crawled files; each item's <c>Md5</c> is overwritten.</param>
/// <param name="documentGroupDb">The stored document group to compare against and modify.</param>
/// <returns><c>true</c> if at least one document was updated or added; otherwise <c>false</c>.</returns>
private bool UpdateDocumentGroup(
    List<XmlDocumentMetaInfo> filesMetaInfo,
    DocumentGroup documentGroupDb)
{
    bool isProcess = false;

    foreach (var fileMetaInfo in filesMetaInfo)
    {
        fileMetaInfo.Md5 = MD5HashHelper.GetMd5Hash(fileMetaInfo.DataContent);

        // Stored document names are matched against the lower-cased crawled file name.
        var documentDb = documentGroupDb.Documents
            .FirstOrDefault(x => x.DocumentName == fileMetaInfo.File.ToLower());

        if (documentDb != null)
        {
            if (documentDb.Md5 != fileMetaInfo.Md5)
            {
                // Upd DocumentGroup
                documentGroupDb.Operation = (int)Operation.Upd;

                // Invariant culture: ':' and the calendar are culture-sensitive in custom
                // format strings, and this timestamp must not vary with the machine locale.
                documentGroupDb.DocumentGroupDate = DateTime.UtcNow.ToString(
                    "yyyy-MM-ddTHH:mm:ss",
                    System.Globalization.CultureInfo.InvariantCulture);

                // Upd Document
                documentDb.Md5 = fileMetaInfo.Md5;
                documentDb.Operation = (int)Operation.Upd;
                isProcess = true;
            }
        }
        else
        {
            // Add new Document; the group itself is flagged as updated.
            documentGroupDb.Operation = (int)Operation.Upd;
            documentGroupDb.DocumentGroupDate = DateTime.UtcNow.ToString(
                "yyyy-MM-ddTHH:mm:ss",
                System.Globalization.CultureInfo.InvariantCulture);

            fileMetaInfo.Operation = Operation.Add;
            fileMetaInfo.Identifier = Guid.NewGuid().ToString();

            Document document = this.ReturnNewDocument(fileMetaInfo);
            documentGroupDb.Documents.Add(document);
            isProcess = true;
        }
    }

    return isProcess;
}
/// <summary>
/// Builds the XML-serializable description of a stored document group, including
/// one metadata entry per document in the group.
/// </summary>
/// <param name="documentGroupDb">Stored document group to convert.</param>
/// <returns>
/// A new <c>XmlDocumentGroup</c> whose crawler name and language come from
/// <c>Arguments</c>, whose format is always "application/zip", and whose remaining
/// fields are copied from <paramref name="documentGroupDb"/>.
/// </returns>
private XmlDocumentGroup GetNewXmlDocumentGroup(DocumentGroup documentGroupDb)
{
    var result = new XmlDocumentGroup
    {
        Crawler = Arguments.CrawlerName,
        Date = documentGroupDb.DocumentGroupDate,
        FileName = documentGroupDb.DocumentGroupName,
        Format = "application/zip",
        Identifier = documentGroupDb.Identifier,
        Lang = Arguments.Language,
        // The stored operation is an int; the XML model uses the Operation enum.
        Operation = (Operation)documentGroupDb.Operation
    };

    foreach (var source in documentGroupDb.Documents)
    {
        result.Document.Add(new XmlDocumentMetaInfo
        {
            File = source.DocumentName,
            Format = source.DocumentFormat,
            Identifier = source.Identifier,
            Md5 = source.Md5,
            Operation = (Operation)source.Operation,
            Url = source.Url
        });
    }

    return result;
}
/// <summary>
/// Creates a new document group for a crawled file set and registers it with the
/// data context. Every file gets its MD5 computed, a fresh identifier and the
/// <c>Add</c> operation, and is attached to the group as a new document.
/// </summary>
/// <param name="filesMetaInfo">
/// Metadata of the crawled files; each item's <c>Md5</c>, <c>Operation</c> and
/// <c>Identifier</c> are overwritten.
/// </param>
/// <param name="zipFileName">Name of the group's file; stored lower-cased as the group name.</param>
/// <param name="context">Data context the new group is added to (not saved here).</param>
/// <returns>
/// <c>true</c> if at least one document was added to the group; <c>false</c> when
/// <paramref name="filesMetaInfo"/> is empty (the empty group is still added to the context).
/// </returns>
private bool AddDocumentGroup(
    List<XmlDocumentMetaInfo> filesMetaInfo,
    string zipFileName,
    SQLiteCrawlerDataEntities context)
{
    bool isProcess = false;

    // Add DocumentGroup
    DocumentGroup newDocumentGroup = new DocumentGroup();

    // Invariant culture: ':' and the calendar are culture-sensitive in custom format
    // strings, and this timestamp must not vary with the machine locale.
    newDocumentGroup.DocumentGroupDate = DateTime.UtcNow.ToString(
        "yyyy-MM-ddTHH:mm:ss",
        System.Globalization.CultureInfo.InvariantCulture);
    newDocumentGroup.DocumentGroupName = zipFileName.ToLower();
    newDocumentGroup.Identifier = Guid.NewGuid().ToString();
    newDocumentGroup.Operation = (int)Operation.Add;

    foreach (var fileMetaInfo in filesMetaInfo)
    {
        fileMetaInfo.Md5 = MD5HashHelper.GetMd5Hash(fileMetaInfo.DataContent);
        fileMetaInfo.Operation = Operation.Add;
        fileMetaInfo.Identifier = Guid.NewGuid().ToString();
        isProcess = true;

        Document document = this.ReturnNewDocument(fileMetaInfo);
        newDocumentGroup.Documents.Add(document);
    }

    context.DocumentGroups.Add(newDocumentGroup);
    return isProcess;
}