/// <summary>
/// Dispatches a data-provider management command to <c>OaiApiRestService</c>
/// and reports the outcome as JSON.
/// </summary>
/// <param name="id">
/// Command name: "addorupdate", "delete" or "reidentify". Surrounding whitespace
/// and letter case are ignored.
/// </param>
/// <param name="baseURL">Base URL forwarded to the add/update call (trimmed; null when empty).</param>
/// <param name="dataProvider">Data provider that is serialized to JSON for the add/update call.</param>
/// <param name="OAIDataProviderId">Identifier forwarded to the delete and re-identify calls.</param>
/// <returns>
/// A JSON object whose <c>status</c> is <c>ok</c> (together with the affected data provider
/// or its id) when the service call succeeds, and <c>failure</c> otherwise, including when
/// <paramref name="id"/> is empty or not a known command.
/// </returns>
public async Task<ActionResult> DataProvider(
    string id,
    string baseURL = null,
    OAIDataProvider dataProvider = null,
    string OAIDataProviderId = null)
{
    if (string.IsNullOrEmpty(id))
    {
        return Json(new { status = failure });
    }

    var baseLocalUrl = Common.GetBaseApiUrl(this);

    /* Lower-case with the invariant culture: a culture-sensitive ToLower() maps 'I' to a
       dotless 'ı' under e.g. Turkish cultures, so "reIdentify" would not match below. */
    switch (id.Trim().ToLowerInvariant())
    {
        case "addorupdate":
            baseURL = string.IsNullOrEmpty(baseURL) ? null : baseURL.Trim();
            var jsonString = JsonConvert.SerializeObject(dataProvider);
            dataProvider = await OaiApiRestService.AddOrUpdateDataProvider(baseLocalUrl, baseURL, jsonString);
            if (dataProvider != null)
            {
                return Json(new { status = ok, dataProvider = dataProvider });
            }
            return Json(new { status = failure });

        case "delete":
            if (await OaiApiRestService.DeleteDataProvider(baseLocalUrl, OAIDataProviderId))
            {
                return Json(new { status = ok, OAIDataProviderId = OAIDataProviderId });
            }
            return Json(new { status = failure });

        case "reidentify":
            dataProvider = await OaiApiRestService.ReIdentifyDataProvider(baseLocalUrl, OAIDataProviderId);
            if (dataProvider != null)
            {
                return Json(new { status = ok, dataProvider = dataProvider });
            }
            return Json(new { status = failure });
    }

    /* unknown command */
    return Json(new { status = failure });
}
/// <summary>
/// Stores one harvested record: optionally appends a provenance entry to its "about"
/// list, then writes the header, the metadata and the about entries, in that order.
/// </summary>
/// <param name="record">Record to store; its header, metadata and about list are read.</param>
/// <param name="context">Database context passed through to the individual add calls.</param>
/// <param name="dp">Data provider the record comes from; its base URL goes into the provenance entry.</param>
/// <param name="metadataPrefix">Metadata prefix used to look up the namespace for the provenance entry.</param>
/// <param name="harvestDate">Harvest date recorded in the provenance entry.</param>
/// <param name="addProvenance">When true, a provenance entry is added to <c>record.About</c> first.</param>
/// <param name="createNewIdentifier">Forwarded to the provenance entry and to the header add call.</param>
/// <param name="identifierBase">Forwarded to the header add call.</param>
/// <param name="isHarvestDateTime">Forwarded to the provenance entry together with <paramref name="harvestDate"/>.</param>
public static void AddRecordToDatabase(
    RecordQueryResult record,
    OaiPmhContext context,
    OAIDataProvider dp,
    string metadataPrefix,
    DateTime harvestDate,
    bool addProvenance,
    bool createNewIdentifier,
    string identifierBase,
    bool isHarvestDateTime)
{
    var recordHeader = record.Header;

    if (addProvenance)
    {
        /* a missing datestamp is reported as DateTime.MinValue */
        var originDatestamp = recordHeader.Datestamp ?? DateTime.MinValue;

        var provenance = Provenance.NewMeta(
            harvestDate,
            isHarvestDateTime,
            createNewIdentifier,
            dp.BaseURL,
            recordHeader.OAI_Identifier,
            originDatestamp,
            recordHeader.IsDatestampDateTime,
            FormatList.GetNamespace(metadataPrefix));

        record.About.Add(provenance);
    }

    /* header goes first: the following calls use the header id as read after this call */
    Header.AddRecHeaderToDatabase(context, recordHeader, dp, createNewIdentifier, identifierBase);

    /* metadata */
    DbQueries.AddRecMetadataToDatabase(context, recordHeader.HeaderId, record.Metadata);

    /* about */
    DbQueries.AddRecAboutToDatabase(context, recordHeader.HeaderId, record.About);
}
/// <summary>
/// Builds the <c>dataProvider</c> XML element (in <c>MlNamespaces.oaiNs</c>) for a data provider.
/// </summary>
/// <param name="dataProvider">Data provider whose properties are written as child elements.</param>
/// <param name="granularity">
/// Date/time format string applied to <c>EarliestDatestamp</c> and <c>LastHarvesting</c>
/// after conversion to UTC.
/// </param>
/// <returns>
/// The new element. The two datestamp children are left out when the corresponding value
/// is not set (null content is skipped by <see cref="XElement"/>).
/// </returns>
public static XElement Encode(OAIDataProvider dataProvider, string granularity)
{
    /* Format with the invariant culture: with the current culture, separators such as ':'
       and the calendar in use depend on the server's regional settings, which would make
       the emitted datestamps vary between machines. */
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    XElement earliestDatestamp = null;
    if (dataProvider.EarliestDatestamp.HasValue)
    {
        earliestDatestamp = new XElement(
            MlNamespaces.oaiNs + "earliestDatestamp",
            dataProvider.EarliestDatestamp.Value.ToUniversalTime().ToString(granularity, invariant));
    }

    XElement lastHarvesting = null;
    if (dataProvider.LastHarvesting.HasValue)
    {
        lastHarvesting = new XElement(
            MlNamespaces.oaiNs + "lastHarvesting",
            dataProvider.LastHarvesting.Value.ToUniversalTime().ToString(granularity, invariant));
    }

    return new XElement(MlNamespaces.oaiNs + "dataProvider",
        /* content, in the same order as before */
        MlEncode.Element(MlNamespaces.oaiNs + "repositoryName", dataProvider.RepositoryName),
        MlEncode.Element(MlNamespaces.oaiNs + "baseURL", dataProvider.BaseURL),
        MlEncode.Element(MlNamespaces.oaiNs + "protocolVersion", dataProvider.ProtocolVersion),
        MlEncode.Element(MlNamespaces.oaiNs + "adminEmail", dataProvider.AdminEmail),
        earliestDatestamp,
        MlEncode.Element(MlNamespaces.oaiNs + "deletedRecord", dataProvider.DeletedRecord),
        MlEncode.Element(MlNamespaces.oaiNs + "granularity", dataProvider.Granularity),
        MlEncode.Element(MlNamespaces.oaiNs + "compression", dataProvider.Compression),
        lastHarvesting
    );
}
/// <summary>
/// Adds a record header to the context, saves it, and optionally assigns it a new
/// identifier of the form <c>identifierBase:HeaderId</c>.
/// </summary>
/// <param name="context">Database context the header is added to.</param>
/// <param name="header">Header to store; its <c>OAIDataProviderId</c> (and possibly <c>OAI_Identifier</c>) is overwritten.</param>
/// <param name="dataProvider">Data provider whose id is copied onto the header.</param>
/// <param name="createNewIdentifier">When true, <c>OAI_Identifier</c> is replaced with a newly built one.</param>
/// <param name="identifierBase">
/// Prefix of the new identifier; a trailing ':' is appended when missing.
/// A null or empty value is treated as an empty prefix, giving <c>":" + HeaderId</c>.
/// </param>
/// <remarks>
/// The identifier change is only marked as modified on the context; this method does not
/// call <c>SaveChanges</c> a second time, so the caller has to save it.
/// </remarks>
public static void AddRecHeaderToDatabase(
    OaiPmhContext context,
    Header header,
    OAIDataProvider dataProvider,
    bool createNewIdentifier,
    string identifierBase)
{
    header.OAIDataProviderId = dataProvider.OAIDataProviderId;
    context.Header.Add(header);

    /* saved right away: the identifier below is built from header.HeaderId as read after
       this call (presumably a database-generated key; confirm against the model) */
    context.SaveChanges();

    if (!createNewIdentifier)
    {
        return;
    }

    /* An empty or null base used to throw here, after the header had already been saved.
       Treat it as an empty prefix instead. */
    string prefix = identifierBase ?? string.Empty;
    if (prefix.Length == 0 || prefix[prefix.Length - 1] != ':')
    {
        prefix += ':';
    }

    header.OAI_Identifier = prefix + header.HeaderId;
    context.Entry(header).State = System.Data.EntityState.Modified;
}
/// <summary>
/// Updates an existing data provider, or identifies and adds a new one.
/// </summary>
/// <param name="baseURL">
/// Base URL passed to <c>IdentifyDataProvider</c> in add mode; ignored in update mode.
/// </param>
/// <param name="dataProvider">
/// Source of the <c>Function</c>, <c>FirstSource</c> and <c>SecondSource</c> values. A non-zero
/// <c>OAIDataProviderId</c> selects update mode; otherwise the method runs in add mode.
/// </param>
/// <returns>
/// The stored data provider, or null when
/// the provider to update does not exist,
/// no provider could be identified from <paramref name="baseURL"/> (or it is empty), or
/// in add mode, a provider with the same base URL is already stored.
/// </returns>
public static OAIDataProvider AddOrUpdateDataProvider(string baseURL, OAIDataProvider dataProvider)
{
    using (var context = new OaiPmhContext())
    {
        OAIDataProvider dp = null;
        bool isUpdateMode = dataProvider != null && dataProvider.OAIDataProviderId != 0;

        if (isUpdateMode)
        {
            /* get data provider to update */
            context.Configuration.ProxyCreationEnabled = false;
            var providerId = dataProvider.OAIDataProviderId;
            dp = context.OAIDataProvider.FirstOrDefault(d => d.OAIDataProviderId == providerId);
        }
        else if (!string.IsNullOrEmpty(baseURL))
        {
            /* get and parse XML document */
            dp = IdentifyDataProvider(baseURL);
        }

        /* The previous condition "dp != null && isUpdateMode ? true : ..." bound as
           "(dp != null && isUpdateMode) ? true : ...", so a null dp fell through to code
           that dereferences it. Check for null explicitly and on its own. */
        if (dp == null)
        {
            return null;
        }

        if (!isUpdateMode)
        {
            /* refuse to add a second provider with the same base URL */
            var newBaseUrl = dp.BaseURL;
            if (context.OAIDataProvider.Any(d => d.BaseURL == newBaseUrl))
            {
                return null;
            }
        }

        if (dataProvider != null)
        {
            dp.Function = dataProvider.Function;
            dp.FirstSource = dataProvider.FirstSource;
            dp.SecondSource = dataProvider.SecondSource;
        }

        if (!isUpdateMode)
        {
            context.OAIDataProvider.Add(dp);
        }

        context.SaveChanges();
        return dp;
    }
}
/// <summary>
/// Downloads one OAI-PMH response (ListRecords or GetRecord), optionally validates it,
/// parses its records and stores the non-deleted ones in the database.
/// </summary>
/// <param name="dataProvider">Data provider being harvested; null makes the method return null immediately.</param>
/// <param name="url">Request URL handed to <c>GetAndParseXMLAsync</c>.</param>
/// <param name="metadataPrefix">Metadata prefix used when parsing and storing records; must not be empty.</param>
/// <param name="deDup">De-duplication mode passed to <c>DeDuplicate.Records</c>.</param>
/// <param name="updateStats">When true, status, message and progress are written to <c>HarvestStats</c>.</param>
/// <param name="harvestFiles">When true, <c>FileHarvester.GetFile</c> is called for every record before it is stored.</param>
/// <param name="isList">True to read the <c>ListRecords</c> element, false to read <c>GetRecord</c>.</param>
/// <param name="retryCount">Not used inside this method; kept for signature compatibility.</param>
/// <returns>
/// "retry" when the document could not be downloaded or parsed;
/// for a non-list request with at least one record, the header id of the first record (or null
/// if no record is left after de-duplication);
/// otherwise the non-empty resumption token if one is present;
/// null in every other case, including validation failure and unexpected exceptions.
/// </returns>
private static async Task<string> HarvestRecordsAsync(
    OAIDataProvider dataProvider,
    string url,
    string metadataPrefix,
    Enums.DeDuplication deDup,
    bool updateStats,
    bool harvestFiles,
    bool isList,
    int retryCount = 3)
{
    if (dataProvider == null)
    {
        /* Without a provider there is no id to look up a stats entry with. The previous code
           dereferenced the null provider here and threw when updateStats was true. */
        return null;
    }

    if (string.IsNullOrEmpty(metadataPrefix))
    {
        ReportHarvestStatus(dataProvider, updateStats, "Exception", "Metadata format is not provided");
        return null;
    }

    try
    {
        XDocument xd;
        try
        {
            xd = await GetAndParseXMLAsync(url).ConfigureAwait(false);
        }
        catch (Exception e)
        {
            /* now honors updateStats like every other stats write in this method */
            ReportHarvestStatus(dataProvider, updateStats, "Retrying", e.Message);
            return "retry";
        }

        /* we validate if it's enabled */
        bool errors = false;
        if (Properties.validateXml && Properties.schemas.Count > 0)
        {
            xd.Validate(Properties.schemas, (sender, args) => { errors = true; });
        }

        if (Properties.validateXml && errors)
        {
            ReportHarvestStatus(dataProvider, updateStats, "Exception", "Validation has failed");
            return null;
        }

        /* get harvest date */
        DateTime harvestDate;
        bool isHarvestDateTime;
        MlDecode.ResponseDate(ref xd, out harvestDate, out isHarvestDateTime);

        XElement listRecords = isList
            ? xd.Root.Element(MlNamespaces.oaiNs + "ListRecords")
            : xd.Root.Element(MlNamespaces.oaiNs + "GetRecord");
        if (listRecords == null)
        {
            return null;
        }

        /* parse records, skipping the ones flagged as deleted */
        List<RecordQueryResult> records = new List<RecordQueryResult>();
        foreach (var recordElement in listRecords.Elements(MlNamespaces.oaiNs + "record"))
        {
            /* awaited instead of blocking on .Result, so a parse failure surfaces with its own
               message rather than wrapped in an AggregateException */
            var rec = await ParseRecordAsync(recordElement, metadataPrefix).ConfigureAwait(false);
            if (!rec.Header.Deleted)
            {
                records.Add(rec);
            }
        }

        /* captured before de-duplication; used for the progress figure below */
        int itemsPerPage = records.Count;

        if (records.Count > 0)
        {
            using (var context = new OaiPmhContext())
            {
                /* try to deduplicate (if selected) and add records to database */
                try
                {
                    context.OAIDataProvider.Attach(dataProvider);

                    /* the last-harvesting timestamp is only updated for non-list requests */
                    if (!isList)
                    {
                        dataProvider.LastHarvesting = harvestDate;
                        context.Entry(dataProvider).State = EntityState.Modified;
                    }

                    DeDuplicate.Records(records, context, deDup);

                    /* add records to database */
                    foreach (var record in records)
                    {
                        if (harvestFiles)
                        {
                            FileHarvester.GetFile(dataProvider, record);
                        }

                        RecordQueryResult.AddRecordToDatabase(
                            record,
                            context,
                            dataProvider,
                            metadataPrefix,
                            harvestDate,
                            Properties.addProvenanceToHarvestedRecords,
                            Properties.createNewIdentifierForHarvestedRecords,
                            Properties.identifierBase,
                            isHarvestDateTime);
                    }

                    context.SaveChanges();
                }
                catch (DbEntityValidationException dbEx)
                {
                    ReportHarvestStatus(
                        dataProvider,
                        updateStats,
                        "Exception",
                        "Database exception occured. Please contact administrator");

                    /* the offending document is saved regardless of updateStats */
                    SaveXMLWithErrors(xd, dbEx, dataProvider.RepositoryName);
                }
                catch (Exception e)
                {
                    ReportHarvestStatus(dataProvider, updateStats, "Exception", e.Message);
                }
            }

            if (!isList)
            {
                /* re-check the count: the list was handed to DeDuplicate.Records above */
                return records.Count > 0
                    ? records[0].Header.HeaderId.ToString()
                    : null;
            }
        }

        var resumption = listRecords.Element(MlNamespaces.oaiNs + "resumptionToken");
        if (resumption != null)
        {
            /* set complete list size and current progress */
            if (updateStats)
            {
                int completeListSize = 0;
                int cursor = 0;
                var listSizeAttribute = resumption.Attribute("completeListSize");
                var cursorAttribute = resumption.Attribute("cursor");

                bool hasListSize = listSizeAttribute != null
                    && int.TryParse(listSizeAttribute.Value, out completeListSize);
                if (hasListSize
                    && cursorAttribute != null
                    && int.TryParse(cursorAttribute.Value, out cursor))
                {
                    HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.RatioAll = completeListSize;
                    HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.RatioDone = cursor + itemsPerPage;
                }
            }

            if (!String.IsNullOrEmpty(resumption.Value))
            {
                return resumption.Value;
            }
        }
    }
    catch (Exception e)
    {
        ReportHarvestStatus(dataProvider, updateStats, "Exception", e.Message);
    }

    return null;
}

/// <summary>
/// Writes a status/message pair to the provider's <c>HarvestStats</c> entry when stats updates are enabled.
/// </summary>
private static void ReportHarvestStatus(
    OAIDataProvider dataProvider,
    bool updateStats,
    string status,
    string message)
{
    if (!updateStats)
    {
        return;
    }

    HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Status = status;
    HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Message = message;
}
/// <summary>
/// Harvests the file(s) belonging to a record and stores the resulting path on the record header.
/// </summary>
/// <param name="dataProvider">
/// Data provider of the record. The host of its base URL names the target sub-directory and
/// its <c>Function</c> ("FromPageOnly" or "FromSourceTag") selects how the file is obtained.
/// </param>
/// <param name="record">Record to harvest for; <c>record.Header.FilePath</c> is set on success.</param>
/// <remarks>
/// Nothing happens when either argument is null, when the record already has a file path and
/// overwriting is disabled, or when <c>Function</c> is neither of the two known values
/// (in that last case the target directory is still created).
/// </remarks>
public static void GetFile(OAIDataProvider dataProvider, RecordQueryResult record)
{
    if (dataProvider == null || record == null)
    {
        return;
    }

    /* <harvest directory>\<host of the provider's base URL>\ */
    string basePath = Properties.directoryForHarvestedFiles
        + "\\"
        + new Uri(dataProvider.BaseURL).Host
        + "\\";

    bool shouldHarvest = Properties.overwriteHarvestedFiles
        || string.IsNullOrEmpty(record.Header.FilePath);
    if (!shouldHarvest)
    {
        return;
    }

    if (!Directory.Exists(basePath))
    {
        Directory.CreateDirectory(basePath);
    }

    string harvestFunction = dataProvider.Function;
    string harvestedPath = null;
    if (harvestFunction == "FromPageOnly")
    {
        harvestedPath = FromPageOnly(record, basePath, dataProvider.FirstSource);
    }
    else if (harvestFunction == "FromSourceTag")
    {
        harvestedPath = FromSourceTag(record, dataProvider, basePath);
    }

    /* keep the previous path when nothing was harvested */
    if (!string.IsNullOrEmpty(harvestedPath))
    {
        record.Header.FilePath = harvestedPath;
    }
}
/// <summary>
/// Downloads every source returned by <c>GetAllSources</c> for the provider's first source
/// and returns the local paths of the files that were downloaded.
/// </summary>
/// <param name="record">Record whose sources are looked up.</param>
/// <param name="dataProvider">Data provider supplying <c>FirstSource</c> and <c>SecondSource</c>.</param>
/// <param name="basePath">Directory the files are written to.</param>
/// <returns>
/// The downloaded file paths joined with "][" when at least one download succeeded;
/// otherwise the result of <c>FromPageOnly</c> using <c>SecondSource</c> (or <c>FirstSource</c>
/// when the second one is empty). Null when an argument is missing or any exception occurs.
/// </returns>
public static string FromSourceTag(
    RecordQueryResult record,
    OAIDataProvider dataProvider,
    string basePath)
{
    try
    {
        bool hasAllInputs = record != null
            && dataProvider != null
            && !string.IsNullOrEmpty(basePath);
        if (!hasAllInputs)
        {
            return null;
        }

        /* every downloaded path is appended followed by the "][" separator */
        string joinedPaths = string.Empty;
        foreach (var source in GetAllSources(record, dataProvider.FirstSource))
        {
            var preferredName = source.Name;
            if (string.IsNullOrEmpty(preferredName))
            {
                preferredName = source.AlternateName;
            }

            string targetPath = CreateUniqueFileName(basePath, preferredName);
            if (!DownloadFile(targetPath, source.AbsoluteUri))
            {
                continue;
            }

            joinedPaths = joinedPaths + targetPath + "][";
        }

        if (joinedPaths.Length > 0)
        {
            /* drop the trailing separator */
            return joinedPaths.Remove(joinedPaths.Length - 2);
        }

        /* nothing was downloaded: fall back to the page-only strategy */
        string fallbackSource = dataProvider.SecondSource;
        if (string.IsNullOrEmpty(fallbackSource))
        {
            fallbackSource = dataProvider.FirstSource;
        }

        return FromPageOnly(record, basePath, fallbackSource);
    }
    catch (Exception)
    {
        /* best effort: any failure is swallowed and reported to the caller as null */
    }

    return null;
}