/// <summary>
/// Loads every data provider stored in the database.
/// </summary>
/// <returns>A fully materialized list of all <c>OAIDataProvider</c> rows.</returns>
public static IEnumerable<OAIDataProvider> GetDataProviders()
{
    List<OAIDataProvider> allProviders;

    using (var db = new OaiPmhContext())
    {
        /* proxy creation is switched off before the entities are loaded */
        db.Configuration.ProxyCreationEnabled = false;

        /* materialize inside the using block, the context is disposed afterwards */
        allProviders = db.OAIDataProvider.ToList();
    }

    return allProviders;
}
/// <summary>
/// Builds a query for the headers whose OAI identifier equals <paramref name="identifier"/>.
/// </summary>
/// <param name="context">Database context the query is built on.</param>
/// <param name="identifier">OAI identifier to match.</param>
/// <returns>A query over <c>context.Header</c>; nothing is executed until the caller enumerates it.</returns>
public static IQueryable<Header> GetHeader(OaiPmhContext context, string identifier)
{
    return context.Header.Where(header => header.OAI_Identifier == identifier);
}
/// <summary>
/// Stores the metadata of a record, linked as <c>Enums.MetadataType.Metadata</c> of an
/// <c>Enums.ObjectType.OAIRecord</c>.
/// </summary>
/// <param name="context">Database context; when null nothing is done.</param>
/// <param name="recId">Id of the owning record; when 0 nothing is done.</param>
/// <param name="metadata">Metadata to store; when null nothing is done.</param>
public static void AddRecMetadataToDatabase(OaiPmhContext context, int recId, Metadata metadata)
{
    bool hasAllArguments = context != null && recId != 0 && metadata != null;

    if (hasAllArguments)
    {
        AddToDatabase(
            context,
            recId,
            Enums.ObjectType.OAIRecord,
            Enums.MetadataType.Metadata,
            metadata);
    }
}
/// <summary>
/// Saves a metadata entity and queues the row that links it to its owning object.
/// </summary>
/// <param name="context">Database context used for both inserts.</param>
/// <param name="objId">Id of the owning object.</param>
/// <param name="objType">Type code of the owning object.</param>
/// <param name="metaType">Type code describing the role of the metadata.</param>
/// <param name="metadata">Metadata entity to store.</param>
/// <remarks>
/// Only the metadata itself is saved here. The link row is added to the context
/// but this method does not call <c>SaveChanges</c> for it.
/// </remarks>
public static void AddMetadataToDatabase(OaiPmhContext context, int objId, byte objType, byte metaType, Metadata metadata)
{
    /* save first: the link below reads metadata.MetadataId after the save
       (presumably a database generated key - confirm against the model) */
    context.Metadata.Add(metadata);
    context.SaveChanges();

    var link = new ObjectMetadata();
    link.ObjectId = objId;
    link.ObjectType = objType;
    link.MetadataType = metaType;
    link.MetadataId = metadata.MetadataId;

    context.ObjectMetadata.Add(link);
}
/// <summary>
/// Stores every non-null "about" entry of a record, linked as
/// <c>Enums.MetadataType.About</c> of an <c>Enums.ObjectType.OAIRecord</c>.
/// </summary>
/// <param name="context">Database context; when null nothing is done.</param>
/// <param name="recId">Id of the owning record; when 0 nothing is done.</param>
/// <param name="about">List of about entries; when null nothing is done. Null entries are skipped.</param>
public static void AddRecAboutToDatabase(OaiPmhContext context, int recId, List<Metadata> about)
{
    bool canStore = !(context == null || recId == 0 || about == null);
    if (!canStore)
    {
        return;
    }

    foreach (var aboutEntry in about)
    {
        if (aboutEntry == null)
        {
            continue;
        }

        AddToDatabase(
            context,
            recId,
            Enums.ObjectType.OAIRecord,
            Enums.MetadataType.About,
            aboutEntry);
    }
}
/// <summary>
/// Hands the metadata to the storage routine that belongs to its format.
/// </summary>
/// <param name="context">Database context passed on to the format specific routine.</param>
/// <param name="objId">Id of the owning object.</param>
/// <param name="objType">Type code of the owning object.</param>
/// <param name="metaType">Type code describing the role of the metadata.</param>
/// <param name="metadata">Metadata to store; its <c>MdFormat</c> selects the routine.</param>
/// <remarks>
/// Dublin Core and Provenance are handled. <c>Enums.MetadataFormats.None</c> and every
/// other format are ignored without an error.
/// </remarks>
public static void AddToDatabase(OaiPmhContext context, int objId, byte objType, byte metaType, Metadata metadata)
{
    var format = FormatList.Int2Format(metadata.MdFormat);

    if (format == Enums.MetadataFormats.DublinCore)
    {
        DublinCore.AddToDatabase(context, objId, objType, metaType, metadata);
    }
    else if (format == Enums.MetadataFormats.Provenance)
    {
        Provenance.AddToDatabase(context, objId, objType, metaType, metadata);
    }
    /* TODO: Add format here */
    /* Enums.MetadataFormats.None and unknown formats: nothing to store */
}
/// <summary>
/// Stores one harvested record: header first, then its metadata, then its about entries.
/// </summary>
/// <param name="record">Record to store; its header, metadata and about list are read.</param>
/// <param name="context">Database context used for all inserts.</param>
/// <param name="dp">Data provider the record was harvested from.</param>
/// <param name="metadataPrefix">Metadata prefix used for the harvest; its namespace goes into the provenance entry.</param>
/// <param name="harvestDate">Date of the harvest, written into the provenance entry.</param>
/// <param name="addProvenance">When true a provenance entry is appended to the about list before storing.</param>
/// <param name="createNewIdentifier">Passed on to the header and provenance routines.</param>
/// <param name="identifierBase">Passed on to the header routine.</param>
/// <param name="isHarvestDateTime">Passed on to the provenance routine together with <paramref name="harvestDate"/>.</param>
public static void AddRecordToDatabase(
    RecordQueryResult record,
    OaiPmhContext context,
    OAIDataProvider dp,
    string metadataPrefix,
    DateTime harvestDate,
    bool addProvenance,
    bool createNewIdentifier,
    string identifierBase,
    bool isHarvestDateTime)
{
    if (addProvenance)
    {
        /* a record without any about element may carry a null list
           (AddRecAboutToDatabase accepts null as well), so create it on demand */
        if (record.About == null)
        {
            record.About = new List<Metadata>();
        }

        record.About.Add(Provenance.NewMeta(
            harvestDate,
            isHarvestDateTime,
            createNewIdentifier,
            dp.BaseURL,
            record.Header.OAI_Identifier,
            record.Header.Datestamp ?? DateTime.MinValue,
            record.Header.IsDatestampDateTime,
            FormatList.GetNamespace(metadataPrefix)));
    }

    /* add header - must run first, the calls below use record.Header.HeaderId */
    Header.AddRecHeaderToDatabase(
        context,
        record.Header,
        dp,
        createNewIdentifier,
        identifierBase);

    /* add metadata */
    DbQueries.AddRecMetadataToDatabase(
        context,
        record.Header.HeaderId,
        record.Metadata);

    /* add about */
    DbQueries.AddRecAboutToDatabase(
        context,
        record.Header.HeaderId,
        record.About);
}
/// <summary>
/// Updates an existing data provider or registers a new one identified through its base URL.
/// </summary>
/// <param name="baseURL">Base URL to identify a new data provider from; only used when not updating.</param>
/// <param name="dataProvider">
/// Optional values to apply (<c>Function</c>, <c>FirstSource</c>, <c>SecondSource</c>).
/// A non-zero <c>OAIDataProviderId</c> selects update mode.
/// </param>
/// <returns>
/// The stored data provider, or null when the provider to update does not exist, the
/// base URL could not be identified, or a provider with the same base URL already exists.
/// </returns>
public static OAIDataProvider AddOrUpdateDataProvider(string baseURL, OAIDataProvider dataProvider)
{
    using (var context = new OaiPmhContext())
    {
        OAIDataProvider dp = null;
        bool isUpdateMode = dataProvider != null && dataProvider.OAIDataProviderId != 0;

        if (isUpdateMode)
        {
            /* get data provider to update */
            context.Configuration.ProxyCreationEnabled = false;
            dp = context.OAIDataProvider
                .Where(d => d.OAIDataProviderId == dataProvider.OAIDataProviderId)
                .FirstOrDefault();
        }
        else if (!string.IsNullOrEmpty(baseURL))
        {
            /* get and parse XML document */
            dp = IdentifyDataProvider(baseURL);
        }

        /* The original condition "dp != null && isUpdateMode ? true : ..." parsed as
           "(dp != null && isUpdateMode) ? true : ...", so a null dp was dereferenced.
           Reject the null case explicitly before anything else. */
        if (dp == null)
        {
            return null;
        }

        if (!isUpdateMode)
        {
            /* a new provider must not duplicate an already registered base URL */
            string identifiedBaseUrl = dp.BaseURL;
            bool alreadyRegistered = context.OAIDataProvider
                .Where(d => d.BaseURL == identifiedBaseUrl)
                .Any();
            if (alreadyRegistered)
            {
                return null;
            }
        }

        if (dataProvider != null)
        {
            dp.Function = dataProvider.Function;
            dp.FirstSource = dataProvider.FirstSource;
            dp.SecondSource = dataProvider.SecondSource;
        }

        if (!isUpdateMode)
        {
            context.OAIDataProvider.Add(dp);
        }

        context.SaveChanges();
        return dp;
    }
}
/// <summary>
/// Saves a record header for the given data provider and optionally assigns it a new
/// identifier built from <paramref name="identifierBase"/> and the header id.
/// </summary>
/// <param name="context">Database context used for the insert.</param>
/// <param name="header">Header to store; its <c>OAIDataProviderId</c> is overwritten.</param>
/// <param name="dataProvider">Data provider that owns the header.</param>
/// <param name="createNewIdentifier">When true the OAI identifier is replaced after the insert.</param>
/// <param name="identifierBase">
/// Prefix of the new identifier. A ':' is appended when it does not already end with one;
/// a null or empty value is treated as an empty prefix.
/// </param>
/// <remarks>
/// The changed identifier is only marked as modified. This method does not call
/// <c>SaveChanges</c> again, so the caller has to save the context afterwards.
/// </remarks>
public static void AddRecHeaderToDatabase(
    OaiPmhContext context,
    Header header,
    OAIDataProvider dataProvider,
    bool createNewIdentifier,
    string identifierBase)
{
    header.OAIDataProviderId = dataProvider.OAIDataProviderId;
    context.Header.Add(header);

    /* save now: header.HeaderId is read below to build the identifier */
    context.SaveChanges();

    if (!createNewIdentifier)
    {
        return;
    }

    /* the length check prevents the out-of-range access a null/empty base caused before */
    bool endsWithColon = !string.IsNullOrEmpty(identifierBase)
        && identifierBase[identifierBase.Length - 1] == ':';
    if (!endsWithColon)
    {
        identifierBase += ':';
    }

    header.OAI_Identifier = identifierBase + header.HeaderId;
    context.Entry(header).State = System.Data.EntityState.Modified;
}
/// <summary>
/// Applies the selected de-duplication mode to a list of harvested records.
/// </summary>
/// <param name="records">Harvested records; modified in place. A null list is ignored.</param>
/// <param name="context">Database context whose connection is used to run the stored procedure.</param>
/// <param name="deDup">De-duplication mode.</param>
/// <remarks>
/// <c>AddDuplicate</c> and <c>UpdateOriginal</c> leave the list untouched (no implementation is
/// active for <c>UpdateOriginal</c>). <c>Skip</c> sends all identifiers to the stored procedure
/// <c>DeDuplicateSkip</c> and removes every record whose identifier is returned by it.
/// </remarks>
public static void Records(
    List<RecordQueryResult> records,
    OaiPmhContext context,
    Enums.DeDuplication deDup)
{
    if (records == null)
    {
        return;
    }

    switch (deDup)
    {
        case Enums.DeDuplication.AddDuplicate:
            break;

        case Enums.DeDuplication.UpdateOriginal:
            /* not implemented: the records are stored as they are */
            break;

        case Enums.DeDuplication.Skip:
            {
                /* identifiers reported back by the stored procedure; a set keeps the
                   membership test in RemoveAll below cheap */
                var identifiersToSkip = new HashSet<string>();

                using (var cmd = context.Database.Connection.CreateCommand())
                {
                    cmd.CommandText = "DeDuplicateSkip";
                    cmd.CommandType = CommandType.StoredProcedure;

                    /* pass whole list as parameter to SP */
                    var idTable = new DataTable();
                    idTable.Columns.Add("Item", typeof(string));
                    foreach (var record in records)
                    {
                        string identifier = record.Header.OAI_Identifier;
                        idTable.Rows.Add(identifier);
                    }

                    cmd.Parameters.Add(new SqlParameter("@idList", SqlDbType.Structured)
                    {
                        TypeName = "dbo.RecIdList",
                        Value = idTable
                    });
                    cmd.Parameters.Add(new SqlParameter("@objectType", Enums.ObjectType.OAIRecord));
                    cmd.Parameters.Add(new SqlParameter("@metadataType", Enums.MetadataType.About));
                    cmd.Parameters.Add(new SqlParameter("@provenanceNum", (byte)Enums.MetadataFormats.Provenance));

                    try
                    {
                        context.Database.Connection.Open();
                        using (var reader = cmd.ExecuteReader())
                        {
                            while (reader.Read())
                            {
                                identifiersToSkip.Add(reader.GetString(0));
                            }
                        }
                    }
                    finally
                    {
                        context.Database.Connection.Close();
                    }
                }

                /* The predicate touches only members that were already read while the
                   id table was filled, so the former catch-all around this call had
                   nothing left to catch and was dropped. */
                records.RemoveAll(r => identifiersToSkip.Contains(r.Header.OAI_Identifier));
            }
            break;

        default:
            break;
    }
}
/// <summary>
/// Builds the response document for the verbs <c>ListIdentifiers</c> and <c>ListRecords</c>.
/// </summary>
/// <param name="verb">Requested verb; anything but "ListIdentifiers" or "ListRecords" is reported as an error.</param>
/// <param name="from">Optional lower datestamp bound as text.</param>
/// <param name="until">Optional upper datestamp bound as text.</param>
/// <param name="metadataPrefix">Requested metadata prefix; required unless a resumption token is resolved.</param>
/// <param name="set">Optional set spec.</param>
/// <param name="resumptionToken">Optional resumption token; looked up in <c>Properties.resumptionTokens</c>.</param>
/// <param name="isRoundtrip">
/// True when the method calls itself with the arguments stored in a resumption token.
/// External callers presumably pass false - confirm against the call sites.
/// </param>
/// <param name="errorList">List that collects errors; it is modified by this method.</param>
/// <param name="loadAbout">Overrides <c>Properties.loadAbout</c> when it has a value.</param>
/// <returns>
/// A document made of the request element followed by either the collected errors or the list element.
/// </returns>
/// <remarks>
/// The method reads and writes the shared <c>Properties.resumptionTokens</c> dictionary:
/// it adds a token when a list has to be split, advances the cursor on every page and
/// removes the token once the list is complete.
/// </remarks>
public static XDocument ListIdentifiersOrRecords(
    string verb,
    string from,
    string until,
    string metadataPrefix,
    string set,
    string resumptionToken,
    bool isRoundtrip,
    List<XElement> errorList,
    bool? loadAbout)
{
    List<XElement> errors = errorList;
    DateTime? fromDate = DateTime.MinValue;
    DateTime? untilDate = DateTime.MaxValue;

    /* VERB */
    bool isRecord = false;
    if (String.IsNullOrEmpty(verb) || !(verb == "ListIdentifiers" || verb == "ListRecords"))
    {
        errors.Add(MlErrors.badVerbArgument);
    }
    else
    {
        isRecord = verb == "ListRecords";
    }

    /* FROM */
    bool isFrom = !String.IsNullOrEmpty(from);
    fromDate = MlDecode.SafeDateTime(from);
    if (isFrom && fromDate == null)
    {
        errors.Add(MlErrors.badFromArgument);
    }

    /* UNTIL */
    bool isUntil = !String.IsNullOrEmpty(until);
    untilDate = MlDecode.SafeDateTime(until);
    if (isUntil && untilDate == null)
    {
        errors.Add(MlErrors.badUntilArgument);
    }
    if (isFrom && isUntil && fromDate > untilDate)
    {
        errors.Add(MlErrors.badFromAndUntilArgument);
    }

    /* METADATA PREFIX */
    bool isPrefixOk = !String.IsNullOrEmpty(metadataPrefix);

    /* SETS */
    bool isSet = !String.IsNullOrEmpty(set);
    if (isSet && !Properties.supportSets)
    {
        errors.Add(MlErrors.noSetHierarchy);
    }

    /* RESUMPTION TOKEN */
    bool isResumption = !String.IsNullOrEmpty(resumptionToken);
    if (isResumption && !isRoundtrip)
    {
        /* a resumption token has to be the only argument besides the verb */
        if (isFrom || isUntil || isPrefixOk || isSet)
        {
            errors.Add(MlErrors.badResumptionArgumentOnly);
        }

        /* the token has to be known, issued for the same verb and not expired */
        if (!(Properties.resumptionTokens.ContainsKey(resumptionToken) &&
            Properties.resumptionTokens[resumptionToken].Verb == verb &&
            Properties.resumptionTokens[resumptionToken].ExpirationDate >= DateTime.UtcNow))
        {
            errors.Insert(0, MlErrors.badResumptionArgument);
        }

        if (errors.Count == 0)
        {
            /* run again with the arguments that were stored with the token */
            return ListIdentifiersOrRecords(
                verb,
                Properties.resumptionTokens[resumptionToken].From.HasValue ?
                    Properties.resumptionTokens[resumptionToken].From.Value.ToUniversalTime().ToString(Properties.granularity) : null,
                Properties.resumptionTokens[resumptionToken].Until.HasValue ?
                    Properties.resumptionTokens[resumptionToken].Until.Value.ToUniversalTime().ToString(Properties.granularity) : null,
                Properties.resumptionTokens[resumptionToken].MetadataPrefix,
                Properties.resumptionTokens[resumptionToken].Set,
                resumptionToken,
                true,
                errors,
                loadAbout);
        }
    }

    if (!isPrefixOk) /* Check if the only required attribute is included in the request */
    {
        errors.Add(MlErrors.badMetadataArgument);
    }
    else if (FormatList.Prefix2Int(metadataPrefix) == 0)
    {
        errors.Add(MlErrors.cannotDisseminateFormat);
    }

    bool isAbout = loadAbout.HasValue ? loadAbout.Value : Properties.loadAbout;

    /* XAttribute rejects a null value, so a missing verb must not be turned into an
       attribute; otherwise the bad verb error collected above could never be returned */
    XElement request = new XElement("request",
        String.IsNullOrEmpty(verb) ? null : new XAttribute("verb", verb),
        isFrom ? new XAttribute("from", from) : null,
        isUntil ? new XAttribute("until", until) : null,
        isPrefixOk ? new XAttribute("metadataPrefix", metadataPrefix) : null,
        isSet ? new XAttribute("set", set) : null,
        isResumption ? new XAttribute("resumptionToken", resumptionToken) : null,
        Properties.baseURL);

    if (errors.Count > 0)
    {
        errors.Insert(0, request); /* add request on the first position, so that it is displayed before the errors */
        return CreateXml(errors.ToArray());
    }

    var records = new List<RecordQueryResult>();

    using (var context = new OaiPmhContext())
    {
        List<string> sets = Helper.GetAllSets(set);
        var formatNum = FormatList.Prefix2Int(metadataPrefix);

        /* headers of records that own metadata in the requested format and lie inside the date range */
        var recordsQuery = from rec in context.Header
                           join om in context.ObjectMetadata on rec.HeaderId equals om.ObjectId
                           join md in context.Metadata on om.MetadataId equals md.MetadataId
                           where om.ObjectType == Enums.ObjectType.OAIRecord
                           where om.MetadataType == Enums.MetadataType.Metadata
                           where (!isFrom || rec.Datestamp.Value >= fromDate)
                           where (!isUntil || rec.Datestamp.Value <= untilDate)
                           where (md.MdFormat & formatNum) != 0
                           orderby rec.Datestamp
                           select rec;

        if (isSet)
        {
            recordsQuery = from rq in recordsQuery
                           from s in context.OAISet
                           from l in sets
                           where s.Spec == l
                           where rq.OAI_Set == s.Spec
                           select rq;
        }

        int recordsCount = recordsQuery.Count();

        if (recordsCount == 0)
        {
            return CreateXml(new XElement[] { request, MlErrors.noRecordsMatch });
        }
        else if (isRoundtrip)
        {
            /* continue an already started list at the stored cursor */
            Properties.resumptionTokens[resumptionToken].CompleteListSize = recordsCount;
            recordsQuery = recordsQuery.Skip(
                Properties.resumptionTokens[resumptionToken].Cursor.Value).Take(
                isRecord ? Properties.maxRecordsInList : Properties.maxIdentifiersInList);
        }
        else if ((isRecord ? Properties.resumeListRecords : Properties.resumeListIdentifiers) &&
            (isRecord ? recordsCount > Properties.maxRecordsInList
                      : recordsCount > Properties.maxIdentifiersInList))
        {
            /* the list is too long for one response: start a new resumption token */
            resumptionToken = Helper.CreateGuid();
            isResumption = true;
            Properties.resumptionTokens.Add(resumptionToken,
                new ResumptionToken()
                {
                    Verb = verb,
                    From = isFrom ? fromDate : null,
                    Until = isUntil ? untilDate : null,
                    MetadataPrefix = metadataPrefix,
                    Set = set,
                    ExpirationDate = DateTime.UtcNow.Add(Properties.expirationTimeSpan),
                    CompleteListSize = recordsCount,
                    Cursor = 0
                });

            recordsQuery = recordsQuery.Take(
                isRecord ? Properties.maxRecordsInList : Properties.maxIdentifiersInList);
        }

        /* get data from database */
        var recGroup = (from rec in recordsQuery
                        join omd in context.ObjectMetadata on rec.HeaderId equals omd.ObjectId
                        join mdt in context.Metadata on omd.MetadataId equals mdt.MetadataId
                        group new { OmdMetaType = omd.MetadataType, OaiMetaData = mdt } by rec into grp
                        select grp).ToList();

        /* distribute data into logical units (in memory, after the query has run) */
        records = (from grp in recGroup
                   select new RecordQueryResult()
                   {
                       Header = grp.Key,
                       Metadata = isRecord ?
                           grp.Where(g => g.OmdMetaType == Enums.MetadataType.Metadata).Select(g => g.OaiMetaData).FirstOrDefault() :
                           null,
                       About = isRecord ?
                           grp.Where(g => g.OmdMetaType == Enums.MetadataType.About).Select(g => g.OaiMetaData).ToList() :
                           null
                   }).ToList();
    }

    bool isCompleted = isResumption ?
        Properties.resumptionTokens[resumptionToken].Cursor + records.Count >=
            Properties.resumptionTokens[resumptionToken].CompleteListSize :
        false;

    XElement list = new XElement(verb,
        isRecord ?
            GetListRecords(records, isAbout) :
            GetListIdentifiers(records),
        isResumption ? /* add resumption token or not */
            MlEncode.ResumptionToken(Properties.resumptionTokens[resumptionToken], resumptionToken, isCompleted) :
            null);

    if (isResumption)
    {
        if (isCompleted)
        {
            Properties.resumptionTokens.Remove(resumptionToken);
        }
        else
        {
            Properties.resumptionTokens[resumptionToken].Cursor =
                Properties.resumptionTokens[resumptionToken].Cursor + records.Count;
        }
    }

    return CreateXml(new XElement[] { request, list });
}
/// <summary>
/// Builds the response document for the verb <c>ListMetadataFormats</c>.
/// </summary>
/// <param name="identifier">Optional record identifier; when empty the formats in <c>FormatList.List</c> are used.</param>
/// <param name="errorList">List that collects errors; it is modified by this method.</param>
/// <returns>
/// A document made of the request element followed by either the collected errors or
/// the <c>ListMetadataFormats</c> element.
/// </returns>
public static XDocument ListMetadataFormats(string identifier, List<XElement> errorList)
{
    List<XElement> errors = errorList;
    bool hasIdentifier = !String.IsNullOrEmpty(identifier);

    XAttribute identifierAttribute = hasIdentifier ? new XAttribute("identifier", identifier) : null;
    XElement request = new XElement("request",
        new XAttribute("verb", "ListMetadataFormats"),
        identifierAttribute,
        Properties.baseURL);

    List<OAIMetadataFormat> metadataFormats = new List<OAIMetadataFormat>();

    using (var context = new OaiPmhContext())
    {
        context.Configuration.LazyLoadingEnabled = false;

        if (!hasIdentifier)
        {
            /* no record given */
            metadataFormats = FormatList.List;
        }
        else
        {
            Header header = DbQueries.GetHeader(context, identifier).FirstOrDefault();

            if (header != null)
            {
                /* format value of the first metadata row linked to the record */
                int? recMetaFormats = (from omd in context.ObjectMetadata
                                       join mtd in context.Metadata on omd.MetadataId equals mtd.MetadataId
                                       where omd.ObjectType == Enums.ObjectType.OAIRecord
                                       where omd.ObjectId == header.HeaderId
                                       where omd.MetadataType == Enums.MetadataType.Metadata
                                       select mtd.MdFormat).FirstOrDefault();

                if (recMetaFormats.HasValue)
                {
                    metadataFormats = FormatList.GetAllFormatsFromInt(recMetaFormats.Value).ToList();
                }
                else
                {
                    metadataFormats = null;
                }

                bool hasFormats = metadataFormats != null && metadataFormats.Count != 0;
                if (!hasFormats)
                {
                    errors.Add(MlErrors.noMetadataFormats);
                }
            }
            else
            {
                errors.Add(MlErrors.idDoesNotExist);
            }
        }
    }

    if (errors.Count > 0)
    {
        /* the request element goes first, so that it is displayed before the errors */
        errors.Insert(0, request);
        return CreateXml(errors.ToArray());
    }

    /* only formats flagged with IsForList are listed */
    var formatElements = metadataFormats
        .Where(mf => mf.IsForList)
        .Select(mf => new XElement("metadataFormat",
            new XElement("metadataPrefix", mf.Prefix),
            new XElement("schema", mf.Schema),
            new XElement("metadataNamespace", mf.Namespace)));

    XElement listMetadataFormats = new XElement("ListMetadataFormats", formatElements);

    return CreateXml(new XElement[] { request, listMetadataFormats });
}
/// <summary>
/// Builds the response document for the verb <c>ListSets</c>.
/// </summary>
/// <param name="resumptionToken">Optional resumption token; looked up in <c>Properties.resumptionTokens</c>.</param>
/// <param name="isRoundtrip">
/// True when the method calls itself after it has validated a resumption token.
/// External callers presumably pass false - confirm against the call sites.
/// </param>
/// <param name="errorList">List that collects errors; it is modified by this method.</param>
/// <returns>
/// A document made of the request element followed by either the collected errors or
/// the <c>ListSets</c> element.
/// </returns>
/// <remarks>
/// The method reads and writes the shared <c>Properties.resumptionTokens</c> dictionary:
/// it adds a token when the list has to be split, advances the cursor on every page and
/// removes the token once the list is complete.
/// </remarks>
public static XDocument ListSets(string resumptionToken, bool isRoundtrip, List<XElement> errorList)
{
    List<XElement> errors = errorList;

    if (!Properties.supportSets)
    {
        errors.Add(MlErrors.noSetHierarchy);
    }

    bool isResumption = !String.IsNullOrEmpty(resumptionToken);
    if (isResumption && !isRoundtrip)
    {
        /* the token has to be known, issued for ListSets and not expired */
        if (!(Properties.resumptionTokens.ContainsKey(resumptionToken) &&
            Properties.resumptionTokens[resumptionToken].Verb == "ListSets" &&
            Properties.resumptionTokens[resumptionToken].ExpirationDate >= DateTime.UtcNow))
        {
            errors.Insert(0, MlErrors.badResumptionArgument);
        }

        if (errors.Count == 0)
        {
            return ListSets(resumptionToken, true, new List<XElement>());
        }
    }

    XElement request = new XElement("request",
        new XAttribute("verb", "ListSets"),
        isResumption ? new XAttribute("resumptionToken", resumptionToken) : null,
        Properties.baseURL);

    if (errors.Count > 0)
    {
        errors.Insert(0, request); /* add request on the first position, so that it is displayed before the errors */
        return CreateXml(errors.ToArray());
    }

    var sets = new List<Set>();

    using (var context = new OaiPmhContext())
    {
        context.Configuration.LazyLoadingEnabled = false;

        int setsCount = context.OAISet.Count();

        /* every set together with the metadata rows linked to it */
        var setsQuery = from s in context.OAISet
                        join omd in context.ObjectMetadata on s.SetId equals omd.ObjectId
                        join mdt in context.Metadata on omd.MetadataId equals mdt.MetadataId
                        where omd.ObjectType == Enums.ObjectType.OAISet
                        orderby s.Name
                        group mdt by s into grp
                        select grp;

        if (isRoundtrip)
        {
            /* continue an already started list at the stored cursor */
            Properties.resumptionTokens[resumptionToken].CompleteListSize = setsCount;
            setsQuery = setsQuery.Skip(
                Properties.resumptionTokens[resumptionToken].Cursor.Value).Take(
                Properties.maxSetsInList);
        }
        else if (Properties.resumeListSets && setsCount > Properties.maxSetsInList)
        {
            /* the list is too long for one response: start a new resumption token */
            resumptionToken = Helper.CreateGuid();
            isResumption = true;
            Properties.resumptionTokens.Add(resumptionToken,
                new ResumptionToken()
                {
                    Verb = "ListSets",
                    ExpirationDate = DateTime.UtcNow.Add(Properties.expirationTimeSpan),
                    CompleteListSize = setsCount,
                    Cursor = 0
                });

            setsQuery = setsQuery.Take(Properties.maxSetsInList);
        }

        /* Execute the query first and merge in memory afterwards. MergeSetAndDescription
           is application code that the database provider cannot translate, so it must
           not be part of the query itself (ListIdentifiersOrRecords follows the same
           two step pattern). */
        var setGroups = setsQuery.ToList();
        sets = setGroups
            .Select(g => g.Key.MergeSetAndDescription(g.ToList()))
            .ToList();
    }

    /* ">=" instead of "==" matches ListIdentifiersOrRecords and also ends the list
       when the cursor overshoots the stored size */
    bool isCompleted = isResumption ?
        Properties.resumptionTokens[resumptionToken].Cursor + sets.Count >=
            Properties.resumptionTokens[resumptionToken].CompleteListSize :
        false;

    XElement list = new XElement("ListSets",
        from s in sets
        select new XElement("set",
            new XElement("setSpec", s.Spec),
            new XElement("setName", s.Name),
            String.IsNullOrEmpty(s.Description) ? null : new XElement("setDescription", s.Description),
            MlEncode.SetDescription(s.AdditionalDescriptions, Properties.granularity)),
        isResumption ? /* add resumption token or not */
            MlEncode.ResumptionToken(Properties.resumptionTokens[resumptionToken], resumptionToken, isCompleted) :
            null);

    if (isResumption)
    {
        if (isCompleted)
        {
            Properties.resumptionTokens.Remove(resumptionToken);
        }
        else
        {
            Properties.resumptionTokens[resumptionToken].Cursor =
                Properties.resumptionTokens[resumptionToken].Cursor + sets.Count;
        }
    }

    return CreateXml(new XElement[] { request, list });
}
/// <summary>
/// Deletes the data provider with the given id.
/// </summary>
/// <param name="id">Id of the data provider to delete.</param>
/// <returns>True when a data provider was found and removed, otherwise false.</returns>
public static bool DeleteDataProvider(int id)
{
    using (var db = new OaiPmhContext())
    {
        var providerToDelete = db.OAIDataProvider
            .Where(d => d.OAIDataProviderId == id)
            .FirstOrDefault();

        if (providerToDelete == null)
        {
            return false;
        }

        db.OAIDataProvider.Remove(providerToDelete);
        db.SaveChanges();
        return true;
    }
}
/// <summary>
/// Identifies a stored data provider again through its base URL and saves the refreshed values.
/// </summary>
/// <param name="id">Id of the stored data provider.</param>
/// <returns>
/// The updated data provider, or null when no provider with that id exists or
/// <c>IdentifyDataProvider</c> returned null.
/// </returns>
internal static OAIDataProvider ReIdentifyDataProvider(int id)
{
    using (var db = new OaiPmhContext())
    {
        db.Configuration.ProxyCreationEnabled = false;

        var stored = db.OAIDataProvider
            .Where(d => d.OAIDataProviderId == id)
            .FirstOrDefault();
        if (stored == null)
        {
            return null;
        }

        var identified = IdentifyDataProvider(stored.BaseURL);
        if (identified == null)
        {
            return null;
        }

        /* take over the identify values; all other fields of the stored provider stay as they are */
        stored.AdminEmail = identified.AdminEmail;
        stored.BaseURL = identified.BaseURL;
        stored.Compression = identified.Compression;
        stored.DeletedRecord = identified.DeletedRecord;
        stored.EarliestDatestamp = identified.EarliestDatestamp;
        stored.Granularity = identified.Granularity;
        stored.ProtocolVersion = identified.ProtocolVersion;
        stored.RepositoryName = identified.RepositoryName;

        db.SaveChanges();
        return stored;
    }
}
/// <summary>
/// Saves a provenance entry and its nested elements, and queues one link row per saved entry.
/// </summary>
/// <param name="context">Database context used for all inserts.</param>
/// <param name="objId">Id of the owning object.</param>
/// <param name="objType">Type code of the owning object.</param>
/// <param name="metaType">Type code describing the role of the metadata.</param>
/// <param name="provenance">Provenance entry; its <c>NestedElements</c> may be null.</param>
/// <remarks>
/// Each nested element gets the id of the entry saved just before it in <c>AdditionalInt1</c>,
/// so the entries form a chain that starts at <paramref name="provenance"/>.
/// The link rows added last are not saved by this method.
/// </remarks>
public static void AddToDatabase(OaiPmhContext context, int objId, byte objType, byte metaType, Metadata provenance)
{
    /* save first: the link below reads provenance.MetadataId after the save */
    context.Metadata.Add(provenance);
    context.SaveChanges();

    var rootLink = new ObjectMetadata();
    rootLink.ObjectId = objId;
    rootLink.ObjectType = objType;
    rootLink.MetadataType = metaType;
    rootLink.MetadataId = provenance.MetadataId;
    context.ObjectMetadata.Add(rootLink);

    if (provenance.NestedElements == null)
    {
        return;
    }

    int previousId = provenance.MetadataId;
    foreach (var nested in provenance.NestedElements)
    {
        /* point the element at the entry that was saved before it */
        nested.AdditionalInt1 = previousId;
        context.Metadata.Add(nested);
        context.SaveChanges();

        var nestedLink = new ObjectMetadata();
        nestedLink.ObjectId = objId;
        nestedLink.ObjectType = objType;
        nestedLink.MetadataType = metaType;
        nestedLink.MetadataId = nested.MetadataId;
        context.ObjectMetadata.Add(nestedLink);

        previousId = nested.MetadataId;
    }
}
/// <summary>
/// Downloads one response page from a data provider, stores its records and reports how to continue.
/// </summary>
/// <param name="dataProvider">Data provider that is harvested; when null nothing is done.</param>
/// <param name="url">Request URL of the page to download.</param>
/// <param name="metadataPrefix">Metadata prefix of the harvest; when empty nothing is done.</param>
/// <param name="deDup">De-duplication mode applied before the records are stored.</param>
/// <param name="updateStats">When true, status and progress are written to the provider's entry in <c>HarvestStats</c>.</param>
/// <param name="harvestFiles">When true, <c>FileHarvester.GetFile</c> is called for every record before it is stored.</param>
/// <param name="isList">True to read a <c>ListRecords</c> element, false to read a <c>GetRecord</c> element.</param>
/// <param name="retryCount">Not used inside this method.</param>
/// <returns>
/// "retry" when the download or parsing of the response failed; for a single record harvest
/// the id of the first stored header; for a list harvest the value of a non-empty resumption
/// token; otherwise null.
/// </returns>
private static async Task<string> HarvestRecordsAsync(
    OAIDataProvider dataProvider,
    string url,
    string metadataPrefix,
    Enums.DeDuplication deDup,
    bool updateStats,
    bool harvestFiles,
    bool isList,
    int retryCount = 3)
{
    if (dataProvider == null)
    {
        /* Without a data provider there is no id to look up a statistics entry with.
           The former code dereferenced the null provider here and crashed. */
        return null;
    }

    if (string.IsNullOrEmpty(metadataPrefix))
    {
        if (updateStats)
        {
            HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Status = "Exception";
            HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Message = "Metadata format is not provided";
        }
        return null;
    }

    try
    {
        XDocument xd;
        try
        {
            xd = await GetAndParseXMLAsync(url).ConfigureAwait(false);
        }
        catch (Exception e)
        {
            /* Guarded like every other statistics update in this method. Unguarded, a
               missing statistics entry would throw out of this handler and turn the
               "retry" answer into null. */
            if (updateStats)
            {
                HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Status = "Retrying";
                HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Message = e.Message;
            }
            return "retry";
        }

        /* we validate if it's enabled */
        bool hasValidationErrors = false;
        if (Properties.validateXml && Properties.schemas.Count > 0)
        {
            xd.Validate(Properties.schemas, (o, e) => { hasValidationErrors = true; });
        }

        if (Properties.validateXml && hasValidationErrors)
        {
            if (updateStats)
            {
                HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Status = "Exception";
                HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Message = "Validation has failed";
            }
            return null;
        }

        /* get harvest date */
        DateTime harvestDate;
        bool isHarvestDateTime;
        MlDecode.ResponseDate(ref xd, out harvestDate, out isHarvestDateTime);

        XElement listRecords = isList ?
            xd.Root.Element(MlNamespaces.oaiNs + "ListRecords") :
            xd.Root.Element(MlNamespaces.oaiNs + "GetRecord");
        if (listRecords == null)
        {
            return null;
        }

        /* parse records; records flagged as deleted are left out */
        List<RecordQueryResult> records = new List<RecordQueryResult>();
        foreach (var record in listRecords.Elements(MlNamespaces.oaiNs + "record"))
        {
            /* awaited instead of blocking on .Result inside an async method */
            var rec = await ParseRecordAsync(record, metadataPrefix).ConfigureAwait(false);
            if (!rec.Header.Deleted)
            {
                records.Add(rec);
            }
        }

        /* remembered before de-duplication may shrink the list */
        int itemsPerPage = records.Count;

        if (records.Count > 0)
        {
            using (var context = new OaiPmhContext())
            {
                /* try to deduplicate (if selected) and add records to database */
                try
                {
                    /* update timestamp of last harvesting */
                    context.OAIDataProvider.Attach(dataProvider);
                    if (!isList)
                    {
                        dataProvider.LastHarvesting = harvestDate;
                        context.Entry(dataProvider).State = EntityState.Modified;
                    }

                    DeDuplicate.Records(
                        records,
                        context,
                        deDup);

                    /* add records to database */
                    foreach (var record in records)
                    {
                        if (harvestFiles)
                        {
                            FileHarvester.GetFile(dataProvider, record);
                        }

                        RecordQueryResult.AddRecordToDatabase(
                            record,
                            context,
                            dataProvider,
                            metadataPrefix,
                            harvestDate,
                            Properties.addProvenanceToHarvestedRecords,
                            Properties.createNewIdentifierForHarvestedRecords,
                            Properties.identifierBase,
                            isHarvestDateTime);
                    }

                    context.SaveChanges();
                }
                catch (DbEntityValidationException dbEx)
                {
                    if (updateStats)
                    {
                        HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Status = "Exception";
                        HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Message = "Database exception occured. Please contact administrator";
                    }
                    SaveXMLWithErrors(xd, dbEx, dataProvider.RepositoryName);
                }
                catch (Exception e)
                {
                    if (updateStats)
                    {
                        HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Status = "Exception";
                        HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Message = e.Message;
                    }
                }
            }

            if (!isList)
            {
                /* de-duplication may have emptied the list */
                return records.Count > 0 ?
                    records[0].Header.HeaderId.ToString() :
                    null;
            }
        }

        var resumption = listRecords.Element(MlNamespaces.oaiNs + "resumptionToken");
        if (resumption != null)
        {
            /* set complete list size and current progress */
            if (updateStats)
            {
                int completeListSize = 0;
                int cursor = 0;
                var listSizeAttribute = resumption.Attribute("completeListSize");
                var cursorAttribute = resumption.Attribute("cursor");

                if ((listSizeAttribute != null && int.TryParse(listSizeAttribute.Value, out completeListSize)) &&
                    (cursorAttribute != null && int.TryParse(cursorAttribute.Value, out cursor)))
                {
                    HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.RatioAll = completeListSize;
                    HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.RatioDone = cursor + itemsPerPage;
                }
            }

            if (!String.IsNullOrEmpty(resumption.Value))
            {
                return resumption.Value;
            }
        }
    }
    catch (Exception e)
    {
        if (updateStats)
        {
            HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Status = "Exception";
            HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Message = e.Message;
        }
    }

    return null;
}
/// <summary>
/// Starts one long running harvest task for every selected data provider that is not
/// already being harvested.
/// </summary>
/// <param name="dataProviderList">
/// Harvest settings per data provider. Entries flagged with <c>Exclude</c> and entries whose
/// id is already present in <c>HarvestStats</c> are skipped. A null list is ignored.
/// </param>
/// <remarks>
/// The settings objects of the started providers are modified: an empty <c>MetadataPrefix</c>
/// becomes "oai_dc" and <c>Stats</c> is replaced by a new object with status "Starting".
/// </remarks>
public static void BeginHarvesting(IList<DataProviderProperties> dataProviderList)
{
    if (dataProviderList == null)
    {
        return;
    }

    /* List of ids from data providers that were not excluded and are not currently being
       harvested. Materialized once: the sequence is used twice below and HarvestStats
       changes while the loop runs. Distinct keeps an id that occurs twice from being
       prepared a second time after its task has been started. */
    var useOnlyDataProviders = dataProviderList
        .Where(dp => !dp.Exclude && !HarvestStats.ContainsKey(dp.OAIDataProviderId))
        .Select(dp => dp.OAIDataProviderId)
        .Distinct()
        .ToList();

    if (useOnlyDataProviders.Count == 0)
    {
        return;
    }

    List<OAIDataProvider> dataProviders;
    using (var context = new OaiPmhContext())
    {
        dataProviders = (from dp in context.OAIDataProvider
                         join uo in useOnlyDataProviders on dp.OAIDataProviderId equals uo
                         select dp).ToList();
    }

    /* add new statistical objects for data providers and start jobs */
    foreach (var id in useOnlyDataProviders)
    {
        var dataProvider = dataProviders.Where(dp => dp.OAIDataProviderId == id).FirstOrDefault();
        var harvestSettings = dataProviderList.Where(dp => dp.OAIDataProviderId == id).FirstOrDefault();

        if (dataProvider == null || harvestSettings == null)
        {
            continue;
        }

        harvestSettings.MetadataPrefix = string.IsNullOrEmpty(harvestSettings.MetadataPrefix) ?
            "oai_dc" :
            harvestSettings.MetadataPrefix;
        harvestSettings.Stats = new DataProviderHarvestStats()
        {
            OAIDataProviderId = id,
            Status = "Starting"
        };

        var tmpIntern = new DataProviderIntern()
        {
            DataProvider = dataProvider,
            HarvestOptions = harvestSettings,
            TokenSource = new CancellationTokenSource()
        };

        /* only the caller that wins the registration starts the task */
        if (HarvestStats.TryAdd(id, tmpIntern))
        {
            Task.Factory.StartNew(
                delegate { StartHarvestTask(id); },
                tmpIntern.TokenSource.Token,
                TaskCreationOptions.LongRunning | TaskCreationOptions.PreferFairness,
                TaskScheduler.Default);
        }
    }
}
/// <summary>
/// Builds the response document for the verb <c>GetRecord</c>.
/// </summary>
/// <param name="identifier">Identifier of the requested record; required.</param>
/// <param name="metadataPrefix">Requested metadata prefix; required and has to be known to <c>FormatList</c>.</param>
/// <param name="errorList">List that collects errors; it is modified by this method.</param>
/// <param name="loadAbout">Overrides <c>Properties.loadAbout</c> when it has a value.</param>
/// <returns>
/// A document made of the request element followed by either the collected errors or
/// the <c>GetRecord</c> element.
/// </returns>
public static XDocument GetRecord(string identifier, string metadataPrefix, List<XElement> errorList, bool? loadAbout)
{
    List<XElement> errors = errorList;

    bool isIdentifier = !String.IsNullOrEmpty(identifier);
    if (!isIdentifier)
    {
        errors.Add(MlErrors.badIdentifierArgument);
    }

    bool isPrefixOk = !String.IsNullOrEmpty(metadataPrefix);
    if (!isPrefixOk)
    {
        errors.Add(MlErrors.badMetadataArgument);
    }
    else if (FormatList.Prefix2Int(metadataPrefix) == 0)
    {
        errors.Add(MlErrors.cannotDisseminateFormat);
        isPrefixOk = false;
    }

    bool isAbout = loadAbout.HasValue ? loadAbout.Value : Properties.loadAbout;

    RecordQueryResult record = null;

    if (isIdentifier && isPrefixOk)
    {
        using (var context = new OaiPmhContext())
        {
            Header header = DbQueries.GetHeader(context, identifier).FirstOrDefault();

            if (header == null)
            {
                errors.Add(MlErrors.idDoesNotExist);
            }
            else
            {
                var formatNum = FormatList.Prefix2Int(metadataPrefix);

                /* execute query: metadata in the requested format plus all about entries,
                   grouped by their role */
                var recQuery = (from om in context.ObjectMetadata
                                join md in context.Metadata on om.MetadataId equals md.MetadataId
                                where om.ObjectId == header.HeaderId
                                where om.ObjectType == Enums.ObjectType.OAIRecord
                                where ((om.MetadataType == Enums.MetadataType.Metadata && (md.MdFormat & formatNum) != 0) ||
                                       (om.MetadataType == Enums.MetadataType.About))
                                group md by om.MetadataType into grp
                                select grp).ToList();

                record = new RecordQueryResult()
                {
                    Header = header,
                    Metadata = (Metadata)recQuery.Where(g => g.Key == Enums.MetadataType.Metadata).SelectMany(g => g).FirstOrDefault(),
                    About = isAbout ?
                        recQuery.Where(g => g.Key == Enums.MetadataType.About).SelectMany(g => g).Cast<Metadata>().ToList() :
                        null
                };

                if (record.Metadata == null)
                {
                    errors.Add(MlErrors.cannotDisseminateRecordFormat);
                }
            }
        }
    }

    XElement request = new XElement("request",
        new XAttribute("verb", "GetRecord"),
        isIdentifier ? new XAttribute("identifier", identifier) : null,
        isPrefixOk ? new XAttribute("metadataPrefix", metadataPrefix) : null,
        Properties.baseURL);

    if (errors.Count > 0)
    {
        errors.Insert(0, request); /* add request on the first position, so that it is displayed before the errors */
        return CreateXml(errors.ToArray());
    }

    /* The about content belongs inside the record element, next to header and metadata.
       It used to be added as a sibling of the record directly under GetRecord. */
    XElement theRecord = new XElement("GetRecord",
        new XElement("record",
            MlEncode.HeaderItem(record.Header, Properties.granularity),
            MlEncode.Metadata(record.Metadata, Properties.granularity),
            isAbout ? MlEncode.About(record.About, Properties.granularity) : null));

    return CreateXml(new XElement[] { request, theRecord });
}
/// <summary>
/// Initializes the database, fills a <c>Properties</c> instance with default settings and
/// then runs the static <c>Properties</c> update routines.
/// </summary>
/// <remarks>
/// Every default is stored through the <c>Properties</c> indexer as a <c>Property</c> with a key,
/// a text value and a section code ("ip", "dpp" or "hp"). The section codes are named by the
/// "PropertySections" default at the end of the list.
/// NOTE(review): how the indexer stores the values and how <c>UpdateFromDatabase</c> combines them
/// with stored settings is not visible here - confirm in <c>Properties</c>.
/// </remarks>
public static void Register()
{
    /* force initialization of the database before any setting is touched */
    using(var context = new OaiPmhContext())
    {
        context.Database.Initialize(true);
    }

    Properties props = new Properties();

    /* Set default settings */
    /* Identify properties */
    props["RepositoryName"] = new Property() { Key = "RepositoryName", Value = "Test repository", Section = "ip" };
    /* NOTE(review): the value below looks masked or truncated - confirm the intended default */
    props["BaseURL"] = new Property() { Key = "BaseURL", Value = "http://*****:*****@domain.com", Section = "ip" };
    props["Compression"] = new Property() { Key = "Compression", Value = null, Section = "ip" };
    props["Description"] = new Property() { Key = "Description", Value = null, Section = "ip" };

    /* Data provider properties */
    props["SupportSets"] = new Property() { Key = "SupportSets", Value = "False", Section = "dpp" };
    props["ResumeListSets"] = new Property() { Key = "ResumeListSets", Value = "False", Section = "dpp" };
    props["MaxSetsInList"] = new Property() { Key = "MaxSetsInList", Value = "30", Section = "dpp" };
    props["ResumeListIdentifiers"] = new Property() { Key = "ResumeListIdentifiers", Value = "True", Section = "dpp" };
    props["MaxIdentifiersInList"] = new Property() { Key = "MaxIdentifiersInList", Value = "100", Section = "dpp" };
    props["ResumeListRecords"] = new Property() { Key = "ResumeListRecords", Value = "True", Section = "dpp" };
    props["MaxRecordsInList"] = new Property() { Key = "MaxRecordsInList", Value = "30", Section = "dpp" };
    /* one day, stored as text */
    props["ExpirationTimeSpan"] = new Property() { Key = "ExpirationTimeSpan", Value = new TimeSpan(1, 0, 0, 0).ToString(), Section = "dpp" };
    props["LoadAbout"] = new Property() { Key = "LoadAbout", Value = "True", Section = "dpp" };

    /* Harvester properties */
    props["ValidateXml"] = new Property() { Key = "ValidateXml", Value = "False", Section = "hp" };
    props["HarvestAbout"] = new Property() { Key = "HarvestAbout", Value = "True", Section = "hp" };
    props["AddProvenanceToHarvestedRecords"] = new Property() { Key = "AddProvenanceToHarvestedRecords", Value = "True", Section = "hp" };
    props["CreateNewIdentifierForHarvestedRecords"] = new Property() { Key = "CreateNewIdentifierForHarvestedRecords", Value = "False", Section = "hp" };
    props["IdentifierBase"] = new Property() { Key = "IdentifierBase", Value = "oai:test.org:", Section = "hp" };
    /* thirty seconds, stored as text */
    props["MinTimeBetweenRequests"] = new Property() { Key = "MinTimeBetweenRequests", Value = new TimeSpan(0, 0, 30).ToString(), Section = "hp" };
    props["RetryRetrievalCount"] = new Property() { Key = "RetryRetrievalCount", Value = "3", Section = "hp" };
    props["OverwriteHarvestedFiles"] = new Property() { Key = "OverwriteHarvestedFiles", Value = "False", Section = "hp" };
    props["LimitHarvestedFileTypes"] = new Property() { Key = "LimitHarvestedFileTypes", Value = "True", Section = "hp" };
    props["DirectoryForHarvestedFiles"] = new Property() { Key = "DirectoryForHarvestedFiles", Value = "C:\\OAIHarvestedFiles", Section = "hp" };
    /* semicolon separated list of MIME types */
    props["AllowedMimeTypes"] = new Property() { Key = "AllowedMimeTypes", Value = "application/pdf;" + "application/msword;" + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", Section = "hp" };
    /* semicolon separated "code=display name" pairs for the section codes used above */
    props["PropertySections"] = new Property() { Key = "PropertySections", Value = "ip=Identify;" + "dpp=Dataprovider;" + "hp=Harvester;" + "pfhp=Page fileharvester;", Section = "hp" };

    /* Other ... */
    /* the schema is only registered when oaiSchema.xsd lies in the current directory */
    if (File.Exists(Directory.GetCurrentDirectory() + "\\oaiSchema.xsd"))
    {
        props.SetSchemaSet(MlNamespaces.oaiNs.ToString(), Directory.GetCurrentDirectory() + "\\oaiSchema.xsd");
    }

    Properties.UpdateFromDatabase();
    Properties.UpdateMimeTypeList();
    Properties.UpdatePropertySections();
    Properties.UpdatePageFileHarvestProperties();
}
/// <summary>
/// Inserts the given setting, or updates the stored setting that has the same key.
/// </summary>
/// <param name="newSetting">
/// Setting to persist. A null <c>Value</c> is replaced with an empty string
/// on this instance before it is saved.
/// </param>
/// <returns>
/// True when the setting was added or updated; false when
/// <paramref name="newSetting"/> is null or its key is null or empty.
/// </returns>
public static bool AddOrUpdateSetting(Property newSetting)
{
    /* A null setting is rejected the same way as a missing key,
       instead of failing with a NullReferenceException. */
    if (newSetting == null || string.IsNullOrEmpty(newSetting.Key))
    {
        return false;
    }

    using (var context = new OaiPmhContext())
    {
        if (newSetting.Value == null)
        {
            newSetting.Value = "";
        }

        Property setting = context.Property.Where(s => s.Key == newSetting.Key).FirstOrDefault();
        if (setting == null)
        {
            /* add */
            context.Property.Add(newSetting);
        }
        else
        {
            /* update */
            setting.Value = newSetting.Value;
            setting.Section = newSetting.Section;
        }

        context.SaveChanges();
        return true;
    }
}
/// <summary>
/// Removes a metadata entity and its object-to-metadata link from the context's sets.
/// A null argument is skipped. This method does not call SaveChanges itself.
/// </summary>
/// <param name="context">Context whose Metadata and ObjectMetadata sets are modified.</param>
/// <param name="objectMetadata">Link entity to remove; may be null.</param>
/// <param name="metadata">Metadata entity to remove; may be null.</param>
private static void DeleteMetadataAndLink(OaiPmhContext context, ObjectMetadata objectMetadata, Metadata metadata)
{
    bool hasMetadata = metadata != null;
    bool hasLink = objectMetadata != null;

    /* The metadata entity is removed first, then the link. */
    if (hasMetadata)
    {
        context.Metadata.Remove(metadata);
    }

    if (hasLink)
    {
        context.ObjectMetadata.Remove(objectMetadata);
    }
}
/// <summary>
/// Deletes the harvested metadata of each given data provider by calling the
/// "DeleteMetadata" stored procedure, optionally deleting the harvested files
/// listed in the provider's headers first.
/// </summary>
/// <param name="dataProviders">
/// Providers to clean up. <c>HarvestDeleteFiles</c> controls whether files are removed
/// from disk; <c>FullHarvestDelete</c> is passed to the stored procedure as "fullDelete".
/// </param>
/// <returns>
/// True when the list is null or empty, or when the stored procedure returned a
/// non-zero value for every provider. False when the procedure returned null or zero
/// for a provider (processing stops there) or when an exception occurred.
/// </returns>
public static bool DeleteMetadata(List<DataProviderProperties> dataProviders)
{
    /* Fixed: the original guard used `!= null || Count > 0`, which dereferenced a null list. */
    if (dataProviders == null || dataProviders.Count == 0)
    {
        return true;
    }

    try
    {
        using (var context = new OaiPmhContext())
        {
            foreach (var dataProvider in dataProviders)
            {
                if (dataProvider.HarvestDeleteFiles)
                {
                    var filesToDelete = (from h in context.Header
                                         where h.OAIDataProviderId == dataProvider.OAIDataProviderId
                                         select h.FilePath).ToList();

                    foreach (var fileList in filesToDelete)
                    {
                        if (string.IsNullOrEmpty(fileList))
                        {
                            continue;
                        }

                        /* A header's FilePath can hold several paths separated by "][" */
                        foreach (var file in fileList.Split(new string[] { "][" }, StringSplitOptions.RemoveEmptyEntries))
                        {
                            if (!string.IsNullOrEmpty(file) && File.Exists(file))
                            {
                                try
                                {
                                    File.Delete(file);
                                }
                                catch (Exception)
                                {
                                    /* Best effort: a file that cannot be deleted must not abort the cleanup */
                                }
                            }
                        }
                    }
                }

                using (var cmd = context.Database.Connection.CreateCommand())
                {
                    cmd.CommandText = "DeleteMetadata";
                    cmd.CommandType = CommandType.StoredProcedure;
                    cmd.Parameters.Add(new SqlParameter("dataProviderId", dataProvider.OAIDataProviderId));
                    cmd.Parameters.Add(new SqlParameter("fullDelete", dataProvider.FullHarvestDelete));
                    var retVal = new SqlParameter() { Direction = ParameterDirection.ReturnValue };
                    cmd.Parameters.Add(retVal);

                    try
                    {
                        context.Database.Connection.Open();
                        cmd.ExecuteNonQuery();
                    }
                    finally
                    {
                        context.Database.Connection.Close();
                    }

                    /* Fixed: the original returned unconditionally here, so only the first
                       provider in the list was ever processed. Now only a failure ends the
                       loop early; a non-zero return value lets the next provider run. */
                    bool succeeded = retVal.Value != null && (int)retVal.Value != 0;
                    if (!succeeded)
                    {
                        return false;
                    }
                }
            }
        }
    }
    catch (Exception)
    {
        return false;
    }

    return true;
}
/// <summary>
/// Deletes the stored setting whose key equals <paramref name="name"/>.
/// </summary>
/// <param name="name">Key of the setting to delete.</param>
/// <returns>
/// True when a matching setting was found and removed; false when
/// <paramref name="name"/> is null or empty, or no such setting exists.
/// </returns>
public static bool DeleteSetting(string name)
{
    if (string.IsNullOrEmpty(name))
    {
        return false;
    }

    using (var db = new OaiPmhContext())
    {
        Property existing = db.Property.FirstOrDefault(p => p.Key == name);
        if (existing == null)
        {
            return false;
        }

        db.Property.Remove(existing);
        db.SaveChanges();
        return true;
    }
}
/// <summary>
/// Harvests a single record with a GetRecord request, provided a data provider
/// with the given base URL is registered in the database.
/// </summary>
/// <param name="baseURL">Base URL of the repository; also used to look up the data provider.</param>
/// <param name="identifier">Identifier of the record; appended to the URL as given, without URL-encoding.</param>
/// <param name="metadataPrefix">Metadata prefix; "oai_dc" is used when null or empty.</param>
/// <param name="deDup">De-duplication mode passed on to the harvest call.</param>
/// <param name="harvestFile">Passed on to the harvest call.</param>
/// <returns>
/// The result of the harvest call, or null when no data provider matches <paramref name="baseURL"/>.
/// </returns>
public static string HarvestRecord(
    string baseURL,
    string identifier,
    string metadataPrefix,
    Enums.DeDuplication deDup,
    bool harvestFile)
{
    string prefix = string.IsNullOrEmpty(metadataPrefix) ? "oai_dc" : metadataPrefix;

    OAIDataProvider provider;
    using (var db = new OaiPmhContext())
    {
        provider = db.OAIDataProvider.Where(dp => dp.BaseURL == baseURL).FirstOrDefault();
    }

    if (provider == null)
    {
        return null;
    }

    string requestUrl = string.Concat(
        baseURL,
        "?verb=GetRecord&identifier=",
        identifier,
        "&metadataPrefix=",
        prefix);

    /* Blocks the calling thread until the asynchronous harvest has finished */
    return HarvestRecordsAsync(
        provider,
        requestUrl,
        prefix,
        deDup,
        false,
        harvestFile,
        false).Result;
}
/// <summary>
/// Builds harvest settings for every data provider stored in the database
/// and passes the list to BeginHarvesting.
/// </summary>
/// <param name="metadataPrefix">
/// Metadata prefix applied to all providers; a null or empty value is passed on as null.
/// </param>
public static void HarvestAll(string metadataPrefix)
{
    /* Normalize once: empty and null both become null */
    string prefixOrNull = string.IsNullOrEmpty(metadataPrefix) ? null : metadataPrefix;

    List<DataProviderProperties> harvestSettingsList;
    using (var db = new OaiPmhContext())
    {
        harvestSettingsList = db.OAIDataProvider
            .ToList()
            .Select(provider => new DataProviderProperties()
            {
                BaseURL = provider.BaseURL,
                Exclude = false,
                FullHarvestDelete = false,
                HarvestDeleteFiles = true,
                MetadataPrefix = prefixOrNull,
                OAIDataProviderId = provider.OAIDataProviderId,
                RepositoryName = provider.RepositoryName
            })
            .ToList();
    }

    BeginHarvesting(harvestSettingsList);
}
/// <summary>
/// Stores the given metadata and links it to an object by delegating to
/// <c>DbQueries.AddMetadataToDatabase</c> with the arguments unchanged.
/// </summary>
/// <param name="context">Database context to use.</param>
/// <param name="objId">Id of the object the metadata belongs to.</param>
/// <param name="objType">Object type code.</param>
/// <param name="metaType">Metadata type code.</param>
/// <param name="dublinCore">Metadata entity to store.</param>
public static void AddToDatabase(OaiPmhContext context, int objId, byte objType, byte metaType, Metadata dublinCore)
{
    DbQueries.AddMetadataToDatabase(
        context: context,
        objId: objId,
        objType: objType,
        metaType: metaType,
        metadata: dublinCore);
}
/// <summary>
/// Loads all stored settings from the database and writes each one into the
/// <c>properties</c> collection under its key, replacing any existing entry.
/// </summary>
public static void UpdateFromDatabase()
{
    List<Property> stored;
    using (var db = new OaiPmhContext())
    {
        stored = db.Property.ToList();
    }

    /* The context is already disposed here; only the materialized list is used */
    foreach (Property current in stored)
    {
        properties.AddOrUpdate(current.Key, current, (existingKey, previous) => current);
    }
}