Example #1
0
        /// <summary>
        /// Adds <paramref name="header"/> to the context for the given data provider, saves it,
        /// and optionally assigns it a new OAI identifier built from
        /// <paramref name="identifierBase"/> and the header's id.
        /// </summary>
        /// <param name="context">Database context the header is added to and saved with.</param>
        /// <param name="header">Header to insert; its OAIDataProviderId is overwritten here.</param>
        /// <param name="dataProvider">Provider whose id is copied onto the header.</param>
        /// <param name="createNewIdentifier">
        /// When true, <c>header.OAI_Identifier</c> is replaced by
        /// <c>identifierBase + ":" + header.HeaderId</c> (the colon is only added if missing).
        /// </param>
        /// <param name="identifierBase">
        /// Prefix of the new identifier. A null or empty value is treated as an empty prefix,
        /// so the identifier becomes <c>":" + header.HeaderId</c> instead of throwing.
        /// </param>
        /// <remarks>
        /// The identifier change is only marked as modified on the context; this method does
        /// not call SaveChanges a second time, so the caller has to persist it.
        /// </remarks>
        public static void AddRecHeaderToDatabase(
            OaiPmhContext context, 
            Header header, 
            OAIDataProvider dataProvider, 
            bool createNewIdentifier, 
            string identifierBase)
        {
            header.OAIDataProviderId = dataProvider.OAIDataProviderId;
            context.Header.Add(header);
            /* save before building the identifier: it is derived from header.HeaderId,
               which is presumably generated by the database on insert */
            context.SaveChanges();

            if (!createNewIdentifier)
            {
                return;
            }

            /* an empty or null base must not throw here, the header row is already stored */
            string prefix = identifierBase ?? string.Empty;
            if (!prefix.EndsWith(":", System.StringComparison.Ordinal))
            {
                prefix += ':';
            }

            header.OAI_Identifier = prefix + header.HeaderId;
            context.Entry(header).State = System.Data.EntityState.Modified;
        }
Example #2
0
        /// <summary>
        /// Downloads one OAI-PMH response from <paramref name="url"/>, optionally validates it,
        /// parses the contained records and stores the non-deleted ones in the database.
        /// </summary>
        /// <param name="dataProvider">Provider being harvested; when null the method returns null immediately.</param>
        /// <param name="url">Request URL passed to GetAndParseXMLAsync.</param>
        /// <param name="metadataPrefix">Metadata format used for parsing and storing; must not be null or empty.</param>
        /// <param name="deDup">De-duplication mode forwarded to DeDuplicate.Records.</param>
        /// <param name="updateStats">When true, progress and error information is written to HarvestStats.</param>
        /// <param name="harvestFiles">When true, FileHarvester.GetFile is called for every stored record.</param>
        /// <param name="isList">True to read the "ListRecords" element, false to read "GetRecord".</param>
        /// <param name="retryCount">Not used inside this method.</param>
        /// <returns>
        /// "retry" when the document could not be downloaded or parsed;
        /// for a single-record request (<paramref name="isList"/> false) with at least one
        /// non-deleted record, the HeaderId of the first record as a string;
        /// otherwise the value of a non-empty resumptionToken element; null in every other case
        /// (invalid arguments, validation failure, missing element, or an exception).
        /// </returns>
        private static async Task<string> HarvestRecordsAsync(
            OAIDataProvider dataProvider,
            string url,
            string metadataPrefix,
            Enums.DeDuplication deDup,
            bool updateStats,
            bool harvestFiles,
            bool isList,
            int retryCount = 3)
        {
            if (dataProvider == null)
            {
                /* stats are keyed by the provider id, so without a provider there is
                   nothing to report to; dereferencing it here would only throw */
                return null;
            }
            if (string.IsNullOrEmpty(metadataPrefix))
            {
                if (updateStats)
                {
                    ReportHarvestStatus(dataProvider, "Exception", "Metadata format is not provided");
                }
                return null;
            }

            try
            {
                XDocument xd;
                try
                {
                    xd = await GetAndParseXMLAsync(url).ConfigureAwait(false);
                }
                catch (Exception e)
                {
                    /* only touch the stats when asked to, like every other status update here */
                    if (updateStats)
                    {
                        ReportHarvestStatus(dataProvider, "Retrying", e.Message);
                    }
                    return "retry";
                }

                /* we validate if it's enabled */
                bool errors = false;
                if (Properties.validateXml && Properties.schemas.Count > 0)
                {
                    xd.Validate(Properties.schemas, (o, e) => { errors = true; });
                }

                /* errors can only be set when validation actually ran */
                if (errors)
                {
                    if (updateStats)
                    {
                        ReportHarvestStatus(dataProvider, "Exception", "Validation has failed");
                    }
                    return null;
                }

                /* get harvest date */
                DateTime harvestDate;
                bool isHarvestDateTime;
                MlDecode.ResponseDate(ref xd, out harvestDate, out isHarvestDateTime);

                XElement listRecords = isList ? xd.Root.Element(MlNamespaces.oaiNs + "ListRecords") :
                                                xd.Root.Element(MlNamespaces.oaiNs + "GetRecord");
                if (listRecords == null)
                {
                    return null;
                }

                /* parse records, skipping the ones flagged as deleted */
                List<RecordQueryResult> records = new List<RecordQueryResult>();
                foreach (var record in listRecords.Elements(MlNamespaces.oaiNs + "record"))
                {
                    /* awaited instead of .Result: no blocking, and failures surface with
                       their own message rather than wrapped in an AggregateException */
                    var rec = await ParseRecordAsync(record, metadataPrefix).ConfigureAwait(false);
                    if (!rec.Header.Deleted)
                    {
                        records.Add(rec);
                    }
                }
                /* captured before de-duplication is given the list */
                int itemsPerPage = records.Count;

                if (records.Count > 0)
                {
                    using (var context = new OaiPmhContext())
                    {
                        /* try to deduplicate (if selected) and add records to database */
                        try
                        {
                            /* update timestamp of last harvesting */
                            context.OAIDataProvider.Attach(dataProvider);
                            /* NOTE(review): the timestamp is only updated for single-record
                               requests; confirm this condition is not inverted */
                            if (!isList)
                            {
                                dataProvider.LastHarvesting = harvestDate;
                                context.Entry(dataProvider).State = EntityState.Modified;
                            }

                            DeDuplicate.Records(
                                records,
                                context,
                                deDup);

                            /* add records to database */
                            foreach (var record in records)
                            {
                                if (harvestFiles)
                                {
                                    FileHarvester.GetFile(dataProvider, record);
                                }
                                RecordQueryResult.AddRecordToDatabase(
                                    record,
                                    context,
                                    dataProvider,
                                    metadataPrefix,
                                    harvestDate,
                                    Properties.addProvenanceToHarvestedRecords,
                                    Properties.createNewIdentifierForHarvestedRecords,
                                    Properties.identifierBase,
                                    isHarvestDateTime);
                            }
                            context.SaveChanges();
                        }
                        catch (DbEntityValidationException dbEx)
                        {
                            if (updateStats)
                            {
                                ReportHarvestStatus(dataProvider, "Exception", "Database exception occured. Please contact administrator");
                            }
                            SaveXMLWithErrors(xd, dbEx, dataProvider.RepositoryName);
                        }
                        catch (Exception e)
                        {
                            /* best effort: a failed page is reported (if requested) but does
                               not abort the harvest */
                            if (updateStats)
                            {
                                ReportHarvestStatus(dataProvider, "Exception", e.Message);
                            }
                        }
                    }
                    if (!isList)
                    {
                        /* the list may have been changed by de-duplication, so re-check it */
                        return records.Count > 0 ? records[0].Header.HeaderId.ToString() : null;
                    }
                }

                var resumption = listRecords.Element(MlNamespaces.oaiNs + "resumptionToken");
                if (resumption != null)
                {
                    /* set complete list size and current progress */
                    if (updateStats)
                    {
                        int completeListSize = 0;
                        int cursor = 0;

                        var listSizeAttribute = resumption.Attribute("completeListSize");
                        var cursorAttribute = resumption.Attribute("cursor");

                        if ((listSizeAttribute != null &&
                             int.TryParse(listSizeAttribute.Value, out completeListSize)) &&
                            (cursorAttribute != null &&
                             int.TryParse(cursorAttribute.Value, out cursor)))
                        {
                            HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.RatioAll = completeListSize;
                            HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.RatioDone = cursor + itemsPerPage;
                        }
                    }
                    if (!String.IsNullOrEmpty(resumption.Value))
                    {
                        return resumption.Value;
                    }
                }
            }
            catch (Exception e)
            {
                if (updateStats)
                {
                    ReportHarvestStatus(dataProvider, "Exception", e.Message);
                }
            }

            return null;
        }

        /// <summary>
        /// Writes a status and message into the HarvestStats entry of the given data provider.
        /// </summary>
        private static void ReportHarvestStatus(OAIDataProvider dataProvider, string status, string message)
        {
            HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Status = status;
            HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Message = message;
        }