Пример #1
0
        /// <summary>
        /// Dispatches a data-provider management command to <c>OaiApiRestService</c>
        /// and reports the outcome as JSON.
        /// </summary>
        /// <param name="id">
        /// Command name: "addorupdate", "delete" or "reidentify". Surrounding whitespace
        /// and letter case are ignored.
        /// </param>
        /// <param name="baseURL">Base URL forwarded to the add-or-update call; it is trimmed, and an empty value becomes null.</param>
        /// <param name="dataProvider">Data provider that is serialized to JSON for the add-or-update call.</param>
        /// <param name="OAIDataProviderId">Identifier of the data provider to delete or re-identify.</param>
        /// <returns>
        /// A JSON result whose <c>status</c> is <c>ok</c> (together with the affected provider or id)
        /// when the service call succeeds, and <c>failure</c> otherwise, including for a missing or unknown command.
        /// </returns>
        public async Task<ActionResult> DataProvider(
            string id,
            string baseURL = null,
            OAIDataProvider dataProvider = null,
            string OAIDataProviderId = null)
        {
            if (!string.IsNullOrEmpty(id))
            {
                var baseLocalUrl = Common.GetBaseApiUrl(this);

                /* The command is a protocol token, not user-visible text, so it is lower-cased with the
                 * invariant culture: a culture-sensitive ToLower() maps 'I' to a dotless 'ı' under
                 * e.g. tr-TR and "reIdentify" would then match no case below. */
                switch (id.Trim().ToLowerInvariant())
                {
                    case "addorupdate":
                        baseURL = string.IsNullOrEmpty(baseURL) ? null : baseURL.Trim();
                        var jsonString = JsonConvert.SerializeObject(dataProvider);
                        dataProvider = await OaiApiRestService.AddOrUpdateDataProvider(baseLocalUrl, baseURL, jsonString);
                        if (dataProvider != null)
                        {
                            return Json(new
                            {
                                status = ok,
                                dataProvider = dataProvider
                            });
                        }
                        return Json(new { status = failure });

                    case "delete":
                        if (await OaiApiRestService.DeleteDataProvider(baseLocalUrl, OAIDataProviderId))
                        {
                            return Json(new { status = ok, OAIDataProviderId = OAIDataProviderId });
                        }
                        return Json(new { status = failure });

                    case "reidentify":
                        dataProvider = await OaiApiRestService.ReIdentifyDataProvider(baseLocalUrl, OAIDataProviderId);
                        if (dataProvider != null)
                        {
                            return Json(new { status = ok, dataProvider = dataProvider });
                        }
                        return Json(new { status = failure });
                }
            }

            /* No command, or a command that is not recognised. */
            return Json(new { status = failure });
        }
Пример #2
0
        /// <summary>
        /// Stores one harvested record: optionally appends a provenance entry to its "about" list,
        /// then writes the header, the metadata and the "about" entries, in that order.
        /// </summary>
        /// <param name="record">Record to store.</param>
        /// <param name="context">Database context used for all writes.</param>
        /// <param name="dp">Data provider the record was harvested from.</param>
        /// <param name="metadataPrefix">Metadata prefix; resolved to a namespace for the provenance entry.</param>
        /// <param name="harvestDate">Harvest date recorded in the provenance entry.</param>
        /// <param name="addProvenance">When true, a provenance entry is added to <c>record.About</c> before saving.</param>
        /// <param name="createNewIdentifier">Forwarded to the provenance entry and to the header insert.</param>
        /// <param name="identifierBase">Identifier prefix forwarded to the header insert.</param>
        /// <param name="isHarvestDateTime">Forwarded to the provenance entry together with <paramref name="harvestDate"/>.</param>
        public static void AddRecordToDatabase(
            RecordQueryResult record, 
            OaiPmhContext context, 
            OAIDataProvider dp, 
            string metadataPrefix, 
            DateTime harvestDate, 
            bool addProvenance, 
            bool createNewIdentifier,
            string identifierBase,
            bool isHarvestDateTime)
        {
            var header = record.Header;

            if (addProvenance)
            {
                /* a record without a datestamp is reported with DateTime.MinValue */
                var originDatestamp = header.Datestamp ?? DateTime.MinValue;

                var provenance = Provenance.NewMeta(
                    harvestDate,
                    isHarvestDateTime,
                    createNewIdentifier,
                    dp.BaseURL,
                    header.OAI_Identifier,
                    originDatestamp,
                    header.IsDatestampDateTime,
                    FormatList.GetNamespace(metadataPrefix));

                record.About.Add(provenance);
            }

            /* header goes first: the metadata and about rows below are keyed by header.HeaderId */
            Header.AddRecHeaderToDatabase(context, header, dp, createNewIdentifier, identifierBase);

            /* metadata */
            DbQueries.AddRecMetadataToDatabase(context, header.HeaderId, record.Metadata);

            /* about (including the provenance entry added above, if any) */
            DbQueries.AddRecAboutToDatabase(context, header.HeaderId, record.About);
        }
Пример #3
0
 /// <summary>
 /// Builds a "dataProvider" XML element (in the <c>MlNamespaces.oaiNs</c> namespace) describing
 /// the given data provider.
 /// </summary>
 /// <param name="dataProvider">Data provider whose properties are written out.</param>
 /// <param name="granularity">
 /// Date/time format string applied to <c>EarliestDatestamp</c> and <c>LastHarvesting</c>
 /// after they are converted to universal time.
 /// </param>
 /// <returns>
 /// The new element. The "earliestDatestamp" and "lastHarvesting" children are omitted when the
 /// corresponding value is not set.
 /// </returns>
 public static XElement Encode(OAIDataProvider dataProvider, string granularity)
 {
     /* Timestamps in the response are machine-readable, so they are formatted with the invariant
      * culture; the single-argument ToString would use the server's current culture, whose
      * calendar and date/time separators can differ. */
     var invariant = System.Globalization.CultureInfo.InvariantCulture;

     return new XElement(MlNamespaces.oaiNs + "dataProvider",
         /* content */
         MlEncode.Element(MlNamespaces.oaiNs + "repositoryName", dataProvider.RepositoryName),
         MlEncode.Element(MlNamespaces.oaiNs + "baseURL", dataProvider.BaseURL),
         MlEncode.Element(MlNamespaces.oaiNs + "protocolVersion", dataProvider.ProtocolVersion),
         MlEncode.Element(MlNamespaces.oaiNs + "adminEmail", dataProvider.AdminEmail),
         /* a null content item is skipped by the XElement constructor */
         !dataProvider.EarliestDatestamp.HasValue ? null
                 : new XElement(MlNamespaces.oaiNs + "earliestDatestamp",
                     dataProvider.EarliestDatestamp.Value.ToUniversalTime().ToString(granularity, invariant)),
         MlEncode.Element(MlNamespaces.oaiNs + "deletedRecord", dataProvider.DeletedRecord),
         MlEncode.Element(MlNamespaces.oaiNs + "granularity", dataProvider.Granularity),
         MlEncode.Element(MlNamespaces.oaiNs + "compression", dataProvider.Compression),
         !dataProvider.LastHarvesting.HasValue ? null
                 : new XElement(MlNamespaces.oaiNs + "lastHarvesting",
                     dataProvider.LastHarvesting.Value.ToUniversalTime().ToString(granularity, invariant))
         );
 }
Пример #4
0
        /// <summary>
        /// Links the header to its data provider, adds it to the context and saves it. When requested,
        /// it then replaces the header's OAI identifier with <c>identifierBase + ":" + HeaderId</c>.
        /// </summary>
        /// <param name="context">Database context the header is added to.</param>
        /// <param name="header">Header to store.</param>
        /// <param name="dataProvider">Data provider whose id is copied onto the header.</param>
        /// <param name="createNewIdentifier">When true, a new OAI identifier is derived from the header id.</param>
        /// <param name="identifierBase">
        /// Prefix of the new identifier. A trailing ':' is appended when missing; null is treated as empty.
        /// </param>
        /// <remarks>
        /// The identifier change is only marked as modified on the context; this method does not call
        /// <c>SaveChanges</c> again, so persisting it is left to a later save on the same context.
        /// </remarks>
        public static void AddRecHeaderToDatabase(
            OaiPmhContext context, 
            Header header, 
            OAIDataProvider dataProvider, 
            bool createNewIdentifier, 
            string identifierBase)
        {
            header.OAIDataProviderId = dataProvider.OAIDataProviderId;
            context.Header.Add(header);

            /* saved before the identifier is built because the identifier embeds header.HeaderId
             * (presumably database-generated -- confirm) */
            context.SaveChanges();

            if (createNewIdentifier)
            {
                /* A null or empty base used to throw here, after the header had already been saved.
                 * It is now normalised the same way as any other base lacking the ':' terminator. */
                string prefix = identifierBase ?? string.Empty;
                if (prefix.Length == 0 || prefix[prefix.Length - 1] != ':')
                {
                    prefix += ':';
                }

                header.OAI_Identifier = prefix + header.HeaderId;
                context.Entry(header).State = System.Data.EntityState.Modified;
            }
        }
Пример #5
0
        /// <summary>
        /// Updates an existing data provider, or identifies and adds a new one.
        /// </summary>
        /// <param name="baseURL">
        /// Base URL of the provider to identify. Used only in add mode, i.e. when
        /// <paramref name="dataProvider"/> is null or its <c>OAIDataProviderId</c> is 0.
        /// </param>
        /// <param name="dataProvider">
        /// Source of the <c>Function</c>, <c>FirstSource</c> and <c>SecondSource</c> settings. A non-zero
        /// <c>OAIDataProviderId</c> selects update mode for the stored provider with that id.
        /// </param>
        /// <returns>
        /// The saved data provider, or null when nothing was saved: the provider to update was not found,
        /// identification returned nothing, no base URL was given in add mode, or a provider with the
        /// same base URL is already stored.
        /// </returns>
        public static OAIDataProvider AddOrUpdateDataProvider(string baseURL, OAIDataProvider dataProvider)
        {
            bool isUpdateMode = dataProvider != null && dataProvider.OAIDataProviderId != 0;

            using (var context = new OaiPmhContext())
            {
                OAIDataProvider dp = null;
                if (isUpdateMode)
                {
                    /* get data provider to update */
                    context.Configuration.ProxyCreationEnabled = false;
                    var providerId = dataProvider.OAIDataProviderId;
                    dp = context.OAIDataProvider.FirstOrDefault(d => d.OAIDataProviderId == providerId);
                }
                else if (!string.IsNullOrEmpty(baseURL))
                {
                    /* get and parse XML document */
                    dp = IdentifyDataProvider(baseURL);
                }

                /* The original condition "dp != null && isUpdateMode ? true : ..." parsed as
                 * "(dp != null && isUpdateMode) ? true : ...", so a null dp fell into the duplicate
                 * check and was dereferenced. Nothing can be saved without a provider. */
                if (dp == null)
                {
                    return null;
                }

                if (!isUpdateMode)
                {
                    /* refuse to add a second provider with the same base URL */
                    string identifiedBaseUrl = dp.BaseURL;
                    if (context.OAIDataProvider.Any(d => d.BaseURL == identifiedBaseUrl))
                    {
                        return null;
                    }
                }

                if (dataProvider != null)
                {
                    dp.Function = dataProvider.Function;
                    dp.FirstSource = dataProvider.FirstSource;
                    dp.SecondSource = dataProvider.SecondSource;
                }
                if (!isUpdateMode)
                {
                    context.OAIDataProvider.Add(dp);
                }

                context.SaveChanges();
                return dp;
            }
        }
Пример #6
0
        /// <summary>
        /// Downloads one OAI response from <paramref name="url"/>, optionally validates it, parses its
        /// records, stores the non-deleted ones in the database and reports progress in <c>HarvestStats</c>.
        /// </summary>
        /// <param name="dataProvider">Data provider being harvested; when null the method returns null immediately.</param>
        /// <param name="url">Request URL of the response to download.</param>
        /// <param name="metadataPrefix">Metadata prefix of the harvested records; required.</param>
        /// <param name="deDup">De-duplication mode passed to <c>DeDuplicate.Records</c>.</param>
        /// <param name="updateStats">When true, status, messages and progress are written to <c>HarvestStats</c>.</param>
        /// <param name="harvestFiles">When true, <c>FileHarvester.GetFile</c> is called for every record before it is stored.</param>
        /// <param name="isList">True to read a "ListRecords" response, false to read a "GetRecord" response.</param>
        /// <param name="retryCount">Not used by this method.</param>
        /// <returns>
        /// "retry" when the download or XML parsing failed; for a "GetRecord" response with at least one
        /// parsed record, the <c>HeaderId</c> of the first record as a string (or null if no record is left
        /// after de-duplication); the resumption token value when the response carries a non-empty one;
        /// otherwise null.
        /// </returns>
        private static async Task<string> HarvestRecordsAsync(
            OAIDataProvider dataProvider,
            string url,
            string metadataPrefix,
            Enums.DeDuplication deDup,
            bool updateStats,
            bool harvestFiles,
            bool isList,
            int retryCount = 3)
        {
            if (dataProvider == null)
            {
                /* Statistics are keyed by the provider id, so there is nothing to report to here;
                 * the previous attempt to do so dereferenced the null provider. */
                return null;
            }
            if (string.IsNullOrEmpty(metadataPrefix))
            {
                if (updateStats)
                {
                    ReportHarvestStatus(dataProvider, "Exception", "Metadata format is not provided");
                }
                return null;
            }

            try
            {
                XDocument xd;
                try
                {
                    xd = await GetAndParseXMLAsync(url).ConfigureAwait(false);
                }
                catch (Exception e)
                {
                    /* guarded like every other statistics write, so that a failing write cannot
                     * turn the "retry" answer into null via the outer catch */
                    if (updateStats)
                    {
                        ReportHarvestStatus(dataProvider, "Retrying", e.Message);
                    }
                    return "retry";
                }

                /* we validate if it's enabled */
                bool errors = false;
                if (Properties.validateXml && Properties.schemas.Count > 0)
                {
                    xd.Validate(Properties.schemas, (o, e) => { errors = true; });
                }

                if (errors)
                {
                    if (updateStats)
                    {
                        ReportHarvestStatus(dataProvider, "Exception", "Validation has failed");
                    }
                    return null;
                }

                /* get harvest date */
                DateTime harvestDate;
                bool isHarvestDateTime;
                MlDecode.ResponseDate(ref xd, out harvestDate, out isHarvestDateTime);

                XElement listRecords = isList ? xd.Root.Element(MlNamespaces.oaiNs + "ListRecords") :
                                                xd.Root.Element(MlNamespaces.oaiNs + "GetRecord");
                if (listRecords == null)
                {
                    return null;
                }

                /* parse records; deleted ones are skipped */
                List<RecordQueryResult> records = new List<RecordQueryResult>();
                foreach (var recordElement in listRecords.Elements(MlNamespaces.oaiNs + "record"))
                {
                    /* awaited instead of blocking on .Result: no thread is tied up and a parsing
                     * failure surfaces as the real exception rather than an AggregateException */
                    var rec = await ParseRecordAsync(recordElement, metadataPrefix).ConfigureAwait(false);
                    if (!rec.Header.Deleted)
                    {
                        records.Add(rec);
                    }
                }

                /* taken before de-duplication, which receives the list and may change it */
                int itemsPerPage = records.Count;

                if (records.Count > 0)
                {
                    using (var context = new OaiPmhContext())
                    {
                        /* try to deduplicate (if selected) and add records to database */
                        try
                        {
                            /* the last-harvesting timestamp is updated here only for a single-record harvest */
                            context.OAIDataProvider.Attach(dataProvider);
                            if (!isList)
                            {
                                dataProvider.LastHarvesting = harvestDate;
                                context.Entry(dataProvider).State = EntityState.Modified;
                            }

                            DeDuplicate.Records(
                                records,
                                context,
                                deDup);

                            /* add records to database */
                            foreach (var record in records)
                            {
                                if (harvestFiles)
                                {
                                    FileHarvester.GetFile(dataProvider, record);
                                }
                                RecordQueryResult.AddRecordToDatabase(
                                    record,
                                    context,
                                    dataProvider,
                                    metadataPrefix,
                                    harvestDate,
                                    Properties.addProvenanceToHarvestedRecords,
                                    Properties.createNewIdentifierForHarvestedRecords,
                                    Properties.identifierBase,
                                    isHarvestDateTime);
                            }
                            context.SaveChanges();
                        }
                        catch (DbEntityValidationException dbEx)
                        {
                            if (updateStats)
                            {
                                ReportHarvestStatus(dataProvider, "Exception", "Database exception occured. Please contact administrator");
                            }
                            SaveXMLWithErrors(xd, dbEx, dataProvider.RepositoryName);
                        }
                        catch (Exception e)
                        {
                            /* best effort: the failure is reported through the statistics only */
                            if (updateStats)
                            {
                                ReportHarvestStatus(dataProvider, "Exception", e.Message);
                            }
                        }
                    }
                    if (!isList)
                    {
                        return records.Count > 0 ? records[0].Header.HeaderId.ToString() : null;
                    }
                }

                var resumption = listRecords.Element(MlNamespaces.oaiNs + "resumptionToken");
                if (resumption != null)
                {
                    /* set complete list size and current progress */
                    if (updateStats)
                    {
                        int completeListSize = 0;
                        int cursor = 0;

                        var listSizeAttribute = resumption.Attribute("completeListSize");
                        var cursorAttribute = resumption.Attribute("cursor");

                        if ((listSizeAttribute != null &&
                             int.TryParse(listSizeAttribute.Value, out completeListSize)) &&
                            (cursorAttribute != null &&
                            int.TryParse(cursorAttribute.Value, out cursor)))
                        {
                            HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.RatioAll = completeListSize;
                            HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.RatioDone = cursor + itemsPerPage;
                        }
                    }
                    if (!String.IsNullOrEmpty(resumption.Value))
                    {
                        return resumption.Value;
                    }
                }
            }
            catch (Exception e)
            {
                /* best effort: the failure is reported through the statistics only */
                if (updateStats)
                {
                    ReportHarvestStatus(dataProvider, "Exception", e.Message);
                }
            }

            return null;
        }

        /* Writes a status and its message into the harvest statistics entry of the given data provider. */
        private static void ReportHarvestStatus(OAIDataProvider dataProvider, string status, string message)
        {
            HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Status = status;
            HarvestStats[dataProvider.OAIDataProviderId].HarvestOptions.Stats.Message = message;
        }
Пример #7
0
        /// <summary>
        /// Fetches the file(s) belonging to a record into a per-host folder under
        /// <c>Properties.directoryForHarvestedFiles</c> and stores the resulting path on the record header.
        /// </summary>
        /// <param name="dataProvider">
        /// Data provider of the record; its <c>BaseURL</c> host names the target folder and its
        /// <c>Function</c> ("FromPageOnly" or "FromSourceTag") selects how the file is obtained.
        /// </param>
        /// <param name="record">Record whose <c>Header.FilePath</c> is updated when a file path is obtained.</param>
        /// <remarks>
        /// Does nothing when either argument is null, or when the header already has a file path and
        /// <c>Properties.overwriteHarvestedFiles</c> is off. Any other <c>Function</c> value obtains nothing.
        /// </remarks>
        public static void GetFile(OAIDataProvider dataProvider, RecordQueryResult record)
        {
            if (dataProvider == null || record == null)
            {
                return;
            }

            /* <harvest directory>\<host of the provider's base URL>\ */
            string targetFolder = Properties.directoryForHarvestedFiles + "\\"
                + new Uri(dataProvider.BaseURL).Host + "\\";

            bool mayStoreFile = Properties.overwriteHarvestedFiles
                || string.IsNullOrEmpty(record.Header.FilePath);
            if (!mayStoreFile)
            {
                return;
            }

            if (!Directory.Exists(targetFolder))
            {
                Directory.CreateDirectory(targetFolder);
            }

            var strategy = dataProvider.Function;
            string storedPath;
            if (strategy == "FromPageOnly")
            {
                storedPath = FromPageOnly(record, targetFolder, dataProvider.FirstSource);
            }
            else if (strategy == "FromSourceTag")
            {
                storedPath = FromSourceTag(record, dataProvider, targetFolder);
            }
            else
            {
                storedPath = null;
            }

            /* keep the previous path when nothing was obtained */
            if (!string.IsNullOrEmpty(storedPath))
            {
                record.Header.FilePath = storedPath;
            }
        }
Пример #8
0
        /// <summary>
        /// Downloads every source returned by <c>GetAllSources</c> for the record into
        /// <paramref name="basePath"/>; when no download succeeds it falls back to <c>FromPageOnly</c>.
        /// </summary>
        /// <param name="record">Record whose sources are downloaded.</param>
        /// <param name="dataProvider">
        /// Data provider supplying <c>FirstSource</c> for the source lookup and, for the fallback,
        /// <c>SecondSource</c> (or <c>FirstSource</c> when <c>SecondSource</c> is empty).
        /// </param>
        /// <param name="basePath">Folder the files are written to.</param>
        /// <returns>
        /// The paths of the downloaded files joined with "][", or the result of the <c>FromPageOnly</c>
        /// fallback when nothing was downloaded. Null when an argument is missing or any exception occurs.
        /// </returns>
        public static string FromSourceTag(
            RecordQueryResult record,
            OAIDataProvider dataProvider,
            string basePath)
        {
            if (record == null || dataProvider == null || string.IsNullOrEmpty(basePath))
            {
                return null;
            }

            try
            {
                var downloadedPaths = new System.Collections.Generic.List<string>();
                foreach (var source in GetAllSources(record, dataProvider.FirstSource))
                {
                    /* prefer the source's own name, fall back to its alternate name */
                    var preferredName = source.Name;
                    if (string.IsNullOrEmpty(preferredName))
                    {
                        preferredName = source.AlternateName;
                    }

                    string targetPath = CreateUniqueFileName(basePath, preferredName);
                    if (DownloadFile(targetPath, source.AbsoluteUri))
                    {
                        downloadedPaths.Add(targetPath);
                    }
                }

                if (downloadedPaths.Count > 0)
                {
                    return string.Join("][", downloadedPaths.ToArray());
                }

                /* nothing downloaded from the source tags: try the page itself */
                string fallbackSource = dataProvider.SecondSource;
                if (string.IsNullOrEmpty(fallbackSource))
                {
                    fallbackSource = dataProvider.FirstSource;
                }
                return FromPageOnly(record, basePath, fallbackSource);
            }
            catch (Exception)
            {
                /* best effort: any failure simply means no file path for this record */
                return null;
            }
        }