/// <summary>
/// Imports an RData file and describes its contents in a new <see cref="ResourcePackage"/>.
/// </summary>
/// <remarks>
/// The package receives one PhysicalInstance that holds one DataRelationship. After the file is
/// loaded into R, every entry returned by GetDataFrames() becomes a LogicalRecord and every
/// column of that data frame becomes a Variable in the record:
/// numeric vectors map to Numeric, integer vectors map to Code when they are factors and to
/// Numeric/Integer otherwise, and character vectors map to Text. Columns of any other type get
/// a name (and label, when available) but no representation is assigned here.
/// </remarks>
/// <param name="path">Path of the RData file to load.</param>
/// <param name="agencyId">
/// Agency identifier assigned to the package, physical instance, data relationship, logical
/// records and variables, and passed on to GetCodeList.
/// </param>
/// <returns>The populated resource package.</returns>
/// <exception cref="ArgumentNullException"><paramref name="path"/> is null or empty.</exception>
/// <exception cref="ArgumentException">No file exists at <paramref name="path"/>.</exception>
public ResourcePackage Import(string path, string agencyId)
{
    // Validate the arguments before touching any instance state.
    if (string.IsNullOrEmpty(path))
    {
        throw new ArgumentNullException(nameof(path));
    }

    if (!File.Exists(path))
    {
        throw new ArgumentException("The specified file must exist");
    }

    this.harmonizingCache = new HarmonizingCache(MultilingualString.CurrentCulture);

    var resourcePackage = new ResourcePackage();
    resourcePackage.AgencyId = agencyId;

    logger.Debug("Importing RData");

    string fileNameOnly = Path.GetFileNameWithoutExtension(path);
    logger.Debug("RData import file: " + fileNameOnly);

    resourcePackage.DublinCoreMetadata.Title.Current = fileNameOnly;

    // Create the PhysicalInstance.
    var physicalInstance = new PhysicalInstance() { AgencyId = agencyId };
    resourcePackage.PhysicalInstances.Add(physicalInstance);
    physicalInstance.DublinCoreMetadata.Title.Current = fileNameOnly;

    // File location. The Uri is only set when the path can be parsed as one;
    // the raw path is always recorded.
    var fileID = new DataFileIdentification();
    Uri uri;
    if (Uri.TryCreate(path, UriKind.RelativeOrAbsolute, out uri))
    {
        fileID.Uri = uri;
    }

    fileID.Path = path;
    physicalInstance.FileIdentifications.Add(fileID);

    // Create the DataRelationship.
    var dataRelationship = new DataRelationship();
    physicalInstance.DataRelationships.Add(dataRelationship);
    dataRelationship.AgencyId = agencyId;
    dataRelationship.Label.Current = fileNameOnly;

    // Load the file into R. Backslashes become forward slashes, and single quotes are
    // escaped so that a path containing an apostrophe cannot terminate the R string literal.
    string pathForR = path.Replace("\\", "/").Replace("'", "\\'");
    engine.Evaluate(string.Format("load('{0}')", pathForR));

    // Find all the data frames.
    var dataFrames = GetDataFrames();

    // For each data frame in the RData file, create a LogicalRecord.
    foreach (var pair in dataFrames)
    {
        string name = pair.Key;
        var dataFrame = pair.Value;

        // TODO This should be tracked per record, not PhysicalInstance.
        // As written, the value from the last data frame processed wins.
        physicalInstance.FileStructure.CaseQuantity = dataFrame.RowCount;

        var logicalRecord = new LogicalRecord() { AgencyId = agencyId };
        dataRelationship.LogicalRecords.Add(logicalRecord);
        logicalRecord.Label.Current = name;

        // Optional per-column labels stored in the "var.labels" attribute.
        List<string> variableLabels = null;
        var variableLabelsExpr = dataFrame.GetAttribute("var.labels");
        if (variableLabelsExpr != null)
        {
            var labelVector = variableLabelsExpr.AsVector();
            variableLabels = new List<string>(labelVector.Select(x => (string)x));
        }

        for (int i = 0; i < dataFrame.ColumnCount; i++)
        {
            string columnName = dataFrame.ColumnNames[i];
            var column = dataFrame[i];

            var variable = new Variable() { AgencyId = agencyId };
            logicalRecord.VariablesInRecord.Add(variable);

            // Name
            variable.ItemName.Current = columnName;

            // Label. The label list may be shorter than the column list, so columns
            // without a matching entry simply get no label.
            if (variableLabels != null && i < variableLabels.Count)
            {
                variable.Label.Current = variableLabels[i];
            }

            // Type
            if (column.Type == RDotNet.Internals.SymbolicExpressionType.NumericVector)
            {
                variable.RepresentationType = RepresentationType.Numeric;
                variable.Additivity = AdditivityType.Stock;
            }
            else if (column.Type == RDotNet.Internals.SymbolicExpressionType.IntegerVector)
            {
                if (column.IsFactor())
                {
                    // Factors are represented as coded values built from their levels.
                    variable.RepresentationType = RepresentationType.Code;
                    string[] factors = column.AsFactor().GetLevels();
                    variable.CodeRepresentation.Codes = GetCodeList(factors, agencyId, resourcePackage);
                }
                else
                {
                    variable.RepresentationType = RepresentationType.Numeric;
                    variable.NumericRepresentation.NumericType = NumericType.Integer;
                    variable.Additivity = AdditivityType.Stock;
                }
            }
            else if (column.Type == RDotNet.Internals.SymbolicExpressionType.CharacterVector)
            {
                variable.RepresentationType = RepresentationType.Text;
            }
        }
    }

    return resourcePackage;
}
/// <summary>
/// Copies the properties of a <see cref="ManagedFile"/> onto a <see cref="PhysicalInstance"/>.
/// </summary>
/// <remarks>
/// Values are written to the instance's Dublin Core metadata, its first
/// DataFileIdentification, its first Fingerprint, and to named user attributes. A
/// DataFileIdentification or Fingerprint is created and added when the instance has none.
/// </remarks>
/// <param name="file">The managed file to read values from.</param>
/// <param name="pi">The physical instance to update in place.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="file"/> or <paramref name="pi"/> is null.
/// </exception>
public void Map(ManagedFile file, PhysicalInstance pi)
{
    if (file == null)
    {
        throw new ArgumentNullException(nameof(file));
    }

    if (pi == null)
    {
        // Report the actual parameter name so the exception points at the right argument.
        throw new ArgumentNullException(nameof(pi));
    }

    // Map properties from the ManagedFile to DDI.
    pi.SetUserId("FileNumber", file.Number?.ToString());
    pi.DublinCoreMetadata.Title.Current = file.Title;
    pi.DublinCoreMetadata.AlternateTitle.Current = file.PublicName;

    // Reuse the first file identification if one exists; otherwise add a new one.
    var fileId = pi.FileIdentifications.FirstOrDefault();
    if (fileId == null)
    {
        fileId = new DataFileIdentification();
        pi.FileIdentifications.Add(fileId);
    }

    // Only overwrite the Uri when the persistent link parses; otherwise the
    // existing value on the file identification is left untouched.
    Uri uri;
    bool gotUri = Uri.TryCreate(file.PersistentLink, UriKind.RelativeOrAbsolute, out uri);
    if (gotUri)
    {
        fileId.Uri = uri;
    }

    pi.SetUserAttribute("PersistentLinkDate", file.PersistentLinkDate?.ToString());

    // Format and descriptive information.
    pi.SetUserAttribute("FileType", file.Type);
    pi.SetUserAttribute("FormatName", file.FormatName);
    pi.SetUserAttribute("FormatId", file.FormatId);
    pi.SetUserAttribute("Size", file.Size.ToString());
    pi.SetUserAttribute("CreationDate", file.CreationDate);
    pi.SetUserAttribute("KindOfData", file.KindOfData);

    // Provenance and access.
    pi.DublinCoreMetadata.Source.Current = file.Source;
    pi.SetUserAttribute("SourceInformation", file.SourceInformation);
    pi.DublinCoreMetadata.Rights.Current = file.Rights;
    fileId.IsPublic = file.IsPublicAccess;
    pi.SetUserAttribute("UploadedDate", file.UploadedDate);
    pi.SetUserAttribute("ExternalDatabase", file.ExternalDatabase);

    // Software and hardware environment.
    pi.SetUserAttribute("Software", file.Software);
    pi.SetUserAttribute("SoftwareVersion", file.SoftwareVersion);
    pi.SetUserAttribute("Hardware", file.Hardware);

    // Reuse the first fingerprint if one exists; otherwise add a new one.
    var fingerprint = pi.Fingerprints.FirstOrDefault();
    if (fingerprint == null)
    {
        fingerprint = new Fingerprint();
        pi.Fingerprints.Add(fingerprint);
    }

    fingerprint.FingerprintValue = file.Checksum;
    fingerprint.AlgorithmSpecification = file.ChecksumMethod;
    pi.SetUserAttribute("ChecksumDate", file.ChecksumDate);

    // Virus check results and workflow dates.
    pi.SetUserAttribute("VirusCheckOutcome", file.VirusCheckOutcome);
    pi.SetUserAttribute("VirusCheckMethod", file.VirusCheckMethod);
    pi.SetUserAttribute("VirusCheckDate", file.VirusCheckDate);
    pi.SetUserAttribute("AcceptedDate", file.AcceptedDate);
    pi.SetUserAttribute("CertifiedDate", file.CertifiedDate);
}