コード例 #1
0
ファイル: RdataImporter.cs プロジェクト: Colectica/curation
        /// <summary>
        /// Imports an RData file and describes its contents as DDI items in a new <see cref="ResourcePackage"/>.
        /// </summary>
        /// <remarks>
        /// The file is loaded into the R engine. Each data frame returned by <c>GetDataFrames</c> becomes a
        /// <see cref="LogicalRecord"/>, and each column of a data frame becomes a <see cref="Variable"/> whose
        /// representation is derived from the column's R vector type.
        /// </remarks>
        /// <param name="path">Path of the RData file to import; must refer to an existing file.</param>
        /// <param name="agencyId">
        /// Agency identifier assigned to the resource package, physical instance, data relationship,
        /// logical records and variables created here; it is also passed on to <c>GetCodeList</c>.
        /// </param>
        /// <returns>The populated resource package.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="path"/> is null or empty.</exception>
        /// <exception cref="ArgumentException"><paramref name="path"/> does not refer to an existing file.</exception>
        public ResourcePackage Import(string path, string agencyId)
        {
            this.harmonizingCache = new HarmonizingCache(MultilingualString.CurrentCulture);

            var resourcePackage = new ResourcePackage();

            resourcePackage.AgencyId = agencyId;

            logger.Debug("Importing RData");

            if (string.IsNullOrEmpty(path))
            {
                // Report the actual parameter name so callers can tell which argument was rejected.
                throw new ArgumentNullException(nameof(path));
            }
            if (!File.Exists(path))
            {
                throw new ArgumentException("The specified file must exist", nameof(path));
            }

            string fileNameOnly = Path.GetFileNameWithoutExtension(path);

            logger.Debug("RData import file: " + fileNameOnly);

            resourcePackage.DublinCoreMetadata.Title.Current = fileNameOnly;

            // Create the PhysicalInstance.
            var physicalInstance = new PhysicalInstance()
            {
                AgencyId = agencyId
            };

            resourcePackage.PhysicalInstances.Add(physicalInstance);
            physicalInstance.DublinCoreMetadata.Title.Current = fileNameOnly;

            // File location. The path is known to be non-empty here, so no further null check is needed.
            var fileID = new DataFileIdentification();
            Uri uri;
            if (Uri.TryCreate(path, UriKind.RelativeOrAbsolute, out uri))
            {
                fileID.Uri = uri;
            }
            fileID.Path = path;

            physicalInstance.FileIdentifications.Add(fileID);

            // Create the DataRelationship.
            var dataRelationship = new DataRelationship();

            physicalInstance.DataRelationships.Add(dataRelationship);
            dataRelationship.AgencyId      = agencyId;
            dataRelationship.Label.Current = fileNameOnly;

            // Load the file into R. Backslashes become forward slashes, and apostrophes are escaped
            // because the path is embedded in a single-quoted R string literal.
            string pathForR = path.Replace("\\", "/").Replace("'", "\\'");

            engine.Evaluate($"load('{pathForR}')");

            // Find all the data frames.
            var dataFrames = GetDataFrames();

            // For each data frame in the RData file, create a LogicalRecord.
            foreach (var pair in dataFrames)
            {
                string name      = pair.Key;
                var    dataFrame = pair.Value;

                // TODO This should be tracked per record, not PhysicalInstance.
                // With several data frames, the value from the last one enumerated wins.
                physicalInstance.FileStructure.CaseQuantity = dataFrame.RowCount;

                var logicalRecord = new LogicalRecord()
                {
                    AgencyId = agencyId
                };
                dataRelationship.LogicalRecords.Add(logicalRecord);
                logicalRecord.Label.Current = name;

                // Optional per-column labels stored in the "var.labels" attribute of the data frame.
                List<string> variableLabels     = null;
                var          variableLabelsExpr = dataFrame.GetAttribute("var.labels");
                if (variableLabelsExpr != null)
                {
                    var labelVector = variableLabelsExpr.AsVector();
                    variableLabels = new List<string>(labelVector.Select(x => (string)x));
                }

                for (int i = 0; i < dataFrame.ColumnCount; i++)
                {
                    string columnName = dataFrame.ColumnNames[i];
                    var    column     = dataFrame[i];

                    var variable = new Variable()
                    {
                        AgencyId = agencyId
                    };
                    logicalRecord.VariablesInRecord.Add(variable);

                    // Name
                    variable.ItemName.Current = columnName;

                    // Label. The label vector may be shorter than the column list, so guard the index
                    // instead of letting the lookup throw.
                    if (variableLabels != null && i < variableLabels.Count)
                    {
                        variable.Label.Current = variableLabels[i];
                    }

                    // Type. Columns of any other R type keep the Variable's default representation.
                    var columnType = column.Type;
                    if (columnType == RDotNet.Internals.SymbolicExpressionType.NumericVector)
                    {
                        variable.RepresentationType = RepresentationType.Numeric;
                        variable.Additivity         = AdditivityType.Stock;
                    }
                    else if (columnType == RDotNet.Internals.SymbolicExpressionType.IntegerVector)
                    {
                        if (column.IsFactor())
                        {
                            // Integer vectors that are factors are described by a code list built from their levels.
                            variable.RepresentationType = RepresentationType.Code;

                            string[] factors = column.AsFactor().GetLevels();
                            variable.CodeRepresentation.Codes = GetCodeList(factors, agencyId, resourcePackage);
                        }
                        else
                        {
                            variable.RepresentationType = RepresentationType.Numeric;
                            variable.NumericRepresentation.NumericType = NumericType.Integer;
                            variable.Additivity = AdditivityType.Stock;
                        }
                    }
                    else if (columnType == RDotNet.Internals.SymbolicExpressionType.CharacterVector)
                    {
                        variable.RepresentationType = RepresentationType.Text;
                    }
                }
            }

            return resourcePackage;
        }
コード例 #2
0
        /// <summary>
        /// Copies the properties of a <see cref="ManagedFile"/> onto a DDI <see cref="PhysicalInstance"/>.
        /// </summary>
        /// <remarks>
        /// The first existing file identification and the first existing fingerprint of
        /// <paramref name="pi"/> are reused; when there is none, a new one is created and added.
        /// </remarks>
        /// <param name="file">The managed file whose properties are read.</param>
        /// <param name="pi">The physical instance that receives the mapped values.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="file"/> or <paramref name="pi"/> is null.
        /// </exception>
        public void Map(ManagedFile file, PhysicalInstance pi)
        {
            if (file == null)
            {
                throw new ArgumentNullException(nameof(file));
            }

            if (pi == null)
            {
                // Report the real parameter name rather than a name that is not in the signature.
                throw new ArgumentNullException(nameof(pi));
            }

            // Map properties from the ManagedFile to DDI.
            pi.SetUserId("FileNumber", file.Number?.ToString());
            pi.DublinCoreMetadata.Title.Current          = file.Title;
            pi.DublinCoreMetadata.AlternateTitle.Current = file.PublicName;

            // Reuse the first file identification, creating one when the instance has none.
            var fileId = pi.FileIdentifications.FirstOrDefault();
            if (fileId == null)
            {
                fileId = new DataFileIdentification();
                pi.FileIdentifications.Add(fileId);
            }

            // The URI is only replaced when the persistent link parses; otherwise it is left as it was.
            Uri uri;
            if (Uri.TryCreate(file.PersistentLink, UriKind.RelativeOrAbsolute, out uri))
            {
                fileId.Uri = uri;
            }

            pi.SetUserAttribute("PersistentLinkDate", file.PersistentLinkDate?.ToString());
            pi.SetUserAttribute("FileType", file.Type);
            pi.SetUserAttribute("FormatName", file.FormatName);
            pi.SetUserAttribute("FormatId", file.FormatId);
            pi.SetUserAttribute("Size", file.Size.ToString());

            pi.SetUserAttribute("CreationDate", file.CreationDate);
            pi.SetUserAttribute("KindOfData", file.KindOfData);
            pi.DublinCoreMetadata.Source.Current = file.Source;
            pi.SetUserAttribute("SourceInformation", file.SourceInformation);
            pi.DublinCoreMetadata.Rights.Current = file.Rights;
            fileId.IsPublic = file.IsPublicAccess;
            pi.SetUserAttribute("UploadedDate", file.UploadedDate);
            pi.SetUserAttribute("ExternalDatabase", file.ExternalDatabase);
            pi.SetUserAttribute("Software", file.Software);
            pi.SetUserAttribute("SoftwareVersion", file.SoftwareVersion);
            pi.SetUserAttribute("Hardware", file.Hardware);

            // Reuse the first fingerprint, creating one when the instance has none.
            var fingerprint = pi.Fingerprints.FirstOrDefault();
            if (fingerprint == null)
            {
                fingerprint = new Fingerprint();
                pi.Fingerprints.Add(fingerprint);
            }
            fingerprint.FingerprintValue       = file.Checksum;
            fingerprint.AlgorithmSpecification = file.ChecksumMethod;
            pi.SetUserAttribute("ChecksumDate", file.ChecksumDate);

            pi.SetUserAttribute("VirusCheckOutcome", file.VirusCheckOutcome);
            pi.SetUserAttribute("VirusCheckMethod", file.VirusCheckMethod);
            pi.SetUserAttribute("VirusCheckDate", file.VirusCheckDate);
            pi.SetUserAttribute("AcceptedDate", file.AcceptedDate);
            pi.SetUserAttribute("CertifiedDate", file.CertifiedDate);
        }