Esempio n. 1
0
        /// <summary>
        /// Register a local file as a resource. The file is stored in HDFS under its
        /// leaf name with a hash suffix appended, and is uploaded only when a file of
        /// that decorated name is not already present there.
        /// </summary>
        /// <param name="fileName">leaf name of the local file</param>
        /// <param name="localDirectory">local directory containing the file</param>
        /// <exception cref="FileNotFoundException">the file does not exist in
        /// <paramref name="localDirectory"/></exception>
        public void EnsureResource(string fileName, string localDirectory)
        {
            var localPath = Path.Combine(localDirectory, fileName);
            if (!File.Exists(localPath))
            {
                throw new FileNotFoundException("Can't find resource", fileName);
            }

            // the remote leaf name is the local leaf name decorated with the value
            // returned by ResourceTools.MakeHash (presumably a content hash, so that
            // different versions of a file get different remote names -- confirm)
            var remoteName = fileName + "." + ResourceTools.MakeHash(localPath);
            var remoteUri  = new Uri(directoryURI, remoteName).AbsoluteUri;

            using (var instance = new Hdfs.HdfsInstance(directoryURI))
            {
                // skip the upload when the decorated name is already in HDFS
                bool alreadyUploaded = instance.IsFileExists(remoteUri);
                if (!alreadyUploaded)
                {
                    instance.UploadAll(localPath, remoteUri);
                }

                // record the size and timestamp that HDFS reports for the remote copy
                var remoteInfo = instance.GetFileInfo(remoteUri, false);

                files.Add(new HdfsIdentifier
                {
                    remoteName = remoteName,
                    localName  = fileName,
                    size       = remoteInfo.Size,
                    timestamp  = remoteInfo.LastModified
                });
            }
        }
Esempio n. 2
0
        /// <summary>
        /// get the details of the file names and their location from an XML element.
        /// Any previously held resource list is replaced.
        /// </summary>
        /// <remarks>
        /// Each descendant "Resource" element describes one file: its text is the
        /// remote name, the optional "localName" attribute is the local name (the
        /// remote name is used when it is absent), and the optional "timestamp" and
        /// "size" attributes carry the file metadata. When either metadata attribute
        /// is missing, both values are fetched from HDFS instead.
        /// </remarks>
        /// <param name="config">the XML element containing config information; it
        /// must have a "location" attribute holding the URI of the directory that
        /// contains the files</param>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null</exception>
        /// <exception cref="ArgumentException"><paramref name="config"/> has no
        /// "location" attribute</exception>
        /// <exception cref="FormatException">a "timestamp" or "size" attribute is not
        /// a valid integer</exception>
        public void ReadFromConfig(XElement config)
        {
            if (config == null)
            {
                throw new ArgumentNullException("config");
            }

            var locationAttribute = config.Attribute("location");
            if (locationAttribute == null)
            {
                throw new ArgumentException("Resource config element has no \"location\" attribute", "config");
            }

            files = new List <HdfsIdentifier>();

            // the relative or absolute path prefix that identifies the location of the
            // files
            directoryURI = new Uri(locationAttribute.Value);

            var publicAttribute = config.Attribute("public");
            isPublic = (publicAttribute != null && publicAttribute.Value == "true");

            // the config is machine-written, so numbers are parsed with the invariant
            // culture rather than whatever culture the current thread happens to use
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            bool needsLookup = false;

            // the metadata for each file is stored in a separate Resource element.
            foreach (var e in config.Descendants("Resource"))
            {
                var localNameAttribute = e.Attribute("localName");
                var timestampAttribute = e.Attribute("timestamp");
                var sizeAttribute      = e.Attribute("size");

                var file = new HdfsIdentifier();
                file.localName  = (localNameAttribute == null) ? e.Value : localNameAttribute.Value;
                file.remoteName = e.Value;

                if (timestampAttribute == null || sizeAttribute == null)
                {
                    // -1 marks metadata that has to be fetched from HDFS below
                    needsLookup    = true;
                    file.timestamp = -1;
                    file.size      = -1;
                }
                else
                {
                    file.timestamp = long.Parse(timestampAttribute.Value, invariant);
                    file.size      = long.Parse(sizeAttribute.Value, invariant);
                }

                files.Add(file);
            }

            // only open an HDFS connection when at least one entry lacks metadata
            if (needsLookup)
            {
                using (var instance = new Hdfs.HdfsInstance(directoryURI))
                {
                    foreach (var r in files.Where(f => f.timestamp < 0 || f.size < 0))
                    {
                        var fileUri = new Uri(directoryURI, r.remoteName);
                        var info    = instance.GetFileInfo(fileUri.AbsoluteUri, false);
                        r.timestamp = info.LastModified;
                        r.size      = info.Size;
                    }
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Asynchronously download every registered resource into a local directory.
        /// All downloads are started together and then awaited as a group.
        /// </summary>
        /// <param name="target">the path of the destination directory</param>
        /// <returns>true if and only if every copy succeeded; false if any copy
        /// failed or an exception was thrown while setting up the copies</returns>
        public async Task <bool> FetchToLocalDirectoryAsync(string target)
        {
            try
            {
                using (var hdfs = new Hdfs.HdfsInstance(directoryURI))
                {
                    // materializing the query into an array starts one copy per
                    // resource, so the copies run overlapped
                    Task<bool>[] pending =
                        (from resource in files
                         select CopyFileAsync(hdfs, resource.remoteName, resource.localName, target)).ToArray();

                    // the HDFS instance stays open until every copy has finished
                    bool[] outcomes = await Task.WhenAll(pending);

                    // succeed only when no individual copy reported failure
                    return outcomes.All(succeeded => succeeded);
                }
            }
            catch (Exception failure)
            {
                logger.Log("Got exception copying HDFS files: " + failure.ToString());
                return false;
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Asynchronously download one file from the resource directory in HDFS into
        /// a local directory. A failed download is logged and reported through the
        /// return value rather than thrown.
        /// </summary>
        /// <param name="instance">the HDFS instance of the file system</param>
        /// <param name="remoteName">the name of the file relative to the resource
        /// directory URI</param>
        /// <param name="localName">the leaf name to give the downloaded file</param>
        /// <param name="dstDirectory">the destination directory</param>
        /// <returns>true if and only if the copy succeeded</returns>
        private async Task <bool> CopyFileAsync(Hdfs.HdfsInstance instance, string remoteName, string localName, string dstDirectory)
        {
            // build the local destination path and the absolute source URI
            var localPath = Path.Combine(dstDirectory, localName);
            var remoteUri = new Uri(directoryURI, remoteName);
            var remoteUrl = remoteUri.AbsoluteUri;

            logger.Log("Copying HDFS " + remoteUrl + " to " + localPath);

            bool copied;
            try
            {
                // run the download on a thread-pool thread so that several copies
                // can be in flight at once
                await Task.Run(() => instance.DownloadAll(remoteUrl, localPath));

                // no exception means the download completed
                copied = true;
            }
            catch (Exception e)
            {
                logger.Log("Copy hdfs " + remoteUri + " to " + localPath + " failed with " + e.Message);
                copied = false;
            }

            return copied;
        }