/// <summary>
/// Add a local file to the list of resources. The file is stored in HDFS under
/// its leaf name decorated with a hash of its contents; it is uploaded only if
/// a file with that decorated name is not already present.
/// </summary>
/// <param name="fileName">leaf name of the local file</param>
/// <param name="localDirectory">local directory containing the file</param>
/// <exception cref="FileNotFoundException">
/// the file does not exist in <paramref name="localDirectory"/>
/// </exception>
public void EnsureResource(string fileName, string localDirectory)
{
    var filePath = Path.Combine(localDirectory, fileName);
    if (!File.Exists(filePath))
    {
        // report the full path that was probed, not just the leaf name, so the
        // failure identifies which directory was searched
        throw new FileNotFoundException("Can't find resource", filePath);
    }

    // the remote name is the leaf name with the hash appended as a suffix
    var hash = ResourceTools.MakeHash(filePath);
    var remoteName = fileName + "." + hash;
    var hdfsFile = new Uri(directoryURI, remoteName).AbsoluteUri;

    using (var hdfs = new Hdfs.HdfsInstance(directoryURI))
    {
        if (!hdfs.IsFileExists(hdfsFile))
        {
            hdfs.UploadAll(filePath, hdfsFile);
        }

        // record the size and timestamp as reported by HDFS for the remote copy
        var info = hdfs.GetFileInfo(hdfsFile, false);
        var file = new HdfsIdentifier
        {
            remoteName = remoteName,
            localName = fileName,
            size = info.Size,
            timestamp = info.LastModified
        };

        files.Add(file);
    }
}
/// <summary>
/// Get the details of the file names and their location from an XML element.
/// Any resource whose timestamp or size is not recorded in the element has
/// those values looked up from HDFS.
/// </summary>
/// <param name="config">the XML element containing config information</param>
public void ReadFromConfig(XElement config)
{
    files = new List<HdfsIdentifier>();

    // the URI prefix that identifies the location of the files
    directoryURI = new Uri(config.Attribute("location").Value);

    var publicAttribute = config.Attribute("public");
    isPublic = (publicAttribute != null && publicAttribute.Value == "true");

    bool needsLookup = false;

    // the metadata for each file is stored in a separate Resource element.
    foreach (var e in config.Descendants("Resource"))
    {
        var file = new HdfsIdentifier();

        // the local name defaults to the remote name unless overridden by attribute
        var lnAttribute = e.Attribute("localName");
        file.localName = (lnAttribute == null) ? e.Value : lnAttribute.Value;
        file.remoteName = e.Value;

        var timestampAttribute = e.Attribute("timestamp");
        var sizeAttribute = e.Attribute("size");
        if (timestampAttribute == null || sizeAttribute == null)
        {
            // mark the entry so that its metadata is fetched from HDFS below
            needsLookup = true;
            file.timestamp = -1;
            file.size = -1;
        }
        else
        {
            // these are machine-readable values, so parse them independently of
            // the current thread culture
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            file.timestamp = long.Parse(timestampAttribute.Value, invariant);
            file.size = long.Parse(sizeAttribute.Value, invariant);
        }

        files.Add(file);
    }

    if (needsLookup)
    {
        using (var instance = new Hdfs.HdfsInstance(directoryURI))
        {
            foreach (var r in files.Where(f => f.timestamp < 0 || f.size < 0))
            {
                var fileUri = new Uri(directoryURI, r.remoteName);
                var info = instance.GetFileInfo(fileUri.AbsoluteUri, false);
                r.timestamp = info.LastModified;
                r.size = info.Size;
            }
        }
    }
}
/// <summary>
/// Asynchronously copy every resource in the list into a local directory.
/// </summary>
/// <param name="target">the path of the destination directory</param>
/// <returns>
/// true if and only if all the copies succeeded; false if any copy failed or
/// an exception was caught while setting up or awaiting the copies
/// </returns>
public async Task<bool> FetchToLocalDirectoryAsync(string target)
{
    try
    {
        using (var hdfs = new Hdfs.HdfsInstance(directoryURI))
        {
            // kick off one copy per resource so that they all run overlapped
            var pendingCopies =
                (from resource in files
                 select CopyFileAsync(hdfs, resource.remoteName, resource.localName, target))
                .ToArray();

            // block (asynchronously) until every copy has completed
            bool[] outcomes = await Task.WhenAll(pendingCopies);

            // succeed only when no individual copy reported failure
            return outcomes.All(succeeded => succeeded);
        }
    }
    catch (Exception e)
    {
        logger.Log("Got exception copying HDFS files: " + e.ToString());
        return false;
    }
}
/// <summary>
/// Asynchronously download a single file from HDFS into a target directory.
/// </summary>
/// <param name="instance">the HDFS instance of the file system</param>
/// <param name="remoteName">name of the source file, resolved against the resource directory URI</param>
/// <param name="localName">leaf name to give the file in the destination directory</param>
/// <param name="dstDirectory">the destination directory</param>
/// <returns>true if and only if the copy succeeded</returns>
private async Task<bool> CopyFileAsync(Hdfs.HdfsInstance instance, string remoteName, string localName, string dstDirectory)
{
    // build the source URI and the destination path from the supplied names
    var srcURI = new Uri(directoryURI, remoteName);
    var dstPath = Path.Combine(dstDirectory, localName);

    logger.Log("Copying HDFS " + srcURI.AbsoluteUri + " to " + dstPath);

    bool copied;
    try
    {
        // run the download on a worker task and wait for it to finish
        await Task.Run(() => instance.DownloadAll(srcURI.AbsoluteUri, dstPath));

        // reaching this point means no exception was thrown
        copied = true;
    }
    catch (Exception e)
    {
        logger.Log("Copy hdfs " + srcURI + " to " + dstPath + " failed with " + e.Message);

        // the failure is logged and reported through the return value
        copied = false;
    }

    return copied;
}