/// <summary>
/// Downloads the file at <c>InputFileUri</c> to <c>TempFile</c>, blocking until the
/// download task finishes, and then runs the base extraction.
/// </summary>
/// <param name="recordBatchSize">Not read by this override; the base call is made without arguments.</param>
/// <param name="recordLimit">Not read by this override; the base call is made without arguments.</param>
/// <param name="options">Not read by this override; the base call is made without arguments.</param>
/// <returns>
/// <c>StageResult.INPUT_ERROR</c> when waiting on the download throws or the download
/// does not report success; otherwise whatever <c>base.Extract()</c> returns.
/// </returns>
protected override StageResult Extract(int? recordBatchSize = null, int? recordLimit = null, Dictionary<string, string> options = null)
{
    Contract.Requires(InputFileUri != null);

    // Append the archive suffix found in the URL to the input file name.
    // ".tar.gz" has to be tested before ".gz": every segment that ends in ".tar.gz"
    // also ends in ".gz", so the more specific suffix would otherwise never be chosen.
    // Ordinal comparison is used because these are file extensions, not linguistic text.
    if (InputFileUri.Segments.Any(s => s.EndsWith(".zip", StringComparison.Ordinal)))
    {
        InputFileName += ".zip";
    }
    else if (InputFileUri.Segments.Any(s => s.EndsWith(".tar.gz", StringComparison.Ordinal)))
    {
        InputFileName += ".tar.gz";
    }
    else if (InputFileUri.Segments.Any(s => s.EndsWith(".gz", StringComparison.Ordinal)))
    {
        InputFileName += ".gz";
    }

    FileDownload = new HttpFileDownload(InputFileUri.ToString(), TempFile);
    FileDownloadTask = FileDownload.StartTask();
    try
    {
        // Synchronous stage: block until the download task has run to completion.
        FileDownloadTask.Wait();
    }
    catch (Exception e)
    {
        Error(e, "Exception throw attempting to download file from Url {0} to {1}.", InputFileUri.ToString(), TempFile);
        return StageResult.INPUT_ERROR;
    }

    // The task can finish without throwing and still report an unsuccessful download.
    if (!FileDownload.CompletedSuccessfully)
    {
        Error("Failed to download file from Url {0} to {1}.", InputFileUri.ToString(), TempFile);
        return StageResult.INPUT_ERROR;
    }

    return base.Extract();
}
/// <summary>
/// Completion handler for a download. In mode 0 the downloaded file may be a small HTML
/// page containing a "uc-download-link" anchor rather than the real payload; in that case
/// a second <c>FileDownloadTask</c> is started against the linked URL and this handler is
/// attached to it. In any other mode the download is treated as the real file.
/// </summary>
/// <param name="sender">Source of the completion event; not used.</param>
/// <param name="e">Completion details (error, cancellation) of the finished download.</param>
void wc_DownloadFileCompleted(object sender, AsyncCompletedEventArgs e)
{
    if (e.Error != null || e.Cancelled)
    {
        if (_mode == 0 && System.IO.File.Exists(_file))
        {
            // delete temp file that was downloaded when trying to get gdrive direct download url
            System.IO.File.Delete(_file);
        }

        // Null-conditional like every other notification below, so a failed download
        // with no subscribers does not throw.
        DownloadFileCompleted?.Invoke(this, e);
        return;
    }

    if (_mode != 0)
    {
        // actual file being downloaded has finished successfully at this point
        CleanUpFileDownloadTask();
        DownloadFileCompleted?.Invoke(this, e);
        return;
    }

    // Only files under 100 KB are inspected for an HTML page; anything larger is
    // reported as a completed download as-is.
    if (new System.IO.FileInfo(_file).Length >= 100 * 1024)
    {
        DownloadFileCompleted?.Invoke(this, e);
        return;
    }

    string text = System.IO.File.ReadAllText(_file);

    // The "<html" tag must appear within the first 100 characters for the file to be
    // treated as a page instead of the payload.
    int html = text.IndexOf("<html", StringComparison.InvariantCultureIgnoreCase);
    if (html < 0 || html >= 100)
    {
        DownloadFileCompleted?.Invoke(this, e);
        return;
    }

    int ilink = text.IndexOf("\"uc-download-link\"", StringComparison.InvariantCultureIgnoreCase);
    if (ilink > 0)
    {
        int href = text.IndexOf("href=\"", ilink, StringComparison.InvariantCultureIgnoreCase);
        if (href > 0)
        {
            // 6 == length of href=" ; the URL runs up to the closing quote.
            int hrefend = text.IndexOf('"', href + 6);
            if (hrefend > 0)
            {
                // The href value is taken from HTML markup, so ampersands separating
                // query parameters are entity-encoded and must be decoded before use.
                string url = text.Substring(href + 6, hrefend - href - 6).Replace("&amp;", "&");
                if (url.IndexOf("://", StringComparison.Ordinal) < 0)
                {
                    // Relative link: resolve it against the scheme and authority of the original URL.
                    url = new Uri(_url).GetLeftPart(UriPartial.Authority) + url;
                }

                System.Diagnostics.Debug.WriteLine("GDrive: redirecting to " + url);
                System.Diagnostics.Debug.WriteLine("GDrive: {0} cookies set", _cookies.Count);

                // NOTE(review): _state is assumed to be a DownloadItem here; if it is not,
                // downloadInfo is null and the assignments below throw - confirm with callers.
                DownloadItem downloadInfo = _state as DownloadItem;
                FileDownloadTask fileDownload = new FileDownloadTask(url, _file, downloadInfo, _cookies)
                {
                    Headers = new WebHeaderCollection()
                };
                fileDownload.Headers.Add("Referer", _url);
                downloadInfo.PerformCancel = () =>
                {
                    fileDownload.CancelAsync();
                    downloadInfo.OnCancel?.Invoke();
                };
                fileDownload.DownloadProgressChanged += FileDownload_DownloadProgressChanged;
                fileDownload.DownloadFileCompleted += wc_DownloadFileCompleted;
                System.IO.File.Delete(_file); // delete temp html file just downloaded
                downloadInfo.FileDownloadTask = fileDownload;
                fileDownload.Start();
                _mode = 1;
                CleanUpWebClient();
                return;
            }
        }
    }

    // If we get here, it went wrong: the page had no usable download link.
    // Report the page title as the error text when one can be extracted.
    int tstart = text.IndexOf("<title>", StringComparison.InvariantCultureIgnoreCase);
    int tend = text.IndexOf("</title>", StringComparison.InvariantCultureIgnoreCase);
    string err = "Couldn't parse \ndata";
    // 7 == length of <title> ; requiring tend >= tstart + 7 keeps the Substring length
    // non-negative even when a closing tag appears before the opening one.
    if (tstart >= 0 && tend >= tstart + 7)
    {
        err = text.Substring(tstart + 7, tend - tstart - 7);
    }

    System.IO.File.Delete(_file);
    DownloadFileCompleted?.Invoke(this, new AsyncCompletedEventArgs(new Exception(err), false, _state));
}