/// <summary>
/// Runs the hosting-specific PhantomJS script in "download" mode for the given task and
/// translates the lines the script prints on standard output into a response.
/// </summary>
/// <param name="task">
/// Supplies the URL and hosting to process, the cancellation token, and the status, log and
/// captcha callbacks that are invoked while the script runs.
/// </param>
/// <returns>
/// The response; <c>DownloadUrl</c> is set when the script reports a "download-url" line and
/// <c>Waiting</c> is set when a "message" line matches "to wait N minutes".
/// </returns>
/// <exception cref="OperationCanceledException">The task's cancellation token was signalled.</exception>
private async Task<PhantomResponse> CallPhantom(ResourceTask task)
{
    Regex regex = new Regex(@"to wait (?<minutes>[0-9]+) minutes");
    PhantomResponse response = new PhantomResponse { Lines = new List<string>() };

    task.OnStatus.Invoke("starting");
    task.OnLog.Information("Starting PhantomJS.");

    ProcessStartInfo info = new ProcessStartInfo
    {
        FileName = GetPhantomPath(),
        Arguments = GetScriptPath(task.Hosting) + " download " + task.Url.ToString(),
        UseShellExecute = false,
        RedirectStandardOutput = true,
        RedirectStandardInput = true,
        CreateNoWindow = true,
        WindowStyle = ProcessWindowStyle.Hidden,
        WorkingDirectory = GetDataPath()
    };

    using (Process process = Process.Start(info))
    {
        PhantomCallback callback = new PhantomCallback
        {
            // The download URL is the final answer, so stop reading the script output.
            OnDownload = url =>
            {
                response.DownloadUrl = url;
                return false;
            },
            OnMessage = message =>
            {
                Match match = regex.Match(message);

                if (match.Success == true && match.Groups["minutes"].Success == true)
                {
                    int minutes;

                    // TryParse instead of Parse: an overlong digit run must not abort the
                    // whole download with an OverflowException; such a message is ignored.
                    if (Int32.TryParse(match.Groups["minutes"].Value, System.Globalization.NumberStyles.None, System.Globalization.CultureInfo.InvariantCulture, out minutes) == true)
                    {
                        response.Waiting = TimeSpan.FromMinutes(minutes);
                    }
                }

                return true;
            },
            OnDebug = text =>
            {
                task.OnLog.Debug(text);
                return true;
            },
            OnFatal = text =>
            {
                task.OnLog.Debug(text);
                return true;
            },
            OnDumpImage = base64 =>
            {
                task.OnLog.Debug("PhantomJS dumped an image.", Convert.FromBase64String(base64), "image");
                return true;
            },
            OnDumpHtml = base64 =>
            {
                task.OnLog.Debug("PhantomJS dumped an html content.", Convert.FromBase64String(base64), "text");
                return true;
            },
            OnFileName = text => true,
            OnFileSize = text => true,
            OnFileStatus = text => true,
            OnFallback = text => true,
            OnRaw = line => { },
        };

        callback.OnCaptcha = async url =>
        {
            string solution;

            task.Cancellation.ThrowIfCancellationRequested();
            task.OnLog.Information("Handling captcha.");

            TimeSpan timeout = TimeSpan.FromMinutes(3);

            // Every captcha (and every reload) gets a fresh timeout linked with the task's
            // cancellation. The sources are tracked so that they can be disposed once the
            // captcha has been answered; otherwise each linked source would stay registered
            // on the task's token.
            List<CancellationTokenSource> sources = new List<CancellationTokenSource>();

            Func<CancellationToken> startTimeout = () =>
            {
                CancellationTokenSource timer = new CancellationTokenSource(timeout);
                CancellationTokenSource linked = CancellationTokenSource.CreateLinkedTokenSource(timer.Token, task.Cancellation);

                // Reload handlers run through HandleInThread, so guard the shared list.
                lock (sources)
                {
                    sources.Add(timer);
                    sources.Add(linked);
                }

                return linked.Token;
            };

            try
            {
                using (WebClient client = new WebClient())
                {
                    task.OnStatus("decaptching");

                    CancellationToken token = startTimeout.Invoke();

                    // Downloaded asynchronously, like the reload path below, so the calling
                    // thread is not blocked while the captcha is fetched.
                    byte[] data = await client.DownloadDataTaskAsync(url);

                    Captcha captcha = new Captcha
                    {
                        Type = "image",
                        Data = data,
                        Cancellation = token
                    };

                    Action debug = () =>
                    {
                        switch (captcha.Type)
                        {
                            case "image":
                                task.OnLog.Debug("Got captcha image data.", captcha.Data, "image");
                                break;

                            case "audio":
                                task.OnLog.Debug("Got captcha audio data.", captcha.Data, "audio");
                                break;
                        }
                    };

                    // While the captcha is being solved a new "captcha-url" line means the
                    // script delivered a replacement: refresh the data and stop the nested read.
                    PhantomCallback local = callback.Override(new PhantomCallback
                    {
                        OnCaptcha = async reloadUrl =>
                        {
                            captcha.Cancellation = startTimeout.Invoke();
                            captcha.Data = await client.DownloadDataTaskAsync(reloadUrl);
                            debug.Invoke();
                            return false;
                        }
                    });

                    debug.Invoke();

                    captcha.Reload = async () =>
                    {
                        await process.StandardInput.WriteLineAsync("::reload::");
                        task.OnLog.Information("Reloading captcha.");
                        await this.HandleInThread(local, task.Cancellation, process);
                    };

                    captcha.ToAudio = async () =>
                    {
                        await process.StandardInput.WriteLineAsync("::audio::");
                        task.OnLog.Information("Switching to audio.");
                        captcha.Type = "audio";
                        await this.HandleInThread(local, task.Cancellation, process);
                    };

                    captcha.ToImage = async () =>
                    {
                        await process.StandardInput.WriteLineAsync("::image::");
                        task.OnLog.Information("Switching to image.");
                        captcha.Type = "image";
                        await this.HandleInThread(local, task.Cancellation, process);
                    };

                    solution = await task.OnCaptcha.Invoke(captcha);
                    task.OnStatus("working");
                }
            }
            finally
            {
                lock (sources)
                {
                    // Linked sources were added after their timers; dispose them first.
                    for (int index = sources.Count - 1; index >= 0; index--)
                    {
                        sources[index].Dispose();
                    }

                    sources.Clear();
                }
            }

            task.Cancellation.ThrowIfCancellationRequested();
            task.OnLog.Information("Sending captcha.");
            await process.StandardInput.WriteLineAsync(solution);

            return true;
        };

        try
        {
            task.OnStatus("working");
            await this.Handle(callback, task.Cancellation, process);
            process.WaitForExit();

            return response;
        }
        finally
        {
            // Never leave PhantomJS running when leaving early (cancellation, failure).
            if (process.HasExited == false)
            {
                try
                {
                    process.Kill();
                }
                catch (InvalidOperationException)
                {
                    // The process exited between the check and the kill; nothing left to
                    // do, and the exception that brought us here must not be replaced.
                }
            }
        }
    }
}
/// <summary>
/// Reads the process' standard output line by line and routes every "key: value" line to
/// the matching handler of <paramref name="callback"/>.
/// </summary>
/// <remarks>
/// Reading stops when the output ends or when a handler returns <c>false</c>. Lines without
/// a colon are skipped. Cancellation is checked before and after each read.
/// </remarks>
/// <param name="callback">Handlers invoked for the recognised line prefixes.</param>
/// <param name="cancellation">Token checked around every read.</param>
/// <param name="process">Process whose redirected standard output is consumed.</param>
private async Task Handle(PhantomCallback callback, CancellationToken cancellation, Process process)
{
    char[] separator = new[] { ':' };
    bool keepReading = true;

    while (keepReading == true)
    {
        cancellation.ThrowIfCancellationRequested();
        string line = await process.StandardOutput.ReadLineAsync();
        cancellation.ThrowIfCancellationRequested();

        // A null line marks the end of the output stream.
        if (line == null)
        {
            break;
        }

        string[] parts = line.Split(separator, 2);

        if (parts.Length != 2)
        {
            continue;
        }

        callback.OnRaw.Invoke(line);

        string key = parts[0];
        string value = parts[1].Trim();

        if (key == "file-name")
        {
            keepReading = callback.OnFileName.Invoke(value);
        }
        else if (key == "file-size")
        {
            keepReading = callback.OnFileSize.Invoke(value);
        }
        else if (key == "file-status")
        {
            keepReading = callback.OnFileStatus.Invoke(value);
        }
        else if (key == "captcha-url")
        {
            keepReading = await callback.OnCaptcha.Invoke(value);
        }
        else if (key == "download-url")
        {
            keepReading = callback.OnDownload.Invoke(value);
        }
        else if (key == "message")
        {
            keepReading = callback.OnMessage.Invoke(value);
        }
        else if (key == "debug")
        {
            keepReading = callback.OnDebug.Invoke(value);
        }
        else if (key == "fatal")
        {
            keepReading = callback.OnFatal.Invoke(value);
        }
        else if (key == "dump-image")
        {
            keepReading = callback.OnDumpImage.Invoke(value);
        }
        else if (key == "dump-html")
        {
            keepReading = callback.OnDumpHtml.Invoke(value);
        }
        else
        {
            keepReading = callback.OnFallback.Invoke(value);
        }
    }
}
/// <summary>
/// Builds a new callback set in which every handler defined on <paramref name="callback"/>
/// replaces the corresponding handler of this instance; handlers left unset (null) on
/// <paramref name="callback"/> are taken from this instance.
/// </summary>
/// <param name="callback">Handlers that take precedence over the ones of this instance.</param>
/// <returns>A new instance; neither this instance nor <paramref name="callback"/> is modified.</returns>
public PhantomCallback Override(PhantomCallback callback)
{
    PhantomCallback merged = new PhantomCallback();

    merged.OnCaptcha = callback.OnCaptcha ?? this.OnCaptcha;
    merged.OnFileName = callback.OnFileName ?? this.OnFileName;
    merged.OnFileSize = callback.OnFileSize ?? this.OnFileSize;
    merged.OnFileStatus = callback.OnFileStatus ?? this.OnFileStatus;
    merged.OnDownload = callback.OnDownload ?? this.OnDownload;
    merged.OnMessage = callback.OnMessage ?? this.OnMessage;
    merged.OnDebug = callback.OnDebug ?? this.OnDebug;
    merged.OnFatal = callback.OnFatal ?? this.OnFatal;
    merged.OnDumpImage = callback.OnDumpImage ?? this.OnDumpImage;
    merged.OnDumpHtml = callback.OnDumpHtml ?? this.OnDumpHtml;
    merged.OnRaw = callback.OnRaw ?? this.OnRaw;
    merged.OnFallback = callback.OnFallback ?? this.OnFallback;

    return merged;
}
/// <summary>
/// Starts <c>Handle</c> through <c>Task.Run</c> and returns a task that completes when that
/// read loop has finished.
/// </summary>
/// <param name="callback">Handlers passed on to <c>Handle</c>.</param>
/// <param name="cancellation">Token passed on to <c>Handle</c>.</param>
/// <param name="process">Process whose output <c>Handle</c> reads.</param>
private Task HandleInThread(PhantomCallback callback, CancellationToken cancellation, Process process)
{
    // Task.Run unwraps the inner task, so no extra async/await wrapper is needed.
    Func<Task> readLoop = () => this.Handle(callback, cancellation, process);

    return Task.Run(readLoop);
}
/// <summary>
/// Checks every link of the task by running the hosting-specific PhantomJS script in
/// "query" mode, at most two links at a time.
/// </summary>
/// <remarks>
/// Each link is first reported as "pending", then as "checking", and finally as "available"
/// or "unavailable". <c>OnCompleted</c> is invoked only when the script reported both a file
/// name and a file size.
/// </remarks>
/// <param name="task">Supplies the links to check and the status, log and completion callbacks.</param>
public async Task Analyze(LinkTask task)
{
    if (task.Links.Length == 0)
    {
        return;
    }

    foreach (Link link in task.Links)
    {
        task.OnStatus(link, "pending");
    }

    await Task.Run(() =>
    {
        Parallel.ForEach(task.Links, new ParallelOptions { MaxDegreeOfParallelism = 2 }, link =>
        {
            task.OnStatus.Invoke(link, "checking");
            task.OnLog.Information("Analyzing '{0}'.", link.Url);
            task.OnLog.Information("Starting PhantomJS");

            ProcessStartInfo info = new ProcessStartInfo
            {
                FileName = GetPhantomPath(),
                Arguments = GetScriptPath(link.Hosting) + " query " + link.Url.ToString(),
                UseShellExecute = false,
                RedirectStandardOutput = true,
                CreateNoWindow = true,
                WindowStyle = ProcessWindowStyle.Hidden,
                WorkingDirectory = GetDataPath()
            };

            using (Process process = Process.Start(info))
            {
                try
                {
                    Resource resource = new Resource
                    {
                        Url = link.Url,
                        Hosting = link.Hosting,
                        IsAvailable = true
                    };

                    PhantomCallback callback = new PhantomCallback
                    {
                        OnDownload = url => true,
                        OnFileName = text =>
                        {
                            resource.Name = text;
                            return true;
                        },
                        OnFileSize = text =>
                        {
                            resource.Size = text;
                            return true;
                        },
                        // Any reported file status marks the resource as unavailable and ends the query.
                        OnFileStatus = text =>
                        {
                            resource.IsAvailable = false;
                            return false;
                        },
                        OnFatal = text =>
                        {
                            task.OnLog.Warning(text);
                            return true;
                        },
                        // A captcha cannot be answered while querying; stop reading.
                        OnCaptcha = text => Task.FromResult(false),
                        OnDebug = text =>
                        {
                            task.OnLog.Debug(text);
                            return true;
                        },
                        // Handle invokes these for "dump-image"/"dump-html" lines; without
                        // them such a line would end in a NullReferenceException.
                        OnDumpImage = text => true,
                        OnDumpHtml = text => true,
                        OnFallback = text => true,
                        OnMessage = text => true,
                        OnRaw = text => { }
                    };

                    // Parallel.ForEach bodies are synchronous, hence the blocking wait.
                    this.Handle(callback, CancellationToken.None, process).Wait();

                    if (resource.IsAvailable == true)
                    {
                        task.OnStatus(link, "available");
                    }
                    else
                    {
                        task.OnStatus(link, "unavailable");
                    }

                    if (resource.Name != null && resource.Size != null)
                    {
                        task.OnCompleted(link, resource);
                    }

                    process.WaitForExit();
                }
                finally
                {
                    // Disposing the Process object does not stop the program; make sure no
                    // PhantomJS instance is left behind when reading or a callback failed.
                    if (process.HasExited == false)
                    {
                        try
                        {
                            process.Kill();
                        }
                        catch (InvalidOperationException)
                        {
                            // Exited between the check and the kill; nothing left to do.
                        }
                    }
                }
            }
        });
    });
}