/// <summary>
/// Writes the output of <paramref name="node"/>'s <c>Print</c> method to a file.
/// </summary>
/// <param name="node">Node whose <c>Print</c> output is written.</param>
/// <param name="fileName">Path passed to the <see cref="StreamWriter"/> constructor.</param>
public static void PrintToFile(this SpiderTreeNode node, string fileName)
{
    // The writer is disposed (and therefore flushed) when the using block exits.
    using (var output = new StreamWriter(fileName))
    {
        node.Print(output);
    }
}
/// <summary>
/// Downloads the HTML page at <c>node.Uri</c> and adds one child to <c>node.Nodes</c> for every
/// relative link that ends with "/", optionally expanding each child the same way.
/// </summary>
/// <remarks>
/// The node is printed to the console before anything is downloaded. Nodes whose name contains
/// ".Level_" or ".mage-tab." are printed but not expanded.
/// </remarks>
/// <param name="node">Node to expand; its <c>Uri</c> is downloaded and its <c>Nodes</c> list is appended to.</param>
/// <param name="recursive">When true, every node in <c>node.Nodes</c> is expanded recursively.</param>
public static void FillDirectoryTree(SpiderTreeNode node, bool recursive)
{
    node.Print(Console.Out);

    if (node.Name.Contains(".Level_") || node.Name.Contains(".mage-tab."))
    {
        return;
    }

    var content = WebUtils.DownloadHtml(node.Uri);

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(content);

    // SelectNodes yields null (not an empty collection) when the page has no matching anchor,
    // so it must be checked before enumerating.
    var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
    if (anchors != null)
    {
        foreach (HtmlNode link in anchors)
        {
            var curlink = link.Attributes["href"].Value;

            // Keep only relative links that end with "/"; "lost+found/" is skipped explicitly.
            if (curlink.StartsWith("/", StringComparison.Ordinal)
                || !curlink.EndsWith("/", StringComparison.Ordinal)
                || curlink.Equals("lost+found/"))
            {
                continue;
            }

            string cururi = node.Uri.EndsWith("/", StringComparison.Ordinal)
                ? node.Uri + curlink
                : node.Uri + "/" + curlink;

            var curname = curlink.TrimEnd('/');

            // The text node following the anchor supplies LastModified; its last character is
            // dropped (presumably a trailing size placeholder of the listing - TODO confirm).
            // A missing sibling or empty text yields an empty value instead of an exception.
            var sibling = link.NextSibling;
            var lastModified = sibling == null ? string.Empty : sibling.InnerText.Trim();
            if (lastModified.Length > 0)
            {
                lastModified = lastModified.Substring(0, lastModified.Length - 1).Trim();
            }

            node.Nodes.Add(new SpiderTreeNode()
            {
                Name = curname,
                Uri = cururi,
                LastModified = lastModified,
                Depth = node.Depth + 1
            });
        }
    }

    if (recursive)
    {
        foreach (var subnode in node.Nodes)
        {
            FillDirectoryTree(subnode, true);
        }
    }
}
/// <summary>
/// Downloads, in order, every item that <c>GetDownloadFiles</c> returns for <paramref name="node"/>.
/// </summary>
/// <param name="node">Node forwarded to <c>GetDownloadFiles</c>.</param>
/// <param name="targetDir">Target directory forwarded to <c>GetDownloadFiles</c>.</param>
/// <param name="filterFile">Optional filter forwarded to <c>GetDownloadFiles</c>.</param>
/// <param name="callback">Optional progress callback handed to <c>WebUtils.DownloadFile</c>.</param>
/// <exception cref="Exception">
/// Thrown at the first item for which <c>WebUtils.DownloadFile</c> returns false; remaining items are not attempted.
/// </exception>
public static void DownloadFiles(SpiderTreeNode node, string targetDir, Action<List<DownloadItem>> filterFile, IProgressCallback callback = null)
{
    var pending = GetDownloadFiles(node, targetDir, filterFile);

    foreach (var entry in pending)
    {
        if (WebUtils.DownloadFile(entry.Url, entry.TargetFile, callback))
        {
            continue;
        }

        var message = string.Format("Download {0} to {1} failed!", entry.Url, entry.TargetFile);
        throw new Exception(message);
    }
}
/// <summary>
/// Writes <paramref name="node"/> and, recursively, all nodes below it to <paramref name="writer"/>,
/// one line per node in the form <c>Name:[LastModified]</c>.
/// </summary>
/// <param name="node">Node to print; its <c>Nodes</c> are printed after it.</param>
/// <param name="writer">Destination of the output.</param>
public static void Print(this SpiderTreeNode node, TextWriter writer)
{
    // One "->" marker per level below depth 1.
    int level = 1;
    while (level < node.Depth)
    {
        writer.Write("->");
        level++;
    }

    writer.WriteLine("{0}:[{1}]", node.Name, node.LastModified);

    foreach (var child in node.Nodes)
    {
        child.Print(writer);
    }
}
/// <summary>
/// Creates a root node (depth 1) with the given name and URI and fills it via <c>FillDirectoryTree</c>.
/// </summary>
/// <param name="name">Value assigned to the root node's <c>Name</c>.</param>
/// <param name="uri">Value assigned to the root node's <c>Uri</c>.</param>
/// <param name="recursive">Forwarded unchanged to <c>FillDirectoryTree</c>.</param>
/// <returns>The root node after <c>FillDirectoryTree</c> has processed it.</returns>
public static SpiderTreeNode GetDirectoryTree(string name, string uri, bool recursive)
{
    var root = new SpiderTreeNode();
    root.Name = name;
    root.Uri = uri;
    root.Depth = 1;

    FillDirectoryTree(root, recursive);

    return root;
}
/// <summary>
/// Builds a depth-1 root node from <paramref name="name"/> and <paramref name="uri"/> and hands it
/// to <c>FillDirectoryTree</c>.
/// </summary>
/// <param name="name">Name of the root node.</param>
/// <param name="uri">URI of the root node.</param>
/// <param name="recursive">Passed through to <c>FillDirectoryTree</c>.</param>
/// <returns>The root node, after <c>FillDirectoryTree</c> has been applied to it.</returns>
public static SpiderTreeNode GetDirectoryTree(string name, string uri, bool recursive)
{
    var tree = new SpiderTreeNode();
    tree.Name = name;
    tree.Uri = uri;
    tree.Depth = 1;

    FillDirectoryTree(tree, recursive);

    return tree;
}
/// <summary>
/// Click handler: reads the tree from the file named by <c>xmlFile.FullName</c>, stores the distinct
/// names of the root's children in <c>_tumors</c>, and calls <c>FillTumor</c>.
/// Shows a message box and does nothing else when the file does not exist.
/// </summary>
private void btnLoad_Click(object sender, EventArgs e)
{
    if (File.Exists(xmlFile.FullName))
    {
        _lastXml = xmlFile.FullName;
        _rootNode = new SpiderTreeNodeXmlFormat().ReadFromFile(xmlFile.FullName);

        // Names are stored as object, matching the element type of _tumors.
        _tumors = _rootNode.Nodes
            .Select(node => node.Name as object)
            .Distinct()
            .ToList();

        FillTumor();
    }
    else
    {
        MessageBox.Show(this, string.Format("Xml file {0} not exists", xmlFile.FullName));
    }
}
/// <summary>
/// Downloads the HTML page at <c>node.Uri</c> and builds one <c>DownloadItem</c> for every link
/// that neither ends with "/" nor starts with "?".
/// </summary>
/// <param name="node">Node whose <c>Uri</c> is downloaded and used as the base of each item URL.</param>
/// <param name="targetDir">Directory under which each item's target file path is built.</param>
/// <param name="filterFile">Optional callback invoked with the item list before <c>FilterExists</c>; may be null.</param>
/// <returns>
/// The item list after <paramref name="filterFile"/> and <c>FilterExists</c> have been applied to it.
/// The list is empty when the page contains no matching anchor.
/// </returns>
public static List<DownloadItem> GetDownloadFiles(SpiderTreeNode node, string targetDir, Action<List<DownloadItem>> filterFile)
{
    List<DownloadItem> result = new List<DownloadItem>();

    var content = WebUtils.DownloadHtml(node.Uri);

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(content);

    // SelectNodes yields null (not an empty collection) when nothing matches,
    // so it must be checked before enumerating.
    var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
    if (anchors != null)
    {
        foreach (HtmlNode link in anchors)
        {
            var curlink = link.Attributes["href"].Value;

            // Skip links ending with "/" and links starting with "?".
            if (curlink.EndsWith("/", StringComparison.Ordinal) || curlink.StartsWith("?", StringComparison.Ordinal))
            {
                continue;
            }

            string cururi = node.Uri.EndsWith("/", StringComparison.Ordinal)
                ? node.Uri + curlink
                : node.Uri + "/" + curlink;

            // NOTE(review): the href is used verbatim to build the local path; values containing
            // path segments such as "../" are not filtered here.
            var targetFile = new FileInfo(targetDir + "/" + curlink).FullName;

            result.Add(new DownloadItem()
            {
                Url = cururi,
                TargetFile = targetFile,
                RetryCount = 0
            });
        }
    }

    if (null != filterFile)
    {
        filterFile(result);
    }

    // Presumably drops items whose target already exists - TODO confirm against FilterExists.
    FilterExists(result);

    return result;
}
/// <summary>
/// Delegates to <c>TCGASpider.DownloadFiles</c> for <paramref name="node"/> and
/// <paramref name="targetDir"/>, passing no file filter.
/// </summary>
/// <param name="tumor">Not used by the current implementation.</param>
/// <param name="node">Node forwarded to <c>TCGASpider.DownloadFiles</c>.</param>
/// <param name="targetDir">Target directory forwarded to <c>TCGASpider.DownloadFiles</c>.</param>
/// <param name="callback">Optional progress callback forwarded to <c>TCGASpider.DownloadFiles</c>.</param>
public void DownloadClinicalData(string tumor, SpiderTreeNode node, string targetDir, IProgressCallback callback = null)
{
    // A disabled earlier variant first tried to fetch "clinical_{tumor}.tar.gz" from node.Uri and
    // uncompress it into targetDir, falling back to the per-file download below when that failed.
    TCGASpider.DownloadFiles(node, targetDir, null, callback);
}
/// <summary>
/// Brings the local copies of the children of <paramref name="m"/> under <paramref name="currDir"/>
/// up to date: previous versions are deleted, missing archives are downloaded and MD5-verified,
/// and archives are handed to <c>UncompressFile</c>.
/// </summary>
/// <remarks>
/// Before processing, lower-level children are removed from <c>m.Nodes</c>: level 1 and level 2 when
/// any level 3 child exists, otherwise level 1 only. <c>MarkHighestVersionNodes</c> is then called
/// on <paramref name="m"/>.
/// </remarks>
/// <param name="m">Parent node; its <c>Nodes</c> list is modified and then iterated.</param>
/// <param name="currDir">Directory that holds (or will hold) each child's directory and archive.</param>
/// <exception cref="UserTerminatedException">Thrown when <c>Progress</c> reports a pending cancellation.</exception>
/// <exception cref="FileNotFoundException">Thrown when the ".md5" file is missing after its download attempt.</exception>
/// <exception cref="Exception">Thrown when the archive download fails or its MD5 does not match.</exception>
private void DownloadLevel3Data(SpiderTreeNode m, string currDir)
{
    if (m.Nodes.Any(n => TCGAUtils.IsLevel3(n.Name)))
    {
        m.Nodes.RemoveAll(n => TCGAUtils.IsLevel1(n.Name) || TCGAUtils.IsLevel2(n.Name));
    }
    else //download level2 data
    {
        m.Nodes.RemoveAll(n => TCGAUtils.IsLevel1(n.Name));
    }

    m.MarkHighestVersionNodes();

    foreach (var node in m.Nodes)
    {
        if (Progress.IsCancellationPending())
        {
            throw new UserTerminatedException();
        }

        var fDir = currDir + "/" + node.Name;
        var compressed = fDir + ".tar.gz";
        var compressedMd5 = fDir + ".tar.gz.md5";

        var parentDir = Path.GetDirectoryName(currDir);
        var parentFDir = parentDir + "/" + node.Name;
        var parentFComparessed = parentFDir + ".tar.gz";
        var parentFComparessedMd5 = parentFDir + ".tar.gz.md5";

        // Data found one directory above currDir is moved into currDir
        // (presumably left over from an older directory layout - TODO confirm).
        if (Directory.Exists(parentFDir))
        {
            Directory.Move(parentFDir, fDir);
        }

        if (File.Exists(parentFComparessed))
        {
            File.Move(parentFComparessed, compressed);
        }

        if (File.Exists(parentFComparessedMd5))
        {
            File.Move(parentFComparessedMd5, compressedMd5);
        }

        if (node.IsPreviousVersion)
        {
            if (Directory.Exists(fDir))
            {
                Progress.SetMessage("Deleting previous version : " + fDir);

                // Recursive delete: a non-recursive delete fails when the directory has sub-directories.
                Directory.Delete(fDir, true);
            }

            // Remove the archive and its checksum independently so that a checksum file
            // is not left behind when the archive itself is already gone.
            if (File.Exists(compressed))
            {
                File.Delete(compressed);
            }

            if (File.Exists(compressedMd5))
            {
                File.Delete(compressedMd5);
            }
        }
        else
        {
            Progress.SetMessage("Processing {0}.{1} ...", m.Name, node.Name);

            var bDownload = !File.Exists(compressed);
            var bTar = bDownload || !Directory.Exists(fDir);

            if (bDownload)
            {
                // The archive URL is the node URL without its trailing "/" plus ".tar.gz".
                // Only strip the last character when it really is a slash.
                var baseUri = node.Uri.EndsWith("/", StringComparison.Ordinal)
                    ? node.Uri.Substring(0, node.Uri.Length - 1)
                    : node.Uri;
                var uri = baseUri + ".tar.gz";

                if (!WebUtils.DownloadFile(uri, compressed, this.Progress))
                {
                    throw new Exception(string.Format("Downloading {0} failed", uri));
                }

                if (Progress.IsCancellationPending())
                {
                    throw new UserTerminatedException();
                }

                WebUtils.DownloadFile(uri + ".md5", compressedMd5);

                // Report a missing checksum file explicitly rather than failing inside ReadAllText.
                if (!File.Exists(compressedMd5))
                {
                    throw new FileNotFoundException(string.Format("Downloading {0} failed", uri + ".md5"), compressedMd5);
                }

                var downloadedMD5 = HashUtils.GetMD5Hash(compressed, true, false);

                // The digest is the first whitespace-separated token of the checksum file; line breaks
                // count as separators so a file holding only "digest\n" is parsed correctly.
                var md5Fields = File.ReadAllText(compressedMd5)
                    .Split(new[] { '\t', ' ', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
                var trueMD5 = md5Fields.Length > 0 ? md5Fields[0] : string.Empty;

                // Hex digests are compared case-insensitively.
                if (!string.Equals(downloadedMD5, trueMD5, StringComparison.OrdinalIgnoreCase))
                {
                    // The downloaded archive is left in place; the message asks the user to delete it.
                    throw new Exception(string.Format("MD5 of file {0} doesn't equal to server provided MD5, downloading failed!\nYou may consider to delete the file and try again, or you may download and de-compress it by youself.", compressed));
                }
            }

            UncompressFile(currDir, fDir, compressed, bTar);
        }
    }
}
/// <summary>
/// Fetches each item produced by <c>GetDownloadFiles</c> for <paramref name="node"/>, one after another.
/// </summary>
/// <param name="node">Node passed on to <c>GetDownloadFiles</c>.</param>
/// <param name="targetDir">Target directory passed on to <c>GetDownloadFiles</c>.</param>
/// <param name="filterFile">Optional filter passed on to <c>GetDownloadFiles</c>.</param>
/// <param name="callback">Optional progress callback given to <c>WebUtils.DownloadFile</c>.</param>
/// <exception cref="Exception">
/// Thrown as soon as <c>WebUtils.DownloadFile</c> returns false for an item; later items are not attempted.
/// </exception>
public static void DownloadFiles(SpiderTreeNode node, string targetDir, Action<List<DownloadItem>> filterFile, IProgressCallback callback = null)
{
    var queue = GetDownloadFiles(node, targetDir, filterFile);

    foreach (var download in queue)
    {
        var succeeded = WebUtils.DownloadFile(download.Url, download.TargetFile, callback);
        if (succeeded)
        {
            continue;
        }

        throw new Exception(string.Format("Download {0} to {1} failed!", download.Url, download.TargetFile));
    }
}
/// <summary>
/// Downloads the HTML page at <c>node.Uri</c> and builds one <c>DownloadItem</c> for every link
/// that neither ends with "/" nor starts with "?".
/// </summary>
/// <param name="node">Node whose <c>Uri</c> is downloaded and used as the base of each item URL.</param>
/// <param name="targetDir">Directory under which each item's target file path is built.</param>
/// <param name="filterFile">Optional callback invoked with the item list before <c>FilterExists</c>; may be null.</param>
/// <returns>
/// The item list after <paramref name="filterFile"/> and <c>FilterExists</c> have been applied to it.
/// The list is empty when the page contains no matching anchor.
/// </returns>
public static List<DownloadItem> GetDownloadFiles(SpiderTreeNode node, string targetDir, Action<List<DownloadItem>> filterFile)
{
    List<DownloadItem> result = new List<DownloadItem>();

    var content = WebUtils.DownloadHtml(node.Uri);

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(content);

    // SelectNodes yields null (not an empty collection) when nothing matches,
    // so it must be checked before enumerating.
    var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
    if (anchors != null)
    {
        foreach (HtmlNode link in anchors)
        {
            var curlink = link.Attributes["href"].Value;

            // Skip links ending with "/" and links starting with "?".
            if (curlink.EndsWith("/", StringComparison.Ordinal) || curlink.StartsWith("?", StringComparison.Ordinal))
            {
                continue;
            }

            string cururi = node.Uri.EndsWith("/", StringComparison.Ordinal)
                ? node.Uri + curlink
                : node.Uri + "/" + curlink;

            // NOTE(review): the href is used verbatim to build the local path; values containing
            // path segments such as "../" are not filtered here.
            var targetFile = new FileInfo(targetDir + "/" + curlink).FullName;

            result.Add(new DownloadItem()
            {
                Url = cururi,
                TargetFile = targetFile,
                RetryCount = 0
            });
        }
    }

    if (null != filterFile)
    {
        filterFile(result);
    }

    // Presumably drops items whose target already exists - TODO confirm against FilterExists.
    FilterExists(result);

    return result;
}
/// <summary>
/// Brings the local copies of the children of <paramref name="m"/> under <paramref name="currDir"/>
/// up to date: previous versions are deleted, missing archives are downloaded and MD5-verified,
/// and archives are handed to <c>UncompressFile</c>.
/// </summary>
/// <remarks>
/// Before processing, lower-level children are removed from <c>m.Nodes</c>: level 1 and level 2 when
/// any level 3 child exists, otherwise level 1 only. <c>MarkHighestVersionNodes</c> is then called
/// on <paramref name="m"/>.
/// </remarks>
/// <param name="m">Parent node; its <c>Nodes</c> list is modified and then iterated.</param>
/// <param name="currDir">Directory that holds (or will hold) each child's directory and archive.</param>
/// <exception cref="UserTerminatedException">Thrown when <c>Progress</c> reports a pending cancellation.</exception>
/// <exception cref="FileNotFoundException">Thrown when the ".md5" file is missing after its download attempt.</exception>
/// <exception cref="Exception">Thrown when the archive download fails or its MD5 does not match.</exception>
private void DownloadLevel3Data(SpiderTreeNode m, string currDir)
{
    if (m.Nodes.Any(n => TCGAUtils.IsLevel3(n.Name)))
    {
        m.Nodes.RemoveAll(n => TCGAUtils.IsLevel1(n.Name) || TCGAUtils.IsLevel2(n.Name));
    }
    else //download level2 data
    {
        m.Nodes.RemoveAll(n => TCGAUtils.IsLevel1(n.Name));
    }

    m.MarkHighestVersionNodes();

    foreach (var node in m.Nodes)
    {
        if (Progress.IsCancellationPending())
        {
            throw new UserTerminatedException();
        }

        var fDir = currDir + "/" + node.Name;
        var compressed = fDir + ".tar.gz";
        var compressedMd5 = fDir + ".tar.gz.md5";

        var parentDir = Path.GetDirectoryName(currDir);
        var parentFDir = parentDir + "/" + node.Name;
        var parentFComparessed = parentFDir + ".tar.gz";
        var parentFComparessedMd5 = parentFDir + ".tar.gz.md5";

        // Data found one directory above currDir is moved into currDir
        // (presumably left over from an older directory layout - TODO confirm).
        if (Directory.Exists(parentFDir))
        {
            Directory.Move(parentFDir, fDir);
        }

        if (File.Exists(parentFComparessed))
        {
            File.Move(parentFComparessed, compressed);
        }

        if (File.Exists(parentFComparessedMd5))
        {
            File.Move(parentFComparessedMd5, compressedMd5);
        }

        if (node.IsPreviousVersion)
        {
            if (Directory.Exists(fDir))
            {
                Progress.SetMessage("Deleting previous version : " + fDir);

                // Recursive delete: a non-recursive delete fails when the directory has sub-directories.
                Directory.Delete(fDir, true);
            }

            // Remove the archive and its checksum independently so that a checksum file
            // is not left behind when the archive itself is already gone.
            if (File.Exists(compressed))
            {
                File.Delete(compressed);
            }

            if (File.Exists(compressedMd5))
            {
                File.Delete(compressedMd5);
            }
        }
        else
        {
            Progress.SetMessage("Processing {0}.{1} ...", m.Name, node.Name);

            var bDownload = !File.Exists(compressed);
            var bTar = bDownload || !Directory.Exists(fDir);

            if (bDownload)
            {
                // The archive URL is the node URL without its trailing "/" plus ".tar.gz".
                // Only strip the last character when it really is a slash.
                var baseUri = node.Uri.EndsWith("/", StringComparison.Ordinal)
                    ? node.Uri.Substring(0, node.Uri.Length - 1)
                    : node.Uri;
                var uri = baseUri + ".tar.gz";

                if (!WebUtils.DownloadFile(uri, compressed, this.Progress))
                {
                    throw new Exception(string.Format("Downloading {0} failed", uri));
                }

                if (Progress.IsCancellationPending())
                {
                    throw new UserTerminatedException();
                }

                WebUtils.DownloadFile(uri + ".md5", compressedMd5);

                // Report a missing checksum file explicitly rather than failing inside ReadAllText.
                if (!File.Exists(compressedMd5))
                {
                    throw new FileNotFoundException(string.Format("Downloading {0} failed", uri + ".md5"), compressedMd5);
                }

                var downloadedMD5 = HashUtils.GetMD5Hash(compressed, true, false);

                // The digest is the first whitespace-separated token of the checksum file; line breaks
                // count as separators so a file holding only "digest\n" is parsed correctly.
                var md5Fields = File.ReadAllText(compressedMd5)
                    .Split(new[] { '\t', ' ', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
                var trueMD5 = md5Fields.Length > 0 ? md5Fields[0] : string.Empty;

                // Hex digests are compared case-insensitively.
                if (!string.Equals(downloadedMD5, trueMD5, StringComparison.OrdinalIgnoreCase))
                {
                    // The downloaded archive is left in place; the message asks the user to delete it.
                    throw new Exception(string.Format("MD5 of file {0} doesn't equal to server provided MD5, downloading failed!\nYou may consider to delete the file and try again, or you may download and de-compress it by youself.", compressed));
                }
            }

            UncompressFile(currDir, fDir, compressed, bTar);
        }
    }
}