/// <summary>
/// Opens a <see cref="StreamWriter"/> on <paramref name="fileName"/> and prints
/// <paramref name="node"/> to it via <c>Print</c>.
/// </summary>
/// <param name="node">Node handed to <c>Print</c>.</param>
/// <param name="fileName">Path passed to the <see cref="StreamWriter"/> constructor.</param>
public static void PrintToFile(this SpiderTreeNode node, string fileName)
 {
     StreamWriter output = new StreamWriter(fileName);
     try
     {
         node.Print(output);
     }
     finally
     {
         // Always release the writer, even when printing throws.
         output.Dispose();
     }
 }
Пример #2
0
        /// <summary>
        /// Prints <paramref name="node"/> to the console, downloads the HTML found at its Uri and
        /// adds one child node for every sub-directory link on that page.
        /// </summary>
        /// <param name="node">Node to populate; children are appended to its Nodes collection.</param>
        /// <param name="recursive">When true, every child of the node is populated the same way.</param>
        public static void FillDirectoryTree(SpiderTreeNode node, bool recursive)
        {
            node.Print(Console.Out);

            // Nodes whose name carries one of these markers are not scanned any further.
            if (node.Name.Contains(".Level_") || node.Name.Contains(".mage-tab."))
            {
                return;
            }

            var content = WebUtils.DownloadHtml(node.Uri);

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(content);

            // HtmlAgilityPack's SelectNodes yields null (not an empty collection) when nothing
            // matches, so a page without anchors must mean "no children" rather than a crash.
            var links = doc.DocumentNode.SelectNodes("//a[@href]");
            if (links != null)
            {
                foreach (HtmlNode link in links)
                {
                    var curlink = link.Attributes["href"].Value;

                    // Only relative links ending in '/' are kept; "lost+found/" is skipped explicitly.
                    if (curlink.StartsWith("/") || !curlink.EndsWith("/") || curlink.Equals("lost+found/"))
                    {
                        continue;
                    }

                    var cururi = node.Uri.EndsWith("/")
                        ? node.Uri + curlink
                        : node.Uri + "/" + curlink;

                    // The text right after the anchor becomes LastModified once its final character
                    // is dropped. A missing sibling or empty text yields an empty value instead of
                    // a NullReferenceException / ArgumentOutOfRangeException.
                    var sibling      = link.NextSibling;
                    var lastModified = sibling == null ? string.Empty : sibling.InnerText.Trim();
                    if (lastModified.Length > 0)
                    {
                        lastModified = lastModified.Substring(0, lastModified.Length - 1).Trim();
                    }

                    node.Nodes.Add(new SpiderTreeNode()
                    {
                        Name         = curlink.TrimEnd('/'),
                        Uri          = cururi,
                        LastModified = lastModified,
                        Depth        = node.Depth + 1
                    });
                }
            }

            if (recursive)
            {
                foreach (var subnode in node.Nodes)
                {
                    FillDirectoryTree(subnode, true);
                }
            }
        }
Пример #3
0
    /// <summary>
    /// Prints <paramref name="node"/> to the console, downloads the HTML found at its Uri and
    /// adds one child node for every sub-directory link on that page.
    /// </summary>
    /// <param name="node">Node to populate; children are appended to its Nodes collection.</param>
    /// <param name="recursive">When true, every child of the node is populated the same way.</param>
    public static void FillDirectoryTree(SpiderTreeNode node, bool recursive)
    {
      node.Print(Console.Out);

      // Nodes whose name carries one of these markers are not scanned any further.
      if (node.Name.Contains(".Level_") || node.Name.Contains(".mage-tab."))
      {
        return;
      }

      var content = WebUtils.DownloadHtml(node.Uri);

      HtmlDocument doc = new HtmlDocument();
      doc.LoadHtml(content);

      // HtmlAgilityPack's SelectNodes yields null (not an empty collection) when nothing
      // matches, so a page without anchors must mean "no children" rather than a crash.
      var links = doc.DocumentNode.SelectNodes("//a[@href]");
      if (links != null)
      {
        foreach (HtmlNode link in links)
        {
          var curlink = link.Attributes["href"].Value;

          // Only relative links ending in '/' are kept; "lost+found/" is skipped explicitly.
          if (curlink.StartsWith("/") || !curlink.EndsWith("/") || curlink.Equals("lost+found/"))
          {
            continue;
          }

          var cururi = node.Uri.EndsWith("/")
            ? node.Uri + curlink
            : node.Uri + "/" + curlink;

          // The text right after the anchor becomes LastModified once its final character
          // is dropped. A missing sibling or empty text yields an empty value instead of
          // a NullReferenceException / ArgumentOutOfRangeException.
          var sibling = link.NextSibling;
          var lastModified = sibling == null ? string.Empty : sibling.InnerText.Trim();
          if (lastModified.Length > 0)
          {
            lastModified = lastModified.Substring(0, lastModified.Length - 1).Trim();
          }

          node.Nodes.Add(new SpiderTreeNode()
          {
            Name = curlink.TrimEnd('/'),
            Uri = cururi,
            LastModified = lastModified,
            Depth = node.Depth + 1
          });
        }
      }

      if (recursive)
      {
        foreach (var subnode in node.Nodes)
        {
          FillDirectoryTree(subnode, true);
        }
      }
    }
Пример #4
0
        /// <summary>
        /// Resolves the download list for <paramref name="node"/> through GetDownloadFiles and
        /// fetches each entry in order, stopping at the first failure.
        /// </summary>
        /// <param name="node">Node forwarded to GetDownloadFiles.</param>
        /// <param name="targetDir">Target directory forwarded to GetDownloadFiles.</param>
        /// <param name="filterFile">Filter callback forwarded to GetDownloadFiles.</param>
        /// <param name="callback">Progress callback handed to every WebUtils.DownloadFile call.</param>
        /// <exception cref="Exception">WebUtils.DownloadFile returned false for an item.</exception>
        public static void DownloadFiles(SpiderTreeNode node, string targetDir, Action <List <DownloadItem> > filterFile, IProgressCallback callback = null)
        {
            foreach (var entry in GetDownloadFiles(node, targetDir, filterFile))
            {
                bool succeeded = WebUtils.DownloadFile(entry.Url, entry.TargetFile, callback);
                if (succeeded)
                {
                    continue;
                }

                throw new Exception(string.Format("Download {0} to {1} failed!", entry.Url, entry.TargetFile));
            }
        }
 /// <summary>
 /// Writes one line for <paramref name="node"/>, prefixed with one "->" per depth level above 1,
 /// then does the same for every entry of its Nodes collection.
 /// </summary>
 /// <param name="node">Node to print.</param>
 /// <param name="writer">Destination for the output.</param>
 public static void Print(this SpiderTreeNode node, TextWriter writer)
 {
     // Depth 1 gets no prefix; each further level adds one arrow.
     for (int level = node.Depth; level > 1; level--)
     {
         writer.Write("->");
     }

     writer.WriteLine("{0}:[{1}]", node.Name, node.LastModified);

     foreach (var child in node.Nodes)
     {
         child.Print(writer);
     }
 }
Пример #6
0
        /// <summary>
        /// Creates a depth-1 node with the given name and Uri and populates it through FillDirectoryTree.
        /// </summary>
        /// <param name="name">Name assigned to the new node.</param>
        /// <param name="uri">Uri assigned to the new node.</param>
        /// <param name="recursive">Forwarded unchanged to FillDirectoryTree.</param>
        /// <returns>The newly created node.</returns>
        public static SpiderTreeNode GetDirectoryTree(string name, string uri, bool recursive)
        {
            var root = new SpiderTreeNode();
            root.Name  = name;
            root.Uri   = uri;
            root.Depth = 1;

            FillDirectoryTree(root, recursive);
            return root;
        }
Пример #7
0
    /// <summary>
    /// Creates a depth-1 node with the given name and Uri and populates it through FillDirectoryTree.
    /// </summary>
    /// <param name="name">Name assigned to the new node.</param>
    /// <param name="uri">Uri assigned to the new node.</param>
    /// <param name="recursive">Forwarded unchanged to FillDirectoryTree.</param>
    /// <returns>The newly created node.</returns>
    public static SpiderTreeNode GetDirectoryTree(string name, string uri, bool recursive)
    {
      var root = new SpiderTreeNode();
      root.Name = name;
      root.Uri = uri;
      root.Depth = 1;

      FillDirectoryTree(root, recursive);
      return root;
    }
        /// <summary>
        /// Click handler: loads the tree from the selected XML file, collects the distinct names of
        /// the root's direct children into _tumors and calls FillTumor.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnLoad_Click(object sender, EventArgs e)
        {
            var path = xmlFile.FullName;

            // Nothing to load: tell the user and leave the current state untouched.
            if (!File.Exists(path))
            {
                MessageBox.Show(this, string.Format("Xml file {0} not exists", path));
                return;
            }

            _lastXml  = path;
            _rootNode = new SpiderTreeNodeXmlFormat().ReadFromFile(path);
            _tumors   = _rootNode.Nodes
                                 .Select(child => child.Name as object)
                                 .Distinct()
                                 .ToList();
            FillTumor();
        }
Пример #9
0
        /// <summary>
        /// Downloads the HTML at the node's Uri and builds one <c>DownloadItem</c> for every link
        /// that neither ends with '/' nor starts with '?'. The list is then passed to
        /// <paramref name="filterFile"/> (when supplied) and to FilterExists before being returned.
        /// </summary>
        /// <param name="node">Node whose Uri is scanned for links.</param>
        /// <param name="targetDir">Directory the target file paths are built under.</param>
        /// <param name="filterFile">Optional callback that receives the item list; may be null.</param>
        /// <returns>The item list after filtering; empty when the page contains no links.</returns>
        public static List <DownloadItem> GetDownloadFiles(SpiderTreeNode node, string targetDir, Action <List <DownloadItem> > filterFile)
        {
            List <DownloadItem> result = new List <DownloadItem>();

            var content = WebUtils.DownloadHtml(node.Uri);

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(content);

            // HtmlAgilityPack's SelectNodes yields null (not an empty collection) when nothing
            // matches, so a page without anchors must produce an empty list rather than a crash.
            var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
            if (anchors != null)
            {
                foreach (HtmlNode link in anchors)
                {
                    var curlink = link.Attributes["href"].Value;

                    // Skip links ending in '/' and links starting with '?'.
                    if (curlink.EndsWith("/") || curlink.StartsWith("?"))
                    {
                        continue;
                    }

                    var cururi = node.Uri.EndsWith("/")
                        ? node.Uri + curlink
                        : node.Uri + "/" + curlink;

                    result.Add(new DownloadItem()
                    {
                        Url        = cururi,
                        TargetFile = new FileInfo(targetDir + "/" + curlink).FullName,
                        RetryCount = 0
                    });
                }
            }

            if (null != filterFile)
            {
                filterFile(result);
            }

            FilterExists(result);

            return(result);
        }
        /// <summary>
        /// Downloads the files of <paramref name="node"/> into <paramref name="targetDir"/> by
        /// delegating to TCGASpider.DownloadFiles without a file filter.
        /// </summary>
        /// <param name="tumor">Not used by the active code path.</param>
        /// <param name="node">Node forwarded to TCGASpider.DownloadFiles.</param>
        /// <param name="targetDir">Target directory forwarded to TCGASpider.DownloadFiles.</param>
        /// <param name="callback">Optional progress callback, forwarded as-is.</param>
        public void DownloadClinicalData(string tumor, SpiderTreeNode node, string targetDir, IProgressCallback callback = null)
        {
            // A previously disabled variant first tried to fetch "clinical_{tumor}.tar.gz" from the
            // node's Uri and uncompress it, falling back to this per-file download on failure.
            TCGASpider.DownloadFiles(node, targetDir, filterFile: null, callback: callback);
        }
Пример #11
0
    /// <summary>
    /// Click handler: loads the tree from the selected XML file, collects the distinct names of
    /// the root's direct children into _tumors and calls FillTumor.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    private void btnLoad_Click(object sender, EventArgs e)
    {
      var path = xmlFile.FullName;

      // Nothing to load: tell the user and leave the current state untouched.
      if (!File.Exists(path))
      {
        MessageBox.Show(this, string.Format("Xml file {0} not exists", path));
        return;
      }

      _lastXml = path;
      _rootNode = new SpiderTreeNodeXmlFormat().ReadFromFile(path);
      _tumors = _rootNode.Nodes
                         .Select(child => child.Name as object)
                         .Distinct()
                         .ToList();
      FillTumor();
    }
Пример #12
0
    /// <summary>
    /// Synchronises the archives of the child nodes of <paramref name="m"/> into
    /// <paramref name="currDir"/>: nodes flagged as previous versions are removed from disk,
    /// all others are downloaded (when the archive is missing), MD5-verified and handed to
    /// UncompressFile.
    /// </summary>
    /// <param name="m">Parent node; lower-level children are removed from its Nodes list in place.</param>
    /// <param name="currDir">Directory that holds one folder and one .tar.gz per child node.</param>
    /// <exception cref="UserTerminatedException">Cancellation was requested through Progress.</exception>
    /// <exception cref="Exception">A download failed or the MD5 check did not match.</exception>
    private void DownloadLevel3Data(SpiderTreeNode m, string currDir)
    {
      // Keep level 3 nodes only when at least one exists; otherwise keep everything above level 1.
      if (m.Nodes.Any(n => TCGAUtils.IsLevel3(n.Name)))
      {
        m.Nodes.RemoveAll(n => TCGAUtils.IsLevel1(n.Name) || TCGAUtils.IsLevel2(n.Name));
      }
      else //download level2 data
      {
        m.Nodes.RemoveAll(n => TCGAUtils.IsLevel1(n.Name));
      }

      // NOTE(review): presumably sets IsPreviousVersion on superseded nodes - confirm in SpiderTreeNode.
      m.MarkHighestVersionNodes();
      foreach (var node in m.Nodes)
      {
        if (Progress.IsCancellationPending())
        {
          throw new UserTerminatedException();
        }

        var fDir = currDir + "/" + node.Name;
        var compressed = fDir + ".tar.gz";
        var compressedMd5 = fDir + ".tar.gz.md5";

        var parentDir = Path.GetDirectoryName(currDir);
        var parentFDir = parentDir + "/" + node.Name;
        var parentFComparessed = parentFDir + ".tar.gz";
        var parentFComparessedMd5 = parentFDir + ".tar.gz.md5";

        // Anything with the same name found one directory up is moved into currDir first.
        if (Directory.Exists(parentFDir))
        {
          Directory.Move(parentFDir, fDir);
        }
        if (File.Exists(parentFComparessed))
        {
          File.Move(parentFComparessed, compressed);
        }
        if (File.Exists(parentFComparessedMd5))
        {
          File.Move(parentFComparessedMd5, compressedMd5);
        }

        if (node.IsPreviousVersion)
        {
          if (Directory.Exists(fDir))
          {
            Progress.SetMessage("Deleting previous version : " + fDir);
            // Recursive delete: a file-only cleanup fails as soon as the folder has sub-directories.
            Directory.Delete(fDir, true);
          }

          // Remove each leftover independently so a stray .md5 is not kept when the archive is gone.
          if (File.Exists(compressed))
          {
            File.Delete(compressed);
          }
          if (File.Exists(compressedMd5))
          {
            File.Delete(compressedMd5);
          }
        }
        else
        {
          Progress.SetMessage("Processing {0}.{1} ...", m.Name, node.Name);

          var bDownload = !File.Exists(compressed);
          var bTar = bDownload || !Directory.Exists(fDir);

          if (bDownload)
          {
            // Strip the trailing slash without eating a real character when there is none.
            var uri = node.Uri.TrimEnd('/') + ".tar.gz";
            if (!WebUtils.DownloadFile(uri, compressed, this.Progress))
            {
              throw new Exception(string.Format("Downloading {0} failed", uri));
            }

            if (Progress.IsCancellationPending())
            {
              throw new UserTerminatedException();
            }

            var md5Uri = uri + ".md5";
            WebUtils.DownloadFile(md5Uri, compressedMd5);
            if (!File.Exists(compressedMd5))
            {
              // Report the real cause instead of a FileNotFoundException from the read below.
              throw new Exception(string.Format("Downloading {0} failed", md5Uri));
            }

            var downloadedMD5 = HashUtils.GetMD5Hash(compressed, true, false);

            // The checksum is the first whitespace-separated token; line breaks count as
            // separators so a file holding only "<hash>\n" still matches.
            var md5Tokens = File.ReadAllText(compressedMd5)
              .Split(new[] { '\t', ' ', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            var trueMD5 = md5Tokens.Length > 0 ? md5Tokens[0] : string.Empty;

            if (!downloadedMD5.Equals(trueMD5))
            {
              throw new Exception(string.Format("MD5 of file {0} doesn't equal to server provided MD5, downloading failed!\nYou may consider to delete the file and try again, or you may download and de-compress it by youself.", compressed));
            }
          }

          UncompressFile(currDir, fDir, compressed, bTar);
        }
      }
    }
Пример #13
0
    /// <summary>
    /// Downloads the files of <paramref name="node"/> into <paramref name="targetDir"/> by
    /// delegating to TCGASpider.DownloadFiles without a file filter.
    /// </summary>
    /// <param name="tumor">Not used by the active code path.</param>
    /// <param name="node">Node forwarded to TCGASpider.DownloadFiles.</param>
    /// <param name="targetDir">Target directory forwarded to TCGASpider.DownloadFiles.</param>
    /// <param name="callback">Optional progress callback, forwarded as-is.</param>
    public void DownloadClinicalData(string tumor, SpiderTreeNode node, string targetDir, IProgressCallback callback = null)
    {
      // A previously disabled variant first tried to fetch "clinical_{tumor}.tar.gz" from the
      // node's Uri and uncompress it, falling back to this per-file download on failure.
      TCGASpider.DownloadFiles(node, targetDir, filterFile: null, callback: callback);
    }
Пример #14
0
    /// <summary>
    /// Resolves the download list for <paramref name="node"/> through GetDownloadFiles and
    /// fetches each entry in order, stopping at the first failure.
    /// </summary>
    /// <param name="node">Node forwarded to GetDownloadFiles.</param>
    /// <param name="targetDir">Target directory forwarded to GetDownloadFiles.</param>
    /// <param name="filterFile">Filter callback forwarded to GetDownloadFiles.</param>
    /// <param name="callback">Progress callback handed to every WebUtils.DownloadFile call.</param>
    /// <exception cref="Exception">WebUtils.DownloadFile returned false for an item.</exception>
    public static void DownloadFiles(SpiderTreeNode node, string targetDir, Action<List<DownloadItem>> filterFile, IProgressCallback callback = null)
    {
      foreach (var entry in GetDownloadFiles(node, targetDir, filterFile))
      {
        bool succeeded = WebUtils.DownloadFile(entry.Url, entry.TargetFile, callback);
        if (succeeded)
        {
          continue;
        }

        throw new Exception(string.Format("Download {0} to {1} failed!", entry.Url, entry.TargetFile));
      }
    }
Пример #15
0
    /// <summary>
    /// Downloads the HTML at the node's Uri and builds one <c>DownloadItem</c> for every link
    /// that neither ends with '/' nor starts with '?'. The list is then passed to
    /// <paramref name="filterFile"/> (when supplied) and to FilterExists before being returned.
    /// </summary>
    /// <param name="node">Node whose Uri is scanned for links.</param>
    /// <param name="targetDir">Directory the target file paths are built under.</param>
    /// <param name="filterFile">Optional callback that receives the item list; may be null.</param>
    /// <returns>The item list after filtering; empty when the page contains no links.</returns>
    public static List<DownloadItem> GetDownloadFiles(SpiderTreeNode node, string targetDir, Action<List<DownloadItem>> filterFile)
    {
      List<DownloadItem> result = new List<DownloadItem>();

      var content = WebUtils.DownloadHtml(node.Uri);

      HtmlDocument doc = new HtmlDocument();
      doc.LoadHtml(content);

      // HtmlAgilityPack's SelectNodes yields null (not an empty collection) when nothing
      // matches, so a page without anchors must produce an empty list rather than a crash.
      var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
      if (anchors != null)
      {
        foreach (HtmlNode link in anchors)
        {
          var curlink = link.Attributes["href"].Value;

          // Skip links ending in '/' and links starting with '?'.
          if (curlink.EndsWith("/") || curlink.StartsWith("?"))
          {
            continue;
          }

          var cururi = node.Uri.EndsWith("/")
            ? node.Uri + curlink
            : node.Uri + "/" + curlink;

          result.Add(new DownloadItem()
          {
            Url = cururi,
            TargetFile = new FileInfo(targetDir + "/" + curlink).FullName,
            RetryCount = 0
          });
        }
      }

      if (null != filterFile)
      {
        filterFile(result);
      }

      FilterExists(result);

      return result;
    }
        /// <summary>
        /// Synchronises the archives of the child nodes of <paramref name="m"/> into
        /// <paramref name="currDir"/>: nodes flagged as previous versions are removed from disk,
        /// all others are downloaded (when the archive is missing), MD5-verified and handed to
        /// UncompressFile.
        /// </summary>
        /// <param name="m">Parent node; lower-level children are removed from its Nodes list in place.</param>
        /// <param name="currDir">Directory that holds one folder and one .tar.gz per child node.</param>
        /// <exception cref="UserTerminatedException">Cancellation was requested through Progress.</exception>
        /// <exception cref="Exception">A download failed or the MD5 check did not match.</exception>
        private void DownloadLevel3Data(SpiderTreeNode m, string currDir)
        {
            // Keep level 3 nodes only when at least one exists; otherwise keep everything above level 1.
            if (m.Nodes.Any(n => TCGAUtils.IsLevel3(n.Name)))
            {
                m.Nodes.RemoveAll(n => TCGAUtils.IsLevel1(n.Name) || TCGAUtils.IsLevel2(n.Name));
            }
            else //download level2 data
            {
                m.Nodes.RemoveAll(n => TCGAUtils.IsLevel1(n.Name));
            }

            // NOTE(review): presumably sets IsPreviousVersion on superseded nodes - confirm in SpiderTreeNode.
            m.MarkHighestVersionNodes();
            foreach (var node in m.Nodes)
            {
                if (Progress.IsCancellationPending())
                {
                    throw new UserTerminatedException();
                }

                var fDir          = currDir + "/" + node.Name;
                var compressed    = fDir + ".tar.gz";
                var compressedMd5 = fDir + ".tar.gz.md5";

                var parentDir             = Path.GetDirectoryName(currDir);
                var parentFDir            = parentDir + "/" + node.Name;
                var parentFComparessed    = parentFDir + ".tar.gz";
                var parentFComparessedMd5 = parentFDir + ".tar.gz.md5";

                // Anything with the same name found one directory up is moved into currDir first.
                if (Directory.Exists(parentFDir))
                {
                    Directory.Move(parentFDir, fDir);
                }
                if (File.Exists(parentFComparessed))
                {
                    File.Move(parentFComparessed, compressed);
                }
                if (File.Exists(parentFComparessedMd5))
                {
                    File.Move(parentFComparessedMd5, compressedMd5);
                }

                if (node.IsPreviousVersion)
                {
                    if (Directory.Exists(fDir))
                    {
                        Progress.SetMessage("Deleting previous version : " + fDir);
                        // Recursive delete: a file-only cleanup fails as soon as the folder has sub-directories.
                        Directory.Delete(fDir, true);
                    }

                    // Remove each leftover independently so a stray .md5 is not kept when the archive is gone.
                    if (File.Exists(compressed))
                    {
                        File.Delete(compressed);
                    }
                    if (File.Exists(compressedMd5))
                    {
                        File.Delete(compressedMd5);
                    }
                }
                else
                {
                    Progress.SetMessage("Processing {0}.{1} ...", m.Name, node.Name);

                    var bDownload = !File.Exists(compressed);
                    var bTar      = bDownload || !Directory.Exists(fDir);

                    if (bDownload)
                    {
                        // Strip the trailing slash without eating a real character when there is none.
                        var uri = node.Uri.TrimEnd('/') + ".tar.gz";
                        if (!WebUtils.DownloadFile(uri, compressed, this.Progress))
                        {
                            throw new Exception(string.Format("Downloading {0} failed", uri));
                        }

                        if (Progress.IsCancellationPending())
                        {
                            throw new UserTerminatedException();
                        }

                        var md5Uri = uri + ".md5";
                        WebUtils.DownloadFile(md5Uri, compressedMd5);
                        if (!File.Exists(compressedMd5))
                        {
                            // Report the real cause instead of a FileNotFoundException from the read below.
                            throw new Exception(string.Format("Downloading {0} failed", md5Uri));
                        }

                        var downloadedMD5 = HashUtils.GetMD5Hash(compressed, true, false);

                        // The checksum is the first whitespace-separated token; line breaks count as
                        // separators so a file holding only "<hash>\n" still matches.
                        var md5Tokens = File.ReadAllText(compressedMd5)
                            .Split(new[] { '\t', ' ', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
                        var trueMD5 = md5Tokens.Length > 0 ? md5Tokens[0] : string.Empty;

                        if (!downloadedMD5.Equals(trueMD5))
                        {
                            throw new Exception(string.Format("MD5 of file {0} doesn't equal to server provided MD5, downloading failed!\nYou may consider to delete the file and try again, or you may download and de-compress it by youself.", compressed));
                        }
                    }

                    UncompressFile(currDir, fDir, compressed, bTar);
                }
            }
        }