/// <summary>
        /// Opens the archive at <paramref name="path"/>, builds the node tree from its
        /// entries and sets up the cache callback that extracts an entry on demand.
        /// </summary>
        /// <param name="path">Archive path; stored in <c>zipfile</c> and passed to <c>SevenZipExtractor</c>.</param>
        public SevenZipFs(string path)
        {
            zipfile   = path;
            extractor = new SevenZipExtractor(path);

            // Tree keys are the entries' file names; directories are recognised
            // through IsDirectory applied to the entry attributes.
            root = CreateTree <ArchiveFileInfo>(
                extractor.ArchiveFileData,
                entry => entry.FileName,
                entry => IsDirectory(entry.Attributes));

            // Every extraction goes through the single shared extractor, so the
            // callback takes readerLock for the duration of the ExtractFile call.
            cache = new MemoryStreamCache <FsNode <ArchiveFileInfo> >((node, output) =>
            {
                lock (readerLock)
                {
                    extractor.ExtractFile(node.Info.Index, output);
                }
            });
        }
Beispiel #2
0
        /// <summary>
        /// Opens the archive at <paramref name="path"/> only long enough to build and check
        /// the node tree, then installs a cache callback that re-opens the archive for
        /// every single extraction.
        /// </summary>
        /// <param name="path">Archive path; stored in <c>zipfile</c> and passed to <c>SevenZipExtractor</c>.</param>
        public SevenZipFs(string path)
        {
            zipfile   = path;
            extractor = new SevenZipExtractor(path);
            try
            {
                root = CreateTree <ArchiveFileInfo>(extractor.ArchiveFileData, x => x.FileName, x => IsDirectory(x.Attributes));

                CheckDirectorys(root);
            }
            finally
            {
                // Release the archive even when building or checking the tree throws.
                extractor.Dispose();
                extractor = null;
            }

            cache = new MemoryStreamCache <FsNode <ArchiveFileInfo> >((item, stream) =>
            {
                // The extraction runs on its own thread which is joined right away, so the
                // caller still blocks until the worker has finished.
                var th = new Thread(() =>
                {
                    lock (readerLock)
                    {
                        try
                        {
                            var current = new SevenZipExtractor(path);
                            // Keep the field pointing at the live extractor while it is in use.
                            extractor = current;
                            try
                            {
                                current.Extracting            += Extractor_Extracting;
                                current.FileExtractionStarted += Extractor_FileExtractionStarted;

                                current.ExtractFile(item.Info.Index, stream);
                            }
                            finally
                            {
                                // Dispose on failure too, so a bad entry does not leak the archive handle
                                // or leave a stale extractor in the field.
                                current.Dispose();
                                extractor = null;
                            }
                        }
                        catch (Exception ex)
                        {
                            // Best-effort: failures are reported on the console and not propagated to the caller.
                            var previousColor = Console.ForegroundColor;
                            Console.ForegroundColor = ConsoleColor.Red;
                            try
                            {
                                Console.WriteLine(ex);
                            }
                            finally
                            {
                                // Restore whatever colour was active instead of forcing white.
                                Console.ForegroundColor = previousColor;
                            }
                        }
                    }
                });
                th.Start();

                th.Join();
            });
        }
Beispiel #3
0
        /// <summary>
        /// Builds the node tree for the WARC records listed in a CDX index, adds a
        /// <c>_raw</c> folder that mirrors that tree, and installs the cache callback
        /// that produces the content of each node.
        /// </summary>
        /// <param name="cdx">Path of the CDX index; its directory is passed to <c>ToWarcItem</c> for every record.</param>
        public WarcFs(string cdx)
        {
            this.cdx = cdx;
            byte[] fileNameBytes  = null;
            string fileNameString = null;
            var    folder         = Path.GetDirectoryName(cdx);

            // Chooses the "keep" argument handed to WebsiteScraper.GetPathInternal; -1 when no rule matches.
            // NOTE(review): what each level means is defined by GetPathInternal — confirm there.
            // Comparisons are ordinal: URL paths and host names are not linguistic text.
            int GetKeepLevel(Uri uri)
            {
                if (uri.AbsolutePath.StartsWith("/w/images/", StringComparison.Ordinal))
                {
                    return 2;
                }
                if (uri.AbsolutePath.StartsWith("/wiki/", StringComparison.Ordinal))
                {
                    return 1;
                }
                if (uri.Host.EndsWith(".fbcdn.net", StringComparison.Ordinal))
                {
                    return 0;
                }
                if (uri.Host.EndsWith(".media.tumblr.com", StringComparison.Ordinal))
                {
                    return 0;
                }
                if (uri.Host.EndsWith(".bp.blogspot.com", StringComparison.Ordinal))
                {
                    return 0;
                }
                if (uri.Host.EndsWith(".reddit.com", StringComparison.Ordinal) && uri.AbsolutePath.Contains("/comments/"))
                {
                    return 3;
                }
                if (uri.Host.EndsWith(".staticflickr.com", StringComparison.Ordinal))
                {
                    return 0;
                }
                if (uri.Host.EndsWith(".giphy.com", StringComparison.Ordinal) && uri.Host.Contains("media"))
                {
                    return 0;
                }
                return -1;
            }

            this.Root = CreateTree <WarcItem>(WarcCdxItemRaw.Read(cdx).Select(x =>
            {
                // Records that carry a response code outside 200-299 are skipped;
                // records without a response code are kept.
                var response = x.ResponseCode;
                if (response.Length != 0)
                {
                    var responseCode = Utf8Utils.ParseInt32(response);
                    if (responseCode < 200 || responseCode >= 300)
                    {
                        return(null);
                    }
                }
                return(x.ToWarcItem(folder, ref fileNameBytes, ref fileNameString));
            }).Where(x => x != null), x =>
            {
                // Key selector: turns the record URL into a backslash-separated path.
                var url = new Uri(x.Url);

                var path = WebsiteScraper.GetPathInternal(null, url, x.ContentType, GetKeepLevel(url));
                path     = path.Replace('/', '\\');

                if (path.Length > 150)
                {
                    // Over-long paths that contain '‽' keep the part before it and get a hash
                    // of the full path plus the original extension instead of the remainder.
                    var z = path.IndexOf('‽');
                    if (z != -1)
                    {
                        // The cast to long keeps Math.Abs from overflowing on int.MinValue.
                        // NOTE(review): string.GetHashCode is not guaranteed to be stable across
                        // processes or runtimes, so these names may differ between runs.
                        path = path.Substring(0, z) + "‽{" + Math.Abs((long)path.GetHashCode()) + "}" + Path.GetExtension(path);
                    }
                }

                if (url.IsHostedOn("facebook.com") && url.AbsolutePath.StartsWith("/pages_reaction_units/", StringComparison.Ordinal))
                {
                    // These responses are rendered as HTML by the cache callback below, so the
                    // node gets an .html name (TrimEnd presumably strips a trailing ".js").
                    path  = path.TrimEnd(".js");
                    path += ".html";
                }

                return(path);
            }, null, x =>
            {
                // Node callback: tag the node as virtual and index it by URL when it has a record.
                x.Tag = TagVirtual;
                if (x.Info != null)
                {
                    urlToFsNode[x.Info.Url] = x;
                }
            });

            // rawRoot must be definitely assigned before CreateGetChildrenDelegate is called,
            // because that local function captures it; hence the explicit null first.
            FsNode <WarcItem> rawRoot = null;

            rawRoot = new FsNode <WarcItem>()
            {
                Name = "_raw", GetChildrenDelegate = CreateGetChildrenDelegate(this.Root)
            };

            // Returns a delegate that lists copies of reference's children (excluding rawRoot itself),
            // with Tag cleared and FullName prefixed with "_raw\". The copies are created each time
            // the delegate is invoked, and their own children are wired up the same way.
            Func <List <FsNode <WarcItem> > > CreateGetChildrenDelegate(FsNode <WarcItem> reference)
            {
                if (reference.Children == null)
                {
                    return(() => null);
                }
                return(() => reference.Children
                    .Where(child => child != rawRoot)
                    .Select(child => new FsNode <WarcItem>()
                    {
                        Info = child.Info,
                        Name = child.Name,
                        GetChildrenDelegate = CreateGetChildrenDelegate(child),
                        Tag = null,
                        FullName = child.FullName != null ? "_raw\\" + child.FullName : null
                    })
                    .ToList());
            }

            this.Root.Children.Add(rawRoot);


            cache = new MemoryStreamCache <FsNode <WarcItem> >((item, dest) =>
            {
                if (item.Tag == TagVirtual)
                {
                    var ct = item.Info.ContentType;
                    // Parentheses make the original precedence explicit: HTML content type, OR a
                    // Facebook pages_reaction_units URL regardless of content type.
                    if ((ct != null && ct.Contains("/html")) || item.Info.Url.Contains("facebook.com/pages_reaction_units/"))
                    {
                        HtmlNode doc;
                        var pagePath = item.FullName;
                        if (item.Info.Url.Contains("/pages_reaction_units/"))
                        {
                            // The payload is JSON: parsing starts at the first '{', and the "__html"
                            // fragment of every "domops" entry is appended to an empty HTML page.
                            var jsontext = item.Info.ReadText();
                            var idx      = jsontext.IndexOf('{');
                            var json     = (JObject)HttpUtils.ReadJsonToken(jsontext, idx);
                            doc          = new HtmlDocument("<!doctype html><html><head><meta charset=\"utf-8\"></head><body></body></html>").DocumentNode;
                            doc.OwnerDocument.SetPageUrl(item.Info.Url.AsUri());
                            var body = doc.Descendants("body").First();

                            foreach (var domop in (JArray)json["domops"])
                            {
                                var html = ((JArray)domop).First(x => x is JObject)["__html"].Value <string>();
                                body.AppendChild(html.AsHtmlNode());
                            }
                        }
                        else
                        {
                            doc = item.Info.ReadHtml();
                        }
                        ProcessHtml(ref doc, pagePath);

                        // NOTE(review): this stylesheet URL points at a path on one developer's
                        // machine; it will not resolve anywhere else.
                        var simpleStyle = doc.OwnerDocument.CreateElement("link");
                        simpleStyle.SetAttributeValue("rel", "stylesheet");
                        simpleStyle.SetAttributeValue("href", @"file:///C:\Users\Andrea\Desktop\facebook-simple-css.css");
                        (doc.FindSingle("head") ?? doc).AppendChild(simpleStyle);

                        // leaveOpen: the destination stream must stay usable after the writer is disposed.
                        using (var sw = new StreamWriter(dest, Encoding.UTF8, 16 * 1024, leaveOpen: true))
                        {
                            doc.WriteTo(sw);
                        }
                        return;
                    }
                }

                // Everything else is copied through unchanged.
                using (var k = item.Info.OpenStream())
                {
                    k.CopyTo(dest);
                }
            });
        }