示例#1
0
        /// <summary>
        /// Command-line entry point. Installs the TLD-rules callback and the page-URL converter,
        /// then does one of the following:
        /// generates a CDX index and returns when Configuration_MakeCdx is non-null;
        /// builds a scraper from the Configuration_* values and the "--type-member" command-line
        /// switches, runs it and returns;
        /// or, when no scraper was configured, runs the SCRAPING_SANDBOX code if that symbol is
        /// defined and otherwise prints "Bad command line.".
        /// </summary>
        /// <returns>A task that completes when the selected operation has finished.</returns>
        public static async Task MainAsync()
        {
            Shaman.Runtime.Tld.GetTldRulesCallback  = () => File.ReadAllText(ConfigurationManager.CombineRepositoryOrEntrypointPath("Awdee2.Declarative/effective_tld_names.dat"));
            HtmlDocument.CustomPageUrlTypeConverter = x => ((LazyUri)x).Url;

            // CDX generation mode: an empty (but non-null) value means "use the current directory".
            if (Configuration_MakeCdx != null)
            {
                WarcCdxItemRaw.GenerateCdx(string.IsNullOrEmpty(Configuration_MakeCdx) ? Directory.GetCurrentDirectory() : Configuration_MakeCdx);
                return;
            }
            // Cookie header value: the inline setting wins over the cookie file; null when neither is set.
            // NOTE(review): TrimStart("Cookie:") takes a string, so it is not the BCL char overload —
            // presumably a project extension that strips that prefix; confirm.
            var cookies =
                (Configuration_Cookies ??
                 (Configuration_CookieFile != null ? File.ReadAllText(Configuration_CookieFile) : null))?.Trim().TrimStart("Cookie:").Trim();


            // Every public instance field/property declared directly on a type assignable to WebsiteScraper
            // (in this assembly) whose member type is accepted by IsMemberTypeOkForDynamicParameter.
            var allDynamicParameters = typeof(Program)
                                       .GetTypeInfo()
                                       .Assembly
                                       .GetTypes()
                                       .Where(x => typeof(WebsiteScraper).IsAssignableFrom(x))
                                       .SelectMany(x => ((IEnumerable <MemberInfo>)x.GetFields(BindingFlags.Public | BindingFlags.Instance | BindingFlags.DeclaredOnly).Where(y => IsMemberTypeOkForDynamicParameter(y.FieldType))).Concat(x.GetProperties(BindingFlags.Public | BindingFlags.Instance | BindingFlags.DeclaredOnly).Where(y => IsMemberTypeOkForDynamicParameter(y.PropertyType))));
            var dynamicParameters = new Dictionary <string, MemberInfo>();

            // Switch name is "--" + dasherized type part + "-" + dasherized member name. Members declared on
            // WebsiteScraper itself use "Site" as the type part; other types use their name passed through
            // TrimEnd("Scraper") (presumably a suffix-stripping project extension — confirm).
            // Dictionary.Add throws if two members map to the same switch name.
            foreach (var item in allDynamicParameters)
            {
                var name = "--" + Dasherize((item.DeclaringType == typeof(WebsiteScraper) ? "Site" : item.DeclaringType.Name.TrimEnd("Scraper"))) + "-" + Dasherize(item.Name);
                dynamicParameters.Add(name, item);
            }
            //File.WriteAllLines(@"c:\temp\-scraping-extra-parameters.txt", dynamicParameters.Keys.OrderBy(x => x), Encoding.UTF8);

            // Raw process command line; scanned below for the dynamic switch names built above.
            var positional = Environment.GetCommandLineArgs();

            //Caching.EnableWebCache("/Awdee/Cache/DomEmulation");

            WebsiteScraper commandLineScraper = null;

            // A site URL alone is enough to get a plain WebsiteScraper.
            if (Configuration_SiteUrl != null)
            {
                commandLineScraper = new WebsiteScraper();
                Program.InitScraperDefaults(commandLineScraper);
                if (Configuration_SiteRules != null)
                {
                    commandLineScraper.Rules = Configuration_SiteRules;
                }
                commandLineScraper.Cookies = cookies;
            }

            for (var i = 0; i < positional.Length; i++)
            {
                if (dynamicParameters.TryGetValue(positional[i], out var member))
                {
                    // If there is no scraper yet, or the current one is not an instance of the type that
                    // declares this member, a new scraper of the declaring type replaces it (settings applied
                    // to the previous instance are not carried over).
                    var t = member.DeclaringType;
                    if (commandLineScraper == null || !(t.IsAssignableFrom(commandLineScraper.GetType())))
                    {
                        commandLineScraper = (WebsiteScraper)Activator.CreateInstance(t);
                        Program.InitScraperDefaults(commandLineScraper);
                        commandLineScraper.Cookies = cookies;
                    }
                    // The following argument is consumed as this switch's value unless it starts with "--".
                    var    next     = i + 1 < positional.Length ? positional[i + 1] : null;
                    string val      = string.Empty;
                    var    hasValue = false;
                    if (next != null && !next.StartsWith("--"))
                    {
                        val = next;
                        i++;
                        hasValue = true;
                    }
                    object v  = null;
                    // Member type with any Nullable<T> wrapper removed.
                    var    mt = (member as PropertyInfo)?.PropertyType ?? ((FieldInfo)member).FieldType;
                    mt = Nullable.GetUnderlyingType(mt) ?? mt;
                    if (mt == typeof(bool))
                    {
                        // A bare boolean switch (val is still string.Empty) counts as true.
                        if (val.In(string.Empty, "1", "y", "yes", "true"))
                        {
                            v = true;
                        }
                        else if (val.In("0", "n", "no", "false"))
                        {
                            v = false;
                        }
                        else
                        {
                            throw new Exception("Cannot parse boolean: " + val);
                        }
                    }
                    else
                    {
                        // No value was consumed here, so positional[i] is still the switch name itself.
                        if (!hasValue)
                        {
                            throw new Exception("Missing value for parameter " + positional[i]);
                        }
                        if (mt == typeof(string))
                        {
                            v = val;
                        }
                        else if (mt == typeof(string[]))
                        {
                            v = val.SplitFast(',', StringSplitOptions.RemoveEmptyEntries);
                        }
                        else if (mt == typeof(double) || mt == typeof(float))
                        {
                            // Parsed without an explicit culture, i.e. with the current culture.
                            v = Convert.ChangeType(double.Parse(val), mt);
                        }
                        else
                        {
                            // Every remaining member type goes through decimal.Parse and Convert.ChangeType.
                            v = Convert.ChangeType(decimal.Parse(val), mt);
                        }
                    }

                    if (member is PropertyInfo p)
                    {
                        p.SetValue(commandLineScraper, v);
                    }
                    else
                    {
                        ((FieldInfo)member).SetValue(commandLineScraper, v);
                    }
                }
            }


            if (commandLineScraper != null)
            {
                using (commandLineScraper)
                {
                    //commandLineScraper.DatabaseSaveInterval = TimeSpan.FromMinutes(10);
                    // A destination containing a path separator is used as a directory; otherwise it is
                    // passed on as a suggested name.
                    if (Configuration_Destination != null)
                    {
                        if (Configuration_Destination.Contains("/") || Configuration_Destination.Contains("\\"))
                        {
                            commandLineScraper.DestinationDirectory = Path.GetFullPath(Configuration_Destination);
                        }
                        else
                        {
                            commandLineScraper.DestinationSuggestedName = Configuration_Destination;
                        }
                    }
                    if (Configuration_SiteUrl != null)
                    {
                        commandLineScraper.AddToCrawl(Configuration_SiteUrl.AsUri());
                    }
                    commandLineScraper.PerformInitialization();
                    if (commandLineScraper is FacebookScraper)
                    {
                        var postsCsv = Path.Combine(commandLineScraper.DestinationDirectory, "Posts.csv");

                        // Nothing to do when Posts.csv already exists and no retry/reconsider/update flag is set.
                        if (
                            !Configuration_RetryFailed &&
                            !Configuration_ReconsiderAll &&
                            !Configuration_FacebookUpdate &&
                            File.Exists(postsCsv))
                        {
                            return;
                        }

                        if (Configuration_FacebookUpdate)
                        {
                            var candidates = WarcItem.ReadIndex(Path.Combine(commandLineScraper.DestinationDirectory, "index.cdx"));

                            /*string user = null;
                             * first.OpenStream((key, value) => {
                             *  if (key == "Cookie") user = value.TryCaptureBetween((Utf8String)"c_user="******";")?.ToString();
                             * }).Dispose();*/
                            // the id might also be in the redirected page
                            // The user id is taken from the first of the first two indexed items whose text contains
                            // a USER_ID":"..." fragment; it selects which cookie file is loaded further down.
                            // NOTE(review): the statement below is damaged in this copy of the file — the text after
                            // "Unknown facebook user: " contains redaction residue ("******") that has swallowed the end
                            // of the ternary and the declaration of `stopAt` (used two lines later). Restore it from the
                            // original source before building.
                            var    user     = candidates.Take(2).Select(x => x.ReadText().TryCaptureBetween("USER_ID\":\"", "\"")).FirstOrDefault(x => x != null);
                            string filename =
                                user == "100009411610822" ? "facebookcookiesanisea.txt" :
                                user == "1485885333" ? "facebookcookies.txt" :
                                user == "100021796298990" ? "facebookcookiesgloria.txt" :
                                user == null || user == "0" ? (string)null :
                                throw new ArgumentException("Unknown facebook user: "******","), "yyyy-MM-dd HH:mm:ss", CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
                            var fb     = (FacebookScraper)commandLineScraper;
                            fb.UpdateUpTo = stopAt;
                            // Marker files in the destination directory abort the update.
                            if (File.Exists(Path.Combine(commandLineScraper.DestinationDirectory, "fbgroup-not-a-member")))
                            {
                                return;
                            }
                            if (File.Exists(Path.Combine(commandLineScraper.DestinationDirectory, "fberror")))
                            {
                                return;
                            }
                            // Re-queue the root page.
                            fb.SetStatus(fb.Root, UrlStatus.ToCrawl);

                            // For numeric page ids, also re-queue the profile.php URL if it is already known.
                            if (long.TryParse(fb.Page, out var id))
                            {
                                var url = "https://www.facebook.com/profile.php?id=" + id;
                                if (fb.GetStatus(url) != UrlStatus.UnknownUrl)
                                {
                                    fb.SetStatus(url.AsUri(), UrlStatus.ToCrawl);
                                }
                            }

                            // NOTE(review): the cookie directory is a hard-coded absolute path on one machine.
                            if (filename != null)
                            {
                                filename = Path.Combine(@"C:\Users\Andrea\OneDrive\QwawaDesktop\Scraping", filename);
                                commandLineScraper.Cookies = File.ReadAllText(filename).Trim().TrimStart("Cookie:").Trim();
                            }
                        }
                    }


                    if (Configuration_ReconsiderAll)
                    {
                        commandLineScraper.ReconsiderForScraping("**");
                    }
                    else
                    {
                        commandLineScraper.ReconsiderSkippedUrls();
                    }

                    if (Configuration_RetryFailed)
                    {
                        commandLineScraper.ReconsiderFailedUrls();
                    }

                    /*
                     * foreach (var line in File.ReadAllLines(@"C:\Users\Andrea\Desktop\arcilesbica-robots.txt"))
                     * {
                     *  var l = line.Trim();
                     *  if (l.Length == 0) continue;
                     *  commandLineScraper.AddToCrawl("http://www.arcilesbica.it"+l);
                     * }
                     */

                    // With Configuration_FacebookMakeCsv set, scraping is skipped and only the post list
                    // below is produced.
                    if (!Configuration_FacebookMakeCsv)
                    {
                        await commandLineScraper.ScrapeAsync();
                    }
                    if (commandLineScraper is FacebookScraper f)
                    {
                        //f.AddImagesToCrawl();
                        //f.ReconsiderSkippedUrls();
                        //await f.ScrapeAsync();

                        Console.WriteLine("Creating post list");
                        f.CreatePostList();
                    }
                }
                return;
            }

            // Reached only when neither a site URL nor any dynamic switch produced a scraper.
#if SCRAPING_SANDBOX
            using (var scraper = new WebsiteScraper())
            {
                InitScraperDefaults(scraper);


                /*
                 * scraper.DestinationSuggestedName = "fb-anisea-friends";
                 * scraper.Cookies = File.ReadAllText(@"C:\Repositories\Awdee\Awdee2.Declarative\phantomjs\facebookcookiesanisea.txt").Trim();
                 * scraper.ShouldScrape = (url, prereq) =>
                 * {
                 *  if (prereq) return true;
                 *  return false;
                 * };
                 * scraper.ForceLinks = "a:link-has-host-path('m.facebook.com'; '/friends/center/friends/')";
                 * scraper.AddToCrawl("https://m.facebook.com/friends/center/friends/", true);
                 */
                /*
                 * var friends = scraper.CdxIndex.Values.Where(x => x.Url.Contains("/friends/center/friends/"))
                 *  .SelectMany(x =>
                 *  {
                 *      var zz = x.ReadHtml();
                 *      return zz.FindAll("div.w.bk").Select(z=> {
                 *          var m = z.TryGetValue("div.bn.bo", pattern: @"^([,\d]+) mutual");
                 *          return new FacebookFriend
                 *          {
                 *              Name = z.TryGetValue("img", "alt"),
                 *              MutualFriends = m != null ? int.Parse(m.Replace(",", string.Empty)) : -1,
                 *              Id = long.Parse(z.GetLinkUrl("a").GetQueryParameter("uid"))
                 *          };
                 *      });
                 *  }).ToList();
                 * JsonFile.Save(friends, Path.Combine(scraper.DestinationDirectory, "friends.json"));
                 */
                /*
                 * var friends = JsonFile.Read<List<FacebookFriend>>(Path.Combine(scraper.DestinationDirectory, "friends.json"))
                 *  .OrderByDescending(x => x.MutualFriends);
                 */
                //friends.OpenExcel();

                /*
                 * await friends.ForEachThrottledAsync(async x =>
                 * {
                 * Console.WriteLine(x.Name);
                 * var p = await scraper.GetHtmlNodeAsync(("https://m.facebook.com/" + x.Id).AsUri());
                 * }, Debugger.IsAttached ? 1 : scraper.Parallelism);
                 */
                /*
                 * foreach (var item in scraper.CdxIndex.Values.Where(x=>x.Url.EndsWith("_rdr")))
                 * {
                 *  Console.WriteLine(item.Url);
                 *  scraper.CrawlLinks(item.ReadHtml());
                 * }
                 * scraper.SaveDatabase();
                 * await scraper.ScrapeAsync();
                 */

                /*
                 * var actuallyExisting = new HashSet<string>(scraper.CdxIndex.Keys);
                 *
                 * foreach (var item in scraper.GetScrapedUrls(false))
                 * {
                 *  if (!actuallyExisting.Contains(item.AbsoluteUri))
                 *  {
                 *      scraper.SetStatus(item, UrlStatus.ToCrawl);
                 *  }
                 * }
                 * return;*/

                // resourcesOnly flips the host filter in ShouldScrape: when true, URLs hosted on
                // knowyourmeme.com are rejected; when false, only those URLs pass the host check and
                // the crawl is throttled.
                var resourcesOnly = true;
                scraper.AddToCrawl("http://knowyourmeme.com/");
                if (!resourcesOnly)
                {
                    scraper.Parallelism       = 2;
                    scraper.InterRequestDelay = TimeSpan.FromSeconds(0.4);
                }
                // Abort as soon as a received page contains the ban notice heading.
                scraper.HtmlReceived += (a, b, page) =>
                {
                    if (page.FindSingle("h3:contains('include your ip address then there')") != null)
                    {
                        scraper.Dispose();
                        throw new Exception("IP banned.");
                    }
                };
                // Drop the "?fb" query so both URL forms resolve to the same path-only URL.
                scraper.RewriteLink = (url) =>
                {
                    if (url.Query == "?fb")
                    {
                        return(url.GetLeftPart(UriPartial.Path).AsUri());
                    }
                    return(url);
                };
                // URL filter: rules are evaluated top to bottom and the first match decides.
                scraper.ShouldScrape = (url, prereq) =>
                {
                    // Empty body: this check has no effect (looks like a leftover breakpoint anchor).
                    if (url.Contains("kym") && url.EndsWith(".jpg"))
                    {
                    }
                    if (resourcesOnly && url.IsHostedOn("knowyourmeme.com"))
                    {
                        return(false);
                    }
                    if (!resourcesOnly && !url.IsHostedOn("knowyourmeme.com"))
                    {
                        return(false);
                    }

                    //if (url.IsHostedOn("kym-cdn.com")) return false; // todo
                    if (url.IsHostedOn("imgur.com"))
                    {
                        return(false);                             // todo
                    }
                    if (url.IsHostedOn("meme.am"))
                    {
                        return(false);
                    }
                    //if (!url.IsHostedOn("kym-cdn.com")) return false;
                    // Prerequisite URLs that survived the host filters above are always accepted.
                    if (prereq)
                    {
                        return(true);
                    }
                    if (!scraper.IsSubfolderOfFirstUrl(url))
                    {
                        return(false);
                    }
                    if (url.HasQueryParameters())
                    {
                        return(false);
                    }
                    if (url.AbsolutePath == "/")
                    {
                        return(true);
                    }
                    if (url.PathEndsWith("/photos/page/1"))
                    {
                        return(false);
                    }
                    if (url.PathEndsWith("/videos/page/1"))
                    {
                        return(false);
                    }
                    if (url.PathEndsWith("/editorships"))
                    {
                        return(false);
                    }
                    if (url.PathEndsWith("/ask"))
                    {
                        return(false);
                    }
                    if (url.PathContainsComponent("/photos/trending"))
                    {
                        return(false);
                    }
                    if (url.PathContainsComponent("/videos/trending"))
                    {
                        return(false);
                    }
                    //if (url.PathStartsWith("/page/")) return true;
                    if (url.PathContainsComponent("/memes/popular/"))
                    {
                        return(false);
                    }
                    if (url.PathContainsComponent("/deadpool/"))
                    {
                        return(false);
                    }
                    if (url.PathContainsComponent("/memes/researching/page/"))
                    {
                        return(false);
                    }

                    if (url.PathContainsComponent("/edits/"))
                    {
                        return(false);
                    }
                    if (url.PathContainsComponent("/sort/"))
                    {
                        return(false);
                    }
                    if (url.PathContainsComponent("/favorites/"))
                    {
                        return(false);
                    }
                    if (url.PathContainsComponent("/new/"))
                    {
                        return(false);
                    }
                    if (url.PathStartsWith("/memes/"))
                    {
                        return(true);
                    }

                    //if (url.PathContains("/photos/")) return false;

                    //if (url.PathEndsWith("/children")) return false;
                    return(false);
                };

                scraper.ReconsiderForScraping("**");

                //await scraper.ScrapeFailedImagesFromKnownHostsAsync();

                await scraper.ScrapeAsync();
            }
#else
            Console.WriteLine("Bad command line.");
#endif
        }
示例#2
0
        /// <summary>
        /// Writes one WARC/1.0 request or response record (headers followed by the raw HTTP
        /// message held in <paramref name="req"/>) to the current output, and, for responses,
        /// re-parses the HTTP message to build and register the matching index entry.
        /// </summary>
        /// <param name="url">Value written as WARC-Target-URI; also used to parse the HTTP message.</param>
        /// <param name="isresponse">True to write a response record, false for a request record.</param>
        /// <param name="req">Raw HTTP message bytes; its whole content (position 0 to Length) is written.</param>
        /// <param name="date">Timestamp written as WARC-Date, truncated to seconds with a "Z" suffix appended.</param>
        /// <param name="ip">Value written as WARC-IP-Address.</param>
        /// <param name="recordId">UUID text written as WARC-Record-ID.</param>
        /// <param name="concurrentTo">UUID text for WARC-Concurrent-To, or null to omit that header.</param>
        /// <param name="shamanUrl">Optional URL; when its AbsoluteUri differs from <paramref name="url"/> it is
        /// written as WARC-Shaman-URI, and it takes precedence as the Url of the returned item.</param>
        /// <returns>The index entry for a response record; null for a request record.</returns>
        public WarcItem WriteRecord(string url, bool isresponse, MemoryStream req, DateTime date, string ip, string recordId, string concurrentTo, LazyUri shamanUrl)
        {
            // Remembered so the record's offset and on-disk length can be computed after EndRecord().
            var initialPosition = outstream.Position;

            StartRecord();
            currentRecord.WriteClrStringLine("WARC/1.0");
            if (isresponse)
            {
                currentRecord.WriteClrStringLine("WARC-Type: response");
                currentRecord.WriteClrStringLine("Content-Type: application/http;msgtype=response");
            }
            else
            {
                currentRecord.WriteClrStringLine("WARC-Type: request");
                currentRecord.WriteClrStringLine("Content-Type: application/http;msgtype=request");
            }
            // Round-trip ("o") format cut after the seconds: yyyy-MM-ddTHH:mm:ss, then a literal Z.
            currentRecord.WriteClrString("WARC-Date: ");
            currentRecord.WriteClrString(date.ToString("o").Substring(0, 19));
            currentRecord.WriteClrStringLine("Z");
            currentRecord.WriteClrString("WARC-Record-ID: <urn:uuid:");
            currentRecord.WriteClrString(recordId);
            currentRecord.WriteClrStringLine(">");
            currentRecord.WriteClrString("WARC-Target-URI: ");
            currentRecord.WriteClrStringLine(url);
            if (shamanUrl != null)
            {
                var abs = shamanUrl.AbsoluteUri;
                if (abs != url)
                {
                    currentRecord.WriteClrString("WARC-Shaman-URI: ");
                    currentRecord.WriteClrStringLine(abs);
                }
            }
            currentRecord.WriteClrString("WARC-IP-Address: ");
            currentRecord.WriteClrStringLine(ip);
            if (concurrentTo != null)
            {
                currentRecord.WriteClrString("WARC-Concurrent-To: <urn:uuid:");
                currentRecord.WriteClrString(concurrentTo);
                currentRecord.WriteClrStringLine(">");
            }
            currentRecord.WriteClrString("Content-Length: ");
            currentRecord.Write(req.Length);
            currentRecord.WriteLine();
            currentRecord.WriteClrString("WARC-Warcinfo-ID: <urn:uuid:");
            currentRecord.WriteClrString(WarcInfoId);
            currentRecord.WriteClrStringLine(">");
            currentRecord.WriteLine();

            // TryGetBuffer fails for streams whose buffer is not publicly visible (for example those
            // wrapping a caller-supplied array); previously that left a null array here. Fall back to
            // a copy of the stream content so the record body is still written.
            if (!req.TryGetBuffer(out var buf))
            {
                buf = new ArraySegment<byte>(req.ToArray());
            }
            currentRecord.Write(buf.Array.Slice(buf.Offset, (int)req.Length));
            EndRecord();

            // Only responses are indexed.
            if (!isresponse)
            {
                return(null);
            }

            req.Seek(0, SeekOrigin.Begin);
            scratchpad.Reset();
            using (var http = new Utf8StreamReader(req, true))
            {
                using (var s = WarcItem.OpenHttp(http, scratchpad, url.AsUri(), req.Length, out var payloadLength, out var location, out var responseCode, out var contentType, out var lastModified, null))
                {
                    // -1 means the length was not reported by OpenHttp: measure it by draining the body.
                    if (payloadLength == -1)
                    {
                        var l = 0;
                        while (true)
                        {
                            var m = s.Read(lengthCalculationBuffer, 0, lengthCalculationBuffer.Length);
                            if (m == 0)
                            {
                                break;
                            }
                            l += m;
                        }
                        payloadLength = l;
                    }

                    var warcItem = new WarcItem()
                    {
                        Url              = shamanUrl?.AbsoluteUri ?? url,
                        Date             = date,
                        ContentType      = contentType.ToStringCached(),
                        LastModified     = lastModified,
                        PayloadLength    = payloadLength,
                        ResponseCode     = (HttpStatusCode)responseCode,
                        CompressedLength = outstream.Position - initialPosition,
                        CompressedOffset = initialPosition,
                        WarcFile         = WarcName
                    };
                    recordedResponses.Add(warcItem);
                    onNewWarcItem?.Invoke(warcItem);
                    return(warcItem);
                }
            }
        }
示例#3
0
        public static void GenerateCdx(string cdx, IEnumerable <string> warcs)
        {
            var scratchpad = new Scratchpad();
            var buf        = new byte[16 * 1024];

            using (var output = File.Open(cdx + ".tmp", FileMode.Create, FileAccess.Write, FileShare.Delete | FileShare.Read))
            {
                using (var writer = new Utf8StreamWriter(output))
                {
                    writer.WriteClrStringLine(WarcColumns);
                    foreach (var warc in warcs)
                    {
                        Console.WriteLine(Path.GetFileName(warc));

                        using (var warcStream = File.Open(warc, FileMode.Open, FileAccess.Read, FileShare.Delete | FileShare.Read))
                        {
                            try
                            {
                                var warcname = new Utf8String(Path.GetFileName(warc));
                                while (warcStream.Position != warcStream.Length)
                                {
                                    var  startPosition     = warcStream.Position;
                                    long warcContentLength = -1;
                                    long payloadLength     = -1;
                                    int  responseCode      = -1;
                                    var  contentType       = Utf8String.Empty;
                                    var  date = scratchpad.Use(14);
                                    date[0] = 0;
                                    Utf8String url          = Utf8String.Empty;
                                    Utf8String shamanUrl    = Utf8String.Empty;
                                    DateTime?  lastModified = null;
                                    bool       isresponse   = false;
                                    using (var gz = new GZipStream(warcStream, CompressionMode.Decompress, true))
                                    {
                                        using (var reader = new Utf8StreamReader(gz, true))
                                        {
                                            while (true)
                                            {
                                                if (reader.IsCompleted)
                                                {
                                                    throw new EndOfStreamException();
                                                }
                                                var line = reader.ReadLine();
                                                if (line.Length == 0)
                                                {
                                                    break;
                                                }
                                                if (line.Equals(Warc_Response))
                                                {
                                                    isresponse = true;
                                                }
                                                if (line.StartsWith(Warc_ContentLength))
                                                {
                                                    warcContentLength = Utf8Utils.ParseInt64(WarcItem.GetHeaderValue(line));
                                                }
                                                else if (line.StartsWith(Warc_Date))
                                                {
                                                    var val = WarcItem.GetHeaderValue(line).Bytes;
                                                    val.Slice(0, 4).CopyTo(date.Slice(0));
                                                    val.Slice(5, 2).CopyTo(date.Slice(4));
                                                    val.Slice(8, 2).CopyTo(date.Slice(6));
                                                    val.Slice(11, 2).CopyTo(date.Slice(8));
                                                    val.Slice(14, 2).CopyTo(date.Slice(10));
                                                    val.Slice(17, 2).CopyTo(date.Slice(12));
                                                }
                                                else if (line.StartsWith(Warc_URL))
                                                {
                                                    url = scratchpad.Copy(WarcItem.GetHeaderValue(line));
                                                }
                                                if (line.StartsWith(Warc_Shaman_URI))
                                                {
                                                    shamanUrl = scratchpad.Copy(WarcItem.GetHeaderValue(line));
                                                }
                                            }
                                            if (warcContentLength == -1)
                                            {
                                                throw new InvalidOperationException();
                                            }


                                            if (isresponse)
                                            {
                                                using (var s = WarcItem.OpenHttp(reader, scratchpad, new Uri(url.ToString()), warcContentLength, out var payloadLengthFromHeader, out var redirectLocation, out responseCode, out contentType, out lastModified, null))
                                                {
                                                    long l = 0;
                                                    while (true)
                                                    {
                                                        var m = s.Read(buf, 0, buf.Length);
                                                        if (m == 0)
                                                        {
                                                            break;
                                                        }
                                                        l += m;
                                                    }
                                                    payloadLength = l;
                                                    if (payloadLengthFromHeader != -1 && payloadLengthFromHeader != payloadLength)
                                                    {
                                                        throw new Exception("Content-Length mismatch.");
                                                    }
                                                }
                                                //var httpData = new LimitedStream(reader, contentLength);
                                                var cr = reader.ReadByte();
                                                if (cr != 13)
                                                {
                                                    throw new InvalidDataException();
                                                }
                                                var lf = reader.ReadByte();
                                                if (lf != 10)
                                                {
                                                    throw new InvalidDataException();
                                                }


                                                cr = reader.ReadByte();
                                                if (cr != 13)
                                                {
                                                    throw new InvalidDataException();
                                                }
                                                lf = reader.ReadByte();
                                                if (lf != 10)
                                                {
                                                    throw new InvalidDataException();
                                                }
                                                //if (reader.ReadByte() != 13) throw new Exception();
                                                //if (reader.ReadByte() != 10) throw new Exception();
                                            }
                                            else
                                            {
                                                var remaining = warcContentLength;
                                                while (remaining != 0)
                                                {
                                                    var m = reader.Read((int)Math.Min(remaining, int.MaxValue));
                                                    if (m.Count == 0)
                                                    {
                                                        throw new Exception();
                                                    }
                                                    remaining -= m.Count;
                                                }

                                                var e = reader.ReadLine();
                                                if (e.Length != 0)
                                                {
                                                    throw new InvalidDataException();
                                                }
                                                e = reader.ReadLine();
                                                if (e.Length != 0)
                                                {
                                                    throw new InvalidDataException();
                                                }
                                                e = reader.ReadLine();
                                                if (!reader.IsCompleted)
                                                {
                                                    throw new InvalidDataException();
                                                }
                                            }

                                            //var r = reader.RemainingBufferedData;
                                            var end = reader.ReadByte();
                                            if (end != -1)
                                            {
                                                throw new InvalidDataException();
                                            }
                                            //Console.WriteLine($"Remaining: {r.Length}");
                                        }



                                        warcStream.Position -= GetRemainingUnusedBytes(gz);
                                    }



                                    if (isresponse)
                                    {
                                        if (shamanUrl.Length > 0)
                                        {
                                            writer.Write(shamanUrl);
                                        }
                                        else
                                        {
                                            writer.Write(url);
                                        }
                                        writer.Write((byte)' ');
                                        writer.Write(startPosition);
                                        writer.Write((byte)' ');
                                        writer.Write(warcStream.Position - startPosition);
                                        writer.Write((byte)' ');
                                        if (date[0] != 0)
                                        {
                                            writer.Write(date);
                                        }
                                        else
                                        {
                                            writer.Write((byte)'-');
                                        }
                                        writer.Write((byte)' ');
                                        writer.Write(warcname);
                                        writer.Write((byte)' ');
                                        if (responseCode != -1)
                                        {
                                            writer.Write(responseCode);
                                        }
                                        else
                                        {
                                            writer.Write((byte)'-');
                                        }
                                        writer.Write((byte)' ');
                                        writer.Write(contentType);
                                        writer.Write((byte)' ');
                                        writer.Write(payloadLength);
                                        writer.Write((byte)' ');
                                        if (lastModified != null)
                                        {
                                            WriteDate(writer, lastModified.Value);
                                        }
                                        else
                                        {
                                            writer.Write((byte)'-');
                                        }
                                        writer.Write((byte)' ');

                                        writer.WriteLine();
                                    }
                                    scratchpad.Reset();
                                }
                            }
                            catch
                            {
                                if (warcStream.Position == warcStream.Length)
                                {
                                    Console.WriteLine("WARNING: truncated WARC.");;
                                }
                                else
                                {
                                    throw;
                                }
                            }
                        }
                    }
                }
            }
            File.Delete(cdx);
            File.Move(cdx + ".tmp", cdx);
        }