/// <summary>
/// Looks up the first indexed entry whose <c>Url</c> equals
/// <paramref name="requestUrl"/> and returns the response built for it.
/// </summary>
/// <param name="requestUrl">URL to compare against each entry's <c>Url</c>.</param>
/// <returns>
/// The result of the <see cref="CdxEntry"/> overload for the matching entry,
/// or <c>null</c> when no entry matches.
/// </returns>
public CdxWebResponse GetResponse(string requestUrl)
{
    CdxEntry match = entries.Find(candidate => candidate.Url.Equals(requestUrl));

    // No matching entry: report that with null instead of throwing.
    return match == null ? null : GetResponse(match);
}
/// <summary>
/// Builds a <see cref="CdxWebResponse"/> from the gzip-compressed record that
/// starts at <c>entry.Offset</c> inside <paramref name="warcFile"/>.
/// </summary>
/// <remarks>
/// The record is read in three steps: a first block of lines up to the first
/// empty line (the line starting with <c>WARC_TARGET_URI</c> supplies the
/// response URI), one line that is consumed and discarded, and then
/// <c>key: value</c> header lines up to the next empty line. What remains of the
/// decompressed stream becomes the response body.
/// </remarks>
/// <param name="entry">Index entry whose <c>Offset</c> locates the record.</param>
/// <param name="warcFile">
/// Seekable stream over the WARC file. It is repositioned but never closed here.
/// </param>
/// <returns>The populated response.</returns>
/// <exception cref="NotImplementedException">
/// The response declares a transfer encoding other than <c>chunked</c>.
/// </exception>
internal static CdxWebResponse Build(CdxEntry entry, Stream warcFile)
{
    CdxWebResponse result = new CdxWebResponse();
    warcFile.Position = entry.Offset;

    // leaveOpen = true: disposing the GZipStream must not close warcFile.
    GZipStream gz = new GZipStream(warcFile, CompressionMode.Decompress, true);
    try
    {
        // Deliberately not disposed: disposing the reader would also dispose gz,
        // which may be handed out below as the response body.
        BinaryReader br = new BinaryReader(gz);

        // First block: skip every line up to the blank separator, picking out the target URI.
        string srLine = br.ReadAsciiLine();
        while (!srLine.Equals(""))
        {
            srLine = br.ReadAsciiLine();
            // Ordinal: this is a protocol field name, not linguistic text.
            if (srLine.StartsWith(WARC_TARGET_URI, StringComparison.Ordinal))
            {
                result.responseUri = new Uri(srLine.Substring(WARC_TARGET_URI.Length));
            }
        }

        // The line after the blank separator (the status line) is consumed but not used.
        br.ReadAsciiLine();

        // Second block: "key: value" header lines up to the next blank line.
        while (true)
        {
            srLine = br.ReadAsciiLine();
            if (srLine.Equals(""))
            {
                break;
            }

            int colonPos = srLine.IndexOf(':');
            if (colonPos < 0)
            {
                // A line without a colon cannot be split into key and value;
                // skip it instead of failing in Substring(0, -1).
                continue;
            }

            string headerKey = srLine.Substring(0, colonPos);
            string headerValue = srLine.Substring(colonPos + 1).Trim();
            result.Headers.Add(headerKey, headerValue);
        }

        switch (result.TransferEncoding)
        {
            case "chunked":
                // gz is disposed immediately afterwards, so the stream returned by
                // unchunk must not depend on gz staying open.
                // NOTE(review): presumably unchunk copies the body - confirm.
                result.responseStream = unchunk(gz);
                gz.Dispose();
                break;
            case null:
                // Ownership of gz passes to the response.
                result.responseStream = gz;
                break;
            default:
                throw new NotImplementedException(result.TransferEncoding);
        }

        return result;
    }
    catch
    {
        // On any failure the decompressor has no owner; release it here.
        gz.Dispose();
        throw;
    }
}
/// <summary>
/// Returns the response for <paramref name="ce"/>, opening the WARC file it
/// refers to unless that file is already the one currently open.
/// </summary>
/// <param name="ce">Index entry; its <c>Warc</c> identifies the file to read.</param>
/// <returns>The response built by <c>CdxWebResponse.Build</c> from the open WARC stream.</returns>
/// <exception cref="ArgumentNullException"><paramref name="ce"/> is <c>null</c>.</exception>
public CdxWebResponse GetResponse(CdxEntry ce)
{
    if (ce == null)
    {
        throw new ArgumentNullException("ce");
    }

    // Compare by path: FileInfo does not override Equals, so two instances for the
    // same file would otherwise force a needless reopen.
    bool needsOpen = currentWarcFileStream == null
        || currentWarcFileInfo == null
        || !string.Equals(currentWarcFileInfo.FullName, ce.Warc.FullName, StringComparison.Ordinal);

    if (needsOpen)
    {
        // Open the new file before touching any state. If OpenRead throws, the
        // previous file and stream stay valid instead of leaving a disposed stream
        // paired with the new file's info.
        var opened = ce.Warc.OpenRead();

        if (currentWarcFileStream != null)
        {
            currentWarcFileStream.Dispose();
        }

        currentWarcFileInfo = ce.Warc;
        currentWarcFileStream = opened;
        currentWarcFileName = ce.Warc.Name;
    }

    return CdxWebResponse.Build(ce, currentWarcFileStream);
}
/// <summary>
/// Reads a CDX index file and appends one <see cref="CdxEntry"/> per data line
/// to <c>entries</c>. A file already present in <c>cdxFiles</c> is ignored.
/// </summary>
/// <remarks>
/// The first line is the header. Its first character is the field separator and
/// the next three must be <c>CDX</c>; the text from index 5 onwards lists the
/// field letters. The letters read are <c>a</c> (Url), <c>b</c> (GrabDate),
/// <c>k</c> (Fingerprint), <c>S</c> (Size), <c>V</c> (Offset), <c>g</c> (Warc)
/// and <c>u</c> (Uuid).
/// </remarks>
/// <param name="fi">The CDX file to load.</param>
/// <exception cref="ArgumentNullException"><paramref name="fi"/> is <c>null</c>.</exception>
/// <exception cref="Exception">
/// The file is empty or its header does not carry the <c>CDX</c> signature.
/// </exception>
public void AddCdx(FileInfo fi)
{
    if (fi == null)
    {
        throw new ArgumentNullException("fi");
    }

    if (cdxFiles.Contains(fi))
    {
        return;
    }

    cdxFiles.Add(fi);

    // using: release the file handle even when a line fails to parse.
    using (StreamReader sr = fi.OpenText())
    {
        string headerLine = sr.ReadLine();

        // An empty file (null) or a header too short to hold "<sep>CDX<sep>" is
        // reported exactly like a wrong signature.
        if (headerLine == null
            || headerLine.Length < 5
            || headerLine[1] != 'C'
            || headerLine[2] != 'D'
            || headerLine[3] != 'X')
        {
            throw new Exception("invalid magic");
        }

        char separator = headerLine[0];
        string[] headerArgs = headerLine.Substring(5).Split(separator);

        // Column position of each field letter within a data line.
        int aIndex = headerArgs.IndexOf(x => x.Equals("a"));
        int bIndex = headerArgs.IndexOf(x => x.Equals("b"));
        int kIndex = headerArgs.IndexOf(x => x.Equals("k"));
        int SIndex = headerArgs.IndexOf(x => x.Equals("S"));
        int VIndex = headerArgs.IndexOf(x => x.Equals("V"));
        int gIndex = headerArgs.IndexOf(x => x.Equals("g"));
        int uIndex = headerArgs.IndexOf(x => x.Equals("u"));

        string line;
        while ((line = sr.ReadLine()) != null)
        {
            if (line.Length == 0)
            {
                // Blank lines (for example a trailing newline) carry no record.
                continue;
            }

            string[] lineArgs = line.Split(separator);

            CdxEntry child = new CdxEntry();
            child.Url = lineArgs[aIndex];
            child.GrabDate = ConvertFromUnixTimestamp(Convert.ToInt64(lineArgs[bIndex]));
            child.Fingerprint = lineArgs[kIndex];
            child.Size = Convert.ToInt64(lineArgs[SIndex]);
            child.Offset = Convert.ToInt64(lineArgs[VIndex]);
            child.Warc = ResovleWarcFile(lineArgs[gIndex]);
            child.Uuid = ParseGuid(lineArgs[uIndex]);
            entries.Add(child);
        }
    }
}