Esempio n. 1
0
        /// <summary>
        /// Finds the first loaded entry whose URL equals <paramref name="requestUrl"/>
        /// and returns the response built for that entry.
        /// </summary>
        /// <param name="requestUrl">URL to look for among the loaded entries.</param>
        /// <returns>
        /// The response for the matching entry, or <c>null</c> when no entry matches.
        /// </returns>
        public CdxWebResponse GetResponse(string requestUrl)
        {
            CdxEntry match = entries.Find(candidate => candidate.Url.Equals(requestUrl));

            // No matching entry: report that with null rather than an exception.
            return match == null ? null : GetResponse(match);
        }
Esempio n. 2
0
        /// <summary>
        /// Reads the record located at <c>entry.Offset</c> in <paramref name="warcFile"/>
        /// and turns it into a <see cref="CdxWebResponse"/>.
        /// </summary>
        /// <remarks>
        /// The record is decompressed with gzip and consumed in three steps: the first
        /// block of lines up to an empty line (scanned for the target-URI line), one
        /// status line, and then "key: value" header lines up to the next empty line.
        /// Whatever follows becomes the response stream. If anything fails along the way
        /// the decompression stream is disposed before the exception propagates.
        /// </remarks>
        /// <param name="entry">Index entry whose <c>Offset</c> locates the record.</param>
        /// <param name="warcFile">
        /// Seekable stream to read from. It is repositioned by this call and is not
        /// closed when the decompression stream is disposed.
        /// </param>
        /// <returns>The populated response.</returns>
        /// <exception cref="NotImplementedException">
        /// The transfer encoding is neither absent nor "chunked".
        /// </exception>
        internal static CdxWebResponse Build(CdxEntry entry, Stream warcFile)
        {
            CdxWebResponse result = new CdxWebResponse();

            warcFile.Position = entry.Offset;
            GZipStream gz = new GZipStream(warcFile, CompressionMode.Decompress, true);

            try
            {
                // The reader is deliberately not disposed: disposing it would also close
                // gz, which may be handed to the caller as the response stream below.
                BinaryReader br     = new BinaryReader(gz);
                string       srLine = br.ReadAsciiLine();

                // First block of lines: ends at the first empty line.
                while (!srLine.Equals(""))
                {
                    srLine = br.ReadAsciiLine();
                    // Ordinal: this is a protocol token, not culture-dependent text.
                    if (srLine.StartsWith(WARC_TARGET_URI, StringComparison.Ordinal))
                    {
                        result.responseUri = new Uri(srLine.Substring(WARC_TARGET_URI.Length));
                    }
                }

                // The status line is consumed only to advance the stream; its value is unused.
                br.ReadAsciiLine();

                while (true)
                {
                    srLine = br.ReadAsciiLine();
                    if (srLine.Equals(""))
                    {
                        break;
                    }

                    int colonPos = srLine.IndexOf(':');
                    if (colonPos < 0)
                    {
                        // A line without a colon cannot be split into key and value;
                        // skip it instead of failing on Substring(0, -1).
                        continue;
                    }

                    string headerKey   = srLine.Substring(0, colonPos);
                    string headerValue = srLine.Substring(colonPos + 1).Trim();
                    result.Headers.Add(headerKey, headerValue);
                }

                switch (result.TransferEncoding)
                {
                case "chunked":
                    result.responseStream = unchunk(gz);
                    gz.Dispose();
                    break;

                case null:
                    // The caller reads the body straight from the decompression stream.
                    result.responseStream = gz;
                    break;

                default:
                    throw new NotImplementedException(result.TransferEncoding);
                }

                return(result);
            }
            catch
            {
                // Nothing took ownership of gz, so release it before rethrowing.
                gz.Dispose();
                throw;
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Builds the response for <paramref name="ce"/>, reusing the currently open
        /// file stream when the entry refers to the same file as the previous call.
        /// </summary>
        /// <remarks>
        /// When the entry's file differs from the cached one, the new file is opened
        /// before the old stream is released and before any cached field is changed.
        /// If opening fails, the cached file, stream and name therefore still describe
        /// the previously opened file instead of pointing at a disposed stream.
        /// </remarks>
        /// <param name="ce">Entry to read; its <c>Warc</c> member identifies the file.</param>
        /// <returns>The response built from the entry's record.</returns>
        public CdxWebResponse GetResponse(CdxEntry ce)
        {
            if (currentWarcFileInfo != ce.Warc)
            {
                // Open first: if this throws, the cached state is left untouched.
                var openedStream = ce.Warc.OpenRead();

                if (currentWarcFileStream != null)
                {
                    currentWarcFileStream.Dispose();
                }

                currentWarcFileInfo   = ce.Warc;
                currentWarcFileStream = openedStream;
                currentWarcFileName   = ce.Warc.Name;
            }

            return(CdxWebResponse.Build(ce, currentWarcFileStream));
        }
Esempio n. 4
0
        /// <summary>
        /// Loads a CDX index file and appends one entry per data line to the entry list.
        /// </summary>
        /// <remarks>
        /// The first character of the header line is used as the column separator and
        /// must be followed by "CDX". The rest of the header (from index 5) lists
        /// one-letter column names; the columns a, b, k, S, V, g and u are mapped to
        /// Url, GrabDate, Fingerprint, Size, Offset, Warc and Uuid respectively.
        /// A file that is already in the list of loaded files is ignored. Blank data
        /// lines are skipped.
        /// </remarks>
        /// <param name="fi">The CDX file to load.</param>
        /// <exception cref="InvalidDataException">
        /// The file is empty or its header line does not carry the CDX magic.
        /// </exception>
        public void AddCdx(FileInfo fi)
        {
            if (cdxFiles.Contains(fi))
            {
                return;
            }

            cdxFiles.Add(fi);

            // using: the file handle is released even when parsing throws.
            using (StreamReader sr = fi.OpenText())
            {
                string headerLine = sr.ReadLine();

                // A null header (empty file) or one shorter than separator + "CDX" +
                // separator is reported as bad magic instead of failing on an index.
                if (headerLine == null || headerLine.Length < 5 ||
                    headerLine[1] != 'C' || headerLine[2] != 'D' || headerLine[3] != 'X')
                {
                    throw new InvalidDataException("invalid magic");
                }

                char     separator  = headerLine[0];
                string[] headerArgs = headerLine.Substring(5).Split(separator);
                int      aIndex     = headerArgs.IndexOf(x => x.Equals("a"));
                int      bIndex     = headerArgs.IndexOf(x => x.Equals("b"));
                int      kIndex     = headerArgs.IndexOf(x => x.Equals("k"));
                int      SIndex     = headerArgs.IndexOf(x => x.Equals("S"));
                int      VIndex     = headerArgs.IndexOf(x => x.Equals("V"));
                int      gIndex     = headerArgs.IndexOf(x => x.Equals("g"));
                int      uIndex     = headerArgs.IndexOf(x => x.Equals("u"));

                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    if (line.Length == 0)
                    {
                        // A blank line (e.g. at the end of the file) has no columns.
                        continue;
                    }

                    string[] lineArgs = line.Split(separator);
                    CdxEntry child    = new CdxEntry();
                    child.Url         = lineArgs[aIndex];
                    child.GrabDate    = ConvertFromUnixTimestamp(Convert.ToInt64(lineArgs[bIndex]));
                    child.Fingerprint = lineArgs[kIndex];
                    child.Size        = Convert.ToInt64(lineArgs[SIndex]);
                    child.Offset      = Convert.ToInt64(lineArgs[VIndex]);
                    child.Warc        = ResovleWarcFile(lineArgs[gIndex]);
                    child.Uuid        = ParseGuid(lineArgs[uIndex]);
                    entries.Add(child);
                }
            }
        }