/// <summary>
/// Unpacks the package contents and indexes them.
/// </summary>
/// <remarks>
/// The gzip'ed package named by <c>requestHandler.PackageFileName</c> is first
/// decompressed to the same path with ".gzip" removed. The decompressed file starts
/// with an index of <c>Package-IndexSize</c> bytes (a response header) made of
/// "\r\n"-separated lines of the form "uri size". The bytes of each listed file
/// follow the index back to back, in index order. Every entry is written to the
/// cache file of its <c>RCRequest</c>; parseable entries that were not cached
/// before are additionally passed to <c>IndexWrapper.IndexDocument</c>.
/// </remarks>
/// <param name="requestHandler">Calling handler for this method.</param>
/// <param name="indexPath">Path to the index.</param>
/// <returns>
/// The sum of the declared sizes of the entries processed. If an index line is
/// malformed, a URI is invalid, or a cache file cannot be deleted/created, unpacking
/// stops and the sum accumulated so far is returned (it already includes the declared
/// size of an entry whose cache file could not be prepared). If the package runs out
/// of data in the middle of an entry, the negated sum is returned. If the index itself
/// is truncated, 0 is returned.
/// </returns>
public static long Unpack(LocalRequestHandler requestHandler, string indexPath)
{
    const int BufferSize = 1024;

    string packageIndexSizeStr = requestHandler.RCRequest.GenericWebResponse.GetResponseHeader("Package-IndexSize");
    string packageContentSizeStr = requestHandler.RCRequest.GenericWebResponse.GetResponseHeader("Package-ContentSize");
    long packageIndexSize = Int64.Parse(packageIndexSizeStr);
    long packageContentSize = Int64.Parse(packageContentSizeStr);

    string packageFileName = requestHandler.PackageFileName;
    string unpackedPackageFileName = packageFileName.Replace(".gzip", "");

    GZipWrapper.GZipDecompress(packageFileName, unpackedPackageFileName, packageIndexSize + packageContentSize);

    string[] lineSeparator = new string[] { "\r\n" };
    string[] fieldSeparator = new string[] { " " };
    Byte[] buffer = new Byte[BufferSize];
    long unpackedBytes = 0;

    // "using" guarantees the package stream is closed on every return path.
    using (FileStream packageFs = new FileStream(unpackedPackageFileName, FileMode.Open))
    {
        // Read the package index. Stream.Read may legally return fewer bytes than
        // requested, so keep reading until the index is complete or the stream ends.
        Byte[] packageIndexBuffer = new Byte[packageIndexSize];
        int indexBytesRead = ReadPackageBytes(packageFs, packageIndexBuffer, packageIndexBuffer.Length);
        if (indexBytesRead != packageIndexBuffer.Length)
        {
            requestHandler.LogDebug("error, unexpected package index size: " + unpackedPackageFileName +
                "(" + indexBytesRead + " / " + packageIndexSize + ")");
            return unpackedBytes;
        }

        // Split the index into one line per packaged file.
        System.Text.UTF8Encoding enc = new System.Text.UTF8Encoding();
        string package = enc.GetString(packageIndexBuffer);
        string[] packageContentArr = package.Split(lineSeparator, StringSplitOptions.RemoveEmptyEntries);

        foreach (string entry in packageContentArr)
        {
            // Each index line is "<uri> <size>".
            string[] packageEntryArr = entry.Split(fieldSeparator, StringSplitOptions.RemoveEmptyEntries);
            long currFileSize = 0;
            if (packageEntryArr.Length < 2 || !Int64.TryParse(packageEntryArr[1], out currFileSize))
            {
                requestHandler.LogDebug("problem unpacking: " + entry);
                return unpackedBytes;
            }

            string currUri = packageEntryArr[0];
            if (!Util.IsValidUri(currUri))
            {
                requestHandler.LogDebug("problem unpacking: " + currUri);
                return unpackedBytes;
            }

            RCRequest rcRequest = new RCRequest(requestHandler, currUri);
            unpackedBytes += currFileSize;

            // Remember whether the file was already cached: only new files get indexed.
            bool existed = new FileInfo(rcRequest.CacheFileName).Exists;

            // Try to delete the old version.
            if (!Util.DeleteFile(rcRequest.CacheFileName))
            {
                return unpackedBytes;
            }

            // Create the directory if it doesn't exist.
            if (!Util.CreateDirectoryForFile(rcRequest.CacheFileName))
            {
                return unpackedBytes;
            }

            // Create the cache file.
            FileStream currFileFS = Util.CreateFile(rcRequest.CacheFileName);
            if (currFileFS == null)
            {
                return unpackedBytes;
            }

            // Copy exactly currFileSize bytes. Never reading past the end of the
            // current entry keeps the package stream positioned at the start of the
            // next one, so no carry-over buffer is needed between entries.
            long bytesReadOfCurrFile;
            using (currFileFS)
            {
                bytesReadOfCurrFile = CopyPackageBytes(packageFs, currFileFS, currFileSize, buffer);
            }

            if (bytesReadOfCurrFile != currFileSize)
            {
                // Ran out of bytes for this file.
                requestHandler.LogDebug("error, unexpected package size: " + rcRequest.CacheFileName +
                    "(" + bytesReadOfCurrFile + " / " + currFileSize + ")");
                return unpackedBytes * -1;
            }

            // Add the file to Lucene. Files that were already cached are not indexed
            // again, so reading and parsing them is skipped as well.
            if (Util.IsParseable(rcRequest) && !existed)
            {
                string document = Util.ReadFileAsString(rcRequest.CacheFileName);
                string title = Util.GetPageTitle(document);
                string content = Util.GetPageContent(document);
                IndexWrapper.IndexDocument(indexPath, "Content-Type: text/html", rcRequest.Uri, title, content);
            }
        }
    }

    return unpackedBytes;
}

/// <summary>
/// Reads from <paramref name="source"/> into the start of <paramref name="buffer"/>
/// until <paramref name="count"/> bytes have been read or the stream ends.
/// </summary>
/// <param name="source">Stream to read from.</param>
/// <param name="buffer">Destination buffer; must hold at least <paramref name="count"/> bytes.</param>
/// <param name="count">Number of bytes wanted.</param>
/// <returns>Number of bytes actually read; less than <paramref name="count"/> only at end of stream.</returns>
private static int ReadPackageBytes(Stream source, Byte[] buffer, int count)
{
    int total = 0;
    while (total < count)
    {
        int read = source.Read(buffer, total, count - total);
        if (read == 0)
        {
            break;
        }
        total += read;
    }
    return total;
}

/// <summary>
/// Copies up to <paramref name="count"/> bytes from <paramref name="source"/> to
/// <paramref name="destination"/>, never reading more than is still needed.
/// </summary>
/// <param name="source">Stream to read from.</param>
/// <param name="destination">Stream to write to.</param>
/// <param name="count">Number of bytes to copy; zero or negative copies nothing.</param>
/// <param name="buffer">Scratch buffer used for the transfer.</param>
/// <returns>Number of bytes copied; less than <paramref name="count"/> only if the source ended early.</returns>
private static long CopyPackageBytes(Stream source, Stream destination, long count, Byte[] buffer)
{
    long copied = 0;
    while (copied < count)
    {
        // The remaining amount is capped by the buffer length, so the cast is safe.
        int wanted = (int)Math.Min((long)buffer.Length, count - copied);
        int read = source.Read(buffer, 0, wanted);
        if (read == 0)
        {
            break;
        }
        destination.Write(buffer, 0, read);
        copied += read;
    }
    return copied;
}