/// <summary>
/// Records that <paramref name="path"/> answered with a redirect, then hands the
/// redirect target to <c>AddUri</c>.
/// </summary>
/// <param name="path">Key of the content record to update.</param>
/// <param name="status">HTTP status of the response; stored on the record as an unsigned integer.</param>
/// <param name="redirectUri">The location the response redirected to.</param>
public void SetRedirect(string path, HttpStatusCode status, Uri redirectUri)
{
    // Redirects on the same host as _baseUri are stored as path + query;
    // anything else is stored as the absolute URI.
    string relativeTarget = redirectUri.PathAndQuery;
    string location = _baseUri.IsSameHost(redirectUri)
        ? relativeTarget
        : redirectUri.AbsoluteUri;
    uint statusCode = (uint)status;

    _data.Update(path, existing =>
    {
        // Flag a modification when either the status or the stored redirect differs.
        bool changed = Modified
            || existing.HttpStatus != statusCode
            || existing.ContentRedirect != location;
        Modified = changed;

        return existing.ToBuilder()
            .SetContentUri(path)
            .SetLastCrawled(CrawlTime)
            .SetLastValid(CrawlTime)
            .SetHttpStatus(statusCode)
            .SetContentRedirect(location)
            .Build();
    });

    AddUri(redirectUri);
}
/// <summary>
/// Moves a stored page from one address to another within the same site and,
/// when requested, leaves a redirect record at the old address.
/// </summary>
/// <param name="targetLink">The full http address of the page to move the source content to.</param>
/// <param name="sourceLink">The full http address of the page you want to move.</param>
/// <param name="redirect">True to insert a redirect after moving the content.</param>
public void Rename(
    [Argument("page", "p", Description = "The full http address of the page to move the source content to.")] string targetLink,
    [Argument("source", "s", Description = "The full http address of the page you want to move.")] string sourceLink,
    [Argument("redirect", "r", DefaultValue = true, Description = "True to insert a redirect after moving the content.")] bool redirect)
{
    Uri targetUri = new Uri(targetLink, UriKind.Absolute);
    Uri sourceUri = new Uri(sourceLink, UriKind.Absolute);
    Check.Assert<InvalidOperationException>(
        sourceUri.IsSameHost(targetUri),
        "The source and target should be in the same site.");

    using (ContentStorage store = new ContentStorage(StoragePath(sourceLink), false))
    {
        string fromPath = sourceUri.NormalizedPathAndQuery();
        string toPath = targetUri.NormalizedPathAndQuery();
        store.Rename(fromPath, toPath);

        if (!redirect)
        {
            return;
        }

        // Re-create a record at the old path that points at the new one.
        DateTime time = DateTime.Now;
        ContentRecord.Builder placeholder = store.New(fromPath, time);
        placeholder.SetHttpStatus((uint)HttpStatusCode.Redirect);
        placeholder.SetContentRedirect(toPath);
        store.Add(placeholder.ContentUri, placeholder.Build());
    }
}
/// <summary>
/// Stores a successfully fetched response body for <paramref name="path"/> and updates its record.
/// </summary>
/// <remarks>
/// Sequence, as implemented below:
/// 1. An empty body is reported on standard error but still processed.
/// 2. The existing record is read from <c>_data[path]</c> and copied into a builder that gets the
///    crawl timestamps, status, content type and length; any stored redirect is cleared, and the
///    ETag is set only when <paramref name="etag"/> is non-empty.
/// 3. The SHA-256 of <paramref name="contents"/> is compared with the builder's HashOriginal; only
///    when it differs is <c>Modified</c> set, the hash and modification date updated, and the bytes
///    written through <c>_data.WriteContent</c>, which returns a pending transaction.
/// 4. The pending write is committed only if <c>_data.AddOrUpdate</c> returns true; on a false
///    return, or on any exception, the <c>finally</c> block rolls it back and disposes it.
/// 5. <c>ProcessFileContent</c> is then called with the newly built record and the raw contents;
///    this call is not conditional on the AddOrUpdate result.
/// </remarks>
/// <param name="path">Key of the content record.</param>
/// <param name="status">HTTP status of the response; stored as an unsigned integer.</param>
/// <param name="contentType">Content type to store on the record.</param>
/// <param name="etag">ETag of the response; ignored when null or empty.</param>
/// <param name="modified">Not read anywhere in this method body.</param>
/// <param name="contents">Response body; its Length is read unconditionally, so it must not be null.</param>
public void SetContents(string path, HttpStatusCode status, string contentType, string etag, DateTime?modified, byte[] contents) { if (contents.Length == 0) { Console.Error.WriteLine("{0} - {1} (Content is empty)", (int)status, path); } ContentRecord rec = _data[path]; ITransactable pendingUpdate = null; try { ContentRecord.Builder builder = rec.ToBuilder() .SetContentUri(path) .SetLastCrawled(CrawlTime) .SetLastValid(CrawlTime) .SetHttpStatus((uint)status) ; builder.ClearContentRedirect(); builder.SetContentType(contentType); builder.SetContentLength((uint)contents.Length); if (!String.IsNullOrEmpty(etag)) { builder.SetETag(etag); } string hash = Hash.SHA256(contents).ToString(); if (hash != builder.HashOriginal) { Modified = true; builder.SetHashOriginal(hash); builder.SetDateModified(CrawlTime); pendingUpdate = _data.WriteContent(builder, contents); } if (_data.AddOrUpdate(path, rec = builder.Build())) { if (pendingUpdate != null) { pendingUpdate.Commit(); pendingUpdate.Dispose(); pendingUpdate = null; } } } finally { if (pendingUpdate != null) { pendingUpdate.Rollback(); pendingUpdate.Dispose(); } } ProcessFileContent(rec, contents); }
/// <summary>
/// Refreshes the crawl timestamps of the record at <paramref name="path"/> without
/// touching any other field.
/// </summary>
/// <param name="path">Key of the content record to update.</param>
public void SetNotModified(string path)
{
    _data.Update(path, existing => existing.ToBuilder()
        .SetLastCrawled(CrawlTime)
        .SetLastValid(CrawlTime)
        .Build());
}
/// <summary>
/// Reports a failed request on standard error and stores the failing status on the record.
/// </summary>
/// <remarks>Only the last-crawled time is updated; the last-valid time is left as it was.</remarks>
/// <param name="path">Key of the content record to update.</param>
/// <param name="status">HTTP status of the response; stored as an unsigned integer.</param>
public void SetHttpError(string path, HttpStatusCode status)
{
    Console.Error.WriteLine("{0} - {1}", (int)status, path);

    uint statusCode = (uint)status;
    _data.Update(path, existing => existing.ToBuilder()
        .SetContentUri(path)
        .SetLastCrawled(CrawlTime)
        .SetHttpStatus(statusCode)
        .Build());
}
/// <summary>
/// Writes <paramref name="contents"/> to the content store, compressed when
/// <c>TryCompress</c> succeeds, and records lengths and hash on <paramref name="builder"/>.
/// </summary>
/// <param name="builder">Record builder that receives the content length, stored-bytes hash and compressed length.</param>
/// <param name="contents">Uncompressed content bytes.</param>
/// <returns>The pending write returned by <c>WriteBytes</c>.</returns>
public ITransactable WriteContent(ContentRecord.Builder builder, byte[] contents)
{
    builder.SetContentLength((uint)contents.Length);

    byte[] packed;
    bool isCompressed = contents.TryCompress(out packed);
    byte[] payload = isCompressed ? packed : contents;

    // The hash always describes the bytes that are actually stored.
    builder.SetHashContents(Hash.SHA256(payload).ToString());

    if (isCompressed)
    {
        builder.SetCompressedLength((uint)packed.Length);
    }
    else
    {
        builder.ClearCompressedLength();
    }

    return WriteBytes(builder, payload);
}
/// <summary>
/// Writes <paramref name="content"/> to the store file identified by the builder's
/// content-store id, allocating a new id first when the builder has none.
/// </summary>
/// <param name="builder">Record builder; receives a newly generated content-store id when it has none.</param>
/// <param name="content">Bytes to write.</param>
/// <returns>
/// The <c>ReplaceFile</c> holding the written bytes. The caller owns it and is
/// responsible for committing or rolling back, and for disposing it.
/// </returns>
private ITransactable WriteBytes(ContentRecord.Builder builder, byte[] content)
{
    AssertModify();

    if (!builder.HasContentStoreId)
    {
        // Keep generating ids until one maps to a file name that does not exist yet.
        ulong fid;
        string name;
        do
        {
            fid = Guid.NewGuid().ToUInt64();
            name = FileName(fid);
        }
        while (File.Exists(name));

        builder.SetContentStoreId(fid);
    }

    ReplaceFile f = new ReplaceFile(FileName(builder.ContentStoreId));
    try
    {
        f.WriteAllBytes(content);
    }
    catch
    {
        // The caller never receives the transaction when the write fails, so it
        // must be released here or it is leaked.
        ((IDisposable)f).Dispose();
        throw;
    }

    return f;
}
/// <summary>
/// Replaces the stored content of the record identified by <c>rec.ContentUri</c>
/// and commits the write immediately.
/// </summary>
/// <param name="rec">Record whose ContentUri selects the entry to update.</param>
/// <param name="bytes">New content bytes.</param>
/// <exception cref="ApplicationException">
/// Thrown when <c>Update</c> reports failure although the rebuilt record differs from the stored one.
/// </exception>
public void WriteContent(ContentRecord rec, byte[] bytes)
{
    // Set by the update callback: true when the rebuilt record differs from the stored one.
    bool changed = false;

    bool updated = Update(rec.ContentUri, current =>
    {
        ContentRecord.Builder draft = current.ToBuilder();
        using (ITransactable pending = WriteContent(draft, bytes))
        {
            pending.Commit();
        }

        ContentRecord replacement = draft.Build();
        changed = !replacement.Equals(current);
        return replacement;
    });

    if (updated || !changed)
    {
        return;
    }

    throw new ApplicationException("Record not found.");
}
/// <summary>
/// Copies every record in <c>_content</c>, with its stored bytes when it has any,
/// into <paramref name="writer"/>.
/// </summary>
/// <remarks>
/// When <c>Overwrite</c> is true records are written with <c>AddOrUpdate</c>, otherwise with
/// <c>Add</c>. A record that could not be written is reported on standard error, and its
/// content write is left uncommitted.
/// </remarks>
/// <param name="writer">Destination storage.</param>
/// <param name="fnprocess">Optional transform applied to each record's bytes before writing; may be null.</param>
public void CopyTo(ContentStorage writer, Func<ContentRecord, byte[], byte[]> fnprocess)
{
    foreach (KeyValuePair<string, ContentRecord> entry in _content)
    {
        string path = entry.Key;
        ContentRecord source = entry.Value;
        ContentRecord.Builder copy = source.ToBuilder();
        bool stored;

        if (!source.HasContentStoreId)
        {
            // Nothing in the content store for this record: copy the record alone.
            stored = Overwrite
                ? writer.AddOrUpdate(path, copy.Build())
                : writer.Add(path, copy.Build());
        }
        else
        {
            byte[] payload = _content.ReadContent(source, true);
            if (fnprocess != null)
            {
                payload = fnprocess(source, payload);
            }

            using (ITransactable pending = writer.WriteContent(copy, payload))
            {
                stored = Overwrite
                    ? writer.AddOrUpdate(path, copy.Build())
                    : writer.Add(path, copy.Build());

                // Only keep the written bytes when the record itself was accepted.
                if (stored)
                {
                    pending.Commit();
                }
            }
        }

        if (!stored)
        {
            Console.Error.WriteLine("Path already exists " + path);
        }
    }
}
/// <summary>
/// Finds pages in a site copy whose stored content hash matches an earlier page and
/// replaces each duplicate with either a redirect or rewritten links.
/// </summary>
/// <remarks>
/// The first page seen with a given content hash is kept; every later page with the same hash
/// is listed on standard output as a replacement. After confirmation (or with
/// <paramref name="noPrompt"/>), each duplicate record is removed. When
/// <paramref name="remove"/> is false a redirect record pointing at the kept page is added
/// in its place; when true, links are first rewritten through a <c>ContentParser</c>.
/// </remarks>
/// <param name="site">The root http address of the website copy.</param>
/// <param name="remove">True to remove the page and modify source links, otherwise inserts a redirect.</param>
/// <param name="noPrompt">True to stop prompt for confirmation before changing content.</param>
public void Deduplicate(
    [Argument("site", "s", Description = "The root http address of the website copy.")] string site,
    [Argument("remove", "r", DefaultValue = false, Description = "True to remove the page and modify source links, otherwise inserts a redirect.")] bool remove,
    [Argument("noprompt", "q", DefaultValue = false, Description = "True to stop prompt for confirmation before changing content.")] bool noPrompt)
{
    using (ContentStorage store = new ContentStorage(StoragePath(site), false))
    {
        // duplicate path -> path of the first page seen with the same content hash
        Dictionary<string, string> replacements = new Dictionary<string, string>(StringComparer.Ordinal);
        // content hash -> path of the first page seen with that hash
        Dictionary<string, string> hashes = new Dictionary<string, string>(StringComparer.Ordinal);

        foreach (KeyValuePair<string, ContentRecord> item in store)
        {
            if (!item.Value.HasHashContents)
            {
                continue;
            }

            string contentHash = item.Value.HashContents;
            string original;
            if (!hashes.TryGetValue(contentHash, out original))
            {
                hashes.Add(contentHash, item.Key);
                continue;
            }

            replacements[item.Key] = original;
            Console.WriteLine("{0,-38} => {1,-38}", item.Key, original);
        }

        if (replacements.Count == 0)
        {
            return;
        }

        if (!noPrompt && !new ConfirmPrompt().Continue("Replace all of the above links"))
        {
            return;
        }

        Uri baseUri = new Uri(site, UriKind.Absolute);

        if (remove)
        {
            // Point links that target a duplicate at the page that is kept.
            ContentParser parser = new ContentParser(store, baseUri);
            parser.RewriteUri += u =>
            {
                if (!u.IsSameHost(baseUri))
                {
                    return(u);
                }

                string target;
                if (!replacements.TryGetValue(u.NormalizedPathAndQuery(), out target))
                {
                    return(u);
                }

                return(new Uri(baseUri, target));
            };
            parser.ProcessAll();
        }

        foreach (KeyValuePair<string, string> pair in replacements)
        {
            string duplicate = pair.Key;
            ContentRecord rec = store[duplicate];
            store.Remove(duplicate);

            if (remove)
            {
                continue;
            }

            // Re-add the path as a content-less redirect to the kept page.
            ContentRecord.Builder redirect = rec.ToBuilder();
            redirect.ClearCompressedLength();
            redirect.ClearContentLength();
            redirect.ClearContentStoreId();
            redirect.ClearContentType();
            redirect.ClearHashContents();
            redirect.SetHttpStatus((uint)HttpStatusCode.Redirect);
            redirect.SetContentRedirect(pair.Value);
            store.Add(duplicate, redirect.Build());
        }
    }
}