/// <summary>
/// Passes the content to <c>_parser.ProcessFile</c>, but only when <c>AddUrlsFound</c> is set.
/// </summary>
/// <param name="record">Record describing the content.</param>
/// <param name="contentBytes">Raw bytes of the content.</param>
private void ProcessFileContent(ContentRecord record, byte[] contentBytes)
{
    if (!AddUrlsFound)
    {
        return;
    }

    _parser.ProcessFile(record, contentBytes);
}
/// <summary>
/// Resolves <paramref name="uri"/> against <paramref name="content"/> and derives the response status from the lookup.
/// </summary>
/// <remarks>
/// The status is initialised to InternalServerError and the record to the default instance.
/// A missing record yields NotFound (plus a warning log entry); a record with a content
/// redirect yields Redirect; any other record yields OK. Exceptions are logged and swallowed,
/// leaving whatever status had been assigned up to that point.
/// </remarks>
/// <param name="content">Storage to look the record up in.</param>
/// <param name="uri">Requested URI; its normalized path and query is the lookup key.</param>
public ContentResponse(ContentStorage content, Uri uri)
{
    _content = content;
    _status = HttpStatusCode.InternalServerError;
    _record = ContentRecord.DefaultInstance;

    try
    {
        string path = uri.NormalizedPathAndQuery();

        bool found = _content.TryGetValue(path, out _record);
        if (!found)
        {
            // TryGetValue overwrote _record through the out parameter; restore the default.
            _record = ContentRecord.DefaultInstance;
            _status = HttpStatusCode.NotFound;
            Log.Warning("404 - {0}", path);
            return;
        }

        _status = _record.HasContentRedirect
            ? HttpStatusCode.Redirect
            : HttpStatusCode.OK;
    }
    catch (Exception ex)
    {
        Log.Error(ex, "Exception on {0}", uri);
    }
}
/// <summary>
/// Builds the rewriter's lookup tables from <paramref name="optimizations"/> and, when at
/// least one tag or XPath replacement exists, subscribes to the parser's rewrite events.
/// </summary>
/// <param name="processor">Parser whose events drive the rewriting.</param>
/// <param name="optimizations">Source of the replacement definitions.</param>
/// <param name="namedValues">Named values kept for later use by the rewriter.</param>
public XmlRewriter(ContentParser processor, HttpCloneOptimizations optimizations, IDictionary<string, string> namedValues)
{
    _context = ContentRecord.DefaultInstance;
    _processor = processor;
    _optimizations = optimizations;
    _namedValues = namedValues;
    _elements = new Dictionary<XmlLightElement, object>();
    _replaces = new Dictionary<object, HttpCloneOptimizationReplace>();

    // Map every replace item back to the replacement that owns it.
    foreach (var replacement in optimizations.AllItems())
    {
        foreach (var replaceItem in replacement.ReplaceItem)
        {
            _replaces.Add(replaceItem, replacement);
        }
    }

    // Tag-based items, grouped by tag name.
    _bytag = optimizations.AllItems()
        .SelectMany(entry => entry.ReplaceItem.OfType<HttpCloneTag>())
        .ToLookup(tag => tag.TagName);

    // XPath-based items, as a flat array.
    _xpaths = optimizations.AllItems()
        .SelectMany(entry => entry.ReplaceItem.OfType<HttpCloneXPath>())
        .ToArray();

    bool hasRules = _bytag.Count > 0 || _xpaths.Length > 0;
    if (!hasRules)
    {
        return;
    }

    processor.ContextChanged += ContextChanged;
    processor.RewriteElement += RewriteElement;
    processor.RewriteXmlDocument += RewriteXmlDocument;
}
/// <summary>
/// Forwards <paramref name="man"/> to the base constructor and stores
/// <paramref name="contentRecord"/> in <c>Record</c>.
/// </summary>
/// <param name="man">Database manager handed to the base class.</param>
/// <param name="contentRecord">Record this instance wraps.</param>
internal BaseContent(DatabaseManager man, ContentRecord contentRecord)
    : base(man)
{
    this.Record = contentRecord;
}
/// <summary>
/// Rewrites one record's content if its MIME type is a known document type; otherwise
/// writes the content back only when <c>OnSkipContent</c> supplies bytes.
/// </summary>
/// <param name="record">Record being processed; also raised through the context-changed event.</param>
/// <param name="readBytes">Callback that supplies the record's current bytes.</param>
/// <param name="writeBytes">Callback that receives the bytes to store.</param>
private void ProcessFile(ContentRecord record, Func<ContentRecord, byte[]> readBytes, Action<ContentRecord, byte[]> writeBytes)
{
    string mimeType = record.MimeType;
    OnContextChanged(record);

    HttpCloneDocType docType;
    if (!_documentTypes.TryGetValue(mimeType, out docType))
    {
        // Not a document type that is parsed: pass it through only if OnSkipContent says so.
        byte[] passthrough;
        if (OnSkipContent(record, out passthrough))
        {
            writeBytes(record, passthrough);
        }
        return;
    }

    byte[] raw = readBytes(record);
    string text = Encoding.UTF8.GetString(raw);
    text = OnRewriteContent(text);

    string rewritten = ProcessFileText(record.ContentUri, mimeType, RelativeUri && docType.UsesRelativePaths, text);

    // ProcessFileText signals "no change" by handing back the very same string instance.
    bool changed = !ReferenceEquals(text, rewritten);
    if (changed || RewriteAll)
    {
        writeBytes(record, Encoding.UTF8.GetBytes(rewritten));
    }
}
/// <summary>
/// Raises <c>ContextChanged</c> with <paramref name="rec"/> when at least one handler is attached.
/// </summary>
/// <param name="rec">Record that becomes the current context.</param>
private void OnContextChanged(ContentRecord rec)
{
    if (ContextChanged == null)
    {
        return;
    }

    ContextChanged(rec);
}
/// <summary>
/// Updates the record stored under <paramref name="path"/> with the result of a fetch and
/// stores the body when its hash has changed.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. An empty body is reported on standard error (status code and path); processing continues.
/// 2. The existing record is read through the <c>_data</c> indexer and turned into a builder;
///    content URI, last-crawled/last-valid (both <c>CrawlTime</c>), HTTP status, content type and
///    content length are set, any content redirect is cleared, and the ETag is set only when non-empty.
/// 3. The SHA-256 hash of the body is compared with the builder's <c>HashOriginal</c>. When it
///    differs, <c>Modified</c> is set, the hash and modification date are updated and the body is
///    written through <c>_data.WriteContent</c>, which yields a pending <c>ITransactable</c>.
/// 4. The pending write is committed only if <c>_data.AddOrUpdate</c> returns true; in every other
///    case (including exceptions) the <c>finally</c> block rolls it back and disposes it.
/// 5. Finally the record and body are handed to <c>ProcessFileContent</c>.
/// NOTE(review): <paramref name="modified"/> is not referenced in the body, and a null
/// <paramref name="contents"/> would throw on the first <c>Length</c> access — confirm both are intended.
/// </remarks>
/// <param name="path">Key of the record in <c>_data</c>; also stored as the content URI.</param>
/// <param name="status">HTTP status of the fetch.</param>
/// <param name="contentType">Content type stored on the record.</param>
/// <param name="etag">ETag to store; ignored when null or empty.</param>
/// <param name="modified">Not used by this method.</param>
/// <param name="contents">Body bytes.</param>
public void SetContents(string path, HttpStatusCode status, string contentType, string etag, DateTime?modified, byte[] contents) { if (contents.Length == 0) { Console.Error.WriteLine("{0} - {1} (Content is empty)", (int)status, path); } ContentRecord rec = _data[path]; ITransactable pendingUpdate = null; try { ContentRecord.Builder builder = rec.ToBuilder() .SetContentUri(path) .SetLastCrawled(CrawlTime) .SetLastValid(CrawlTime) .SetHttpStatus((uint)status) ; builder.ClearContentRedirect(); builder.SetContentType(contentType); builder.SetContentLength((uint)contents.Length); if (!String.IsNullOrEmpty(etag)) { builder.SetETag(etag); } string hash = Hash.SHA256(contents).ToString(); if (hash != builder.HashOriginal) { Modified = true; builder.SetHashOriginal(hash); builder.SetDateModified(CrawlTime); pendingUpdate = _data.WriteContent(builder, contents); } if (_data.AddOrUpdate(path, rec = builder.Build())) { if (pendingUpdate != null) { pendingUpdate.Commit(); pendingUpdate.Dispose(); pendingUpdate = null; } } } finally { if (pendingUpdate != null) { pendingUpdate.Rollback(); pendingUpdate.Dispose(); } } ProcessFileContent(rec, contents); }
/// <summary>
/// Decides whether content that is not parsed should still be written back.
/// </summary>
/// <param name="record">Record under consideration.</param>
/// <param name="bytes">
/// The stored content when the method returns true; otherwise null.
/// </param>
/// <returns>
/// True only when <c>RewriteAll</c> is set and the record has a content store id.
/// </returns>
private bool OnSkipContent(ContentRecord record, out byte[] bytes)
{
    bool shouldRead = RewriteAll && record.HasContentStoreId;
    bytes = shouldRead ? _content.ReadContent(record) : null;
    return shouldRead;
}
/// <summary>
/// Calls <c>Reset</c> and then, if <paramref name="saveResult"/> is not null, copies its
/// Id, ContentValue, HierachyId and Name into this entity.
/// </summary>
/// <param name="saveResult">Record to copy from; may be null.</param>
public ContentEntity(ContentRecord saveResult)
{
    Reset();

    if (saveResult == null)
    {
        return;
    }

    Id = saveResult.Id;
    ContentValue = saveResult.ContentValue;
    HierachyId = saveResult.HierachyId;
    Name = saveResult.Name;
}
/// <summary>
/// Loads the member entity for <paramref name="id"/> in the current application and returns
/// its icon as a content record.
/// </summary>
/// <param name="id">Key passed to the member service together with the application id.</param>
/// <returns>
/// Null when the member entity is not found; otherwise a record carrying the icon MIME type,
/// the icon's last-modified time (<c>DateTime.MaxValue</c> when none is set) and the icon data.
/// </returns>
public static async Task<ContentRecord> GetMemberIcon(string id)
{
    var service = new UserAppMemberServiceProxy();
    var member = await service.LoadEntityByKeyAsync(Cntx, ApplicationContext.App.ID, id);
    if (member == null)
    {
        return null;
    }

    var icon = new ContentRecord
    {
        MimeType = member.IconMime,
        LastModified = member.IconLastModified ?? DateTime.MaxValue
    };
    icon.Data = await service.LoadEntityIconImgAsync(Cntx, ApplicationContext.App.ID, id);
    return icon;
}
/// <summary>
/// Saves through the handler returned by <c>GetHandler</c> and wraps the saved record.
/// </summary>
/// <returns>
/// A new <c>ContentEntity</c> built from the saved record, or null when there is no handler
/// or the handler's <c>Save</c> returns null.
/// </returns>
public ContentEntity Set()
{
    var handler = GetHandler();

    ContentRecord saved = handler != null ? handler.Save() : null;

    return saved != null ? new ContentEntity(saved) : null;
}
/// <summary>
/// Loads the user-detail entity for <paramref name="id"/> in the current application and
/// returns its photo as a content record.
/// </summary>
/// <param name="id">User id; combined with the application id through <c>GuidMix</c> to form the key.</param>
/// <returns>
/// Null when the detail entity is not found; otherwise a record carrying the photo MIME type,
/// the last-modified time (only when the entity has one) and the photo data.
/// </returns>
public static async Task<ContentRecord> GetUserPhoto(string id)
{
    var service = new UserDetailServiceProxy();
    var detail = await service.LoadEntityByKeyAsync(Cntx, GuidMix(ApplicationContext.App.ID, id));
    if (detail == null)
    {
        return null;
    }

    var photo = new ContentRecord { MimeType = detail.PhotoMime };
    if (detail.LastModified.HasValue)
    {
        photo.LastModified = detail.LastModified.Value;
    }

    // NOTE(review): the photo is loaded with a synchronous call inside an async method —
    // confirm whether an awaitable variant exists and should be used here.
    photo.Data = service.LoadEntityPhoto(Cntx, GuidMix(ApplicationContext.App.ID, id));
    return photo;
}
/// <summary>
/// Walks every stored page of <paramref name="site"/> and prints the content URI of each
/// record in which a visited link's original string equals <paramref name="link"/> (ordinal comparison).
/// </summary>
/// <param name="site">Root address of the site copy; used for the storage path and as the parser's base URI.</param>
/// <param name="link">Link text to look for.</param>
public void LinkSource(
    [Argument("site", "s", Description = "The root http address of the website copy.")] string site,
    [Argument("link", "l", Description = "The target link to search for.")] string link)
{
    using (var storage = new ContentStorage(StoragePath(site), true))
    {
        var parser = new ContentParser(storage, new Uri(site, UriKind.Absolute));

        // Tracks the record the parser is currently working on.
        ContentRecord current = ContentRecord.DefaultInstance;
        parser.ContextChanged += changed => current = changed;

        parser.VisitUri += visited =>
        {
            if (string.Equals(link, visited.OriginalString, StringComparison.Ordinal))
            {
                Console.WriteLine(current.ContentUri);
            }
        };

        // The write callback does nothing, so nothing is written back through it.
        parser.ProcessAll((r, b) => { });
    }
}
/// <summary>
/// Stores <paramref name="obj"/> as the current context and empties <c>_elements</c>.
/// </summary>
/// <param name="obj">Record that becomes the current context.</param>
void ContextChanged(ContentRecord obj)
{
    this._context = obj;
    this._elements.Clear();
}
/// <summary>
/// Stores <paramref name="obj"/> as the current context.
/// </summary>
/// <param name="obj">Record that becomes the current context.</param>
void ContextChanged(ContentRecord obj)
{
    this._context = obj;
}
/// <summary>
/// Runs <paramref name="contentBytes"/> through the delegate-based <c>ProcessFile</c>
/// overload, using in-memory read and write callbacks.
/// </summary>
/// <param name="record">Record the bytes belong to.</param>
/// <param name="contentBytes">Current content of the record.</param>
/// <returns>
/// The bytes handed to the write callback, or <paramref name="contentBytes"/> itself when
/// the write callback was never invoked.
/// </returns>
public byte[] ProcessFile(ContentRecord record, byte[] contentBytes)
{
    byte[] current = contentBytes;

    ProcessFile(
        record,
        source => current,
        (target, written) => current = written);

    return current;
}
/// <summary>
/// Regenerates the stored search template from the configured template page and makes sure
/// the search stylesheet record exists.
/// </summary>
/// <remarks>
/// The template is rebuilt when the source page has stored content and either
/// <paramref name="forced"/> is true or its original hash differs from the one on the current
/// template record. Metadata is copied from the source page, its content is run through a
/// <c>ContentParser</c> that makes URIs relative to the search path, and the result of
/// <c>CreateTemplate</c> is written inside a transaction.
/// </remarks>
/// <param name="forced">True to rebuild even when the hashes match.</param>
private void UpdateTemplate(bool forced)
{
    string tempPath = new Uri(_baseUri, _config.Searching.TemplateUri).NormalizedPathAndQuery();

    // Start from the existing template record when there is one, otherwise from a new one.
    ContentRecord record;
    ContentRecord.Builder update;
    if (_data.TryGetValue(TemplatePath, out record))
    {
        update = record.ToBuilder();
    }
    else
    {
        update = _data.New(TemplatePath, DateTime.Now);
    }

    ContentRecord template;
    if (_data.TryGetValue(tempPath, out template))
    {
        if (template.HasContentStoreId && (forced || template.HashOriginal != update.HashOriginal))
        {
            update.SetContentType(template.ContentType);
            update.SetHashOriginal(template.HashOriginal);
            update.SetLastCrawled(template.LastCrawled);
            update.SetLastValid(template.LastValid);
            update.SetDateModified(DateTime.Now);
            update.SetHttpStatus(template.HttpStatus);

            update.ClearContentRedirect();
            if (template.HasContentRedirect)
            {
                // Copy the redirect from the source page. Reading it back from `update`
                // here would only return the value that was cleared on the line above.
                update.SetContentRedirect(template.ContentRedirect);
            }

            ContentParser parser = new ContentParser(_data, _baseUri);
            parser.RelativeUri = true;
            parser.RewriteUri += uri => new Uri(uri.OriginalString);

            // Relative links are computed against the search page, not the source page.
            Uri templateUri = new Uri(_baseUri, SearchTemplate.SearchPath);
            parser.MakeRelativeUri = (s, d) => templateUri.MakeRelativeUri(d);

            byte[] mapped = parser.ProcessFile(template, _data.ReadContent(template, true));
            string templateHtml = CreateTemplate(Encoding.UTF8.GetString(mapped));

            using (ITransactable trans = _data.WriteContent(update, Encoding.UTF8.GetBytes(templateHtml)))
            {
                _data.AddOrUpdate(TemplatePath, update.Build());
                trans.Commit();
            }
        }
    }

    if (!_data.TryGetValue(SearchCssPath, out record))
    {
        ContentRecord cssRecord = _data.New(SearchCssPath, DateTime.Now)
            .SetContentType("text/css")
            .SetHttpStatus(200)
            .Build();

        _data.Add(cssRecord.ContentUri, cssRecord);

        // NOTE(review): the result of this WriteContent call is discarded, unlike the
        // transactional write above — confirm this overload needs no commit.
        _data.WriteContent(cssRecord, Encoding.UTF8.GetBytes(Properties.Resources.search_css));
    }
}
/// <summary>
/// Finds stored pages whose content hash matches that of a page seen earlier in the
/// enumeration and, after confirmation, either removes them (rewriting links to the first
/// page) or replaces them with redirect records.
/// </summary>
/// <param name="site">Root address of the site copy; used for the storage path and as base URI.</param>
/// <param name="remove">True to delete duplicates and rewrite links; false to store a redirect in their place.</param>
/// <param name="noPrompt">True to skip the confirmation prompt.</param>
public void Deduplicate(
    [Argument("site", "s", Description = "The root http address of the website copy.")] string site,
    [Argument("remove", "r", DefaultValue = false, Description = "True to remove the page and modify source links, otherwise inserts a redirect.")] bool remove,
    [Argument("noprompt", "q", DefaultValue = false, Description = "True to stop prompt for confirmation before changing content.")] bool noPrompt)
{
    using (ContentStorage store = new ContentStorage(StoragePath(site), false))
    {
        // duplicate path -> path of the first page seen with the same hash
        var redirects = new Dictionary<string, string>(StringComparer.Ordinal);
        // content hash -> path of the first page seen with that hash
        var firstPathByHash = new Dictionary<string, string>(StringComparer.Ordinal);

        foreach (KeyValuePair<string, ContentRecord> entry in store)
        {
            if (!entry.Value.HasHashContents)
            {
                continue;
            }

            string firstPath;
            if (!firstPathByHash.TryGetValue(entry.Value.HashContents, out firstPath))
            {
                firstPathByHash.Add(entry.Value.HashContents, entry.Key);
                continue;
            }

            redirects[entry.Key] = firstPath;
            Console.WriteLine("{0,-38} => {1,-38}", entry.Key, firstPath);
        }

        if (redirects.Count == 0)
        {
            return;
        }

        if (!noPrompt && !new ConfirmPrompt().Continue("Replace all of the above links"))
        {
            return;
        }

        Uri baseUri = new Uri(site, UriKind.Absolute);

        if (remove)
        {
            // Point every same-host link at the surviving copy before deleting the duplicates.
            ContentParser parser = new ContentParser(store, baseUri);
            parser.RewriteUri += link =>
            {
                string replacement;
                bool isDuplicate = link.IsSameHost(baseUri)
                    && redirects.TryGetValue(link.NormalizedPathAndQuery(), out replacement);
                return isDuplicate ? new Uri(baseUri, replacement) : link;
            };
            parser.ProcessAll();
        }

        foreach (string duplicate in redirects.Keys)
        {
            ContentRecord rec = store[duplicate];
            store.Remove(duplicate);

            if (remove)
            {
                continue;
            }

            // Keep the path alive as a redirect record stripped of all content fields.
            ContentRecord.Builder builder = rec.ToBuilder();
            builder
                .ClearCompressedLength()
                .ClearContentLength()
                .ClearContentStoreId()
                .ClearContentType()
                .ClearHashContents()
                .SetHttpStatus((uint)HttpStatusCode.Redirect)
                .SetContentRedirect(redirects[duplicate]);
            store.Add(duplicate, builder.Build());
        }
    }
}
/// <summary>
/// Parses one multipart/form-data section of <paramref name="sourceString"/>, searching from
/// <paramref name="pos"/>, and appends the resulting record to <c>records</c>.
/// </summary>
/// <param name="pos">Index from which to search for the next boundary line.</param>
/// <param name="sourceString">The text being parsed.</param>
/// <param name="boundLine">The boundary line that delimits sections.</param>
/// <returns>
/// -1 when no further complete section can be found; the index of the end of the
/// Content-Disposition line when the section is skipped (no <c>name</c> attribute, or an
/// unterminated Content-Type line); otherwise the index at which the section body ends.
/// </returns>
private int ParseSection(int pos, string sourceString, string boundLine)
{
    const string dispositionPrefix = "Content-Disposition: form-data;";
    const string contentTypePrefix = "Content-Type: ";

    // All searches are ordinal: this is protocol text, and culture-sensitive IndexOf can
    // mis-handle line breaks and ignorable characters.
    var boundIndex = sourceString.IndexOf(boundLine, pos, StringComparison.Ordinal);
    if (boundIndex < 0)
    {
        return -1;
    }

    var descrIndex = sourceString.IndexOf(dispositionPrefix, boundIndex, StringComparison.Ordinal);
    if (descrIndex < 0)
    {
        return -1;
    }

    var ctxHeaderEnd = sourceString.IndexOf(NewLineHeader, descrIndex, StringComparison.Ordinal);
    if (ctxHeaderEnd < 0)
    {
        return -1;
    }

    // The header line looks like
    //   Content-Disposition: form-data; name="uploadedFile"; filename="settings_news.xml"
    // or
    //   Content-Disposition: form-data; name="accountId"
    var attrStart = descrIndex + dispositionPrefix.Length;
    var headerStr = sourceString.Substring(attrStart, ctxHeaderEnd - attrStart);

    var dicPtrVal = new Dictionary<string, string>();
    var attrParts = headerStr.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (var attrPart in attrParts)
    {
        // Split on the first '=' only, so values that contain '=' are kept intact.
        var nameValParts = attrPart.Split(new[] { '=' }, 2, StringSplitOptions.RemoveEmptyEntries);
        if (nameValParts.Length != 2)
        {
            continue;
        }

        // Indexer instead of Add: a repeated attribute overwrites rather than throwing.
        dicPtrVal[nameValParts[0].Trim()] = nameValParts[1].Trim(' ', '"');
    }

    // The dictionary now looks like { name, uploadedFile }, { filename, settings_news.xml }.
    string fieldName, fileName;
    if (!dicPtrVal.TryGetValue("name", out fieldName))
    {
        return ctxHeaderEnd;
    }
    if (!dicPtrVal.TryGetValue("filename", out fileName))
    {
        fileName = string.Empty;
    }

    // The next line is either empty or holds e.g. "Content-Type: text/xml".
    var ctxTypeStart = ctxHeaderEnd + NewLineHeader.Length;
    // Index of the first character of the data, assuming the next line is empty.
    var bodyStart = ctxTypeStart + NewLineHeader.Length;

    // Parse the content type if one is given. CompareOrdinal copes with fewer than
    // contentTypePrefix.Length characters remaining, where Substring would throw.
    var fileType = string.Empty;
    if (string.CompareOrdinal(sourceString, ctxTypeStart, contentTypePrefix, 0, contentTypePrefix.Length) == 0)
    {
        var lineEndIndex = sourceString.IndexOf(NewLineHeader, ctxTypeStart, StringComparison.Ordinal);
        if (lineEndIndex < 0)
        {
            return ctxHeaderEnd;
        }

        var typeStart = ctxTypeStart + contentTypePrefix.Length;
        fileType = sourceString.Substring(typeStart, lineEndIndex - typeStart);
        bodyStart = lineEndIndex + NewLineHeader.Length;
    }

    // Input that ends right after the headers has no body to read.
    if (bodyStart > sourceString.Length)
    {
        return -1;
    }

    // Extract the body: everything up to the line break that precedes the next boundary.
    var bodyEnd = sourceString.IndexOf(NewLineHeader + boundLine, bodyStart, StringComparison.Ordinal);
    if (bodyEnd < 0)
    {
        return -1;
    }
    var strBody = sourceString.Substring(bodyStart, bodyEnd - bodyStart);

    // Create the record.
    var rec = new ContentRecord
    {
        AttachedFileType = fileType,
        FileName = fileName,
        Name = fieldName,
        FileData = strBody
    };
    records.Add(rec);
    return bodyEnd;
}