Ejemplo n.º 1
0
 /// <summary>
 /// Forwards a record and its raw bytes to <c>_parser.ProcessFile</c>, but only
 /// while the <c>AddUrlsFound</c> flag is set; otherwise does nothing.
 /// </summary>
 /// <param name="record">Record describing the content.</param>
 /// <param name="contentBytes">Raw bytes of the content.</param>
 private void ProcessFileContent(ContentRecord record, byte[] contentBytes)
 {
     if (!AddUrlsFound)
     {
         return;
     }

     _parser.ProcessFile(record, contentBytes);
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Looks up the normalized path-and-query of <paramref name="uri"/> in
 /// <paramref name="content"/> and stores the resulting record and HTTP status.
 /// </summary>
 /// <remarks>
 /// Status is <c>Redirect</c> or <c>OK</c> when a record is found, <c>NotFound</c>
 /// (with a warning logged) when it is not, and stays <c>InternalServerError</c>
 /// if anything throws during the lookup (the exception is logged, not rethrown).
 /// </remarks>
 /// <param name="content">Storage to search.</param>
 /// <param name="uri">Requested address.</param>
 public ContentResponse(ContentStorage content, Uri uri)
 {
     _content = content;
     _status  = HttpStatusCode.InternalServerError;
     _record  = ContentRecord.DefaultInstance;

     try
     {
         string lookupKey = uri.NormalizedPathAndQuery();

         if (!_content.TryGetValue(lookupKey, out _record))
         {
             // The failed lookup overwrote _record; restore the default instance.
             _record = ContentRecord.DefaultInstance;
             _status = HttpStatusCode.NotFound;
             Log.Warning("404 - {0}", lookupKey);
             return;
         }

         _status = _record.HasContentRedirect
             ? HttpStatusCode.Redirect
             : HttpStatusCode.OK;
     }
     catch (Exception ex)
     {
         Log.Error(ex, "Exception on {0}", uri);
     }
 }
Ejemplo n.º 3
0
            /// <summary>
            /// Builds the lookup tables for the configured optimizations and, when at
            /// least one tag or XPath replacement exists, subscribes to the parser's
            /// <c>ContextChanged</c>, <c>RewriteElement</c> and <c>RewriteXmlDocument</c> events.
            /// </summary>
            /// <param name="processor">Parser whose events are subscribed to.</param>
            /// <param name="optimizations">Source of the replacement items.</param>
            /// <param name="namedValues">Named values stored for later use.</param>
            public XmlRewriter(ContentParser processor, HttpCloneOptimizations optimizations, IDictionary <string, string> namedValues)
            {
                _context       = ContentRecord.DefaultInstance;
                _processor     = processor;
                _optimizations = optimizations;
                _namedValues   = namedValues;
                _elements      = new Dictionary <XmlLightElement, object>();
                _replaces      = new Dictionary <object, HttpCloneOptimizationReplace>();

                // Map every replace item back to the optimization that owns it.
                foreach (var optimization in optimizations.AllItems())
                {
                    foreach (var replaceItem in optimization.ReplaceItem)
                    {
                        _replaces.Add(replaceItem, optimization);
                    }
                }

                // Tag-based replacements, grouped by tag name.
                _bytag = (from optimization in optimizations.AllItems()
                          from tag in optimization.ReplaceItem.OfType <HttpCloneTag>()
                          select tag)
                         .ToLookup(tag => tag.TagName);

                // XPath-based replacements, kept as a flat array.
                _xpaths = (from optimization in optimizations.AllItems()
                           from xpath in optimization.ReplaceItem.OfType <HttpCloneXPath>()
                           select xpath)
                          .ToArray();

                // Nothing to rewrite: leave the parser events untouched.
                if (_bytag.Count == 0 && _xpaths.Length == 0)
                {
                    return;
                }

                processor.ContextChanged     += ContextChanged;
                processor.RewriteElement     += RewriteElement;
                processor.RewriteXmlDocument += RewriteXmlDocument;
            }
Ejemplo n.º 4
0
 /// <summary>
 /// Passes <paramref name="man"/> to the base constructor and stores
 /// <paramref name="contentRecord"/> in <c>Record</c>.
 /// </summary>
 /// <param name="man">Database manager handed to the base class.</param>
 /// <param name="contentRecord">Record assigned to <c>Record</c>.</param>
 internal BaseContent(DatabaseManager man, ContentRecord contentRecord)
     : base(man)
 {
     Record = contentRecord;
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Processes one record: raises the context-changed notification, then either
        /// rewrites the text of a known document type or passes the bytes through.
        /// </summary>
        /// <remarks>
        /// For a MIME type present in <c>_documentTypes</c> the bytes are decoded as UTF-8,
        /// run through <c>OnRewriteContent</c> and <c>ProcessFileText</c>, and written back
        /// when the result is a different string instance or <c>RewriteAll</c> is set.
        /// For any other MIME type the bytes are written only if <c>OnSkipContent</c>
        /// returns true.
        /// </remarks>
        /// <param name="record">Record being processed.</param>
        /// <param name="readBytes">Callback that supplies the record's bytes.</param>
        /// <param name="writeBytes">Callback that receives the bytes to store.</param>
        private void ProcessFile(ContentRecord record, Func <ContentRecord, byte[]> readBytes, Action <ContentRecord, byte[]> writeBytes)
        {
            string mimeType = record.MimeType;

            OnContextChanged(record);

            HttpCloneDocType docType;

            if (!_documentTypes.TryGetValue(mimeType, out docType))
            {
                // Not a document type we parse; optionally copy the bytes as-is.
                byte[] passthrough;
                if (OnSkipContent(record, out passthrough))
                {
                    writeBytes(record, passthrough);
                }
                return;
            }

            string decoded = Encoding.UTF8.GetString(readBytes(record));
            string source  = OnRewriteContent(decoded);
            string rewritten = ProcessFileText(record.ContentUri, mimeType, RelativeUri && docType.UsesRelativePaths, source);

            // Reference comparison: the text is considered unchanged only if the very same instance came back.
            if (!ReferenceEquals(source, rewritten) || RewriteAll)
            {
                writeBytes(record, Encoding.UTF8.GetBytes(rewritten));
            }
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Invokes <c>ContextChanged</c> with <paramref name="rec"/> when it is non-null.
 /// </summary>
 /// <param name="rec">Record passed to the handlers.</param>
 private void OnContextChanged(ContentRecord rec)
 {
     if (ContextChanged == null)
     {
         return;
     }

     ContextChanged(rec);
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Updates the stored record for <paramref name="path"/> with the result of a
        /// successful fetch and, if the content hash changed, writes the new bytes.
        /// Finally hands the record and bytes to <c>ProcessFileContent</c>.
        /// </summary>
        /// <param name="path">Key of the record in <c>_data</c>; also stored as the content URI.</param>
        /// <param name="status">HTTP status to record.</param>
        /// <param name="contentType">Content type to record.</param>
        /// <param name="etag">ETag to record; ignored when null or empty.</param>
        /// <param name="modified">Not read by this method.</param>
        /// <param name="contents">Fetched bytes; dereferenced immediately, so must not be null.</param>
        public void SetContents(string path, HttpStatusCode status, string contentType, string etag, DateTime?modified, byte[] contents)
        {
            // Empty content is only reported on stderr; processing continues.
            if (contents.Length == 0)
            {
                Console.Error.WriteLine("{0} - {1}  (Content is empty)", (int)status, path);
            }

            ContentRecord rec           = _data[path];
            ITransactable pendingUpdate = null;

            try
            {
                ContentRecord.Builder builder = rec.ToBuilder()
                                                .SetContentUri(path)
                                                .SetLastCrawled(CrawlTime)
                                                .SetLastValid(CrawlTime)
                                                .SetHttpStatus((uint)status)
                ;

                builder.ClearContentRedirect();
                builder.SetContentType(contentType);
                builder.SetContentLength((uint)contents.Length);
                if (!String.IsNullOrEmpty(etag))
                {
                    builder.SetETag(etag);
                }

                // Only write content (and flag the crawl as modified) when the hash differs from the stored one.
                string hash = Hash.SHA256(contents).ToString();
                if (hash != builder.HashOriginal)
                {
                    Modified = true;
                    builder.SetHashOriginal(hash);
                    builder.SetDateModified(CrawlTime);
                    pendingUpdate = _data.WriteContent(builder, contents);
                }

                // Commit the pending content write only after the record update succeeded.
                if (_data.AddOrUpdate(path, rec = builder.Build()))
                {
                    if (pendingUpdate != null)
                    {
                        pendingUpdate.Commit();
                        pendingUpdate.Dispose();
                        pendingUpdate = null;
                    }
                }
            }
            finally
            {
                // Still non-null here means the write was never committed (AddOrUpdate returned false or something threw).
                if (pendingUpdate != null)
                {
                    pendingUpdate.Rollback();
                    pendingUpdate.Dispose();
                }
            }

            ProcessFileContent(rec, contents);
        }
Ejemplo n.º 8
0
 /// <summary>
 /// Decides whether a record that is not parsed should still have its stored bytes copied.
 /// </summary>
 /// <param name="record">Record being skipped.</param>
 /// <param name="bytes">
 /// The record's content read from <c>_content</c> when the method returns true; otherwise null.
 /// </param>
 /// <returns>True when <c>RewriteAll</c> is set and the record has a content store id.</returns>
 private bool OnSkipContent(ContentRecord record, out byte[] bytes)
 {
     bool copyThrough = RewriteAll && record.HasContentStoreId;

     bytes = copyThrough ? _content.ReadContent(record) : null;
     return(copyThrough);
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Calls <c>Reset()</c> and then, when <paramref name="saveResult"/> is not null,
 /// copies its Id, ContentValue, HierachyId and Name onto this entity.
 /// </summary>
 /// <param name="saveResult">Record to copy from; may be null.</param>
 public ContentEntity(ContentRecord saveResult)
 {
     Reset();

     if (saveResult == null)
     {
         return;
     }

     Id           = saveResult.Id;
     ContentValue = saveResult.ContentValue;
     HierachyId   = saveResult.HierachyId;
     Name         = saveResult.Name;
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Loads the membership entity for <paramref name="id"/> in the current application
        /// and returns its icon as a <c>ContentRecord</c>.
        /// </summary>
        /// <param name="id">Key passed to the member service together with the application id.</param>
        /// <returns>
        /// The icon record (MIME type, last-modified time, image data), or null when no
        /// entity is found. A missing last-modified value is reported as <c>DateTime.MaxValue</c>.
        /// </returns>
        public static async Task <ContentRecord> GetMemberIcon(string id)
        {
            var memberService = new UserAppMemberServiceProxy();
            var member        = await memberService.LoadEntityByKeyAsync(Cntx, ApplicationContext.App.ID, id);

            if (member == null)
            {
                return(null);
            }

            var icon = new ContentRecord();

            icon.MimeType     = member.IconMime;
            icon.LastModified = member.IconLastModified ?? DateTime.MaxValue;
            icon.Data         = await memberService.LoadEntityIconImgAsync(Cntx, ApplicationContext.App.ID, id);

            return(icon);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Saves through the handler returned by <c>GetHandler()</c> and wraps the saved
        /// record in a new <c>ContentEntity</c>.
        /// </summary>
        /// <returns>
        /// The new entity, or null when there is no handler or the save returned null.
        /// </returns>
        public ContentEntity Set()
        {
            var handler = GetHandler();

            if (handler == null)
            {
                return(null);
            }

            ContentRecord saved = handler.Save();

            return(saved == null ? null : new ContentEntity(saved));
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Loads the user-detail entity for <paramref name="id"/> in the current application
        /// and returns its photo as a <c>ContentRecord</c>.
        /// </summary>
        /// <param name="id">Value combined with the application id via <c>GuidMix</c> to form the key.</param>
        /// <returns>
        /// The photo record, or null when no entity is found. <c>LastModified</c> is only
        /// assigned when the entity carries a value.
        /// </returns>
        public static async Task <ContentRecord> GetUserPhoto(string id)
        {
            var detailService = new UserDetailServiceProxy();
            var detail        = await detailService.LoadEntityByKeyAsync(Cntx, GuidMix(ApplicationContext.App.ID, id));

            if (detail == null)
            {
                return(null);
            }

            var photo = new ContentRecord();

            photo.MimeType = detail.PhotoMime;

            if (detail.LastModified.HasValue)
            {
                photo.LastModified = detail.LastModified.Value;
            }

            // NOTE(review): this is a synchronous call inside an async method — confirm whether
            // the proxy offers an awaitable counterpart.
            photo.Data = detailService.LoadEntityPhoto(Cntx, GuidMix(ApplicationContext.App.ID, id));

            return(photo);
        }
Ejemplo n.º 13
0
 /// <summary>
 /// Runs the parser over the stored copy of <paramref name="site"/> and prints the
 /// content URI of the record being processed each time a visited URI's original
 /// string equals <paramref name="link"/> (ordinal comparison).
 /// </summary>
 /// <param name="site">Absolute root address of the website copy.</param>
 /// <param name="link">Link text to search for.</param>
 public void LinkSource(
     [Argument("site", "s", Description = "The root http address of the website copy.")]
     string site,
     [Argument("link", "l", Description = "The target link to search for.")]
     string link)
 {
     using (ContentStorage storage = new ContentStorage(StoragePath(site), true))
     {
         ContentParser parser  = new ContentParser(storage, new Uri(site, UriKind.Absolute));
         ContentRecord current = ContentRecord.DefaultInstance;

         // Remember which record the parser is working on so matches can be attributed to it.
         parser.ContextChanged += record => { current = record; };

         parser.VisitUri += visited =>
         {
             if (!string.Equals(link, visited.OriginalString, StringComparison.Ordinal))
             {
                 return;
             }

             Console.WriteLine(current.ContentUri);
         };

         // The write callback is a no-op: nothing is modified.
         parser.ProcessAll((r, b) => { });
     }
 }
Ejemplo n.º 14
0
 /// <summary>
 /// Stores <paramref name="obj"/> as the current context record and empties <c>_elements</c>.
 /// </summary>
 /// <param name="obj">Record that becomes the current context.</param>
 private void ContextChanged(ContentRecord obj)
 {
     _context = obj;

     // Entries collected for the previous record are dropped.
     _elements.Clear();
 }
Ejemplo n.º 15
0
 /// <summary>
 /// Stores <paramref name="obj"/> as the current context record.
 /// </summary>
 /// <param name="obj">Record that becomes the current context.</param>
 private void ContextChanged(ContentRecord obj)
 {
     _context = obj;
 }
Ejemplo n.º 16
0
 /// <summary>
 /// Processes in-memory content for <paramref name="record"/> via the delegate-based
 /// <c>ProcessFile</c> overload and returns the resulting bytes.
 /// </summary>
 /// <param name="record">Record describing the content.</param>
 /// <param name="contentBytes">Bytes supplied to the processing as the record's content.</param>
 /// <returns>
 /// The bytes handed to the write callback, or <paramref name="contentBytes"/> itself
 /// when the callback is never invoked.
 /// </returns>
 public byte[] ProcessFile(ContentRecord record, byte[] contentBytes)
 {
     byte[] current = contentBytes;

     ProcessFile(
         record,
         unused => current,
         (unused, replaced) => { current = replaced; });

     return(current);
 }
Ejemplo n.º 17
0
        /// <summary>
        /// Regenerates the stored search template from the configured template page and
        /// makes sure the search stylesheet record exists.
        /// </summary>
        /// <remarks>
        /// The template is rebuilt only when the source page has stored content and either
        /// <paramref name="forced"/> is true or its original hash differs from the one
        /// recorded on the existing template record.
        /// </remarks>
        /// <param name="forced">True to rebuild even when the hashes match.</param>
        private void UpdateTemplate(bool forced)
        {
            string        tempPath = new Uri(_baseUri, _config.Searching.TemplateUri).NormalizedPathAndQuery();
            ContentRecord record;

            // Start from the existing template record when there is one, otherwise from a new one.
            ContentRecord.Builder update;
            if (_data.TryGetValue(TemplatePath, out record))
            {
                update = record.ToBuilder();
            }
            else
            {
                update = _data.New(TemplatePath, DateTime.Now);
            }

            ContentRecord template;

            if (_data.TryGetValue(tempPath, out template))
            {
                if (template.HasContentStoreId && (forced || template.HashOriginal != update.HashOriginal))
                {
                    update.SetContentType(template.ContentType);
                    update.SetHashOriginal(template.HashOriginal);
                    update.SetLastCrawled(template.LastCrawled);
                    update.SetLastValid(template.LastValid);
                    update.SetDateModified(DateTime.Now);
                    update.SetHttpStatus(template.HttpStatus);
                    update.ClearContentRedirect();
                    if (template.HasContentRedirect)
                    {
                        // Copy the redirect from the source page. Reading it back from `update`
                        // would only return the value that was cleared on the line above.
                        update.SetContentRedirect(template.ContentRedirect);
                    }

                    // Re-map the page's links so they are relative to the search page location.
                    ContentParser parser = new ContentParser(_data, _baseUri);
                    parser.RelativeUri = true;
                    parser.RewriteUri += uri => new Uri(uri.OriginalString);
                    Uri templateUri = new Uri(_baseUri, SearchTemplate.SearchPath);
                    parser.MakeRelativeUri = (s, d) => templateUri.MakeRelativeUri(d);
                    byte[] mapped = parser.ProcessFile(template, _data.ReadContent(template, true));

                    string templateHtml = CreateTemplate(Encoding.UTF8.GetString(mapped));

                    using (ITransactable trans = _data.WriteContent(update, Encoding.UTF8.GetBytes(templateHtml)))
                    {
                        _data.AddOrUpdate(TemplatePath, update.Build());
                        trans.Commit();
                    }
                }
            }

            // Create the stylesheet record from the embedded resource the first time through.
            if (!_data.TryGetValue(SearchCssPath, out record))
            {
                ContentRecord cssRecord = _data.New(SearchCssPath, DateTime.Now)
                                          .SetContentType("text/css")
                                          .SetHttpStatus(200)
                                          .Build();

                _data.Add(cssRecord.ContentUri, cssRecord);
                // NOTE(review): the value returned by WriteContent is discarded here, whereas the
                // template write above commits an ITransactable — confirm this overload persists
                // the bytes without an explicit Commit.
                _data.WriteContent(cssRecord, Encoding.UTF8.GetBytes(Properties.Resources.search_css));
            }
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Finds stored pages whose content hash duplicates an earlier page and replaces
        /// each duplicate either with a redirect record or, when <paramref name="remove"/>
        /// is true, by rewriting links to point at the first page and deleting the duplicate.
        /// </summary>
        /// <param name="site">Absolute root address of the website copy.</param>
        /// <param name="remove">True to delete duplicates and rewrite links; false to store redirects.</param>
        /// <param name="noPrompt">True to skip the confirmation prompt.</param>
        public void Deduplicate(
            [Argument("site", "s", Description = "The root http address of the website copy.")]
            string site,
            [Argument("remove", "r", DefaultValue = false, Description = "True to remove the page and modify source links, otherwise inserts a redirect.")]
            bool remove,
            [Argument("noprompt", "q", DefaultValue = false, Description = "True to stop prompt for confirmation before changing content.")]
            bool noPrompt)
        {
            using (ContentStorage store = new ContentStorage(StoragePath(site), false))
            {
                // duplicate path -> path of the first page seen with the same hash
                var replacements    = new Dictionary <string, string>(StringComparer.Ordinal);
                // content hash -> path of the first page seen with that hash
                var firstPathByHash = new Dictionary <string, string>(StringComparer.Ordinal);

                foreach (KeyValuePair <string, ContentRecord> entry in store)
                {
                    if (!entry.Value.HasHashContents)
                    {
                        continue;
                    }

                    string firstPath;
                    if (!firstPathByHash.TryGetValue(entry.Value.HashContents, out firstPath))
                    {
                        firstPathByHash.Add(entry.Value.HashContents, entry.Key);
                        continue;
                    }

                    replacements[entry.Key] = firstPath;
                    Console.WriteLine("{0,-38} => {1,-38}", entry.Key, firstPath);
                }

                if (replacements.Count == 0)
                {
                    return;
                }

                if (!noPrompt && !new ConfirmPrompt().Continue("Replace all of the above links"))
                {
                    return;
                }

                Uri baseUri = new Uri(site, UriKind.Absolute);

                if (remove)
                {
                    // Point every same-host link to a duplicate at the page that is kept.
                    ContentParser parser = new ContentParser(store, baseUri);
                    parser.RewriteUri += u =>
                    {
                        string target;
                        bool   isDuplicate = u.IsSameHost(baseUri) &&
                                             replacements.TryGetValue(u.NormalizedPathAndQuery(), out target);

                        return(isDuplicate ? new Uri(baseUri, target) : u);
                    };
                    parser.ProcessAll();
                }

                foreach (KeyValuePair <string, string> pair in replacements)
                {
                    ContentRecord rec = store[pair.Key];
                    store.Remove(pair.Key);

                    if (remove)
                    {
                        continue;
                    }

                    // Re-add the path as a content-less redirect to the page that is kept.
                    ContentRecord redirect = rec.ToBuilder()
                                             .ClearCompressedLength()
                                             .ClearContentLength()
                                             .ClearContentStoreId()
                                             .ClearContentType()
                                             .ClearHashContents()
                                             .SetHttpStatus((uint)HttpStatusCode.Redirect)
                                             .SetContentRedirect(pair.Value)
                                             .Build();

                    store.Add(pair.Key, redirect);
                }
            }
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Parses one multipart/form-data section of <paramref name="sourceString"/>, starting
        /// the search at <paramref name="pos"/>, and appends the result to <c>records</c>.
        /// </summary>
        /// <remarks>
        /// All searches are ordinal, so the result does not depend on the current culture.
        /// Attribute values containing ';' are not supported because the header is split on ';'.
        /// </remarks>
        /// <param name="pos">Index at which to start looking for the boundary line.</param>
        /// <param name="sourceString">The whole request body as text.</param>
        /// <param name="boundLine">The boundary line that separates sections.</param>
        /// <returns>
        /// The index where the section body ends (just before the line break that precedes the
        /// next boundary) when a record was added; the index of the end of the
        /// Content-Disposition line when the section was skipped (no <c>name</c> attribute, or an
        /// unterminated Content-Type line); or -1 when no complete section could be found.
        /// </returns>
        private int ParseSection(int pos, string sourceString, string boundLine)
        {
            const string dispositionPrefix = "Content-Disposition: form-data;";
            const string contentTypePrefix = "Content-Type: ";

            var boundIndex = sourceString.IndexOf(boundLine, pos, StringComparison.Ordinal);

            if (boundIndex < 0)
            {
                return(-1);
            }

            var descrIndex = sourceString.IndexOf(dispositionPrefix, boundIndex, StringComparison.Ordinal);

            if (descrIndex < 0)
            {
                return(-1);
            }

            var ctxHeaderEnd = sourceString.IndexOf(NewLineHeader, descrIndex, StringComparison.Ordinal);

            if (ctxHeaderEnd < 0)
            {
                return(-1);
            }

            var headerStart = descrIndex + dispositionPrefix.Length;
            var headerStr   = sourceString.Substring(headerStart, ctxHeaderEnd - headerStart);

            // The line looks like: Content-Disposition: form-data; name="uploadedFile"; filename="settings_news.xml"
            // or: Content-Disposition: form-data; name="accountId"
            var dicPtrVal = new Dictionary <string, string>();
            var attrParts = headerStr.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var attrPart in attrParts)
            {
                // Split on the first '=' only, so a value that itself contains '=' is kept whole.
                var nameValParts = attrPart.Split(new[] { '=' }, 2, StringSplitOptions.RemoveEmptyEntries);
                if (nameValParts.Length != 2)
                {
                    continue;
                }

                // Indexer rather than Add: a repeated attribute overwrites instead of throwing.
                dicPtrVal[nameValParts[0].Trim()] = nameValParts[1].Trim(' ', '"');
            }

            // The dictionary now looks like { name, uploadedFile }, { filename, settings_news.xml }
            string fieldName, fileName;

            if (!dicPtrVal.TryGetValue("name", out fieldName))
            {
                return(ctxHeaderEnd);
            }
            if (!dicPtrVal.TryGetValue("filename", out fileName))
            {
                fileName = string.Empty;
            }

            // The next line is either empty or holds e.g. Content-Type: text/xml
            var ctxTypeStart = ctxHeaderEnd + NewLineHeader.Length;

            // Index of the first character of the data (when there is no Content-Type line)
            var bodyStart = ctxTypeStart + NewLineHeader.Length;

            // Parse the data type, if one is given
            var fileType = string.Empty;

            // string.Compare clamps to the available characters, so a source that ends
            // shortly after the header does not throw the way Substring would.
            if (string.Compare(sourceString, ctxTypeStart, contentTypePrefix, 0,
                               contentTypePrefix.Length, StringComparison.Ordinal) == 0)
            {
                var lineEndIndex = sourceString.IndexOf(NewLineHeader, ctxTypeStart, StringComparison.Ordinal);
                if (lineEndIndex < 0)
                {
                    return(ctxHeaderEnd);
                }
                var typeStart = ctxTypeStart + contentTypePrefix.Length;
                fileType  = sourceString.Substring(typeStart, lineEndIndex - typeStart);
                bodyStart = lineEndIndex + NewLineHeader.Length;
            }

            // A truncated source may end before the body starts; treat it as an incomplete section.
            if (bodyStart > sourceString.Length)
            {
                return(-1);
            }

            // Extract the body
            var bodyEnd = sourceString.IndexOf(NewLineHeader + boundLine, bodyStart, StringComparison.Ordinal);

            if (bodyEnd < 0)
            {
                return(-1);
            }
            var strBody = sourceString.Substring(bodyStart, bodyEnd - bodyStart);

            // Create the record
            var rec = new ContentRecord
            {
                AttachedFileType = fileType,
                FileName         = fileName,
                Name             = fieldName,
                FileData         = strBody
            };

            records.Add(rec);
            return(bodyEnd);
        }