コード例 #1
0
        /// <summary>
        /// Runs every stored item through a <c>ContentParser</c> and writes the results as files
        /// beneath <paramref name="directory"/>, using the names supplied by <c>GetFriendlyNames()</c>.
        /// </summary>
        /// <param name="directory">
        /// Absolute path of the output folder. It is also parsed as an absolute URI and used as the
        /// base for rewritten links when <c>RebaseLinks</c> is set.
        /// </param>
        public void Export(string directory)
        {
            Dictionary<string, string> friendlyNames = GetFriendlyNames();
            Uri exportRoot = new Uri(directory, UriKind.Absolute);

            ContentParser exporter = new ContentParser(_content, _baseUri);

            // Identity handler: every element is handed back unchanged.
            exporter.RewriteElement += element => element;

            // Same-host links that have a friendly name are re-rooted; everything else is left alone.
            exporter.RewriteUri +=
                link =>
            {
                string friendly;
                if (link.IsSameHost(_baseUri) &&
                    friendlyNames.TryGetValue(link.NormalizedPathAndQuery(), out friendly))
                {
                    // The friendly name is made relative so it resolves beneath the chosen root.
                    Uri root = RebaseLinks ? exportRoot : _baseUri;
                    return new Uri(root, friendly.TrimStart('/', '\\'));
                }

                return link;
            };

            exporter.RewriteAll  = true;
            exporter.RelativeUri = RebaseLinks;
            exporter.Reformat    = Reformat;
            exporter.IndentChars = "  ";

            exporter.ProcessAll(
                (record, bytes) =>
            {
                string relativeName;
                if (!friendlyNames.TryGetValue(record.ContentUri, out relativeName))
                {
                    // Records without a friendly name are not written to disk.
                    return;
                }

                string targetFile   = Path.Combine(directory, relativeName.Replace('/', '\\').TrimStart('\\'));
                string targetFolder = Path.GetDirectoryName(targetFile);
                if (!Directory.Exists(targetFolder))
                {
                    Directory.CreateDirectory(targetFolder);
                }

                File.WriteAllBytes(targetFile, bytes);
            });
        }
コード例 #2
0
        /// <summary>
        /// Copies content into <paramref name="writer"/> via <c>CopyTo</c>, passing each item through a
        /// <c>ContentParser</c>. When <c>RebaseLinks</c> is set, same-host links are re-rooted at
        /// <paramref name="target"/>.
        /// </summary>
        /// <param name="target">Absolute URI that rebased links are resolved against.</param>
        /// <param name="writer">Destination storage; also the storage the parser is constructed over.</param>
        public void ConvertTo(string target, ContentStorage writer)
        {
            Uri targetRoot = new Uri(target, UriKind.Absolute);

            ContentParser converter = new ContentParser(writer, _baseUri);
            converter.Reformat    = Reformat;
            converter.RelativeUri = false;

            if (RebaseLinks)
            {
                // Keep the path and query, swap the root; links to other hosts pass through untouched.
                converter.RewriteUri += link =>
                    link.IsSameHost(_baseUri) ? new Uri(targetRoot, link.PathAndQuery) : link;
            }

            CopyTo(writer, converter.ProcessFile);
        }
コード例 #3
0
        /// <summary>
        /// For each configured document type that declares optimizations, builds a <c>ContentParser</c>
        /// with the matching rewriters attached and processes the records whose MIME type belongs to
        /// that type or one of its aliases.
        /// </summary>
        /// <param name="filter">
        /// Optional extra predicate a record must satisfy to be processed; <c>null</c> accepts every record.
        /// </param>
        private void RunOptimizer(Predicate<ContentRecord> filter)
        {
            // Substitution values shared with the rewriters; the "page.*" entries are refreshed per record below.
            Dictionary<string, string> variables = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
            variables.Add("site.uri", _baseUri.AbsoluteUri);

            foreach (var docType in _config.DocumentTypes)
            {
                if (docType.Optimizations == null)
                {
                    continue;
                }

                // MIME types handled in this pass: the type itself plus any aliases.
                var typeAndAliases = new[] { docType }.Concat(docType.Aliases.SafeEnumeration());
                ILookup<string, HttpCloneFileType> byMimeType = typeAndAliases.ToLookup(entry => entry.MimeType);

                ContentParser optimizer = new ContentParser(_content, _baseUri);
                optimizer.Reformat    = CondenseHtml;
                optimizer.RelativeUri = true;
                optimizer.IndentChars = String.Empty;

                bool isHtml = docType.Type == ContentFormat.Html;

                if (isHtml && _config.Searching != null && _config.Searching.FormXPath != null)
                {
                    optimizer.RewriteXmlDocument += RewriteSearchForm;
                }

                // The rewriter instances are created only for their constructor's effect on the parser;
                // the references are intentionally discarded.
                if (isHtml || docType.Type == ContentFormat.Xml)
                {
                    new XmlRewriter(optimizer, docType.Optimizations, variables);
                }
                new RegexRewriter(optimizer, docType.Optimizations, variables);

                optimizer.ContextChanged +=
                    current =>
                {
                    variables["page.path"] = current.ContentUri;
                    variables["page.uri"]  = new Uri(_baseUri, current.ContentUri).AbsoluteUri;
                    variables["page.mime"] = current.MimeType;
                };

                optimizer.Process(
                    candidate => (filter == null || filter(candidate)) && byMimeType.Contains(candidate.MimeType));
            }
        }
コード例 #4
0
        /// <summary>
        /// Creates a collector whose working files live in <paramref name="directory"/> and queues
        /// <paramref name="uriStart"/> as the first URI.
        /// </summary>
        /// <param name="directory">Working folder; created if it does not exist.</param>
        /// <param name="uriStart">Absolute URI to start from; the site base URI is its root ("/").</param>
        public SiteCollector(string directory, string uriStart)
        {
            CrawlTime   = DateTime.Now;
            _instanceId = Guid.NewGuid().ToUInt64();

            Uri startUri = new Uri(uriStart, UriKind.Absolute);
            _baseUri = new Uri(startUri, "/");

            if (!Directory.Exists(directory))
            {
                Directory.CreateDirectory(directory);
            }

            string queueFile = Path.Combine(directory, "workqueue.txt");

            _config   = Config.ReadConfig(_baseUri, directory);
            _excluded = new PathExclusionList();
            _queue    = new TextQueue(queueFile, true);
            _data     = new ContentStorage(directory, false);
            _parser   = new ContentParser(_data, _baseUri);
            _parser.VisitUri += AddUri;

            // Defaults are assigned before the first AddUri call, as in the original ordering.
            AddUrlsFound         = true;
            UpdateSearchTemplate = true;
            MaxCrawlAge          = TimeSpan.MaxValue;

            AddUri(startUri);
        }
コード例 #5
0
        /// <summary>
        /// Refreshes the record stored at <c>TemplatePath</c> from the page referenced by
        /// <c>_config.Searching.TemplateUri</c>, and creates the search stylesheet record at
        /// <c>SearchCssPath</c> when it is missing.
        /// </summary>
        /// <param name="forced">
        /// When <c>true</c>, the template is regenerated even if the source page's original hash
        /// matches the hash already recorded on the template record.
        /// </param>
        private void UpdateTemplate(bool forced)
        {
            string        tempPath = new Uri(_baseUri, _config.Searching.TemplateUri).NormalizedPathAndQuery();
            ContentRecord record;

            // Start from the existing template record when there is one, otherwise from a new record.
            ContentRecord.Builder update;
            if (_data.TryGetValue(TemplatePath, out record))
            {
                update = record.ToBuilder();
            }
            else
            {
                update = _data.New(TemplatePath, DateTime.Now);
            }

            ContentRecord template;

            if (_data.TryGetValue(tempPath, out template))
            {
                if (template.HasContentStoreId && (forced || template.HashOriginal != update.HashOriginal))
                {
                    // Mirror the source page's metadata onto the template record.
                    update.SetContentType(template.ContentType);
                    update.SetHashOriginal(template.HashOriginal);
                    update.SetLastCrawled(template.LastCrawled);
                    update.SetLastValid(template.LastValid);
                    update.SetDateModified(DateTime.Now);
                    update.SetHttpStatus(template.HttpStatus);
                    update.ClearContentRedirect();
                    if (template.HasContentRedirect)
                    {
                        // Copy the redirect from the source page. Reading it back from 'update' here
                        // would only return the value that was cleared on the line above.
                        update.SetContentRedirect(template.ContentRedirect);
                    }

                    // Re-process the source page so its links are expressed relative to the search page.
                    ContentParser parser = new ContentParser(_data, _baseUri);
                    parser.RelativeUri = true;
                    parser.RewriteUri += uri => new Uri(uri.OriginalString);
                    Uri templateUri = new Uri(_baseUri, SearchTemplate.SearchPath);
                    parser.MakeRelativeUri = (s, d) => templateUri.MakeRelativeUri(d);
                    byte[] mapped = parser.ProcessFile(template, _data.ReadContent(template, true));

                    string templateHtml = CreateTemplate(Encoding.UTF8.GetString(mapped));

                    // The content write and the record update are committed together.
                    using (ITransactable trans = _data.WriteContent(update, Encoding.UTF8.GetBytes(templateHtml)))
                    {
                        _data.AddOrUpdate(TemplatePath, update.Build());
                        trans.Commit();
                    }
                }
            }

            if (!_data.TryGetValue(SearchCssPath, out record))
            {
                ContentRecord cssRecord = _data.New(SearchCssPath, DateTime.Now)
                                          .SetContentType("text/css")
                                          .SetHttpStatus(200)
                                          .Build();

                _data.Add(cssRecord.ContentUri, cssRecord);
                // NOTE(review): unlike the template write above, the result of WriteContent is not
                // committed or disposed here - confirm this overload does not return a transaction.
                _data.WriteContent(cssRecord, Encoding.UTF8.GetBytes(Properties.Resources.search_css));
            }
        }