/// <summary>
/// Serialises the given <see cref="UrlSet"/> into a sitemap XML document.
/// </summary>
/// <param name="urlSet">The set of URLs to serialise. Must not be null.</param>
/// <returns>An <see cref="XDocument"/> whose root is the namespaced &lt;urlset&gt; element.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="urlSet"/> is null.</exception>
public XDocument Serialise(UrlSet urlSet)
{
    if (urlSet == null)
    {
        throw new ArgumentNullException("urlSet");
    }

    // Build the <urlset> root, declare the xhtml namespace prefix, then
    // qualify the root's own name with the sitemap namespace.
    var rootElement = new XElement(UrlSetElementName);
    rootElement.Add(new XAttribute(XNamespace.Xmlns + "xhtml", XhtmlNamespace));
    rootElement.Name = SitemapNamespace + rootElement.Name.LocalName;

    var document = new XDocument();
    document.Document.Add(rootElement);

    // Serialise each URL and re-qualify its element name into the sitemap namespace.
    foreach (Url entry in urlSet)
    {
        var serialisedEntry = serialiseUrl(entry);
        serialisedEntry.Name = SitemapNamespace + serialisedEntry.Name.LocalName;
        rootElement.Add(serialisedEntry);
    }

    return document;
}
/// <summary>
/// Builds a <see cref="UrlSet"/> for the supplied sitemap file definition by
/// walking the content tree from the definition's root item.
/// </summary>
/// <param name="definition">Describes what to include; must not be null and must
/// have had its languages resolved before generation.</param>
/// <returns>The populated (possibly empty) URL set.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="definition"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when languages have not been resolved.</exception>
public UrlSet Generate(FileDefinition definition)
{
    if (definition == null)
    {
        throw new ArgumentNullException("definition");
    }

    if (!definition.LanguagesResolved)
    {
        throw new ArgumentException("The SitemapFileDefinition passed has not resolved languages prior to sitemap generation.", "definition");
    }

    var result = new UrlSet();

    // An empty root GUID means there is nothing to walk; return an empty set.
    if (definition.RootItem != Guid.Empty)
    {
        Database database = Sitecore.Configuration.Factory.GetDatabase(definition.SourceDatabase);
        Item root = database.GetItem(new ID(definition.RootItem));
        process(root, result, definition);
    }

    return result;
}
/// <summary>
/// Produces a sitemap covering every chapter of every active book edition and
/// returns it as a UTF-8 XML string in a 200 OK response.
/// </summary>
/// <param name="req">Request carrying the format string used to build each entry's URL.</param>
/// <returns>200 OK whose body is the serialised sitemap XML.</returns>
public async Task <IHttpActionResult> GetSiteMap(GetSiteMapRequest req)
{
    var sitemap = new UrlSet();
    var db = await utils.InstanceDBAsync();

    // Only editions that are active and whose publisher is currently published.
    var activeEditions = db.Contents.BookEditions.Values
        .Where(edition => edition.active && db.ActivePublications.Contains(edition.publishercode));

    foreach (var edition in activeEditions)
    {
        var chapters = await db.ChaptersByBookIdAsync(edition.bookid);
        foreach (var chapter in chapters)
        {
            var chapterContents = await db.LoadChaptersAsync(chapter.Key.chapter, edition.bookid);
            var data = chapterContents.Data;

            // One sitemap entry per chapter, stamped with the newest content timestamp.
            sitemap.AddUrl(new Url()
            {
                LastModifiedDateTime = data.Max(m => m.timestamp),
                Loc = string.Format(req.FormatUrl, edition.bookid, chapter.Key.chapter)
            });
        }
    }

    using (var buffer = (MemoryStream)sitemap.ToStream())
    {
        return Ok(Encoding.UTF8.GetString(buffer.ToArray()));
    }
}
/// <summary>
/// Gets the data that identifies this Crex template and action data.
/// </summary>
/// <param name="isPreview">if set to <c>true</c> then this is for a preview.</param>
/// <returns>A CrexAction object.</returns>
public override CrexAction GetCrexAction(bool isPreview)
{
    // Resolve the background image from either the binary-file or the URL attribute.
    var commonMergeFields = GetCommonMergeFields();
    var imageUrls = GetUrlSetFromAttributes("BackgroundImage", "BackgroundImageUrl", commonMergeFields);

    return new CrexAction("Image", imageUrls);
}
/// <summary>
/// Gets the UrlSet from the provided attributes.
/// </summary>
/// <param name="binaryFileAttribute">The attribute name that contains a binary file.</param>
/// <param name="urlAttribute">The attribute name that contains a Lava enabled URL string.</param>
/// <param name="mergeFields">The merge fields.</param>
/// <returns>A UrlSet object that identifies the requested image.</returns>
protected UrlSet GetUrlSetFromAttributes(string binaryFileAttribute, string urlAttribute, Dictionary <string, object> mergeFields)
{
    // Preferred source: an attribute holding a binary file GUID.
    if (!string.IsNullOrWhiteSpace(binaryFileAttribute) && GetAttributeValue(binaryFileAttribute).AsGuidOrNull().HasValue)
    {
        return UrlSet.FromBinaryImage(GetAttributeValue(binaryFileAttribute).AsGuid());
    }

    // No URL attribute configured either: return an empty set.
    if (string.IsNullOrWhiteSpace(urlAttribute))
    {
        return new UrlSet();
    }

    // Resolve the Lava-enabled URL; it may itself be a binary image GUID.
    var resolvedValue = GetAttributeValue(urlAttribute).ResolveMergeFields(mergeFields, CurrentPerson);
    var resolvedGuid = resolvedValue.AsGuidOrNull();

    if (resolvedGuid.HasValue)
    {
        return UrlSet.FromBinaryImage(resolvedGuid.Value);
    }

    // Plain URL: use the same address for every resolution tier.
    return new UrlSet
    {
        HD = resolvedValue,
        FHD = resolvedValue,
        UHD = resolvedValue
    };
}
/// <summary>
/// An empty UrlSet should still declare the xhtml namespace attribute on its root element.
/// </summary>
public void HasXmlnsXhtmlAttribute()
{
    // Arrange
    var emptySet = new UrlSet();

    // Act
    var rootXml = emptySet.ToXml();

    // Assert
    Assert.IsTrue(rootXml.Attributes().Any(attr => attr.Name == XhtmlNamespace), "xmlns:html attribute missing.");
}
/// <summary>
/// An empty UrlSet should declare the default xmlns attribute on its root element.
/// </summary>
public void HasXmlnsAttribute()
{
    // Arrange
    var emptySet = new UrlSet();

    // Act
    var rootXml = emptySet.ToXml();

    // Assert
    Assert.IsTrue(rootXml.Attributes().Any(attr => attr.Name == "xmlns"), "xmlns attribute missing.");
}
/// <summary>
/// Writes a single numbered sitemap file containing the supplied URLs and
/// records its filename for later inclusion in the sitemap index.
/// </summary>
/// <param name="urls">The URLs to write into this sitemap file.</param>
/// <param name="sequence">Zero-based sequence number; the file is named with sequence + 1.</param>
private void BuildUrlSet(List <Url> urls, int sequence)
{
    // Format the filename once; the original computed the identical string twice.
    string fileName = String.Format("{0}{1}.xml", this.SitemapName, sequence + 1);
    this._SitemapNames.Add(fileName);

    var urlSet = new UrlSet();
    urlSet.UrlList.AddRange(urls);
    urlSet.Serialize(Path.Combine(SitemapPath, fileName));
}
/// <summary>
/// The default xmlns attribute should carry the standard sitemaps.org schema URI.
/// </summary>
public void HasXmlnsDefaultValue()
{
    // Arrange
    const string expected = "http://www.sitemaps.org/schemas/sitemap/0.9";
    var emptySet = new UrlSet();

    // Act
    var rootXml = emptySet.ToXml();
    var actual = rootXml.Attributes().First(attr => attr.Name == "xmlns").Value;

    // Assert
    Assert.AreEqual(actual, expected, "xmlns default value incorrect.");
}
/// <summary>
/// The xhtml namespace attribute should default to the W3C xhtml URI.
/// </summary>
public void HasXmlnsXhtmlDefaultValue()
{
    // Arrange
    const string expected = "http://www.w3.org/1999/xhtml";
    var emptySet = new UrlSet();

    // Act
    var rootXml = emptySet.ToXml();
    var actual = rootXml.Attributes().First(attr => attr.Name == XhtmlNamespace).Value;

    // Assert
    Assert.AreEqual(actual, expected, "xmlns:html default value incorrect.");
}
/// <summary>
/// Endless pinger loop: downloads the sitemap index at <paramref name="url"/>,
/// then fetches every sitemap it lists and requests every URL contained in
/// each, pausing 30 seconds between page requests. Runs forever
/// (do/while(true)); a failed index fetch backs off 30s and retries.
/// </summary>
/// <param name="url">Address of the sitemap index XML document.</param>
private async Task DoPing(string url)
{
    do
    {
        using (var client = new PingerWebClient("Mozilla/5.0 (compatible; Pingerbot/0.2)"))
        {
            SitemapIndex index = null;
            try
            {
                // Download and deserialise the sitemap index document.
                using (var reader = await client.GetAsync(url))
                {
                    var s = new XmlSerializer(typeof(SitemapIndex));
                    index = (SitemapIndex)s.Deserialize(await reader.Content.ReadAsStreamAsync());
                }
            }
            catch (Exception exc)
            {
                // Index fetch failed: log, back off 30s, restart the outer loop.
                logger.Error(exc, "Error on first call");
                await Task.Delay(30 * 1000);
                continue;
            }
            logger.Info("sitemaps to ping -> " + index.Sitemaps.Count);
            foreach (var sitemap in index.Sitemaps)
            {
                // Download and deserialise each individual sitemap.
                UrlSet urlSet = null;
                using (var reader = await client.GetAsync(sitemap.loc))
                {
                    var s = new XmlSerializer(typeof(UrlSet));
                    urlSet = (UrlSet)s.Deserialize(await reader.Content.ReadAsStreamAsync());
                }
                logger.Info("Urls to ping -> " + urlSet.Urls.Count);
                foreach (var urlToPing in urlSet.Urls)
                {
                    logger.Info("Pinging " + urlToPing.loc);
                    try
                    {
                        await client.GetAsync(new Uri(urlToPing.loc));
                    }
                    catch (Exception exc)
                    {
                        // Best-effort: a failed page download is logged but does not stop the run.
                        logger.Error(exc, $"Error downloading page: {urlToPing.loc}");
                        //throw new Exception("Error downloading page: " + urlToPing.loc, exc);
                    }
                    // Throttle: 30 seconds between individual pings.
                    await Task.Delay(30 * 1000);
                }
            }
        }
    } while (true);
}
/// <summary>
/// Setting XmlNamespaceXHtml should replace the default xhtml namespace URI in the output.
/// </summary>
public void CanOverrideXmlnsXhtml()
{
    // Arrange: a set whose xhtml namespace is overridden.
    const string expected = "http://www.somethingnew.com/2015/xhtml";
    var overriddenSet = new UrlSet { XmlNamespaceXHtml = expected };

    // Act
    var rootXml = overriddenSet.ToXml();
    var actual = rootXml.Attributes().First(attr => attr.Name == XhtmlNamespace).Value;

    // Assert
    Assert.AreEqual(actual, expected, "xmlns:html default value was not overriden.");
}
/// <summary>
/// Serialising an empty UrlSet should yield a self-closed, correctly namespaced &lt;urlset&gt; document.
/// </summary>
public void UrlSetSerialiser_Serialise_EmptyUrlSetGivesValidEmptyDocument()
{
    // Arrange
    var emptySet = new UrlSet();
    var sut = new UrlSetSerialiser();

    // Act
    XDocument document = sut.Serialise(emptySet);

    // Assert
    Assert.IsNotNull(document, "Serialise() should always return a valid XML document");
    Assert.AreEqual(
        "<urlset xmlns:xhtml=\"http://www.w3.org/1999/xhtml\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" />",
        document.ToString(SaveOptions.DisableFormatting));
}
/// <summary>
/// A UrlSet with two entries should serialise to XML with exactly two child elements.
/// </summary>
public void HasChildren()
{
    // Arrange: a set containing two content entries.
    var setUnderTest = new UrlSet();
    var firstEntry = new UmbracoContent();
    var secondEntry = new UmbracoContent();
    setUnderTest.Urls.Add(firstEntry);
    setUnderTest.Urls.Add(secondEntry);

    // Act
    var resultXml = setUnderTest.ToXml();

    // Assert
    Assert.IsTrue(resultXml.HasElements, "xml is missing child elements");
    Assert.AreEqual(resultXml.Elements().Count(), 2, "xml has the wrong number of elements.");
}
/// <summary>
/// Recursively walks the item tree, adding a URL for every item that passes the
/// definition's template and language filters. An empty filter list means
/// "include everything" for that dimension.
/// </summary>
/// <param name="itm">The item (and, transitively, its descendants) to consider.</param>
/// <param name="urlSet">Accumulator that receives matching URLs.</param>
/// <param name="definition">Supplies the template/language filters and URL options.</param>
private void process(Item itm, UrlSet urlSet, FileDefinition definition)
{
    bool templateMatches = definition.TemplatesToInclude.Count == 0
        || definition.TemplatesToInclude.Contains(itm.TemplateID.ToGuid());

    // The language check is only evaluated when the template filter passes.
    if (templateMatches
        && (definition.LanguageCodesToInclude.Count == 0 || hasAValidLanguage(itm, definition.LanguageCodesToInclude)))
    {
        urlSet.Add(makeUrl(itm, definition));
    }

    // Recurse into every child regardless of whether this item matched.
    foreach (Item child in itm.Children)
    {
        process(child, urlSet, definition);
    }
}
/// <summary>
/// Generates a valid xml sitemap from the supplied content.
/// </summary>
/// <param name="content">Content to be added to the sitemap.</param>
/// <returns>An XML Sitemap.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="content"/> is null.</exception>
public XDocument Generate(IEnumerable <ISitemapContent> content)
{
    if (content == null)
    {
        throw new ArgumentNullException(nameof(content));
    }

    // Materialise the content into the url set, then wrap it in a declared document.
    var set = new UrlSet { Urls = content.ToList() };

    var document = new XDocument
    {
        Declaration = new XDeclaration(Version, Encoding, Standalone)
    };
    document.Add(set.ToXml());

    return document;
}
/// <summary>
/// Serialising ten URLs should produce ten &lt;url&gt; elements under the root.
/// </summary>
public void UrlSetSerialiser_Serialise_MultipleUrlsGivesCorrectDocument()
{
    // Arrange: ten URLs named "0" through "9".
    var inputSet = new UrlSet();
    for (var index = 0; index < 10; index++)
    {
        inputSet.Add(new Url(index.ToString()));
    }
    var sut = new UrlSetSerialiser();

    // Act
    XDocument result = sut.Serialise(inputSet);

    // Assert
    Assert.IsNotNull(result, "Serialise() should always return a valid XML document");
    var rootElement = result.Document.Element(UrlSetSerialiser.SitemapNamespace + UrlSetSerialiser.UrlSetElementName);
    var urlElements = rootElement.Elements(UrlSetSerialiser.SitemapNamespace + UrlSetSerialiser.UrlElementName);
    Assert.AreEqual(10, urlElements.Count());
}
/// <summary>
/// Serialising a single URL should produce one &lt;url&gt; element whose &lt;loc&gt; holds the location.
/// </summary>
public void UrlSetSerialiser_Serialise_SingleUrlGivesValidDocument()
{
    // Arrange: one URL with location "123".
    var inputSet = new UrlSet();
    inputSet.Add(new Url("123"));
    var sut = new UrlSetSerialiser();

    // Act
    XDocument result = sut.Serialise(inputSet);

    // Assert
    Assert.IsNotNull(result, "Serialise() should always return a valid XML document");
    var rootElement = result.Document.Element(UrlSetSerialiser.SitemapNamespace + UrlSetSerialiser.UrlSetElementName);
    var urlElements = rootElement.Elements(UrlSetSerialiser.SitemapNamespace + UrlSetSerialiser.UrlElementName);
    Assert.AreEqual(1, urlElements.Count());
    var locationElement = urlElements.First().Element(UrlSetSerialiser.SitemapNamespace + UrlSetSerialiser.UrlLocationElemenName);
    Assert.AreEqual("123", locationElement.Value);
}
/// <summary>
/// Retrieves the service URL set for the given region and caches it in
/// RegionUrlSets, keyed by the region's entity key.
/// </summary>
/// <param name="queryServiceClient">Client used to query for URLs.</param>
/// <param name="region">The region whose URL set is retrieved.</param>
/// <exception cref="Exception">Thrown when the service returns a null URL set.</exception>
private void RetrieveUrls(QueryServiceClient queryServiceClient, Region region)
{
    WSU.MainServiceLogger.Info("Retrieve Urls for Region: " + ApexConsumer.ToString(region));

    UrlSet urlSet = queryServiceClient.RetrieveUrlsForContext(
        SessionHeader,
        new SingleRegionContext
        {
            BusinessUnitEntityKey = region.BusinessUnitEntityKey,
            RegionEntityKey = region.EntityKey
        });

    // A null result indicates the service call failed; fail fast with region context.
    if (urlSet == null)
    {
        throw new Exception("Retrieve Urls failed with a null result for Region: " + ApexConsumer.ToString(region));
    }

    WSU.MainServiceLogger.Debug("Retrieve Urls completed successfully for Region: " + ApexConsumer.ToString(region));
    RegionUrlSets.Add(region.EntityKey, urlSet);
}
/// <summary>
/// Retrieves the mapping/routing web service URLs for the given business unit
/// and region, creates the corresponding service clients, and returns the URLs.
/// </summary>
/// <param name="buId">Business unit entity key used in the search context.</param>
/// <param name="regionId">Region entity key used in the search context.</param>
/// <returns>Array of { mapping service URL, routing service URL }.</returns>
/// <exception cref="Exception">Thrown when the service returns no URL set or the call fails.</exception>
public static string[] RetrieveUrls(long buId, long regionId)
{
    try
    {
        // Object with the search parameters.
        RegionContext = new SingleRegionContext
        {
            BusinessUnitEntityKey = buId,
            RegionEntityKey = regionId
        };

        // Call the web service.
        UrlSet urlSet = QueryServiceClient.RetrieveUrlsForContext(SessionHeader, RegionContext);

        // A null result indicates the retrieval failed.
        if (urlSet == null)
        {
            throw new Exception("Retrieve Urls failed.");
        }

        // Create the URL web service client objects.
        MappingServiceClient = new MappingServiceClient("BasicHttpBinding_IMappingService", urlSet.MappingService);
        RoutingServiceClient = new RoutingServiceClient("BasicHttpBinding_IRoutingService", urlSet.RoutingService);

        // Return the resolved URL addresses.
        return new string[] { urlSet.MappingService, urlSet.RoutingService };
    }
    catch (Exception ex)
    {
        // BUG FIX: the original rethrew `new Exception(Ex.Message)`, discarding
        // the original exception and its stack trace. Preserve it as the inner
        // exception while keeping the same outer type and message for callers.
        throw new Exception(ex.Message, ex);
    }
}
/// <summary>
/// End-to-end format check: one URL with a daily change frequency and an
/// EN-GB alternate should serialise to the exact expected document string.
/// </summary>
public void UrlSetSerialiser_Serialise_OverallDocumentHasCorrectFormat()
{
    // Arrange: a single daily-changing URL with one alternate-language link.
    var entry = new Url("http://www.com/") { ChangeFrequency = ChangeFrequency.Daily };
    entry.AlternateUrls.Add(new AlternateUrl() { Url = "http://www.com/en/", Language = "EN-GB" });
    var inputSet = new UrlSet();
    inputSet.Add(entry);
    var sut = new UrlSetSerialiser();

    // Act
    var document = sut.Serialise(inputSet);
    string actualXml = document.ToString(SaveOptions.OmitDuplicateNamespaces | SaveOptions.DisableFormatting);

    // Assert
    Assert.AreEqual("<urlset xmlns:xhtml=\"http://www.w3.org/1999/xhtml\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\"><url><loc>http://www.com/</loc><changefreq>daily</changefreq><xhtml:link rel=\"alternate\" hreflang=\"EN-GB\" href=\"http://www.com/en/\" /></url></urlset>", actualXml);
}
/// <summary>
/// Builds sitemap.xml from every .html file found in the output directory,
/// prefixing each page's site-relative path with the configured host name.
/// </summary>
/// <param name="settings">Runtime settings providing the output directory, host name, and output path.</param>
/// <param name="log">Logger for progress output.</param>
public void RunStep(RuntimeSettings settings, ILog log)
{
    log.Info("Creating sitemap.xml...");

    var sitemap = new UrlSet();
    var outputRoot = settings.OutputDirectory.ToString();

    // Site-relative paths of all generated HTML pages.
    var pagePaths = settings.OutputDirectory.GetAllFiles()
        .Where(file => file.Extension == ".html")
        .Select(file => file.ToString().Replace(outputRoot, ""));

    foreach (var pagePath in pagePaths)
    {
        // Normalise Windows separators to URL slashes and prepend the host.
        var absoluteLink = $"{settings.Configuration.HostName}{pagePath.Replace("\\", "/")}";
        sitemap.Url.Add(CreateEntry(absoluteLink));
        log.Detail("Creating sitemap entry for: {0}", pagePath);
    }

    var sitemapFile = settings.OutputDirectory.Combine("sitemap.xml");
    sitemapFile.SerializeXml(sitemap, log);
}
/// <summary>
/// Serialises the given URL set to its XML string representation by
/// delegating to the shared helper.
/// </summary>
/// <param name="urlSet">The URL set to serialise.</param>
/// <returns>The XML string for <paramref name="urlSet"/>.</returns>
public string GetXml(UrlSet urlSet)
{
    return Helper.GetXml(urlSet);
}
/// <summary>
/// Returns the next FileSpec for this client: serves from the local cache when
/// it is non-empty, otherwise drains the local free deck back to the global
/// pool, (re)fills the client's URL deck — generating a new deck under the
/// client's locks if nobody else is already doing so — and primes the local
/// cache from it before returning.
/// </summary>
/// <param name="client">Owning client whose decks, flags, and mutexes are used.</param>
/// <param name="id">Caller identifier; unused within this body.</param>
/// <returns>The next FileSpec popped from the local cache.</returns>
public FileSpec GetUrl(Client /*!*/ client, int id)
{
    // first check the local cache
    // then see if anyone else is generating the global
    if (cache.pos > 0)
    {
        return(cache.urls[--cache.pos]);
    }
    // return local free to global free pool
    client.globalLock.WaitOne();
    while (free.pos > 0)
    {
        if (client.freeDeck.pos >= client.freeDeck.urls.Length)
        {
            // The client's free deck is full: discard this entry instead of transferring.
            //Console.WriteLine(" too many free slots!");
            free.urls[--free.pos] = null;
            //DebugStub.Break();
            // NOTE(review): pos was already decremented on the line above; this
            // second decrement skips an additional slot and can drive free.pos
            // negative when it entered the branch at 1 — confirm this is intended.
            free.pos--;
        }
        else
        {
            client.freeDeck.urls[client.freeDeck.pos++] = free.urls[--free.pos];
        }
    }
    // now see if there are any urls available
    // if not either wait on or generate the global deck
    while (client.urlDeck.pos == 0)
    {
        if (client.generatingUrls == false)
        {
            //Console.WriteLine("Generating cache");
            // Swap in the spare deck, mark generation in progress, and generate
            // new file specs while holding syncLock but not globalLock.
            UrlSet temp = client.urlDeck;
            client.urlDeck = client.spareDeck;
            client.spareDeck = temp;
            client.generatingUrls = true;
            client.syncLock.WaitOne();
            client.globalLock.ReleaseMutex();
            client.GenerateFileSpecs();
            client.globalLock.WaitOne();
            client.generatingUrls = false;
            client.syncLock.ReleaseMutex();
        }
        else
        {
            //Console.WriteLine("Waiting for cache");
            // Another caller is generating: release the global lock and block on
            // syncLock until generation completes, then re-acquire globalLock.
            client.globalLock.ReleaseMutex();
            client.syncLock.WaitOne();
            client.syncLock.ReleaseMutex();
            client.globalLock.WaitOne();
        }
    }
    // the global deck is ok -- prime our cache
    if (client.urlDeck.pos > 0)
    {
        for (int i = 0; i < cache.urls.Length && (client.urlDeck.pos > 0); i++)
        {
            cache.urls[cache.pos++] = client.urlDeck.urls[--client.urlDeck.pos];
        }
    }
    client.globalLock.ReleaseMutex();
    return(cache.urls[--cache.pos]);
}
/// <summary>
/// Serves a sitemap XML document over HTTP with a per-query-string cache:
/// rejects Yandex/Baidu crawlers, rebuilds the cached sitemap bytes when the
/// entry is older than 24 hours, computes an MD5 ETag over the format URL +
/// last-update timestamp + cached bytes, and honours conditional-GET / HEAD
/// requests before writing the body.
/// </summary>
/// <param name="context">The HTTP context supplying the request and response.</param>
public void ProcessRequest(HttpContext context)
{
    HttpResponse Response = context.Response;
    HttpRequest Request = context.Request;
    // NOTE(review): isCrawler is computed but never read; only the out browser
    // string is used below — confirm whether the flag was meant to gate anything.
    bool isCrawler = utils.IsCrawler(Request, out string browser);
    // These two crawlers are refused outright with a 401.
    if (browser.Contains("Yandex") || browser.Contains("Baiduspider"))
    {
        Response.StatusCode = (int)HttpStatusCode.Unauthorized;
        Response.StatusDescription = "Unauthorized";
        return;
    }
    Response.ContentType = "text/xml";
    Response.Charset = "utf-8";
    // not needed because of BinaryWrite
    //Response.ContentEncoding = Encoding.UTF8;
    // The raw query string doubles as the cache key.
    string qr = Request.QueryString.ToString();
    //var pubCodes = GetPubs(Request);
    ////TODO check on allowed pub codes, otherwise a hacker could cause memory to fill up
    //if (pubCodes.Count() > 3 || qr.Length > 20)
    //{
    //    Response.StatusCode = (int)HttpStatusCode.BadRequest;
    //    Response.StatusDescription = "Invalid request";
    //    return;
    //}
    //TODO put in config file
    var formatUrl = "http://www.peshitta.nl/book.aspx?";
    //Uri uri = new Uri(formatUrl);
    //string host = uri.Host;
    //string serverHost = Request.Url.Host;
    //if (host != serverHost)
    //{
    //    Response.StatusCode = (int)HttpStatusCode.NotImplemented;
    //    Response.StatusDescription = "Not implemented for this host";
    //    return;
    //}
    // Lazily create the cache dictionary under the lock.
    if (_cache == null)
    {
        lock (lockobj)
        {
            _cache = new Dictionary <string, CacheType>();
        }
    }
    // NOTE(review): ContainsKey check happens outside the lock (check-then-act);
    // two concurrent first requests for the same qr could race — confirm
    // acceptable for this handler's traffic.
    CacheType ct;
    if (!_cache.ContainsKey(qr))
    {
        lock (lockobj)
        {
            ct = new CacheType();
            //TODO check params otherwise memory migt be stuffed
            _cache.Add(qr, ct);
        }
    }
    else
    {
        ct = _cache[qr];
    }
    // Rebuild the cached sitemap when the entry is new or older than 24 hours.
    if (ct.LastUpdate == DateTime.MinValue || DateTime.UtcNow - ct.LastUpdate > TimeSpan.FromHours(24))
    {
        ct.LastUpdate = DateTime.Now.AddYears(-1);
        //temp
        UrlSet retVal = new UrlSet();
        var dcd = utils.InstanceDBAsync().Result;
        // Only editions that are active and belong to an active publication.
        var bookeditionIds = dcd.Contents.BookEditions.Values.Where(w => w.active && dcd.ActivePublications.Contains(w.publishercode));
        foreach (var bookeditionid in bookeditionIds)
        {
            var chaps = dcd.ChaptersByBookIdAsync(bookeditionid.bookid).Result;
            foreach (var ch in chaps)
            {
                // Newest text timestamp within this chapter becomes the entry's last-modified.
                var chapterAlineas =
                    ch.Values.Select(s => s.BookchapterAlineaId).ToArray();
                var maxupd = dcd.Contents.Pubs[bookeditionid.publishercode].Texts
                             .Values.Where(w => chapterAlineas.Contains(w.BookChapterAlineaid))
                             .Max(m => m.timestamp);
                retVal.AddUrl(new Url()
                {
                    LastModifiedDateTime = maxupd,
                    Loc = string.Format("{0}://www.peshitta.nl/book.aspx?booked={1}&ch={2}", Uri.UriSchemeHttps
                                        , bookeditionid.bookEditionid, ch.Key.chapter),
                });
            }
            //creates a list limited by it's existance
            // if a bookeditioned was not published, it will not be shown.
            // NOTE(review): the cached bytes are rewritten on every iteration of
            // the edition loop rather than once after it — confirm this is intended.
            using (var io = (MemoryStream)retVal.ToStream())
            {
                _cache[qr].Bytes = io.ToArray();
            }
        }
    }
    // Compute an MD5 ETag over the format URL, last-update time, and cached bytes.
    string etag;
    using (var mem = new MemoryStream())
    using (var wr = new BinaryWriter(mem))
    {
        var btLen = Encoding.UTF8.GetBytes(formatUrl);
        wr.Write(btLen);
        wr.Write(ct.LastUpdate.ToBinary());
        wr.Write(_cache[qr].Bytes);
        wr.Flush();
        using (var MD5Enc = MD5.Create())
        {
            mem.Position = 0;
            etag = BitConverter.ToString(MD5Enc.ComputeHash(mem)).Replace("-", "");;
        }
        //Response.Cache.SetExpires(ct.LastUpdate.AddHours(24));
        // Conditional GET: if the client already has this version, return early.
        if (utils.SetLastModified(Response, Request, ct.LastUpdate, etag))
        {
            return;
        }
        //some old style crawlers, use HEAD instead of conditional get; so get out, and avoid memory waste.
        if (Request.HttpMethod == "HEAD")
        {
            return;
        }
        Response.BinaryWrite(_cache[qr].Bytes);
    }
}
/// <summary>
/// Gets the data that identifies this Crex template and action data.
/// Loads the content channel item named by the ContentItemId query-string
/// parameter, validates its channel type, resolves the lava template into
/// JSON, and packages the result as either a Menu or PosterList action.
/// </summary>
/// <param name="isPreview">if set to <c>true</c> then this is for a preview.</param>
/// <returns>A CrexAction object; empty when no content item id is supplied or
/// the layout is not recognised.</returns>
/// <exception cref="Exception">Thrown when the item's channel type is not in
/// the configured allowed list.</exception>
public override CrexAction GetCrexAction(bool isPreview)
{
    var mergeFields = GetCommonMergeFields();
    var layout = GetAttributeValue("Layout");
    var lavaTemplate = GetAttributeValue("Template");
    var allowedChannelTypes = GetAttributeValues("AllowedChannelTypes").AsGuidList();
    // The target content item comes from the query string; bail out early without one.
    int? contentItemId = HttpContext.Current.Request.QueryString["ContentItemId"].AsIntegerOrNull();

    if (!contentItemId.HasValue)
    {
        return(new CrexAction());
    }

    // Expose the configured detail page id to the lava template via LinkedPages.
    var detailPage = Rock.Web.Cache.PageCache.Get(GetAttributeValue("DetailPage").AsGuid());
    var linkedPages = new Dictionary <string, object>
    {
        { "DetailPageId", detailPage != null ? detailPage.Id.ToString() : string.Empty }
    };
    mergeFields.Add("LinkedPages", linkedPages);

    using (var rockContext = new RockContext())
    {
        var contentItem = new ContentChannelItemService(rockContext).Get(contentItemId.Value);

        //
        // Verify the content item is one we can display.
        //
        if (allowedChannelTypes.Any() && !allowedChannelTypes.Contains(contentItem.ContentChannelType.Guid))
        {
            throw new Exception("Content channel item is not in an allowed channel type.");
        }

        //
        // Perform sorting on the content item.
        //
        var contentChannelItems = GetContentItems(contentItem);

        //
        // Get the JSON from the lava template.
        //
        mergeFields.AddOrReplace("ParentItem", contentItem);
        mergeFields.AddOrReplace("Items", contentChannelItems.ToList());
        var json = lavaTemplate.ResolveMergeFields(mergeFields, CurrentPerson, GetAttributeValue("EnabledLavaCommands"));

        //
        // Get the background image.
        //
        UrlSet backgroundImage = GetUrlSetFromAttributes("BackgroundImage", "BackgroundImageUrl", mergeFields);

        //
        // Final layout configuration.
        //
        if (layout == "Menu")
        {
            var menuData = new com.blueboxmoon.Crex.Rest.Menu
            {
                BackgroundImage = backgroundImage
            };
            menuData.Buttons = json.FromJsonOrNull <List <MenuButton> >();

            return(new CrexAction(layout, menuData));
        }
        else if (layout == "PosterList")
        {
            var posterListData = new PosterList
            {
                BackgroundImage = backgroundImage,
                Title = contentItem.Title
            };
            posterListData.Items = json.FromJsonOrNull <List <PosterListItem> >();

            return(new CrexAction(layout, posterListData));
        }
    }

    // Unrecognised layout: return an empty action.
    return(new CrexAction());
}
/// <summary>
/// Initialises the local URL cache and the free-slot pool with the given capacities.
/// </summary>
/// <param name="cacheSize">Capacity of the local cache (also stored as the cache size).</param>
/// <param name="deckSize">Capacity of the free pool.</param>
public Cache(int cacheSize, int deckSize)
{
    this.cache = new UrlSet(cacheSize);
    this.free = new UrlSet(deckSize);
    this.size = cacheSize;
}