Ever since I stumbled across Google's Webmaster Tools a while ago, it's been in the back of my mind to create a custom HttpHandler that dynamically generates a (virtual) SiteMap.xml file that Google (and other search engines) can use as a reference when spidering my sites.

I thought I'd share the basic implementation as a starting point. Ideally you'd add a property to each EPiServer PageType for "change frequency" and "priority", which the sitemap generator would then use, but for this basic version I've simply set the homepage to 1.0 (the maximum priority) with a daily change frequency, and all other pages to 0.6 and weekly.

First declare the class and the required members:

public class SearchEngineSiteMap : IHttpHandler
{
    /// <summary>
    /// Tells ASP.NET whether this handler instance can be used for more than one request.
    /// Always returns <c>true</c>.
    /// </summary>
    bool IHttpHandler.IsReusable
    {
        get
        {
            return true;
        }
    }
    /// <summary>
    /// Handles the incoming request by handing the context to the sitemap generator.
    /// </summary>
    /// <param name="context">The context of the current request</param>
    void IHttpHandler.ProcessRequest(HttpContext context)
    {
        this.GenerateSiteMap(context);
    }

Next we need to configure the output stream, create an XmlTextWriter and the outer Xml block:

/// <summary>
/// Generate the SiteMap: configures the response as UTF-8 XML and writes a
/// <c>urlset</c> document containing every page found beneath EPiServer's StartPage.
/// </summary>
/// <param name="context">The current request context whose response receives the XML</param>
private void GenerateSiteMap(HttpContext context)
{
    //Set the response information (Expires = -1 marks the response as already expired)
    context.Response.Expires = -1;
    context.Response.ContentType = "application/xml";
    Encoding encoding = new UTF8Encoding();
    context.Response.ContentEncoding = encoding;

    //Create an XmlTextWriter over the context's output stream. The using block
    //guarantees the writer is closed even if walking the page tree throws.
    using (XmlTextWriter xmlTextWriter = new XmlTextWriter(context.Response.OutputStream, encoding))
    {
        xmlTextWriter.Formatting = Formatting.Indented;
        xmlTextWriter.WriteStartDocument();

        //Write the root xml element with the sitemap protocol namespace
        xmlTextWriter.WriteStartElement("urlset");
        xmlTextWriter.WriteAttributeString("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9");

        //Get EPiServer's StartPage (not the RootPage!)
        PageData startPage = EPiServer.DataFactory.Instance.GetPage(PageReference.StartPage);

        //SiteMaps can only contain unique urls so maintain a list of added urls
        List<string> alreadyAddedUrls = new List<string>();

        //Now call the recursive method to populate every published/visible etc page
        RenderNodesToSiteMap(context, xmlTextWriter, alreadyAddedUrls, startPage);

        //Close the root element, then the document; leaving the using block
        //closes the XmlTextWriter itself
        xmlTextWriter.WriteEndElement();
        xmlTextWriter.WriteEndDocument();
    }
}

Finally we need to add a method that will be recursively called for each published page in the site:

/// <summary>
/// Recursively converts the given page, followed by each of its children,
/// into XML <c>url</c> elements for use in the sitemap.
/// </summary>
/// <remarks>
/// A page that is not published is skipped together with everything beneath it.
/// A page whose URL has already been written is not written again, but its
/// children are still visited.
/// </remarks>
/// <param name="context">Current Context (only passed on to the recursive calls)</param>
/// <param name="xmlTextWriter">XMLTextWriter to write the given page (p) to</param>
/// <param name="alreadyAddedUrls">List of Urls already added to the SiteMap</param>
/// <param name="p">The page to add to the sitemap</param>
private void RenderNodesToSiteMap(
    HttpContext context,
    XmlTextWriter xmlTextWriter,
    List<string> alreadyAddedUrls,
    PageData p)
{
    //Make sure the page itself (p) is published; if not, skip it and its subtree
    if (!PageDataUtilities.IsPagePublished(p))
    {
        return;
    }

    //Get the page's 'Friendly' URL
    string url = PageDataUtilities.GetFriendlyUrl(p, true);

    //Make sure this URL is not in the XML already
    if (!alreadyAddedUrls.Contains(url))
    {
        //Add it ready to check later
        alreadyAddedUrls.Add(url);

        //The StartPage changes daily with priority 1.0;
        //everything else is weekly with a lower priority
        bool isStartPage = p.PageLink == PageReference.StartPage;

        xmlTextWriter.WriteStartElement("url");

        //Add the location (Url). WriteElementString escapes XML special
        //characters itself, so the raw url is passed; encoding it first
        //would produce double-escaped output such as "&amp;amp;".
        xmlTextWriter.WriteElementString("loc", url);

        //Add when it was last modified.
        //NOTE(review): the "u" format appends 'Z' without converting to UTC -
        //confirm that p.Changed is already a UTC value.
        xmlTextWriter.WriteElementString(
            "lastmod",
            p.Changed.ToString("u", CultureInfo.InvariantCulture).Replace(" ", "T"));

        xmlTextWriter.WriteElementString("changefreq", isStartPage ? "daily" : "weekly");
        xmlTextWriter.WriteElementString("priority", isStartPage ? "1.0" : "0.6");

        //Close the URL node
        xmlTextWriter.WriteEndElement();
    }

    //Now loop through all the children of this page and add them too
    foreach (PageData child in EPiServer.DataFactory.Instance.GetChildren(p.PageLink))
    {
        RenderNodesToSiteMap(context, xmlTextWriter, alreadyAddedUrls, child);
    }
}

With all that done (and the class closed with a final `}`), the last thing is to register the handler in the Web.config (inside the system.web element) as follows:

<httpHandlers>
  ...
  <!-- Route every request for sitemap.xml (any HTTP verb) to the sitemap handler -->
  <add path="sitemap.xml" verb="*" type="MyLibrary.SearchEngineSiteMap, MyLibrary" />
  ...
</httpHandlers>

Bookmark with :
Digg It! DZone StumbleUpon Technorati Reddit Del.icio.us Newsvine Furl Blinklist
posted @ Thursday, June 12, 2008 11:09 AM | in C# EPiServer .NET ASP.NET

Comments

No comments posted yet.

Post Comment

Title *
Name *
Email
Url
Comment *  


Please add 2 and 3 and type the answer here: