ó
ç'ÊUc           @   sX   d  d l  Z  d  d l m Z m Z d e  j f d „  ƒ  YZ e d k rT e  j ƒ  n  d S(   iÿÿÿÿN(   t   Sitemapt   sitemap_urls_from_robotst   SitemapTestc           B   sb   e  Z d  „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d	 „  Z RS(
   c         C   sy   t  d ƒ } | j d k s! t ‚ |  j t | ƒ i d d 6d d 6d d 6d	 d
 6i d d 6d d 6d d 6d d
 6g ƒ d  S(   Ns²  <?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">
  <url>
    <loc>http://www.example.com/</loc>
    <lastmod>2009-08-16</lastmod>
    <changefreq>daily</changefreq>
    <priority>1</priority>
  </url>
  <url>
    <loc>http://www.example.com/Special-Offers.html</loc>
    <lastmod>2009-08-16</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
</urlset>t   urlsett   1t   prioritys   http://www.example.com/t   locs
   2009-08-16t   lastmodt   dailyt
   changefreqs   0.8s*   http://www.example.com/Special-Offers.htmlt   weekly(   R    t   typet   AssertionErrort   assertEqualt   list(   t   selft   s(    (    s<   /home/travis/build/scrapy/scrapy/tests/test_utils_sitemap.pyt   test_sitemap   s
    	c         C   s]   t  d ƒ } | j d k s! t ‚ |  j t | ƒ i d d 6d d 6i d d 6d d 6g ƒ d  S(	   Nsv  <?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
   <sitemap>
      <loc>http://www.example.com/sitemap1.xml.gz</loc>
      <lastmod>2004-10-01T18:23:17+00:00</lastmod>
   </sitemap>
   <sitemap>
      <loc>http://www.example.com/sitemap2.xml.gz</loc>
      <lastmod>2005-01-01</lastmod>
   </sitemap>
</sitemapindex>t   sitemapindexs&   http://www.example.com/sitemap1.xml.gzR   s   2004-10-01T18:23:17+00:00R   s&   http://www.example.com/sitemap2.xml.gzs
   2005-01-01(   R    R   R   R   R   (   R   R   (    (    s<   /home/travis/build/scrapy/scrapy/tests/test_utils_sitemap.pyt   test_sitemap_index   s    
	c         C   sV   t  d ƒ } |  j t | ƒ i d d 6d d 6d d 6d d	 6i d
 d 6d d 6g ƒ d S(   s]   Assert we can deal with trailing spaces inside <loc> tags - we've
        seen those
        sP  <?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">
  <url>
    <loc> http://www.example.com/</loc>
    <lastmod>2009-08-16</lastmod>
    <changefreq>daily</changefreq>
    <priority>1</priority>
  </url>
  <url>
    <loc> http://www.example.com/2</loc>
    <lastmod />
  </url>
</urlset>
R   R   s   http://www.example.com/R   s
   2009-08-16R   R   R	   s   http://www.example.com/2t    N(   R    R   R   (   R   R   (    (    s<   /home/travis/build/scrapy/scrapy/tests/test_utils_sitemap.pyt   test_sitemap_strip*   s
    	c         C   sV   t  d ƒ } |  j t | ƒ i d d 6d d 6d d 6d d	 6i d
 d 6d d 6g ƒ d S(   su   We have seen sitemaps with wrongs ns. Presumably, Google still works
        with these, though is not 100% confirmedsb  <?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">
  <url xmlns="">
    <loc> http://www.example.com/</loc>
    <lastmod>2009-08-16</lastmod>
    <changefreq>daily</changefreq>
    <priority>1</priority>
  </url>
  <url xmlns="">
    <loc> http://www.example.com/2</loc>
    <lastmod />
  </url>
</urlset>
R   R   s   http://www.example.com/R   s
   2009-08-16R   R   R	   s   http://www.example.com/2R   N(   R    R   R   (   R   R   (    (    s<   /home/travis/build/scrapy/scrapy/tests/test_utils_sitemap.pyt   test_sitemap_wrong_nsA   s
    	c         C   sk   t  d ƒ } | j d k s! t ‚ |  j t | ƒ i d d 6d d 6d d 6d	 d
 6i d d 6d d 6g ƒ d S(   su   We have seen sitemaps with wrongs ns. Presumably, Google still works
        with these, though is not 100% confirmeds/  <?xml version="1.0" encoding="UTF-8"?>
<urlset>
  <url xmlns="">
    <loc> http://www.example.com/</loc>
    <lastmod>2009-08-16</lastmod>
    <changefreq>daily</changefreq>
    <priority>1</priority>
  </url>
  <url xmlns="">
    <loc> http://www.example.com/2</loc>
    <lastmod />
  </url>
</urlset>
R   R   R   s   http://www.example.com/R   s
   2009-08-16R   R   R	   s   http://www.example.com/2R   N(   R    R   R   R   R   (   R   R   (    (    s<   /home/travis/build/scrapy/scrapy/tests/test_utils_sitemap.pyt   test_sitemap_wrong_ns2W   s    	c         C   s,   d } |  j  t t | ƒ ƒ d d g ƒ d  S(   Ns!  User-agent: *
Disallow: /aff/
Disallow: /wl/

# Search and shopping refining
Disallow: /s*/*facet
Disallow: /s*/*tags

# Sitemap files
Sitemap: http://example.com/sitemap.xml
Sitemap: http://example.com/sitemap-product-index.xml

# Forums
Disallow: /forum/search/
Disallow: /forum/active/
s   http://example.com/sitemap.xmls,   http://example.com/sitemap-product-index.xml(   R   R   R   (   R   t   robots(    (    s<   /home/travis/build/scrapy/scrapy/tests/test_utils_sitemap.pyt   test_sitemap_urls_from_robotsn   s    c         C   sY   t  d ƒ } |  j t | ƒ i d d 6d d 6i d d 6d d 6i d d 6d d 6g ƒ d S(	   s=   Assert we can deal with starting blank lines before <xml> tagsû  
<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">

<!-- cache: cached = yes name = sitemap_jspCache key = sitemap -->
<sitemap>
<loc>http://www.example.com/sitemap1.xml</loc>
<lastmod>2013-07-15</lastmod>
</sitemap>

<sitemap>
<loc>http://www.example.com/sitemap2.xml</loc>
<lastmod>2013-07-15</lastmod>
</sitemap>

<sitemap>
<loc>http://www.example.com/sitemap3.xml</loc>
<lastmod>2013-07-15</lastmod>
</sitemap>

<!-- end cache -->
</sitemapindex>
s
   2013-07-15R   s#   http://www.example.com/sitemap1.xmlR   s#   http://www.example.com/sitemap2.xmls#   http://www.example.com/sitemap3.xmlN(   R    R   R   (   R   R   (    (    s<   /home/travis/build/scrapy/scrapy/tests/test_utils_sitemap.pyt   test_sitemap_blanklines‚   s    	c         C   s0   t  d ƒ } |  j t | ƒ i d d 6g ƒ d  S(   Nsc  <?xml version="1.0" encoding="UTF-8"?>
    <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:xhtml="http://www.w3.org/1999/xhtml">
        <url>
            <loc>http://www.example.com/</loc>
            <!-- this is a comment on which the parser might raise an exception if implemented incorrectly -->
        </url>
    </urlset>s   http://www.example.com/R   (   R    R   R   (   R   R   (    (    s<   /home/travis/build/scrapy/scrapy/tests/test_utils_sitemap.pyt   test_comment¢   s    	c         C   s@   t  d ƒ } |  j t | ƒ i d d 6d d d g d 6g ƒ d  S(   Ns«  <?xml version="1.0" encoding="UTF-8"?>
    <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:xhtml="http://www.w3.org/1999/xhtml">
        <url>
            <loc>http://www.example.com/english/</loc>
            <xhtml:link rel="alternate" hreflang="de"
                href="http://www.example.com/deutsch/"/>
            <xhtml:link rel="alternate" hreflang="de-ch"
                href="http://www.example.com/schweiz-deutsch/"/>
            <xhtml:link rel="alternate" hreflang="en"
                href="http://www.example.com/english/"/>
            <xhtml:link rel="alternate" hreflang="en"/><!-- wrong tag without href -->
        </url>
    </urlset>s   http://www.example.com/english/R   s   http://www.example.com/deutsch/s'   http://www.example.com/schweiz-deutsch/t	   alternate(   R    R   R   (   R   R   (    (    s<   /home/travis/build/scrapy/scrapy/tests/test_utils_sitemap.pyt   test_alternate°   s
    	
c         C   s0   t  d ƒ } |  j t | ƒ i d d 6g ƒ d  S(   Ns^  <?xml version="1.0" encoding="utf-8"?>
          <!DOCTYPE foo [
          <!ELEMENT foo ANY >
          <!ENTITY xxe SYSTEM "file:///etc/passwd" >
          ]>
          <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
            <url>
              <loc>http://127.0.0.1:8000/&xxe;</loc>
            </url>
          </urlset>
        s   http://127.0.0.1:8000/R   (   R    R   R   (   R   R   (    (    s<   /home/travis/build/scrapy/scrapy/tests/test_utils_sitemap.pyt   test_xml_entity_expansionÆ   s    
	(   t   __name__t
   __module__R   R   R   R   R   R   R   R   R   R   (    (    (    s<   /home/travis/build/scrapy/scrapy/tests/test_utils_sitemap.pyR      s   							 		t   __main__(   t   unittestt   scrapy.utils.sitemapR    R   t   TestCaseR   R   t   main(    (    (    s<   /home/travis/build/scrapy/scrapy/tests/test_utils_sitemap.pyt   <module>   s   Ñ