# See http://www.robotstxt.org/wc/norobots.html for documentation on how to use the robots.txt file
Sitemap: http://allpoetry.com/sitemap_index
# Default rules: these apply to every crawler that is not named by a more
# specific User-agent group elsewhere in this file.
User-Agent: *
Disallow: /poem/next/
Disallow: /poem/print/
Disallow: /poem/printall/
Disallow: /poem/revisions/
Disallow: /poem/friend/
Disallow: /poem/newest/
Disallow: /poem/log/
Disallow: /contest/next/
Disallow: /contest/printall/
Disallow: /contest/friend/
Disallow: /contest/remove_entry
Disallow: /bookmark/
Disallow: /user/feed/
# Disallow values are path prefixes, so /comment/ also covers /comment/to/
# and /comment/by/; they do not need their own lines.
Disallow: /comment/
Disallow: /board/reply/
Disallow: /tag/edit/
# TODO: should be using /list/id-best-in-allpoetry, same with user+list. Verify sitemap.
Disallow: /list/show/
Disallow: /user/show/
Disallow: /poem/show/
Disallow: /background/
Disallow: /favorite/
Disallow: /opoem/log/
Disallow: /journal/log/
Disallow: /journal/next/
Disallow: /spotlight/show/
# Block any URL containing a query string. The "*" wildcard is an extension to
# the original robots.txt format; see
# http://www.google.com/support/webmasters/bin/answer.py?answer=40367&query=robots&topic=&type=
# The Googlebot-only header below is commented out, so this rule is part of the
# "User-Agent: *" group and applies to all crawlers.
#User-agent: Googlebot
Disallow: /*?*
Crawl-delay: 1
# Left crawlable for now at least; reconsider later.
#Disallow: /opoem/
#Disallow: /oauthor/
# The Google AdSense crawler (Mediapartners-Google) shows PSAs instead of ads
# when it can't spider a page, so let it into everything.
# A crawler obeys only the most specific group that names it, so this group
# replaces the "User-Agent: *" rules for this bot rather than adding to them.
# The user-agent token must be the bare product name: "*" is only meaningful as
# the whole value (match-all), not as a suffix.
User-agent: Mediapartners-Google
Allow: /
# Crawlers that are kind enough to obey, but which we'd rather not have
# unless they're feeding search engines.
# Each group below names one crawler and bans it from the entire site
# ("Disallow: /" is a prefix that matches every path).
User-agent: dotbot
Disallow: /
User-agent: VoilaBot
Disallow: /
User-agent: UbiCrawler
Disallow: /
User-agent: DOC
Disallow: /
User-agent: Zao
Disallow: /
# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
# One group per bot; every group bans the whole site with "Disallow: /".
User-agent: sitecheck.internetseer.com
Disallow: /
User-agent: Zealbot
Disallow: /
User-agent: MSIECrawler
Disallow: /
User-agent: SiteSnagger
Disallow: /
User-agent: WebStripper
Disallow: /
User-agent: WebCopier
Disallow: /
User-agent: Fetch
Disallow: /
User-agent: Offline Explorer
Disallow: /
User-agent: Teleport
Disallow: /
User-agent: TeleportPro
Disallow: /
User-agent: WebZIP
Disallow: /
User-agent: linko
Disallow: /
User-agent: HTtrack
Disallow: /
User-agent: Microsoft.URL.Control
Disallow: /
User-agent: Xenu
Disallow: /
User-agent: larbin
Disallow: /
User-agent: libwww
Disallow: /
User-agent: ZyBORG
Disallow: /
User-agent: Download Ninja
Disallow: /
# Individually banned clients: each group below is blocked from the whole
# site, with the reason given in the comment directly above it.
# Sorry, wget in its recursive mode is a frequent problem.
# Please read the man page and use it properly; there is a
# --wait option you can use to set the delay between hits,
# for instance.
User-agent: wget
Disallow: /
# The 'grub' distributed client has been *very* poorly behaved.
User-agent: grub-client
Disallow: /
# Doesn't follow robots.txt anyway, but...
User-agent: k2spider
Disallow: /
# Hits many times per second, not acceptable
# http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /
# A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/
User-agent: WebReaper
Disallow: /