Wednesday, October 21, 2009

How to fetch a feed URL from a web page in Python

Easily.


import urllib2
from lxml import etree

# Feed autodiscovery: download a web page and read the feed URL advertised
# in its <link rel="alternate" ...> tags.
#
# Result: `feedUrl` holds the href of the chosen link, or None when the page
# could not be fetched, is not HTML, or advertises no alternate link.
# NOTE: the href is returned exactly as written in the page, so it may be a
# relative URL that still has to be resolved against `webPageUrl`.
webPageUrl = "http://www.osmonov.com"
feedUrl = None

# MIME types that mark an alternate link as a syndication feed rather than,
# for example, a translated or printable version of the page.
_FEED_TYPES = ('application/rss+xml', 'application/atom+xml')

try:
    opener = urllib2.build_opener()
    response = opener.open(webPageUrl)
    try:
        # The header can be absent; treat that as "not HTML" instead of
        # failing on the lookup.
        content_type = response.headers.get('Content-Type') or ''
        if content_type.startswith('text/html'):
            html_element = etree.HTML(response.read())
            link_elements = []
            # Guard against the parser yielding no root element.
            if html_element is not None:
                link_elements = html_element.findall(
                    './/link[@rel="alternate"]')

            # Prefer links explicitly typed as RSS/Atom; fall back to any
            # alternate link so pages that omit the type still work.
            feed_links = [
                link for link in link_elements
                if (link.get('type') or '').strip().lower() in _FEED_TYPES
            ]
            candidates = feed_links or link_elements
            if candidates:
                # A page may advertise several feeds; only the first is used.
                feedUrl = candidates[0].get('href')
    finally:
        # Always release the connection, even if parsing fails.
        response.close()
except urllib2.URLError:
    # Network or HTTP failure (HTTPError is a URLError subclass): leave the
    # result explicitly unset so later code can detect and report it.
    feedUrl = None
Powered by Blogger.