Python Beautiful Soup: extracting URLs from a web page
from BeautifulSoup import BeautifulSoup, SoupStrainer
import re
import urllib2
def get_url_content(site_url):
    """Fetch ``site_url`` over HTTP and return the response body.

    Args:
        site_url: Address of the page to download.

    Returns:
        The raw body of the response. If the server answers with an HTTP
        error status, the body of that error response is returned instead
        (converted with ``str``).

    Raises:
        Any exception other than ``urllib2.HTTPError`` raised while opening
        or reading the URL propagates to the caller unchanged.
    """
    try:
        response = urllib2.urlopen(urllib2.Request(site_url))
        try:
            return response.read()
        finally:
            # Release the connection even when read() fails part-way.
            response.close()
    except urllib2.HTTPError as error:
        # The error object carries the server's error page; hand that back
        # so the caller still receives something to parse.
        return str(error.read())
# Download the page, then parse only its <a> tags and print each href.
response = get_url_content('http://www.sust.edu/')
for link in BeautifulSoup(response, parseOnlyThese=SoupStrainer('a')):
    # Anchors without an href (e.g. named anchors) have nothing to print.
    if not link.has_key('href'):
        continue
    print(link['href'])
Output:
The href value of every anchor (<a>) tag found on the fetched page, printed one per line.
