Python Beautiful Soup: extracting URLs from a web page
from BeautifulSoup import BeautifulSoup, SoupStrainer
import re
import urllib2


def get_url_content(site_url):
    """Fetch ``site_url`` and return the body of the response.

    When the server answers with an HTTP error status, the body of that
    error response is returned (as a string) instead of raising.  Other
    failures raised by ``urllib2`` propagate to the caller.
    """
    try:
        request = urllib2.Request(site_url)
        f = urllib2.urlopen(request)
        try:
            content = f.read()
        finally:
            # Always release the connection, even if reading fails midway.
            f.close()
    except urllib2.HTTPError as error:
        # The HTTPError object is itself readable; use the error page body.
        content = str(error.read())
    return content


# Download the page and print the href of every <a> tag that has one.
response = get_url_content('http://www.sust.edu/')
# SoupStrainer('a') restricts parsing to anchor tags only.
for link in BeautifulSoup(response, parseOnlyThese=SoupStrainer('a')):
    if link.has_key('href'):
        print(link['href'])
Output:
All URLs (the `href` of every `<a>` tag) found on the page at this link.