"""Quick tour of the Beautiful Soup 4 API using a sample HTML document.

Parses ``bs_sample.html`` from the current working directory and prints the
results of several common lookups (title, first ``<p>``, all links, text).
"""
from pprint import pprint

from bs4 import BeautifulSoup

# Sample document used by this script:
# http://dl.dropbox.com/u/49962071/blog/python/resource/bs_sample.html
#
# The file is read as bytes so Beautiful Soup performs the encoding detection
# itself, and inside ``with`` so the handle is closed as soon as parsing ends.
with open('bs_sample.html', 'rb') as html_content:
    # Name the parser explicitly: without it Beautiful Soup picks whichever
    # parser happens to be installed, so the tree can differ between machines.
    soup = BeautifulSoup(html_content, 'html.parser')  # making soup

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print(soup.prettify())          # pretty-printed markup; unclosed tags are completed

print(soup.title)               # page title tag
print(soup.title.name)          # page title tag name
print(soup.title.parent.name)   # name of the title tag's parent

print(soup.p)                   # first p tag
print(soup.p.string)            # string content of first p tag
print(soup.p['class'])          # class attribute of first p tag

print(soup.a)                   # first a tag

pprint(soup.find_all('a'))      # all a tags
pprint(soup.find_all('p'))      # all p tags

print(soup.find(id='link3'))    # tag with id = link3

print('All links:')
for link in soup.find_all('a'):
    print(link.get('href'))     # URL of each link

print(soup.get_text())          # text part of the HTML document
Friday, November 2, 2012
Beautiful Soup 4 exploring
A quick exploration of Beautiful Soup 4. I used this
document for practice. All source code for this blog is available on GitHub.
Posted by
Abu Zahed Jony
at
7:26 PM
Email This | BlogThis! | Share to X | Share to Facebook | Share to Pinterest
Labels:
Beautiful Soup
Subscribe to:
Post Comments (Atom)
0 comments:
Post a Comment