Python Tutorial

Sunday, March 24, 2019

Beautiful Soup IMDB movie page parser

Scrape all the information about a movie from its IMDb page

Source code available on github
from bs4 import BeautifulSoup
import requests


class IMDB(object):
    """Extract movie details from the HTML of an IMDb title page.

    Pass the page's HTML to the constructor and call ``get_info()``.  When
    no HTML is given, ``soup`` is ``None`` and the getters cannot be used.
    """

    def __init__(self, html=None):
        # Only build a parse tree when there is markup to parse.
        self.soup = BeautifulSoup(html, "html.parser") if html else None

    def get_info(self):
        """Return a dict with name, year, rate, review_count and category."""
        return {
            "name": self._get_name(),
            "year": self._get_year(),
            "rate": self._get_rating(),
            "review_count": self._get_review_count(),
            "category": self._get_category(),
        }

    def _get_name(self):
        """Return the direct text of the title heading."""
        heading = self.soup.select(
            ".title_bar_wrapper .titleBar .title_wrapper h1")[0]
        return self._get_data_prity(heading)

    def _get_year(self):
        """Return the third whitespace-separated word of the last subtext link."""
        links = self.soup.select(".title_bar_wrapper .titleBar .subtext a")
        release_text = self._get_data_prity(links[-1])
        return release_text.split()[2]

    def _get_rating(self):
        """Return the text of the first rating-value span."""
        rating = self.soup.select(".title_bar_wrapper .ratingValue span")[0]
        return self._get_data_prity(rating)

    def _get_review_count(self):
        """Return the text of the first ``.small`` element of the rating box."""
        counter = self.soup.select(".title_bar_wrapper .imdbRating .small")[0]
        return self._get_data_prity(counter)

    def _get_category(self):
        """Return the text of every subtext link except the last one."""
        links = self.soup.select(".title_bar_wrapper .subtext a")
        # The last link is the one _get_year() reads, so it is left out here.
        return [self._get_data_prity(link) for link in links[:-1]]

    def _get_data_prity(self, d):
        """Return the stripped text node that is a direct child of *d*."""
        direct_text = d.find(text=True, recursive=False)
        return str(direct_text.strip())


def get_url_content(url, timeout=10):
    """Download *url* and return the response body as UTF-8 encoded bytes.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests`` can wait indefinitely.

    Returns:
        The decoded response text re-encoded as UTF-8 bytes; characters
        that cannot be encoded are dropped.

    Raises:
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a 4xx/5xx response status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error pages instead of handing their HTML to the parser.
    response.raise_for_status()
    return response.text.encode('utf-8', 'ignore')


if __name__ == '__main__':
    # Demo run: download one IMDb title page and print the scraped fields.
    page_url = "https://www.imdb.com/title/tt0034583/"
    scraper = IMDB(get_url_content(page_url))
    print(scraper.get_info())


Output:

{'category': ['Drama', 'Romance', 'War'], 'rate': '8.5', 'review_count': '469,931', 'name': 'Casablanca', 'year': '1943'}

Friday, May 13, 2016

Selenium unittest example


import unittest
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

class JPythonSearch(unittest.TestCase):
    """Browser test: search the blog for "BFS" and expect at least one hit."""

    def setUp(self):
        # A new Firefox session is started before every test method.
        self.driver = webdriver.Firefox()

    def test_search_in_python_org(self):
        """Open the blog, submit a search for "BFS" and check the result page."""
        url = "http://jpython.blogspot.com"
        driver = self.driver
        driver.get(url)
        self.assertIn("Life is very easy with Python", driver.title)
        # find_element("name", ...) works on Selenium 3 and 4; the
        # find_element_by_name shortcut was removed in Selenium 4.3.
        elem = driver.find_element("name", "q")
        elem.send_keys("BFS")
        elem.send_keys(Keys.RETURN)
        # assertNotIn reports the offending text on failure and, unlike a
        # bare assert, is not stripped when Python runs with -O.
        self.assertNotIn("No results found.", driver.page_source)

    def tearDown(self):
        # quit() ends the whole WebDriver session; close() would only close
        # the current window.
        self.driver.quit()

# Run the test case above when this file is executed directly.
if __name__ == "__main__":
    unittest.main()

Output:

.
----------------------------------------------------------------------
Ran 1 test in 25.592s

OK
[Finished in 25.7s]

Selenium example

Here is an example of browsing to a site and searching it programmatically.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

URL = "http://jpython.blogspot.com"
driver = webdriver.Firefox()
# Let element lookups poll for up to 10 seconds.  This only configures the
# driver (it does not pause the script), so it has to be set before the
# lookups it should affect.
driver.implicitly_wait(10)
driver.get(URL)
print(driver.title)

# find_element("name", ...) works on Selenium 3 and 4; the
# find_element_by_name shortcut was removed in Selenium 4.3.
search = driver.find_element("name", "q")  # find search field
search.send_keys("BFS")  # set search key
search.send_keys(Keys.RETURN)  # hit enter

driver.quit()  # end the browser session

Output:

Life is very easy with Python

Selenium installation

The Selenium Python bindings provide a simple API for writing functional/acceptance tests with Selenium WebDriver. Through the Selenium Python API you can access all the functionality of Selenium WebDriver in an intuitive way.

Installation

pip install selenium

If you don't have pip installed try this

Wednesday, November 11, 2015

map built-in function

Apply function to every item of iterable and return a list of the results. If additional iterable arguments are passed, function must take that many arguments and is applied to the items from all iterables in parallel. If one iterable is shorter than another it is assumed to be extended with None items. (This describes Python 2. In Python 3, map returns a lazy iterator instead of a list and stops as soon as the shortest iterable is exhausted.)


All source code available on github
intArray = [2, 3, 4, 6, 5, 7]
strArray = ['2', '3', '4', '6', '5', '7']

# square all elements of array
# (list(...) makes the result printable on Python 3, where map() is lazy)
squares = list(map(lambda x: x ** 2, intArray))
print(squares)

# convert string array to int array
numbers = list(map(int, strArray))
print(numbers)

a = [1, 2, 3]
b = [4, 5, 6]

# sum two array elements by index
pair_sums = list(map(lambda x, y: x + y, a, b))
print(pair_sums)


Output:
[4, 9, 16, 36, 25, 49]
[2, 3, 4, 6, 5, 7]
[5, 7, 9]

Monday, October 19, 2015

Dijkstra algorithm


Dijkstra algorithm is a single source shortest path algorithm. For a given source node in the graph, the algorithm finds the shortest path between that node and every other.

Here is the python implementation of Dijkstra algorithm
from heapq import heappush, heappop

# Dijkstra's algorithm for graphs whose nodes are numbered from 0
def Dijkstra(graph, source):
    """Return the shortest distance from *source* to every node.

    *graph* maps each node index to a dict of ``{neighbour: edge_length}``.
    The result is a list indexed by node; nodes that cannot be reached
    from *source* are reported as -1.
    """
    distances = [None] * len(graph)
    frontier = [(0, source)]
    while frontier:
        dist, node = heappop(frontier)
        if distances[node] is not None:
            # Stale heap entry: this node was settled by an earlier pop.
            continue
        distances[node] = dist
        for neighbour, weight in graph[node].items():
            if distances[neighbour] is None:
                heappush(frontier, (dist + weight, neighbour))

    # set -1 for unreachable
    return [-1 if d is None else d for d in distances]

# Adjacency dict: graph[u][v] is the length of the edge u -> v.
# Node 5 has no edges at all.
graph = {
    0: {1: 2, 2: 4, 3: 1},
    1: {2: 1, 3: 3},
    2: {4: 7},
    3: {2: 2},
    4: {0: 2, 3: 3},
    5: {},
}
source = 0

# print(...) with a single argument behaves the same on Python 2 and 3.
print(Dijkstra(graph, source))

Output:
[0, 2, 3, 1, 10, -1]

Saturday, October 17, 2015

python heapq/priority queue/Min heap


Python heapq provides implementation of the heap(min) queue algorithm, also known as the priority queue algorithm. Here's some example code.
from heapq import heappush, heappop

# priority queue normal
h = []
heappush(h, 5)
heappush(h, 1)
heappush(h, 3)

# format() gives the same "<value> size <n>" line on Python 2 and 3;
# the pop happens before len(h) is read, as in the original statement.
for _ in range(3):
    print("{} size {}".format(heappop(h), len(h)))

# priority queue with tuple number and string
# (tuples compare element by element, so ties on the number are broken
# by the string)
h = []
heappush(h, (5, "sample text"))
heappush(h, (1, "important text"))
heappush(h, (1, "a important text"))
heappush(h, (9, "un-important text"))

print(heappop(h))
print(heappop(h))

# priority queue with tuple number only
h = []
heappush(h, (5, 3))
heappush(h, (7, 3))
heappush(h, (1, 3))
heappush(h, (1, 1))
heappush(h, (1, 2))
heappush(h, (3, 2))
heappush(h, (3, 1))

print(heappop(h))
print(heappop(h))
print(heappop(h))

Output:
1 size 2
3 size 1
5 size 0
(1, 'a important text')
(1, 'important text')
(1, 1)
(1, 2)
(1, 3)

Saturday, August 1, 2015

Improve python programming/problem solving skill


Sites that will improve your python programming skill.
I will update this list regularly and am open to any suggestions.

Wednesday, April 1, 2015

Pillow python image processing library


Pillow is a fork of PIL (Python Imaging Library) and is very easy to use. To download the source and/or contribute, please see https://github.com/python-pillow/Pillow

Tuesday, March 31, 2015

Nose unittest OOP example


Here we will test helloWorld.py using nose
All source code is available on github
# testHelloWorldOOP.py
__author__ = 'AbuZahedJony'

from helloWorld import add_two_num
from helloWorld import multi_two_num

class TestHelloWorld:
    """nose test class for the add_two_num / multi_two_num helpers.

    The print() calls take a single argument so the output is identical
    on Python 2 and Python 3.
    """

    def __init__(self):
        pass

    # This function will run before any test case (only once)
    @classmethod
    def setup_class(cls):
        print("Main Setup")

    # This function will run after all test case (only once)
    @classmethod
    def teardown_class(cls):
        print("Main Teardown")

    # This function will call per test case (before)
    def setup(self):
        print("SETUP")

    # This function will call per test case (after)
    def teardown(self):
        print("TEAR-DOWN")

    def test_add_num(self):
        print(10 * "*" + " Test add num")
        assert add_two_num(2, 3) == 5
        assert add_two_num(-2, 3) == 1

    def test_multi_num(self):
        print(10 * "*" + " Test multi num")
        assert multi_two_num(2, 3) == 6
        assert multi_two_num(-2, 3) == -6
  

RUN: nosetests -s testHelloWorldOOP.py

Output:
Main Setup
SETUP
********** Test add num
TEAR-DOWN
.SETUP
********** Test multi num
TEAR-DOWN
.Main Teardown

----------------------------------------------------------------------
Ran 2 tests in 0.002s

OK

Nose unittest simple example


Here we will test helloWorld.py using nose
All source code is available on github
# testHelloWorld.py
from nose import with_setup

__author__ = 'AbuZahedJony'

from helloWorld import add_two_num
from helloWorld import multi_two_num

def m_setup():
    """Per-test setup hook; announces itself on stdout."""
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("\nRun SETUP")

def m_teardown():
    """Per-test teardown hook; announces itself on stdout."""
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Run TEAR-DOWN")

@with_setup(m_setup, m_teardown)
def test_add_num():
    """Check add_two_num for positive, mixed-sign and negative operands."""
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Running test ADD")
    assert add_two_num(2, 3) == 5
    assert add_two_num(-2, 3) == 1
    assert add_two_num(-2, -3) == -5

@with_setup(m_setup, m_teardown)
def test_multi_num():
    """Check multi_two_num for positive, mixed-sign and negative operands."""
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Running test MULTI")
    assert multi_two_num(2, 3) == 6
    assert multi_two_num(-2, 3) == -6
    assert multi_two_num(-2, -3) == 6


RUN: nosetests -s testHelloWorld.py

Output:

Run SETUP
Running test ADD
Run TEAR-DOWN
.
Run SETUP
Running test MULTI
Run TEAR-DOWN
.
----------------------------------------------------------------------
Ran 2 tests in 0.002s

OK

Nose python unittest framework


Nose official doc

Setup

pip install nose

Please install pip if required

Tuesday, September 30, 2014

Python profiling

Profiling in python is very handy. It will help you to monitor your code performance and find the hot spot of your code.
Here I am using cProfile for profiling. It is built in with python.

Download sample code , unzip the file and cd to the folder.
Now run this command: python -m cProfile a.py
Output will be

Visualization of cProfile data:

I used RunSnakeRun for visualization.
Now run the following two commands (make sure RunSnakeRun is installed):
python -m cProfile -o file.prof a.py
runsnake file.prof

Output will be

Check it out.

Sunday, September 28, 2014

Min, max value and index from list

See also enumerate and operator.itemgetter
import operator
a = [2, 5, 1, 4, 8, 71, 4, 1, 21]
# enumerate() yields (index, value) pairs; itemgetter(1) makes min()/max()
# compare by value, and the first pair wins when a value occurs twice.
by_value = operator.itemgetter(1)
min_i, min_v = min(enumerate(a), key=by_value)
max_i, max_v = max(enumerate(a), key=by_value)

# format() produces the same line on Python 2 and 3.
print("[Min] index: {} value: {}".format(min_i, min_v))
print("[Max] index: {} value: {}".format(max_i, max_v))


Output:
[Min] index: 2 value: 1
[Max] index: 5 value: 71

Python: Absolute difference between successive elements in list


See also abs, zip and islice
from itertools import islice

a = [1, 4, 2, 6, 7, 3]
# Pair every element with its successor: islice(a, 1, None) is `a` without
# its first item, and zip() stops when that shorter sequence runs out.
result = [abs(p - q) for p, q in zip(a, islice(a, 1, None))]

# Single-argument print(...) behaves the same on Python 2 and 3.
print(result)

Output:
[3, 2, 4, 1, 4]

Wednesday, June 4, 2014

Python multi threading vs multi processing



A very good article on python multi threading vs multi processing

Python Requests: HTTP for Humans


Python requests module provides very interactive way to access web content via proper HTTP protocol.

Installation:
Here is the installation manual. You can also install using pip :
pip install requests

import requests

# NOTE(review): 'user' / 'pass' look like placeholders for real GitHub
# credentials - confirm before running.
# The timeout keeps the call from waiting forever on an unresponsive server.
r = requests.get('https://api.github.com/user', auth=('user', 'pass'),
                 timeout=10)
# Single-argument print(...) behaves the same on Python 2 and 3.
print(r.status_code)
print(r.headers['content-type'])
print(r.encoding)
print(r.text)
print(r.json())

Output:
200
'application/json; charset=utf8'
'utf-8'
u'{"type":"User"...'
{u'private_gists': 419, u'total_private_repos': 77, ...}


Python webbrowser


The Python webbrowser module provides a high-level interface for displaying Web-based documents.
import webbrowser

# Show the Python documentation in the browser.  new=0 asks for an existing
# browser window to be reused and autoraise=True asks for it to be raised.
url = 'https://docs.python.org'
webbrowser.open(url=url, new=0, autoraise=True)


Output:

This code opens the URL in an existing web browser window and raises that window.

Saturday, May 10, 2014

Python unittest example



All source code available on github
Python unittest example. Here I am testing my insertion sort code.
import random
import unittest


def insert(A, i):
    """Move A[i] left into its sorted place, assuming A[:i] is sorted.

    The list is modified in place; nothing is returned.
    """
    pending = A[i]
    slot = i
    # Shift larger neighbours one step right to open a slot for `pending`.
    while slot != 0 and A[slot - 1] > pending:
        A[slot] = A[slot - 1]
        slot -= 1
    A[slot] = pending


def insertion_sort(A):
    """Sort the list A in place by inserting each element in turn."""
    for position in range(len(A)):
        insert(A, position)

class TestInsertionSort(unittest.TestCase):
    """Unit tests for insertion_sort()."""

    def setUp(self):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Setup ...")

    def testSortRange(self):
        """Shuffled ranges of several sizes come back in ascending order."""
        print("TestSortRange .... \n")
        for n in (10, 1, 77):
            # list(...) is needed on Python 3, where range() returns an
            # immutable object that cannot be shuffled or compared to a list.
            a = list(range(n))
            random.shuffle(a)
            insertion_sort(a)
            self.assertEqual(a, list(range(n)))

    def testSortData(self):
        """The empty list and a small fixed list are sorted correctly."""
        print("TestSortData .... \n")
        a = []
        r = []
        insertion_sort(a)
        self.assertEqual(a, r)

        a = [3, 1, 2]
        r = [1, 2, 3]
        insertion_sort(a)
        self.assertEqual(a, r)


if __name__ == "__main__":
    # Run only TestInsertionSort; verbosity=2 is the runner's detailed
    # per-test reporting mode.
    loader = unittest.TestLoader()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(loader.loadTestsFromTestCase(TestInsertionSort))


Output:
testSortData (__main__.TestInsertionSortSetup ..) ... ok
.
TestSortData .... 

Setup ...
TestSortRange .... 

testSortRange (__main__.TestInsertionSort) ... ok

----------------------------------------------------------------------
Ran 2 tests in 0.000s

OK
[Finished in 0.1s]

Friday, January 17, 2014

Python fancy string formatting

# Every print(...) below takes a single argument, so the output is the
# same on Python 2 and Python 3.

# Positional fields: automatic numbering, explicit indexes, reordering
print("1. Life is {} {} {} python".format('very', 'easy', 'with'))
print("2. Life is {0} {1} {2} python".format('very', 'easy', 'with'))

# Indexes may be reordered and mixed with keyword fields
print("3. Life is {1} {0} {2} python".format('very', 'easy', 'with'))
print("4. Life is {1} {0} {t} python".format('very', 'easy', t='with'))
print("5. Life is {0} {t} python".format('very easy', t='with'))

# Width specifiers right-align the numbers into columns
for n in range(5, 11):
    print("{0:2d} {1:3d} {2:4d}".format(n, n * n, n * n * n))

# A dict can supply the keyword fields through ** unpacking
d = {"name1": 100, "name3": 350, "name2": 250}
print("6. First: {name1:d}, Second: {name2:d}, Third: {name3:d}".format(**d))

print("zfill pads string on left zeros, it also consider + and - sign")
print('25'.zfill(5))
print('2.5'.zfill(5))
print('-2.5'.zfill(5))
print('2.54234324234'.zfill(5))
print('ah'.zfill(5))

# rjust / ljust pad with the given fill character up to the given width
print("asd".rjust(6, 'R'))
print("asd".ljust(6, 'R'))

Output:
1. Life is very easy with python
2. Life is very easy with python
3. Life is easy very with python
4. Life is easy very with python
5. Life is very easy with python
 5  25  125
 6  36  216
 7  49  343
 8  64  512
 9  81  729
10 100 1000
6. First: 100, Second: 250, Third: 350
zfill pads string on left zeros, it also consider + and - sign
00025
002.5
-02.5
2.54234324234
000ah
RRRasd
asdRRR