Sunday, November 10, 2013

Python script to get your Movie details from omdbapi.com

This was my first (and, to date, only) Python script. It finds movie files in a directory and uses the http://www.omdbapi.com/ API to fetch and print each movie's details on the console.
'''
Copyright Mad Piranha, Apr 10, 2012

Used 2to3.py to convert from python26 to python33
@author: Mad Piranha
'''

import re
import glob
import http.client, urllib.request, urllib.parse, urllib.error
import os
import fnmatch

# Host name of the OMDb API server (no scheme; used with http.client).
apiURL = "www.omdbapi.com"

# Directory to find movies.
# Raw string so the Windows backslashes are never read as escape sequences.
movieFolder = r"G:\MotionPics\__HIGHRES"

# Ignore these directories (matched against the exact directory name).
# NOTE(review): 'TV Serires' looks like a typo for 'TV Series' -- left
# unchanged in case it matches the folder name on disk.
ignore = ['Sample', 'South Park', 'Tom&Jerry', 'Video clips', 'Subtitles', 'TV Serires', 'TELGU', 'TAMIL', 'MALAYALAM', 'KANNADA', 'HINDI', 'OTHERS', 'Hindi Cinema', 'BOLLYWOOD']

# File name filter regular expression.
# The named group "name" captures everything before the extension; the
# directory scan reads it back with match.group("name").
FILE_NAME_MATCH_EXPR = r"(?P<name>.*)\.(avi|divx|mkv|mpg|mp4|wmv|bin|ogm|vob|iso|img|ts)"

# Get RottenTomatoes data (sent as the 'tomatoes' query parameter).
tomato = 'true'

def moviedetails(moviename):
    """Query the OMDb API for *moviename* and print the raw response.

    Sends a GET request to ``apiURL`` with the title in the ``t`` parameter
    and the module-level ``tomato`` setting in the ``tomatoes`` parameter.
    Prints the HTTP status and reason, then the response body as the
    ``bytes`` object returned by ``response.read()``.

    Returns None. Errors raised by ``http.client`` while connecting or
    reading are not caught here and propagate to the caller.
    """
    # Create URL parameters. Check http://www.omdbapi.com for details
    params = urllib.parse.urlencode({'tomatoes': tomato, 't': moviename})
    connection = http.client.HTTPConnection(apiURL)
    try:
        # Connect and send the request
        connection.request("GET", "/?" + params)
        response = connection.getresponse()
        # Get/Read response and print
        print(response.status, response.reason)
        data1 = response.read()
        print(data1)
    finally:
        # Always release the socket, even if the request or read failed.
        connection.close()

def movienamefromfile(moviefilename):
    """Derive a searchable movie title from a file name (without extension).

    The clean-up steps run in order:

    1. Drop everything from the first ``[`` or ``(`` onwards.
    2. Drop everything from the first release tag (dvdrip, brrip, unrated,
       webscr, klaxxon, xvid, r5 -- case-insensitive) onwards.
    3. Drop everything from the first run of four digits onwards.
    4. Replace ``.`` and ``_`` with spaces.

    Prints ``<original> -> <cleaned>`` (before trimming) and returns the
    cleaned title with leading and trailing whitespace removed.

    NOTE(review): step 3 also truncates titles that themselves contain four
    consecutive digits (e.g. a title starting with a year).
    """
    # Remove all characters after these special characters
    substr = re.sub(r"(\[|\()(.*)$", "", moviefilename)
    # Remove unnecessary words and anything after that.
    # The flag must be passed by keyword: the 4th positional argument of
    # re.sub is `count`, not `flags`.
    substr = re.sub(
        r"(dvdrip|brrip|UNRATED|WEBSCR|KLAXXON|xvid|r5)(.*)$",
        "",
        substr,
        flags=re.IGNORECASE,
    )
    # Remove 4digits in a row (year of the movie ?)
    substr = re.sub(r"(\d{4})(.*)$", "", substr)
    # Replace . and _ with space
    substr = re.sub(r"(\.|_)", " ", substr)
    print(moviefilename, " -> ", substr)
    return substr.strip()


def parsemoviefolder(foldername):
    """Walk *foldername* and print online details for every movie file found.

    Directories whose name appears in the module-level ``ignore`` list are
    skipped. Each file whose name matches ``FILE_NAME_MATCH_EXPR``
    (case-insensitive) has its title extracted and looked up; a blank line
    is printed after each lookup.
    """
    for _root, subdirs, files in os.walk(foldername):
        # Prune ignored directories in place so os.walk does not descend
        # into them.
        subdirs[:] = [d for d in subdirs if d not in ignore]

        for fname in files:
            matched = re.match(FILE_NAME_MATCH_EXPR, fname, re.I)
            if not matched:
                # Not a movie file; nothing to look up.
                continue
            # Turn the file name into a title, then fetch and print details.
            title = movienamefromfile(matched.group("name"))
            moviedetails(title)
            print()
            
# Script entry point: scan the configured movie folder and print the details
# of every movie file found in it.
parsemoviefolder(movieFolder)

No comments:

Post a Comment