from twill.commands import * import time import re import BeautifulSoup import rpy2.robjects as R import cPickle #Construct a browser object. b = get_browser() b.go("http://forums.offtopic.com") #Fill field 'vb_login_username' of form 1 with "ByteMining", the username. fv("1", "vb_login_username", "ByteMining") #Fill field 'vb_login_password' of form 1 with "yeah_right", the password. fv("1", "vb_login_password", "yeah_right") #Click the fifth submit button (called 4) b.submit('4') #Bypass the "Please wait while we log you in..." prompt b.go("http://forums.offtopic.com") #Extract the HTML table containing the actual data. There are multiple tables. patt = re.compile('(.' + '
' + '