def login(opener):
    """Log in to niconico by POSTing the credentials through *opener*.

    Args:
        opener: An object exposing ``open(url, data)`` that returns a
            response with ``read()`` and ``close()``, e.g. the opener
            produced by ``urllib2.build_opener``.

    Returns:
        The body of the login response, decoded as UTF-8.
    """
    # urlencode lives in ``urllib`` on Python 2 and in ``urllib.parse`` on 3.
    try:
        from urllib import urlencode
    except ImportError:
        from urllib.parse import urlencode

    url = "https://secure.nicovideo.jp/secure/login?site=niconico"
    # Placeholder values (Japanese for "mail address" and "password");
    # replace them with real credentials before running.
    postdata = {
        "mail": "メールアドレス",
        "password": "パスワード",
    }
    # The urlencoded text is pure ASCII. Encoding it yields the bytes body
    # that Python 3 openers require and leaves a Python 2 str unchanged.
    postdata = urlencode(postdata).encode("ascii")
    response = opener.open(url, postdata)
    try:
        return response.read().decode("utf-8")
    finally:
        # Release the connection even when reading or decoding fails.
        response.close()
def get_list_html():
    """Download the hard-coded niconico tag page, logging in if required.

    Cookies are loaded from ``cookie.txt`` in the current directory when
    that file exists, and written back to it after the page is fetched.
    If the fetched page contains the login form, ``login`` is called with
    the same opener and the page is requested again.

    Returns:
        The page body decoded as UTF-8.
    """
    import os
    # Module names differ between Python 2 and Python 3.
    try:
        import cookielib
        import urllib2
    except ImportError:
        import http.cookiejar as cookielib
        import urllib.request as urllib2

    url = u"http://www.nicovideo.jp/tag/%E5%A0%80%E6%B1%9F%E7%94%B1%E8%A1%A3"
    cookie_path = "cookie.txt"
    cj = cookielib.LWPCookieJar(cookie_path)
    # On the first run there is no cookie file yet; loading it would raise.
    if os.path.isfile(cookie_path):
        cj.load()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

    html = _fetch_html(opener, url)
    # Use a plain membership test instead of try/except ValueError, so that
    # errors raised by login() or by the second fetch (UnicodeDecodeError is
    # a ValueError subclass) are no longer silently swallowed.
    if 'form name="login"' in html:
        login(opener)
        html = _fetch_html(opener, url)
    cj.save()
    return html


def _fetch_html(opener, url):
    """Return the body of *url* fetched through *opener*, decoded as UTF-8."""
    response = opener.open(url)
    try:
        return response.read().decode("utf-8")
    finally:
        # Always release the connection, even if read/decode fails.
        response.close()
def parse(html):
    """Collect the videos with more than 5000 plays from a listing page.

    Args:
        html: Markup of the listing page, handed to ``BeautifulSoup``.

    Returns:
        A list of dicts with the keys ``date``, ``length``, ``title``,
        ``url``, ``play``, ``comment`` and ``mylist``; one dict per table
        cell whose play count (commas removed) exceeds 5000.
    """
    from BeautifulSoup import BeautifulSoup

    document = BeautifulSoup(html)
    # The entries are read from the cells of the ninth <table> on the page.
    cells = document.findAll("table")[8].findAll("td")

    videos = []
    for cell in cells:
        paragraphs = cell.findAll("p")
        date = paragraphs[1].text
        length = paragraphs[3].text
        title = paragraphs[4].text
        link = paragraphs[4].find("a")
        url = "http://www.nicovideo.jp/" + dict(link.attrs)["href"]
        counters = paragraphs[5].findAll("strong")
        play = counters[0].text
        comment = counters[1].text
        mylist = counters[2].text

        # Skip entries that do not exceed the play-count threshold.
        if int(play.replace(",", "")) <= 5000:
            continue
        videos.append({
            "date": date,
            "length": length,
            "title": title,
            "url": url,
            "play": play,
            "comment": comment,
            "mylist": mylist,
        })
    return videos
def main():
    """Fetch the listing page, parse it, and print the resulting videos."""
    videos = parse(get_list_html())
    # A single parenthesised argument prints identically on Python 2 (as a
    # print statement) and on Python 3 (as a function call).
    print(videos)
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()