ニコニコ動画の検索結果を取得してリスト化する

# coding: sjis

def login(opener, mail="メールアドレス", password="パスワード"):
	"""Post the niconico login form through *opener* and return the reply.

	Args:
		opener: Object exposing ``open(url, data)``; the caller in this file
			passes a ``urllib2`` opener with a cookie processor attached.
		mail: Account e-mail address sent as the ``mail`` form field.
		password: Account password sent as the ``password`` form field.

	Returns:
		The body of the login response, decoded as UTF-8.
	"""
	import urllib
	url = "https://secure.nicovideo.jp/secure/login?site=niconico"
	postdata = urllib.urlencode({"mail": mail, "password": password})
	# With a urllib2 opener, supplying a data argument turns this into a POST.
	response = opener.open(url, postdata)
	try:
		return response.read().decode("utf-8")
	finally:
		# Release the connection even if reading or decoding fails.
		response.close()

def get_list_html(url=u"http://www.nicovideo.jp/tag/%E5%A0%80%E6%B1%9F%E7%94%B1%E8%A1%A3", cookie_file="cookie.txt"):
	"""Fetch a niconico listing page, logging in first when required.

	Cookies are loaded from *cookie_file* before the request and written
	back to it afterwards, so later runs can reuse them.

	Args:
		url: Address of the listing page to download.
		cookie_file: Path of the LWP-format cookie file to load and save.

	Returns:
		The page body decoded as UTF-8.
	"""
	import cookielib
	import urllib2

	cj = cookielib.LWPCookieJar(cookie_file)
	try:
		cj.load()
	except IOError:
		# First run (no cookie file yet) or an empty/malformed file:
		# cookielib.LoadError is an IOError subclass. Start with an empty
		# jar; the login-form check below takes care of authenticating.
		pass
	opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

	html = opener.open(url).read().decode("utf-8")
	# A login form in the reply means the stored cookies were not accepted.
	# Using "in" instead of index()/except ValueError keeps errors raised by
	# login() or the retry from being swallowed.
	if 'form name="login"' in html:
		login(opener)
		html = opener.open(url).read().decode("utf-8")

	cj.save()
	return html

def parse(html, min_play=5000):
	"""Extract the frequently played videos from a listing page.

	Args:
		html: Markup of the listing page.
		min_play: Only videos whose play count is strictly greater than
			this value are returned.

	Returns:
		A list of dicts with the keys "date", "length", "title", "url",
		"play", "comment" and "mylist". All values are text taken from the
		page, so the counters keep their comma separators.

	Raises:
		IndexError: If the page has fewer tables, paragraphs or counters
			than the fixed positions read below.
		ValueError: If a play count is not numeric once commas are removed.
	"""
	from BeautifulSoup import BeautifulSoup
	from urlparse import urljoin

	soup = BeautifulSoup(html)

	# The code reads the table at index 8; presumably that is the result
	# table of the listing layout -- confirm against the live page.
	cells = soup.findAll("table")[8].findAll("td")
	videos = []
	for cell in cells:
		paragraphs = cell.findAll("p")
		date = paragraphs[1].text
		length = paragraphs[3].text
		title = paragraphs[4].text
		link = paragraphs[4].find("a")
		# urljoin gives the same result as plain concatenation for hrefs
		# like "watch/..." and also handles root-relative or absolute hrefs.
		url = urljoin("http://www.nicovideo.jp/", dict(link.attrs)["href"])
		counters = paragraphs[5].findAll("strong")
		play = counters[0].text
		comment = counters[1].text
		mylist = counters[2].text
		if int(play.replace(",", "")) > min_play:
			videos.append({
				"date": date,
				"length": length,
				"title": title,
				"url": url,
				"play": play,
				"comment": comment,
				"mylist": mylist,
			})

	return videos

def main():
	"""Download the listing page, parse it and print the resulting list."""
	videos = parse(get_list_html())
	print(videos)
	
# Run the scraper only when this file is executed as a script, not on import.
if __name__=="__main__":
	main()
  • Cookieを保存し次回からはそのCookieを使ってアクセス
  • cookie.txtがないとエラーになるかも
  • Cookieが無効 or 中身がないなどでログインフォームが返ってきた場合はログインする
  • 再生数5000以上のみ取得