Sound of Mind

2015, Mar 29    

Sound of Mind - rating change over time

It’s been a long time since I last wrote a post. To brush up on Python and R, I worked on a tiny project: crawling the episode and ratings data of a famous webtoon, Sound of Mind. Using R’s default ‘plot’ function, I drew the following chart, which shows the two darkest periods, during which the ratings dropped significantly.

ms_image

#-*- coding: utf-8 -*-

import sys
reload(sys)
sys.setdefaultencoding('utf-8')

from time import sleep
import urllib
from bs4 import BeautifulSoup
from xlwt import Workbook


# Fetch the first listing page to find the newest episode, and from it the
# number of listing pages that have to be crawled.
ms_url = 'http://comic.naver.com/webtoon/list.nhn?titleId=20853&page=1'
ms_data = urllib.urlopen(ms_url)
try:
	ms_soup = BeautifulSoup(ms_data)
finally:
	# The markup has been parsed (or parsing failed); either way the HTTP
	# response is no longer needed, so release it instead of leaking it.
	ms_data.close()

# Every episode row is located through its <td class="title"> cell.
l_episode = ms_soup.findAll('td', attrs={'class':'title'})

# The first title cell on page 1 is treated as the newest episode. Its link
# text is split on "." and the leading part is parsed as the episode number
# (presumably titles look like "<number>. <title>" -- confirm on the live page).
latest_episode = l_episode[0].find('a').contents[0]
latest_episode_num = int(latest_episode.split(".")[0])

# Number of listing pages, assuming 10 episodes per page as the original
# arithmetic did. Ceiling division is used because the previous
# "int(n / 10) + 1" yielded one page too many whenever n was an exact
# multiple of 10 (e.g. 100 episodes -> 11 pages). max() keeps the old
# behaviour of always crawling at least the first page.
end_page_index = max(1, (latest_episode_num + 9) // 10)


# Parallel accumulators: position i in each list describes the same episode,
# in the order the episodes are encountered while crawling.
# NOTE: despite its name, episode_num_list holds the title link text.
episode_num_list = list()
episode_date_list = list()
episode_ratings_list = list()
episode_link_list = list()

# Walk every listing page from 1 up to and including end_page_index.
for page in range(1, end_page_index + 1):
	page_url = 'http://comic.naver.com/webtoon/list.nhn?titleId=20853&page=' + str(page)
	page_data = urllib.urlopen(page_url)
	try:
		page_soup = BeautifulSoup(page_data)
	finally:
		# Release the HTTP response once parsing is over so that responses
		# do not pile up across pages.
		page_data.close()
	total_episode = page_soup.findAll('td', attrs={'class':'title'})

	for title_cell in total_episode:
		title_link = title_cell.find('a')
		episode_num_list.append(title_link.contents[0])

		# Two sibling hops from the title cell reach the node holding the
		# <strong> rating; two more hops reach the node holding the date.
		# (Presumably the odd hops are whitespace text nodes between <td>
		# elements -- this depends on the parser and page markup; confirm.)
		rating_cell = title_cell.next_sibling.next_sibling
		date_cell = rating_cell.next_sibling.next_sibling
		episode_date_list.append(date_cell.contents[0])
		episode_ratings_list.append(rating_cell.find('strong').contents[0])

		# The href is joined onto the site root to form the stored link.
		episode_link_list.append('http://comic.naver.com' + title_link.get('href'))

	# Single-argument parenthesised form: prints the same text under the
	# Python 2 print statement and also parses under Python 3.
	print("%i is done" % page)

	# Pause between page requests.
	sleep(0.5)



# Export the crawled episode data to a single-sheet workbook, "ms.xls".
book = Workbook()
sheet = book.add_sheet("ms")

# Row 0 holds the column headings.
for col, heading in enumerate(('num', 'episode_title', 'date', 'ratings', 'link')):
	sheet.write(0, col, heading)

# One row per crawled episode, starting at row 1. The "num" column counts
# down from latest_episode_num in crawl order.
rows = zip(episode_num_list, episode_date_list, episode_ratings_list, episode_link_list)
for offset, (ep_title, ep_date, ep_rating, ep_link) in enumerate(rows):
	row = offset + 1
	sheet.write(row, 0, latest_episode_num - offset)
	sheet.write(row, 1, ep_title)
	sheet.write(row, 2, ep_date)
	# Ratings were scraped as text; store them as numbers.
	sheet.write(row, 3, float(ep_rating))
	sheet.write(row, 4, ep_link)

book.save('ms.xls')





#episode_num // title // date // ratings // rating_num // link