%matplotlib inline
import numpy as np
import re
import pandas as pd
from bs4 import BeautifulSoup
from sys import argv
from urllib.request import urlopen
from urllib.error import HTTPError
import requests
import itertools
import datetime as dt
def grouper(iterable, n, fillvalue=None):
    """Split *iterable* into consecutive tuples of length *n*.

    The final tuple is padded with *fillvalue* when the input runs out
    early, and the chunks are returned as a list.

    Example: grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    """
    # One shared iterator repeated n times: each output tuple pulls n
    # successive items from the same underlying stream.
    shared = iter(iterable)
    lanes = [shared for _ in range(n)]
    chunks = itertools.zip_longest(*lanes, fillvalue=fillvalue)
    return list(chunks)
def get_item(soup):
    """Return the stripped text of the table matched by its ``summary``.

    *soup* must offer ``find(name, attrs=...)``; the result's ``.text`` is
    returned with leading and trailing whitespace removed.
    """
    wanted_summary = "외국인 기관 순매매 거래량에 관한표이며 날짜별로 정보를 제공합니다."
    table = soup.find('table', attrs={'summary': wanted_summary})
    raw_text = table.text
    return raw_text.strip()
def get_num(soup):
    """Return the stripped text of the table looked up by class ``type2``.

    *soup* must offer ``find(name, attrs=...)``; the result's ``.text`` is
    returned with leading and trailing whitespace removed.
    """
    table = soup.find('table', attrs={'class': "type2"})
    raw_text = table.text
    return raw_text.strip()
# Labels for the nine-cell rows built from the table that get_item() reads.
column = [
    '날짜', '종가', '전일비', '등락률', '거래량',  # date, close, change, change %, volume
    '기관 순매매량', '외국인 순매매량',  # institutional / foreign net volume
    '외국인 보유주수', '외국인 보유율',  # foreign-held shares / holding ratio
]
# Labels for the seven-cell rows built from the table that get_num() reads.
column2 = [
    '날짜', '종가', '전일비',  # date, close, change
    '시가', '고가', '저가',  # open, high, low
    '거래량',  # volume
]
code = "035720"  # stock code
# Upper bound of the page loop below: range(1, length) requests pages
# 1 .. length - 1. The original note says one page covers about 20~30 days.
length = 20

# Scrape the per-day rows of the "frgn" page into `stock`, one tuple per row.
stock = []
for num in range(1, length):
    page = requests.get(
        "https://finance.naver.com/item/frgn.nhn?code={0}&page={1}".format(code, num),
        # Without a timeout a stalled connection would block the script forever.
        timeout=10,
    )
    # Fail here on an HTTP error instead of parsing an error page further down.
    page.raise_for_status()
    stock_surround = BeautifulSoup(page.content, "html.parser")
    # Keep only the text after the last tab on every line of the table text.
    cells = [line.split('\t')[-1] for line in get_item(stock_surround).splitlines()]
    # Drop blank cells, then skip the first 12 entries
    # (presumably the header labels -- confirm against the live page).
    stockdata = [cell for cell in cells if cell.strip()][12:]
    # Nine cells per row, one for each label in `column`; grouper pads a
    # short final row with None.
    stock.extend(grouper(stockdata, 9))
stockDF = pd.DataFrame(data=stock, columns=column)
# Scrape the per-day rows of the "sise_day" page into `stock2`, one tuple per
# row. range(1, length*2-1) requests pages 1 .. length*2 - 2.
stock2 = []
for num in range(1, length * 2 - 1):
    page2 = requests.get(
        "https://finance.naver.com/item/sise_day.nhn?code={0}&page={1}".format(code, num),
        # Without a timeout a stalled connection would block the script forever.
        timeout=10,
    )
    # Fail here on an HTTP error instead of parsing an error page further down.
    page2.raise_for_status()
    stock_surround2 = BeautifulSoup(page2.content, "html.parser")
    # Keep only the text after the last tab on every line of the table text.
    cells2 = [line.split('\t')[-1] for line in get_num(stock_surround2).splitlines()]
    # Drop blank cells, then skip the first 7 entries
    # (presumably the header labels -- confirm against the live page).
    stockdata2 = [cell for cell in cells2 if cell.strip()][7:]
    # Seven cells per row, one for each label in `column2`; grouper pads a
    # short final row with None.
    stock2.extend(grouper(stockdata2, 7))
stockDF2 = pd.DataFrame(data=stock2, columns=column2)
# Trim both frames to the columns wanted in the result, then join them on the
# date label so each row carries the values from both pages.
stockDF2 = stockDF2.drop(["전일비"], axis=1)
stockDF = stockDF.drop(["종가", "거래량", "전일비", "외국인 보유주수"], axis=1)
totalstock = pd.merge(stockDF, stockDF2, how='inner', on='날짜')

# Columns holding numbers with thousands separators: strip the commas first.
for name in ("종가", "시가", "고가", "저가", "거래량", "외국인 순매매량", "기관 순매매량"):
    totalstock[name] = totalstock[name].str.replace(",", "").astype(float)

# Columns whose last character is dropped before the float conversion
# (presumably a trailing percent sign -- confirm against the scraped text).
for name in ("외국인 보유율", "등락률"):
    totalstock[name] = totalstock[name].str[:-1].astype(float)

# Parse the merged frame's own date column. Assigning
# pd.to_datetime(stockDF.날짜) aligns on index labels, and the merge result is
# renumbered, so dates land on the wrong rows whenever the inner join drops or
# duplicates a row.
totalstock["날짜"] = pd.to_datetime(totalstock["날짜"])

# Whole days between each row's date and the date of the last row.
totalstock["기간"] = (totalstock["날짜"] - totalstock.iloc[-1].날짜).dt.days

# Bare expression kept from the notebook: it shows the frame when it ends a
# cell and has no effect when run as a script.
totalstock
totalstock.to_pickle("{0}.bz2".format(code))