分享

网易大宗交易爬取(unicode)

 imelee 2017-08-25
def dzjy(code=None, Page=0, end_date=None, data=None):
    """Fetch block-trade records from the NetEase (163.com) quotes service.

    Pages are requested newest-first (the query sorts by PUBLISHDATE
    descending) and appended to one DataFrame until a page contains a
    trade dated before ``end_date``.

    :param code: stock symbol to query; when None, block trades of all
        stocks are queried (1000 rows per page instead of 25).
    :param Page: index of the first page to request; callers normally
        leave this at 0.
    :param end_date: earliest trade date wanted. When None, only the
        first requested page is returned. It is compared directly with
        the PUBLISHDATE column, so it should be in the same format the
        service returns (presumably an ISO date string -- confirm).
        Rows older than ``end_date`` on the last fetched page are NOT
        filtered out.
    :param data: previously collected rows to append to; callers normally
        leave this unset.
    :return: DataFrame with the columns
        NO          -- sequence number
        PUBLISHDATE -- trade date
        EXCHANGE    -- meaning unknown
        SYMBOL      -- stock code
        SNAME       -- stock name
        STYPE       -- stock type
        DZJY2       -- volume traded (10k shares)
        DZJY5       -- trade price
        DZJY6       -- trade amount (10k yuan)
        DZJY9       -- buyer name
        DZJY11      -- seller name
        TCLOSE      -- closing price
        DZJY55      -- premium/discount rate
        CODE        -- stock code
        NAME        -- meaning unknown
    """
    # The space of "[object Object]" is percent-encoded: http.client rejects
    # request targets containing raw spaces with InvalidURL.
    all_stocks_url = 'http://quotes.money.163.com/hs/marketdata/service/dzjy.php?host=/hs/marketdata/service/dzjy.php&page={}&fields=NO,SYMBOL,SNAME,PUBLISHDATE,DZJY2,DZJY5,TCLOSE,DZJY55,DZJY6,DZJY9,DZJY11&sort=PUBLISHDATE&order=desc&count=1000&type=query&initData=[object%20Object]'
    one_stock_url = 'http://quotes.money.163.com/hs/marketdata/service/dzjy.php?host=/hs/marketdata/service/dzjy.php&page={}&query=symbol:{};&fields=NO,SYMBOL,SNAME,PUBLISHDATE,DZJY2,DZJY5,TCLOSE,DZJY55,DZJY6,DZJY9,DZJY11&sort=PUBLISHDATE&order=desc&count=25&type=query&initData=[object%20Object]'
    columns = ['NO', 'PUBLISHDATE', 'EXCHANGE', 'SYMBOL', 'SNAME', 'STYPE',
               'DZJY2', 'DZJY5', 'DZJY6', 'DZJY9', 'DZJY11', 'TCLOSE',
               'DZJY55', 'CODE', 'NAME']
    float_columns = ['DZJY2', 'DZJY5', 'DZJY6', 'TCLOSE', 'DZJY55']

    # Build the accumulator per call instead of sharing one default instance
    # created at definition time.
    if data is None:
        data = pd.DataFrame()

    # Iterate over pages rather than recursing, so long date ranges cannot
    # hit the interpreter's recursion limit.
    while True:
        if code is None:
            url = all_stocks_url.format(Page)
        else:
            url = one_stock_url.format(Page, code)

        req = Request(url=url, headers=header)
        resp = urlopen(req)
        try:
            bs = resp.read()
        finally:
            # Always release the connection, even if the read fails.
            resp.close()

        # The body is treated as gzip-compressed (presumably requested via
        # the shared `header` dict -- confirm) with \uXXXX-escaped text.
        gf = gzip.GzipFile(fileobj=io.BytesIO(bs), mode="rb")
        mainPage = gf.read().decode("unicode-escape")
        # NOTE(review): literal_eval cannot parse JSON null/true/false; if the
        # service ever returns those, json.loads would be the right parser.
        page_list = ast.literal_eval(mainPage)['list']

        # An empty page means there is nothing further to fetch; without this
        # guard the column selection below raises KeyError.
        if not page_list:
            return data

        # .copy() so the dtype conversions below act on an independent frame.
        df = pd.DataFrame.from_records(page_list)[columns].copy()
        for name in float_columns:
            df[name] = df[name].astype(float)
        data = pd.concat([data, df], ignore_index=True)

        if end_date is None:
            return data
        # Results arrive newest-first, so the first page holding a trade older
        # than end_date is the last one needed.
        if (df["PUBLISHDATE"] < end_date).any():
            return data
        Page += 1

    本站是提供个人知识管理的网络存储空间,所有内容均由用户发布,不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息,谨防诈骗。如发现有害或侵权内容,请点击一键举报。
    转藏 分享 献花(0)

    0条评论

    发表

    请遵守用户 评论公约