
15.7% Annualized: A Deep-Reinforcement-Learning Portfolio on the 30 Dow Jones Constituents (Data and Code Download Included)

AI量化实验室, 2023-10-12, published in Beijing

Original article No. 253, focused on "personal growth and financial freedom, the logic of how the world works, and investing."

Today's code and data can be downloaded from the Knowledge Planet:

我的开源项目及知识星球 (my open-source projects and Knowledge Planet)

Let's borrow an env from FinRL and take a look. The code is more complex than the portfolio-management environment we implemented ourselves; in fact, this env has backtesting built in. Let's walk through it:

import numpy as np
import pandas as pd
from gym.utils import seeding
import gym
from gym import spaces
import matplotlib

matplotlib.use('Agg')
import matplotlib.pyplot as plt
from stable_baselines3.common.vec_env import DummyVecEnv


class StockPortfolioEnv(gym.Env):
    """A portfolio-allocation environment: each step the agent outputs one
    weight per stock and the env rolls the portfolio forward one trading day."""

    metadata = {'render.modes': ['human']}

    def __init__(self,
                 df,
                 stock_dim,
                 hmax,
                 initial_amount,
                 transaction_cost_pct,
                 reward_scaling,
                 state_space,
                 action_space,
                 tech_indicator_list,
                 turbulence_threshold=None,
                 lookback=252,
                 day=0):
        self.day = day
        self.lookback = lookback
        self.df = df
        self.stock_dim = stock_dim
        self.hmax = hmax
        self.initial_amount = initial_amount
        # note: transaction_cost_pct is stored but never applied in step()
        self.transaction_cost_pct = transaction_cost_pct
        self.reward_scaling = reward_scaling
        self.state_space = state_space
        self.action_space = action_space
        self.tech_indicator_list = tech_indicator_list

        # action space: one continuous weight per stock, shape = self.stock_dim
        self.action_space = spaces.Box(low=0, high=1, shape=(self.action_space,))
        # observation: covariance matrix stacked with technical indicators,
        # e.g. shape = (30 + 8, 30) = (38, 30) for 30 stocks and 8 indicators
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                            shape=(self.state_space + len(self.tech_indicator_list),
                                                   self.state_space))

        # load data from a pandas dataframe (one block of rows per trading day)
        self.data = self.df.loc[self.day, :]
        self.covs = self.data['cov_list'].values[0]
        self.state = np.append(np.array(self.covs),
                               [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
                               axis=0)
        self.terminal = False
        self.turbulence_threshold = turbulence_threshold
        # initialize portfolio value
        self.portfolio_value = self.initial_amount

        # memorize portfolio value each step
        self.asset_memory = [self.initial_amount]
        # memorize portfolio return each step
        self.portfolio_return_memory = [0]
        # start from equal weights
        self.actions_memory = [[1 / self.stock_dim] * self.stock_dim]
        self.date_memory = [self.data.date.unique()[0]]

    def step(self, actions):
        self.terminal = self.day >= len(self.df.index.unique()) - 1

        if self.terminal:
            # built-in backtest report: plot and print summary statistics
            df = pd.DataFrame(self.portfolio_return_memory)
            df.columns = ['daily_return']
            plt.plot(df.daily_return.cumsum(), 'r')
            plt.savefig('results/cumulative_reward.png')
            plt.close()

            plt.plot(self.portfolio_return_memory, 'r')
            plt.savefig('results/rewards.png')
            plt.close()

            print("=================================")
            print("begin_total_asset:{}".format(self.asset_memory[0]))
            print("end_total_asset:{}".format(self.portfolio_value))

            df_daily_return = pd.DataFrame(self.portfolio_return_memory)
            df_daily_return.columns = ['daily_return']
            if df_daily_return['daily_return'].std() != 0:
                sharpe = (252 ** 0.5) * df_daily_return['daily_return'].mean() / \
                         df_daily_return['daily_return'].std()
                print("Sharpe: ", sharpe)
            print("=================================")

            return self.state, self.reward, self.terminal, {}

        else:
            # actions are the portfolio weights; normalize them to sum to 1
            weights = self.softmax_normalization(actions)
            self.actions_memory.append(weights)
            last_day_memory = self.data

            # load next state
            self.day += 1
            self.data = self.df.loc[self.day, :]
            self.covs = self.data['cov_list'].values[0]
            self.state = np.append(np.array(self.covs),
                                   [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
                                   axis=0)

            # calculate portfolio return: sum of individual stock returns * weights
            portfolio_return = sum(((self.data.close.values / last_day_memory.close.values) - 1) * weights)
            # update portfolio value
            new_portfolio_value = self.portfolio_value * (1 + portfolio_return)
            self.portfolio_value = new_portfolio_value

            # save into memory
            self.portfolio_return_memory.append(portfolio_return)
            self.date_memory.append(self.data.date.unique()[0])
            self.asset_memory.append(new_portfolio_value)

            # the reward is the new portfolio value
            self.reward = new_portfolio_value
            # note: reward_scaling is left unapplied here
            # self.reward = self.reward * self.reward_scaling

            return self.state, self.reward, self.terminal, {}

    def reset(self):
        self.asset_memory = [self.initial_amount]
        self.day = 0
        self.data = self.df.loc[self.day, :]
        # reload the initial state
        self.covs = self.data['cov_list'].values[0]
        self.state = np.append(np.array(self.covs),
                               [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
                               axis=0)
        self.portfolio_value = self.initial_amount
        self.terminal = False
        self.portfolio_return_memory = [0]
        self.actions_memory = [[1 / self.stock_dim] * self.stock_dim]
        self.date_memory = [self.data.date.unique()[0]]
        return self.state

    def render(self, mode='human'):
        return self.state

    def softmax_normalization(self, actions):
        # map raw action scores to weights that are positive and sum to 1
        numerator = np.exp(actions)
        denominator = np.sum(np.exp(actions))
        softmax_output = numerator / denominator
        return softmax_output

    def save_asset_memory(self):
        # per-day portfolio returns, keyed by date
        date_list = self.date_memory
        portfolio_return = self.portfolio_return_memory
        df_account_value = pd.DataFrame({'date': date_list, 'daily_return': portfolio_return})
        return df_account_value

    def save_action_memory(self):
        # date and close price length must match actions length
        date_list = self.date_memory
        df_date = pd.DataFrame(date_list)
        df_date.columns = ['date']

        action_list = self.actions_memory
        df_actions = pd.DataFrame(action_list)
        df_actions.columns = self.data.tic.values
        df_actions.index = df_date.date
        return df_actions

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_sb_env(self):
        # wrap the env for stable-baselines3
        e = DummyVecEnv([lambda: self])
        obs = e.reset()
        return e, obs
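Before handing the env to an agent, it is worth driving it by hand to see the built-in backtest at work. Below is a minimal smoke-test sketch, assuming `train` is already loaded in the FinRL format (day-indexed rows with a cov_list column, the indicator columns, close, tic, date) and using the INDICATORS list defined further down; the random policy is illustrative only, not part of the article's code:

import os
import numpy as np

os.makedirs('results', exist_ok=True)  # step() saves its plots here on the last day

env = StockPortfolioEnv(df=train,
                        stock_dim=30,
                        hmax=100,
                        initial_amount=1000000,
                        transaction_cost_pct=0.001,
                        reward_scaling=1e-4,
                        state_space=30,
                        action_space=30,
                        tech_indicator_list=INDICATORS)  # INDICATORS is defined below

obs = env.reset()
done = False
while not done:
    raw_scores = np.random.uniform(0, 1, size=30)  # softmax inside step() turns these into weights
    obs, reward, done, info = env.step(raw_scores)

print(env.save_asset_memory().tail())  # daily returns of this random-weight "backtest"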

To read an RL environment, start with its action_space and observation_space:

The shape of action_space equals stock_dim: one action per stock, one weight to assign to each. That part is easy to understand.

        # action space: one continuous weight per stock, shape = self.stock_dim
        self.action_space = spaces.Box(low=0, high=1, shape=(self.action_space,))
        # observation: covariance matrix stacked with technical indicators,
        # e.g. shape = (30 + 8, 30) = (38, 30) for 30 stocks and 8 indicators
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                            shape=(self.state_space + len(self.tech_indicator_list),
                                                   self.state_space))

The observation_space is two-dimensional: the day's stock_dim x stock_dim covariance matrix, with one extra row per technical indicator holding that indicator's value for all 30 stocks. With the 8 indicators used below, each state is a (38, 30) array.
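A toy sketch with random numbers, just to make the stacking concrete (shapes only, not the article's data):

import numpy as np

stock_dim, n_indicators = 30, 8
covs = np.random.rand(stock_dim, stock_dim)               # stands in for data['cov_list']
indicator_rows = np.random.rand(n_indicators, stock_dim)  # one row per indicator, one column per stock

state = np.append(covs, indicator_rows, axis=0)
print(state.shape)  # (38, 30), matching observation_space above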

Next we train with two algorithms, PPO and DDPG.

import pandas as pd

# train = pd.read_csv('train.csv', index_col=0)
with pd.HDFStore('train.h5') as s:
    train = s['train']
print(train)

stock_dimension = len(train.tic.unique())
state_space = stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

INDICATORS = [
    "macd",
    "boll_ub",
    "boll_lb",
    "rsi_30",
    "cci_30",
    "dx_30",
    "close_30_sma",
    "close_60_sma",
]

env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "transaction_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4,
}

from engine.rl.stock_portfolio_env import StockPortfolioEnv

e_train_gym = StockPortfolioEnv(df=train, **env_kwargs)

env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

from engine.rl.models import DRLAgent

agent = DRLAgent(env=env_train)
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.005,
    "learning_rate": 0.0001,
    "batch_size": 128,
}
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

trained_ppo = agent.train_model(model=model_ppo,
                                tb_log_name='ppo',
                                total_timesteps=80000)

trained_ppo.save('trained_ppo.zip')


DDPG_PARAMS = {"batch_size": 128, "buffer_size": 50000, "learning_rate": 0.001}

model_ddpg = agent.get_model("ddpg", model_kwargs=DDPG_PARAMS)

trained_ddpg = agent.train_model(model=model_ddpg,
                                 tb_log_name='ddpg',
                                 total_timesteps=50000)

trained_ddpg.save('trained_ddpg.zip')
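The headline 15.7% annualized figure comes from running the trained policy on held-out data. The DRLAgent wrapper is not reproduced here, but trained_ppo is a plain stable-baselines3 model, so the out-of-sample loop can be sketched as follows (test.h5 and the annualization line are my assumptions for illustration, not the article's exact evaluation code):

import pandas as pd
from stable_baselines3 import PPO

# Assumption: test.h5 holds a held-out split in the same format as train.h5.
with pd.HDFStore('test.h5') as s:
    test = s['test']

e_test_gym = StockPortfolioEnv(df=test, **env_kwargs)
model = PPO.load('trained_ppo.zip')

obs = e_test_gym.reset()
done = False
while not done:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = e_test_gym.step(action)

# The env records the whole backtest as it steps.
df_returns = e_test_gym.save_asset_memory()
annualized = (1 + df_returns['daily_return']).prod() ** (252 / len(df_returns)) - 1
print(f"annualized return: {annualized:.1%}")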

Some reflections on life:

Sometimes we fall into obsessions and cannot pull ourselves out.

We know the reasoning well enough: life itself is a one-way ticket with no return trip. Once you see that clearly, a lifetime becomes a single experience, and joy, pain, sorrow, struggle, laughter, and tears are all part of it.

Set against the timeline of history, we barely make a ripple. Against a universe 13.8 billion light-years across, we are as small as dust.

Recently I heard a very powerful saying: "Allow everything to happen, the happening of all things." You cannot block what comes anyway; it arrives whether you fear it or not. So I choose to let it pass and stop wrestling with it. Once you allow everything to happen, you become a softer, more relaxed person.

Planet download: 我的开源项目及知识星球 (my open-source projects and Knowledge Planet)

"静待花开的聚宝盘": an ETF momentum rotation strategy with 17.5% annualized return, 22% max drawdown, and a 1.187 Sharpe ratio (code download)
