MorEpiSim

MorEpiSim is a Reinforcement Learning-based simulation environment for epidemic control.

Installation

Install using pip: pip install MorEpiSim

https://pypi.org/project/MorEpiSim/

Requirements

  • Python 3.6 or greater
  • numpy
  • networkx
  • matplotlib
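Pip should pull in these dependencies automatically. As an optional sanity check, the sketch below only verifies that the interpreter and the required packages are available; it uses the packages' standard version attributes and assumes no minimum versions beyond Python 3.6.

In [ ]:
import sys

import numpy
import networkx
import matplotlib

# The package targets Python 3.6 or greater
assert sys.version_info >= (3, 6), "MorEpiSim requires Python 3.6 or greater"

# Print the versions that are actually installed
print("Python     :", sys.version.split()[0])
print("numpy      :", numpy.__version__)
print("networkx   :", networkx.__version__)
print("matplotlib :", matplotlib.__version__)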

Video tutorial

  • Link will be added soon
  • Link to source code will be added soon

Code examples

1. Minimal boilerplate code:

In [ ]:
from MorEpiSim import the_env

# initial values ---------------------------------------------
epidemic_data = [14, 0.02]  # incubation period and fatality rate
demographic_data = [100000, 3, 1000]  # population, initial infections, density (ppl/km2)
custom_data = [epidemic_data, demographic_data]
# instantiate the environment
env = the_env.MorEpiEnv(custom_data)
#--------------------------------------------------------------

num_episodes = 3
for i in range(num_episodes):
    obs = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()

        obs, reward, done, info = env.step(action)

        env.render('IRD')  # render modes: 'IRD', 'graph', or 'scores' (rendering may not work in Jupyter)

env.close()

2. Training a deep RL model using Stable Baselines3

In [ ]:
from MorEpiSim import the_env

# initial values ---------------------------------------------
epidemic_data = [14, 0.02]  # incubation period and fatality rate
demographic_data = [100000, 3, 1000]  # population, initial infections, density (ppl/km2)
custom_data = [epidemic_data, demographic_data]
# instantiate the environment
env = the_env.MorEpiEnv(custom_data)
#--------------------------------------------------------------

from stable_baselines3 import PPO
model = PPO('MlpPolicy', env, verbose=0)
model.learn(total_timesteps=180000, log_interval=10)

# For more details on the Stable Baselines3 PPO implementation, see:
# https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html
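Once training finishes, the model can be saved to disk and reloaded through the standard Stable Baselines3 API. The sketch below is only an illustration: the file name ppo_morepisim is an arbitrary choice, and the evaluation loop reuses the reset/step signatures from the first example.

In [ ]:
# Save the trained policy, reload it, and run one greedy episode
model.save("ppo_morepisim")  # illustrative path, choose any location
model = PPO.load("ppo_morepisim", env=env)

obs = env.reset()
done = False
total_reward = 0.0
while not done:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    total_reward += reward
print("Episode return:", total_reward)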

3. Monitoring the performance:

In [4]:
import os

import gym
import numpy as np
import matplotlib.pyplot as plt

from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3.common.callbacks import BaseCallback
In [5]:
class SaveOnBestTrainingRewardCallback(BaseCallback):
    """
    Callback for saving a model (the check is done every ``check_freq`` steps)
    based on the training reward (in practice, we recommend using ``EvalCallback``).

    :param check_freq: (int)
    :param log_dir: (str) Path to the folder where the model will be saved.
      It must contain the file created by the ``Monitor`` wrapper.
    :param verbose: (int)
    """
    def __init__(self, check_freq: int, log_dir: str, verbose=1):
        super(SaveOnBestTrainingRewardCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, 'best_model')
        self.best_mean_reward = -np.inf

    def _init_callback(self) -> None:
        # Create folder if needed
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:
            # Retrieve training reward
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            
            if len(x) > 0:
                # Mean training reward over the last 100 episodes
                mean_reward = np.mean(y[-100:])
                if self.verbose > 0:
                    print(f"Num timesteps: {self.num_timesteps}")
                    print(f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}")

                # New best model, you could save the agent here
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    # Example for saving the best model
                    if self.verbose > 0:
                        print(f"Saving new best model to {self.save_path}.zip")
                    self.model.save(self.save_path)

        return True
In [6]:
log_dir = "./path"  # folder where the Monitor logs and the best model are stored
os.makedirs(log_dir, exist_ok=True)
In [10]:
from MorEpiSim import the_env

# initial values ---------------------------------------------
epidemic_data = [14, 0.02]  # incubation period and fatality rate
demographic_data = [100000, 3, 1000]  # population, initial infections, density (ppl/km2)
custom_data = [epidemic_data, demographic_data]
# instantiate the environment
env = the_env.MorEpiEnv(custom_data)
plt.close('all')
#--------------------------------------------------------------

env = Monitor(env, log_dir)
Initializing the env ...
env successfully initialized ============= 

In [12]:
model = PPO('MlpPolicy', env, verbose=0)
In [13]:
# Create the callback: check every 500 steps
callback = SaveOnBestTrainingRewardCallback(check_freq=500, log_dir=log_dir)  
In [ ]:
# Train the agent
model.learn(total_timesteps=180000, callback=callback) 
In [ ]:
model.set_random_seed(0)
In [ ]:
x_1, run_1 = ts2xy(load_results(log_dir), 'timesteps')
plt.plot(x_1, run_1, label='Episodic Reward')
plt.legend()

# fname = log_dir +'/'+ title +'.png'
# plt.savefig(fname, dpi=300)
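The callback stores the best-performing checkpoint as best_model.zip inside log_dir. As a minimal sketch, assuming training has produced at least one checkpoint, it can be reloaded afterwards with the standard Stable Baselines3 loader:

In [ ]:
# Reload the best checkpoint saved by the callback
best_model = PPO.load(os.path.join(log_dir, 'best_model'), env=env)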

4. Customizing the environment:

4.1 Custom epidemic and demographic data:

In [ ]:
from MorEpiSim import the_env

# initial values ---------------------------------------------
epidemic_data = [14, 0.02]  # incubation period and fatality rate
demographic_data = [100000, 3, 1000]  # population, initial infections, density (ppl/km2)
custom_data = [epidemic_data, demographic_data]
# instantiate the environment
env = the_env.MorEpiEnv(custom_data)

4.2 Custom threshold for the health score:

In [ ]:
health_thresholds = [25, 2]  # [infection threshold, death threshold]
env.set_TI_TD(health_thresholds)
env.TI_TD

4.3 Custom threshold for the economic score:

In [ ]:
economy_threshold = 0.1  # a number between 0 and 1
env.set_Te(economy_threshold)
env.Te

4.4 Custom weights for each action in the economic score:

  • A1: Travel restrictions
  • A2: Lockdown
  • A3: Remote work and education
  • A4: Provide masks and mandate their use
  • A5: Increase the testing rate (test and isolate if positive)
  • A6: Increase the health care capacity (e.g., hospital beds)
  • A7: Increase the vaccination rate
In [ ]:
wts = [1, 1, 1, 1, 1, 1, 1]  # one weight per action (A1 to A7)
env.set_a_weights(wts)
env.a_weights
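
Putting the pieces together, the sketch below applies all of the customizations above to a fresh environment and runs one random episode. It only combines calls already shown in this README; the specific threshold and weight values are illustrative.

In [ ]:
from MorEpiSim import the_env

# Build the environment with custom epidemic and demographic data
epidemic_data = [14, 0.02]            # incubation period and fatality rate
demographic_data = [100000, 3, 1000]  # population, initial infections, density (ppl/km2)
env = the_env.MorEpiEnv([epidemic_data, demographic_data])

# Apply custom thresholds and action weights
env.set_TI_TD([25, 2])                       # [infection threshold, death threshold]
env.set_Te(0.1)                              # economic threshold between 0 and 1
env.set_a_weights([1, 2, 1, 0.5, 1, 1, 1])   # one weight per action (A1 to A7)

# Run one random episode with the customized environment
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()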