utils.py
import torch
import numpy as np
import os
import gymnasium as gym
from typing import Optional, Tuple, Union
import json
from imageio import mimsave
import random
from modules import Actor
from dataset import ReplayBuffer


def make_dir(dir_path):
    """Create dir_path if it does not exist and return it."""
    # exist_ok avoids silently swallowing unrelated OSErrors the way a bare
    # try/except around os.mkdir would.
    os.makedirs(dir_path, exist_ok=True)
    return dir_path


def seed_everything(seed: int,
                    env: Optional[gym.Env] = None,
                    use_deterministic_algos: bool = False):
    """Seed every source of randomness for reproducibility."""
    if env is not None:
        # Gymnasium removed env.seed(); environments are seeded through
        # reset() and the action space instead.
        env.reset(seed=seed)
        env.action_space.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.use_deterministic_algorithms(use_deterministic_algos)
    random.seed(seed)
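
# Example usage:
#   env = gym.make("HalfCheetah-v4")
#   seed_everything(42, env=env)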


def rollout(batch_size,
            horizon,
            transition,
            policy,
            env_buffer,
            model_buffer,
            exploration_noise=0.1,
            max_action=1):
    raise NotImplementedError()
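
# A minimal sketch of what this rollout could look like (Dyna/MBPO style):
# sample start states from the real buffer, step a learned transition model
# under the noisy current policy, and push the imagined transitions into the
# model buffer. The interfaces used below (env_buffer.sample, policy.act,
# transition(states, actions), model_buffer.add) are assumptions for
# illustration, not this repository's confirmed APIs.
#
#     states = env_buffer.sample(batch_size).states
#     for _ in range(horizon):
#         actions = policy.act(states)
#         actions += exploration_noise * np.random.randn(*actions.shape)
#         actions = np.clip(actions, -max_action, max_action)
#         next_states, rewards, dones = transition(states, actions)
#         model_buffer.add(states, actions, rewards, next_states, dones)
#         states = next_states[~dones]  # continue only non-terminal rollouts
#         if len(states) == 0:
#             break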


def parse_json_dataset(filename: str) -> Tuple[int, int, float]:
    """Return (state_dim, action_dim, max_action) for a JSON dataset file."""
    max_action = 1.0
    if not filename.endswith('.json'):
        filename = filename + '.json'
    filepath = os.path.join("json_datasets", filename)
    with open(filepath) as f:
        obj = json.load(f)
    states = np.array(obj["observations"])
    actions = np.array(obj["actions"])
    return states.shape[1], actions.shape[1], max_action
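
# Example (hypothetical dataset name):
#   state_dim, action_dim, max_action = parse_json_dataset("halfcheetah_medium")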


def wrap_env(env: gym.Env,
             state_mean: Union[np.ndarray, float] = 0.0,
             state_std: Union[np.ndarray, float] = 1.0,
             reward_scale: float = 1.0) -> gym.Env:
    """Wrap an env so observations are normalized and rewards optionally scaled."""

    def normalize_state(state):
        return (state - state_mean) / state_std

    def scale_reward(reward):
        return reward_scale * reward

    # Note: Gymnasium 1.0+ also expects the transformed observation_space as a
    # third argument to TransformObservation; this call matches the 0.x API.
    env = gym.wrappers.TransformObservation(env, normalize_state)
    if reward_scale != 1.0:
        env = gym.wrappers.TransformReward(env, scale_reward)
    return env
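
# Example: normalize observations with statistics computed from an offline
# dataset (mean/std here stand in for those statistics):
#   env = wrap_env(env, state_mean=mean, state_std=std)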


@torch.no_grad()
def eval_actor(env: gym.Env,
               actor: Actor,
               device: str,
               num_episodes: int,
               seed: int) -> np.ndarray:
    """Run the actor for num_episodes episodes and return the episode returns."""
    actor.eval()
    episode_rewards = []
    for episode in range(num_episodes):
        # Gymnasium seeds through reset(), which returns an (obs, info) tuple;
        # seed only the first reset so later episodes continue the seeded RNG.
        state, _ = env.reset(seed=seed if episode == 0 else None)
        done = False
        episode_reward = 0.0
        while not done:
            action = actor.act(state, device)
            # Gymnasium's step() returns separate terminated/truncated flags.
            state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            episode_reward += reward
        episode_rewards.append(episode_reward)
    actor.train()
    return np.array(episode_rewards)
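
# Example:
#   returns = eval_actor(env, actor, device="cuda", num_episodes=10, seed=0)
#   print(returns.mean(), returns.std())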


class VideoRecorder:
    """Collects rendered frames from an environment and writes them to a video file."""

    def __init__(self, dir_name, height=512, width=512, camera_id=0, fps=30):
        self.dir_name = dir_name
        self.height = height
        self.width = width
        self.camera_id = camera_id
        self.fps = fps
        self.frames = []
        self.enabled = False

    def init(self, enabled=True):
        self.frames = []
        self.enabled = self.dir_name is not None and enabled

    def record(self, env: gym.Env):
        if self.enabled:
            # Gymnasium fixes the render mode at construction time
            # (gym.make(..., render_mode="rgb_array")), so render() takes no
            # keyword arguments; height/width/camera_id are kept for renderers
            # that support them.
            frame = env.render()
            self.frames.append(frame)

    def save(self, file_name):
        if self.enabled:
            path = os.path.join(self.dir_name, file_name)
            mimsave(path, self.frames, fps=self.fps)
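
# Example usage (hypothetical paths and environment):
#   env = gym.make("HalfCheetah-v4", render_mode="rgb_array")
#   recorder = VideoRecorder(make_dir("videos"))
#   recorder.init()
#   obs, _ = env.reset(seed=0)
#   done = False
#   while not done:
#       recorder.record(env)
#       obs, reward, terminated, truncated, _ = env.step(env.action_space.sample())
#       done = terminated or truncated
#   recorder.save("rollout.mp4")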