Following on from the previous post (Adding animation to the Chapter 4 sample code of Deep Learning from Scratch 4 (Reinforcement Learning Edition) - daisuke's tech blog), this time we add a feature to policy_iter.py from the Chapter 4 sample code of "Deep Learning from Scratch 4 (Reinforcement Learning Edition)" that writes out image files showing the value function as it is updated at each step.
While we are at it, we also add the same kind of feature to policy_eval.py from the same chapter: writing out image files showing the value function as it is updated at each state.
Introduction
Last time, animating these updates made it possible to understand intuitively how the values change. On the other hand, it was somewhat inconvenient for checking in detail how the value function and the policy get updated.
So this time, I added a feature that simply writes out the frames that were animated last time as image files.
The source code with these features added is stored in the following GitHub repository.
https://github.com/dk0893/deep-learning-from-scratch-4
Usage
$ git clone https://github.com/dk0893/deep-learning-from-scratch-4.git -b v1.1-dk0893
Cloning into 'deep-learning-from-scratch-4'...
remote: Enumerating objects: 425, done.
remote: Counting objects: 100% (148/148), done.
remote: Compressing objects: 100% (33/33), done.
Receiving objects: 100% (425/425), 922.49 KiB | 0 bytes/s, done.
Resolving deltas: 100% (246/246), done.
Checking connectivity... done.
Note: checking out '4eca9bf48e1afbf56628107a33bffe2440df6000'.
You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by performing another checkout.
If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -b with the checkout command again. Example:
git checkout -b <new-branch-name>
$ cd deep-learning-from-scratch-4/
$ python ch04/policy_iter.py --ope ani_step --fpath policy_iter_step.gif
save animation: policy_iter_step.gif
$ python ch04/policy_iter.py --ope im_step
save image: images\policy_iter_step_0.png
save image: images\policy_iter_step_1.png
save image: images\policy_iter_step_2.png
save image: images\policy_iter_step_3.png
save image: images\policy_iter_step_4.png
$ python ch04/policy_iter.py --ope ani_state --fpath policy_iter_state.gif
save animation: policy_iter_state.gif
ImageStore.cnt=384
$ python ch04/policy_iter.py --ope im_state
save image: images\policy_iter_step0_phase00_state_(0, 0).png
save image: images\policy_iter_step0_phase00_state_(0, 1).png
save image: images\policy_iter_step0_phase00_state_(0, 2).png
save image: images\policy_iter_step0_phase00_state_(0, 3).png
...
save image: images\policy_iter_step4_phase00_state_(2, 0).png
save image: images\policy_iter_step4_phase00_state_(2, 1).png
save image: images\policy_iter_step4_phase00_state_(2, 2).png
save image: images\policy_iter_step4_phase00_state_(2, 3).png
ImageStore.cnt=384
This time, for reference, I have also committed a notebook that can be run on Google Colaboratory (ch04-exec.ipynb).
Design policy for this feature addition
When I added features last time, I just modified the original source code without thinking much about it. This time, I made changes that affect the original source code as little as possible.
Specifically, I implemented the feature with a minimal number of additions to the original source code. Doing it this way makes it easy to merge this feature back in whenever the original source code is updated, and it keeps the structure from getting complicated if I want to add further features later.
Changes from the original
policy_iter.py
As shown below, the change to policy_iter() itself is a single line.
--- deep-learning-from-scratch-4-org/ch04/policy_iter.py 2024-03-20 18:07:05.107000000 +0900
+++ deep-learning-from-scratch-4/ch04/policy_iter.py 2024-03-23 20:52:32.819000000 +0900
@@ -3,6 +3,7 @@
 sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
 from collections import defaultdict
 from common.gridworld import GridWorld
+from common.image_store import ImageStore
 from ch04.policy_eval import policy_eval
@@ -44,7 +45,7 @@
         new_pi = greedy_policy(V, env, gamma)
 
         if is_render:
-            env.render_v(V, pi)
+            ImageStore.st_step( env, V, pi )
 
         if new_pi == pi:
             break
@@ -53,7 +54,25 @@
     return pi
 
+def parse_args():
+    import argparse
+    parser = argparse.ArgumentParser( description='policy_iter.py' )
+    parser.add_argument( '--ope', default=None, help='select output operation, [None or im_step or im_state or ani_step or ani_state]' )
+    parser.add_argument( '--dpath', default="images", help='input save image directory path' )
+    parser.add_argument( '--fpath', default='policy_iter.gif', help='input save animation path' )
+    return parser.parse_args()
+
 if __name__ == '__main__':
+    args = parse_args()
+    ImageStore.init( args.ope, args.dpath, args.fpath )
     env = GridWorld()
     gamma = 0.9
     pi = policy_iter(env, gamma)
+    ImageStore.output( env.renderer.fig )
policy_eval.py
This one, likewise, needs only two added lines. Note that ImageStore.st_state() is called in both branches, for the goal state and for ordinary states, so every evaluation sweep produces exactly one frame per state; the phase counting in image_store.py relies on this.
--- deep-learning-from-scratch-4-org/ch04/policy_eval.py 2024-03-20 18:07:05.100000000 +0900
+++ deep-learning-from-scratch-4/ch04/policy_eval.py 2024-03-21 23:07:58.401000000 +0900
@@ -3,12 +3,14 @@
 sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
 from collections import defaultdict
 from common.gridworld import GridWorld
+from common.image_store import ImageStore
 
 
 def eval_onestep(pi, V, env, gamma=0.9):
     for state in env.states():
         if state == env.goal_state:
             V[state] = 0
+            ImageStore.st_state( env, V, pi, state )
             continue
 
         action_probs = pi[state]
@@ -18,6 +20,7 @@
             r = env.reward(state, action, next_state)
             new_V += action_prob * (r + gamma * V[next_state])
 
         V[state] = new_V
+        ImageStore.st_state( env, V, pi, state )
 
     return V
image_store.py
This is a newly added file. Because it keeps all of its state in class variables, each change to the existing code amounts to writing an import statement plus a single added line.
If we instead created an instance, as in ordinary class usage, we would have to add extra parameters to many of the existing functions so the instance could be passed around.
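To make the trade-off concrete, here is a minimal, self-contained sketch (InstanceStore, ClassStore, and the step/loop functions are made-up names for illustration, not code from the repository). With an instance, the store has to be threaded through every function on the call path; with class variables, as in image_store.py, one import and one call are enough.

# Instance-based design: the store must be passed down through every layer.
class InstanceStore:
    def __init__(self):
        self.frames = []
    def record(self, value):
        self.frames.append(value)

def inner_step(value, store):      # extra parameter needed here...
    store.record(value)

def outer_loop(values, store):     # ...and in every caller above it
    for v in values:
        inner_step(v, store)

# Class-variable design (the style image_store.py uses): shared state lives
# on the class, so existing function signatures stay untouched.
class ClassStore:
    frames = []
    def record(value):             # called via the class, like ImageStore
        ClassStore.frames.append(value)

def inner_step_cls(value):
    ClassStore.record(value)

store = InstanceStore()
outer_loop([1, 2, 3], store)
inner_step_cls(4)
print(store.frames, ClassStore.frames)  # [1, 2, 3] [4]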
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import ArtistAnimation


class ImageStore:
    # All state is held in class variables so that callers only need an
    # import and a single call, without creating or passing an instance.
    ope = None             # output operation: im_step / im_state / ani_step / ani_state
    dpath = None           # directory for saved images
    fpath = None           # output path for the saved animation
    artists_step = []      # collected frames for the per-step animation
    artists_state = []     # collected frames for the per-state animation
    animation = False
    step = 0               # policy-iteration step counter
    phase = 0              # evaluation-sweep counter within the current step
    cnt = 0                # total number of per-state frames so far
    debug = False

    def init( ope=None, dpath=None, fpath=None, debug=False ):
        ImageStore.ope = ope
        ImageStore.dpath = dpath
        ImageStore.fpath = fpath
        ImageStore.debug = debug
        if ImageStore.ope == "ani_step" or ImageStore.ope == "ani_state":
            ImageStore.animation = True
        elif ImageStore.ope == "im_step" or ImageStore.ope == "im_state":
            os.makedirs( ImageStore.dpath, exist_ok=True )

    def st_step( env, V, pi ):
        # Called once per policy-iteration step from policy_iter().
        # With ope=None this reproduces the original env.render_v() behavior.
        if ImageStore.ope == "im_step" or ImageStore.ope == "ani_step" or ImageStore.ope is None:
            frame = env.render_v(V, pi, title=f"step={ImageStore.step}")
            ImageStore.artists_step.append( frame )
            if ImageStore.ope == "im_step":
                fpath = os.path.join( ImageStore.dpath, f"policy_iter_step_{ImageStore.step}.png" )
                plt.savefig( fpath )
                plt.close()
                print( f"save image: {fpath}" )
        ImageStore.step += 1
        ImageStore.phase = 0

    def st_state( env, V, pi, state ):
        # Called once per state from eval_onestep() in policy_eval.py.
        if ImageStore.ope == "im_state" or ImageStore.ope == "ani_state":
            frame = env.render_v( V, pi, title=f"step={ImageStore.step} phase={ImageStore.phase} state={state}" )
            ImageStore.artists_state.append( frame )
            if ImageStore.ope == "im_state":
                fpath = os.path.join( ImageStore.dpath, f"policy_iter_step{ImageStore.step}_phase{ImageStore.phase:02d}_state_{state}.png" )
                plt.savefig( fpath )
                plt.close()
                print( f"save image: {fpath}" )
            ImageStore.cnt += 1
            # One evaluation sweep visits every state exactly once, so a new
            # phase begins whenever cnt is a multiple of the number of states.
            if ImageStore.cnt % np.prod(env.shape) == 0:
                ImageStore.phase += 1
            if ImageStore.debug:
                # In debug mode, stop collecting per-state frames after the
                # first sweep to keep the animation short.
                if ImageStore.ope == "ani_state" and ImageStore.phase == 1:
                    ImageStore.ope = "ani_end"

    def output( fig ):
        # Called once at the end of the run to write the animation file.
        if ImageStore.ope == "ani_step" or ImageStore.ope == "ani_state" or ImageStore.ope == "ani_end":
            artists = ImageStore.artists_step if ImageStore.ope == "ani_step" else ImageStore.artists_state
            interval = 2000 if ImageStore.ope == "ani_step" else 500
            anim = ArtistAnimation( fig, artists, interval=interval )
            anim.save( ImageStore.fpath )
            print( f"save animation: {ImageStore.fpath}" )
        if ImageStore.ope == "im_state" or ImageStore.ope == "ani_state":
            print( f"ImageStore.cnt={ImageStore.cnt}" )
Conclusion
I knew the value function was being updated, but the image files produced this time showed me just how many updates were actually needed.
Going through the images of the update process over and over deepened my understanding.
That's all for this time!
Thank you for reading to the end.