Skip to content

Commit

Permalink
Cleanup useless files for each image layers
Browse files Browse the repository at this point in the history
  • Loading branch information
rosbo committed Nov 30, 2018
1 parent 604efce commit eb3f09f
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 43 deletions.
75 changes: 35 additions & 40 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,29 +1,33 @@
FROM gcr.io/kaggle-images/python-tensorflow-whl:1.11.0-py36 as tensorflow_whl
FROM continuumio/anaconda3:5.2.0

# This is necessary for apt to access HTTPS sources
RUN apt-get update && \
apt-get install apt-transport-https

# Bring the layer clean-up script and the patch files into the image.
# These are plain local files/directories, so COPY is used: ADD's extra
# behaviours (remote URL fetch, local tar auto-extraction) are not needed.
COPY clean-layer.sh /tmp/clean-layer.sh
COPY patches/ /tmp/patches/
COPY patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl

# This is necessary for apt to access HTTPS sources.
# -y answers apt's confirmation prompt up front: a docker build has no
# terminal to answer it, so without the flag the install aborts whenever
# apt needs to pull in additional packages.
RUN apt-get update && \
    apt-get install -y apt-transport-https && \
    /tmp/clean-layer.sh

# Use a fixed apt-get repo to stop intermittent failures due to flaky httpredir connections,
# as described by Lionel Chan at http://stackoverflow.com/a/37426929/5881346
RUN sed -i "s/httpredir.debian.org/debian.uchicago.edu/" /etc/apt/sources.list && \
apt-get update && apt-get install -y build-essential unzip && \
# https://stackoverflow.com/a/46498173
conda update -y conda && conda update -y python && \
pip install --upgrade pip && \
apt-get -y install cmake
apt-get -y install cmake && \
/tmp/clean-layer.sh

# Tensorflow doesn't support python 3.7 yet. See https://github.com/tensorflow/tensorflow/issues/20517
# Fix to install tf 1.10: Downgrade python 3.7->3.6.6 and downgrade Pandas 0.23.3->0.23.2
RUN conda install -y python=3.6.6 && \
pip install pandas==0.23.2 && \
# Another fix for TF 1.10 https://github.com/tensorflow/tensorflow/issues/21518
pip install keras_applications==1.0.4 --no-deps && \
pip install keras_preprocessing==1.0.2 --no-deps
pip install keras_preprocessing==1.0.2 --no-deps && \
/tmp/clean-layer.sh

# The anaconda base image includes outdated versions of these packages. Update them to include the latest version.
RUN pip install --upgrade seaborn python-dateutil dask && \
Expand All @@ -43,13 +47,13 @@ RUN pip install --upgrade seaborn python-dateutil dask && \
wget https://imagemagick.org/download/ImageMagick.tar.gz && \
tar xzf ImageMagick.tar.gz && cd `ls -d ImageMagick-*` && pwd && ls -al && ./configure && \
make -j $(nproc) && make install && \
# clean up ImageMagick source files
cd ../ && rm -rf ImageMagick*
/tmp/clean-layer.sh

# Install tensorflow from a pre-built wheel
COPY --from=tensorflow_whl /tmp/tensorflow_cpu/*.whl /tmp/tensorflow_cpu/
RUN pip install /tmp/tensorflow_cpu/tensorflow*.whl && \
rm -rf /tmp/tensorflow_cpu
rm -rf /tmp/tensorflow_cpu && \
/tmp/clean-layer.sh

RUN apt-get install -y libfreetype6-dev && \
apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing && \
Expand Down Expand Up @@ -112,10 +116,7 @@ RUN apt-get install -y libfreetype6-dev && \
vader_lexicon verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe && \
# Stop-words
pip install stop-words && \
# clean up
rm -rf /root/.cache/pip/* && \
apt-get autoremove -y && apt-get clean && \
rm -rf /usr/local/src/*
/tmp/clean-layer.sh

# Make sure the dynamic linker finds the right libstdc++
ENV LD_LIBRARY_PATH=/opt/conda/lib
Expand All @@ -128,10 +129,9 @@ RUN apt-get -y install zlib1g-dev liblcms2-dev libwebp-dev libgeos-dev && \
cd basemap && \
git checkout v1.1.0 && \
python setup.py install && \
pip install basemap --no-binary basemap

# sasl is apparently an ibis dependency
RUN apt-get -y install libsasl2-dev && \
pip install basemap --no-binary basemap && \
# sasl is apparently an ibis dependency
apt-get -y install libsasl2-dev && \
# ...as is psycopg2
apt-get install -y libpq-dev && \
pip install ibis-framework && \
Expand Down Expand Up @@ -162,7 +162,8 @@ RUN apt-get -y install libsasl2-dev && \
# Re-run it to flush any more disk writes
python -c "from keras.models import Sequential; from keras import backend; print(backend._BACKEND)" && \
# Keras reverts to /tmp from ~ when it detects a read-only file system
mkdir -p /tmp/.keras && cp /root/.keras/keras.json /tmp/.keras
mkdir -p /tmp/.keras && cp /root/.keras/keras.json /tmp/.keras && \
/tmp/clean-layer.sh

# scikit-learn dependencies
RUN pip install scipy && \
Expand Down Expand Up @@ -204,21 +205,15 @@ RUN pip install scipy && \
apt-get install -y sox libsox-dev libsox-fmt-all && \
pip install cffi && \
cd /usr/local/src && git clone https://github.com/pytorch/audio && cd audio && python setup.py install && \
# ~~~~ CLEAN UP ~~~~
rm -rf /root/.cache/pip/* && \
apt-get autoremove -y && apt-get clean && \
conda clean -i -l -t -y && \
rm -rf /usr/local/src/*
/tmp/clean-layer.sh

# vtk with dependencies
RUN apt-get install -y libgl1-mesa-glx && \
pip install vtk && \
# xvfbwrapper with dependencies
apt-get install -y xvfb && \
pip install xvfbwrapper && \
# ~~~~ CLEAN UP ~~~~
rm -rf /root/.cache/pip/* && \
apt-get autoremove -y && apt-get clean
/tmp/clean-layer.sh

RUN pip install --upgrade mpld3 && \
pip install mplleaflet && \
Expand Down Expand Up @@ -268,7 +263,8 @@ RUN pip install --upgrade mpld3 && \
pip install pystan && \
pip install ImageHash && \
conda install -y ecos && \
conda install -y CVXcanon
conda install -y CVXcanon && \
/tmp/clean-layer.sh

RUN pip install fancyimpute && \
pip install git+https://github.com/pymc-devs/pymc3 && \
Expand Down Expand Up @@ -324,7 +320,8 @@ RUN pip install fancyimpute && \
pip install geoplot && \
pip install eli5 && \
pip install implicit && \
pip install dask-ml[xgboost]
pip install dask-ml[xgboost] && \
/tmp/clean-layer.sh

RUN pip install kmeans-smote --no-dependencies && \
# Add google PAIR-code Facets
Expand All @@ -343,7 +340,8 @@ RUN pip install kmeans-smote --no-dependencies && \
pip install cufflinks && \
pip install glmnet_py && \
pip install lime && \
pip install memory_profiler
pip install memory_profiler && \
/tmp/clean-layer.sh

# install cython & cysignals before pyfasttext
RUN pip install --upgrade cython && \
Expand Down Expand Up @@ -374,9 +372,8 @@ RUN pip install --upgrade cython && \
pip install mlcrate && \
# Required to display Altair charts in Jupyter notebook
pip install vega3 && \
jupyter nbextension install --sys-prefix --py vega3 && \
# clean up pip cache
rm -rf /root/.cache/pip/*
jupyter nbextension install --sys-prefix --py vega3 && \
/tmp/clean-layer.sh

# Fast.ai and dependencies
RUN pip install bcolz && \
Expand Down Expand Up @@ -438,9 +435,7 @@ RUN pip install bcolz && \
# which downgrades pytorch. fastai does work with pytorch 0.4.
pip install fastai==0.7.0 --no-deps && \
pip install torchtext && \
# clean up pip cache
rm -rf /root/.cache/pip/* && \
cd && rm -rf /usr/local/src/*
/tmp/clean-layer.sh

###########
#
Expand Down Expand Up @@ -476,16 +471,15 @@ RUN pip install flashtext && \
pip install ggplot && \
pip install cesium && \
pip install rgf_python && \
##### ^^^^ Add new contributions above here ^^^^ #####
# clean up pip cache
rm -rf /root/.cache/pip/*
/tmp/clean-layer.sh

# Pin Vowpal Wabbit v8.6.0 because 8.6.1 does not build or install successfully
RUN cd /usr/local/src && \
git clone -b 8.6.0 https://github.com/JohnLangford/vowpal_wabbit.git && \
./vowpal_wabbit/python/conda_install.sh && \
# Reinstall in non-editable mode (without the -e flag)
pip install vowpal_wabbit/python
pip install vowpal_wabbit/python && \
/tmp/clean-layer.sh

# For Facets
ENV PYTHONPATH=$PYTHONPATH:/opt/facets/facets_overview/python/
Expand All @@ -501,7 +495,8 @@ RUN pip install --upgrade dask && \
# Stop Matplotlib printing junk to the console on first load
sed -i "s/^.*Matplotlib is building the font cache using fc-list.*$/# Warning removed by Kaggle/g" /opt/conda/lib/python3.6/site-packages/matplotlib/font_manager.py && \
# Make matplotlib output in Jupyter notebooks display correctly
mkdir -p /etc/ipython/ && echo "c = get_config(); c.IPKernelApp.matplotlib = 'inline'" > /etc/ipython/ipython_config.py
mkdir -p /etc/ipython/ && echo "c = get_config(); c.IPKernelApp.matplotlib = 'inline'" > /etc/ipython/ipython_config.py && \
/tmp/clean-layer.sh

# Add BigQuery client proxy settings
ENV PYTHONUSERBASE "/root/.local"
Expand Down
22 changes: 22 additions & 0 deletions clean-layer.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
#
# This script should be called at the end of each RUN command
# in the Dockerfiles.
#
# Each RUN command creates a new layer that is stored separately.
# At the end of each command, we should ensure we clean up downloaded
# archives and source files used to produce binaries to reduce the size
# of the layer.
#
# Abort on the first failing command and echo every command to the build log.
set -e
set -x

# Move out of whatever directory the calling RUN command was in. Callers may
# invoke this script from inside /usr/local/src/<project>, which is deleted
# below; the commands that follow must not run from a removed working
# directory. This script runs as a child process, so the caller's own working
# directory is not affected.
cd /

# Delete files that pip caches when installing a package.
rm -rf /root/.cache/pip/*
# Remove packages that were installed automatically as dependencies and are
# no longer needed.
apt-get autoremove -y
# Delete downloaded package archive files.
apt-get clean
# Delete source files used for building binaries.
rm -rf /usr/local/src/*
# Delete conda downloaded tarballs.
conda clean -y --tarballs
11 changes: 8 additions & 3 deletions gpu.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ FROM nvidia/cuda:9.1-cudnn7-devel-ubuntu16.04 AS nvidia
FROM gcr.io/kaggle-images/python-tensorflow-whl:1.11.0-py36 as tensorflow_whl
FROM gcr.io/kaggle-images/python:staging

# Layer clean-up script invoked at the end of each RUN command below.
# A plain local file, so COPY is used rather than ADD.
COPY clean-layer.sh /tmp/clean-layer.sh

# Cuda support
COPY --from=nvidia /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/
COPY --from=nvidia /etc/apt/sources.list.d/nvidia-ml.list /etc/apt/sources.list.d/
Expand Down Expand Up @@ -36,7 +38,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libnccl2=2.2.12-1+cuda9.1 \
libnccl-dev=2.2.12-1+cuda9.1 && \
ln -s /usr/local/cuda-9.1 /usr/local/cuda && \
ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1
ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
/tmp/clean-layer.sh

# Reinstall packages with a separate version for GPU support
# Tensorflow
Expand All @@ -45,7 +48,9 @@ RUN pip uninstall -y tensorflow && \
pip install /tmp/tensorflow_gpu/tensorflow*.whl && \
rm -rf /tmp/tensorflow_gpu && \
conda uninstall -y pytorch-cpu torchvision-cpu && \
conda install -y pytorch torchvision -c pytorch
conda install -y pytorch torchvision -c pytorch && \
/tmp/clean-layer.sh

# Install GPU-only packages
RUN pip install pycuda
RUN pip install pycuda && \
/tmp/clean-layer.sh

0 comments on commit eb3f09f

Please sign in to comment.