XZ Utils（liblzma）による圧縮・伸長（C++）
xz の概要
xz（XZ Utils）は lzma（LZMA Utils）の後継となる圧縮形式です．ちょっとした性能テスト（コマンド xz の圧縮率と圧縮速度 - やた@はてな日記，コマンド xz による圧縮・伸長のメモリ消費 - やた@はてな日記）でも分かるように，圧縮にかかるコストが大きいものの，データをより小さく圧縮できます．
xz ファイルを操作するコマンド
XZ Utils をインストールすれば，コマンド xz が使えるようになります．使い方は gzip や bzip2 とほとんど同じです．"-c" で出力先を標準出力にしたり，"-d" で伸長モードになったり，"-k" で入力ファイルを残したり，という具合です．
# Ubuntu 10.04 では，"sudo aptitude install xz-utils" でインストールできました．対応する開発用のパッケージは liblzma-dev です．
また，バージョン 1.22 以降の tar では，gzip（z）や bzip2（j）と同じように xz（J）を利用できます．
liblzma の使い方
liblzma を使うために必要なヘッダは <lzma.h> です．ライブラリのリンクは，-llzma で指定できます．関数や型，定数の説明は，ヘッダに書いてあります．基本的な機能（圧縮プリセットを用いた圧縮・伸長）については，以下のヘッダを見ると良いと思います．
ライブラリのインタフェースは，zlib や bzlib と似ています．ストリーム（lzma_stream）を初期化した後，入出力（next_in, avail_in, next_out, avail_out）を設定してから，圧縮・伸長（lzma_code()）をおこないます．最後の入力を与えるとき，lzma_code() に LZMA_FINISH を渡すようにします．そして，圧縮・伸長が終われば，lzma_end() で終了処理をおこないます．
以下，圧縮と伸長の手順について，もう少し詳しく説明します．
liblzma による圧縮
ストリームの初期設定
まず，ストリームの構造体を初期化します．
// LZMA_STREAM_INIT ã¯ãã¯ãã§ï¼lzma_stream ã®åæåã«ã®ã¿å©ç¨ã§ãã¾ãï¼ lzma_stream stream = LZMA_STREAM_INIT; // æ¢ã«ç¢ºä¿æ¸ã¿ã® lzma_stream ãåæåããªããå ´åã¯ï¼ // LZMA_STREAM_INIT ã§åæåãã lzma_stream ãä»£å ¥ãã¾ãï¼ // lzma_stream temp_stream = LZMA_STREAM_INIT; // stream = temp_stream;
次に，圧縮用の初期設定をおこないます．
// 第 1 å¼æ°ã«ã¯ï¼åæåãã lzma_stream ã渡ãã¾ãï¼ // 第 2 å¼æ°ã«ã¯ï¼å§ç¸®ããªã»ãããæå®ãã¾ãï¼ // 0 ä»¥ä¸ 9 以ä¸ã®æ´æ°ã¨ï¼LZMA_PRESET_EXTREME ãä½µãã¦æå®ã§ãã¾ãï¼ // æ°å¤ã大ããããã»ã©å§ç¸®çãé«ããªãã¾ããï¼å§ç¸®ã«æéããããï¼ // ã¡ã¢ãªæ¶è²»ã大ãããªãã¾ãï¼ // LZMA_PRESET_EXTREME ã®å¹æã«ã¤ãã¦ã¯ï¼xz --help ã§è¡¨ç¤ºããã // ãªãã·ã§ã³ -e ã®èª¬æãåãããããã¨æãã¾ãï¼ // æå®ããå ´åï¼6 | LZMA_PRESET_EXTREME ã®ããã«ãã¾ãï¼ // 第 3 å¼æ°ã«ã¯ï¼æ´åæ§ãã§ãã¯ã®æ¹æ³ãæå®ãã¾ãï¼ // ããåãããªãã¨ã㯠LZMA_CHECK_CRC32 ã使ã㨠lzma/container.h ã« // æ¸ãã¦ããã¾ããï¼ã³ãã³ã xz 㯠LZMA_CHECK_CRC64 ã使ã£ã¦ããããã§ãï¼ lzma_ret ret = lzma_easy_encoder(&stream, 6, LZMA_CHECK_CRC64); if (ret != LZMA_OK) ã¨ã©ã¼å¦ç;
圧縮プリセットを用いた場合のメモリ使用量は，lzma_easy_encoder_memusage() で確認できます．
入出力の設定
ストリームに入力データと出力先のバッファを指定します．指定の方法はメンバに代入するだけですが，uint8_t を使用しているので，型キャストが必要になると思います．
void set_input(lzma_stream *stream, const void *data, std::size_t size) { // data ã const char * ã®å ´åï¼reinterpret_cast ãç¨ãããï¼ // const void * ãçµç±ãããã¨ã«ãªãã¾ãï¼ stream->next_in = static_cast<const uint8_t *>(data); stream->avail_in = size; } void set_output(lzma_stream *stream, void *buf, std::size_t size) { stream->next_out = static_cast<uint8_t *>(buf); stream->avail_out = size; }
圧縮
入出力の設定が終われば，lzma_code() を呼び出します．例えば，以下のようなループになります．
// å ¥åãå°½ããã¾ã§å§ç¸®ãç¶ãã¾ãï¼ while (stream.avail_in > 0) { lzma_ret ret = lzma_code(&stream, LZMA_RUN); if (ret != LZMA_OK) ã¨ã©ã¼å¦ç; if (stream.avail_in == 0) å ¥åã®ç¶ããè¨å®; if (stream.avail_out == 0) åºåå ã®ãããã¡ãåè¨å®; }
最後の入力を指定した後は，LZMA_FINISH とともに呼び出します．圧縮が終了すれば，lzma_code() は LZMA_STREAM_END を返します．
// After the last input has been supplied, call lzma_code() with LZMA_FINISH
// until it stops returning LZMA_OK; LZMA_STREAM_END signals completion.
lzma_ret ret;
do {
  ret = lzma_code(&stream, LZMA_FINISH);
  if (ret == LZMA_OK) {
    // Reset the output buffer (next_out / avail_out).
  } else if (ret != LZMA_STREAM_END) {
    // Handle the error.
  }
} while (ret == LZMA_OK);
ストリームの終了処理
圧縮が終了すれば，割り当てたメモリを解放するために，lzma_end() を呼び出します．
// Coding is finished: release the memory that was allocated for the stream.
lzma_end(&stream);
liblzma による伸長
ストリームの初期設定
圧縮時と同じようにストリームの構造体を初期化した後，lzma_stream_decoder() を用いて伸長用の初期設定をおこないます．
// 第 1 å¼æ°ã«ã¯ï¼åæåãã lzma_stream ã渡ãã¾ãï¼ // 第 2 å¼æ°ã«ã¯ï¼ã¡ã¢ãªä½¿ç¨éã®ä¸éãæå®ãã¾ãï¼ // ã¡ã¢ãªä½¿ç¨éãä¸éã«éããã¨ï¼lzma_code() ã¯ã¨ã©ã¼ãè¿ãã¾ãï¼ // lzma_memlimit_set() ã使ã£ã¦ãä¸æã復帰ã§ããªããããªã®ã§ï¼ // æåãã大ããªå¤ãæå®ãã¦ãããæ¹ãããããã§ãï¼ // åºæ¬çãªä½¿ãæ¹ã§ããã°ï¼å§ç¸®ããªã»ããã« 9 ãæå®ããã¨ãã® // ã¡ã¢ãªä½¿ç¨éï¼64MiB å¼·ï¼ã§è¶³ããã¨æãã¾ãï¼ // 第 3 å¼æ°ã«ã¯ãªãã·ã§ã³ãæå®ã§ãã¾ããï¼ã¨ãããã 0 ã«ãã¦ããã¾ããï¼ lzma_ret ret = lzma_stream_decoder(&stream, lzma_easy_decoder_memusage(9), 0); if (ret != LZMA_OK) ã¨ã©ã¼å¦ç;
残りの手順
入出力の設定・伸長・ストリームの終了処理については，圧縮の場合とほぼ同じになります．異なるのは，圧縮されているデータが入力となり，伸長した後のデータが出力となることくらいです．
サンプルコード
データを圧縮してストリームに出力するクラス Encoder，圧縮データをストリームから入力して伸長するクラス Decoder は以下のように実装できます．
class Encoder { public: Encoder() : out_(NULL), lzma_(), buf_(NULL), buf_size_(0), total_in_(0), total_out_(0) { lzma_stream initial_lzma = LZMA_STREAM_INIT; lzma_ = initial_lzma; } ~Encoder() { close(); } bool open(std::ostream *out, uint32_t preset = 6, lzma_check check = LZMA_CHECK_CRC64, std::size_t buf_size = 0) { close(); if (buf_size == 0) buf_size = DEFAULT_BUF_SIZE; try { buf_ = new uint8_t[buf_size]; buf_size_ = buf_size; } catch (...) { return false; } lzma_ret ret = lzma_easy_encoder(&lzma_, preset, check); if (ret != LZMA_OK) return false; lzma_.next_out = buf_; lzma_.avail_out = buf_size_; out_ = out; return true; } bool close() { bool is_ok = true; if (out_ != NULL) { is_ok = finish(); lzma_end(&lzma_); } if (buf_ != NULL) delete [] buf_; out_ = NULL; lzma_stream initial_lzma = LZMA_STREAM_INIT; lzma_ = initial_lzma; buf_ = NULL; buf_size_ = 0; return is_ok; } bool write(const void *data, std::size_t size) { if (out_ == NULL) return false; lzma_.next_in = static_cast<const uint8_t *>(data); lzma_.avail_in = size; while (lzma_.avail_in > 0) { lzma_ret ret = lzma_code(&lzma_, LZMA_RUN); if (ret != LZMA_OK) return false; if (lzma_.avail_out == 0) { if (!out_->write(reinterpret_cast<const char *>(buf_), buf_size_)) return false; lzma_.next_out = buf_; lzma_.avail_out = buf_size_; } } return true; } std::size_t total_in() const { return out_ != NULL ? lzma_.total_in : total_in_; } std::size_t total_out() const { return out_ != NULL ? 
lzma_.total_out : total_out_; } private: std::ostream *out_; lzma_stream lzma_; uint8_t *buf_; std::size_t buf_size_; std::size_t total_in_; std::size_t total_out_; enum { DEFAULT_BUF_SIZE = 4096 }; bool finish() { lzma_ret ret = LZMA_OK; do { ret = lzma_code(&lzma_, LZMA_FINISH); if (!out_->write(reinterpret_cast<const char *>(buf_), buf_size_ - lzma_.avail_out)) return false; lzma_.next_out = buf_; lzma_.avail_out = buf_size_; } while (ret == LZMA_OK); total_in_ = lzma_.total_in; total_out_ = lzma_.total_out; return ret == LZMA_STREAM_END; } // Disallows copy. Encoder(const Encoder &); Encoder &operator=(const Encoder &); };
class Decoder { public: Decoder() : in_(NULL), lzma_(), buf_(NULL), buf_size_(0), eof_(false), fail_(false), total_in_(0), total_out_(0) { lzma_stream initial_lzma = LZMA_STREAM_INIT; lzma_ = initial_lzma; } ~Decoder() {} bool open(std::istream *in, std::size_t buf_size = 0) { close(); if (buf_size == 0) buf_size = DEFAULT_BUF_SIZE; try { buf_ = new uint8_t[buf_size]; buf_size_ = buf_size; } catch (...) { return false; } lzma_ret ret = lzma_stream_decoder(&lzma_, lzma_easy_decoder_memusage(9), 0); if (ret != LZMA_OK) return false; in_ = in; return true; } void close() { if (in_ != NULL) lzma_end(&lzma_); if (buf_ != NULL) delete [] buf_; in_ = NULL; lzma_stream initial_lzma = LZMA_STREAM_INIT; lzma_ = initial_lzma; buf_ = NULL; buf_size_ = 0; eof_ = false; fail_ = false; } std::size_t read(void *buf, std::size_t size) { if (in_ == NULL || eof_ || fail_) return false; lzma_.next_out = static_cast<uint8_t *>(buf); lzma_.avail_out = size; while (lzma_.avail_out > 0) { if (lzma_.avail_in == 0) { in_->read(reinterpret_cast<char *>(buf_), buf_size_); lzma_.next_in = buf_; lzma_.avail_in = in_->gcount(); } if (!*in_) { lzma_ret ret = lzma_code(&lzma_, LZMA_FINISH); if (ret == LZMA_OK) continue; else { if (ret == LZMA_STREAM_END) { total_in_ = lzma_.total_in; total_out_ = lzma_.total_out; eof_ = true; } else fail_ = true; break; } } else { lzma_ret ret = lzma_code(&lzma_, LZMA_RUN); if (ret != LZMA_OK) { fail_ = true; break; } } } return size - lzma_.avail_out; } std::size_t total_in() const { return in_ != NULL ? lzma_.total_in : total_in_; } std::size_t total_out() const { return in_ != NULL ? lzma_.total_out : total_out_; } bool eof() const { return eof_; } bool fail() const { return fail_; } private: std::istream *in_; lzma_stream lzma_; uint8_t *buf_; std::size_t buf_size_; bool eof_; bool fail_; std::size_t total_in_; std::size_t total_out_; enum { DEFAULT_BUF_SIZE = 4096 }; // Disallows copy. Decoder(const Decoder &); Decoder &operator=(const Decoder &); };