事象
- S3のParquetファイルにHiveカタログで外部表を定義して、Presto でクエリすると、"java.lang.UnsupportedOperationException: com.facebook.presto.spi.type.LongDecimalType" という例外が発生する。
$ hive hive> CREATE EXTERNAL TABLE IF NOT EXISTS sh10.sales( prod_id DECIMAL(38,0), cust_id DECIMAL(38,0), time_id TIMESTAMP, channel_id DECIMAL(38,0), promo_id DECIMAL(38,0), quantity_sold DECIMAL(38,2), seller INT, fulfillment_center INT, courier_org INT, tax_country VARCHAR(3), tax_region VARCHAR(3), amount_sold DECIMAL(38,2) ) PARTITIONED BY (year int, month int) STORED AS PARQUET LOCATION 's3://.../data/parquet_pyspark/sh10/sales/' tblproperties ("parquet.compress"="SNAPPY") ; hive> MSCK REPAIR TABLE sh10.sales; hive> exit; $ presto-cli presto> use hive.sh10; presto:sh10> select * from sales where prod_id is not null limit 10; Query failed (#query_id): com.facebook.presto.spi.type.LongDecimalType
- /var/log/presto/server.log
2018-12-04T03:22:56.740Z ERROR remote-task-callback-73 com.facebook.presto.execution.StageStateMachine Stage 20181204_032254_00002_emvnj.1 failed java.lang.UnsupportedOperationException: com.facebook.presto.spi.type.LongDecimalType at com.facebook.presto.spi.type.AbstractType.writeLong(AbstractType.java:111) at com.facebook.presto.hive.parquet.reader.ParquetIntColumnReader.readValue(ParquetIntColumnReader.java:32) at com.facebook.presto.hive.parquet.reader.ParquetPrimitiveColumnReader.lambda$readValues$1(ParquetPrimitiveColumnReader.java:184) at com.facebook.presto.hive.parquet.reader.ParquetPrimitiveColumnReader.processValues(ParquetPrimitiveColumnReader.java:204) at com.facebook.presto.hive.parquet.reader.ParquetPrimitiveColumnReader.readValues(ParquetPrimitiveColumnReader.java:183) at com.facebook.presto.hive.parquet.reader.ParquetPrimitiveColumnReader.readPrimitive(ParquetPrimitiveColumnReader.java:171) at com.facebook.presto.hive.parquet.reader.ParquetReader.readPrimitive(ParquetReader.java:209) at com.facebook.presto.hive.parquet.reader.ParquetReader.readColumnChunk(ParquetReader.java:259) at com.facebook.presto.hive.parquet.reader.ParquetReader.readBlock(ParquetReader.java:242) at com.facebook.presto.hive.parquet.ParquetPageSource$ParquetBlockLoader.load(ParquetPageSource.java:244) at com.facebook.presto.hive.parquet.ParquetPageSource$ParquetBlockLoader.load(ParquetPageSource.java:222) at com.facebook.presto.spi.block.LazyBlock.assureLoaded(LazyBlock.java:269) at com.facebook.presto.spi.block.LazyBlock.getLoadedBlock(LazyBlock.java:260) at com.facebook.presto.operator.project.DictionaryAwarePageProjection$DictionaryAwarePageProjectionWork.<init>(DictionaryAwarePageProjection.java:97) at com.facebook.presto.operator.project.DictionaryAwarePageProjection.project(DictionaryAwarePageProjection.java:75) at com.facebook.presto.operator.project.PageProcessor$PositionsPageProcessorIterator.processBatch(PageProcessor.java:276) at 
com.facebook.presto.operator.project.PageProcessor$PositionsPageProcessorIterator.computeNext(PageProcessor.java:182) at com.facebook.presto.operator.project.PageProcessor$PositionsPageProcessorIterator.computeNext(PageProcessor.java:129) at com.google.common.collect.AbstractIterator.tryToComputeNext(AbstractIterator.java:141) at com.google.common.collect.AbstractIterator.hasNext(AbstractIterator.java:136) at com.facebook.presto.operator.project.PageProcessorOutput.hasNext(PageProcessorOutput.java:49) at com.facebook.presto.operator.project.MergingPageOutput.getOutput(MergingPageOutput.java:110) at com.facebook.presto.operator.ScanFilterAndProjectOperator.processPageSource(ScanFilterAndProjectOperator.java:287) at com.facebook.presto.operator.ScanFilterAndProjectOperator.getOutput(ScanFilterAndProjectOperator.java:226) at com.facebook.presto.operator.Driver.processInternal(Driver.java:379) at com.facebook.presto.operator.Driver.lambda$processFor$8(Driver.java:283) at com.facebook.presto.operator.Driver.tryWithLock(Driver.java:675) at com.facebook.presto.operator.Driver.processFor(Driver.java:276) at com.facebook.presto.execution.SqlTaskExecution$DriverSplitRunner.processFor(SqlTaskExecution.java:1053) at com.facebook.presto.execution.executor.PrioritizedSplitRunner.process(PrioritizedSplitRunner.java:162) at com.facebook.presto.execution.executor.TaskExecutor$TaskRunner.run(TaskExecutor.java:456) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748)
解決策
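- Hiveカタログで外部表を定義する際に DECIMAL の精度は 17 以下にする。精度の大きい DECIMAL は Presto 側で LongDecimalType として扱われ、Parquet の整数カラムを読み込む際に呼ばれる writeLong が LongDecimalType では実装されておらず UnsupportedOperationException になるため。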
$ hive hive> drop table sh10.sales; hive> CREATE EXTERNAL TABLE IF NOT EXISTS sh10.sales( prod_id DECIMAL(17,0), cust_id DECIMAL(17,0), time_id TIMESTAMP, channel_id DECIMAL(17,0), promo_id DECIMAL(17,0), quantity_sold DECIMAL(17,2), seller INT, fulfillment_center INT, courier_org INT, tax_country VARCHAR(3), tax_region VARCHAR(3), amount_sold DECIMAL(17,2) ) PARTITIONED BY (year int, month int) STORED AS PARQUET LOCATION 's3://.../data/parquet_pyspark/sh10/sales/' tblproperties ("parquet.compress"="SNAPPY") ; hive> MSCK REPAIR TABLE sh10.sales;
環境
- emr-5.19.0
- Hive 2.3.3
- Presto 0.212
参考
thanks @nezihyigitbasi - so Decimals(>17,x) weren't supported in 0.164
https://github.com/prestodb/presto/issues/8484
@Override public void writeLong(BlockBuilder blockBuilder, long value) { throw new UnsupportedOperationException(getClass().getName()); }
package com.facebook.presto.spi.type; import com.facebook.presto.spi.ConnectorSession; import com.facebook.presto.spi.block.Block; import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.block.BlockBuilderStatus; import com.facebook.presto.spi.block.FixedWidthBlockBuilder; import com.facebook.presto.spi.block.PageBuilderStatus; import io.airlift.slice.Slice; import static com.facebook.presto.spi.type.Decimals.MAX_PRECISION;
public final class Decimals { private Decimals() {} public static final int MAX_PRECISION = 38; public static final int MAX_SHORT_PRECISION = 18;