@@ -581,269 +581,6 @@ def test_manage_views(client, to_delete):
581581 # [END bigquery_grant_view_access]
582582
583583
def test_load_table_from_file(client, to_delete):
    """Upload table data from a CSV file."""
    dataset_id = "load_table_from_file_dataset_{}".format(_millis())
    table_id = "load_table_from_file_table_{}".format(_millis())
    dataset = bigquery.Dataset(client.dataset(dataset_id))
    dataset.location = "US"
    client.create_dataset(dataset)
    to_delete.append(dataset)
    # Resolve the sample CSV that ships with the bigquery package tests,
    # relative to this snippets file.
    here = os.path.abspath(os.path.dirname(__file__))
    filename = os.path.join(
        here, "..", "..", "bigquery", "tests", "data", "people.csv"
    )

    # [START bigquery_load_from_file]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # filename = '/path/to/file.csv'
    # dataset_id = 'my_dataset'
    # table_id = 'my_table'

    dataset_ref = client.dataset(dataset_id)
    table_ref = dataset_ref.table(table_id)
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.skip_leading_rows = 1
    job_config.autodetect = True

    with open(filename, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref, job_config=job_config)

    job.result()  # Waits for table load to complete.

    print("Loaded {} rows into {}:{}.".format(job.output_rows, dataset_id, table_id))
    # [END bigquery_load_from_file]

    table = client.get_table(table_ref)
    rows = list(client.list_rows(table))  # API request

    assert len(rows) == 2
    # Order is not preserved, so compare individually
    row1 = bigquery.Row(("Wylma Phlyntstone", 29), {"full_name": 0, "age": 1})
    assert row1 in rows
    row2 = bigquery.Row(("Phred Phlyntstone", 32), {"full_name": 0, "age": 1})
    assert row2 in rows
629-
def test_load_table_from_uri_avro(client, to_delete, capsys):
    """Load an Avro file from GCS and check the printed row count."""
    dataset_id = "load_table_from_uri_avro_{}".format(_millis())
    dataset = bigquery.Dataset(client.dataset(dataset_id))
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_load_table_gcs_avro]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'

    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bigquery.SourceFormat.AVRO
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro"

    load_job = client.load_table_from_uri(
        uri, dataset_ref.table("us_states"), job_config=job_config
    )  # API request
    print("Starting job {}".format(load_job.job_id))

    load_job.result()  # Waits for table load to complete.
    print("Job finished.")

    destination_table = client.get_table(dataset_ref.table("us_states"))
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_avro]

    out, _ = capsys.readouterr()
    assert "Loaded 50 rows." in out
661-
def test_load_table_from_uri_csv(client, to_delete, capsys):
    """Load a CSV file from GCS with an explicit schema and check output."""
    dataset_id = "load_table_from_uri_csv_{}".format(_millis())
    dataset = bigquery.Dataset(client.dataset(dataset_id))
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_load_table_gcs_csv]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'

    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
    ]
    job_config.skip_leading_rows = 1
    # The source format defaults to CSV, so the line below is optional.
    job_config.source_format = bigquery.SourceFormat.CSV
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"

    load_job = client.load_table_from_uri(
        uri, dataset_ref.table("us_states"), job_config=job_config
    )  # API request
    print("Starting job {}".format(load_job.job_id))

    load_job.result()  # Waits for table load to complete.
    print("Job finished.")

    destination_table = client.get_table(dataset_ref.table("us_states"))
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_csv]

    out, _ = capsys.readouterr()
    assert "Loaded 50 rows." in out
699-
def test_load_table_from_uri_json(client, to_delete, capsys):
    """Load a newline-delimited JSON file from GCS and check output."""
    dataset_id = "load_table_from_uri_json_{}".format(_millis())
    dataset = bigquery.Dataset(client.dataset(dataset_id))
    dataset.location = "US"
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_load_table_gcs_json]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'

    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
    ]
    job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json"

    load_job = client.load_table_from_uri(
        uri,
        dataset_ref.table("us_states"),
        location="US",  # Location must match that of the destination dataset.
        job_config=job_config,
    )  # API request
    print("Starting job {}".format(load_job.job_id))

    load_job.result()  # Waits for table load to complete.
    print("Job finished.")

    destination_table = client.get_table(dataset_ref.table("us_states"))
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_json]

    out, _ = capsys.readouterr()
    assert "Loaded 50 rows." in out
739-
def test_load_table_from_uri_cmek(client, to_delete):
    """Load JSON from GCS into a table encrypted with a customer-managed key."""
    dataset_id = "load_table_from_uri_cmek_{}".format(_millis())
    dataset = bigquery.Dataset(client.dataset(dataset_id))
    dataset.location = "US"
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_load_table_gcs_json_cmek]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'

    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.autodetect = True
    job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON

    # Set the encryption key to use for the destination.
    # TODO: Replace this key with a key you have created in KMS.
    kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format(
        "cloud-samples-tests", "us", "test", "test"
    )
    encryption_config = bigquery.EncryptionConfiguration(kms_key_name=kms_key_name)
    job_config.destination_encryption_configuration = encryption_config
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json"

    load_job = client.load_table_from_uri(
        uri,
        dataset_ref.table("us_states"),
        location="US",  # Location must match that of the destination dataset.
        job_config=job_config,
    )  # API request

    assert load_job.job_type == "load"

    load_job.result()  # Waits for table load to complete.

    assert load_job.state == "DONE"
    table = client.get_table(dataset_ref.table("us_states"))
    assert table.encryption_configuration.kms_key_name == kms_key_name
    # [END bigquery_load_table_gcs_json_cmek]
782-
def test_load_table_from_uri_parquet(client, to_delete, capsys):
    """Load a Parquet file from GCS and check the printed row count."""
    dataset_id = "load_table_from_uri_parquet_{}".format(_millis())
    dataset = bigquery.Dataset(client.dataset(dataset_id))
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_load_table_gcs_parquet]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'

    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bigquery.SourceFormat.PARQUET
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet"

    load_job = client.load_table_from_uri(
        uri, dataset_ref.table("us_states"), job_config=job_config
    )  # API request
    print("Starting job {}".format(load_job.job_id))

    load_job.result()  # Waits for table load to complete.
    print("Job finished.")

    destination_table = client.get_table(dataset_ref.table("us_states"))
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_parquet]

    out, _ = capsys.readouterr()
    assert "Loaded 50 rows." in out
814-
def test_load_table_from_uri_orc(client, to_delete, capsys):
    """Load an ORC file from GCS and check the printed row count."""
    dataset_id = "load_table_from_uri_orc_{}".format(_millis())
    dataset = bigquery.Dataset(client.dataset(dataset_id))
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_load_table_gcs_orc]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'

    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bigquery.SourceFormat.ORC
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc"

    load_job = client.load_table_from_uri(
        uri, dataset_ref.table("us_states"), job_config=job_config
    )  # API request
    print("Starting job {}".format(load_job.job_id))

    load_job.result()  # Waits for table load to complete.
    print("Job finished.")

    destination_table = client.get_table(dataset_ref.table("us_states"))
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_orc]

    out, _ = capsys.readouterr()
    assert "Loaded 50 rows." in out
846-
847584def test_load_table_from_uri_autodetect (client , to_delete , capsys ):
848585 """Load table from a GCS URI using various formats and auto-detected schema
849586 Each file format has its own tested load from URI sample. Because most of
0 commit comments