-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgcp_querypractice.sql
More file actions
217 lines (194 loc) · 5.5 KB
/
Copy pathgcp_querypractice.sql
File metadata and controls
217 lines (194 loc) · 5.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
Data1: bigquery-public-data.austin_bikeshare.bikeshare_stations
Data2: bigquery-public-data.austin_bikeshare.bikeshare_trips
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-- %%%%%%%%%%% Data-1 Sample Records %%%%%%%%%%%
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Select * From bigquery-public-data.austin_bikeshare.bikeshare_stations Limit 500;
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-- %%%%%%%%%%% Data-1 Key Statistics %%%%%%%%%%%
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Select
Count(*) as records
,Count(Distinct station_id) as stations
From
bigquery-public-data.austin_bikeshare.bikeshare_stations
;
-- records stations
-- 102 102
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-- %%%%%%%%%%% Data-2 Sample Records %%%%%%%%%%%
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Select * From bigquery-public-data.austin_bikeshare.bikeshare_trips Limit 5000
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-- %%%%%%%%%%% Data-2 Key Statistics %%%%%%%%%%%
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Select
Count(*) as records
,Count(Distinct trip_id) as trips
,Count(Distinct start_station_id) as start_stations
,Count(Distinct end_station_id) as end_stations
From
bigquery-public-data.austin_bikeshare.bikeshare_trips
;
-- records trips start_stations end_stations
-- 2112263 2112263 106 107
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- (I)
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-- %%%%%% Unique Number of Trips by Station Start %%%%%%
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Select
start_station_id
,count(distinct trip_id) as trips
From
bigquery-public-data.austin_bikeshare.bikeshare_trips
Group BY
start_station_id
Order By
trips DESC
;
-- (II)
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-- %%%%%% Identifying Stations based on station_id %%%%%%
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-- ~~~~~~~ Method-A ~~~~~~
Select
A.*
,B.name as Station_Name
,B.address as Station_Address
,B.status as Station_Status
,B.property_type as Station_Property_Type
From
(
Select
start_station_id
,count(distinct trip_id) as trips
From
bigquery-public-data.austin_bikeshare.bikeshare_trips
Group BY
start_station_id
Order By
trips DESC
) as A
Left Join
bigquery-public-data.austin_bikeshare.bikeshare_stations as B
ON
A.start_station_id = B.station_id
;
-- Question1 : Will you get the result in Order as in Subquery?
-- Question2 : Why did we join the data after aggregation(which is in Subquery)?
-- ~~~~~~~ Method-B ~~~~~~
With Trips As
(
Select
start_station_id
,count(distinct trip_id) as trips
From
bigquery-public-data.austin_bikeshare.bikeshare_trips
Group BY
start_station_id
Order By
trips DESC
),
Stations As
(
Select
station_id
,name
,address
,status
,property_type
From
bigquery-public-data.austin_bikeshare.bikeshare_stations
)
Select
A.*
,B.name as Station_Name
,B.address as Station_Address
,B.status as Station_Status
,B.property_type as Station_Property_Type
From
Trips as A
Left Join
Stations as B
ON
A.start_station_id = B.station_id
;
-- Question1 : What is the difference?
-- (III)
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-- %%%%%% Identifying Stations with highest number of trips -- TOP5 Stations Only %%%%%%
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
With Trips As
(
Select
start_station_id
,count(distinct trip_id) as trips
From
bigquery-public-data.austin_bikeshare.bikeshare_trips
Group BY
start_station_id
),
Stations As
(
Select
station_id
,name
,address
,status
,property_type
From
bigquery-public-data.austin_bikeshare.bikeshare_stations
)
Select
A.*
,B.name as Station_Name
,B.address as Station_Address
,B.status as Station_Status
,B.property_type as Station_Property_Type
From
Trips as A
Left Join
Stations as B
ON
A.start_station_id = B.station_id
Order By trips
Limit 5
;
-- start_station_id trips Station_Name Station_Address Station_Status Station_Property_Type
-- 1001 12 OFFICE/Main/Shop/Repair 1000 Brazos closed
-- 7637 182
-- 1008 303 Nueces @ 3rd 311 Nueces closed
-- 3456 348
-- 2500 440 Republic Square 425 W 4th Street closed
-- Question1 : What will happen if you remove Order By?
-- (IV)
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-- Identifying When was the first & last trip made from Stations .
-- For (i) Station_id=2500 which has 440 Trips
-- For (ii) Station_id = 1008 which has 303 trips
-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
With Sequence As
(
Select
*
,Row_Number() Over (Partition By start_station_id Order By start_time ASC) as trip_sequence_asc
,Row_Number() Over (Partition By start_station_id Order By start_time DESC) as trip_sequence_desc
From
bigquery-public-data.austin_bikeshare.bikeshare_trips
Where
start_station_id in (2500,1008)
)
Select
*
,Case
When trip_sequence_asc = 1 Then "First Trip"
When trip_sequence_desc = 1 Then "Last Trip"
End as Trip_Category
From
Sequence
Where
trip_sequence_asc = 1
OR
trip_sequence_desc = 1
;