-
Notifications
You must be signed in to change notification settings - Fork 45
/
dataux.conf
217 lines (186 loc) · 4.91 KB
/
dataux.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
#
# dataux configuration example
#
# config format is nginx ish, it is quite lenient see https://github.com/lytics/confl
# - think of it as lenient json with support for comments
# - name/value pairs may be separated by a colon, an equals sign, or whitespace [:,=,(whitespace)]
# - values: strings don't have to be quoted
# - keys: left side keys must not have spaces in key name
# - arrays are json: ["a","b"] with entries allowed on new lines
# commas not required between items on new lines
# - objects are values so mobj = { name : "value", ....}
# - comments allowed
# suppress recovery: when true, a panic is not recovered (note: the key below is spelled "supress_recover")
supress_recover: true
## Distributed Runtime Config Setup
# etcd servers
# [ "http://127.0.0.1:2379","http://127.0.0.1:2380"]
etcd = ["http://localhost:2379"]
# Nats.io GnatsD servers
# [ 'nats://10.240.134.116:4222', 'nats://10.145.147.112:4222' ]
nats = [ "nats://localhost:4222" ]
## Frontend
# frontend is inbound tcp connection listener (only mysql currently supported)
# - we don't bind to 3306 because that is mysql's default port
#
# mysql -h127.0.0.1 -P4000 -Ddatauxtest
#
# one frontend is configured: a mysql listener bound to 0.0.0.0:4000
frontends [
{
type : mysql
address : "0.0.0.0:4000"
}
]
# schemas: i.e. virtual databases, each made by combining the tables
# from its sources into one flattened table namespace
schemas : [
# {
# name : datauxtest
# sources : [ "mgo_datauxtest", "es_test" , "cass" ]
# }
# {
# name : cass
# sources : [ "cass" ]
# }
# {
# name : bt
# sources : [ "bt"]
# }
# {
# name : kube
# sources : [ "minikube" ]
# }
# {
# name : baseball
# sources : [ "gcs_baseball_csv" ]
# }
# {
# name : baseball_small
# sources : [ "gcs_baseball_small_csv" ]
# }
{
# the only schema entry left enabled in this example; its single source,
# local_baseball_csv, is defined in the sources list further down
name : baseball_local
sources : [ "local_baseball_csv" ]
}
# {
# # baseball local partitioned
# name : blp
# sources : [ "blp" ]
# }
# {
# # baseball local partitioned
# name : blp
# sources : [ "blp" ]
# }
]
# sources
# - elasticsearch which uses example data from github, see tools/importgithub
# - csv files: cloudstorage (gcs, s3)
# - csv files: localstorage
# - mongo
#
sources : [
{
# mongo source; in this file it is referenced only by the
# commented-out "datauxtest" schema
name : mgo_datauxtest
type : mongo
hosts : ["localhost"]
}
{
# elasticsearch source; in this file it is referenced only by the
# commented-out "datauxtest" schema
name : es_test
type : elasticsearch
hosts : ["http://localhost:9200"]
# optional, disabled here; presumably restricts which tables get loaded -- confirm
#tables_to_load : [ "github_commits" ]
}
{
# example of a cassandra source
# in this file it is referenced only by the commented-out
# "datauxtest" and "cass" schemas
name cass
type cassandra
settings {
# cassandra keyspace this source points at
keyspace datauxtest
# presumably the number of connections to open -- confirm
numconns 100
hosts ["localhost"]
}
}
# {
# name : minikube
# type : kubernetes
# settings {
# # location of kube config
# # defaults to $HOME/.kube/config
# #kube_conf "/etc/kube.conf"
# # kubernetes namespace to use
# #namespace default
# }
# }
{
# this section is a cloudstorage source for csv files
# stored on google-storage from
# http://seanlahman.com/baseball-archive/statistics/
# in this file it is referenced only by the commented-out "baseball" schema
name "gcs_baseball_csv"
type "cloudstore"
settings {
# types: "gcs","localfs"
type "gcs"
# google cloud storage bucket name, ie the gs://bucket-name
bucket "lytics-dataux-tests"
project "lytics-dev"
# restrict to looking for files inside this folder within bucket
path "baseball/"
# format, csv, json, custom
format "csv"
}
}
{
# this section is a cloudstorage source for csv files
# stored on google-storage from
# http://seanlahman.com/baseball-archive/statistics/
# only one table from previous set of data
# and that data is split across 2 partitions
# in this file it is referenced only by the commented-out "baseball_small" schema
name "gcs_baseball_small_csv"
type "cloudstore"
# matches the 2 partitions mentioned in the comment above
partition_count 2
settings {
# types: "gcs","localfs"
type "gcs"
# google cloud storage bucket name, ie the gs://bucket-name
bucket "lytics-dataux-tests"
project "lytics-dev"
# restrict to looking for files inside this folder on bucket
path "baseball2/"
# format, csv, json, custom
format "csv"
}
}
{
# same files as above baseball csv only copied locally
# mkdir -p /vol/baseball
# gsutil rsync -d -r gs://lytics-dataux-tests/baseball /vol/baseball
# this is the source used by the enabled "baseball_local" schema
name "local_baseball_csv"
type "cloudstore"
settings {
# read from the local filesystem instead of a gcs bucket
type "localfs"
# same directory the gsutil rsync command above copies into
localpath "/vol/baseball"
format "csv"
}
}
{
# Baseball local partitioned
# in this file it is referenced only by the commented-out "blp" schema entries
name "blp"
type "cloudstore"
partition_count 2
settings {
type "localfs"
# no copy command is shown for this directory; presumably a local copy
# of the gcs "baseball2/" folder -- confirm
localpath "/vol/baseball2"
format "csv"
}
}
{
# bigtable source; in this file it is referenced only by the
# commented-out "bt" schema
name : bt
type : bigtable
# explicit list of tables to load from this source
tables_to_load : [ "datauxtest" , "article", "user", "event" ]
settings {
instance "bigtable0"
# project will be loaded from ENV $GCEPROJECT
}
}
]