Doc updates
araddon committed Oct 14, 2017
1 parent 99407f8 commit 5effa2d
Showing 5 changed files with 174 additions and 59 deletions.
90 changes: 38 additions & 52 deletions README.md
* [Google Cloud Storage / (csv, json files)](https://github.com/dataux/dataux/tree/master/backends/files) An example of a REST API backend: the list of files is a table, and the file contents themselves are tables as well.
* [Cassandra](https://github.com/dataux/dataux/tree/master/backends/cassandra) SQL against Cassandra, adding SQL features that are missing.
* [Lytics](https://github.com/dataux/dataux/tree/master/backends/lytics) SQL against [Lytics REST Api's](https://www.getlytics.com)
* [Kubernetes](https://github.com/dataux/dataux/tree/master/backends/_kube) An example of a REST API backend.
* [Google Big Query](https://github.com/dataux/dataux/tree/master/backends/bigquery) MySQL against the world's best analytics data warehouse, [BigQuery](https://cloud.google.com/bigquery/).
* [Google Datastore](https://github.com/dataux/dataux/tree/master/backends/datastore) MySQL against [Datastore](https://cloud.google.com/datastore/).

* NOT production ready. Currently supports a few non-critical use-cases (ad-hoc queries, support tooling) in production.



## Try it Out
This example imports a couple of hours' worth of historical data
from https://www.githubarchive.org/ into a local
elasticsearch server. Requires an etcd server running locally as well.
Docker setup coming soon.
```sh

cd tools/importgithub
# assuming elasticsearch on localhost, else use --host=myeshost
go build && ./importgithub

# run dataux using dataux.conf from the root of this project
go build
./dataux --config=dataux.conf

# or run via docker; assuming you are running locally (in Google Cloud or
# Google Container Engine you don't need the credentials or volume mount)
docker run -e "GOOGLE_APPLICATION_CREDENTIALS=/.config/gcloud/application_default_credentials.json" \
  -e "LOGGING=debug" \
  --rm -it \
  -p 4000:4000 \
  -v ~/.config/gcloud:/.config/gcloud \
  gcr.io/dataux-io/dataux:latest

# now that dataux is running, use a mysql client to connect
mysql -h 127.0.0.1 -P 4000
```
Now run some queries:
```sql
-- we have a default CSV dataset that we have copied to the local
-- docker drive from http://seanlahman.com/baseball-archive/statistics/

show databases;

use baseball;

show tables;

describe appearances;

select count(*) from appearances;

select * from appearances limit 10;

-- Lets add a bigquery datasource
CREATE source `datauxtest` WITH {
  "type":"bigquery",
  "schema":"bqsf_bikes",
  "table_aliases" : {
    "bikeshare_stations" : "bigquery-public-data:san_francisco.bikeshare_stations"
  },
  "settings" : {
    "billing_project" : "your-google-cloud-project",
    "data_project" : "bigquery-public-data",
    "dataset" : "san_francisco"
  }
};

use bqsf_bikes;

show tables;

describe film_locations;

select * from film_locations limit 10;
```
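The `table_aliases` entry above means the public BigQuery table is also queryable under its short alias. A sketch, assuming the source was created successfully:

```sql
-- alias defined in the CREATE source statement
select * from bikeshare_stations limit 5;
```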

Roadmap(ish)
------------







10 changes: 10 additions & 0 deletions backends/_kube/README.md
Provides SQL access to Kubernetes REST APIs via the DataUX MySQL proxy service.

![mysql_kube](https://cloud.githubusercontent.com/assets/7269/20697265/96e13c10-b5ac-11e6-944b-c588c6e7570e.png)

CURRENTLY BROKEN
-----------------------------

As of 10/13/2017 this backend no longer compiles due to vendor dependency issues.

https://github.com/dataux/dataux/issues/60




Try It Out
----------------------------

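While the backend is broken at the moment, the intent is plain SQL over the Kubernetes REST API. A hedged sketch of what that could look like (schema and table names here are illustrative assumptions, not confirmed against the current code):

```sql
use kubernetes;

show tables;

-- hypothetical table surfaced from the REST API
select name, status from pods limit 10;
```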
64 changes: 64 additions & 0 deletions backends/elasticsearch/README.md


## Try it Out
This example imports a couple of hours' worth of historical data
from https://www.githubarchive.org/ into a local elasticsearch server.

```sh

cd tools/importgithub
# assuming elasticsearch on localhost, else use --host=myeshost
go build && ./importgithub

# using dataux.conf from root of this project
go build
./dataux --config=dataux.conf

# now that dataux is running, use a mysql client to connect

mysql -h 127.0.0.1 -P 4000
```
Now run some queries:
```sql
use datauxtest;

show tables;

describe github_watch;

select cardinality(`actor`) AS users_who_watched, min(`repository.id`) as oldest_repo from github_watch;

SELECT actor, `repository.name`, `repository.stargazers_count`, `repository.language`
FROM github_watch where `repository.language` = "Go";

select actor, repository.name from github_watch where repository.stargazers_count BETWEEN 1000 AND 1100;

SELECT actor, repository.organization AS org
FROM github_watch
WHERE repository.created_at BETWEEN "2008-10-21T17:20:37Z" AND "2008-10-21T19:20:37Z";

select actor, repository.name from github_watch where repository.name IN ("node", "docker","d3","myicons", "bootstrap") limit 100;

select cardinality(`actor`) AS users_who_watched, count(*) as ct, min(`repository.id`) as oldest_repo
FROM github_watch
WHERE repository.description LIKE "database";


```



SQL -> Elasticsearch
----------------------------------

ES API | SQL Query
----- | -------
Aliases | `show tables;`
Mapping | `describe mytable;`
hits.total for filter | `select count(*) from table WHERE exists(a);`
aggs min, max, avg, sum | `select min(year), max(year), avg(year), sum(year) from table WHERE exists(a);`
filter: terms | `select * from table WHERE year IN (2015,2014,2013);`
filter: gte, range | `select * from table WHERE year BETWEEN 2012 AND 2014;`
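Several of the mappings above compose in a single statement. A hedged sketch against the `github_watch` table from the walkthrough (assuming the same schema as the queries above):

```sql
-- terms filter (IN), range filter (BETWEEN), and aggs together
select min(`repository.stargazers_count`) as min_stars,
       max(`repository.stargazers_count`) as max_stars,
       count(*) as ct
from github_watch
where `repository.language` IN ("Go", "JavaScript")
  and `repository.created_at` BETWEEN "2008-10-21T17:20:37Z" AND "2008-10-21T19:20:37Z";
```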



45 changes: 41 additions & 4 deletions backends/files/README.md



Adding A Source
--------------------


```sh
# baseball data from http://seanlahman.com/baseball-archive/statistics/
mkdir -p /tmp/baseball2
cd /tmp/baseball2
curl -Ls http://seanlahman.com/files/database/baseballdatabank-2017.1.zip > bball2.zip
unzip bball2.zip

mv baseball*/core/*.csv .

rm bball2.zip
rm -rf baseballdatabank-*

# create a google-cloud-storage bucket
gsutil mb gs://my-dataux2-bucket

# sync the csv files to cloud storage
gsutil rsync -d -r /tmp/baseball2/ gs://my-dataux2-bucket/

# start dataux (from dataux root); assuming you are running locally,
# in Google Cloud or Google Container Engine you don't need the
# credentials or volume mount
docker run -e "GOOGLE_APPLICATION_CREDENTIALS=/.config/gcloud/application_default_credentials.json" \
  -e "LOGGING=debug" \
  --rm -it \
  -p 4000:4000 \
  -v ~/.config/gcloud:/.config/gcloud \
  gcr.io/dataux-io/dataux:latest

# now connect with a mysql client
mysql -h 127.0.0.1 -P 4000

```

```sql

CREATE source gcsbball2 WITH {
"type":"cloudstore",
"schema":"gcsbball",
"settings" : {
"type": "gcs",
"bucket": "my-dataux2-bucket",
"format": "csv",
"jwt": "/.config/gcloud/application_default_credentials.json"
}
};

```
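Once the source is created, a quick sanity check (a sketch; the schema name comes from the `CREATE source` statement above, and table names come from the csv files synced into the bucket):

```sql
use gcsbball;

show tables;

select * from appearances limit 10;
```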
24 changes: 21 additions & 3 deletions backends/mongo/README.md
CREATE source mgo_datauxtest WITH {
"hosts": ["localhost:28017"]
};

-- Syntax: CREATE source DB_NAME WITH json_properties
-- DB_NAME = existing database in mongo, in this example "mgo_datauxtest"

-- WITH Properties:
-- "schema": Name of the schema to attach this source to
-- "type":   Source type; must be a datasource registered in the registry (mongo, bigtable, etc.)
-- "hosts":  Array of host:port


use dbx1;
select * from article;



```

SQL -> Mongo
----------------------------------

Mongo | SQL Query
----- | -------
`show collections` | `show tables;`
na, runtime inspection | `describe mytable;`
`db.accounts.find({},{created:{"$gte":"1/1/2016"}}).count();` | `select count(*) from accounts WHERE created > "1/1/2016";`
`db.article.find({"year":{"$in": [2013,2014,2015] }},{})` | `select * from article WHERE year IN (2015,2014,2013);`
`db.article.find({"created":{"$gte": new Date('Aug 01, 2011'), "$lte": new Date('Aug 03, 2013')},{title:1,count:1,author:1})` | `SELECT title, count, author FROM article WHERE created BETWEEN todate("2011-08-01") AND todate("2013-08-03");`
`db.article.find({"title":{"Pattern":"^list","Options":"i"}},{title:1,count:1})` | `SELECT title, count AS ct FROM article WHERE title LIKE "list%";`
na, not available in mongo (polyfill in dataux) | `SELECT avg(CHAR_LENGTH(CAST(title, "AS", "CHAR"))) AS title_avg FROM article;`
need to document ... | `select min(year), max(year), avg(year), sum(year) from table WHERE exists(a);`


