update kubeflow fairness check component to the latest version (Trust…
Tomcli authored and animeshsingh committed Nov 9, 2019
1 parent 38bc56e commit fa5bd51
Showing 5 changed files with 93 additions and 122 deletions.
22 changes: 13 additions & 9 deletions mlops/kubeflow/README.md
@@ -11,30 +11,32 @@ as input and may produce one or more
 
 Each task usually includes two parts:
 
-Each component has a component.yaml which will describe the finctionality exposed by it, for e.g.
+Each component has a component.yaml which will describe the functionality exposed by it, for e.g.
 
 ```
-name: 'PyTorch Model Fairness Check'
+name: 'PyTorch - Model Fairness Check'
 description: |
-  Perform a fairness check on a certain attribute using AIF360 to make sure the PyTorch model is fair
+  Perform a fairness check on a certain attribute using AIF360 to make sure the model is fair and ethical
 metadata:
   annotations: {platform: 'OpenSource'}
 inputs:
   - {name: model_id, description: 'Required. Training model ID', default: 'training-dummy'}
-  - {name: model_class_file, description: 'Required. pytorch model class file'}
-  - {name: model_class_name, description: 'Required. pytorch model class name', default: 'model'}
+  - {name: model_class_file, description: 'Required. pytorch model class file', default: 'PyTorchModel.py'}
+  - {name: model_class_name, description: 'Required. pytorch model class name', default: 'PyTorchModel'}
   - {name: feature_testset_path, description: 'Required. Feature test dataset path in the data bucket'}
-  - {name: label_testset_path, description: 'Required. processed_data/y_test.npy'}
+  - {name: label_testset_path, description: 'Required. Label test dataset path in the data bucket'}
   - {name: protected_label_testset_path, description: 'Required. Protected label test dataset path in the data bucket'}
   - {name: favorable_label, description: 'Required. Favorable label for this model predictions'}
   - {name: unfavorable_label, description: 'Required. Unfavorable label for this model predictions'}
   - {name: privileged_groups, description: 'Required. Privileged feature groups within this model'}
   - {name: unprivileged_groups, description: 'Required. Unprivileged feature groups within this model'}
+  - {name: data_bucket_name, description: 'Optional. Bucket that has the processed data', default: 'training-data'}
+  - {name: result_bucket_name, description: 'Optional. Bucket that has the training results', default: 'training-result'}
 outputs:
   - {name: metric_path, description: 'Path for fairness check output'}
 implementation:
   container:
-    image: aipipeline/fairness-check-with-secret:pytorch-v3
+    image: aipipeline/bias-detector:pytorch
     command: ['python']
     args: [
       -u, fairness_check.py,
@@ -48,9 +50,11 @@ implementation:
       --unfavorable_label, {inputValue: unfavorable_label},
      --privileged_groups, {inputValue: privileged_groups},
       --unprivileged_groups, {inputValue: unprivileged_groups},
-      --metric_path, {outputPath: metric_path}
+      --metric_path, {outputPath: metric_path},
+      --data_bucket_name, {inputValue: data_bucket_name},
+      --result_bucket_name, {inputValue: result_bucket_name}
     ]
 ```
 
 See how to [use the Kubeflow Pipelines SDK](https://www.kubeflow.org/docs/pipelines/sdk/sdk-overview/)
-and [build your own components](https://www.kubeflow.org/docs/pipelines/sdk/build-component/).
+and [build your own components](https://www.kubeflow.org/docs/pipelines/sdk/build-component/).
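
For context, a minimal usage sketch (not part of this commit, and assuming the KFP v1 Python SDK plus a cluster that mounts the expected S3 secrets): load the component definition above and compile a one-step pipeline.

```
import kfp
from kfp import components, dsl

# Load the component definition shown in the README/component.yaml above.
fairness_check_op = components.load_component_from_file('component.yaml')

@dsl.pipeline(name='fairness-check-demo',
              description='Run the AIF360 fairness check as one pipeline step')
def fairness_pipeline():
    fairness_check_op(
        model_id='training-example',   # hypothetical model ID
        model_class_file='PyTorchModel.py',
        model_class_name='PyTorchModel',
        feature_testset_path='processed_data/X_test.npy',
        label_testset_path='processed_data/y_test.npy',
        protected_label_testset_path='processed_data/p_test.npy',
        favorable_label='0.0',
        unfavorable_label='1.0',
        privileged_groups="[{'race': 0.0}]",
        unprivileged_groups="[{'race': 4.0}]")

if __name__ == '__main__':
    kfp.compiler.Compiler().compile(fairness_pipeline, 'fairness_pipeline.tar.gz')
```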
4 changes: 2 additions & 2 deletions mlops/kubeflow/bias_detector_pytorch/Dockerfile
@@ -1,10 +1,10 @@
 FROM pytorch/pytorch:latest
 
-RUN pip install Flask aif360 pandas flask-cors Minio Pillow torchsummary
+RUN pip install aif360 pandas Minio Pillow torchsummary
 
 ENV APP_HOME /app
 COPY src $APP_HOME
 WORKDIR $APP_HOME
 
 ENTRYPOINT ["python"]
-CMD ["app.py"]
+CMD ["fairness_check.py"]
43 changes: 22 additions & 21 deletions mlops/kubeflow/bias_detector_pytorch/component.yaml
@@ -1,38 +1,37 @@
 # Copyright 2019 IBM Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-name: 'PyTorch Model Fairness Check'
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+name: 'Model Fairness Check'
 description: |
-  Perform a fairness check on a certain attribute using AIF360 to make sure the PyTorch model is fair
+  Perform a fairness check on a certain attribute using AIF360 to make sure the model is fair and ethical
 metadata:
   annotations: {platform: 'OpenSource'}
 inputs:
   - {name: model_id, description: 'Required. Training model ID', default: 'training-dummy'}
-  - {name: model_class_file, description: 'Required. pytorch model class file'}
-  - {name: model_class_name, description: 'Required. pytorch model class name', default: 'model'}
+  - {name: model_class_file, description: 'Required. pytorch model class file', default: 'PyTorchModel.py'}
+  - {name: model_class_name, description: 'Required. pytorch model class name', default: 'PyTorchModel'}
   - {name: feature_testset_path, description: 'Required. Feature test dataset path in the data bucket'}
-  - {name: label_testset_path, description: 'Required. processed_data/y_test.npy'}
+  - {name: label_testset_path, description: 'Required. Label test dataset path in the data bucket'}
   - {name: protected_label_testset_path, description: 'Required. Protected label test dataset path in the data bucket'}
   - {name: favorable_label, description: 'Required. Favorable label for this model predictions'}
   - {name: unfavorable_label, description: 'Required. Unfavorable label for this model predictions'}
   - {name: privileged_groups, description: 'Required. Privileged feature groups within this model'}
   - {name: unprivileged_groups, description: 'Required. Unprivileged feature groups within this model'}
+  - {name: data_bucket_name, description: 'Optional. Bucket that has the processed data', default: 'training-data'}
+  - {name: result_bucket_name, description: 'Optional. Bucket that has the training results', default: 'training-result'}
 outputs:
   - {name: metric_path, description: 'Path for fairness check output'}
 implementation:
   container:
-    image: aipipeline/fairness-check-with-secret:pytorch-v3
+    image: aipipeline/bias-detector:pytorch
     command: ['python']
     args: [
       -u, fairness_check.py,
@@ -46,5 +45,7 @@ implementation:
       --unfavorable_label, {inputValue: unfavorable_label},
       --privileged_groups, {inputValue: privileged_groups},
       --unprivileged_groups, {inputValue: unprivileged_groups},
-      --metric_path, {outputPath: metric_path}
+      --metric_path, {outputPath: metric_path},
+      --data_bucket_name, {inputValue: data_bucket_name},
+      --result_bucket_name, {inputValue: result_bucket_name}
     ]
mlops/kubeflow/bias_detector_pytorch/src/fairness.py (renamed from src/app.py)
@@ -1,16 +1,14 @@
 # Copyright 2019 IBM Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import os
 from aif360.datasets import BinaryLabelDataset
 from aif360.metrics import ClassificationMetric
@@ -27,11 +25,6 @@
 import torch.utils.data
 from torch.autograd import Variable
 
-from flask import Flask, request, abort
-from flask_cors import CORS
-
-app = Flask(__name__)
-CORS(app)
 
 def dataset_wrapper(outcome, protected, unprivileged_groups, privileged_groups, favorable_label, unfavorable_label):
     """ A wrapper function to create aif360 dataset from outcome and protected in numpy array format.
@@ -48,15 +41,6 @@ def dataset_wrapper(outcome, protected, unprivileged_groups, privileged_groups,
                                  unprivileged_protected_attributes=unprivileged_groups)
     return dataset
 
-def get_s3_item(client, bucket, s3_path, name):
-    try:
-        client.Bucket(bucket).download_file(s3_path, name)
-    except botocore.exceptions.ClientError as e:
-        if e.response['Error']['Code'] == "404":
-            print("The object does not exist.")
-        else:
-            raise
-
 # Compute the accuaracy and predicted label using the given test dataset
 def evaluate(model, X_test, y_test):
     device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
@@ -95,7 +79,8 @@ def fairness_check(object_storage_url, object_storage_username, object_storage_p
     url = re.compile(r"https?://")
     cos = Minio(url.sub('', object_storage_url),
                 access_key=object_storage_username,
-                secret_key=object_storage_password)
+                secret_key=object_storage_password,
+                secure=False) # Local Minio server won't have HTTPS
 
     dataset_filenamex = "X_test.npy"
     dataset_filenamey = "y_test.npy"
@@ -170,29 +155,3 @@ def fairness_check(object_storage_url, object_storage_username, object_storage_p
     }
     print("metrics: ", metrics)
     return metrics
-
-# with open(metric_path, "w") as report:
-#     report.write(json.dumps(metrics))
-
-
-@app.route('/', methods=['POST'])
-def fairness_api():
-    try:
-        s3_url = request.json['aws_endpoint_url']
-        result_bucket_name = request.json['training_results_bucket']
-        s3_username = request.json['aws_access_key_id']
-        s3_password = request.json['aws_secret_access_key']
-        training_id = request.json['model_id']
-        data_bucket_name = request.json['training_data_bucket']
-    except:
-        abort(400)
-    return json.dumps(fairness_check(s3_url, s3_username, s3_password, data_bucket_name, result_bucket_name, training_id))
-
-
-@app.route('/', methods=['OPTIONS'])
-def fairness_api_options():
-    return "200"
-
-
-if __name__ == "__main__":
-    app.run(debug=True,host='0.0.0.0',port=int(os.environ.get('PORT', 8080)))
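
The surviving core of this file is fairness_check, which wraps true and predicted labels into aif360 BinaryLabelDataset objects (via dataset_wrapper) and compares them with ClassificationMetric. A self-contained sketch of that pattern (not from this commit; toy arrays, with the group definitions borrowed from the component defaults):

```
import numpy as np
import pandas as pd
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric

favorable_label, unfavorable_label = 0.0, 1.0
privileged_groups = [{'race': 0.0}]      # defaults from the component args
unprivileged_groups = [{'race': 4.0}]

def to_dataset(outcome, protected):
    # Same idea as dataset_wrapper: a two-column frame of outcome + attribute.
    df = pd.DataFrame({'outcome': outcome, 'race': protected})
    return BinaryLabelDataset(favorable_label=favorable_label,
                              unfavorable_label=unfavorable_label,
                              df=df,
                              label_names=['outcome'],
                              protected_attribute_names=['race'])

y_true = np.array([0., 1., 0., 1.])   # toy ground-truth labels
y_pred = np.array([0., 1., 1., 1.])   # toy model predictions
p_attr = np.array([0., 0., 4., 4.])   # toy protected attribute values

metric = ClassificationMetric(to_dataset(y_true, p_attr),
                              to_dataset(y_pred, p_attr),
                              unprivileged_groups=unprivileged_groups,
                              privileged_groups=privileged_groups)
print('statistical parity difference:', metric.statistical_parity_difference())
print('disparate impact:', metric.disparate_impact())
```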
79 changes: 43 additions & 36 deletions mlops/kubeflow/bias_detector_pytorch/src/fairness_check.py
@@ -1,20 +1,19 @@
 # Copyright 2019 IBM Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import json
 import argparse
 import os
 
-from app import fairness_check
+from fairness import fairness_check
 
 
 def check_fairness(metrics):
@@ -30,11 +29,15 @@ def check_fairness(metrics):
             return False
     return True
 
-def get_secret(path):
-    with open(path, 'r') as f:
-        cred = f.readline().strip('\'')
-        f.close()
-    return cred
+
+def get_secret(path, default=''):
+    try:
+        with open(path, 'r') as f:
+            cred = f.readline().strip('\'')
+            f.close()
+        return cred
+    except:
+        return default
 
 
 if __name__ == "__main__":
@@ -43,21 +46,21 @@ def get_secret(path):
     parser.add_argument('--model_id', type=str, help='Training model ID', default="training-dummy")
     parser.add_argument('--metric_path', type=str, help='Path for fairness check output', default="/tmp/fairness.txt")
     parser.add_argument('--fairness_status', type=str, help='Path for fairness status output', default="/tmp/status.txt")
-    parser.add_argument('--model_class_file', type=str, help='pytorch model class file', default="model.py")
-    parser.add_argument('--model_class_name', type=str, help='pytorch model class name', default="model")
+    parser.add_argument('--model_class_file', type=str, help='pytorch model class file', default="PyTorchModel.py")
+    parser.add_argument('--model_class_name', type=str, help='pytorch model class name', default="PyTorchModel")
     parser.add_argument('--feature_testset_path', type=str, help='Feature test dataset path in the data bucket', default="processed_data/X_test.npy")
     parser.add_argument('--label_testset_path', type=str, help='Label test dataset path in the data bucket', default="processed_data/y_test.npy")
     parser.add_argument('--protected_label_testset_path', type=str, help='Protected label test dataset path in the data bucket', default="processed_data/p_test.npy")
     parser.add_argument('--favorable_label', type=float, help='Favorable label for this model predictions', default=0.0)
     parser.add_argument('--unfavorable_label', type=float, help='Unfavorable label for this model predictions', default=1.0)
     parser.add_argument('--privileged_groups', type=str, help='Privileged feature groups within this model', default="[{'race': 0.0}]")
     parser.add_argument('--unprivileged_groups', type=str, help='Unprivileged feature groups within this model', default="[{'race': 4.0}]")
+    parser.add_argument('--data_bucket_name', type=str, help='Bucket that has the processed data', default="training-data")
+    parser.add_argument('--result_bucket_name', type=str, help='Bucket that has the training results', default="training-result")
 
-    object_storage_url = get_secret('/app/secrets/s3_url')
-    data_bucket_name = get_secret('/app/secrets/training_bucket')
-    result_bucket_name = get_secret('/app/secrets/result_bucket')
-    object_storage_username = get_secret('/app/secrets/s3_access_key_id')
-    object_storage_password = get_secret('/app/secrets/s3_secret_access_key')
+    object_storage_url = get_secret('/app/secrets/s3_url', 'minio-service:9000')
+    object_storage_username = get_secret('/app/secrets/s3_access_key_id', 'minio')
+    object_storage_password = get_secret('/app/secrets/s3_secret_access_key', 'minio123')
 
     args = parser.parse_args()
     metric_path = args.metric_path
@@ -70,20 +73,24 @@ def get_secret(path):
     protected_label_testset_path = args.protected_label_testset_path
     favorable_label = args.favorable_label
     unfavorable_label = args.unfavorable_label
+    data_bucket_name = args.data_bucket_name
+    result_bucket_name = args.result_bucket_name
     privileged_groups = eval(args.privileged_groups)
     unprivileged_groups = eval(args.unprivileged_groups)
 
     metrics = fairness_check(object_storage_url, object_storage_username, object_storage_password,
-        data_bucket_name, result_bucket_name, model_id,
-        feature_testset_path=feature_testset_path,
-        label_testset_path=label_testset_path,
-        protected_label_testset_path=protected_label_testset_path,
-        model_class_file=model_class_file,
-        model_class_name=model_class_name,
-        favorable_label=favorable_label,
-        unfavorable_label=unfavorable_label,
-        privileged_groups=privileged_groups,
-        unprivileged_groups=unprivileged_groups)
+                             data_bucket_name, result_bucket_name, model_id,
+                             feature_testset_path=feature_testset_path,
+                             label_testset_path=label_testset_path,
+                             protected_label_testset_path=protected_label_testset_path,
+                             model_class_file=model_class_file,
+                             model_class_name=model_class_name,
+                             favorable_label=favorable_label,
+                             unfavorable_label=unfavorable_label,
+                             privileged_groups=privileged_groups,
+                             unprivileged_groups=unprivileged_groups)
 
     if not os.path.exists(os.path.dirname(metric_path)):
         os.makedirs(os.path.dirname(metric_path))
     with open(metric_path, "w") as report:
         report.write(json.dumps(metrics))
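
One note on the new contract: the metrics dict is now persisted to metric_path instead of being served over HTTP, so a later pipeline step can gate on it. A hypothetical consumer (the key name and thresholds are illustrative, since the metrics dict built in fairness_check is elided in this diff):

```
import json

# Default --metric_path from the argparse setup above.
with open('/tmp/fairness.txt') as report:
    metrics = json.load(report)

# 'Disparate impact' is an assumed key; the commonly used "80% rule" treats
# values outside [0.8, 1.25] as a potential bias signal.
di = metrics.get('Disparate impact')
if di is not None and not 0.8 <= di <= 1.25:
    raise SystemExit('fairness gate failed: disparate impact %.3f' % di)
print('fairness gate passed:', metrics)
```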
