diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..fe174c97 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Boston University - Software & Application Innovation Lab, Nikolaj Volgushev, Malte Schwarzkopf + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.rst b/README.rst index e09a4aa1..3c44ba37 100644 --- a/README.rst +++ b/README.rst @@ -11,7 +11,7 @@ This framework allows users to define data analysis workflows in familiar fronte Dependencies ------------ -Conclave requires a Python 3.x environment. On Ubuntu (14.04+), installing the `python3`, `python3-pystache`, and `python3-nose` should get everything that's needed. +Conclave requires a Python 3.x environment. On Ubuntu (14.04+), installing the `python3`, `python3-pystache` should get everything that's needed. Testing ------- diff --git a/conclave/codegen/jiff.py b/conclave/codegen/jiff.py index 14b1d2d5..caffa602 100644 --- a/conclave/codegen/jiff.py +++ b/conclave/codegen/jiff.py @@ -46,6 +46,13 @@ def generate_party_code(self): "SERVER_IP_PORT": "{0}:{1}".format(self.jiff_config.server_ip, self.jiff_config.server_port) } + nodes = self.dag.top_sort() + for node in nodes: + if isinstance(node, Open): + if self.pid in node.out_rel.stored_with: + data['OUTPUT_FILE'] = node.out_rel.name + break + self.party_code += pystache.render(template, data) return self @@ -185,10 +192,12 @@ def _generate_open(self, open_op: Open): template = open( "{0}/open.tmpl".format(self.template_directory), 'r').read() - data = {} + data = { + "INREL": open_op.get_in_rel().name, + "OUTREL": open_op.out_rel.name + } - # return pystache.render(template, data) - return '' + return pystache.render(template, data) def _generate_project(self, project_op: Project): @@ -275,10 +284,15 @@ def _generate_sort_by(self, sort_op: SortBy): return pystache.render(template, data) - def _generate_bash(self): - # TODO: need clear way to run jiff server/party/mpc files outside of jiff repository + def _write_bash(self): + template = open("{}/bash.tmpl" + .format(self.template_directory), 'r').read() + + data = { + "JIFF_PATH": self.jiff_config.jiff_path + } - bash_code = '' + bash_code = pystache.render(template, data) if self.pid == self.jiff_config.server_pid: bash_code += 'node server.js\n' @@ -302,7 +316,7 @@ def _write_code(self, code, job_name): protocol_file = open("{}/{}/mpc.js".format(self.config.code_path, job_name), 'w') protocol_file.write(code) - bash_code = self._generate_bash() + bash_code = self._write_bash() bash_file = open("{}/{}/run.sh".format(self.config.code_path, job_name), 'w') bash_file.write(bash_code) diff --git a/conclave/codegen/templates/jiff/bash.tmpl b/conclave/codegen/templates/jiff/bash.tmpl index c38184eb..c8b6dbed 100644 --- a/conclave/codegen/templates/jiff/bash.tmpl +++ b/conclave/codegen/templates/jiff/bash.tmpl @@ -1,3 +1,3 @@ #!/bin/bash -{{{OP_CODE}}} \ No newline at end of file +export NODE_PATH="{{{JIFF_PATH}}}/node_modules" diff --git a/conclave/codegen/templates/jiff/create.tmpl b/conclave/codegen/templates/jiff/create.tmpl index 7af61daa..a1b60509 100644 --- a/conclave/codegen/templates/jiff/create.tmpl +++ b/conclave/codegen/templates/jiff/create.tmpl @@ -1,2 +1,2 @@ - var {{{OUTREL}}} = datasets[{{{ID}}}]; \ No newline at end of file + var {{{OUTREL}}} = shares[{{{ID}}}]; \ No newline at end of file diff --git a/conclave/codegen/templates/jiff/mpc_top_level.tmpl b/conclave/codegen/templates/jiff/mpc_top_level.tmpl index 6538f375..cc856412 100644 --- a/conclave/codegen/templates/jiff/mpc_top_level.tmpl +++ b/conclave/codegen/templates/jiff/mpc_top_level.tmpl @@ -14,24 +14,6 @@ return saved_instance; }; - const splitDatasets = function(shares, jiff_instance) - { - var datasets = []; - - for (var k = 1; k <= jiff_instance.party_count; k++) - { - datasets.push([]); - } - - for (var i = 0; i < shares.length; i++) { - for (var j = 0; j < jiff_instance.party_count; j++) { - datasets[j].push(shares[i][j+1]); - } - } - - return datasets; - - }; const project = function(inputRel, projCols) { @@ -50,6 +32,8 @@ result[j].push(inputRel[j][projCols[k]]); } } + return result; + }; const concatenate = function(inRels) @@ -120,6 +104,196 @@ } + + const sortBy = function(inRel) + { + var numRows = inRel.length; + var numCols = inRel[0].length; + + var sortArray = []; + for (var i = 0; i < numRows; i++) + { + sortArray.push([]); + for (var k = 0; j < numCols; j++) + { + sortArray[i].push(inRel[k]); + } + } + oddEvenSort(array, keyCol, numCols, 0, numRows, true); + } + const oddEvenSort = function(array, keyCol, numCols, lo, n, ascending) + { + if (n > 1) + { + var m = n / 2; + oddEvenSort(array, keyCol, numCols, lo, m, ascending); + oddEvenSort(array, keyCol, numCols, lo + m, m, ascending); + oddEvenMerge(array, keyCol, numCols, lo, n, 1, ascending); + } + } + + //TODO + const oddEvenMerge = function(array, keyCol, numCols, lo, n, ascending) + { + var m = r * 2; + if (m < n) + { + oddEvenMerge(array, keyCol, numCols, lo, n, m, ascending); + oddEvenMerge(array, keyCol, numCols, lo + r, n, m, ascending); + + for (var i = lo + r; (i + r) < (lo + n); i += m) + { + compareExchange(array, keyCol, numCols, i, i + r, ascending); + } + } else { + compareExchange(array, keyCol, numCols, lo, lo + r, ascending); + } + } + + //TODO + const compareExchange = function(array, keyCol, numCols, i, j, ascending) + { + var x = array[i][keyCol]; + var y = array[j][keyCol]; + + var c = 1; + var d = 1; + if (ascending) + { + c = (x.slt(y)).mult(c); + d = (y.slt(x)).mult(d); + } + else + { + d = (x.slt(y)).mult(d); + c = (y.slt(x)).mult(c); + } + + var temp1 = []; + var temp2 = []; + + for (var k = 0; k < numCols; k++) + { + var a = array[i][k]; + var b = array[j][k]; + + temp1.push((c * a) + (d * b)); + temp2.push((d * a) + (c * b)); + } + array[i] = temp1; + array[j] = temp2; + } + + //TODO: make this work similar to OblivC + const agg = function(inRel) + { + var numCols = 2; + var numRows = inRel.length; + var array = []; + + for (var i = 0; i < numRows; i++) + { + array.push([]); + array[i].push(inRel[i][0]); + array[i].push(inRel[i][1]); + } + + if (!(numRows && (numRows & (numRows - 1)) === 0)) + { + var paddedVal = nextPowerOf2(numRows); + var paddedArray = padInput(array, numCols, numRows, paddedVal); + return agg_helper(inRel, numCols, paddedVal); + } else + { + return agg_helper(inRel, numCols, numRows); + } + } + + const oddEvenAgg = function(array, lo, n) + { + if (n > 1) + { + var m = n / 2; + oddEvenAgg(array, lo, m); + oddEvenAgg(array, lo + m, m); + for (var i = 0; i < m; i++) + { + aggIfEq(array, lo + i); + } + } + else + { + aggIfEq(array, 0); + } + } + + const aggIfEq = function(array, index) + { + var c = array[index][0].eq(array[index+1][0]); + array[index][0] = array[index][1].add(c.mult(array[index+1][1])); + } + + const agg_helper = function(inRel, numCols, numRows) + { + + oddEvenSort(array, 0, numCols, 0, numRows, true); + oddEvenAgg(array, 0, numRows); + + oddEvenSort(array, 1, numCols, 0, numRows, false); + + var outRows = 0; + + for (var i = 0; i < numRows; i++) + { + var add = 1; + add = array[i][1].neq(0).mult(add); + outRows += add; + } + + var returnArray = [] + + for (var i = 0; i < numOutRows; i++) + { + returnArray.push([]) + array[i].push(array[i][0]); + array[i].push(array[i][1]); + } + + return returnArray; + } + + const padInput = function(array, numCols, numRows, padVal) + { + var paddedArray = []; + for (var i = 0; i < numRows; i++) { + paddedArray.push([]); + for (var j = 0; j < numCols; j++) + { + paddedArray[i].push(array[i][j]); + } + } + + for (var i = numRows; i < pad; i++) + { + paddedArray.push([]); + for (var j = 0; j < numCols; j++) + { + paddedArray[i].push(0); + } + } + return paddedArray; + } + + const nextPowerOf2 = function(n) + { + var p = 1; + if (n && !(n & (n - 1))) + return n; + while (p < n) + p <<= 1; + return p; + } + const divide = function(inRel, newCol, targetCol, operands, scalar) { var result = []; @@ -146,24 +320,10 @@ return result; } - const open = function(inRel, jiff_instance) + const open = function(inRel) { - var result = []; - - for (var k = 0; k < inRel.length; k++) - { - result.push([]); - } - - for (var i = 0; i < inRel.length; i++) - { - for (var j = 0; j < inRel[i].length; j++) - { - result[i].push(jiff_instance.open(inRel[i][j])); - } - } - - return result + var results = saved_instance.open_2D_array(inRel); + return results; }; /** @@ -177,7 +337,7 @@ // NOTE: assuming 1 file per party here var inputData = []; - var unparsedData = fs.readFileSync(input, 'UTF-8'); + var unparsedData = (fs.readFileSync(input, 'UTF-8')).trim(); var rows = unparsedData.split('\n'); // start at one, skip header row @@ -187,21 +347,10 @@ inputData.push(arr); } - var allShares = []; - - for (let j = 0; j < inputData.length; j++) - { - allShares.push(jiff_instance.share_array(inputData[j])); - } - - Promise.all(allShares).then(function(shares) { - - var datasets = splitDatasets(shares, jiff_instance); - -{{{OP_CODE}}} + var promise = jiff_instance.share_2D_array(inputData); + return promise.then(function(shares) { + {{{OP_CODE}}} }); - - return Promise.all(allShares); }; }((typeof exports == 'undefined' ? this.mpc = {} : exports), typeof exports != 'undefined')); diff --git a/conclave/codegen/templates/jiff/open.tmpl b/conclave/codegen/templates/jiff/open.tmpl index e69de29b..1523d478 100644 --- a/conclave/codegen/templates/jiff/open.tmpl +++ b/conclave/codegen/templates/jiff/open.tmpl @@ -0,0 +1,3 @@ + + var {{{OUTREL}}} = open({{{INREL}}}); + return {{{OUTREL}}}; \ No newline at end of file diff --git a/conclave/codegen/templates/jiff/party.tmpl b/conclave/codegen/templates/jiff/party.tmpl index cc1feb7d..6314e518 100644 --- a/conclave/codegen/templates/jiff/party.tmpl +++ b/conclave/codegen/templates/jiff/party.tmpl @@ -15,11 +15,42 @@ if(computation_id == null) computation_id = 'test'; var party_id = process.argv[5]; if(party_id != null) party_id = parseInt(party_id, 10); + +var format_2d = function (array_2d){ + var output = ""; + + for (var i=0; i < array_2d.length; i++){ + for (var j=0; j < array_2d[i].length; j++){ + output += array_2d[i][j] + ","; + } + //remove the last comma and replace it with a new line + output = output.slice(0, -1) + '\n'; + } + //remove trailing new line + return output.slice(0, -1); +} + // JIFF options var options = {party_count: party_count, party_id: party_id}; options.onConnect = function(jiff_instance) { - mpc.compute(input); + mpc.compute(input).then(function(p){ + p.then(function(v){ + + var output = "{{{OUTPUT_FILE}}}"; + if(output.length > 0){ + output += ".csv"; + var fs = require('fs'); + fs.writeFile(output, format_2d(v) , function(err) { + if(err) { + return console.log(err); + } + }); + } + + }); + + }); }; // Connect -mpc.connect({{{SERVER_IP_PORT}}}, computation_id, options); \ No newline at end of file +mpc.connect("http://{{{SERVER_IP_PORT}}}", computation_id, options); \ No newline at end of file diff --git a/conclave/codegen/templates/jiff/server.tmpl b/conclave/codegen/templates/jiff/server.tmpl index 879b7bb8..deb9160f 100644 --- a/conclave/codegen/templates/jiff/server.tmpl +++ b/conclave/codegen/templates/jiff/server.tmpl @@ -10,4 +10,4 @@ app.use("{{{JIFF_PATH}}}/lib/ext", express.static("lib/ext")); http.listen({{{PORT}}}, function() { console.log('listening on *:{{{PORT}}}'); -} +}); diff --git a/conclave/dispatch/python.py b/conclave/dispatch/python.py index 21188ff0..e7c1b7cc 100644 --- a/conclave/dispatch/python.py +++ b/conclave/dispatch/python.py @@ -12,6 +12,6 @@ def dispatch(self, job): .format(job.name, job.code_dir)) try: - call(["python3", cmd]) + call(["python3.5", cmd]) except Exception as e: print(e) diff --git a/conclave/net/__init__.py b/conclave/net/__init__.py index 6fe60c16..9640bb6a 100644 --- a/conclave/net/__init__.py +++ b/conclave/net/__init__.py @@ -1,7 +1,7 @@ import asyncio import functools import pickle -import sys + class IAMMsg: """ Message identifying peer. """ diff --git a/conclave/workflow.py b/conclave/workflow.py index 01447910..d9410940 100644 --- a/conclave/workflow.py +++ b/conclave/workflow.py @@ -34,7 +34,7 @@ def setup(conf: Dict): oc_config = OblivcConfig(oc_path, ip_port) # JIFF - jiff_path = conf["jiff"]["jiff_path"] + jiff_path = conf["jiff"]["jiff_path"] party_count = conf["jiff"]["party_count"] server_pid = conf["jiff"]["server_pid"] server_ip = conf["jiff"]["server_ip"] diff --git a/examples/apps/local/in1.csv b/examples/apps/local/in1.csv new file mode 100644 index 00000000..38139df2 --- /dev/null +++ b/examples/apps/local/in1.csv @@ -0,0 +1,3 @@ +a,b +1,1 +2,1 \ No newline at end of file diff --git a/examples/apps/local/in2.csv b/examples/apps/local/in2.csv new file mode 100644 index 00000000..d63e6a3e --- /dev/null +++ b/examples/apps/local/in2.csv @@ -0,0 +1,3 @@ +a,c +1,1 +2,1 \ No newline at end of file diff --git a/examples/apps/local/local.py b/examples/apps/local/local.py new file mode 100644 index 00000000..1511f91b --- /dev/null +++ b/examples/apps/local/local.py @@ -0,0 +1,43 @@ +""" +Simple example workflow for MOC deployment of Conclave +""" +import conclave.lang as sal +from conclave.utils import * +from conclave import workflow + + +def protocol(): + """ + Define inputs and operations to be performed between them. + """ + + # define input columns + cols_in_a = [ + defCol('a', 'INTEGER', [1]), + defCol('b', 'INTEGER', [1]), + ] + cols_in_b = [ + defCol('a', 'INTEGER', [1]), + defCol('c', 'INTEGER', [1]), + ] + + # instantiate input columns + # NOTE: input file names will correspond to the 0th arg of each create call ("in1", "in2", etc.) + in1 = sal.create("in1", cols_in_a, {1}) + in2 = sal.create("in2", cols_in_b, {1}) + + # operate on columns + # join in1 & in2 over the column 'a', name output relation 'join1' + join1 = sal.join(in1, in2, 'join1', ['a'], ['a']) + + # collect leaf node + out = sal.collect(join1, 1) + + # return root nodes + return {in1, in2} + + +if __name__ == "__main__": + + workflow.run(protocol) + diff --git a/examples/apps/local/timing_results.csv b/examples/apps/local/timing_results.csv new file mode 100644 index 00000000..c91c0710 --- /dev/null +++ b/examples/apps/local/timing_results.csv @@ -0,0 +1,2 @@ +test-workflow,0.0,0:00:00.000073 +test-workflow,0.045,0:00:00.044935 diff --git a/examples/apps/moc/conf-ca.yml b/examples/apps/moc/conf-ca.yml deleted file mode 100644 index 4d5a0e13..00000000 --- a/examples/apps/moc/conf-ca.yml +++ /dev/null @@ -1,22 +0,0 @@ ---- -# Party ID -pid: &pid 1 -workflow_name: &workflow_name !join [job-, *pid] -name: taxi -delimiter: ',' -code_path: /mnt/shared/ -spark: - cluster_prefix: &prefix ca - hdfs: - node_name: !join [*prefix, '-spark-node-0:8020'] - root: /home/ubuntu - master_url: !join ['spark://', *prefix, '-spark-node-0:7077'] - -sharemind: - parties: - - host: ca-spark-node-0 - port: 9001 - - host: cb-spark-node-0 - port: 9002 - - host: cc-spark-node-0 - port: 9003 \ No newline at end of file diff --git a/examples/apps/moc/conf-cb.yml b/examples/apps/moc/conf-cb.yml deleted file mode 100644 index d3f5b903..00000000 --- a/examples/apps/moc/conf-cb.yml +++ /dev/null @@ -1,22 +0,0 @@ ---- -# Party ID -pid: &pid 2 -workflow_name: &workflow_name !join [job-, *pid] -name: taxi -delimiter: ',' -code_path: /mnt/shared/ -spark: - cluster_prefix: &prefix cb - hdfs: - node_name: !join [*prefix, '-spark-node-0:8020'] - root: /home/ubuntu - master_url: !join ['spark://', *prefix, '-spark-node-0:7077'] - -sharemind: - parties: - - host: ca-spark-node-0 - port: 9001 - - host: cb-spark-node-0 - port: 9002 - - host: cc-spark-node-0 - port: 9003 diff --git a/examples/apps/moc/conf-cc.yml b/examples/apps/moc/conf-cc.yml deleted file mode 100644 index 53a87607..00000000 --- a/examples/apps/moc/conf-cc.yml +++ /dev/null @@ -1,22 +0,0 @@ ---- -# Party ID -pid: &pid 3 -workflow_name: &workflow_name !join [job-, *pid] -name: taxi -delimiter: ',' -code_path: /mnt/shared/ -spark: - cluster_prefix: &prefix cc - hdfs: - node_name: !join [*prefix, '-spark-node-0:8020'] - root: /home/ubuntu - master_url: !join ['spark://', *prefix, '-spark-node-0:7077'] - -sharemind: - parties: - - host: ca-spark-node-0 - port: 9001 - - host: cb-spark-node-0 - port: 9002 - - host: cc-spark-node-0 - port: 9003 \ No newline at end of file diff --git a/examples/apps/moc/moc_example.py b/examples/apps/moc/moc_example.py index 8707fd1b..fa14eb3a 100644 --- a/examples/apps/moc/moc_example.py +++ b/examples/apps/moc/moc_example.py @@ -13,34 +13,28 @@ def protocol(): # define input columns cols_in_a = [ - defCol('a', 'INTEGER', [2]), - defCol('b', 'INTEGER', [2]), + defCol('a', 'INTEGER', [1]), + defCol('b', 'INTEGER', [1]), ] cols_in_b = [ - defCol('a', 'INTEGER', [2]), - defCol('c', 'INTEGER', [2]), - ] - cols_in_c = [ - defCol('a', 'INTEGER', [2]), - defCol('d', 'INTEGER', [2]) + defCol('a', 'INTEGER', [1]), + defCol('c', 'INTEGER', [1]), ] # instantiate input columns # NOTE: input file names will correspond to the 0th arg of each create call ("in1", "in2", etc.) - in1 = sal.create("in1", cols_in_a, {2}) - in2 = sal.create("in2", cols_in_b, {2}) - in3 = sal.create("in3", cols_in_c, {2}) + in1 = sal.create("in1", cols_in_a, {1}) + in2 = sal.create("in2", cols_in_b, {1}) # operate on columns # join in1 & in2 over the column 'a', name output relation 'join1' join1 = sal.join(in1, in2, 'join1', ['a'], ['a']) - join2 = sal.join(join1, in3, 'join2', ['a'], ['a']) # collect leaf node - out = sal.collect(join2, 2) + out = sal.collect(join1, 2) # return root nodes - return {in1, in2, in3} + return {in1, in2} if __name__ == "__main__": diff --git a/examples/cfg-ex.yml b/examples/cfg-ex.yml index ec40d467..c89640ac 100644 --- a/examples/cfg-ex.yml +++ b/examples/cfg-ex.yml @@ -20,6 +20,14 @@ oblivc: oc_path: ip_port: : +# JIFF +jiff: + jiff_path: + party_count: + server_pid: + server_ip: + server_port: + # NET net: parties: diff --git a/examples/jiff_codegen/jiff_ex.py b/examples/jiff_codegen/jiff_ex.py index 81f9e5ca..eb904ce3 100644 --- a/examples/jiff_codegen/jiff_ex.py +++ b/examples/jiff_codegen/jiff_ex.py @@ -92,10 +92,7 @@ def project(): in1 = in_rels[0] in2 = in_rels[1] - cl1 = sal._close(in1, "cl1", set([1, 2])) - cl2 = sal._close(in2, "cl2", set([1, 2])) - - rel = sal.concat([cl1, cl2], "rel") + rel = sal.concat([in1, in2], "rel") proj = sal.project(rel, 'proj1', ['b', 'a', 'c']) diff --git a/examples/oblivc/local-cfg-two.yml b/examples/oblivc/local-cfg-two.yml deleted file mode 100644 index e5ad4dba..00000000 --- a/examples/oblivc/local-cfg-two.yml +++ /dev/null @@ -1,54 +0,0 @@ -# GENERAL -pid: 2 -all_pids: [1,2] -workflow_name: oc_test -use_swift: False - -# TODO: these could be hardcoded to /tmp/_in/, -# /tmp/_in/, and /tmp/_code -# for use on OS containers. -input_path: /Users/ben/Desktop/ -output_path: /Users/ben/Desktop/ -code_path: /Users/ben/Desktop/protocol2/ - -# SPARK -spark: - master_url: local - -# OBLIV-C -oblivc: - oc_path: /Users/ben/Desktop/dev/obliv-c/bin/oblivcc - ip_port: localhost:9000 - -# NET -net: - parties: - - host: localhost - port: 9001 - - host: localhost - port: 9002 - -# SWIFT -swift: - source: - AUTHORIZATION: - osAuthUrl: - username: - password: - PROJECT: - osProjectDomain: - osProjectName: - DATA: - container_name: - files: - dest: - AUTHORIZATION: - osAuthUrl: - username: - password: - PROJECT: - osProjectDomain: - osProjectName: - DATA: - container_name: - files: \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 1e276c6f..ab473a55 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,4 @@ pystache>=0.5.4 PyYAML>=3.12 python-keystoneclient python-swiftclient - +nose