From 8ceda1c469a83e118b95e2acbd3e7475d05fbd92 Mon Sep 17 00:00:00 2001 From: Alan King Date: Tue, 15 May 2018 14:29:26 -0400 Subject: [PATCH 01/96] [#126] Add test for size attribute in iRODSReplica Adds a test for fetching the size attribute of two replicas of a given data object to see that they are different. --- irods/test/data_obj_test.py | 59 +++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index 424338e..380ee63 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -606,6 +606,65 @@ def test_repave_replicas(self): for resource in ufs_resources: resource.remove() + def test_get_replica_size(self): + session = self.sess + + # Can't do one step open/create with older servers + if session.server_version <= (4, 1, 4): + self.skipTest('For iRODS 4.1.5 and newer') + + # test vars + test_dir = '/tmp' + filename = 'get_replica_size_test_file' + test_file = os.path.join(test_dir, filename) + collection = self.coll.path + + # make random 16byte binary file + original_size = 16 + with open(test_file, 'wb') as f: + f.write(os.urandom(original_size)) + + # make ufs resources + ufs_resources = [] + for i in range(2): + resource_name = 'ufs{}'.format(i) + resource_type = 'unixfilesystem' + resource_host = session.host + resource_path = '/tmp/{}'.format(resource_name) + ufs_resources.append(session.resources.create( + resource_name, resource_type, resource_host, resource_path)) + + # put file in test collection and replicate + obj_path = '{collection}/{filename}'.format(**locals()) + options = {kw.DEST_RESC_NAME_KW: ufs_resources[0].name} + session.data_objects.put(test_file, collection + '/', **options) + session.data_objects.replicate(obj_path, ufs_resources[1].name) + + # make random 32byte binary file + new_size = 32 + with open(test_file, 'wb') as f: + f.write(os.urandom(new_size)) + + # overwrite existing replica 0 with new file + options = {kw.FORCE_FLAG_KW: '', kw.DEST_RESC_NAME_KW: ufs_resources[0].name} + session.data_objects.put(test_file, collection + '/', **options) + + # delete file + os.remove(test_file) + + # ensure that sizes of the replicas are distinct + obj = session.data_objects.get(obj_path, test_dir) + self.assertEqual(obj.replicas[0].size, new_size) + self.assertEqual(obj.replicas[1].size, original_size) + + # remove object + obj.unlink(force=True) + # delete file + os.remove(test_file) + + # remove ufs resources + for resource in ufs_resources: + resource.remove() def test_obj_put_get(self): # Can't do one step open/create with older servers From 765690281bd52f5a061c2ae8bd5ee6307f47b86c Mon Sep 17 00:00:00 2001 From: Alan King Date: Tue, 15 May 2018 14:51:12 -0400 Subject: [PATCH 02/96] [#126] Add size attribute to iRODSReplica Replicas can have different sizes than the size in the catalog for a given data object (stale replicas). Therefore, this change adds a size attribute to iRODSReplica to allow users to get the size of a particular replica. 
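For illustration, a minimal sketch of how the new attribute is consumed (assuming an existing session and a data object that already has two replicas; the logical path is a placeholder):

    obj = session.data_objects.get('/tempZone/home/rods/get_replica_size_test_file')
    for replica in obj.replicas:
        # size is reported per replica, so a stale replica can show a different
        # value than the catalog size of the data object as a whole
        print(replica.number, replica.resource_name, replica.size)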
--- irods/data_object.py | 3 ++- irods/keywords.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/irods/data_object.py b/irods/data_object.py index c0a336c..7895bdd 100644 --- a/irods/data_object.py +++ b/irods/data_object.py @@ -61,7 +61,8 @@ def __init__(self, manager, parent=None, results=None): r[DataObject.replica_status], r[DataObject.resource_name], r[DataObject.path], - checksum=r[DataObject.checksum] + checksum=r[DataObject.checksum], + size=r[DataObject.size] ) for r in replicas] self._meta = None diff --git a/irods/keywords.py b/irods/keywords.py index d70ac24..6880bfe 100644 --- a/irods/keywords.py +++ b/irods/keywords.py @@ -13,7 +13,7 @@ RESC_NAME_KW = "rescName" # resource name # DEST_RESC_NAME_KW = "destRescName" # destination resource name # DEF_RESC_NAME_KW = "defRescName" # default resource name # -BACKUP_RESC_NAME_KW = "backupRescName" # destination resource name # +BACKUP_RESC_NAME_KW = "backupRescName" # backup resource name # DATA_TYPE_KW = "dataType" # data type # DATA_SIZE_KW = "dataSize" CHKSUM_KW = "chksum" From ddb9cee2982999bdf762db6704e2e42cddb0d339 Mon Sep 17 00:00:00 2001 From: Alan King Date: Tue, 25 Sep 2018 15:07:53 -0400 Subject: [PATCH 03/96] [#137] Add test for filename with ampersand --- irods/test/data_obj_test.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index 380ee63..0fc1d0e 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -959,6 +959,39 @@ def test_modDataObjMeta(self): # delete file os.remove(test_file) + def test_register_with_xml_special_chars(self): + # skip if server is remote + if self.sess.host not in ('localhost', socket.gethostname()): + self.skipTest('Requires access to server-side file(s)') + + # test vars + test_dir = '/tmp' + filename = '''aaa'"<&test&>"'_file''' + test_file = os.path.join(test_dir, filename) + collection = self.coll.path + obj_path = '{collection}/{filename}'.format(**locals()) + + # make random 4K binary file + with open(test_file, 'wb') as f: + f.write(os.urandom(1024 * 4)) + + # register file in test collection + print('registering [' + obj_path + ']') + self.sess.data_objects.register(test_file, obj_path) + + # confirm object presence + print('getting [' + obj_path + ']') + obj = self.sess.data_objects.get(obj_path) + + # in a real use case we would likely + # want to leave the physical file on disk + print('unregistering [' + obj.path + ']') + obj.unregister() + + # delete file + os.remove(test_file) + + if __name__ == '__main__': # let the tests find the parent irods lib sys.path.insert(0, os.path.abspath('../..')) From 42f5df7c9470b31dda7c5fd8efd9bb217ca60c28 Mon Sep 17 00:00:00 2001 From: Alan King Date: Tue, 25 Sep 2018 09:48:00 -0400 Subject: [PATCH 04/96] [#137] Escape StringProperty values Escape all StringProperty values for packStruct. HTML escaping was already in place for a few spots, so these have been removed. 
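For reference, the escaping being centralized here is plain XML entity substitution with quote=False, i.e. only &, < and > are rewritten; a standard-library-only sketch of that behaviour:

    try:
        from html import escape   # Python 3
    except ImportError:
        from cgi import escape    # Python 2

    # Matches the quote=False call adopted below: quotes are left untouched.
    print(escape('aaa<&test&>_file', quote=False))   # aaa&lt;&amp;test&amp;&gt;_file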
--- irods/manager/user_manager.py | 10 ---------- irods/message/property.py | 18 ++++++++++++------ irods/rule.py | 17 +++-------------- 3 files changed, 15 insertions(+), 30 deletions(-) diff --git a/irods/manager/user_manager.py b/irods/manager/user_manager.py index 3bdc1b4..d312858 100644 --- a/irods/manager/user_manager.py +++ b/irods/manager/user_manager.py @@ -1,10 +1,5 @@ from __future__ import absolute_import import logging -import six -if six.PY3: - from html import escape -else: - from cgi import escape from irods.models import User, UserGroup from irods.manager import Manager @@ -75,11 +70,6 @@ def modify(self, user_name, option, new_value, user_zone=""): current_password = self.sess.pool.account.password new_value = obf.obfuscate_new_password(new_value, current_password, conn.client_signature) - # html style escaping might have to be generalized: - # https://github.com/irods/irods/blob/4.2.1/lib/core/src/packStruct.cpp#L1913 - # https://github.com/irods/irods/blob/4.2.1/lib/core/src/packStruct.cpp#L1331-L1368 - new_value = escape(new_value, quote=False) - message_body = GeneralAdminRequest( "modify", "user", diff --git a/irods/message/property.py b/irods/message/property.py index 443d371..b4c250e 100644 --- a/irods/message/property.py +++ b/irods/message/property.py @@ -3,7 +3,10 @@ from irods.message.ordered import OrderedProperty import six - +if six.PY3: + from html import escape +else: + from cgi import escape class MessageProperty(OrderedProperty): @@ -82,24 +85,27 @@ def __init__(self, length=None): self.length = length super(StringProperty, self).__init__() + @staticmethod + def escape_xml_string(string): + return escape(string, quote=False) if six.PY2: def format(self, value): if isinstance(value, str) or isinstance(value, unicode): - return value + return self.escape_xml_string(value) - return str(value) + return self.escape_xml_string(str(value)) else: # Python 3 def format(self, value): if isinstance(value, str): - return value + return self.escape_xml_string(value) if isinstance(value, bytes): - return value.decode() + return self.escape_xml_string(value.decode()) - return str(value) + return self.escape_xml_string(str(value)) def parse(self, value): diff --git a/irods/rule.py b/irods/rule.py index c81800c..4cd26ad 100644 --- a/irods/rule.py +++ b/irods/rule.py @@ -1,18 +1,7 @@ from __future__ import absolute_import -import six from irods.message import iRODSMessage, StringStringMap, RodsHostAddress, STR_PI, MsParam, MsParamArray, RuleExecutionRequest from irods.api_number import api_number -if six.PY3: - from html import escape -else: - from cgi import escape - -import logging - -logger = logging.getLogger(__name__) - - class Rule(object): def __init__(self, session, rule_file=None, body='', params=None, output=''): self.session = session @@ -20,7 +9,7 @@ def __init__(self, session, rule_file=None, body='', params=None, output=''): if rule_file: self.load(rule_file) else: - self.body = '@external\n' + escape(body, quote=True) + self.body = '@external\n' + body if params is None: self.params = {} else: @@ -61,14 +50,14 @@ def load(self, rule_file): # parse rule else: - self.body += escape(line, quote=True) + self.body += line def execute(self): # rule input param_array = [] for label, value in self.params.items(): - inOutStruct = STR_PI(myStr=escape(value, quote=True)) + inOutStruct = STR_PI(myStr=value) param_array.append(MsParam(label=label, type='STR_PI', inOutStruct=inOutStruct)) inpParamArray = MsParamArray(paramLen=len(param_array), oprType=0, 
MsParam_PI=param_array) From 8a3f4c9393339a547e21550598a58c404666d46c Mon Sep 17 00:00:00 2001 From: Alan King Date: Wed, 26 Sep 2018 16:04:30 -0400 Subject: [PATCH 05/96] [#139] Use uppercase queries in tests MySQL needs uppercased table and column names, so the TestSpecificQuery tests have been made to accommodate this requirement. --- irods/test/query_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/irods/test/query_test.py b/irods/test/query_test.py index 6d92e51..6f85097 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -192,7 +192,7 @@ def test_query_data_name_and_id(self): self.session, test_collection_path, obj_count=test_collection_size) # make specific query - sql = "select data_name, data_id from r_data_main join r_coll_main using (coll_id) where coll_name = '{test_collection_path}'".format(**locals()) + sql = "select DATA_NAME, DATA_ID from R_DATA_MAIN join R_COLL_MAIN using (COLL_ID) where COLL_NAME = '{test_collection_path}'".format(**locals()) alias = 'list_data_name_id' columns = [DataObject.name, DataObject.id] query = SpecificQuery(self.session, sql, alias, columns) @@ -225,7 +225,7 @@ def test_query_data_name_and_id_no_columns(self): self.session, test_collection_path, obj_count=test_collection_size) # make specific query - sql = "select data_name, data_id from r_data_main join r_coll_main using (coll_id) where coll_name = '{test_collection_path}'".format(**locals()) + sql = "select DATA_NAME, DATA_ID from R_DATA_MAIN join R_COLL_MAIN using (COLL_ID) where COLL_NAME = '{test_collection_path}'".format(**locals()) alias = 'list_data_name_id' query = SpecificQuery(self.session, sql, alias) @@ -246,7 +246,7 @@ def test_query_data_name_and_id_no_columns(self): def test_register_query_twice(self): - query = SpecificQuery(self.session, sql='select data_name from r_data_main', alias='list_data_names') + query = SpecificQuery(self.session, sql='select DATA_NAME from R_DATA_MAIN', alias='list_data_names') # register query query.register() From 3a651eae7ee0443870fda39f181b3194bc6b8a42 Mon Sep 17 00:00:00 2001 From: Alan King Date: Wed, 26 Sep 2018 16:06:59 -0400 Subject: [PATCH 06/96] [#140] Remove randomization from password test --- irods/test/admin_test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/irods/test/admin_test.py b/irods/test/admin_test.py index 883acec..b341cb1 100644 --- a/irods/test/admin_test.py +++ b/irods/test/admin_test.py @@ -2,8 +2,6 @@ from __future__ import absolute_import import os import sys -import string -import random import unittest from irods.models import User from irods.exception import UserDoesNotExist, ResourceDoesNotExist @@ -332,8 +330,8 @@ def test_set_user_password(self): zone = self.sess.zone self.sess.users.create(self.new_user_name, self.new_user_type) - # make a 12 character pseudo-random password - new_password = ''.join(random.choice(string.ascii_letters + string.digits + string.punctuation) for _ in range(12)) + # make a really horrible password + new_password = '''abc123!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~Z''' self.sess.users.modify(username, 'password', new_password) # open a session as the new user From 35bef9e9099ceac0509da8381adabf49cf4a031d Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Wed, 26 Sep 2018 23:19:06 -0400 Subject: [PATCH 07/96] [#3] v0.8.1 and update changelog --- CHANGELOG.rst | 7 +++++++ irods/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index bbf916a..0aab573 100644 --- 
a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,13 @@ Changelog ========= +v0.8.1 (2018-09-27) +------------------- +- [#140] Remove randomization from password test [Alan King] +- [#139] Use uppercase queries in tests [Alan King] +- [#137] Handle filenames with ampersands [Alan King] +- [#126] Add size attribute to iRODSReplica [Alan King] + v0.8.0 (2018-05-03) ------------------- - Add rescName and replNum awareness. [Hao Xu] diff --git a/irods/version.py b/irods/version.py index 32a90a3..ef72cc0 100644 --- a/irods/version.py +++ b/irods/version.py @@ -1 +1 @@ -__version__ = '0.8.0' +__version__ = '0.8.1' From 3c46ea8968f3e3152ae5ea7f8e24e172363952fe Mon Sep 17 00:00:00 2001 From: Mattia Date: Sat, 15 Sep 2018 07:36:46 +0200 Subject: [PATCH 08/96] [#8] PAM Authentication The implemented PAM protocol is based on the following steps 1 . AUTH_PLUG_REQ_AN is sent by passing a context containing a_user, a_pw and a_ttl (defaulted to 60) 2 . irods server responds with a temporary password 3 . A a new connection is created (a combination of disconnect + connect). 4 . The temporary password is used to login as a native protocol To use PAM set authentication_scheme='pam' when creating the iRODSSession object iRODSSession(user='yourPAMuser', password='yourPAMpassword, authentication_scheme='pam', [...]) As a plus: - created a generic AuthPluginMessage to replace the GSIAuthMessage - added more constants for better readability --- irods/__init__.py | 10 +++++- irods/connection.py | 64 ++++++++++++++++++++++++++++++++------- irods/exception.py | 22 ++++++++++++++ irods/message/__init__.py | 8 ++++- 4 files changed, 91 insertions(+), 13 deletions(-) diff --git a/irods/__init__.py b/irods/__init__.py index 4f43c19..7520648 100644 --- a/irods/__init__.py +++ b/irods/__init__.py @@ -10,8 +10,16 @@ MAX_SQL_ROWS = 256 DEFAULT_CONNECTION_TIMEOUT = 120 -# Other variables AUTH_SCHEME_KEY = 'a_scheme' +AUTH_USER_KEY = 'a_user' +AUTH_PWD_KEY = 'a_pw' +AUTH_TTL_KEY = 'a_ttl' + +NATIVE_AUTH_SCHEME = 'native' + GSI_AUTH_PLUGIN = 'GSI' GSI_AUTH_SCHEME = GSI_AUTH_PLUGIN.lower() GSI_OID = "1.3.6.1.4.1.3536.1.1" # taken from http://j.mp/2hDeczm + +PAM_AUTH_PLUGIN = 'PAM' +PAM_AUTH_SCHEME = PAM_AUTH_PLUGIN.lower() diff --git a/irods/connection.py b/irods/connection.py index 0a26eab..fa46acf 100644 --- a/irods/connection.py +++ b/irods/connection.py @@ -4,19 +4,22 @@ import struct import hashlib import six -import struct +# import struct import os import ssl from irods.message import ( - iRODSMessage, StartupPack, AuthResponse, AuthChallenge, + iRODSMessage, StartupPack, AuthResponse, AuthChallenge, AuthPluginOut, OpenedDataObjRequest, FileSeekResponse, StringStringMap, VersionResponse, - GSIAuthMessage, ClientServerNegotiation, Error) + PluginAuthMessage, ClientServerNegotiation, Error) from irods.exception import get_exception_by_code, NetworkException from irods import ( MAX_PASSWORD_LENGTH, RESPONSE_LEN, - AUTH_SCHEME_KEY, GSI_AUTH_PLUGIN, GSI_AUTH_SCHEME, GSI_OID) + AUTH_SCHEME_KEY, AUTH_USER_KEY, AUTH_PWD_KEY, AUTH_TTL_KEY, + NATIVE_AUTH_SCHEME, + GSI_AUTH_PLUGIN, GSI_AUTH_SCHEME, GSI_OID, + PAM_AUTH_SCHEME) from irods.client_server_negotiation import ( perform_negotiation, validate_policy, @@ -42,11 +45,13 @@ def __init__(self, pool, account): scheme = self.account.authentication_scheme - if scheme == 'native': + if scheme == NATIVE_AUTH_SCHEME: self._login_native() - elif scheme == 'gsi': + elif scheme == GSI_AUTH_SCHEME: self.client_ctx = None self._login_gsi() + elif scheme == PAM_AUTH_SCHEME: + 
self._login_pam() else: raise ValueError("Unknown authentication scheme %s" % scheme) @@ -334,9 +339,10 @@ def handshake(self, target): def gsi_client_auth_request(self): # Request for authentication with GSI on current user - message_body = GSIAuthMessage( + + message_body = PluginAuthMessage( auth_scheme_=GSI_AUTH_PLUGIN, - context_='a_user=%s' % self.account.client_user + context_='%s=%s' % (AUTH_USER_KEY, self.account.client_user) ) # GSI = 1201 # https://github.com/irods/irods/blob/master/lib/api/include/apiNumber.h#L158 @@ -381,6 +387,38 @@ def _login_gsi(self): logger.info("GSI authorization validated") + def _login_pam(self): + + ctx_user = '%s=%s' % (AUTH_USER_KEY, self.account.client_user) + ctx_pwd = '%s=%s' % (AUTH_PWD_KEY, self.account.password) + ctx_ttl = '%s=%s' % (AUTH_TTL_KEY, "60") + + ctx = ";".join([ctx_user, ctx_pwd, ctx_ttl]) + + message_body = PluginAuthMessage( + auth_scheme_=PAM_AUTH_SCHEME, + context_=ctx + ) + + auth_req = iRODSMessage( + msg_type='RODS_API_REQ', + msg=message_body, + # int_info=725 + int_info=1201 + ) + + self.send(auth_req) + # Getting the new password + output_message = self.recv() + + auth_out = output_message.get_main_message(AuthPluginOut) + + self.disconnect() + self._connect() + self._login_native(password=auth_out.result_) + + logger.info("PAM authorization validated") + def read_file(self, desc, size=-1, buffer=None): if size < 0: size = len(buffer) @@ -408,7 +446,11 @@ def read_file(self, desc, size=-1, buffer=None): return response.bs - def _login_native(self): + def _login_native(self, password=None): + + # Default case, PAM login will send a new password + if password is None: + password = self.account.password # authenticate auth_req = iRODSMessage(msg_type='RODS_API_REQ', int_info=703) @@ -430,11 +472,11 @@ def _login_native(self): if six.PY3: challenge = challenge.strip() padded_pwd = struct.pack( - "%ds" % MAX_PASSWORD_LENGTH, self.account.password.encode( + "%ds" % MAX_PASSWORD_LENGTH, password.encode( 'utf-8').strip()) else: padded_pwd = struct.pack( - "%ds" % MAX_PASSWORD_LENGTH, self.account.password) + "%ds" % MAX_PASSWORD_LENGTH, password) m = hashlib.md5() m.update(challenge) diff --git a/irods/exception.py b/irods/exception.py index a1976a1..6f67500 100644 --- a/irods/exception.py +++ b/irods/exception.py @@ -4,6 +4,8 @@ from __future__ import absolute_import import six + + class PycommandsException(Exception): pass @@ -1874,3 +1876,23 @@ class PHP_REQUEST_STARTUP_ERR(PHPException): class PHP_OPEN_SCRIPT_FILE_ERR(PHPException): code = -1602000 + + +class PAMException(iRODSException): + pass + + +class PAM_AUTH_NOT_BUILT_INTO_CLIENT(PAMException): + code = -991000 + + +class PAM_AUTH_NOT_BUILT_INTO_SERVER(PAMException): + code = -992000 + + +class PAM_AUTH_PASSWORD_FAILED(PAMException): + code = -993000 + + +class PAM_AUTH_PASSWORD_INVALID_TTL(PAMException): + code = -994000 diff --git a/irods/message/__init__.py b/irods/message/__init__.py index 6e187b9..f13f12c 100644 --- a/irods/message/__init__.py +++ b/irods/message/__init__.py @@ -223,6 +223,12 @@ class AuthChallenge(Message): _name = 'authRequestOut_PI' challenge = BinaryProperty(64) + +class AuthPluginOut(Message): + _name = 'authPlugReqOut_PI' + result_ = StringProperty() + # result_ = BinaryProperty(16) + # define InxIvalPair_PI "int iiLen; int *inx(iiLen); int *ivalue(iiLen);" @@ -232,7 +238,7 @@ class BinBytesBuf(Message): buf = BinaryProperty() -class GSIAuthMessage(Message): +class PluginAuthMessage(Message): _name = 'authPlugReqInp_PI' auth_scheme_ = 
StringProperty() context_ = StringProperty() From bc1ec89a535fb727579899af2bad9f805c25e7a7 Mon Sep 17 00:00:00 2001 From: Alan King Date: Mon, 3 Dec 2018 12:14:12 -0500 Subject: [PATCH 09/96] [#5] Remove commented-out import --- irods/connection.py | 1 - 1 file changed, 1 deletion(-) diff --git a/irods/connection.py b/irods/connection.py index fa46acf..7d25eba 100644 --- a/irods/connection.py +++ b/irods/connection.py @@ -4,7 +4,6 @@ import struct import hashlib import six -# import struct import os import ssl From 8330b7a7a6c30d0e65d64105f429532ad1af9153 Mon Sep 17 00:00:00 2001 From: Jonathan Landrum Date: Mon, 3 Dec 2018 13:08:38 -0600 Subject: [PATCH 10/96] [irods#5] Add `.idea` directory to `.gitignore` --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 120928f..6a1275a 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ nosetests.xml .settings .project .pydevproject +.idea # Vim *.s[a-w][a-z] From 9c4cc4a2bdfccd6c43bfb1dfdf5f75cb6d1f56a2 Mon Sep 17 00:00:00 2001 From: Chris Klimowski Date: Mon, 3 Dec 2018 11:00:46 -0700 Subject: [PATCH 11/96] Fix specific query argument labeling Use start=1 on enumerate --- irods/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/irods/query.py b/irods/query.py index 58fbb77..f112371 100644 --- a/irods/query.py +++ b/irods/query.py @@ -279,7 +279,7 @@ def execute(self, limit=MAX_SQL_ROWS, offset=0, options=0, conditions=None): conditions = StringStringMap({}) sql_args = {} - for i, arg in enumerate(self._args[:10]): + for i, arg in enumerate(self._args[:10], start=1): sql_args['arg{}'.format(i)] = arg message_body = SpecificQueryRequest(sql=target, From d77cc044c63fd4cef7371957070572b96528abc3 Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Thu, 6 Dec 2018 15:24:16 -0500 Subject: [PATCH 12/96] [#150] test for specific query with arguments --- irods/test/query_test.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/irods/test/query_test.py b/irods/test/query_test.py index 6f85097..55c188c 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -270,7 +270,15 @@ def test_list_specific_queries(self): self.assertIn('SELECT', result[1].upper()) # query string - def test_list_specific_queries_with_wrong_alias(self): + def test_list_specific_queries_with_arguments(self): + query = SpecificQuery(self.session, alias='lsl', args=['%OFFSET%']) + + for result in query: + self.assertIsNotNone(result[0]) # query alias + self.assertIn('SELECT', result[1].upper()) # query string + + + def test_list_specific_queries_with_unknown_alias(self): query = SpecificQuery(self.session, alias='foo') with self.assertRaises(CAT_UNKNOWN_SPECIFIC_QUERY): From 53ac78d9dbac02191caf41bff359ba166b67c699 Mon Sep 17 00:00:00 2001 From: Jonathan Landrum Date: Fri, 30 Nov 2018 12:46:35 -0600 Subject: [PATCH 13/96] [#148] DataObjectManager.put() can return the new data_object --- irods/manager/data_object_manager.py | 5 +++- irods/test/data_obj_test.py | 40 ++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index 513b439..3fd8407 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -57,7 +57,7 @@ def get(self, path, file=None, **options): return iRODSDataObject(self, parent, results) - def put(self, file, irods_path, **options): + def put(self, file, irods_path, return_data_object=False, **options): if 
irods_path.endswith('/'): obj = irods_path + os.path.basename(file) else: @@ -75,6 +75,9 @@ def put(self, file, irods_path, **options): options[kw.UPDATE_REPL_KW] = '' self.replicate(obj, **options) + if return_data_object: + return self.get(obj) + def create(self, path, resource=None, **options): options[kw.DATA_TYPE_KW] = 'generic' diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index 0fc1d0e..8ff3952 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -825,6 +825,46 @@ def test_obj_put_to_default_resource_from_env_file(self): os.remove(new_env_file) + def test_obj_put_and_return_data_object(self): + # Can't do one step open/create with older servers + if self.sess.server_version <= (4, 1, 4): + self.skipTest('For iRODS 4.1.5 and newer') + + # make another UFS resource + session = self.sess + resource_name = 'ufs' + resource_type = 'unixfilesystem' + resource_host = session.host + resource_path = '/tmp/' + resource_name + session.resources.create(resource_name, resource_type, resource_host, resource_path) + + # set default resource to new UFS resource + session.default_resource = resource_name + + # make a local file with random text content + content = ''.join(random.choice(string.printable) for _ in range(1024)) + filename = 'testfile.txt' + file_path = os.path.join('/tmp', filename) + with open(file_path, 'w') as f: + f.write(content) + + # put file + collection = self.coll_path + obj_path = '{collection}/{filename}'.format(**locals()) + + new_file = session.data_objects.put(file_path, obj_path, return_data_object=True) + + # get object and confirm resource + obj = session.data_objects.get(obj_path) + self.assertEqual(new_file.replicas[0].resource_name, obj.replicas[0].resource_name) + + # cleanup + os.remove(file_path) + obj.unlink(force=True) + session.resources.remove(resource_name) + + + def test_force_get(self): # Can't do one step open/create with older servers if self.sess.server_version <= (4, 1, 4): From 5e41bdb9d3e79a744f1ad1feeecaf2e697492f63 Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Fri, 7 Dec 2018 09:47:24 -0500 Subject: [PATCH 14/96] Update README.rst --- README.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 4c2698c..ba5f013 100644 --- a/README.rst +++ b/README.rst @@ -2,38 +2,38 @@ Python iRODS Client (PRC) ========================= -`iRODS `_ is an open source distributed data management system. This is a client API implemented in python. +`iRODS `_ is an open source distributed data management system. This is a client API implemented in Python. 
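A minimal quick start (the host, zone, and credentials below are placeholders for a real deployment):

>>> from irods.session import iRODSSession
>>> with iRODSSession(host='localhost', port=1247, user='rods', password='rods', zone='tempZone') as session:
...     coll = session.collections.get('/tempZone/home/rods')
...     print([obj.name for obj in coll.data_objects])
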
Currently supported: -- Establish a connection to iRODS, authenticate -- Implement basic Gen Queries (select columns and filtering) -- Support more advanced Gen Queries with limits, offsets, and aggregations +- Establish a connection to iRODS +- Authenticate via password, GSI, PAM +- iRODS connection over SSL +- Implement basic GenQueries (select columns and filtering) +- Support more advanced GenQueries with limits, offsets, and aggregations - Query the collections and data objects within a collection - Execute direct SQL queries - Execute iRODS rules - Support read, write, and seek operations for files - PUT/GET data objects -- Create data objects -- Delete data objects - Create collections +- Rename collections - Delete collections +- Create data objects - Rename data objects -- Rename collections +- Delete data objects - Register files and directories - Query metadata for collections and data objects - Add, edit, remove metadata - Replicate data objects to different resource servers - Connection pool management -- Implement gen query result sets as lazy queries +- Implement GenQuery result sets as lazy queries - Return empty result sets when CAT_NO_ROWS_FOUND is raised - Manage permissions - Manage users and groups - Manage resources -- GSI authentication - Unicode strings - Ticket based access -- iRODS connection over SSL - Python 2.7, 3.4 or newer From 800488b7b3af97342f229945a8da69c931f09180 Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Fri, 18 Jan 2019 11:57:36 -0500 Subject: [PATCH 15/96] Update README.rst Add example for searching metadata --- README.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/README.rst b/README.rst index ba5f013..3ac3300 100644 --- a/README.rst +++ b/README.rst @@ -235,6 +235,25 @@ General queries /tempZone/home/rods/manager/user_manager.py id=212669 size=5509 /tempZone/home/rods/manager/user_manager.pyc id=212658 size=5233 +Query using other models: + +>>> from irods.column import Criterion +>>> from irods.models import DataObject, DataObjectMeta, Collection, CollectionMeta +>>> from irods.session import iRODSSession +>>> import os +>>> env_file = os.path.expanduser('~/.irods/irods_environment.json') +>>> with iRODSSession(irods_env_file=env_file) as session: +... # by metadata +... # equivalent to 'imeta qu -C type like Project' +... results = session.query(Collection, CollectionMeta).filter( \ +... Criterion('=', CollectionMeta.name, 'type')).filter( \ +... Criterion('like', CollectionMeta.value, '%Project%')) +... for r in results: +... print(r[Collection.name], r[CollectionMeta.name], r[CollectionMeta.value], r[CollectionMeta.units]) +... 
+('/tempZone/home/rods', 'type', 'Project', None) + + Query with aggregation(min, max, sum, avg, count): >>> with iRODSSession(irods_env_file=env_file) as session: From c5072ad6cad4743465173ca14747956840203918 Mon Sep 17 00:00:00 2001 From: Alan King Date: Fri, 15 Mar 2019 12:23:45 -0400 Subject: [PATCH 16/96] [#124] Add test for utf8 query --- irods/test/data_obj_test.py | 3 --- irods/test/query_test.py | 39 +++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index 8ff3952..8782cfa 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -1016,16 +1016,13 @@ def test_register_with_xml_special_chars(self): f.write(os.urandom(1024 * 4)) # register file in test collection - print('registering [' + obj_path + ']') self.sess.data_objects.register(test_file, obj_path) # confirm object presence - print('getting [' + obj_path + ']') obj = self.sess.data_objects.get(obj_path) # in a real use case we would likely # want to leave the physical file on disk - print('unregistering [' + obj.path + ']') obj.unregister() # delete file diff --git a/irods/test/query_test.py b/irods/test/query_test.py index 55c188c..8c5e04b 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -1,7 +1,10 @@ #! /usr/bin/env python +# -*- coding: utf-8 -*- from __future__ import absolute_import import os +import six import sys +import tempfile import unittest from datetime import datetime from irods.models import User, Collection, DataObject, Resource @@ -171,6 +174,42 @@ def test_query_with_between_condition(self): res_str = '{} {}/{}'.format(result[Resource.name], result[Collection.name], result[DataObject.name]) self.assertIn(session.zone, res_str) + @unittest.skipIf(six.PY3, 'Test is for python2 only') + def test_query_for_data_object_with_utf8_name_python2(self): + filename_prefix = '_prefix_ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸ' + _,test_file = tempfile.mkstemp(prefix=filename_prefix) + obj_path = os.path.join(self.coll.path, os.path.basename(test_file)) + try: + self.sess.data_objects.register(test_file, obj_path) + results = self.sess.query(DataObject, Collection.name).filter(DataObject.path == test_file).first() + result_logical_path = os.path.join(results[Collection.name], results[DataObject.name]) + result_physical_path = results[DataObject.path] + self.assertEqual(result_logical_path, obj_path.decode('utf8')) + self.assertEqual(result_physical_path, test_file.decode('utf8')) + finally: + self.sess.data_objects.unregister(obj_path) + os.remove(test_file) + + @unittest.skipIf(six.PY2, 'Test is for python3 only') + def test_query_for_data_object_with_utf8_name_python3(self): + filename_prefix = u'_prefix_ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸ' + _,encoded_test_file = tempfile.mkstemp(prefix=filename_prefix.encode('utf-8')) + self.assertTrue(os.path.exists(encoded_test_file)) + + test_file = encoded_test_file.decode('utf-8') + obj_path = os.path.join(self.coll.path, os.path.basename(test_file)) + + try: + self.sess.data_objects.register(test_file, obj_path) + + results = self.sess.query(DataObject, Collection.name).filter(DataObject.path == test_file).first() + result_logical_path = os.path.join(results[Collection.name], results[DataObject.name]) + result_physical_path = results[DataObject.path] + self.assertEqual(result_logical_path, obj_path) + self.assertEqual(result_physical_path, test_file) + finally: + self.sess.data_objects.unregister(obj_path) + os.remove(encoded_test_file) class 
TestSpecificQuery(unittest.TestCase): From 4862d3fba9c9a91ff510f99d05418b8326f08127 Mon Sep 17 00:00:00 2001 From: Alan King Date: Fri, 15 Mar 2019 11:58:12 -0400 Subject: [PATCH 17/96] [#124] Convert strings going to irods to Unicode --- irods/column.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/irods/column.py b/irods/column.py index f4f644f..e097ed8 100644 --- a/irods/column.py +++ b/irods/column.py @@ -1,4 +1,5 @@ from __future__ import absolute_import +import six from datetime import datetime from calendar import timegm @@ -113,6 +114,12 @@ def to_python(string): @staticmethod def to_irods(data): + try: + # Convert to Unicode string (aka decode) + data = six.text_type(data, 'utf-8', 'replace') + except TypeError: + # Some strings are already Unicode so they do not need decoding + pass return u"'{}'".format(data) From 556133acb837294edad616419d7030b4c4b72d8a Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Thu, 9 May 2019 23:20:40 -0400 Subject: [PATCH 18/96] add SSL connection example to README.md --- README.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 3ac3300..dd6d317 100644 --- a/README.rst +++ b/README.rst @@ -58,18 +58,22 @@ Uninstalling pip uninstall python-irodsclient -Establishing a connection +Establishing a (secure) connection ------------------------- -Using environment files in ``~/.irods/``: +Using environment files (including any SSL settings) in ``~/.irods/``: >>> import os +>>> import ssl >>> from irods.session import iRODSSession >>> try: ... env_file = os.environ['IRODS_ENVIRONMENT_FILE'] ... except KeyError: ... env_file = os.path.expanduser('~/.irods/irods_environment.json') ... +>>> ssl_context = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH, cafile=None, capath=None, cadata=None) +>>> ssl_settings = {'ssl_context': ssl_context} +>>> with iRODSSession(irods_env_file=env_file, **ssl_settings) as session: >>> with iRODSSession(irods_env_file=env_file) as session: ... pass ... From d1a46ace868deeabff3895de77a0d8b7528e2972 Mon Sep 17 00:00:00 2001 From: Mathijs Koymans Date: Thu, 6 Jun 2019 09:28:14 +0200 Subject: [PATCH 19/96] Allow dynamic I/O for rule from file --- irods/rule.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/irods/rule.py b/irods/rule.py index 4cd26ad..0471cda 100644 --- a/irods/rule.py +++ b/irods/rule.py @@ -6,19 +6,21 @@ class Rule(object): def __init__(self, session, rule_file=None, body='', params=None, output=''): self.session = session + self.params = {} + self.output = '' + if rule_file: self.load(rule_file) else: self.body = '@external\n' + body - if params is None: - self.params = {} - else: - self.params = params + + # overwrite params and output if received arguments + if params is not None: + self.params = params + if output != '': self.output = output def load(self, rule_file): - self.params = {} - self.output = '' self.body = '@external\n' # parse rule file From be2e3b64c4fc185b045eb10c1595a5ac26c34afa Mon Sep 17 00:00:00 2001 From: bh9 Date: Wed, 19 Jun 2019 17:01:40 +0100 Subject: [PATCH 20/96] Include resc_hier in replica information Finding out the full resource hierarchy is important for dealing with replicas. In the past, we had a naming scheme that allowed us to work around it, but now that naming scheme doesn't provide what we need. 
Including resc_hier in the replicas information allows us to do this the right way --- irods/data_object.py | 4 ++- irods/test/data_obj_test.py | 67 +++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/irods/data_object.py b/irods/data_object.py index 7895bdd..2bc823e 100644 --- a/irods/data_object.py +++ b/irods/data_object.py @@ -23,11 +23,12 @@ def irods_basename(path): class iRODSReplica(object): - def __init__(self, number, status, resource_name, path, **kwargs): + def __init__(self, number, status, resource_name, path, resc_hier, **kwargs): self.number = number self.status = status self.resource_name = resource_name self.path = path + self.resc_hier = resc_hier for key, value in kwargs.items(): setattr(self, key, value) @@ -61,6 +62,7 @@ def __init__(self, manager, parent=None, results=None): r[DataObject.replica_status], r[DataObject.resource_name], r[DataObject.path], + r[DataObject.resc_hier], checksum=r[DataObject.checksum], size=r[DataObject.size] ) for r in replicas] diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index 8782cfa..7a211a0 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -1028,6 +1028,73 @@ def test_register_with_xml_special_chars(self): # delete file os.remove(test_file) + def test_get_data_objects(self): + # Can't do one step open/create with older servers + if self.sess.server_version <= (4, 1, 4): + self.skipTest('For iRODS 4.1.5 and newer') + + # test vars + test_dir = '/tmp' + filename = 'get_data_objects_test_file' + test_file = os.path.join(test_dir, filename) + collection = self.coll.path + + # make random 16byte binary file + original_size = 16 + with open(test_file, 'wb') as f: + f.write(os.urandom(original_size)) + + # make ufs resources + ufs_resources = [] + for i in range(2): + resource_name = 'ufs{}'.format(i) + resource_type = 'unixfilesystem' + resource_host = self.sess.host + resource_path = '/tmp/{}'.format(resource_name) + ufs_resources.append(self.sess.resources.create( + resource_name, resource_type, resource_host, resource_path)) + + # make passthru resource and add ufs1 as a child + passthru_resource = self.sess.resources.create('pt', 'passthru') + self.sess.resources.add_child(passthru_resource.name, ufs_resources[1].name) + + # put file in test collection and replicate + obj_path = '{collection}/{filename}'.format(**locals()) + options = {kw.DEST_RESC_NAME_KW: ufs_resources[0].name} + self.sess.data_objects.put(test_file, '{collection}/'.format(**locals()), **options) + self.sess.data_objects.replicate(obj_path, passthru_resource.name) + + # ensure that replica info is populated + obj = self.sess.data_objects.get(obj_path) + for i in ["number","status","resource_name","path","resc_hier"]: + self.assertIsNotNone(obj.replicas[0].__getattribute__(i)) + self.assertIsNotNone(obj.replicas[1].__getattribute__(i)) + + # ensure replica info is sensible + for i in range(2): + self.assertEqual(obj.replicas[i].number, i) + self.assertEqual(obj.replicas[i].status, '1') + self.assertEqual(obj.replicas[i].path.split('/')[-1], filename) + self.assertEqual(obj.replicas[i].resc_hier.split(';')[-1], ufs_resources[i].name) + + self.assertEqual(obj.replicas[0].resource_name, ufs_resources[0].name) + if self.sess.server_version < (4, 2, 0): + self.assertEqual(obj.replicas[i].resource_name, passthru_resource.name) + else: + self.assertEqual(obj.replicas[i].resource_name, ufs_resources[1].name) + self.assertEqual(obj.replicas[1].resc_hier.split(';')[0], 
passthru_resource.name) + + # remove object + obj.unlink(force=True) + # delete file + os.remove(test_file) + + # remove resources + self.sess.resources.remove_child(passthru_resource.name, ufs_resources[1].name) + passthru_resource.remove() + for resource in ufs_resources: + resource.remove() + if __name__ == '__main__': # let the tests find the parent irods lib From cf54e9989016c8dd340a87a427374e5ed4ddb204 Mon Sep 17 00:00:00 2001 From: Chris Smeele Date: Tue, 6 Aug 2019 16:28:50 +0200 Subject: [PATCH 21/96] Fix CAT_STATEMENT_TABLE_FULL by auto closing queries --- irods/exception.py | 3 +++ irods/query.py | 25 +++++++++++++++++-------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/irods/exception.py b/irods/exception.py index 6f67500..0f4b434 100644 --- a/irods/exception.py +++ b/irods/exception.py @@ -1133,6 +1133,9 @@ class CAT_TABLE_ACCESS_DENIED(CatalogLibraryException): class CAT_UNKNOWN_SPECIFIC_QUERY(CatalogLibraryException): code = -853000 +class CAT_STATEMENT_TABLE_FULL(CatalogLibraryException): + code = -860000 + class RDSException(iRODSException): pass diff --git a/irods/query.py b/irods/query.py index f112371..7c8d2e0 100644 --- a/irods/query.py +++ b/irods/query.py @@ -184,15 +184,20 @@ def all(self): def get_batches(self): result_set = self.execute() - yield result_set - while result_set.continue_index > 0: - try: - result_set = self.continue_index( - result_set.continue_index).execute() - yield result_set - except CAT_NO_ROWS_FOUND: - break + try: + yield result_set + + while result_set.continue_index > 0: + try: + result_set = self.continue_index( + result_set.continue_index).execute() + yield result_set + except CAT_NO_ROWS_FOUND: + break + except GeneratorExit: + if result_set.continue_index > 0: + self.continue_index(result_set.continue_index).close() def get_results(self): for result_set in self.get_batches(): @@ -204,6 +209,8 @@ def __iter__(self): def one(self): results = self.execute() + if results.continue_index > 0: + self.continue_index(results.continue_index).close() if not len(results): raise NoResultFound() if len(results) > 1: @@ -213,6 +220,8 @@ def one(self): def first(self): query = self.limit(1) results = query.execute() + if results.continue_index > 0: + query.continue_index(results.continue_index).close() if not len(results): return None else: From 42506a1a87e2fb787cde9e80071f5a389057a679 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Sun, 11 Aug 2019 14:11:05 -0400 Subject: [PATCH 22/96] [#166] Test freeing statements in unfinished query --- irods/test/query_test.py | 133 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 130 insertions(+), 3 deletions(-) diff --git a/irods/test/query_test.py b/irods/test/query_test.py index 8c5e04b..dafac9f 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -7,16 +7,35 @@ import tempfile import unittest from datetime import datetime -from irods.models import User, Collection, DataObject, Resource +from irods.models import User, Collection, DataObject, DataObjectMeta, Resource from irods.exception import MultipleResultsFound, CAT_UNKNOWN_SPECIFIC_QUERY, CAT_INVALID_ARGUMENT from irods.query import SpecificQuery from irods.column import Like, Between +from irods.meta import iRODSMeta from irods import MAX_SQL_ROWS import irods.test.helpers as helpers +from six.moves import range as py3_range +IRODS_STATEMENT_TABLE_SIZE = 50 + +def remove_unused_metadata (sess) : + + from irods.message import GeneralAdminRequest, iRODSMessage + from irods.api_number import api_number + 
message_body = GeneralAdminRequest( 'rm', 'unusedAVUs', '','','','') + req = iRODSMessage("RODS_API_REQ", msg = message_body,int_info=api_number['GENERAL_ADMIN_AN']) + with sess.pool.get_connection() as conn: + conn.send(req) + response=conn.recv() + if (response.int_info != 0): raise RuntimeError("Error removing unused AVU's") class TestQuery(unittest.TestCase): + Iterate_to_exhaust_statement_table = range(IRODS_STATEMENT_TABLE_SIZE + 1) + + More_than_one_batch = 2*MAX_SQL_ROWS # may need to increase if PRC default page + # size is increased beyond 500 + def setUp(self): self.sess = helpers.make_session() @@ -29,14 +48,12 @@ def setUp(self): self.coll = self.sess.collections.create(self.coll_path) self.obj = self.sess.data_objects.create(self.obj_path) - def tearDown(self): '''Remove test data and close connections ''' self.coll.remove(recurse=True, force=True) self.sess.cleanup() - def test_collections_query(self): # collection query test result = self.sess.query(Collection.id, Collection.name).all() @@ -211,6 +228,116 @@ def test_query_for_data_object_with_utf8_name_python3(self): self.sess.data_objects.unregister(obj_path) os.remove(encoded_test_file) + class Issue_166_context: + ''' + For [irods/python-irodsclient#166] related tests + ''' + + def __init__(self, session, coll_path='test_collection_issue_166', num_objects=8, num_avus_per_object=0): + self.session = session + if '/' not in coll_path: + coll_path = '/{}/home/{}/{}'.format(self.session.zone, self.session.username, coll_path) + self.coll_path = coll_path + self.num_objects = num_objects + self.test_collection = None + self.nAVUs = num_avus_per_object + + def __enter__(self): # - prepare for context block ("with" statement) + + self.test_collection = helpers.make_test_collection( self.session, self.coll_path, obj_count=self.num_objects) + q_params = (Collection.name, DataObject.name) + + if self.nAVUs > 0: + + # - set the AVUs on the collection's objects: + for data_obj_path in map(lambda d:d[Collection.name]+"/"+d[DataObject.name], + self.session.query(*q_params).filter(Collection.name == self.test_collection.path)): + data_obj = self.session.data_objects.get(data_obj_path) + for key in (str(x) for x in py3_range(self.nAVUs)): + data_obj.metadata[key] = iRODSMeta(key, "1") + + # - in subsequent test searches, match on each AVU of every data object in the collection: + q_params += (DataObjectMeta.name,) + + # - The "with" statement receives, as context variable, a zero-arg function to build the query + return lambda : self.session.query( *q_params ).filter( Collection.name == self.test_collection.path) + + def __exit__(self,*_): # - clean up after context block + + if self.test_collection is not None: + self.test_collection.remove(recurse=True, force=True) + + if self.nAVUs > 0 and self.num_objects > 0: + remove_unused_metadata(self.session) # delete unused AVU's + + def test_query_first__166(self): + + with self.Issue_166_context(self.sess) as buildQuery: + for dummy_i in self.Iterate_to_exhaust_statement_table: + buildQuery().first() + + def test_query_one__166(self): + + with self.Issue_166_context(self.sess, num_objects = self.More_than_one_batch) as buildQuery: + + for dummy_i in self.Iterate_to_exhaust_statement_table: + query = buildQuery() + try: + query.one() + except MultipleResultsFound: + pass # irrelevant result + + def test_query_one_iter__166(self): + + with self.Issue_166_context(self.sess, num_objects = self.More_than_one_batch) as buildQuery: + + for dummy_i in self.Iterate_to_exhaust_statement_table: + + 
for dummy_row in buildQuery(): + break # single iteration + + def test_paging_get_batches_and_check_paging__166(self): + + with self.Issue_166_context( self.sess, num_objects = 1, + num_avus_per_object = 2 * self.More_than_one_batch) as buildQuery: + + pages = [b for b in buildQuery().get_batches()] + self.assertTrue(len(pages) > 2 and len(pages[0]) < self.More_than_one_batch) + + to_compare = [] + + for _ in self.Iterate_to_exhaust_statement_table: + + for batch in buildQuery().get_batches(): + to_compare.append(batch) + if len(to_compare) == 2: break #leave query unfinished, but save two pages to compare + + # - To make sure paging was done, we ensure that this "key" tuple (collName/dataName , metadataKey) + # is not repeated between first two pages: + + Compare_Key = lambda d: ( d[Collection.name] + "/" + d[DataObject.name], + d[DataObjectMeta.name] ) + Set0 = { Compare_Key(dct) for dct in to_compare[0] } + Set1 = { Compare_Key(dct) for dct in to_compare[1] } + self.assertTrue(len(Set0 & Set1) == 0) # assert intersection is null set + + def test_paging_get_results__166(self): + + with self.Issue_166_context( self.sess, num_objects = self.More_than_one_batch) as buildQuery: + batch_size = 0 + for result_set in buildQuery().get_batches(): + batch_size = len(result_set) + break + + self.assertTrue(0 < batch_size < self.More_than_one_batch) + + for dummy_iter in self.Iterate_to_exhaust_statement_table: + iters = 0 + for dummy_row in buildQuery().get_results(): + iters += 1 + if iters == batch_size - 1: + break # leave iteration unfinished + class TestSpecificQuery(unittest.TestCase): def setUp(self): From 3fd432d2369b036b7e9e56afc98990da47dcf82a Mon Sep 17 00:00:00 2001 From: wierinve Date: Thu, 8 Aug 2019 15:54:18 +0200 Subject: [PATCH 23/96] Add metadata property for user and usergroup objects --- irods/user.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/irods/user.py b/irods/user.py index 4e61471..72fdf7b 100644 --- a/irods/user.py +++ b/irods/user.py @@ -1,5 +1,6 @@ from __future__ import absolute_import from irods.models import User, UserGroup, UserAuth +from irods.meta import iRODSMetaCollection from irods.exception import NoResultFound @@ -19,6 +20,13 @@ def dn(self): query = self.manager.sess.query(UserAuth.user_dn).filter(UserAuth.user_id == self.id) return [res[UserAuth.user_dn] for res in query] + @property + def metadata(self): + if not self._meta: + self._meta = iRODSMetaCollection( + self.manager.sess.metadata, User, self.name) + return self._meta + def modify(self, *args, **kwargs): self.manager.modify(self.name, *args, **kwargs) @@ -47,6 +55,13 @@ def remove(self): @property def members(self): return self.manager.getmembers(self.name) + + @property + def metadata(self): + if not self._meta: + self._meta = iRODSMetaCollection( + self.manager.sess.metadata, User, self.name) + return self._meta def addmember(self, user_name, user_zone=""): self.manager.addmember(self.name, user_name, user_zone) From 027eaf2a56b0cc550a331c935a8107b32dcc279e Mon Sep 17 00:00:00 2001 From: wierinve Date: Fri, 9 Aug 2019 14:00:59 +0200 Subject: [PATCH 24/96] Added user metadata tests to user_group_test.py --- irods/test/user_group_test.py | 60 ++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/irods/test/user_group_test.py b/irods/test/user_group_test.py index ff40c83..9d7849e 100644 --- a/irods/test/user_group_test.py +++ b/irods/test/user_group_test.py @@ -4,6 +4,7 @@ import sys import unittest from irods.exception import 
UserGroupDoesNotExist +from irods.meta import iRODSMetaCollection import irods.test.helpers as helpers from six.moves import range @@ -106,7 +107,7 @@ def test_user_dn(self): # add other dn user.modify('addAuth', user_DNs[1]) - self.assertEqual(user.dn, user_DNs) + self.assertEqual(user.dn.sort(), user_DNs.sort()) # remove first dn user.modify('rmAuth', user_DNs[0]) @@ -117,6 +118,63 @@ def test_user_dn(self): # delete user user.remove() + def test_user_metadata(self): + user_name = 'testuser' + user = self.sess.users.create(user_name, 'rodsuser') + self.assertIsInstance(user.metadata, iRODSMetaCollection) + user.remove() + + def test_get_user_metadata(self): + + user_name = "testuser" + + # create user + user = self.sess.users.create(user_name, 'rodsuser') + meta = user.metadata.get_all('key') + # There should be no metadata + self.assertEqual(len(meta), 0) + user.remove() + + def test_add_user_metadata(self): + user_name = "testuser" + + # create user + user = self.sess.users.create(user_name, 'rodsuser') + + user.metadata.add('key0', 'value0') + user.metadata.add('key1', 'value1', 'unit1') + user.metadata.add('key2', 'value2a', 'unit2') + user.metadata.add('key2', 'value2b', 'unit2') + + meta0 = user.metadata.get_all('key0') + self.assertEqual(len(meta0),1) + self.assertEqual(meta0[0].name, 'key0') + self.assertEqual(meta0[0].value, 'value0') + + meta1 = user.metadata.get_all('key1') + self.assertEqual(len(meta1),1) + self.assertEqual(meta1[0].name, 'key1') + self.assertEqual(meta1[0].value, 'value1') + self.assertEqual(meta1[0].units, 'unit1') + + meta2 = sorted(user.metadata.get_all('key2'), key = lambda AVU : AVU.value) + self.assertEqual(len(meta2),2) + self.assertEqual(meta2[0].name, 'key2') + self.assertEqual(meta2[0].value, 'value2a') + self.assertEqual(meta2[0].units, 'unit2') + self.assertEqual(meta2[1].name, 'key2') + self.assertEqual(meta2[1].value, 'value2b') + self.assertEqual(meta2[1].units, 'unit2') + + user.metadata.remove('key1', 'value1', 'unit1') + metadata = user.metadata.items() + self.assertEqual(len(metadata), 3) + + user.metadata.remove('key2', 'value2a', 'unit2') + metadata = user.metadata.items() + self.assertEqual(len(metadata), 2) + + user.remove() if __name__ == '__main__': # let the tests find the parent irods lib From 57a43e2b817ca47cc3dc9134f8de09e8f868fc8f Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Sat, 17 Aug 2019 17:38:13 -0400 Subject: [PATCH 25/96] [#167] test user/group AVU queries; clean up AVUs --- irods/test/helpers.py | 11 +++ irods/test/user_group_test.py | 177 +++++++++++++++++++++++----------- irods/user.py | 2 +- 3 files changed, 134 insertions(+), 56 deletions(-) diff --git a/irods/test/helpers.py b/irods/test/helpers.py index 76d9204..a23fd1b 100644 --- a/irods/test/helpers.py +++ b/irods/test/helpers.py @@ -124,6 +124,17 @@ def compute_sha256_digest(file_path): return base64.b64encode(hasher.digest()).decode() +def remove_unused_metadata(session): + from irods.message import GeneralAdminRequest + from irods.api_number import api_number + message_body = GeneralAdminRequest( 'rm', 'unusedAVUs', '','','','') + req = iRODSMessage("RODS_API_REQ", msg = message_body,int_info=api_number['GENERAL_ADMIN_AN']) + with session.pool.get_connection() as conn: + conn.send(req) + response=conn.recv() + if (response.int_info != 0): raise RuntimeError("Error removing unused AVUs") + + @contextlib.contextmanager def file_backed_up(filename): with tempfile.NamedTemporaryFile(prefix=os.path.basename(filename)) as f: diff --git 
a/irods/test/user_group_test.py b/irods/test/user_group_test.py index 9d7849e..be6b871 100644 --- a/irods/test/user_group_test.py +++ b/irods/test/user_group_test.py @@ -4,7 +4,8 @@ import sys import unittest from irods.exception import UserGroupDoesNotExist -from irods.meta import iRODSMetaCollection +from irods.meta import iRODSMetaCollection, iRODSMeta +from irods.models import User, UserGroup, UserMeta import irods.test.helpers as helpers from six.moves import range @@ -86,7 +87,6 @@ def test_add_users_to_group(self): with self.assertRaises(UserGroupDoesNotExist): self.sess.user_groups.get(group_name) - def test_user_dn(self): # https://github.com/irods/irods/issues/3620 if self.sess.server_version == (4, 2, 1): @@ -107,7 +107,7 @@ def test_user_dn(self): # add other dn user.modify('addAuth', user_DNs[1]) - self.assertEqual(user.dn.sort(), user_DNs.sort()) + self.assertEqual(user.dn, user_DNs) # remove first dn user.modify('rmAuth', user_DNs[0]) @@ -118,63 +118,130 @@ def test_user_dn(self): # delete user user.remove() + def test_group_metadata(self): + group_name = "test_group" + + # group should not be already present + with self.assertRaises(UserGroupDoesNotExist): + self.sess.user_groups.get(group_name) + + group = None + + try: + # create group + group = self.sess.user_groups.create(group_name) + + # add metadata to group + triple = ['key', 'value', 'unit'] + group.metadata[triple[0]] = iRODSMeta(*triple) + + result = self.sess.query(UserMeta, UserGroup).filter(UserGroup.name == group_name, + UserMeta.name == 'key').one() + + self.assertTrue([result[k] for k in (UserMeta.name, UserMeta.value, UserMeta.units)] == triple) + + finally: + if group: + group.remove() + helpers.remove_unused_metadata(self.sess) + def test_user_metadata(self): - user_name = 'testuser' - user = self.sess.users.create(user_name, 'rodsuser') - self.assertIsInstance(user.metadata, iRODSMetaCollection) - user.remove() - + user_name = "testuser" + user = None + + try: + user = self.sess.users.create(user_name, 'rodsuser') + + # metadata collection is the right type? + self.assertIsInstance(user.metadata, iRODSMetaCollection) + + # add three AVUs, two having the same key + user.metadata['key0'] = iRODSMeta('key0', 'value', 'units') + sorted_triples = sorted( [ ['key1', 'value0', 'units0'], + ['key1', 'value1', 'units1'] ] ) + for m in sorted_triples: + user.metadata.add(iRODSMeta(*m)) + + # general query gives the right results? 
+ result_0 = self.sess.query(UserMeta, User)\ + .filter( User.name == user_name, UserMeta.name == 'key0').one() + + self.assertTrue( [result_0[k] for k in (UserMeta.name, UserMeta.value, UserMeta.units)] + == ['key0', 'value', 'units'] ) + + results_1 = self.sess.query(UserMeta, User)\ + .filter(User.name == user_name, UserMeta.name == 'key1') + + retrieved_triples = [ [ res[k] for k in (UserMeta.name, UserMeta.value, UserMeta.units) ] + for res in results_1 + ] + + self.assertTrue( sorted_triples == sorted(retrieved_triples)) + + finally: + if user: + user.remove() + helpers.remove_unused_metadata(self.sess) + def test_get_user_metadata(self): - user_name = "testuser" - - # create user - user = self.sess.users.create(user_name, 'rodsuser') - meta = user.metadata.get_all('key') - # There should be no metadata - self.assertEqual(len(meta), 0) - user.remove() - + user = None + + try: + # create user + user = self.sess.users.create(user_name, 'rodsuser') + meta = user.metadata.get_all('key') + + # There should be no metadata + self.assertEqual(len(meta), 0) + finally: + if user: user.remove() + def test_add_user_metadata(self): user_name = "testuser" - - # create user - user = self.sess.users.create(user_name, 'rodsuser') - - user.metadata.add('key0', 'value0') - user.metadata.add('key1', 'value1', 'unit1') - user.metadata.add('key2', 'value2a', 'unit2') - user.metadata.add('key2', 'value2b', 'unit2') - - meta0 = user.metadata.get_all('key0') - self.assertEqual(len(meta0),1) - self.assertEqual(meta0[0].name, 'key0') - self.assertEqual(meta0[0].value, 'value0') - - meta1 = user.metadata.get_all('key1') - self.assertEqual(len(meta1),1) - self.assertEqual(meta1[0].name, 'key1') - self.assertEqual(meta1[0].value, 'value1') - self.assertEqual(meta1[0].units, 'unit1') - - meta2 = sorted(user.metadata.get_all('key2'), key = lambda AVU : AVU.value) - self.assertEqual(len(meta2),2) - self.assertEqual(meta2[0].name, 'key2') - self.assertEqual(meta2[0].value, 'value2a') - self.assertEqual(meta2[0].units, 'unit2') - self.assertEqual(meta2[1].name, 'key2') - self.assertEqual(meta2[1].value, 'value2b') - self.assertEqual(meta2[1].units, 'unit2') - - user.metadata.remove('key1', 'value1', 'unit1') - metadata = user.metadata.items() - self.assertEqual(len(metadata), 3) - - user.metadata.remove('key2', 'value2a', 'unit2') - metadata = user.metadata.items() - self.assertEqual(len(metadata), 2) - - user.remove() + user = None + + try: + # create user + user = self.sess.users.create(user_name, 'rodsuser') + + user.metadata.add('key0', 'value0') + user.metadata.add('key1', 'value1', 'unit1') + user.metadata.add('key2', 'value2a', 'unit2') + user.metadata.add('key2', 'value2b', 'unit2') + + meta0 = user.metadata.get_all('key0') + self.assertEqual(len(meta0),1) + self.assertEqual(meta0[0].name, 'key0') + self.assertEqual(meta0[0].value, 'value0') + + meta1 = user.metadata.get_all('key1') + self.assertEqual(len(meta1),1) + self.assertEqual(meta1[0].name, 'key1') + self.assertEqual(meta1[0].value, 'value1') + self.assertEqual(meta1[0].units, 'unit1') + + meta2 = sorted(user.metadata.get_all('key2'), key = lambda AVU : AVU.value) + self.assertEqual(len(meta2),2) + self.assertEqual(meta2[0].name, 'key2') + self.assertEqual(meta2[0].value, 'value2a') + self.assertEqual(meta2[0].units, 'unit2') + self.assertEqual(meta2[1].name, 'key2') + self.assertEqual(meta2[1].value, 'value2b') + self.assertEqual(meta2[1].units, 'unit2') + + user.metadata.remove('key1', 'value1', 'unit1') + metadata = user.metadata.items() + 
self.assertEqual(len(metadata), 3) + + user.metadata.remove('key2', 'value2a', 'unit2') + metadata = user.metadata.items() + self.assertEqual(len(metadata), 2) + + finally: + if user: + user.remove() + helpers.remove_unused_metadata(self.sess) if __name__ == '__main__': # let the tests find the parent irods lib diff --git a/irods/user.py b/irods/user.py index 72fdf7b..a91da21 100644 --- a/irods/user.py +++ b/irods/user.py @@ -55,7 +55,7 @@ def remove(self): @property def members(self): return self.manager.getmembers(self.name) - + @property def metadata(self): if not self._meta: From 0d9a14f2ebc8be7fa19fc163b200362d8bdc3baa Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Fri, 18 Oct 2019 04:54:33 -0400 Subject: [PATCH 26/96] [#175] metadata property for instances of iRODSResource --- irods/resource.py | 7 +++++++ irods/test/meta_test.py | 15 ++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/irods/resource.py b/irods/resource.py index 7ddd368..c87a7a7 100644 --- a/irods/resource.py +++ b/irods/resource.py @@ -1,5 +1,6 @@ from __future__ import absolute_import from irods.models import Resource +from irods.meta import iRODSMetaCollection import six @@ -37,6 +38,12 @@ def __init__(self, manager, result=None): self._meta = None + @property + def metadata(self): + if not self._meta: + self._meta = iRODSMetaCollection( + self.manager.sess.metadata, Resource, self.name) + return self._meta @property def context_fields(self): diff --git a/irods/test/meta_test.py b/irods/test/meta_test.py index 49fd24f..f6a13b0 100644 --- a/irods/test/meta_test.py +++ b/irods/test/meta_test.py @@ -5,7 +5,7 @@ import sys import unittest from irods.meta import iRODSMeta -from irods.models import DataObject, Collection +from irods.models import DataObject, Collection, Resource import irods.test.helpers as helpers from six.moves import range @@ -44,6 +44,19 @@ def test_get_obj_meta(self): # there should be no metadata at this point assert len(meta) == 0 + def test_resc_meta(self): + rescname = 'demoResc' + self.sess.resources.get(rescname).metadata.remove_all() + self.sess.metadata.set(Resource, rescname, iRODSMeta('zero','marginal','cost')) + self.sess.metadata.add(Resource, rescname, iRODSMeta('zero','marginal')) + self.sess.metadata.set(Resource, rescname, iRODSMeta('for','ever','after')) + meta = self.sess.resources.get(rescname).metadata + self.assertTrue( len(meta) == 3 ) + resource = self.sess.resources.get(rescname) + all_AVUs= resource.metadata.items() + for avu in all_AVUs: + resource.metadata.remove(avu) + self.assertTrue(0 == len(self.sess.resources.get(rescname).metadata)) def test_add_obj_meta(self): # add metadata to test object From 14ebbdbbf75b7c612e1aa5061e049c41951a3755 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Tue, 1 Oct 2019 15:41:25 -0400 Subject: [PATCH 27/96] [#163] add keywords to query objects --- irods/manager/data_object_manager.py | 4 +++- irods/query.py | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index 3fd8407..ddf5808 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -50,7 +50,9 @@ def get(self, path, file=None, **options): query = self.sess.query(DataObject)\ .filter(DataObject.name == irods_basename(path))\ - .filter(DataObject.collection_id == parent.id) + .filter(DataObject.collection_id == parent.id)\ + .add_keyword(kw.ZONE_KW, path.split('/')[1]) + results = query.all() # get up to max_rows replicas 
if len(results) <= 0: raise ex.DataObjectDoesNotExist() diff --git a/irods/query.py b/irods/query.py index 7c8d2e0..deb6983 100644 --- a/irods/query.py +++ b/irods/query.py @@ -36,6 +36,7 @@ def __init__(self, sess, *args, **kwargs): self._limit = -1 self._offset = 0 self._continue_index = 0 + self._keywords = {} for arg in args: if isinstance(arg, type) and issubclass(arg, Model): @@ -54,6 +55,12 @@ def _clone(self): new_q._limit = self._limit new_q._offset = self._offset new_q._continue_index = self._continue_index + new_q._keywords = self._keywords + return new_q + + def add_keyword(self, keyword, value = ''): + new_q = self._clone() + new_q._keywords[keyword] = value return new_q def filter(self, *criteria): @@ -138,6 +145,8 @@ def _kw_message(self): for criterion in self.criteria if isinstance(criterion.query_key, Keyword) ]) + for key in self._keywords: + dct[ key ] = self._keywords[key] return StringStringMap(dct) def _message(self): From d5677b8d633bba517c2c4d0d92419a31149bf171 Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Wed, 13 Nov 2019 09:15:35 -0500 Subject: [PATCH 28/96] [#3] v0.8.2 and update changelog --- CHANGELOG.rst | 16 ++++++++++++++++ irods/version.py | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0aab573..6fa4a5a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,22 @@ Changelog ========= +v0.8.2 (2019-11-14) +------------------- +- [#8] Add PAM Authentication handling (still needs tests) [Mattia D'Antonio] +- [#5] Remove commented-out import [Alan King] +- [#5] Add .idea directory to .gitignore [Jonathan Landrum] +- [#150] Fix specific query argument labeling [Chris Klimowski] +- [#148] DataObjectManager.put() can return the new data_object [Jonathan Landrum] +- [#124] Convert strings going to irods to Unicode [Alan King] +- [#161] Allow dynamic I/O for rule from file [Mathijs Koymans] +- [#162] Include resc_hier in replica information [Brett Hartley] +- [#165] Fix CAT_STATEMENT_TABLE_FULL by auto closing queries [Chris Smeele] +- [#166] Test freeing statements in unfinished query [Daniel Moore] +- [#167] Add metadata for user and usergroup objects [Erwin van Wieringen] +- [#175] Add metadata property for instances of iRODSResource [Daniel Moore] +- [#163] add keywords to query objects [Daniel Moore] + v0.8.1 (2018-09-27) ------------------- - [#140] Remove randomization from password test [Alan King] diff --git a/irods/version.py b/irods/version.py index ef72cc0..4ca39e7 100644 --- a/irods/version.py +++ b/irods/version.py @@ -1 +1 @@ -__version__ = '0.8.1' +__version__ = '0.8.2' From 27faafba1dc21d6b1e9731e97e9bbfc9f38f5832 Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Wed, 13 Nov 2019 09:46:37 -0500 Subject: [PATCH 29/96] [#3] updates for PyPI --- README.rst | 2 +- setup.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index dd6d317..a75eb3d 100644 --- a/README.rst +++ b/README.rst @@ -59,7 +59,7 @@ Uninstalling Establishing a (secure) connection -------------------------- +---------------------------------- Using environment files (including any SSL settings) in ``~/.irods/``: diff --git a/setup.py b/setup.py index e735453..687f105 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,7 @@ author_email='support@irods.org', description='A python API for iRODS', long_description=long_description, + long_description_content_type='text/x-rst', license='BSD', url='https://github.com/irods/python-irodsclient', keywords='irods', From 
38c7be775ca8bb067ebdd988bfbe3393e643f74e Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Wed, 13 Nov 2019 09:52:53 -0500 Subject: [PATCH 30/96] [#3] update release date for v0.8.2 --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 6fa4a5a..9231048 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,7 +1,7 @@ Changelog ========= -v0.8.2 (2019-11-14) +v0.8.2 (2019-11-13) ------------------- - [#8] Add PAM Authentication handling (still needs tests) [Mattia D'Antonio] - [#5] Remove commented-out import [Alan King] From 94744086fed05369f6fab00faba5dc8444abc574 Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Sat, 16 Nov 2019 15:02:31 -0500 Subject: [PATCH 31/96] [#5] fix ssl example in README.rst --- README.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/README.rst b/README.rst index a75eb3d..f1b1632 100644 --- a/README.rst +++ b/README.rst @@ -74,7 +74,6 @@ Using environment files (including any SSL settings) in ``~/.irods/``: >>> ssl_context = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH, cafile=None, capath=None, cadata=None) >>> ssl_settings = {'ssl_context': ssl_context} >>> with iRODSSession(irods_env_file=env_file, **ssl_settings) as session: ->>> with iRODSSession(irods_env_file=env_file) as session: ... pass ... >>> From 742759718b4331418a0e6ed51801da59ab899bae Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Tue, 10 Dec 2019 12:37:12 -0500 Subject: [PATCH 32/96] [#183] fix key error when tables from order_by() not in query() --- irods/query.py | 2 +- irods/test/query_test.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/irods/query.py b/irods/query.py index deb6983..4dddc7f 100644 --- a/irods/query.py +++ b/irods/query.py @@ -70,7 +70,7 @@ def filter(self, *criteria): def order_by(self, column, order='asc'): new_q = self._clone() - del new_q.columns[column] + new_q.columns.pop(column,None) if order == 'asc': new_q.columns[column] = query_number['ORDER_BY'] elif order == 'desc': diff --git a/irods/test/query_test.py b/irods/test/query_test.py index dafac9f..07c580f 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -165,6 +165,20 @@ def test_query_order_by_invalid_param(self): results = self.sess.query(User.name).order_by( User.name, order='moo').all() + def test_query_order_by_col_not_in_result__183(self): + test_collection_size = 8 + test_collection_path = '/{0}/home/{1}/testcoln_for_col_not_in_result'.format(self.sess.zone, self.sess.username) + c1 = c2 = None + try: + c1 = helpers.make_test_collection( self.sess, test_collection_path+"1", obj_count=test_collection_size) + c2 = helpers.make_test_collection( self.sess, test_collection_path+"2", obj_count=test_collection_size) + d12 = [ sorted([d.id for d in c.data_objects]) for c in sorted((c1,c2),key=lambda c:c.id) ] + query = self.sess.query(DataObject).filter(Like(Collection.name, test_collection_path+"_")).order_by(Collection.id) + q12 = list(map(lambda res:res[DataObject.id], query)) + self.assertTrue(d12[0] + d12[1] == sorted( q12[:test_collection_size] ) + sorted( q12[test_collection_size:])) + finally: + if c1: c1.remove(recurse=True,force=True) + if c2: c2.remove(recurse=True,force=True) def test_query_with_like_condition(self): '''Equivalent to: From 8a75751d4aab1cae86cfbf102669a8cb6a7a8d92 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Sun, 8 Dec 2019 16:38:47 -0500 Subject: [PATCH 33/96] [#180] add the "in" genquery operator --- irods/column.py | 14 ++++++++++++++ 
irods/test/query_test.py | 14 +++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/irods/column.py b/irods/column.py index e097ed8..cfed46b 100644 --- a/irods/column.py +++ b/irods/column.py @@ -39,6 +39,20 @@ def __init__(self, op, query_key, value): def value(self): return self.query_key.column_type.to_irods(self._value) +class In(Criterion): + + def __init__(self, query_key, value): + super(In, self).__init__('in', query_key, value) + + @property + def value(self): + v = "(" + comma = "" + for element in self._value: + v += "{}'{}'".format(comma,element) + comma = "," + v += ")" + return v class Like(Criterion): diff --git a/irods/test/query_test.py b/irods/test/query_test.py index 07c580f..0e25ee3 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -10,7 +10,7 @@ from irods.models import User, Collection, DataObject, DataObjectMeta, Resource from irods.exception import MultipleResultsFound, CAT_UNKNOWN_SPECIFIC_QUERY, CAT_INVALID_ARGUMENT from irods.query import SpecificQuery -from irods.column import Like, Between +from irods.column import Like, Between, In from irods.meta import iRODSMeta from irods import MAX_SQL_ROWS import irods.test.helpers as helpers @@ -205,6 +205,18 @@ def test_query_with_between_condition(self): res_str = '{} {}/{}'.format(result[Resource.name], result[Collection.name], result[DataObject.name]) self.assertIn(session.zone, res_str) + def test_query_with_in_condition(self): + collection = self.coll_path + filename = 'test_query_id_in_list.txt' + file_path = '{collection}/{filename}'.format(**locals()) + obj1 = helpers.make_object(self.sess, file_path+'-1') + obj2 = helpers.make_object(self.sess, file_path+'-2') + ids = [x.id for x in (obj1,obj2)] + for number in range(3): # slice for empty(:0), first(:1) or both(:2) + search_tuple = (ids[:number] if number >= 1 else [0] + ids[:number]) + q = self.sess.query(DataObject.name).filter(In( DataObject.id, search_tuple )) + self.assertEqual (number, len(list(q))) + @unittest.skipIf(six.PY3, 'Test is for python2 only') def test_query_for_data_object_with_utf8_name_python2(self): filename_prefix = '_prefix_ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸ' From ff426fa7f56c3e1feac3ec19f35bba5e754f838f Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Thu, 12 Dec 2019 17:50:46 -0500 Subject: [PATCH 34/96] [#135] Allow multiple criteria based on column name --- irods/message/__init__.py | 18 ++++++++++++ irods/query.py | 4 +-- irods/test/query_test.py | 62 +++++++++++++++++++++++++++++++-------- 3 files changed, 70 insertions(+), 14 deletions(-) diff --git a/irods/message/__init__.py b/irods/message/__init__.py index f13f12c..325fd98 100644 --- a/irods/message/__init__.py +++ b/irods/message/__init__.py @@ -244,6 +244,24 @@ class PluginAuthMessage(Message): context_ = StringProperty() +class _OrderedMultiMapping : + def keys(self): + return self._keys + def values(self): + return self._values + def __len__(self): + return len(self._keys) + def __init__(self, list_of_keyval_tuples ): + self.dedup = set() + self._keys = [] + self._values = [] + for k,v in list_of_keyval_tuples: + if (k,v) not in self.dedup: + self.dedup.add((k,v)) + self._keys.append(k) + self._values.append(v) + + class IntegerIntegerMap(Message): _name = 'InxIvalPair_PI' diff --git a/irods/query.py b/irods/query.py index 4dddc7f..0d9f7f4 100644 --- a/irods/query.py +++ b/irods/query.py @@ -5,7 +5,7 @@ from irods.models import Model from irods.column import Column, Keyword from irods.message import ( - IntegerIntegerMap, 
IntegerStringMap, StringStringMap, + IntegerIntegerMap, IntegerStringMap, StringStringMap, _OrderedMultiMapping, GenQueryRequest, GenQueryResponse, empty_gen_query_out, iRODSMessage, SpecificQueryRequest, GeneralAdminRequest) from irods.api_number import api_number @@ -131,7 +131,7 @@ def _select_message(self): # todo store criterion for columns and criterion for keywords in seaparate # lists def _conds_message(self): - dct = dict([ + dct = _OrderedMultiMapping([ (criterion.query_key.icat_id, criterion.op + ' ' + criterion.value) for criterion in self.criteria if isinstance(criterion.query_key, Column) diff --git a/irods/test/query_test.py b/irods/test/query_test.py index 0e25ee3..f11f42a 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -18,16 +18,6 @@ IRODS_STATEMENT_TABLE_SIZE = 50 -def remove_unused_metadata (sess) : - - from irods.message import GeneralAdminRequest, iRODSMessage - from irods.api_number import api_number - message_body = GeneralAdminRequest( 'rm', 'unusedAVUs', '','','','') - req = iRODSMessage("RODS_API_REQ", msg = message_body,int_info=api_number['GENERAL_ADMIN_AN']) - with sess.pool.get_connection() as conn: - conn.send(req) - response=conn.recv() - if (response.int_info != 0): raise RuntimeError("Error removing unused AVU's") class TestQuery(unittest.TestCase): @@ -188,7 +178,6 @@ def test_query_with_like_condition(self): query = self.sess.query(Resource).filter(Like(Resource.name, 'dem%')) self.assertIn('demoResc', [row[Resource.name] for row in query]) - def test_query_with_between_condition(self): '''Equivalent to: iquest "select RESC_NAME, COLL_NAME, DATA_NAME where DATA_MODIFY_TIME between '01451606400' '...'" @@ -217,6 +206,55 @@ def test_query_with_in_condition(self): q = self.sess.query(DataObject.name).filter(In( DataObject.id, search_tuple )) self.assertEqual (number, len(list(q))) + def test_simultaneous_multiple_AVU_joins(self): + objects = [] + decoys = [] + try: + collection = self.coll_path + filename = 'test_multiple_AVU_joins' + file_path = '{collection}/{filename}'.format(**locals()) + for x in range(3,9): + obj = helpers.make_object(self.sess, file_path+'-{}'.format(x)) # with metadata + objects.append(obj) + obj.metadata.add('A_meta','1{}'.format(x)) + obj.metadata.add('B_meta','2{}'.format(x)) + decoys.append(helpers.make_object(self.sess, file_path+'-dummy{}'.format(x))) # without metadata + self.assertTrue( len(objects) > 0 ) + q = self.sess.query(DataObject,DataObjectMeta).\ + filter(DataObjectMeta.name == 'A_meta', DataObjectMeta.value < '20').\ + filter(DataObjectMeta.name == 'B_meta', DataObjectMeta.value >= '20') + self.assertTrue( len(list(q)) == len(objects) ) + q = self.sess.query(DataObject,DataObjectMeta).\ + filter(DataObjectMeta.name == 'B_meta').filter(DataObjectMeta.value < '28').\ + filter(DataObjectMeta.name == 'B_meta').filter(Like(DataObjectMeta.value, '2_')) + self.assertTrue( len(list(q)) == len(objects)-1 ) + finally: + for x in (objects + decoys): + x.unlink(force=True) + helpers.remove_unused_metadata( self.sess ) + + def test_multiple_criteria_on_one_column_name(self): + collection = self.coll_path + filename = 'test_multiple_AVU_joins' + file_path = '{collection}/{filename}'.format(**locals()) + objects = [] + nobj = 0 + for x in range(3,9): + nobj += 2 + obj1 = helpers.make_object(self.sess, file_path+'-{}'.format(x)) + obj2 = helpers.make_object(self.sess, file_path+'-dummy{}'.format(x)) + objects.extend([obj1,obj2]) + self.assertTrue( nobj > 0 and len(objects) == nobj ) + q = 
self.sess.query(Collection,DataObject) + dummy_test = [d for d in q if d[DataObject.name][-1:] != '8' + and d[DataObject.name][-7:-1] == '-dummy' ] + self.assertTrue( len(dummy_test) > 0 ) + q = q. filter(Like(DataObject.name, '%-dummy_')).\ + filter(Collection.name == collection) .\ + filter(DataObject.name != (filename + '-dummy8')) + results = [r[DataObject.name] for r in q] + self.assertTrue(len(results) == len(dummy_test)) + @unittest.skipIf(six.PY3, 'Test is for python2 only') def test_query_for_data_object_with_utf8_name_python2(self): filename_prefix = '_prefix_ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸ' @@ -294,7 +332,7 @@ def __exit__(self,*_): # - clean up after context block self.test_collection.remove(recurse=True, force=True) if self.nAVUs > 0 and self.num_objects > 0: - remove_unused_metadata(self.session) # delete unused AVU's + helpers.remove_unused_metadata(self.session) # delete unused AVU's def test_query_first__166(self): From d4306ef9871c687b502bcda661c458f0df3216ea Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Mon, 16 Dec 2019 05:22:58 -0500 Subject: [PATCH 35/96] [#135] fix queries for multiple AVUs of same name --- irods/message/__init__.py | 7 ++----- irods/test/query_test.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/irods/message/__init__.py b/irods/message/__init__.py index 325fd98..362052c 100644 --- a/irods/message/__init__.py +++ b/irods/message/__init__.py @@ -252,14 +252,11 @@ def values(self): def __len__(self): return len(self._keys) def __init__(self, list_of_keyval_tuples ): - self.dedup = set() self._keys = [] self._values = [] for k,v in list_of_keyval_tuples: - if (k,v) not in self.dedup: - self.dedup.add((k,v)) - self._keys.append(k) - self._values.append(v) + self._keys.append(k) + self._values.append(v) class IntegerIntegerMap(Message): diff --git a/irods/test/query_test.py b/irods/test/query_test.py index f11f42a..5f27f3a 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -220,14 +220,26 @@ def test_simultaneous_multiple_AVU_joins(self): obj.metadata.add('B_meta','2{}'.format(x)) decoys.append(helpers.make_object(self.sess, file_path+'-dummy{}'.format(x))) # without metadata self.assertTrue( len(objects) > 0 ) + + # -- test simple repeat of same column -- q = self.sess.query(DataObject,DataObjectMeta).\ filter(DataObjectMeta.name == 'A_meta', DataObjectMeta.value < '20').\ filter(DataObjectMeta.name == 'B_meta', DataObjectMeta.value >= '20') self.assertTrue( len(list(q)) == len(objects) ) + + # -- test no-stomp of previous filter -- + self.assertTrue( ('B_meta','28') in [ (x.name,x.value) for x in objects[-1].metadata.items() ] ) q = self.sess.query(DataObject,DataObjectMeta).\ filter(DataObjectMeta.name == 'B_meta').filter(DataObjectMeta.value < '28').\ filter(DataObjectMeta.name == 'B_meta').filter(Like(DataObjectMeta.value, '2_')) self.assertTrue( len(list(q)) == len(objects)-1 ) + + # -- test multiple AVU's by same attribute name -- + objects[-1].metadata.add('B_meta','29') + q = self.sess.query(DataObject,DataObjectMeta).\ + filter(DataObjectMeta.name == 'B_meta').filter(DataObjectMeta.value == '28').\ + filter(DataObjectMeta.name == 'B_meta').filter(DataObjectMeta.value == '29') + self.assertTrue(len(list(q)) == 1) finally: for x in (objects + decoys): x.unlink(force=True) From 007526eb18bc7ee0b949a6df6df1f505177ae4cf Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Tue, 10 Dec 2019 11:28:23 -0500 Subject: [PATCH 36/96] [#187] Allow query on metadata create and modify times --- irods/models.py | 
11 ++++++++ irods/test/query_test.py | 54 +++++++++++++++++++++++++++++++++++----- 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/irods/models.py b/irods/models.py index 08c53e7..fb2fe51 100644 --- a/irods/models.py +++ b/irods/models.py @@ -112,6 +112,8 @@ class DataObjectMeta(Model): name = Column(String, 'COL_META_DATA_ATTR_NAME', 600) value = Column(String, 'COL_META_DATA_ATTR_VALUE', 601) units = Column(String, 'COL_META_DATA_ATTR_UNITS', 602) + create_time = Column(DateTime, 'COL_META_DATA_CREATE_TIME', 604) + modify_time = Column(DateTime, 'COL_META_DATA_MODIFY_TIME', 605) class CollectionMeta(Model): @@ -119,6 +121,9 @@ class CollectionMeta(Model): name = Column(String, 'COL_META_COLL_ATTR_NAME', 610) value = Column(String, 'COL_META_COLL_ATTR_VALUE', 611) units = Column(String, 'COL_META_COLL_ATTR_UNITS', 612) + create_time = Column(DateTime, 'COL_META_COLL_CREATE_TIME', 614) + modify_time = Column(DateTime, 'COL_META_COLL_MODIFY_TIME', 615) + class ResourceMeta(Model): @@ -126,6 +131,9 @@ class ResourceMeta(Model): name = Column(String, 'COL_META_RESC_ATTR_NAME', 630) value = Column(String, 'COL_META_RESC_ATTR_VALUE', 631) units = Column(String, 'COL_META_RESC_ATTR_UNITS', 632) + create_time = Column(DateTime, 'COL_META_RESC_CREATE_TIME', 634) + modify_time = Column(DateTime, 'COL_META_RESC_MODIFY_TIME', 635) + class UserMeta(Model): @@ -133,6 +141,9 @@ class UserMeta(Model): name = Column(String, 'COL_META_USER_ATTR_NAME', 640) value = Column(String, 'COL_META_USER_ATTR_VALUE', 641) units = Column(String, 'COL_META_USER_ATTR_UNITS', 642) + create_time = Column(DateTime, 'COL_META_USER_CREATE_TIME', 644) + modify_time = Column(DateTime, 'COL_META_USER_MODIFY_TIME', 645) + class DataAccess(Model): diff --git a/irods/test/query_test.py b/irods/test/query_test.py index 5f27f3a..c928e5b 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -6,8 +6,13 @@ import sys import tempfile import unittest +import time from datetime import datetime -from irods.models import User, Collection, DataObject, DataObjectMeta, Resource +from irods.models import (User, UserMeta, + Resource, ResourceMeta, + Collection, CollectionMeta, + DataObject, DataObjectMeta ) + from irods.exception import MultipleResultsFound, CAT_UNKNOWN_SPECIFIC_QUERY, CAT_INVALID_ARGUMENT from irods.query import SpecificQuery from irods.column import Like, Between, In @@ -18,6 +23,9 @@ IRODS_STATEMENT_TABLE_SIZE = 50 +def rows_returned(query): + return len( list(query) ) + class TestQuery(unittest.TestCase): @@ -204,7 +212,7 @@ def test_query_with_in_condition(self): for number in range(3): # slice for empty(:0), first(:1) or both(:2) search_tuple = (ids[:number] if number >= 1 else [0] + ids[:number]) q = self.sess.query(DataObject.name).filter(In( DataObject.id, search_tuple )) - self.assertEqual (number, len(list(q))) + self.assertEqual (number, rows_returned(q)) def test_simultaneous_multiple_AVU_joins(self): objects = [] @@ -225,26 +233,61 @@ def test_simultaneous_multiple_AVU_joins(self): q = self.sess.query(DataObject,DataObjectMeta).\ filter(DataObjectMeta.name == 'A_meta', DataObjectMeta.value < '20').\ filter(DataObjectMeta.name == 'B_meta', DataObjectMeta.value >= '20') - self.assertTrue( len(list(q)) == len(objects) ) + self.assertTrue( rows_returned(q) == len(objects) ) # -- test no-stomp of previous filter -- self.assertTrue( ('B_meta','28') in [ (x.name,x.value) for x in objects[-1].metadata.items() ] ) q = self.sess.query(DataObject,DataObjectMeta).\ filter(DataObjectMeta.name == 
'B_meta').filter(DataObjectMeta.value < '28').\ filter(DataObjectMeta.name == 'B_meta').filter(Like(DataObjectMeta.value, '2_')) - self.assertTrue( len(list(q)) == len(objects)-1 ) + self.assertTrue( rows_returned(q) == len(objects)-1 ) # -- test multiple AVU's by same attribute name -- objects[-1].metadata.add('B_meta','29') q = self.sess.query(DataObject,DataObjectMeta).\ filter(DataObjectMeta.name == 'B_meta').filter(DataObjectMeta.value == '28').\ filter(DataObjectMeta.name == 'B_meta').filter(DataObjectMeta.value == '29') - self.assertTrue(len(list(q)) == 1) + self.assertTrue(rows_returned(q) == 1) finally: for x in (objects + decoys): x.unlink(force=True) helpers.remove_unused_metadata( self.sess ) + def test_query_on_AVU_times(self): + test_collection_path = '/{zone}/home/{user}/test_collection'.format( zone = self.sess.zone, user = self.sess.username) + testColl = helpers.make_test_collection(self.sess, test_collection_path, obj_count = 1) + testData = testColl.data_objects[0] + testResc = self.sess.resources.get('demoResc') + testUser = self.sess.users.get(self.sess.username) + objects = { 'r': testResc, 'u': testUser, 'c':testColl, 'd':testData } + object_IDs = { sfx:obj.id for sfx,obj in objects.items() } + tables = { 'r': (Resource, ResourceMeta), + 'u': (User, UserMeta), + 'd': (DataObject, DataObjectMeta), + 'c': (Collection, CollectionMeta) } + try: + str_number_incr = lambda str_numbers : str(1+max([0]+[int(n) if n.isdigit() else 0 for n in str_numbers])) + AVU_unique_incr = lambda obj,suffix='' : ( 'a_'+suffix, + 'v_'+suffix, + str_number_incr(avu.units for avu in obj.metadata.items()) ) + before = datetime.utcnow() + time.sleep(1.5) + for suffix,obj in objects.items(): obj.metadata.add( *AVU_unique_incr(obj,suffix) ) + after = datetime.utcnow() + for suffix, tblpair in tables.items(): + self.sess.query( *tblpair ).filter(tblpair[1].modify_time <= after )\ + .filter(tblpair[1].modify_time > before )\ + .filter(tblpair[0].id == object_IDs[suffix] ).one() + self.sess.query( *tblpair ).filter(tblpair[1].create_time <= after )\ + .filter(tblpair[1].create_time > before )\ + .filter(tblpair[0].id == object_IDs[suffix] ).one() + finally: + for obj in objects.values(): + for avu in obj.metadata.items(): obj.metadata.remove(avu) + testColl.remove(recurse=True,force=True) + helpers.remove_unused_metadata( self.sess ) + + def test_multiple_criteria_on_one_column_name(self): collection = self.coll_path filename = 'test_multiple_AVU_joins' @@ -503,7 +546,6 @@ def test_register_query_twice(self): # remove query query.remove() - def test_list_specific_queries(self): query = SpecificQuery(self.session, alias='ls') From f8979f83147266e5e8cbbb6e27fba3ab56eb0d33 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Mon, 9 Dec 2019 15:21:05 -0500 Subject: [PATCH 37/96] Additional sections and examples in README --- README.rst | 330 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 324 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index f1b1632..7fbc0cb 100644 --- a/README.rst +++ b/README.rst @@ -187,26 +187,94 @@ bar Working with metadata --------------------- +To enumerate AVU's on an object. With no metadata attached, the result is an empty list: + + +>>> from irods.meta import iRODSMeta >>> obj = session.data_objects.get("/tempZone/home/rods/test1") >>> print(obj.metadata.items()) [] + +We then add some metadata. 
+Just as with the icommand equivalent "imeta add ...", we can add multiple AVU's with the same name field: + + >>> obj.metadata.add('key1', 'value1', 'units1') >>> obj.metadata.add('key1', 'value2') >>> obj.metadata.add('key2', 'value3') +>>> obj.metadata.add('key2', 'value4') >>> print(obj.metadata.items()) -[, , -] +[, , +, ] + + +We can also use Python's item indexing syntax to perform the equivalent of an "imeta set ...", e.g. overwriting +all AVU's with a name field of "key2" in a single update: + + +>>> new_meta = iRODSMeta('key2','value5','units2') +>>> obj.metadata[new_meta.name] = new_meta +>>> print(obj.metadata.items()) +[, , + ] + + +Now, with only one AVU on the object with a name of "key2", *get_one* is assured of not throwing an exception: ->>> print(obj.metadata.get_all('key1')) -[, ] >>> print(obj.metadata.get_one('key2')) - + + + +However, the same is not true of "key1": + + +>>> print(obj.metadata.get_one('key1')) +Traceback (most recent call last): + File "", line 1, in + File "/[...]/python-irodsclient/irods/meta.py", line 41, in get_one + raise KeyError +KeyError + + +Finally, to remove a specific AVU from an object: + >>> obj.metadata.remove('key1', 'value1', 'units1') >>> print(obj.metadata.items()) -[, ] +[, ] + + +Alternately, this form of the remove() method can also be useful: + + +>>> for avu in obj.metadata.items(): +... obj.metadata.remove(avu) +>>> print(obj.metadata.items()) +[] + + +If we intended on deleting the data object anyway, we could have just done this instead: + + +>>> obj.unlink(force=True) + + +But notice that the force option is important, since a data object in the trash may still have AVU's attached. + +At the end of a long session of AVU add/manipulate/delete operations, one should make sure to delete all unused +AVU's. We can in fact use any *\*Meta* data model in the queries below, since unattached AVU's are not aware +of the (type of) catalog object they once annotated: + + +>>> from irods.models import (DataObjectMeta, ResourceMeta) +>>> len(list( session.query(ResourceMeta) )) +4 +>>> from irods.test.helpers import remove_unused_metadata +>>> remove_unused_metadata(session) +>>> len(list( session.query(ResourceMeta) )) +0 General queries @@ -256,6 +324,12 @@ Query using other models: ... ('/tempZone/home/rods', 'type', 'Project', None) +Beginning with version 0.8.3 of PRC, the 'in' genquery operator is also available: + +>>> from irods.models import Resource +>>> from irods.column import In +>>> [ resc[Resource.id]for resc in session.query(Resource).filter(In(Resource.name, ['thisResc','thatResc'])) ] +[10037,10038] Query with aggregation(min, max, sum, avg, count): @@ -316,6 +390,7 @@ user_manager.py 212669 __init__.py 212670 __init__.pyc 212671 + Recherché queries ----------------- @@ -340,6 +415,249 @@ not reside in the trash. >>> pprint( list( chained_results ) ) +Instantiating iRODS objects from query results +---------------------------------------------- +The General query works well for getting information out of the ICAT if all we're interested in is +information representable with +primitive types (ie. object names, paths, and ID's, as strings or integers). But Python's object orientation also +allows us to create object references to mirror the persistent entities (instances of *Collection*, *DataObject*, *User*, or *Resource*, etc.) +inhabiting the ICAT. 
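As a minimal sketch of the pattern developed in the rest of this section (assuming an existing ``iRODSSession`` in ``session`` and the ``/tempZone/home/rods`` collection used elsewhere in these examples), a row returned by a general query can be wrapped directly in a live object and used at once::

    from irods.collection import iRODSCollection
    from irods.models import Collection

    # Each result row is keyed by column; handing the row to iRODSCollection
    # saves a second lookup of the same collection by path.
    for row in session.query(Collection).filter(Collection.name == '/tempZone/home/rods'):
        coll = iRODSCollection(session.collections, row)
        print(coll.path, len(coll.metadata.items()))

The same construction works for ``iRODSDataObject``, ``iRODSUser`` and ``iRODSResource`` rows, as shown below.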
+ +**Background:** +Certain iRODS object types can be instantiated easily using the session object's custom type managers, +particularly if some parameter (often just the name or path) of the object is already known: + +>>> type(session.users) + +>>> u = session.users.get('rods') +>>> u.id +10003 + +Type managers are good for specific operations, including object creation and removal:: + +>>> session.collections.create('/tempZone/home/rods/subColln') +>>> session.collections.remove('/tempZone/home/rods/subColln') +>>> session.data_objects.create('/tempZone/home/rods/dataObj') +>>> session.data_objects.unlink('/tempZone/home/rods/dataObj') + +When we retrieve a reference to an existing collection using *get* : + +>>> c = session.collections.get('/tempZone/home/rods') +>>> c + + + +we have, in that variable *c*, a reference to an iRODS *Collection* object whose properties provide +useful information: + +>>> [ x for x in dir(c) if not x.startswith('__') ] +['_meta', 'data_objects', 'id', 'manager', 'metadata', 'move', 'name', 'path', 'remove', 'subcollections', 'unregister', 'walk'] +>>> c.name +'rods' +>>> c.path +'/tempZone/home/rods' +>>> c.data_objects +[] +>>> c.metadata.items() +[ <... list of AVU's attached to Collection c ... > ] + +or whose methods can do useful things: + +>>> for sub_coll in c.walk(): print('---'); pprint( sub_coll ) +[ ...< series of Python data structures giving the complete tree structure below collection 'c'> ...] + +This approach of finding objects by name, or via their relations with other objects (ie "contained by", or in the case of metadata, "attached to"), +is helpful if we know something about the location or identity of what we're searching for, but we don't always +have that kind of a-priori knowledge. + +So, although we can (as seen in the last example) walk an *iRODSCollection* recursively to discover all subordinate +collections and their data objects, this approach will not always be best +for a given type of application or data discovery, especially in more advanced +use cases. + +**A Different Approach:** +For the PRC to be sufficiently powerful for general use, we'll often need at least: + +* general queries, and +* the capabilities afforded by the PRC's object-relational mapping. + +Suppose, for example, we wish to enumerate all collections in the iRODS catalog. + +Again, the object managers are the answer, but they are now invoked using a different scheme: + +>>> from irods.collection import iRODSCollection; from irods.models import Collection +>>> all_collns = [ iRODSCollection(session.collections,result) for result in session.query(Collection) ] + +From there, we have the ability to do useful work, or filtering based on the results of the enumeration. +And, because *all_collns* is an iterable of true objects, we can either use Python's list comprehensions or +execute more catalog queries to achieve further aims. + +Note that, for similar system-wide queries of Data Objects (which, as it happens, are inextricably joined to their +parent Collection objects), a bit more finesse is required. 
Let us query, for example, to find all data +objects in a particular zone with an AVU that matches the following condition:: + + META_DATA_ATTR_NAME = "irods::alert_time" and META_DATA_ATTR_VALUE like '+0%' + + +>>> import irods.keywords +>>> from irods.data_object import iRODSDataObject +>>> from irods.models import DataObjectMeta, DataObject +>>> from irods.column import Like +>>> q = session.query(DataObject).filter( DataObjectMeta.name == 'irods::alert_time', + Like(DataObjectMeta.value, '+0%') ) +>>> zone_hint = "" # --> add a zone name in quotes to search another zone +>>> if zone_hint: q = q.add_keyword( irods.keywords.ZONE_KW, zone_hint ) +>>> for res in q: +... colln_id = res [DataObject.collection_id] +... collObject = get_collection( colln_id, session, zone = zone_hint) +... dataObject = iRODSDataObject( session.data_objects, parent = collObject, results=[res]) +... print( '{coll}/{data}'.format (coll = collObject.path, data = dataObject.name)) + + +In the above loop we have used a helper function, *get_collection*, to minimize the number of hits to the object +catalog. Otherwise, me might find within a typical application that some Collection objects are being queried at +a high rate of redundancy. *get_collection* can be implemented thusly: + +.. code:: Python + + import collections # of the Pythonic, not iRODS, kind + def makehash(): + # see https://stackoverflow.com/questions/651794/whats-the-best-way-to-initialize-a-dict-of-dicts-in-python + return collections.defaultdict(makehash) + from irods.collection import iRODSCollection + from irods.models import Collection + def get_collection (Id, session, zone=None, memo = makehash()): + if not zone: zone = "" + c_obj = memo[session][zone].get(Id) + if c_obj is None: + q = session.query(Collection).filter(Collection.id==Id) + if zone != '': q = q.add_keyword( irods.keywords.ZONE_KW, zone ) + c_id = q.one() + c_obj = iRODSCollection(session, result = c_id) + memo[session][zone][Id] = c_obj + return c_obj + + +Once instantiated, of course, any *iRODSDataObject*'s data to which we have access permissions is available via its open() method. + +As stated, this type of object discovery requires some extra study and effort, but the ability to search arbitrary iRODS zones +(to which we are federated and have the user permissions) is powerful indeed. + + +Tracking and manipulating replicas of Data objects +-------------------------------------------------- + +Putting together the techniques we've seen so far, it's not hard to write functions +that achieve useful, common goals. Suppose that for all data objects containing replicas on +a given named resource (the "source") we want those replicas "moved" to a second, or +"destination" resource. We can achieve it with a function such as the one below. It +achieves the move via a replication of the data objects found to the destination +resource , followed by a trimming of each replica from the source. 
We assume for our current +purposed that all replicas are "good", ie have a status of "1" :: + + from irods.resource import iRODSResource + from irods.collection import iRODSCollection + from irods.data_object import iRODSDataObject + from irods.models import Resource,Collection,DataObject + def repl_and_trim (srcRescName, dstRescName = '', verbose = False): + objects_trimmed = 0 + q = session.query(Resource).filter(Resource.name == srcRescName) + srcResc = iRODSResource( session.resources, q.one()) + # loop over data objects found on srcResc + for q_row in session.query(Collection,DataObject) \ + .filter(DataObject.resc_id == srcResc.id): + collection = iRODSCollection (session.collections, result = q_row) + data_object = iRODSDataObject (session.data_objects, parent = collection, results = (q_row,)) + objects_trimmed += 1 + if verbose : + import pprint + print( '--------', data_object.name, '--------') + pprint.pprint( [vars(r) for r in data_object.replicas if + r.resource_name == srcRescName] ) + if dstRescName: + objects_trimmed += 1 + data_object.replicate(dstRescName) + for replica_number in [r.number for r in data_object.replicas]: + data_object.unlink( replNum = replica_number ) + return objects_trimmed + + +Listing Users and Groups ; calculating Group Membership +------------------------------------------------------- + +iRODS tracks groups and users using two tables, R_USER_MAIN and R_USER_GROUP. +Under this database schema, all "user groups" are also users: + +>>> from irods.models import User, UserGroup +>>> from pprint import pprint +>>> pprint(list( [ (x[User.id], x[User.name]) for x in session.query(User) ] )) +[(10048, 'alice'), + (10001, 'rodsadmin'), + (13187, 'bobby'), + (10045, 'collab'), + (10003, 'rods'), + (13193, 'empty'), + (10002, 'public')] + +But it's also worth noting that the User.type field will be 'rodsgroup' for any +user ID that iRODS internally recognizes as a "Group": + +>>> groups = session.query(User).filter( User.type == 'rodsgroup' ) + +>>> [x[User.name] for x in groups] +['collab', 'public', 'rodsadmin', 'empty'] + +Since we can instantiate iRODSUserGroup and iRODSUser objects directly from the rows of +a general query on the corresponding tables, it is also straightforward to trace out +the groups' memberships: + +>>> from irods.user import iRODSUser, iRODSUserGroup +>>> grp_usr_mapping = [ (iRODSUserGroup ( session.user_groups, result), iRODSUser (session.users, result)) \ +... for result in session.query(UserGroup,User) ] +>>> pprint( [ (x,y) for x,y in grp_usr_mapping if x.id != y.id ] ) +[(, ), + (, ), + (, ), + (, ), + (, ), + (, )] + +(Note that in general queries, fields cannot be compared to each other, only to literal constants; thus +the '!=' comparison in the Python list comprehension.) + +From the above, we can see that the group 'collab' (with user ID 10045) contains users 'bobby'(13187) and +'alice'(10048) but not 'rods'(10003), as the tuple (10045,10003) is not listed. Group 'rodsadmin'(10001) +contains user 'rods'(10003) but no other users; and group 'public'(10002) by default contains all canonical +users (those whose User.type is 'rodsadmin' or 'rodsuser'). The empty group ('empty') has no users as +members, so it doesn't show up in our final list. 
+ + +Getting and setting permissions +------------------------------- + +We can find the ID's of all the collections writable (ie having "modify" ACL) by, but not owned by, +alice (or even alice#otherZone): + +>>> from irods.models import Collection,CollectionAccess,CollectionUser,User +>>> from irods.column import Like +>>> q = session.query (Collection,CollectionAccess).filter( +... CollectionUser.name == 'alice', # User.zone == 'otherZone', # zone optional +... Like(CollectionAccess.name, 'modify%') ) #defaults to current zone + +If we then want to downgrade those permissions to read-only, we can do the following: + +>>> from irods.access import iRODSAccess +>>> for c in q: +... session.permissions.set( iRODSAccess('read', c[Collection.name], 'alice', # 'otherZone' # zone optional +... )) + +We can also query on access type using its numeric value, which will seem more natural to some: + +>>> OWN = 1200; MODIFY = 1120 ; READ = 1050 +>>> from irods.models import DataAccess, DataObject, User +>>> data_objects_writable = list(session.query(DataObject,DataAccess,User)).filter(User.name=='alice', DataAccess.type >= MODIFY) + + And more... ----------- From a306ebfff879ca95764e28c22cef039d80f86d49 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Wed, 11 Mar 2020 20:19:22 +0000 Subject: [PATCH 38/96] [irods/irods#4796] add data object copy tests --- irods/test/data_obj_test.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index 7a211a0..3ae1a04 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -131,6 +131,37 @@ def test_move_obj_to_coll(self): # remove new collection new_coll.remove(recurse=True, force=True) + def test_copy_existing_obj_to_relative_dest_fails_irods4796(self): + if self.sess.server_version <= (4, 2, 7): + self.skipTest('iRODS servers <= 4.2.7 will give nondescriptive error') + obj_name = 'this_object_will_exist_once_made' + exists_path = '{}/{}'.format(self.coll_path, obj_name) + helpers.make_object(self.sess, exists_path) + self.assertTrue(self.sess.data_objects.exists(exists_path)) + non_existing_zone = 'this_zone_absent' + relative_dst_path = '{non_existing_zone}/{obj_name}'.format(**locals()) + options = {} + with self.assertRaises(ex.USER_INPUT_PATH_ERR): + self.sess.data_objects.copy(exists_path, relative_dst_path, **options) + + def test_copy_from_nonexistent_absolute_data_obj_path_fails_irods4796(self): + if self.sess.server_version <= (4, 2, 7): + self.skipTest('iRODS servers <= 4.2.7 will hang the client') + non_existing_zone = 'this_zone_absent' + src_path = '/{non_existing_zone}/non_existing.src'.format(**locals()) + dst_path = '/{non_existing_zone}/non_existing.dst'.format(**locals()) + options = {} + with self.assertRaises(ex.USER_INPUT_PATH_ERR): + self.sess.data_objects.copy(src_path, dst_path, **options) + + def test_copy_from_relative_path_fails_irods4796(self): + if self.sess.server_version <= (4, 2, 7): + self.skipTest('iRODS servers <= 4.2.7 will hang the client') + src_path = 'non_existing.src' + dst_path = 'non_existing.dst' + options = {} + with self.assertRaises(ex.USER_INPUT_PATH_ERR): + self.sess.data_objects.copy(src_path, dst_path, **options) def test_copy_obj_to_obj(self): # test args From 6cd44f5d81179f4d29e414e98f280943022c71e9 Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Thu, 2 Apr 2020 14:04:59 -0400 Subject: [PATCH 39/96] [#5] clarify unlink specific replica example --- README.rst | 3 ++- 1 file changed, 2 
insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 7fbc0cb..7183b20 100644 --- a/README.rst +++ b/README.rst @@ -578,7 +578,8 @@ purposed that all replicas are "good", ie have a status of "1" :: objects_trimmed += 1 data_object.replicate(dstRescName) for replica_number in [r.number for r in data_object.replicas]: - data_object.unlink( replNum = replica_number ) + options = { kw.DATA_REPL_KW: replica_number } + data_object.unlink( **options ) return objects_trimmed From abbc4a41569ce7bca79aa5c7f8d87528925b762f Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Thu, 28 May 2020 16:11:50 +0000 Subject: [PATCH 40/96] [#3] remove order sensitivity in test_user_dn test_user_dn was failing, preventing 'rodsuser' from being removed, causing subsequent tests to fail on creation of the same user --- irods/test/user_group_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/irods/test/user_group_test.py b/irods/test/user_group_test.py index be6b871..90479bf 100644 --- a/irods/test/user_group_test.py +++ b/irods/test/user_group_test.py @@ -107,13 +107,13 @@ def test_user_dn(self): # add other dn user.modify('addAuth', user_DNs[1]) - self.assertEqual(user.dn, user_DNs) + self.assertEqual( sorted(user.dn), sorted(user_DNs) ) # remove first dn user.modify('rmAuth', user_DNs[0]) # confirm removal - self.assertEqual(user.dn, user_DNs[1:]) + self.assertEqual(sorted(user.dn), sorted(user_DNs[1:])) # delete user user.remove() From 82fb71d097b452b143360c90399788d71e49f9ac Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Thu, 4 Jun 2020 22:55:50 -0400 Subject: [PATCH 41/96] [#3] v0.8.3 and update changelog --- CHANGELOG.rst | 13 +++++++++++++ irods/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9231048..b275dca 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,19 @@ Changelog ========= +v0.8.3 (2020-06-05) +------------------- +- [#3] remove order sensitivity in test_user_dn +- [#5] clarify unlink specific replica example +- [irods/irods#4796] add data object copy tests +- [#5] Additional sections and examples in README +- [#187] Allow query on metadata create and modify times +- [#135] fix queries for multiple AVUs of same name +- [#135] Allow multiple criteria based on column name +- [#180] add the "in" genquery operator +- [#183] fix key error when tables from order_by() not in query() +- [#5] fix ssl example in README.rst + v0.8.2 (2019-11-13) ------------------- - [#8] Add PAM Authentication handling (still needs tests) [Mattia D'Antonio] diff --git a/irods/version.py b/irods/version.py index 4ca39e7..b4e3540 100644 --- a/irods/version.py +++ b/irods/version.py @@ -1 +1 @@ -__version__ = '0.8.2' +__version__ = '0.8.3' From 423cef2319bddc9fca019bb91c09e22316e58508 Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Thu, 4 Jun 2020 23:24:10 -0400 Subject: [PATCH 42/96] [#3] update changelog --- CHANGELOG.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b275dca..3bd34ab 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,16 +3,16 @@ Changelog v0.8.3 (2020-06-05) ------------------- -- [#3] remove order sensitivity in test_user_dn -- [#5] clarify unlink specific replica example -- [irods/irods#4796] add data object copy tests -- [#5] Additional sections and examples in README -- [#187] Allow query on metadata create and modify times -- [#135] fix queries for multiple AVUs of same name -- [#135] Allow multiple 
criteria based on column name -- [#180] add the "in" genquery operator -- [#183] fix key error when tables from order_by() not in query() -- [#5] fix ssl example in README.rst +- [#3] remove order sensitivity in test_user_dn [Daniel Moore] +- [#5] clarify unlink specific replica example [Terrell Russell] +- [irods/irods#4796] add data object copy tests [Daniel Moore] +- [#5] Additional sections and examples in README [Daniel Moore] +- [#187] Allow query on metadata create and modify times [Daniel Moore] +- [#135] fix queries for multiple AVUs of same name [Daniel Moore] +- [#135] Allow multiple criteria based on column name [Daniel Moore] +- [#180] add the "in" genquery operator [Daniel Moore] +- [#183] fix key error when tables from order_by() not in query() [Daniel Moore] +- [#5] fix ssl example in README.rst [Terrell Russell] v0.8.2 (2019-11-13) ------------------- From 7f52a8b3c4e3633161c9bede7f1818312ee33da7 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Thu, 16 Jul 2020 16:06:14 -0400 Subject: [PATCH 43/96] [#207] add raw-acl permissions getter Disabled by default; to enable, follow the usage session.permissions.get( object, report_raw_acls = True ) --- irods/manager/access_manager.py | 76 +++++++++++++++++++++++++++++++-- irods/test/access_test.py | 46 ++++++++++++++++++++ 2 files changed, 118 insertions(+), 4 deletions(-) diff --git a/irods/manager/access_manager.py b/irods/manager/access_manager.py index 8276e5d..20ec57e 100644 --- a/irods/manager/access_manager.py +++ b/irods/manager/access_manager.py @@ -4,20 +4,38 @@ from irods.manager import Manager from irods.api_number import api_number from irods.message import ModAclRequest, iRODSMessage -from irods.data_object import iRODSDataObject +from irods.data_object import ( iRODSDataObject, irods_dirname, irods_basename ) from irods.collection import iRODSCollection -from irods.models import ( - DataObject, Collection, User, DataAccess, CollectionAccess, CollectionUser) +from irods.models import ( DataObject, Collection, User, CollectionUser, + DataAccess, CollectionAccess ) from irods.access import iRODSAccess +from irods.column import In +from irods.user import iRODSUser +import six import logging logger = logging.getLogger(__name__) +def users_by_ids(session,ids=()): + try: + ids=list(iter(ids)) + except TypeError: + if type(ids) in (str,) + six.integer_types: ids=int(ids) + else: raise + cond = () if not ids \ + else (In(User.id,list(map(int,ids))),) if len(ids)>1 \ + else (User.id == int(ids[0]),) + return [ iRODSUser(session.users,i) + for i in session.query(User.id,User.name,User.type,User.zone).filter(*cond) ] class AccessManager(Manager): - def get(self, target): + def get(self, target, report_raw_acls = False, **kw): + + if report_raw_acls: + return self.__get_raw(target, **kw) # prefer a behavior consistent with 'ils -A` + # different query whether target is an object or a collection if type(target) == iRODSDataObject: access_type = DataAccess @@ -45,6 +63,56 @@ def get(self, target): user_zone=row[user_type.zone] ) for row in results] + def coll_access_query(self,path): + return self.sess.query(Collection, CollectionAccess).filter(Collection.name == path) + + def data_access_query(self,path): + cn = irods_dirname(path) + dn = irods_basename(path) + return self.sess.query(DataObject, DataAccess).filter( Collection.name == cn, DataObject.name == dn ) + + def __get_raw(self, target, **kw): + + ### sample usage: ### + # + # user_id_list = [] # simply to store the user id's from the discovered ACL's + # 
session.permissions.get( data_or_coll_target, report_raw_acls = True, + # acl_users = user_id_list, + # acl_users_transform = lambda u: u.id) + # + # -> returns list of iRODSAccess objects mapping one-to-one with ACL's stored in the catalog + + users_out = kw.pop( 'acl_users', None ) + T = kw.pop( 'acl_users_transform', lambda value : value ) + + # different choice of query based on whether target is an object or a collection + if isinstance(target, iRODSDataObject): + access_column = DataAccess + query_func = self.data_access_query + + elif isinstance(target, iRODSCollection): + access_column = CollectionAccess + query_func = self.coll_access_query + else: + raise TypeError + + rows = [ r for r in query_func(target.path) ] + userids = set( r[access_column.user_id] for r in rows ) + + user_lookup = { j.id:j for j in users_by_ids(self.sess, userids) } + + if isinstance(users_out, dict): users_out.update (user_lookup) + elif isinstance (users_out, list): users_out += [T(v) for v in user_lookup.values()] + elif isinstance (users_out, set): users_out |= set(T(v) for v in user_lookup.values()) + elif users_out is None: pass + else: raise TypeError + + acls = [ iRODSAccess ( r[access_column.name], + target.path, + user_lookup[r[access_column.user_id]].name, + user_lookup[r[access_column.user_id]].zone ) for r in rows ] + return acls + def set(self, acl, recursive=False, admin=False): prefix = 'admin:' if admin else '' diff --git a/irods/test/access_test.py b/irods/test/access_test.py index 0d1c39f..10f76b4 100644 --- a/irods/test/access_test.py +++ b/irods/test/access_test.py @@ -4,7 +4,10 @@ import sys import unittest from irods.access import iRODSAccess +from irods.user import iRODSUser +from irods.models import User import irods.test.helpers as helpers +from irods.column import In class TestAccess(unittest.TestCase): @@ -22,6 +25,7 @@ def tearDown(self): self.coll.remove(recurse=True, force=True) self.sess.cleanup() + def test_list_acl(self): # test args collection = self.coll_path @@ -114,6 +118,48 @@ def test_set_collection_acl(self): acl1 = iRODSAccess('own', coll.path, user.name, user.zone) self.sess.permissions.set(acl1) + mapping = dict( [ (i,i) for i in ('modify object', 'read object', 'own') ] + + [ ('write','modify object') , ('read', 'read object') ] + ) + + @classmethod + def perms_lists_symm_diff ( cls, a_iter, b_iter ): + fields = lambda perm: (cls.mapping[perm.access_name], perm.user_name, perm.user_zone) + A = set (map(fields,a_iter)) + B = set (map(fields,b_iter)) + return (A-B) | (B-A) + + def test_raw_acls__207(self): + data = helpers.make_object(self.sess,"/".join((self.coll_path,"test_obj"))) + eg = eu = fg = fu = None + try: + eg = self.sess.user_groups.create ('egrp') + eu = self.sess.users.create ('edith','rodsuser') + eg.addmember(eu.name,eu.zone) + fg = self.sess.user_groups.create ('fgrp') + fu = self.sess.users.create ('frank','rodsuser') + fg.addmember(fu.name,fu.zone) + my_ownership = set([('own', self.sess.username, self.sess.zone)]) + #--collection-- + perms1data = [ iRODSAccess ('write',self.coll_path, eg.name, self.sess.zone), + iRODSAccess ('read', self.coll_path, fu.name, self.sess.zone) + ] + for perm in perms1data: self.sess.permissions.set ( perm ) + p1 = self.sess.permissions.get ( self.coll, report_raw_acls = True) + self.assertEqual(self.perms_lists_symm_diff( perms1data, p1 ), my_ownership) + #--data object-- + perms2data = [ iRODSAccess ('write',data.path, fg.name, self.sess.zone), + iRODSAccess ('read', data.path, eu.name, self.sess.zone) + ] + for 
perm in perms2data: self.sess.permissions.set ( perm ) + p2 = self.sess.permissions.get ( data, report_raw_acls = True) + self.assertEqual(self.perms_lists_symm_diff( perms2data, p2 ), my_ownership) + finally: + ids_for_delete = [ u.id for u in (fu,fg,eu,eg) if u is not None ] + for u in [ iRODSUser(self.sess.users,row) + for row in self.sess.query(User).filter(In(User.id, ids_for_delete)) ]: + u.remove() + if __name__ == '__main__': # let the tests find the parent irods lib From 893770916024a4b54299d521a100d30435807577 Mon Sep 17 00:00:00 2001 From: Patrice Linel Date: Thu, 16 Jan 2020 17:02:13 -0600 Subject: [PATCH 44/96] [#156] fix the PAM authentication with env json file. Signed-off-by: Patrice Linel --- irods/session.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/irods/session.py b/irods/session.py index 1c7514b..5999280 100644 --- a/irods/session.py +++ b/irods/session.py @@ -12,6 +12,7 @@ from irods.manager.resource_manager import ResourceManager from irods.exception import NetworkException from irods.password_obfuscation import decode +from irods import NATIVE_AUTH_SCHEME, PAM_AUTH_SCHEME class iRODSSession(object): @@ -74,7 +75,13 @@ def _configure_account(self, **kwargs): # default auth_scheme = 'native' - if auth_scheme != 'native': + if auth_scheme.lower() == PAM_AUTH_SCHEME: + if 'password' in creds: + return iRODSAccount(**creds) + else: + # password will be from irodsA file therefore use native login + creds['irods_authentication_scheme'] = NATIVE_AUTH_SCHEME + elif auth_scheme != 'native': return iRODSAccount(**creds) # Native auth, try to unscramble password From 904e61a74e22b0e2de6e143822a710eba1c73616 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Tue, 14 Jul 2020 11:35:39 +0000 Subject: [PATCH 45/96] [#205] Disallow PAM plaintext passwords as strong default --- irods/connection.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/irods/connection.py b/irods/connection.py index 7d25eba..0006c83 100644 --- a/irods/connection.py +++ b/irods/connection.py @@ -31,9 +31,12 @@ logger = logging.getLogger(__name__) +class PlainTextPAMPasswordError(Exception): pass class Connection(object): + DISALLOWING_PAM_PLAINTEXT = True + def __init__(self, pool, account): self.pool = pool @@ -394,6 +397,10 @@ def _login_pam(self): ctx = ";".join([ctx_user, ctx_pwd, ctx_ttl]) + if type(self.socket) is socket.socket: + if getattr(self,'DISALLOWING_PAM_PLAINTEXT',True): + raise PlainTextPAMPasswordError + message_body = PluginAuthMessage( auth_scheme_=PAM_AUTH_SCHEME, context_=ctx From 55356fa8b71a8c14e5b6323a31e39f45ee7076fd Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Tue, 21 Jul 2020 10:23:57 +0000 Subject: [PATCH 46/96] [#209] store hashed PAM pw --- irods/connection.py | 6 ++++++ irods/session.py | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/irods/connection.py b/irods/connection.py index 0006c83..1d99225 100644 --- a/irods/connection.py +++ b/irods/connection.py @@ -421,6 +421,12 @@ def _login_pam(self): self.disconnect() self._connect() + + if hasattr(self.account,'store_pw'): + drop = self.account.store_pw + if type(drop) is list: + drop[:] = [ auth_out.result_ ] + self._login_native(password=auth_out.result_) logger.info("PAM authorization validated") diff --git a/irods/session.py b/irods/session.py index 5999280..217f836 100644 --- a/irods/session.py +++ b/irods/session.py @@ -129,6 +129,15 @@ def server_version(self): conn.release() return version + @property + def pam_pw_negotiated(self): + self.pool.account.store_pw = [] + 
conn = self.pool.get_connection() + pw = getattr(self.pool.account,'store_pw',[]) + delattr( self.pool.account, 'store_pw') + conn.release() + return pw + @property def default_resource(self): return self.pool.account.default_resource From 12144308fdd76fe562ecf96c3990dd0ec00eb505 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Wed, 22 Jul 2020 22:01:29 +0000 Subject: [PATCH 47/96] [#209] pam/ssl/env auth tests imported from test harness --- irods/test/login_auth_test.py | 316 ++++++++++++++++++++++++++++++++++ irods/test/setupssl.py | 50 ++++++ 2 files changed, 366 insertions(+) create mode 100644 irods/test/login_auth_test.py create mode 100755 irods/test/setupssl.py diff --git a/irods/test/login_auth_test.py b/irods/test/login_auth_test.py new file mode 100644 index 0000000..31aefe5 --- /dev/null +++ b/irods/test/login_auth_test.py @@ -0,0 +1,316 @@ +#! /usr/bin/env python +from __future__ import print_function +from __future__ import absolute_import +import os +import sys +import unittest +import textwrap +import json +import shutil +import ssl +import irods.test.helpers as helpers +from irods.connection import Connection +from irods.session import iRODSSession +from irods.rule import Rule +from socket import gethostname +from irods.password_obfuscation import (encode as pw_encode) +from irods.connection import PlainTextPAMPasswordError +import contextlib +from re import (compile as regex,_pattern_type as regex_type) + +def json_file_update(fname,keys_to_delete=(),**kw): + j = json.load(open(fname,'r')) + j.update(**kw) + for k in keys_to_delete: + if k in j: del j [k] + elif isinstance(k,regex_type): + jk = [i for i in j.keys() if k.search(i)] + for ky in jk: del j[ky] + with open(fname,'w') as out: + json.dump(j, out, indent=4) + +def env_dir_fullpath(authtype): return os.path.join( os.environ['HOME'] , '.irods.' 
+ authtype) +def json_env_fullpath(authtype): return os.path.join( env_dir_fullpath(authtype), 'irods_environment.json') +def secrets_fullpath(authtype): return os.path.join( env_dir_fullpath(authtype), '.irodsA') + +SERVER_ENV_PATH = os.path.expanduser('~irods/.irods/irods_environment.json') + +SERVER_ENV_SSL_SETTINGS = { + "irods_ssl_certificate_chain_file": "/etc/irods/ssl/irods.crt", + "irods_ssl_certificate_key_file": "/etc/irods/ssl/irods.key", + "irods_ssl_dh_params_file": "/etc/irods/ssl/dhparams.pem", + "irods_ssl_ca_certificate_file": "/etc/irods/ssl/irods.crt", + "irods_ssl_verify_server": "cert" +} + +def update_service_account_for_SSL(): + json_file_update( SERVER_ENV_PATH, **SERVER_ENV_SSL_SETTINGS ) + +CLIENT_OPTIONS_FOR_SSL = { + "irods_client_server_policy": "CS_NEG_REQUIRE", + "irods_client_server_negotiation": "request_server_negotiation", + "irods_ssl_ca_certificate_file": "/etc/irods/ssl/irods.crt", + "irods_ssl_verify_server": "cert", + "irods_encryption_key_size": 16, + "irods_encryption_salt_size": 8, + "irods_encryption_num_hash_rounds": 16, + "irods_encryption_algorithm": "AES-256-CBC" +} + + +def client_env_from_server_env(user_name, auth_scheme=""): + cli_env = {} + with open(SERVER_ENV_PATH) as f: + srv_env = json.load(f) + for k in [ "irods_host", "irods_zone_name", "irods_port" ]: + cli_env [k] = srv_env[k] + cli_env["irods_user_name"] = user_name + if auth_scheme: + cli_env["irods_authentication_scheme"] = auth_scheme + return cli_env + +@contextlib.contextmanager +def pam_password_in_plaintext(allow=True): + saved = bool(Connection.DISALLOWING_PAM_PLAINTEXT) + try: + Connection.DISALLOWING_PAM_PLAINTEXT = not(allow) + yield + finally: + Connection.DISALLOWING_PAM_PLAINTEXT = saved + +class TestLogins(unittest.TestCase): + + UserName = 'alissa' + + user_auth_envs = { + '.irods.pam': { + 'USER': UserName, + 'PASSWORD': 'test123', + 'AUTH': 'pam' + }, + '.irods.native': { + 'USER': UserName, + 'PASSWORD': 'apass', + 'AUTH': 'native' + } + } + + env_save = {} + + @contextlib.contextmanager + def setenv(self,var,newvalue): + try: + self.env_save[var] = os.environ.get(var,None) + os.environ[var] = newvalue + yield newvalue + finally: + oldvalue = self.env_save[var] + if oldvalue is None: + del os.environ[var] + else: + os.environ[var]=oldvalue + + @classmethod + def create_env_dirs(cls): + dirs = {} + retval = [] + # -- create environment configurations and secrets + with pam_password_in_plaintext(): + for dirname,lookup in cls.user_auth_envs.items(): + if lookup['AUTH'] == 'pam': + ses = iRODSSession( host=gethostname(), + user=lookup['USER'], + zone='tempZone', + authentication_scheme=lookup['AUTH'], + password=lookup['PASSWORD'], + port= 1247 ) + try: + pam_hashes = ses.pam_pw_negotiated + except AttributeError: + pam_hashes = [] + if not pam_hashes: print('Warning ** PAM pw couldnt be generated' ); break + scrambled_pw = pw_encode( pam_hashes[0] ) + #elif lookup['AUTH'] == 'XXXXXX': # TODO: insert other authentication schemes here + elif lookup['AUTH'] in ('native', '',None): + scrambled_pw = pw_encode( lookup['PASSWORD'] ) + cl_env = client_env_from_server_env(cls.UserName) + if lookup.get('AUTH',None) is not None: # - specify auth scheme only if given + cl_env['irods_authentication_scheme'] = lookup['AUTH'] + dirbase = os.path.join(os.environ['HOME'],dirname) + dirs[dirbase] = { 'secrets':scrambled_pw , 'client_environment':cl_env } + + # -- create the environment directories and write into them the configurations just created + for absdir in dirs.keys(): + 
shutil.rmtree(absdir,ignore_errors=True) + os.mkdir(absdir) + with open(os.path.join(absdir,'irods_environment.json'),'w') as envfile: + envfile.write('{}') + json_file_update(envfile.name, **dirs[absdir]['client_environment']) + with open(os.path.join(absdir,'.irodsA'),'wb') as secrets_file: + secrets_file.write(dirs[absdir]['secrets']) + os.chmod(secrets_file.name,0o600) + + retval = dirs.keys() + return retval + + + @staticmethod + def get_server_ssl_negotiation( session ): + + rule_body = textwrap.dedent(''' + test { *out=""; acPreConnect(*out); + writeLine("stdout", "*out"); + } + ''') + myrule = Rule(session, body=rule_body, params={}, output='ruleExecOut') + out_array = myrule.execute() + buf = out_array.MsParam_PI[0].inOutStruct.stdoutBuf.buf.decode('utf-8') + eol_offset = buf.find('\n') + return buf[:eol_offset] if eol_offset >= 0 else None + + @classmethod + def setUpClass(cls): + cls.admin = helpers.make_session() + cls.server_ssl_setting = cls.get_server_ssl_negotiation( cls.admin ) + cls.envdirs = cls.create_env_dirs() + if not cls.envdirs: + raise RuntimeError('Could not create one or more client environments') + + + @classmethod + def tearDownClass(cls): + for envdir in cls.envdirs: + shutil.rmtree(envdir, ignore_errors=True) + cls.admin.cleanup() + +# def setUp(self): +# # - placeholder for per-test setup +# super(TestLogins,self).setUp() + +# def tearDown(self): +# # - placeholder for per-test teardown +# super(TestLogins,self).tearDown() + + def validate_session(self, session, verbose=False, **options): + + # - try to get the home collection + home_coll = '/{0.zone}/home/{0.username}'.format(session) + self.assertTrue(session.collections.get(home_coll).path == home_coll) + if verbose: print(home_coll) + # - check user is as expected + self.assertEqual( session.username, self.UserName ) + # - check socket type (normal vs SSL) against whether ssl requested + use_ssl = options.pop('ssl',None) + if use_ssl is not None: + my_connect = [s for s in (session.pool.active|session.pool.idle)] [0] + self.assertEqual( bool( use_ssl ), my_connect.socket.__class__ is ssl.SSLSocket ) + + +# def test_demo(self): self.demo() + +# def demo(self): # for future reference - skipping based on CS_NEG_DONT_CARE setting +# if self.server_ssl_setting == 'CS_NEG_DONT_CARE': +# self.skipTest('skipping b/c setting is DONT_CARE') +# self.assertTrue (False) + + + def tst0(self, ssl_opt, auth_opt, env_opt ): + auth_opt_explicit = 'native' if auth_opt=='' else auth_opt + verbosity=False + #verbosity='' # -- debug - sanity check by printing out options applied + out = {'':''} + if env_opt: + with self.setenv('IRODS_ENVIRONMENT_FILE', json_env_fullpath(auth_opt_explicit)) as env_file,\ + self.setenv('IRODS_AUTHENTICATION_FILE', secrets_fullpath(auth_opt_explicit)): + cli_env_extras = {} if not(ssl_opt) else dict( CLIENT_OPTIONS_FOR_SSL ) + if auth_opt: + cli_env_extras.update( irods_authentication_scheme = auth_opt ) + remove=[] + else: + remove=[regex('authentication_')] + with helpers.file_backed_up(env_file): + json_file_update( env_file, keys_to_delete=remove, **cli_env_extras ) + session = iRODSSession(irods_env_file=env_file) + out = json.load(open(env_file)) + self.validate_session( session, verbose = verbosity, ssl = ssl_opt ) + session.cleanup() + out['ARGS']='no' + else: + session_options = {} + if auth_opt: + session_options.update (authentication_scheme = auth_opt) + if ssl_opt: + SSL_cert = CLIENT_OPTIONS_FOR_SSL["irods_ssl_ca_certificate_file"] + session_options.update( + ssl_context = 
ssl.create_default_context ( purpose = ssl.Purpose.SERVER_AUTH, + capath = None, + cadata = None, + cafile = SSL_cert), + **CLIENT_OPTIONS_FOR_SSL ) + lookup = self.user_auth_envs ['.irods.'+('native' if not(auth_opt) else auth_opt)] + session = iRODSSession ( host=gethostname(), + user=lookup['USER'], + zone='tempZone', + password=lookup['PASSWORD'], + port= 1247, + **session_options ) + out = session_options + self.validate_session( session, verbose = verbosity, ssl = ssl_opt ) + session.cleanup() + out['ARGS']='yes' + + if verbosity == '': + print ('--- ssl:',ssl_opt,'/ auth:',repr(auth_opt),'/ env:',env_opt) + print ('--- > ',json.dumps({k:v for k,v in out.items() if k != 'ssl_context'},indent=4)) + print ('---') + + # == test defaulting to 'native' + + def test_01(self): + self.tst0 ( ssl_opt = True , auth_opt = '' , env_opt = False ) + def test_02(self): + self.tst0 ( ssl_opt = False, auth_opt = '' , env_opt = False ) + def test_03(self): + self.tst0 ( ssl_opt = True , auth_opt = '' , env_opt = True ) + def test_04(self): + self.tst0 ( ssl_opt = False, auth_opt = '' , env_opt = True ) + + # == test explicit scheme 'native' + + def test_1(self): + self.tst0 ( ssl_opt = True , auth_opt = 'native' , env_opt = False ) + + def test_2(self): + self.tst0 ( ssl_opt = False, auth_opt = 'native' , env_opt = False ) + + def test_3(self): + self.tst0 ( ssl_opt = True , auth_opt = 'native' , env_opt = True ) + + def test_4(self): + self.tst0 ( ssl_opt = False, auth_opt = 'native' , env_opt = True ) + + # == test explicit scheme 'pam' + + def test_5(self): + self.tst0 ( ssl_opt = True, auth_opt = 'pam' , env_opt = False ) + + def test_6(self): + try: + self.tst0 ( ssl_opt = False, auth_opt = 'pam' , env_opt = False ) + except PlainTextPAMPasswordError: + pass + else: + # -- no exception raised + self.fail("PlainTextPAMPasswordError should have been raised") + + def test_7(self): + self.tst0 ( ssl_opt = True , auth_opt = 'pam' , env_opt = True ) + + def test_8(self): + self.tst0 ( ssl_opt = False, auth_opt = 'pam' , env_opt = True ) + + +if __name__ == '__main__': + # let the tests find the parent irods lib + sys.path.insert(0, os.path.abspath('../..')) + unittest.main() diff --git a/irods/test/setupssl.py b/irods/test/setupssl.py new file mode 100755 index 0000000..aab6bd1 --- /dev/null +++ b/irods/test/setupssl.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +from __future__ import print_function +import os +import sys +import socket +import posix +import shutil +from subprocess import (Popen, PIPE) + +IRODS_SSL_DIR = '/etc/irods/ssl' + +def create_ssl_dir(): + save_cwd = os.getcwd() + silent_run = { 'shell': True, 'stderr' : PIPE, 'stdout' : PIPE } + try: + if not (os.path.exists(IRODS_SSL_DIR)): + os.mkdir(IRODS_SSL_DIR) + os.chdir(IRODS_SSL_DIR) + Popen("openssl genrsa -out irods.key 2048",**silent_run).communicate() + with open("/dev/null","wb") as dev_null: + p = Popen("openssl req -new -x509 -key irods.key -out irods.crt -days 365 < Date: Fri, 28 Aug 2020 13:56:26 +0000 Subject: [PATCH 48/96] [#156] skip ssh/pam login tests if user doesn't exist --- irods/test/login_auth_test.py | 52 +++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/irods/test/login_auth_test.py b/irods/test/login_auth_test.py index 31aefe5..e8525ae 100644 --- a/irods/test/login_auth_test.py +++ b/irods/test/login_auth_test.py @@ -12,12 +12,14 @@ from irods.connection import Connection from irods.session import iRODSSession from irods.rule import Rule +from irods.models import 
User from socket import gethostname from irods.password_obfuscation import (encode as pw_encode) from irods.connection import PlainTextPAMPasswordError import contextlib from re import (compile as regex,_pattern_type as regex_type) + def json_file_update(fname,keys_to_delete=(),**kw): j = json.load(open(fname,'r')) j.update(**kw) @@ -78,19 +80,29 @@ def pam_password_in_plaintext(allow=True): finally: Connection.DISALLOWING_PAM_PLAINTEXT = saved -class TestLogins(unittest.TestCase): - UserName = 'alissa' +class TestLogins(unittest.TestCase): + ''' + This is due to be moved into Jenkins CI along core and other iRODS tests. + Until then, for these tests to run successfully, we require: + 1. First run ./setupssl.py (sets up SSL keys etc. in /etc/irods/ssl) + 2. Add & override configuration entries in /var/lib/irods/irods_environment + Per https://slides.com/irods/ugm2018-ssl-and-pam-configuration#/3/7 + 3. Create rodsuser alissa and corresponding unix user with the appropriate + passwords as below. + ''' + + test_rods_user = 'alissa' user_auth_envs = { '.irods.pam': { - 'USER': UserName, - 'PASSWORD': 'test123', + 'USER': test_rods_user, + 'PASSWORD': 'test123', # UNIX pw 'AUTH': 'pam' }, '.irods.native': { - 'USER': UserName, - 'PASSWORD': 'apass', + 'USER': test_rods_user, + 'PASSWORD': 'apass', # iRODS pw 'AUTH': 'native' } } @@ -133,7 +145,7 @@ def create_env_dirs(cls): #elif lookup['AUTH'] == 'XXXXXX': # TODO: insert other authentication schemes here elif lookup['AUTH'] in ('native', '',None): scrambled_pw = pw_encode( lookup['PASSWORD'] ) - cl_env = client_env_from_server_env(cls.UserName) + cl_env = client_env_from_server_env(cls.test_rods_user) if lookup.get('AUTH',None) is not None: # - specify auth scheme only if given cl_env['irods_authentication_scheme'] = lookup['AUTH'] dirbase = os.path.join(os.environ['HOME'],dirname) @@ -171,25 +183,25 @@ def get_server_ssl_negotiation( session ): @classmethod def setUpClass(cls): cls.admin = helpers.make_session() - cls.server_ssl_setting = cls.get_server_ssl_negotiation( cls.admin ) - cls.envdirs = cls.create_env_dirs() - if not cls.envdirs: - raise RuntimeError('Could not create one or more client environments') - + if cls.test_rods_user in (row[User.name] for row in cls.admin.query(User.name)): + cls.server_ssl_setting = cls.get_server_ssl_negotiation( cls.admin ) + cls.envdirs = cls.create_env_dirs() + if not cls.envdirs: + raise RuntimeError('Could not create one or more client environments') @classmethod def tearDownClass(cls): - for envdir in cls.envdirs: + for envdir in getattr(cls, 'envdirs', []): shutil.rmtree(envdir, ignore_errors=True) cls.admin.cleanup() -# def setUp(self): -# # - placeholder for per-test setup -# super(TestLogins,self).setUp() + def setUp(self): + if not getattr(self, 'envdirs', []): + self.skipTest('The test_rods_user "{}" does not exist'.format(self.test_rods_user)) + super(TestLogins,self).setUp() -# def tearDown(self): -# # - placeholder for per-test teardown -# super(TestLogins,self).tearDown() + def tearDown(self): + super(TestLogins,self).tearDown() def validate_session(self, session, verbose=False, **options): @@ -198,7 +210,7 @@ def validate_session(self, session, verbose=False, **options): self.assertTrue(session.collections.get(home_coll).path == home_coll) if verbose: print(home_coll) # - check user is as expected - self.assertEqual( session.username, self.UserName ) + self.assertEqual( session.username, self.test_rods_user ) # - check socket type (normal vs SSL) against whether ssl requested use_ssl 
= options.pop('ssl',None) if use_ssl is not None: From 6f7be52795b79700df1398e467549f4bf9d9f121 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Fri, 28 Aug 2020 06:18:21 +0000 Subject: [PATCH 49/96] [#211] set and report application name to server --- irods/connection.py | 4 +++- irods/message/__init__.py | 4 ++-- irods/pool.py | 14 +++++++++++++- irods/session.py | 2 +- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/irods/connection.py b/irods/connection.py index 1d99225..30cc55a 100644 --- a/irods/connection.py +++ b/irods/connection.py @@ -196,9 +196,11 @@ def _connect(self): "{}:{}".format(*address)) self.socket = s + main_message = StartupPack( (self.account.proxy_user, self.account.proxy_zone), - (self.account.client_user, self.account.client_zone) + (self.account.client_user, self.account.client_zone), + self.pool.application_name ) # No client-server negotiation diff --git a/irods/message/__init__.py b/irods/message/__init__.py index 362052c..72a5ef8 100644 --- a/irods/message/__init__.py +++ b/irods/message/__init__.py @@ -188,7 +188,7 @@ class ClientServerNegotiation(Message): class StartupPack(Message): _name = 'StartupPack_PI' - def __init__(self, proxy_user, client_user): + def __init__(self, proxy_user, client_user, application_name = ''): super(StartupPack, self).__init__() if proxy_user and client_user: self.irodsProt = 1 @@ -197,7 +197,7 @@ def __init__(self, proxy_user, client_user): self.clientUser, self.clientRcatZone = client_user self.relVersion = "rods{}.{}.{}".format(*IRODS_VERSION) self.apiVersion = "{3}".format(*IRODS_VERSION) - self.option = "" + self.option = application_name irodsProt = IntegerProperty() reconnFlag = IntegerProperty() diff --git a/irods/pool.py b/irods/pool.py index 4488ba4..f611e7d 100644 --- a/irods/pool.py +++ b/irods/pool.py @@ -1,6 +1,7 @@ from __future__ import absolute_import import logging import threading +import os from irods import DEFAULT_CONNECTION_TIMEOUT from irods.connection import Connection @@ -8,14 +9,25 @@ logger = logging.getLogger(__name__) +DEFAULT_APPLICATION_NAME='python-irodsclient' + + class Pool(object): - def __init__(self, account): + def __init__(self, account, application_name = ''): + ''' + Pool( account , application_name='' ) + Create an iRODS connection pool; 'account' is an irods.account.iRODSAccount instance and + 'application_name' specifies the application name as it should appear in an 'ips' listing. 
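+        For example (an illustrative sketch; 'my_app' is an arbitrary label):
+            Pool(account, application_name='my_app')
+        iRODSSession also accepts an 'application_name' keyword argument and
+        forwards it to the Pool it creates (see the session.py hunk later in
+        this patch).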
+ ''' self.account = account self._lock = threading.RLock() self.active = set() self.idle = set() self.connection_timeout = DEFAULT_CONNECTION_TIMEOUT + self.application_name = ( os.environ.get('spOption','') or + application_name or + DEFAULT_APPLICATION_NAME ) def get_connection(self): with self._lock: diff --git a/irods/session.py b/irods/session.py index 217f836..3071f77 100644 --- a/irods/session.py +++ b/irods/session.py @@ -97,7 +97,7 @@ def _configure_account(self, **kwargs): def configure(self, **kwargs): account = self._configure_account(**kwargs) - self.pool = Pool(account) + self.pool = Pool(account, application_name = kwargs.pop('application_name','')) def query(self, *args): return Query(self, *args) From 8ea65cbaadf87056314e1ac192a4c24a5e989180 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Fri, 4 Sep 2020 14:10:54 +0000 Subject: [PATCH 50/96] [#214] store/load rules as utf-8 in files --- irods/rule.py | 5 +++-- irods/test/rule_test.py | 8 +++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/irods/rule.py b/irods/rule.py index 0471cda..6d7e07a 100644 --- a/irods/rule.py +++ b/irods/rule.py @@ -1,6 +1,7 @@ from __future__ import absolute_import from irods.message import iRODSMessage, StringStringMap, RodsHostAddress, STR_PI, MsParam, MsParamArray, RuleExecutionRequest from irods.api_number import api_number +from io import open as io_open class Rule(object): def __init__(self, session, rule_file=None, body='', params=None, output=''): @@ -20,11 +21,11 @@ def __init__(self, session, rule_file=None, body='', params=None, output=''): if output != '': self.output = output - def load(self, rule_file): + def load(self, rule_file, encoding = 'utf-8'): self.body = '@external\n' # parse rule file - with open(rule_file) as f: + with io_open(rule_file, encoding = encoding) as f: for line in f: # parse input line if line.strip().lower().startswith('input'): diff --git a/irods/test/rule_test.py b/irods/test/rule_test.py index dfcc624..233cdd4 100644 --- a/irods/test/rule_test.py +++ b/irods/test/rule_test.py @@ -10,6 +10,7 @@ import irods.test.helpers as helpers from irods.rule import Rule import six +from io import open as io_open class TestRule(unittest.TestCase): @@ -157,11 +158,8 @@ def test_retrieve_std_streams_from_rule(self): INPUT *some_string="{some_string}",*some_other_string="{some_other_string}",*err_string="{err_string}" OUTPUT ruleExecOut'''.format(**locals())) - with open(rule_file_path, "w") as rule_file: - if six.PY2: - rule_file.write(rule.encode('utf-8')) - else: - rule_file.write(rule) + with io_open(rule_file_path, "w", encoding='utf-8') as rule_file: + rule_file.write(rule) # run test rule myrule = Rule(session, rule_file_path) From ce04d53e95df26a7c2446e7d58a320d872fe6644 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Fri, 4 Sep 2020 15:00:30 +0000 Subject: [PATCH 51/96] [#3] tests failing on Python3 unicode defaults --- irods/test/data_obj_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index 3ae1a04..d7ac1af 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -323,7 +323,7 @@ def test_create_with_checksum(self): obj_path = "{collection}/{filename}".format(**locals()) contents = 'blah' * 100 checksum = base64.b64encode( - hashlib.sha256(contents).digest()).decode() + hashlib.sha256(contents.encode()).digest()).decode() # make object in test collection options = {kw.OPR_TYPE_KW: 1} # PUT_OPR @@ -384,7 +384,8 @@ def 
test_put_file_trigger_pep(self): # make pseudo-random test file filename = 'test_put_file_trigger_pep.txt' test_file = os.path.join('/tmp', filename) - contents = ''.join(random.choice(string.printable) for _ in range(1024)) + contents = ''.join(random.choice(string.printable) for _ in range(1024)).encode() + contents = contents[:1024] with open(test_file, 'wb') as f: f.write(contents) From bfd10d6a813a0a3c00f6da5c21bcb177ad7e242a Mon Sep 17 00:00:00 2001 From: Kaivan Kamali Date: Tue, 13 Oct 2020 13:43:55 -0400 Subject: [PATCH 52/96] [#220] Replace stale connections pulled from idle pools When getting a connection, check when the connection was last used (This is done via connection's last_used_time field). If it was last used more than a specific amount (configurable), drop the connection and create a one. This is to address the issue with connections that are dropped after being not used for a long time. --- irods/connection.py | 2 + irods/pool.py | 25 ++- irods/session.py | 31 +++- irods/test/pool_test.py | 153 +++++++++++++++++- irods/test/test-data/irods_environment.json | 7 + ...ds_environment_negative_refresh_field.json | 7 + .../irods_environment_no_refresh_field.json | 6 + 7 files changed, 222 insertions(+), 9 deletions(-) create mode 100644 irods/test/test-data/irods_environment.json create mode 100644 irods/test/test-data/irods_environment_negative_refresh_field.json create mode 100644 irods/test/test-data/irods_environment_no_refresh_field.json diff --git a/irods/connection.py b/irods/connection.py index 30cc55a..b3ce440 100644 --- a/irods/connection.py +++ b/irods/connection.py @@ -6,6 +6,7 @@ import six import os import ssl +import datetime from irods.message import ( @@ -56,6 +57,7 @@ def __init__(self, pool, account): self._login_pam() else: raise ValueError("Unknown authentication scheme %s" % scheme) + self.last_used_time = datetime.datetime.now() @property def server_version(self): diff --git a/irods/pool.py b/irods/pool.py index f611e7d..c4513e7 100644 --- a/irods/pool.py +++ b/irods/pool.py @@ -1,4 +1,5 @@ from __future__ import absolute_import +import datetime import logging import threading import os @@ -9,12 +10,12 @@ logger = logging.getLogger(__name__) -DEFAULT_APPLICATION_NAME='python-irodsclient' +DEFAULT_APPLICATION_NAME = 'python-irodsclient' class Pool(object): - def __init__(self, account, application_name = ''): + def __init__(self, account, application_name='', connection_refresh_time=-1): ''' Pool( account , application_name='' ) Create an iRODS connection pool; 'account' is an irods.account.iRODSAccount instance and @@ -29,12 +30,29 @@ def __init__(self, account, application_name = ''): application_name or DEFAULT_APPLICATION_NAME ) + if connection_refresh_time > 0: + self.refresh_connection = True + self.connection_refresh_time = connection_refresh_time + else: + self.refresh_connection = False + self.connection_refresh_time = None + def get_connection(self): with self._lock: try: conn = self.idle.pop() + + curr_time = datetime.datetime.now() + # If 'refresh_connection' flag is True and the connection was + # last used more than 'connection_refresh_time' seconds ago, + # release the connection (as its stale) and create a new one + if self.refresh_connection and (curr_time - conn.last_used_time).total_seconds() > self.connection_refresh_time: + logger.debug('Connection has been idle more than {} seconds. 
Releasing the connection and creating a new one.'.format(self.connection_refresh_time)) + self.release_connection(conn, True) + conn = Connection(self, self.account) except KeyError: conn = Connection(self, self.account) + self.active.add(conn) logger.debug('num active: {}'.format(len(self.active))) return conn @@ -44,6 +62,9 @@ def release_connection(self, conn, destroy=False): if conn in self.active: self.active.remove(conn) if not destroy: + # If 'refresh_connection' flag is True, update connection's 'last_used_time' + if self.refresh_connection: + conn.last_used_time = datetime.datetime.now() self.idle.add(conn) elif conn in self.idle and destroy: self.idle.remove(conn) diff --git a/irods/session.py b/irods/session.py index 3071f77..bc18cb0 100644 --- a/irods/session.py +++ b/irods/session.py @@ -1,6 +1,7 @@ from __future__ import absolute_import import os import json +import logging from irods.query import Query from irods.pool import Pool from irods.account import iRODSAccount @@ -14,6 +15,7 @@ from irods.password_obfuscation import decode from irods import NATIVE_AUTH_SCHEME, PAM_AUTH_SCHEME +logger = logging.getLogger(__name__) class iRODSSession(object): @@ -94,10 +96,10 @@ def _configure_account(self, **kwargs): return iRODSAccount(**creds) - def configure(self, **kwargs): account = self._configure_account(**kwargs) - self.pool = Pool(account, application_name = kwargs.pop('application_name','')) + connection_refresh_time = self.get_connection_refresh_time(**kwargs) + self.pool = Pool(account, application_name=kwargs.pop('application_name',''), connection_refresh_time=connection_refresh_time) def query(self, *args): return Query(self, *args) @@ -163,8 +165,12 @@ def get_irods_password_file(): @staticmethod def get_irods_env(env_file): - with open(env_file, 'rt') as f: - return json.load(f) + try: + with open(env_file, 'rt') as f: + return json.load(f) + except IOError: + logger.debug("Could not open file {}".format(env_file)) + return {} @staticmethod def get_irods_password(**kwargs): @@ -180,3 +186,20 @@ def get_irods_password(**kwargs): with open(irods_auth_file, 'r') as f: return decode(f.read().rstrip('\n'), uid) + + def get_connection_refresh_time(self, **kwargs): + connection_refresh_time = -1 + try: + env_file = kwargs['irods_env_file'] + except KeyError: + return connection_refresh_time + + if env_file is not None: + env_file_map = self.get_irods_env(env_file) + connection_refresh_time = int(env_file_map.get('irods_connection_refresh_time', -1)) + if connection_refresh_time < 1: + # Negative values are not allowed. + logger.debug('connection_refresh_time in {} file has value of {}. Only values greater than 1 are allowed.'.format(env_file, connection_refresh_time)) + connection_refresh_time = -1 + + return connection_refresh_time diff --git a/irods/test/pool_test.py b/irods/test/pool_test.py index 0f38ff8..a6a9c21 100644 --- a/irods/test/pool_test.py +++ b/irods/test/pool_test.py @@ -1,15 +1,18 @@ #! 
/usr/bin/env python from __future__ import absolute_import +import datetime import os import sys +import time import unittest import irods.test.helpers as helpers + class TestPool(unittest.TestCase): def setUp(self): - self.sess = helpers.make_session() + self.sess = helpers.make_session(irods_env_file="./test-data/irods_environment.json") def tearDown(self): '''Close connections @@ -17,7 +20,7 @@ def tearDown(self): self.sess.cleanup() def test_release_connection(self): - with self.sess.pool.get_connection() as conn: + with self.sess.pool.get_connection(): self.assertEqual(1, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) @@ -34,7 +37,7 @@ def test_destroy_active(self): self.assertEqual(0, len(self.sess.pool.idle)) def test_destroy_idle(self): - with self.sess.pool.get_connection() as conn: + with self.sess.pool.get_connection(): self.assertEqual(1, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) @@ -58,6 +61,150 @@ def test_release_disconnected(self): self.assertEqual(0, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) + def test_connection_last_used_time(self): + # Get a connection and record its object ID and last_used_time + # Release the connection (goes from active to idle queue) + # Again, get a connection. Should get the same connection back. + # I.e., the object IDs should match. However, the new connection + # should have a more recent 'last_used_time' + conn_obj_id_1 = None + conn_obj_id_2 = None + last_used_time_1 = None + last_used_time_2 = None + + with self.sess.pool.get_connection() as conn: + conn_obj_id_1 = id(conn) + curr_time = datetime.datetime.now() + last_used_time_1 = conn.last_used_time + self.assertTrue(curr_time >= last_used_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(1, len(self.sess.pool.idle)) + + with self.sess.pool.get_connection() as conn: + conn_obj_id_2 = id(conn) + curr_time = datetime.datetime.now() + last_used_time_2 = conn.last_used_time + self.assertEqual(conn_obj_id_1, conn_obj_id_2) + self.assertTrue(curr_time >= last_used_time_2) + self.assertTrue(last_used_time_2 >= last_used_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(1, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn, True) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + def test_refresh_connection(self): + # Set 'irods_connection_refresh_time' to '3' (in seconds) in + # ~/.irods/irods_environment.json file. This means any connection + # that is not used more than 3 seconds will be dropped and + # a new connection is created/returned. This is to avoid + # issue with idle connections that are dropped. 
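+        # (In this test class the 3-second setting actually comes from the bundled
+        # ./test-data/irods_environment.json, which sets "irods_connection_refresh_time"
+        # to "3" and is loaded in setUp() via helpers.make_session(irods_env_file=...).)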
+ conn_obj_id_1 = None + conn_obj_id_2 = None + last_used_time_1 = None + last_used_time_2 = None + + with self.sess.pool.get_connection() as conn: + conn_obj_id_1 = id(conn) + curr_time = datetime.datetime.now() + last_used_time_1 = conn.last_used_time + self.assertTrue(curr_time >= last_used_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(1, len(self.sess.pool.idle)) + + # Wait more than 'irods_connection_refresh_time' seconds, + # which is set to 3. Connection object should have a different + # object ID (as a new connection is created) + time.sleep(5) + + with self.sess.pool.get_connection() as conn: + conn_obj_id_2 = id(conn) + curr_time = datetime.datetime.now() + last_used_time_2 = conn.last_used_time + self.assertTrue(curr_time >= last_used_time_2) + self.assertNotEqual(conn_obj_id_1, conn_obj_id_2) + self.assertTrue(last_used_time_2 > last_used_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn, True) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + def test_no_refresh_connection(self): + # Set 'irods_connection_refresh_time' to '3' (in seconds) in + # ~/.irods/irods_environment.json file. This means any connection + # that is not used more than 3 seconds will be dropped and + # a new connection is created/returned. This is to avoid + # issue with idle connections that are dropped. + conn_obj_id_1 = None + conn_obj_id_2 = None + last_used_time_1 = None + last_used_time_2 = None + + with self.sess.pool.get_connection() as conn: + conn_obj_id_1 = id(conn) + curr_time = datetime.datetime.now() + last_used_time_1 = conn.last_used_time + self.assertTrue(curr_time >= last_used_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(1, len(self.sess.pool.idle)) + + # Wait less than 'irods_connection_refresh_time' seconds, + # which is set to 3. 
Connection object should have the same + # object ID (as idle time is less than 'irods_connection_refresh_time') + time.sleep(1) + + with self.sess.pool.get_connection() as conn: + conn_obj_id_2 = id(conn) + curr_time = datetime.datetime.now() + last_used_time_2 = conn.last_used_time + self.assertTrue(curr_time >= last_used_time_2) + self.assertEqual(conn_obj_id_1, conn_obj_id_2) + self.assertTrue(last_used_time_2 >= last_used_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn, True) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + def test_get_connection_refresh_time_no_env_file_input_param(self): + connection_refresh_time = self.sess.get_connection_refresh_time(first_name="Magic", last_name="Johnson") + self.assertEqual(connection_refresh_time, -1) + + def test_get_connection_refresh_time_none_existant_env_file(self): + connection_refresh_time = self.sess.get_connection_refresh_time(irods_env_file="./test-data/irods_environment_non_existant.json") + self.assertEqual(connection_refresh_time, -1) + + def test_get_connection_refresh_time_no_connection_refresh_field(self): + connection_refresh_time = self.sess.get_connection_refresh_time(irods_env_file="./test-data/irods_environment_no_refresh_field.json") + self.assertEqual(connection_refresh_time, -1) + + def test_get_connection_refresh_time_negative_connection_refresh_field(self): + connection_refresh_time = self.sess.get_connection_refresh_time(irods_env_file="./test-data/irods_environment_negative_refresh_field.json") + self.assertEqual(connection_refresh_time, -1) + + def test_get_connection_refresh_time(self): + connection_refresh_time = self.sess.get_connection_refresh_time(irods_env_file="./test-data/irods_environment.json") + self.assertEqual(connection_refresh_time, 3) if __name__ == '__main__': # let the tests find the parent irods lib diff --git a/irods/test/test-data/irods_environment.json b/irods/test/test-data/irods_environment.json new file mode 100644 index 0000000..2bf0fe8 --- /dev/null +++ b/irods/test/test-data/irods_environment.json @@ -0,0 +1,7 @@ +{ + "irods_host": "127.0.0.1", + "irods_port": "1247", + "irods_user_name": "rods", + "irods_zone_name": "tempZone", + "irods_connection_refresh_time": "3" +} diff --git a/irods/test/test-data/irods_environment_negative_refresh_field.json b/irods/test/test-data/irods_environment_negative_refresh_field.json new file mode 100644 index 0000000..29803f3 --- /dev/null +++ b/irods/test/test-data/irods_environment_negative_refresh_field.json @@ -0,0 +1,7 @@ +{ + "irods_host": "127.0.0.1", + "irods_port": "1247", + "irods_user_name": "rods", + "irods_zone_name": "tempZone", + "irods_connection_refresh_time": "-3" +} diff --git a/irods/test/test-data/irods_environment_no_refresh_field.json b/irods/test/test-data/irods_environment_no_refresh_field.json new file mode 100644 index 0000000..9856098 --- /dev/null +++ b/irods/test/test-data/irods_environment_no_refresh_field.json @@ -0,0 +1,6 @@ +{ + "irods_host": "127.0.0.1", + "irods_port": "1247", + "irods_user_name": "rods", + "irods_zone_name": "tempZone" +} From 14ef6651ca1a62b45ea19a309c22aa7e0a2c8ea8 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Sat, 17 Oct 2020 02:34:47 +0000 Subject: [PATCH 53/96] [#221] fix tests which were failing in Py3.4 and 3.7 --- irods/test/login_auth_test.py | 6 +++++- irods/test/query_test.py | 23 ++++++++++++++++++----- 2 files changed, 23 insertions(+), 
6 deletions(-) diff --git a/irods/test/login_auth_test.py b/irods/test/login_auth_test.py index e8525ae..b92e8d4 100644 --- a/irods/test/login_auth_test.py +++ b/irods/test/login_auth_test.py @@ -17,7 +17,11 @@ from irods.password_obfuscation import (encode as pw_encode) from irods.connection import PlainTextPAMPasswordError import contextlib -from re import (compile as regex,_pattern_type as regex_type) +from re import compile as regex +try: + from re import _pattern_type as regex_type +except ImportError: + from re import Pattern as regex_type # Python 3.7+ def json_file_update(fname,keys_to_delete=(),**kw): diff --git a/irods/test/query_test.py b/irods/test/query_test.py index c928e5b..d2b5a8d 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -7,6 +7,7 @@ import tempfile import unittest import time +import uuid from datetime import datetime from irods.models import (User, UserMeta, Resource, ResourceMeta, @@ -313,6 +314,7 @@ def test_multiple_criteria_on_one_column_name(self): @unittest.skipIf(six.PY3, 'Test is for python2 only') def test_query_for_data_object_with_utf8_name_python2(self): filename_prefix = '_prefix_ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸ' + self.assertEqual(self.FILENAME_PREFIX.encode('utf-8'), filename_prefix) _,test_file = tempfile.mkstemp(prefix=filename_prefix) obj_path = os.path.join(self.coll.path, os.path.basename(test_file)) try: @@ -326,18 +328,28 @@ def test_query_for_data_object_with_utf8_name_python2(self): self.sess.data_objects.unregister(obj_path) os.remove(test_file) + # view/change this line in text editors under own risk: + FILENAME_PREFIX = u'_prefix_ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸ' + @unittest.skipIf(six.PY2, 'Test is for python3 only') def test_query_for_data_object_with_utf8_name_python3(self): - filename_prefix = u'_prefix_ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸ' - _,encoded_test_file = tempfile.mkstemp(prefix=filename_prefix.encode('utf-8')) + def python34_unicode_mkstemp( prefix, dir = None, open_mode = 0o777 ): + file_path = os.path.join ((dir or os.environ.get('TMPDIR') or '/tmp'), prefix+'-'+str(uuid.uuid1())) + encoded_file_path = file_path.encode('utf-8') + return os.open(encoded_file_path,os.O_CREAT|os.O_RDWR,mode=open_mode), encoded_file_path + fd = None + filename_prefix = u'_prefix_'\ + u'\u01e0\u01e1\u01e2\u01e3\u01e4\u01e5\u01e6\u01e7\u01e8\u01e9\u01ea\u01eb\u01ec\u01ed\u01ee\u01ef'\ + u'\u01f0\u01f1\u01f2\u01f3\u01f4\u01f5\u01f6\u01f7\u01f8' # make more visible/changeable in VIM + self.assertEqual(self.FILENAME_PREFIX, filename_prefix) + (fd,encoded_test_file) = tempfile.mkstemp(prefix=filename_prefix.encode('utf-8')) \ + if sys.version_info >= (3,5) \ + else python34_unicode_mkstemp(prefix = filename_prefix) self.assertTrue(os.path.exists(encoded_test_file)) - test_file = encoded_test_file.decode('utf-8') obj_path = os.path.join(self.coll.path, os.path.basename(test_file)) - try: self.sess.data_objects.register(test_file, obj_path) - results = self.sess.query(DataObject, Collection.name).filter(DataObject.path == test_file).first() result_logical_path = os.path.join(results[Collection.name], results[DataObject.name]) result_physical_path = results[DataObject.path] @@ -345,6 +357,7 @@ def test_query_for_data_object_with_utf8_name_python3(self): self.assertEqual(result_physical_path, test_file) finally: self.sess.data_objects.unregister(obj_path) + if fd is not None: os.close(fd) os.remove(encoded_test_file) class Issue_166_context: From 4f893eb5253bbc8dfbdca147e04ab3032a5e633e Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Mon, 
19 Oct 2020 13:38:56 -0400 Subject: [PATCH 54/96] [#3] v0.8.4 and update changelog --- CHANGELOG.rst | 14 ++++++++++++++ README.rst | 2 +- irods/version.py | 2 +- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3bd34ab..efa6bef 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,20 @@ Changelog ========= +v0.8.4 (2020-10-19) +------------------- +[#221] fix tests which were failing in Py3.4 and 3.7 [Daniel Moore] +[#220] Replace stale connections pulled from idle pools [Kaivan Kamali] +[#3] tests failing on Python3 unicode defaults [Daniel Moore] +[#214] store/load rules as utf-8 in files [Daniel Moore] +[#211] set and report application name to server [Daniel Moore] +[#156] skip ssh/pam login tests if user doesn't exist [Daniel Moore] +[#209] pam/ssl/env auth tests imported from test harness [Daniel Moore] +[#209] store hashed PAM pw [Daniel Moore] +[#205] Disallow PAM plaintext passwords as strong default [Daniel Moore] +[#156] fix the PAM authentication with env json file. [Patrice Linel] +[#207] add raw-acl permissions getter [Daniel Moore] + v0.8.3 (2020-06-05) ------------------- - [#3] remove order sensitivity in test_user_dn [Daniel Moore] diff --git a/README.rst b/README.rst index 7183b20..fbc85e8 100644 --- a/README.rst +++ b/README.rst @@ -6,6 +6,7 @@ Python iRODS Client (PRC) Currently supported: +- Python 2.7, 3.4 or newer - Establish a connection to iRODS - Authenticate via password, GSI, PAM - iRODS connection over SSL @@ -34,7 +35,6 @@ Currently supported: - Manage resources - Unicode strings - Ticket based access -- Python 2.7, 3.4 or newer Installing diff --git a/irods/version.py b/irods/version.py index b4e3540..21320a8 100644 --- a/irods/version.py +++ b/irods/version.py @@ -1 +1 @@ -__version__ = '0.8.3' +__version__ = '0.8.4' From 421022911396561e9e8dea15156c62a4a9f2d489 Mon Sep 17 00:00:00 2001 From: Kaivan Kamali Date: Tue, 10 Nov 2020 13:26:15 -0500 Subject: [PATCH 55/96] [#220] Use connection create time (not last used time) to decide whether a connection needs to be re-created --- irods/connection.py | 3 ++- irods/pool.py | 15 ++++++++++++--- irods/session.py | 6 ++++++ irods/test/pool_test.py | 30 ++++++++++++++++++++++++------ 4 files changed, 44 insertions(+), 10 deletions(-) diff --git a/irods/connection.py b/irods/connection.py index b3ce440..c927470 100644 --- a/irods/connection.py +++ b/irods/connection.py @@ -57,7 +57,8 @@ def __init__(self, pool, account): self._login_pam() else: raise ValueError("Unknown authentication scheme %s" % scheme) - self.last_used_time = datetime.datetime.now() + self.create_time = datetime.datetime.now() + self.last_used_time = self.create_time @property def server_version(self): diff --git a/irods/pool.py b/irods/pool.py index c4513e7..c8b2732 100644 --- a/irods/pool.py +++ b/irods/pool.py @@ -44,28 +44,37 @@ def get_connection(self): curr_time = datetime.datetime.now() # If 'refresh_connection' flag is True and the connection was - # last used more than 'connection_refresh_time' seconds ago, + # created more than 'connection_refresh_time' seconds ago, # release the connection (as its stale) and create a new one - if self.refresh_connection and (curr_time - conn.last_used_time).total_seconds() > self.connection_refresh_time: - logger.debug('Connection has been idle more than {} seconds. 
Releasing the connection and creating a new one.'.format(self.connection_refresh_time)) + if self.refresh_connection and (curr_time - conn.create_time).total_seconds() > self.connection_refresh_time: + logger.debug('Connection with id {} was created more than {} seconds ago. Releasing the connection and creating a new one.'.format(id(conn), self.connection_refresh_time)) self.release_connection(conn, True) conn = Connection(self, self.account) + logger.debug("Created new connection with id: {}".format(id(conn))) except KeyError: conn = Connection(self, self.account) + logger.debug("No connection found in idle set. Created a new connection with id: {}".format(id(conn))) self.active.add(conn) + logger.debug("Adding connection with id {} to active set".format(id(conn))) + logger.debug('num active: {}'.format(len(self.active))) + logger.debug('num idle: {}'.format(len(self.idle))) return conn def release_connection(self, conn, destroy=False): with self._lock: if conn in self.active: self.active.remove(conn) + logger.debug("Removed connection with id: {} from active set".format(id(conn))) if not destroy: # If 'refresh_connection' flag is True, update connection's 'last_used_time' if self.refresh_connection: conn.last_used_time = datetime.datetime.now() self.idle.add(conn) + logger.debug("Added connection with id: {} to idle set".format(id(conn))) elif conn in self.idle and destroy: + logger.debug("Destroyed connection with id: {}".format(id(conn))) self.idle.remove(conn) + logger.debug('num active: {}'.format(len(self.active))) logger.debug('num idle: {}'.format(len(self.idle))) diff --git a/irods/session.py b/irods/session.py index bc18cb0..8d2d017 100644 --- a/irods/session.py +++ b/irods/session.py @@ -99,6 +99,7 @@ def _configure_account(self, **kwargs): def configure(self, **kwargs): account = self._configure_account(**kwargs) connection_refresh_time = self.get_connection_refresh_time(**kwargs) + logger.debug("In iRODSSession's configure(). connection_refresh_time set to {}".format(connection_refresh_time)) self.pool = Pool(account, application_name=kwargs.pop('application_name',''), connection_refresh_time=connection_refresh_time) def query(self, *args): @@ -189,6 +190,11 @@ def get_irods_password(**kwargs): def get_connection_refresh_time(self, **kwargs): connection_refresh_time = -1 + + connection_refresh_time = int(kwargs.get('refresh_time', -1)) + if connection_refresh_time != -1: + return connection_refresh_time + try: env_file = kwargs['irods_env_file'] except KeyError: diff --git a/irods/test/pool_test.py b/irods/test/pool_test.py index a6a9c21..0ecaa66 100644 --- a/irods/test/pool_test.py +++ b/irods/test/pool_test.py @@ -61,21 +61,25 @@ def test_release_disconnected(self): self.assertEqual(0, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) - def test_connection_last_used_time(self): - # Get a connection and record its object ID and last_used_time + def test_connection_create_time(self): + # Get a connection and record its object ID and create_time # Release the connection (goes from active to idle queue) # Again, get a connection. Should get the same connection back. # I.e., the object IDs should match. 
However, the new connection # should have a more recent 'last_used_time' conn_obj_id_1 = None conn_obj_id_2 = None + create_time_1 = None + create_time_2 = None last_used_time_1 = None last_used_time_2 = None with self.sess.pool.get_connection() as conn: conn_obj_id_1 = id(conn) curr_time = datetime.datetime.now() + create_time_1 = conn.create_time last_used_time_1 = conn.last_used_time + self.assertTrue(curr_time >= create_time_1) self.assertTrue(curr_time >= last_used_time_1) self.assertEqual(1, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) @@ -87,8 +91,10 @@ def test_connection_last_used_time(self): with self.sess.pool.get_connection() as conn: conn_obj_id_2 = id(conn) curr_time = datetime.datetime.now() + create_time_2 = conn.create_time last_used_time_2 = conn.last_used_time self.assertEqual(conn_obj_id_1, conn_obj_id_2) + self.assertTrue(curr_time >= create_time_2) self.assertTrue(curr_time >= last_used_time_2) self.assertTrue(last_used_time_2 >= last_used_time_1) self.assertEqual(1, len(self.sess.pool.active)) @@ -105,18 +111,22 @@ def test_connection_last_used_time(self): def test_refresh_connection(self): # Set 'irods_connection_refresh_time' to '3' (in seconds) in # ~/.irods/irods_environment.json file. This means any connection - # that is not used more than 3 seconds will be dropped and + # that was created more than 3 seconds ago will be dropped and # a new connection is created/returned. This is to avoid # issue with idle connections that are dropped. conn_obj_id_1 = None conn_obj_id_2 = None + create_time_1 = None + create_time_2 = None last_used_time_1 = None last_used_time_2 = None with self.sess.pool.get_connection() as conn: conn_obj_id_1 = id(conn) curr_time = datetime.datetime.now() + create_time_1 = conn.create_time last_used_time_1 = conn.last_used_time + self.assertTrue(curr_time >= create_time_1) self.assertTrue(curr_time >= last_used_time_1) self.assertEqual(1, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) @@ -133,10 +143,12 @@ def test_refresh_connection(self): with self.sess.pool.get_connection() as conn: conn_obj_id_2 = id(conn) curr_time = datetime.datetime.now() + create_time_2 = conn.create_time last_used_time_2 = conn.last_used_time + self.assertTrue(curr_time >= create_time_2) self.assertTrue(curr_time >= last_used_time_2) self.assertNotEqual(conn_obj_id_1, conn_obj_id_2) - self.assertTrue(last_used_time_2 > last_used_time_1) + self.assertTrue(create_time_2 > create_time_1) self.assertEqual(1, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) @@ -147,18 +159,22 @@ def test_refresh_connection(self): def test_no_refresh_connection(self): # Set 'irods_connection_refresh_time' to '3' (in seconds) in # ~/.irods/irods_environment.json file. This means any connection - # that is not used more than 3 seconds will be dropped and + # created more than 3 seconds ago will be dropped and # a new connection is created/returned. This is to avoid # issue with idle connections that are dropped. 
conn_obj_id_1 = None conn_obj_id_2 = None + create_time_1 = None + create_time_2 = None last_used_time_1 = None last_used_time_2 = None with self.sess.pool.get_connection() as conn: conn_obj_id_1 = id(conn) curr_time = datetime.datetime.now() + create_time_1 = conn.create_time last_used_time_1 = conn.last_used_time + self.assertTrue(curr_time >= create_time_1) self.assertTrue(curr_time >= last_used_time_1) self.assertEqual(1, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) @@ -175,10 +191,12 @@ def test_no_refresh_connection(self): with self.sess.pool.get_connection() as conn: conn_obj_id_2 = id(conn) curr_time = datetime.datetime.now() + create_time_2 = conn.create_time last_used_time_2 = conn.last_used_time + self.assertTrue(curr_time >= create_time_2) self.assertTrue(curr_time >= last_used_time_2) self.assertEqual(conn_obj_id_1, conn_obj_id_2) - self.assertTrue(last_used_time_2 >= last_used_time_1) + self.assertTrue(create_time_2 >= create_time_1) self.assertEqual(1, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) From de3e3d35182f19a2c0e81bf22774eb9ab6035143 Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Tue, 10 Nov 2020 15:16:23 -0500 Subject: [PATCH 56/96] [#3] v0.8.4 and update changelog --- CHANGELOG.rst | 4 ++++ irods/version.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index efa6bef..7676a02 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,10 @@ Changelog ========= +v0.8.5 (2020-11-10) +------------------- +[#220] Use connection create time to determine stale connections [Kaivan Kamali] + v0.8.4 (2020-10-19) ------------------- [#221] fix tests which were failing in Py3.4 and 3.7 [Daniel Moore] diff --git a/irods/version.py b/irods/version.py index 21320a8..73f8315 100644 --- a/irods/version.py +++ b/irods/version.py @@ -1 +1 @@ -__version__ = '0.8.4' +__version__ = '0.8.5' From 7f30cafdeb3bb1c3362f2e89a0836a51b1e9a932 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Tue, 10 Nov 2020 15:06:41 +0000 Subject: [PATCH 57/96] [#227] [#228] enable ICAT entries for zones and foreign-zone users --- irods/exception.py | 4 +++ irods/manager/user_manager.py | 3 +- irods/manager/zone_manager.py | 50 +++++++++++++++++++++++++++++++++ irods/models.py | 1 + irods/session.py | 2 ++ irods/test/zone_test.py | 52 +++++++++++++++++++++++++++++++++++ irods/zone.py | 21 ++++++++++++++ 7 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 irods/manager/zone_manager.py create mode 100644 irods/test/zone_test.py create mode 100644 irods/zone.py diff --git a/irods/exception.py b/irods/exception.py index 0f4b434..74cb9c8 100644 --- a/irods/exception.py +++ b/irods/exception.py @@ -26,6 +26,10 @@ class CollectionDoesNotExist(DoesNotExist): pass +class ZoneDoesNotExist(DoesNotExist): + pass + + class UserDoesNotExist(DoesNotExist): pass diff --git a/irods/manager/user_manager.py b/irods/manager/user_manager.py index d312858..e0c107c 100644 --- a/irods/manager/user_manager.py +++ b/irods/manager/user_manager.py @@ -30,7 +30,8 @@ def create(self, user_name, user_type, user_zone="", auth_str=""): message_body = GeneralAdminRequest( "add", "user", - user_name, + user_name if not user_zone or user_zone == self.sess.zone \ + else "{}#{}".format(user_name,user_zone), user_type, user_zone, auth_str diff --git a/irods/manager/zone_manager.py b/irods/manager/zone_manager.py new file mode 100644 index 0000000..f6416c2 --- /dev/null +++ b/irods/manager/zone_manager.py @@ -0,0 +1,50 @@ 
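+# ZoneManager parallels the user manager: create() and remove() issue
+# GENERAL_ADMIN_AN requests ("add zone ..." / "rm zone ..."), while get()
+# queries the Zone model and wraps the matching row in an iRODSZone.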
+from __future__ import absolute_import +import logging + +from irods.models import Zone +from irods.zone import iRODSZone +from irods.manager import Manager +from irods.message import GeneralAdminRequest, iRODSMessage +from irods.api_number import api_number +from irods.exception import ZoneDoesNotExist, NoResultFound + +logger = logging.getLogger(__name__) + +class ZoneManager(Manager): + + def get(self, zone_name): + query = self.sess.query(Zone).filter(Zone.name == zone_name) + + try: + result = query.one() + except NoResultFound: + raise ZoneDoesNotExist() + return iRODSZone(self, result) + + def create(self, zone_name, zone_type): + message_body = GeneralAdminRequest( + "add", + "zone", + zone_name, + zone_type, + ) + request = iRODSMessage("RODS_API_REQ", msg=message_body, + int_info=api_number['GENERAL_ADMIN_AN']) + with self.sess.pool.get_connection() as conn: + conn.send(request) + response = conn.recv() + logger.debug(response.int_info) + return self.get(zone_name) + + def remove(self, zone_name): + message_body = GeneralAdminRequest( + "rm", + "zone", + zone_name + ) + request = iRODSMessage("RODS_API_REQ", msg=message_body, + int_info=api_number['GENERAL_ADMIN_AN']) + with self.sess.pool.get_connection() as conn: + conn.send(request) + response = conn.recv() + logger.debug(response.int_info) diff --git a/irods/models.py b/irods/models.py index fb2fe51..4b1b4e2 100644 --- a/irods/models.py +++ b/irods/models.py @@ -22,6 +22,7 @@ class Model(six.with_metaclass(ModelBase, object)): class Zone(Model): id = Column(Integer, 'ZONE_ID', 101) name = Column(String, 'ZONE_NAME', 102) + type = Column(String, 'ZONE_TYPE', 103) class User(Model): diff --git a/irods/session.py b/irods/session.py index 8d2d017..bf31479 100644 --- a/irods/session.py +++ b/irods/session.py @@ -11,6 +11,7 @@ from irods.manager.access_manager import AccessManager from irods.manager.user_manager import UserManager, UserGroupManager from irods.manager.resource_manager import ResourceManager +from irods.manager.zone_manager import ZoneManager from irods.exception import NetworkException from irods.password_obfuscation import decode from irods import NATIVE_AUTH_SCHEME, PAM_AUTH_SCHEME @@ -33,6 +34,7 @@ def __init__(self, configure=True, **kwargs): self.users = UserManager(self) self.user_groups = UserGroupManager(self) self.resources = ResourceManager(self) + self.zones = ZoneManager(self) def __enter__(self): return self diff --git a/irods/test/zone_test.py b/irods/test/zone_test.py new file mode 100644 index 0000000..de9baf4 --- /dev/null +++ b/irods/test/zone_test.py @@ -0,0 +1,52 @@ +#! 
/usr/bin/env python +from __future__ import absolute_import +import os +import sys +import unittest + +from irods.models import User,Collection +from irods.access import iRODSAccess +from irods.collection import iRODSCollection +from irods.exception import CollectionDoesNotExist +import irods.test.helpers as helpers + +class TestRemoteZone(unittest.TestCase): + + def setUp(self): + self.sess = helpers.make_session() + + def tearDown(self): + """Close connections.""" + self.sess.cleanup() + + # This test should pass whether or not federation is configured: + def test_create_other_zone_user_227_228(self): + usercolls = [] + session = self.sess + A_ZONE_NAME = 'otherZone' + A_ZONE_USER = 'alice' + try: + zoneB = session.zones.create(A_ZONE_NAME,'remote') + zBuser = session.users.create(A_ZONE_USER,'rodsuser', A_ZONE_NAME, '') + usercolls = [ iRODSCollection(session.collections, result) for result in + session.query(Collection).filter(Collection.owner_name == zBuser.name and + Collection.owner_zone == zBuser.zone) ] + self.assertEqual ([(u[User.name],u[User.zone]) for u in session.query(User).filter(User.zone == A_ZONE_NAME)], + [(A_ZONE_USER,A_ZONE_NAME)]) + zBuser.remove() + zoneB.remove() + finally: + for p in usercolls: + try: + session.collections.get( p.path ) + except CollectionDoesNotExist: + continue + perm = iRODSAccess( 'own', p.path, session.username, session.zone) + session.permissions.set( perm, admin=True) + p.remove(force=True) + + +if __name__ == '__main__': + # let the tests find the parent irods lib + sys.path.insert(0, os.path.abspath('../..')) + unittest.main() diff --git a/irods/zone.py b/irods/zone.py new file mode 100644 index 0000000..3943f9a --- /dev/null +++ b/irods/zone.py @@ -0,0 +1,21 @@ +from __future__ import absolute_import +from irods.models import Zone + + +class iRODSZone(object): + + def __init__(self, manager, result=None): + """Construct an iRODSZone object.""" + self.manager = manager + if result: + self.id = result[Zone.id] + self.name = result[Zone.name] + self.type = result[Zone.type] + + def remove(self): + self.manager.remove(self.name) + + def __repr__(self): + """Render a user-friendly string representation for the iRODSZone object.""" + return "".format(**vars(self)) + From dc84374bbf6d03c752be193408caad4fd9620486 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 13 Nov 2020 15:33:23 +0100 Subject: [PATCH 58/96] Remove unneeded, non-existing import --- irods/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/irods/connection.py b/irods/connection.py index 12c6ff8..e3a8728 100644 --- a/irods/connection.py +++ b/irods/connection.py @@ -12,7 +12,7 @@ from irods.message import ( iRODSMessage, StartupPack, AuthResponse, AuthChallenge, AuthPluginOut, OpenedDataObjRequest, FileSeekResponse, StringStringMap, VersionResponse, - GSIAuthMessage, OpenIDAuthMessage, ClientServerNegotiation, Error, PluginAuthMessage) + OpenIDAuthMessage, ClientServerNegotiation, Error, PluginAuthMessage) from irods.exception import get_exception_by_code, NetworkException from irods import ( MAX_PASSWORD_LENGTH, RESPONSE_LEN, From 3998d13e57c045d38cf6b42910f7b52ee4ddb011 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Wed, 9 Dec 2020 20:48:26 -0500 Subject: [PATCH 59/96] [#241][irods/irods_capability_automated_ingest#136] fix redundant disconnect --- irods/connection.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/irods/connection.py b/irods/connection.py index c927470..6fefc75 100644 --- a/irods/connection.py +++ 
b/irods/connection.py @@ -45,6 +45,7 @@ def __init__(self, pool, account): self.account = account self._client_signature = None self._server_version = self._connect() + self._disconnected = False scheme = self.account.authentication_scheme @@ -69,7 +70,7 @@ def client_signature(self): return self._client_signature def __del__(self): - if self.socket: + if self.socket and getattr(self,"_disconnected",False): self.disconnect() def send(self, message): @@ -193,6 +194,7 @@ def _connect(self): try: s = socket.create_connection(address, timeout) + self._disconnected = False except socket.error: raise NetworkException( "Could not connect to specified host and port: " + @@ -271,6 +273,7 @@ def disconnect(self): self.socket.shutdown(socket.SHUT_RDWR) self.socket.close() self.socket = None + self._disconnected = True def recvall(self, n): # Helper function to recv n bytes or return None if EOF is hit From b44b94f4ebde7119f2729763dddea8e1a82c6c62 Mon Sep 17 00:00:00 2001 From: Pierre Gay Date: Thu, 10 Dec 2020 11:11:52 +0100 Subject: [PATCH 60/96] [#239] add iRODSUser.comment attribute --- irods/test/admin_test.py | 20 ++++++++++++++++++++ irods/user.py | 1 + 2 files changed, 21 insertions(+) diff --git a/irods/test/admin_test.py b/irods/test/admin_test.py index b341cb1..198e171 100644 --- a/irods/test/admin_test.py +++ b/irods/test/admin_test.py @@ -352,6 +352,26 @@ def test_set_user_password(self): self.sess.users.get(self.new_user_name) + def test_set_user_comment(self): + # make a new user + self.sess.users.create(self.new_user_name, self.new_user_type) + + # modify user comment + new_comment = '''comment-abc123!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~Z''' + self.sess.users.modify(self.new_user_name, 'comment', new_comment) + + # check comment was modified + new_user = self.sess.users.get(self.new_user_name) + self.assertEqual(new_user.comment, new_comment) + + # delete new user + self.sess.users.remove(self.new_user_name) + + # user should be gone + with self.assertRaises(UserDoesNotExist): + self.sess.users.get(self.new_user_name) + + if __name__ == '__main__': # let the tests find the parent irods lib sys.path.insert(0, os.path.abspath('../..')) diff --git a/irods/user.py b/irods/user.py index a91da21..859773a 100644 --- a/irods/user.py +++ b/irods/user.py @@ -13,6 +13,7 @@ def __init__(self, manager, result=None): self.name = result[User.name] self.type = result[User.type] self.zone = result[User.zone] + self.comment = result[User.comment] self._meta = None @property From 8817c9f857bf589d05326b42fcfd6b2a60e83226 Mon Sep 17 00:00:00 2001 From: Pierre Gay Date: Thu, 10 Dec 2020 12:58:09 +0100 Subject: [PATCH 61/96] [#239] add iRODSUser.info attribute --- irods/test/admin_test.py | 20 ++++++++++++++++++++ irods/user.py | 1 + 2 files changed, 21 insertions(+) diff --git a/irods/test/admin_test.py b/irods/test/admin_test.py index 198e171..1a74c78 100644 --- a/irods/test/admin_test.py +++ b/irods/test/admin_test.py @@ -372,6 +372,26 @@ def test_set_user_comment(self): self.sess.users.get(self.new_user_name) + def test_set_user_info(self): + # make a new user + self.sess.users.create(self.new_user_name, self.new_user_type) + + # modify user info + new_info = '''info-abc123!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~Z''' + self.sess.users.modify(self.new_user_name, 'info', new_info) + + # check info was modified + new_user = self.sess.users.get(self.new_user_name) + self.assertEqual(new_user.info, new_info) + + # delete new user + self.sess.users.remove(self.new_user_name) + + # user should be gone + with 
self.assertRaises(UserDoesNotExist): + self.sess.users.get(self.new_user_name) + + if __name__ == '__main__': # let the tests find the parent irods lib sys.path.insert(0, os.path.abspath('../..')) diff --git a/irods/user.py b/irods/user.py index 859773a..d93150b 100644 --- a/irods/user.py +++ b/irods/user.py @@ -14,6 +14,7 @@ def __init__(self, manager, result=None): self.type = result[User.type] self.zone = result[User.zone] self.comment = result[User.comment] + self.info = result[User.info] self._meta = None @property From eaebbd9c9449b382e2aaa93ae9d434b1157a5c5b Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Thu, 10 Dec 2020 21:48:58 -0500 Subject: [PATCH 62/96] [#239] to keep the tests passing --- irods/test/admin_test.py | 4 ++-- irods/user.py | 19 +++++++++++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/irods/test/admin_test.py b/irods/test/admin_test.py index 1a74c78..1267341 100644 --- a/irods/test/admin_test.py +++ b/irods/test/admin_test.py @@ -357,7 +357,7 @@ def test_set_user_comment(self): self.sess.users.create(self.new_user_name, self.new_user_type) # modify user comment - new_comment = '''comment-abc123!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~Z''' + new_comment = '''comment-abc123!"#$%&'()*+,-./:;<=>?@[\]^_{|}~Z''' # omitting backtick due to #170 self.sess.users.modify(self.new_user_name, 'comment', new_comment) # check comment was modified @@ -377,7 +377,7 @@ def test_set_user_info(self): self.sess.users.create(self.new_user_name, self.new_user_type) # modify user info - new_info = '''info-abc123!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~Z''' + new_info = '''info-abc123!"#$%&'()*+,-./:;<=>?@[\]^_{|}~Z''' # omitting backtick due to #170 self.sess.users.modify(self.new_user_name, 'info', new_info) # check info was modified diff --git a/irods/user.py b/irods/user.py index d93150b..ecf5280 100644 --- a/irods/user.py +++ b/irods/user.py @@ -3,6 +3,7 @@ from irods.meta import iRODSMetaCollection from irods.exception import NoResultFound +_Not_Defined = () class iRODSUser(object): @@ -13,10 +14,24 @@ def __init__(self, manager, result=None): self.name = result[User.name] self.type = result[User.type] self.zone = result[User.zone] - self.comment = result[User.comment] - self.info = result[User.info] + self._comment = result.get(User.comment, _Not_Defined) # these not needed in results for object ident, + self._info = result.get(User.info, _Not_Defined) # so we fetch lazily via a property self._meta = None + @property + def comment(self): + if self._comment == _Not_Defined: + query = self.manager.sess.query(User.id,User.comment).filter(User.id == self.id) + self._comment = query.one()[User.comment] + return self._comment + + @property + def info(self): + if self._info == _Not_Defined: + query = self.manager.sess.query(User.id,User.info).filter(User.id == self.id) + self._info = query.one()[User.info] + return self._info + @property def dn(self): query = self.manager.sess.query(UserAuth.user_dn).filter(UserAuth.user_id == self.id) From b9b0f34b99888756191cad4157151b4b1b1ab2d0 Mon Sep 17 00:00:00 2001 From: "Ruben J. Garcia-Hernandez" Date: Thu, 12 Nov 2020 09:57:35 +0100 Subject: [PATCH 63/96] [#230] Add force option to data_object_manager create Following the other functions. Needed to overwrite files in iRODS. 
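[Editor's note] For orientation, a minimal usage sketch of the new keyword introduced by this patch; the connection details and object path below are illustrative assumptions, not part of the change itself:

    from irods.session import iRODSSession

    # Hypothetical credentials/host for illustration only.
    with iRODSSession(host='localhost', port=1247, user='rods',
                      password='rods', zone='tempZone') as session:
        path = '/tempZone/home/rods/a.txt'
        session.data_objects.create(path)              # first create succeeds
        # On servers up to 4.2.8 (per the accompanying test), a second create of the
        # same path raises OVERWRITE_WITHOUT_FORCE_FLAG; force=True overwrites instead.
        session.data_objects.create(path, force=True)
        session.data_objects.unlink(path, force=True)  # clean up
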
--- irods/manager/data_object_manager.py | 5 ++- irods/test/force_create.py | 50 ++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 irods/test/force_create.py diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index ddf5808..3c5bf11 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -81,7 +81,7 @@ def put(self, file, irods_path, return_data_object=False, **options): return self.get(obj) - def create(self, path, resource=None, **options): + def create(self, path, resource=None, force=False, **options): options[kw.DATA_TYPE_KW] = 'generic' if resource: @@ -93,6 +93,9 @@ def create(self, path, resource=None, **options): except AttributeError: pass + if force: + options[kw.FORCE_FLAG_KW] = '' + message_body = FileOpenRequest( objPath=path, createMode=0o644, diff --git a/irods/test/force_create.py b/irods/test/force_create.py new file mode 100644 index 0000000..b373cfe --- /dev/null +++ b/irods/test/force_create.py @@ -0,0 +1,50 @@ +#! /usr/bin/env python +from __future__ import absolute_import +import os +import sys +import unittest + +from irods.exception import OVERWRITE_WITHOUT_FORCE_FLAG +import irods.test.helpers as helpers + +class TestForceCreate(unittest.TestCase): + + def setUp(self): + self.sess = helpers.make_session() + + def tearDown(self): + """Close connections.""" + self.sess.cleanup() + + # This test should pass whether or not federation is configured: + def test_force_create(self): + session = self.sess + FILE = '/{session.zone}/home/{session.username}/a.txt'.format(**locals()) + try: + session.data_objects.unlink(FILE) + except: + pass + error = None + try: + session.data_objects.create(FILE) + session.data_objects.create(FILE) + except OVERWRITE_WITHOUT_FORCE_FLAG: + error = "OVERWRITE_WITHOUT_FORCE_FLAG" + self.assertEqual (error, "OVERWRITE_WITHOUT_FORCE_FLAG") + error = None + try: + session.data_objects.create(FILE, force=True) + except: + error = "Error creating with force" + self.assertEqual (error, None) + try: + session.data_objects.unlink(FILE) + except: + error = "Error cleaning up" + self.assertEqual (error, None) + + +if __name__ == '__main__': + # let the tests find the parent irods lib + sys.path.insert(0, os.path.abspath('../..')) + unittest.main() From 30acb40886148ad37d695a6344b7af31d881bfb2 Mon Sep 17 00:00:00 2001 From: "Ruben J. Garcia-Hernandez" Date: Fri, 13 Nov 2020 16:28:30 +0100 Subject: [PATCH 64/96] [#226] Document creation of users. --- README.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.rst b/README.rst index fbc85e8..ad1578c 100644 --- a/README.rst +++ b/README.rst @@ -659,6 +659,16 @@ We can also query on access type using its numeric value, which will seem more n >>> data_objects_writable = list(session.query(DataObject,DataAccess,User)).filter(User.name=='alice', DataAccess.type >= MODIFY) +Managing users +-------------- + +You can create a user in the current zone using: +>>> session.users.create('user', 'rodsuser', 'MyZone', auth_str) +(the auth_str parameter is optional). +If you want to create a user in a federated zone, use: +>>> session.users.create('user', 'rodsuser', 'OtherZone', auth_str) + + And more... 
----------- From 5a6216d0839ff68e80b2525571165e5acd3bd8d4 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Tue, 5 Jan 2021 22:21:28 -0500 Subject: [PATCH 65/96] [#244] added capability to add/remove atomic metadata --- README.rst | 29 +++++++++++++ irods/api_number.py | 1 + irods/manager/metadata_manager.py | 46 +++++++++++++++++++- irods/message/__init__.py | 17 ++++++++ irods/meta.py | 55 ++++++++++++++++++++++++ irods/test/helpers.py | 18 ++++++++ irods/test/meta_test.py | 71 +++++++++++++++++++++++++++++-- 7 files changed, 231 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index ad1578c..6257e4c 100644 --- a/README.rst +++ b/README.rst @@ -277,6 +277,35 @@ of the (type of) catalog object they once annotated: 0 +Atomic operations on metadata +----------------------------- + +With release 4.2.8 of iRODS, the atomic metadata API was introduced to allow a group of metadata add and remove +operations to be performed transactionally, within a single call to the server. This capability can be leveraged in +version 0.8.6 of the PRC. + +So, for example, if 'obj' is a handle to an object in the iRODS catalog (whether a data object, collection, user or +storage resource), we can send an arbitrary number of AVUOperation instances to be executed together as one indivisible +operation on that object: + +>>> from irods.meta import iRODSMeta, AVUOperation +>>> obj.metadata.apply_atomic_operations( AVUOperation(operation='remove', avu=iRODSMeta('a1','v1','these_units')), +... AVUOperation(operation='add', avu=iRODSMeta('a2','v2','those_units')), +... AVUOperation(operation='remove', avu=iRODSMeta('a3','v3')) # , ... +... ) + +The list of operations will applied in the order given, so that a "remove" followed by an "add" of the same AVU +is, in effect, a metadata "set" operation. Also note that a "remove" operation will be ignored if the AVU value given +does not exist on the target object at that point in the sequence of operations. + +We can also source from a pre-built list of AVUOperations using Python's f(*args_list) syntax. For example, this +function uses the atomic metadata API to very quickly remove all AVUs from an object: + +>>> def remove_all_avus( Object ): +... avus_on_Object = Object.metadata.items() +... 
Object.metadata.apply_atomic_operations( *[AVUOperation(operation='remove', avu=i) for i in avus_on_Object] ) + + General queries --------------- diff --git a/irods/api_number.py b/irods/api_number.py index a221d4a..41545de 100644 --- a/irods/api_number.py +++ b/irods/api_number.py @@ -176,4 +176,5 @@ # 1100 - 1200 - SSL API calls "SSL_START_AN": 1100, "SSL_END_AN": 1101, + "ATOMIC_APPLY_METADATA_OPERATIONS_APN": 20002 } diff --git a/irods/manager/metadata_manager.py b/irods/manager/metadata_manager.py index 338e5d6..ed31580 100644 --- a/irods/manager/metadata_manager.py +++ b/irods/manager/metadata_manager.py @@ -1,17 +1,22 @@ +from __future__ import print_function from __future__ import absolute_import import logging from os.path import dirname, basename from irods.manager import Manager -from irods.message import MetadataRequest, iRODSMessage +from irods.message import MetadataRequest, iRODSMessage, JSONMessage from irods.api_number import api_number from irods.models import (DataObject, Collection, Resource, User, DataObjectMeta, CollectionMeta, ResourceMeta, UserMeta) -from irods.meta import iRODSMeta +from irods.meta import iRODSMeta, AVUOperation + logger = logging.getLogger(__name__) +class InvalidAtomicAVURequest(Exception): pass + + class MetadataManager(Manager): @staticmethod @@ -23,6 +28,15 @@ def _model_class_to_resource_type(model_cls): User: 'u', }[model_cls] + @staticmethod + def _model_class_to_resource_description(model_cls): + return { + DataObject: 'data_object', + Collection: 'collection', + Resource: 'resource', + User: 'user', + }[model_cls] + def get(self, model_cls, path): resource_type = self._model_class_to_resource_type(model_cls) model = { @@ -121,3 +135,31 @@ def set(self, model_cls, path, meta): conn.send(request) response = conn.recv() logger.debug(response.int_info) + + @staticmethod + def _avu_operation_to_dict( op ): + opJSON = { "operation": op.operation, + "attribute": op.avu.name, + "value": op.avu.value + } + if op.avu.units not in ("",None): + opJSON["units"] = op.avu.units + return opJSON + + def apply_atomic_operations(self, model_cls, path, *avu_ops): + if not all(isinstance(op,AVUOperation) for op in avu_ops): + raise InvalidAtomicAVURequest("avu_ops must contain 1 or more AVUOperations") + request = { + "entity_name": path, + "entity_type": self._model_class_to_resource_description(model_cls), + "operations" : [self._avu_operation_to_dict(op) for op in avu_ops] + } + self._call_atomic_metadata_api(request) + + def _call_atomic_metadata_api(self, request): + request = iRODSMessage("RODS_API_REQ", msg=JSONMessage(request), + int_info=api_number['ATOMIC_APPLY_METADATA_OPERATIONS_APN']) + with self.sess.pool.get_connection() as conn: + conn.send(request) + response = conn.recv() + logger.debug('atomic metadata api response = %s %s',response.int_info,repr(response.get_json_encoded_struct())) diff --git a/irods/message/__init__.py b/irods/message/__init__.py index 72a5ef8..dfafdac 100644 --- a/irods/message/__init__.py +++ b/irods/message/__init__.py @@ -1,6 +1,7 @@ import struct import logging import socket +import json import xml.etree.ElementTree as ET from irods.message.message import Message from irods.message.property import (BinaryProperty, StringProperty, @@ -68,6 +69,10 @@ def __init__(self, msg_type=b'', msg=None, error=b'', bs=b'', int_info=0): self.bs = bs self.int_info = int_info + def get_json_encoded_struct (self): + Xml = ET.fromstring(self.msg.replace(b'\0',b'')) + return json.loads(Xml.find('buf').text) + @staticmethod def 
recv(sock): # rsp_header_size = sock.recv(4, socket.MSG_WAITALL) @@ -237,6 +242,18 @@ class BinBytesBuf(Message): buflen = IntegerProperty() buf = BinaryProperty() +class BytesBuf(Message): + _name = 'BytesBuf_PI' + buflen = IntegerProperty() + buf = StringProperty() + +class JSONMessage(BytesBuf): + """A message body whose payload is just a BytesBuf containing JSON.""" + def __init__(self, msg_struct): + """Initialize with a Python data structure that will be converted to JSON.""" + s = json.dumps(msg_struct) + super(JSONMessage,self).__init__(buf=s,buflen=len(s)) + class PluginAuthMessage(Message): _name = 'authPlugReqInp_PI' diff --git a/irods/meta.py b/irods/meta.py index 4137ac0..2029043 100644 --- a/irods/meta.py +++ b/irods/meta.py @@ -1,3 +1,5 @@ + + class iRODSMeta(object): def __init__(self, name, value, units=None, avu_id=None): @@ -10,6 +12,56 @@ def __repr__(self): return "".format(**vars(self)) +class BadAVUOperationKeyword(Exception): pass + +class BadAVUOperationValue(Exception): pass + + +class AVUOperation(dict): + + @property + def operation(self): + return self['operation'] + + @operation.setter + def operation(self,Oper): + self._check_operation(Oper) + self['operation'] = Oper + + @property + def avu(self): + return self['avu'] + + @avu.setter + def avu(self,newAVU): + self._check_avu(newAVU) + self['avu'] = newAVU + + def _check_avu(self,avu_param): + if not isinstance(avu_param, iRODSMeta): + error_msg = "Nonconforming avu {!r} of type {}; must be an iRODSMeta." \ + "".format(avu_param,type(avu_param).__name__) + raise BadAVUOperationValue(error_msg) + + def _check_operation(self,operation): + if operation not in ('add','remove'): + error_msg = "Nonconforming operation {!r}; must be 'add' or 'remove'.".format(operation) + raise BadAVUOperationValue(error_msg) + + def __init__(self, operation, avu, **kw): + """Constructor: + AVUOperation( operation = opstr, # where opstr is "add" or "remove" + avu = metadata ) # where metadata is an irods.meta.iRODSMeta instance + """ + super(AVUOperation,self).__init__() + self._check_operation (operation) + self._check_avu (avu) + if kw: + raise BadAVUOperationKeyword('''Nonconforming keyword (s) {}.'''.format(list(kw.keys()))) + for atr in ('operation','avu'): + setattr(self,atr,locals()[atr]) + + class iRODSMetaCollection(object): def __init__(self, manager, model_cls, path): @@ -47,6 +99,9 @@ def _get_meta(self, *args): "Must specify an iRODSMeta object or key, value, units)") return args[0] if len(args) == 1 else iRODSMeta(*args) + def apply_atomic_operations(self, *avu_ops): + self._manager.apply_atomic_operations(self._model_cls, self._path, *avu_ops) + def add(self, *args): """ Add as iRODSMeta to a key diff --git a/irods/test/helpers.py b/irods/test/helpers.py index a23fd1b..fc32fa9 100644 --- a/irods/test/helpers.py +++ b/irods/test/helpers.py @@ -13,6 +13,7 @@ from six.moves import range + def make_session(**kwargs): try: env_file = kwargs['irods_env_file'] @@ -110,6 +111,23 @@ def make_flat_test_dir(dir_path, file_count=10, file_size=1024): f.write(os.urandom(file_size)) +@contextlib.contextmanager +def create_simple_resc_hierarchy (self, Root, Leaf): + d = tempfile.mkdtemp() + self.sess.resources.create(Leaf,'unixfilesystem', + host = self.sess.host, + path=d) + self.sess.resources.create(Root,'passthru') + self.sess.resources.add_child(Root,Leaf) + try: + yield ';'.join([Root,Leaf]) + finally: + self.sess.resources.remove_child(Root,Leaf) + self.sess.resources.remove(Leaf) + self.sess.resources.remove(Root) + 
shutil.rmtree(d) + + def chunks(f, chunksize=io.DEFAULT_BUFFER_SIZE): return iter(lambda: f.read(chunksize), b'') diff --git a/irods/test/meta_test.py b/irods/test/meta_test.py index f6a13b0..7680cee 100644 --- a/irods/test/meta_test.py +++ b/irods/test/meta_test.py @@ -4,8 +4,9 @@ import os import sys import unittest -from irods.meta import iRODSMeta -from irods.models import DataObject, Collection, Resource +from irods.meta import (iRODSMeta, AVUOperation, BadAVUOperationValue, BadAVUOperationKeyword) +from irods.manager.metadata_manager import InvalidAtomicAVURequest +from irods.models import (DataObject, Collection, Resource) import irods.test.helpers as helpers from six.moves import range @@ -19,7 +20,6 @@ class TestMeta(unittest.TestCase): def setUp(self): self.sess = helpers.make_session() - # test data self.coll_path = '/{}/home/{}/test_dir'.format(self.sess.zone, self.sess.username) self.obj_name = 'test1' @@ -29,13 +29,76 @@ def setUp(self): self.coll = self.sess.collections.create(self.coll_path) self.obj = self.sess.data_objects.create(self.obj_path) - def tearDown(self): '''Remove test data and close connections ''' self.coll.remove(recurse=True, force=True) + helpers.remove_unused_metadata(self.sess) self.sess.cleanup() + from irods.test.helpers import create_simple_resc_hierarchy + + def test_atomic_metadata_operations_244(self): + user = self.sess.users.get("rods") + group = self.sess.user_groups.get("public") + m = ( "attr_244","value","units") + + with self.assertRaises(BadAVUOperationValue): + AVUOperation(operation="add", avu=m) + + with self.assertRaises(BadAVUOperationValue): + AVUOperation(operation="not_add_or_remove", avu=iRODSMeta(*m)) + + with self.assertRaises(BadAVUOperationKeyword): + AVUOperation(operation="add", avu=iRODSMeta(*m), extra_keyword=None) + + + with self.assertRaises(InvalidAtomicAVURequest): + user.metadata.apply_atomic_operations( tuple() ) + + user.metadata.apply_atomic_operations() # no AVUs applied - no-op without error + + for n,obj in enumerate((group, user, self.coll, self.obj)): + avus = [ iRODSMeta('some_attribute',str(i),'some_units') for i in range(n*100,(n+1)*100) ] + obj.metadata.apply_atomic_operations(*[AVUOperation(operation="add", avu=avu_) for avu_ in avus]) + obj.metadata.apply_atomic_operations(*[AVUOperation(operation="remove", avu=avu_) for avu_ in avus]) + + + def test_atomic_metadata_operation_for_resource_244(self): + (root,leaf)=('ptX','rescX') + with self.create_simple_resc_hierarchy(root,leaf): + root_resc = self.sess.resources.get(root) # resource objects + leaf_resc = self.sess.resources.get(leaf) + root_tuple = ('role','root','new units #1') # AVU tuples to apply + leaf_tuple = ('role','leaf','new units #2') + root_resc.metadata.add( *root_tuple[:2] ) # first apply without units ... 
+ leaf_resc.metadata.add( *leaf_tuple[:2] ) + for resc,resc_tuple in ((root_resc, root_tuple), (leaf_resc, leaf_tuple)): + resc.metadata.apply_atomic_operations( # metadata set operation (remove + add) to add units + AVUOperation(operation="remove", avu=iRODSMeta(*resc_tuple[:2])), + AVUOperation(operation="add", avu=iRODSMeta(*resc_tuple[:3])) + ) + resc_meta = self.sess.metadata.get(Resource, resc.name) + avus_to_tuples = lambda avu_list: sorted([(i.name,i.value,i.units) for i in avu_list]) + self.assertEqual(avus_to_tuples(resc_meta), avus_to_tuples([iRODSMeta(*resc_tuple)])) + + + def test_atomic_metadata_operation_for_data_object_244(self): + AVUs_Equal = lambda avu1,avu2,fn=(lambda x:x): fn(avu1)==fn(avu2) + AVU_As_Tuple = lambda avu,length=3:(avu.name,avu.value,avu.units)[:length] + AVU_Units_String = lambda avu:"" if not avu.units else avu.units + m = iRODSMeta( "attr_244","value","units") + self.obj.metadata.add(m) + meta = self.sess.metadata.get(DataObject, self.obj_path) + self.assertEqual(len(meta), 1) + self.assertTrue(AVUs_Equal(m,meta[0],AVU_As_Tuple)) + self.obj.metadata.apply_atomic_operations( # remove original AVU and replace + AVUOperation(operation="remove",avu=m), # with two altered versions + AVUOperation(operation="add",avu=iRODSMeta(m.name,m.value,"units_244")), # (one of them without units) ... + AVUOperation(operation="add",avu=iRODSMeta(m.name,m.value)) + ) + meta = self.sess.metadata.get(DataObject, self.obj_path) # ... check integrity of change + self.assertEqual(sorted([AVU_Units_String(i) for i in meta]), ["","units_244"]) def test_get_obj_meta(self): # get object metadata From 4f1e071d54c60c59b566979a16655f7eb23dfcd1 Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Fri, 22 Jan 2021 18:46:10 -0500 Subject: [PATCH 66/96] [#3] v0.8.6 and update changelog --- CHANGELOG.rst | 11 +++++++++++ irods/version.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7676a02..4f5e85a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,17 @@ Changelog ========= +v0.8.6 (2021-01-22) +------------------- +[#244] added capability to add/remove atomic metadata [Daniel Moore] +[#226] Document creation of users [Ruben Garcia] +[#230] Add force option to data_object_manager create [Ruben Garcia] +[#239] to keep the tests passing [Daniel Moore] +[#239] add iRODSUser.info attribute [Pierre Gay] +[#239] add iRODSUser.comment attribute [Pierre Gay] +[#241] [irods/irods_capability_automated_ingest#136] fix redundant disconnect [Daniel Moore] +[#227] [#228] enable ICAT entries for zones and foreign-zone users [Daniel Moore] + v0.8.5 (2020-11-10) ------------------- [#220] Use connection create time to determine stale connections [Kaivan Kamali] diff --git a/irods/version.py b/irods/version.py index 73f8315..37e9d6d 100644 --- a/irods/version.py +++ b/irods/version.py @@ -1 +1 @@ -__version__ = '0.8.5' +__version__ = '0.8.6' From b83dbc973d2d991b9d7a39195ef6fcd7f8e4c1ed Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Fri, 22 Jan 2021 21:32:54 -0500 Subject: [PATCH 67/96] [#5] README formatting tweaks --- README.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 6257e4c..279b2f9 100644 --- a/README.rst +++ b/README.rst @@ -298,7 +298,7 @@ The list of operations will applied in the order given, so that a "remove" follo is, in effect, a metadata "set" operation. 
Also note that a "remove" operation will be ignored if the AVU value given does not exist on the target object at that point in the sequence of operations. -We can also source from a pre-built list of AVUOperations using Python's f(*args_list) syntax. For example, this +We can also source from a pre-built list of AVUOperations using Python's `f(*args_list)` syntax. For example, this function uses the atomic metadata API to very quickly remove all AVUs from an object: >>> def remove_all_avus( Object ): @@ -691,10 +691,12 @@ We can also query on access type using its numeric value, which will seem more n Managing users -------------- -You can create a user in the current zone using: +You can create a user in the current zone (with an optional auth_str): + >>> session.users.create('user', 'rodsuser', 'MyZone', auth_str) -(the auth_str parameter is optional). + If you want to create a user in a federated zone, use: + >>> session.users.create('user', 'rodsuser', 'OtherZone', auth_str) From c2df75236387a5c817f7399cab31aff81680fb99 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Thu, 4 Feb 2021 08:24:59 -0500 Subject: [PATCH 68/96] [#250] add exception for SYS_INVALID_INPUT_PARAM --- irods/exception.py | 4 ++++ irods/test/data_obj_test.py | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/irods/exception.py b/irods/exception.py index 74cb9c8..9617929 100644 --- a/irods/exception.py +++ b/irods/exception.py @@ -510,6 +510,10 @@ class SYS_RESC_QUOTA_EXCEEDED(SystemException): code = -110000 +class SYS_INVALID_INPUT_PARAM(SystemException): + code = -130000 + + class UserInputException(iRODSException): pass diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index d7ac1af..3706619 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -70,6 +70,11 @@ def test_obj_does_not_exist(self): self.assertFalse(self.sess.data_objects.exists(does_not_exist_path)) + def test_create_from_invalid_path__250(self): + with self.assertRaises(ex.SYS_INVALID_INPUT_PARAM): + self.sess.data_objects.create('t') + + def test_rename_obj(self): # test args collection = self.coll_path From 12eb3063836edc9c87f48c74a08894a78b5c5b93 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 26 Feb 2021 14:07:55 +0100 Subject: [PATCH 69/96] Add force option. 
--- irods/manager/data_object_manager.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index ddf5808..3c5bf11 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -81,7 +81,7 @@ def put(self, file, irods_path, return_data_object=False, **options): return self.get(obj) - def create(self, path, resource=None, **options): + def create(self, path, resource=None, force=False, **options): options[kw.DATA_TYPE_KW] = 'generic' if resource: @@ -93,6 +93,9 @@ def create(self, path, resource=None, **options): except AttributeError: pass + if force: + options[kw.FORCE_FLAG_KW] = '' + message_body = FileOpenRequest( objPath=path, createMode=0o644, From 13594d06f1806276be568b2b8e32a6d81602b2c0 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Fri, 26 Feb 2021 12:32:30 -0500 Subject: [PATCH 70/96] [#255] make results of atomic metadata ops visible --- irods/meta.py | 1 + irods/test/meta_test.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/irods/meta.py b/irods/meta.py index 2029043..ad16eb1 100644 --- a/irods/meta.py +++ b/irods/meta.py @@ -101,6 +101,7 @@ def _get_meta(self, *args): def apply_atomic_operations(self, *avu_ops): self._manager.apply_atomic_operations(self._model_cls, self._path, *avu_ops) + self._reset_metadata() def add(self, *args): """ diff --git a/irods/test/meta_test.py b/irods/test/meta_test.py index 7680cee..8954920 100644 --- a/irods/test/meta_test.py +++ b/irods/test/meta_test.py @@ -100,6 +100,21 @@ def test_atomic_metadata_operation_for_data_object_244(self): meta = self.sess.metadata.get(DataObject, self.obj_path) # ... check integrity of change self.assertEqual(sorted([AVU_Units_String(i) for i in meta]), ["","units_244"]) + def test_atomic_metadata_operations_255(self): + my_resc = self.sess.resources.create('dummyResc','passthru') + avus = [iRODSMeta('a','b','c'), iRODSMeta('d','e','f')] + objects = [ self.sess.users.get("rods"), self.sess.user_groups.get("public"), my_resc, + self.sess.collections.get(self.coll_path), self.sess.data_objects.get(self.obj_path) ] + try: + for obj in objects: + self.assertEqual(len(obj.metadata.items()), 0) + for n,item in enumerate(avus): + obj.metadata.apply_atomic_operations(AVUOperation(operation='add',avu=item)) + self.assertEqual(len(obj.metadata.items()), n+1) + finally: + for obj in objects: obj.metadata.remove_all() + my_resc.remove() + def test_get_obj_meta(self): # get object metadata meta = self.sess.metadata.get(DataObject, self.obj_path) From 56d5bf137df6fd9d6b4fa7441c4f9402ce309fd5 Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Tue, 23 Mar 2021 18:26:35 -0400 Subject: [PATCH 71/96] [#257] add pure python SSL session example --- README.rst | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 279b2f9..265b9e7 100644 --- a/README.rst +++ b/README.rst @@ -74,7 +74,7 @@ Using environment files (including any SSL settings) in ``~/.irods/``: >>> ssl_context = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH, cafile=None, capath=None, cadata=None) >>> ssl_settings = {'ssl_context': ssl_context} >>> with iRODSSession(irods_env_file=env_file, **ssl_settings) as session: -... pass +... # workload ... 
>>> @@ -82,7 +82,7 @@ Passing iRODS credentials as keyword arguments: >>> from irods.session import iRODSSession >>> with iRODSSession(host='localhost', port=1247, user='bob', password='1234', zone='tempZone') as session: -... pass +... # workload ... >>> @@ -91,12 +91,28 @@ If you're an administrator acting on behalf of another user: >>> from irods.session import iRODSSession >>> with iRODSSession(host='localhost', port=1247, user='rods', password='1234', zone='tempZone', client_user='bob', client_zone='possibly_another_zone') as session: -... pass +... # workload ... >>> If no ``client_zone`` is provided, the ``zone`` parameter is used in its place. +A pure Python SSL session (without a local `env_file`) requires a few more things defined: + +>>> import ssl +>>> from irods.session import iRODSSession +>>> ssl_context = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH, cafile='CERTNAME.crt', capath=None, cadata=None) +>>> ssl_settings = {'client_server_negotiation': 'request_server_negotiation', +... 'client_server_policy': 'CS_NEG_REQUIRE', +... 'encryption_algorithm': 'AES-256-CBC', +... 'encryption_key_size': 32, +... 'encryption_num_hash_rounds': 16, +... 'encryption_salt_size': 8, +... 'ssl_context': ssl_context} +>>> +>>> with iRODSSession(host='HOSTNAME_DEFINED_IN_CERTNAME.crt', port=1247, user='bob', password='1234', zone='tempZone', **ssl_settings) as session: +... # workload +>>> Working with collections ------------------------ From 142c068d52fb38eaac6f7f2906ff50679a947fbf Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Wed, 24 Mar 2021 09:10:37 -0400 Subject: [PATCH 72/96] [#257] more clarity in example --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 265b9e7..16971a3 100644 --- a/README.rst +++ b/README.rst @@ -110,7 +110,7 @@ A pure Python SSL session (without a local `env_file`) requires a few more thing ... 'encryption_salt_size': 8, ... 'ssl_context': ssl_context} >>> ->>> with iRODSSession(host='HOSTNAME_DEFINED_IN_CERTNAME.crt', port=1247, user='bob', password='1234', zone='tempZone', **ssl_settings) as session: +>>> with iRODSSession(host='HOSTNAME_DEFINED_IN_CAFILE_ABOVE', port=1247, user='bob', password='1234', zone='tempZone', **ssl_settings) as session: ... # workload >>> From 9198987bf543dee9a3bfdd05e3fc7ec1a67e406a Mon Sep 17 00:00:00 2001 From: Paul van Schayck Date: Mon, 26 Apr 2021 22:30:09 +0200 Subject: [PATCH 73/96] Add functions and tests for obtaining temporary passwords issued by the catalog provider for your account, or another (as rodsadmin). 
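[Editor's note] A quick sketch of the new temporary-password API added below (the host, the 'bob' account and the passwords are made-up placeholders): a rodsadmin asks the catalog provider for a temporary password on behalf of another user, then opens a session with it.

    from irods.session import iRODSSession

    with iRODSSession(host='localhost', port=1247, user='rods',
                      password='rods', zone='tempZone') as admin:
        temp_pw = admin.users.get('bob').temp_password()   # issued for 'bob' by the provider
        with iRODSSession(host='localhost', port=1247, user='bob',
                          password=temp_pw, zone='tempZone') as session:
            session.users.get('bob')   # any catalog round trip confirms the login works
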
--- irods/connection.py | 17 ++++++- irods/manager/user_manager.py | 28 ++++++++++- irods/message/__init__.py | 16 ++++++ irods/password_obfuscation.py | 8 +++ irods/test/temp_password_test.py | 86 ++++++++++++++++++++++++++++++++ irods/user.py | 3 ++ 6 files changed, 154 insertions(+), 4 deletions(-) create mode 100644 irods/test/temp_password_test.py diff --git a/irods/connection.py b/irods/connection.py index 6fefc75..bc96808 100644 --- a/irods/connection.py +++ b/irods/connection.py @@ -7,12 +7,12 @@ import os import ssl import datetime - +import irods.password_obfuscation as obf from irods.message import ( iRODSMessage, StartupPack, AuthResponse, AuthChallenge, AuthPluginOut, OpenedDataObjRequest, FileSeekResponse, StringStringMap, VersionResponse, - PluginAuthMessage, ClientServerNegotiation, Error) + PluginAuthMessage, ClientServerNegotiation, Error, GetTempPasswordOut) from irods.exception import get_exception_by_code, NetworkException from irods import ( MAX_PASSWORD_LENGTH, RESPONSE_LEN, @@ -566,3 +566,16 @@ def close_file(self, desc, **options): self.send(message) self.recv() + + def temp_password(self): + request = iRODSMessage("RODS_API_REQ", msg=None, + int_info=api_number['GET_TEMP_PASSWORD_AN']) + + # Send and receive request + self.send(request) + response = self.recv() + logger.debug(response.int_info) + + # Convert and return answer + msg = response.get_main_message(GetTempPasswordOut) + return obf.create_temp_password(msg.stringToHashWith, self.account.password) diff --git a/irods/manager/user_manager.py b/irods/manager/user_manager.py index e0c107c..01e7b41 100644 --- a/irods/manager/user_manager.py +++ b/irods/manager/user_manager.py @@ -3,8 +3,8 @@ from irods.models import User, UserGroup from irods.manager import Manager -from irods.message import GeneralAdminRequest, iRODSMessage -from irods.exception import UserDoesNotExist, UserGroupDoesNotExist, NoResultFound +from irods.message import GeneralAdminRequest, iRODSMessage, GetTempPasswordForOtherRequest, GetTempPasswordForOtherOut +from irods.exception import UserDoesNotExist, UserGroupDoesNotExist, NoResultFound, CAT_SQL_ERR from irods.api_number import api_number from irods.user import iRODSUser, iRODSUserGroup import irods.password_obfuscation as obf @@ -58,6 +58,30 @@ def remove(self, user_name, user_zone=""): response = conn.recv() logger.debug(response.int_info) + def temp_password_for_user(self, user_name): + with self.sess.pool.get_connection() as conn: + message_body = GetTempPasswordForOtherRequest( + targetUser=user_name, + unused=None + ) + request = iRODSMessage("RODS_API_REQ", msg=message_body, + int_info=api_number['GET_TEMP_PASSWORD_FOR_OTHER_AN']) + + # Send request + conn.send(request) + + # Receive answer + try: + response = conn.recv() + logger.debug(response.int_info) + except CAT_SQL_ERR: + raise UserDoesNotExist() + + # Convert and return answer + msg = response.get_main_message(GetTempPasswordForOtherOut) + return obf.create_temp_password(msg.stringToHashWith, conn.account.password) + + def modify(self, user_name, option, new_value, user_zone=""): # must append zone to username for this API call diff --git a/irods/message/__init__.py b/irods/message/__init__.py index dfafdac..ceaf6d5 100644 --- a/irods/message/__init__.py +++ b/irods/message/__init__.py @@ -495,6 +495,22 @@ def __init__(self, *args): arg9 = StringProperty() +class GetTempPasswordForOtherRequest(Message): + _name = 'getTempPasswordForOtherInp_PI' + targetUser = StringProperty() + unused = StringProperty() + + +class 
GetTempPasswordForOtherOut(Message): + _name = 'getTempPasswordForOtherOut_PI' + stringToHashWith = StringProperty() + + +class GetTempPasswordOut(Message): + _name = 'getTempPasswordOut_PI' + stringToHashWith = StringProperty() + + #define ticketAdminInp_PI "str *arg1; str *arg2; str *arg3; str *arg4; str *arg5; str *arg6;" class TicketAdminRequest(Message): diff --git a/irods/password_obfuscation.py b/irods/password_obfuscation.py index a6f3a0e..ef38550 100644 --- a/irods/password_obfuscation.py +++ b/irods/password_obfuscation.py @@ -275,3 +275,11 @@ def obfuscate_new_password(new, old, signature): new = new + padding[:lcopy] return scramble_v2(new, old, signature) + + +def create_temp_password(temp_hash, source_password): + password = (temp_hash + source_password).ljust(100, chr(0)) + password_md5 = hashlib.md5(password.encode('utf-8')) + + # Return hexdigest + return password_md5.hexdigest() diff --git a/irods/test/temp_password_test.py b/irods/test/temp_password_test.py new file mode 100644 index 0000000..9f18fbd --- /dev/null +++ b/irods/test/temp_password_test.py @@ -0,0 +1,86 @@ +#! /usr/bin/env python +from __future__ import absolute_import +import os +import sys +import unittest +from irods.exception import UserDoesNotExist +from irods.session import iRODSSession +import irods.test.helpers as helpers + + +class TestTempPassword(unittest.TestCase): + """ Suite of tests for setting and getting temporary passwords as rodsadmin or rodsuser + """ + admin = None + new_user = 'bobby' + password = 'foobar' + + @classmethod + def setUpClass(cls): + cls.admin = helpers.make_session() + + @classmethod + def tearDownClass(cls): + cls.admin.cleanup() + + def test_temp_password(self): + # Make a new user + self.admin.users.create(self.new_user, 'rodsuser') + self.admin.users.modify(self.new_user, 'password', self.password) + + # Login as the new test user, to retrieve a temporary password + with iRODSSession(host=self.admin.host, + port=self.admin.port, + user=self.new_user, + password=self.password, + zone=self.admin.zone) as session: + # Obtain the temporary password + conn = session.pool.get_connection() + temp_password = conn.temp_password() + + # Open a new session with the temporary password + with iRODSSession(host=self.admin.host, + port=self.admin.port, + user=self.new_user, + password=temp_password, + zone=self.admin.zone) as session: + + # do something that connects to the server + session.users.get(self.admin.username) + + # delete new user + self.admin.users.remove(self.new_user) + + # user should be gone + with self.assertRaises(UserDoesNotExist): + self.admin.users.get(self.new_user) + + def test_set_temp_password(self): + # make a new user + temp_user = self.admin.users.create(self.new_user, 'rodsuser') + + # obtain a temporary password as rodsadmin for another user + temp_password = temp_user.temp_password() + + # open a session as the new user + with iRODSSession(host=self.admin.host, + port=self.admin.port, + user=self.new_user, + password=temp_password, + zone=self.admin.zone) as session: + + # do something that connects to the server + session.users.get(self.new_user) + + # delete new user + self.admin.users.remove(self.new_user) + + # user should be gone + with self.assertRaises(UserDoesNotExist): + self.admin.users.get(self.new_user) + + +if __name__ == '__main__': + # let the tests find the parent irods lib + sys.path.insert(0, os.path.abspath('../..')) + unittest.main() diff --git a/irods/user.py b/irods/user.py index ecf5280..74b000a 100644 --- a/irods/user.py +++ 
b/irods/user.py @@ -53,6 +53,9 @@ def __repr__(self): def remove(self): self.manager.remove(self.name, self.zone) + def temp_password(self): + return self.manager.temp_password_for_user(self.name) + class iRODSUserGroup(object): From ee587e2c16d4ab267693b48e4e684d8ce26ce2e4 Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Mon, 26 Apr 2021 16:59:02 -0400 Subject: [PATCH 74/96] [#263] update documentation for connection_timeout --- README.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.rst b/README.rst index 16971a3..ba9b05e 100644 --- a/README.rst +++ b/README.rst @@ -114,6 +114,19 @@ A pure Python SSL session (without a local `env_file`) requires a few more thing ... # workload >>> + +Maintaining a connection +------------------------ + +The default library timeout for a connection to an iRODS Server is 120 seconds. + +This can be overridden by changing the session `connection_timeout` immediately after creation of the session object: + +>>> session.connection_timeout = 300 + +This will set the timeout to five minutes for any associated connections. + + Working with collections ------------------------ From 952de73cf618eeeab407bb899c5f9c83de4fa897 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Wed, 5 May 2021 12:42:31 -0400 Subject: [PATCH 75/96] [#267] RuleExec model for genquery --- irods/models.py | 20 ++++++++++++++++++++ irods/rule.py | 19 +++++++++++++++++++ irods/test/query_test.py | 23 ++++++++++++++++++++++- 3 files changed, 61 insertions(+), 1 deletion(-) diff --git a/irods/models.py b/irods/models.py index 4b1b4e2..71d642a 100644 --- a/irods/models.py +++ b/irods/models.py @@ -19,6 +19,26 @@ class Model(six.with_metaclass(ModelBase, object)): pass +class RuleExec(Model): + id = Column(Integer, 'RULE_EXEC_ID', 1000) + name = Column(String, 'RULE_EXEC_NAME', 1001) + rei_file_path = Column(String,'RULE_EXEC_REI_FILE_PATH', 1002) + user_name = Column(String, 'RULE_EXEC_USER_NAME', 1003) + time = Column(DateTime,'RULE_EXEC_TIME', 1005) + last_exe_time = Column(DateTime,'RULE_EXEC_LAST_EXE_TIME', 1010) + frequency = Column(String,'RULE_EXEC_FREQUENCY', 1006) + priority = Column(String, 'RULE_EXEC_PRIORITY', 1007) + +# # If needed in 4.2.9, we can update the Query class to dynamically +# # attach this field based on server version: +# context = Column(String, 'RULE_EXEC_CONTEXT', 1012) + +# # These are either unused or usually absent: +# exec_status = Column(String,'RULE_EXEC_STATUS', 1011) +# address = Column(String,'RULE_EXEC_ADDRESS', 1004) +# notification_addr = Column('RULE_EXEC_NOTIFICATION_ADDR', 1009) + + class Zone(Model): id = Column(Integer, 'ZONE_ID', 101) name = Column(String, 'ZONE_NAME', 102) diff --git a/irods/rule.py b/irods/rule.py index 6d7e07a..a36142c 100644 --- a/irods/rule.py +++ b/irods/rule.py @@ -2,6 +2,15 @@ from irods.message import iRODSMessage, StringStringMap, RodsHostAddress, STR_PI, MsParam, MsParamArray, RuleExecutionRequest from irods.api_number import api_number from io import open as io_open +from irods.message import Message, StringProperty + +class RemoveRuleMessage(Message): + #define RULE_EXEC_DEL_INP_PI "str ruleExecId[NAME_LEN];" + _name = 'RULE_EXEC_DEL_INP_PI' + ruleExecId = StringProperty() + def __init__(self,id_): + super(RemoveRuleMessage,self).__init__() + self.ruleExecId = str(id_) class Rule(object): def __init__(self, session, rule_file=None, body='', params=None, output=''): @@ -21,6 +30,16 @@ def __init__(self, session, rule_file=None, body='', params=None, output=''): if output != '': self.output = 
output + def remove_by_id(self,*ids): + with self.session.pool.get_connection() as conn: + for id_ in ids: + request = iRODSMessage("RODS_API_REQ", msg=RemoveRuleMessage(id_), + int_info=api_number['RULE_EXEC_DEL_AN']) + conn.send(request) + response = conn.recv() + if response.int_info != 0: + raise RuntimeError("Error removing rule {id_}".format(**locals())) + def load(self, rule_file, encoding = 'utf-8'): self.body = '@external\n' diff --git a/irods/test/query_test.py b/irods/test/query_test.py index d2b5a8d..9fdd555 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -12,12 +12,15 @@ from irods.models import (User, UserMeta, Resource, ResourceMeta, Collection, CollectionMeta, - DataObject, DataObjectMeta ) + DataObject, DataObjectMeta, + RuleExec) +from tempfile import NamedTemporaryFile from irods.exception import MultipleResultsFound, CAT_UNKNOWN_SPECIFIC_QUERY, CAT_INVALID_ARGUMENT from irods.query import SpecificQuery from irods.column import Like, Between, In from irods.meta import iRODSMeta +from irods.rule import Rule from irods import MAX_SQL_ROWS import irods.test.helpers as helpers from six.moves import range as py3_range @@ -470,6 +473,23 @@ def test_paging_get_results__166(self): if iters == batch_size - 1: break # leave iteration unfinished + def test_rules_query__267(self): + unique = "Testing prc #267: queryable rule objects" + with NamedTemporaryFile(mode='w') as rfile: + rfile.write("""f() {{ delay('1m') {{ writeLine('serverLog','{unique}') }} }}\n""" + """OUTPUT null\n""".format(**locals())) + rfile.flush() + ## create a delayed rule we can query against + myrule = Rule(self.sess, rule_file = rfile.name) + myrule.execute() + qu = self.sess.query(RuleExec.id).filter( Like(RuleExec.frequency,'%1m%'), + Like(RuleExec.name, '%{unique}%'.format(**locals())) ) + results = [row for row in qu] + self.assertEqual(1, len(results)) + if results: + Rule(self.sess).remove_by_id( results[0][RuleExec.id] ) + + class TestSpecificQuery(unittest.TestCase): def setUp(self): @@ -583,6 +603,7 @@ def test_list_specific_queries_with_unknown_alias(self): next(res) + if __name__ == '__main__': # let the tests find the parent irods lib sys.path.insert(0, os.path.abspath('../..')) From 5fc12da5eda77f87412fc2b4a6d5ea9f3ba4918b Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Sat, 17 Apr 2021 14:42:59 +0000 Subject: [PATCH 76/96] [#221] minor revisions to get test suite consistently passing * correct exception type for irods 4-2-stable * cleanup in rename test --- irods/test/data_obj_test.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index 3706619..acf6837 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -71,8 +71,17 @@ def test_obj_does_not_exist(self): def test_create_from_invalid_path__250(self): - with self.assertRaises(ex.SYS_INVALID_INPUT_PARAM): + possible_exceptions = { ex.CAT_UNKNOWN_COLLECTION: (lambda serv_vsn : serv_vsn >= (4,2,9)), + ex.SYS_INVALID_INPUT_PARAM: (lambda serv_vsn : serv_vsn <= (4,2,8)) + } + raisedExc = None + try: self.sess.data_objects.create('t') + except Exception as exc: + raisedExc = exc + server_version_cond = possible_exceptions.get(type(raisedExc)) + self.assertTrue(server_version_cond is not None) + self.assertTrue(server_version_cond(self.sess.server_version)) def test_rename_obj(self): @@ -104,7 +113,7 @@ def test_rename_obj(self): self.assertEqual(obj.id, saved_id) # remove object - 
self.sess.data_objects.unlink(new_path) + self.sess.data_objects.unlink(new_path, force = True) def test_move_obj_to_coll(self): From ac6fb8b30565573726765982d86130f846938752 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Tue, 20 Apr 2021 20:38:44 +0000 Subject: [PATCH 77/96] [#221] DEST RESC in object open-for-write now required for some tests Due to irods/irods#5548, a DEST_RESC keyword must be used to allow a replica not residing on the default resource to participate in voting. --- irods/test/admin_test.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/irods/test/admin_test.py b/irods/test/admin_test.py index 1267341..e188d99 100644 --- a/irods/test/admin_test.py +++ b/irods/test/admin_test.py @@ -8,6 +8,7 @@ from irods.session import iRODSSession from irods.resource import iRODSResource import irods.test.helpers as helpers +import irods.keywords as kw class TestAdmin(unittest.TestCase): @@ -157,7 +158,7 @@ def test_make_compound_resource(self): obj = session.data_objects.create(obj_path, comp.name) # write to object - with obj.open('w+') as obj_desc: + with obj.open('w+',**{kw.DEST_RESC_NAME_KW:comp.name}) as obj_desc: obj_desc.write(dummy_str) # refresh object @@ -304,7 +305,9 @@ def test_make_ufs_resource(self): obj = self.sess.data_objects.create(obj_path, resc_name) # write something to the file - with obj.open('w+') as obj_desc: + # (can omit use of DEST_RESC_NAME_KW on resolution of + # https://github.com/irods/irods/issues/5548 ) + with obj.open('w+', **{kw.DEST_RESC_NAME_KW: resc_name} ) as obj_desc: obj_desc.write(dummy_str) # refresh object (size has changed) From 97ec30578e8f03b59de5286689df6fb526fe0df8 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Wed, 21 Apr 2021 03:12:28 +0000 Subject: [PATCH 78/96] [#221] general server vsn override and test skip IRODS_SERVER_VERSION may be set now to override the iRODS server version being reported to the python client. This allows us, for example, to: - skip tests for no-longer valid features (such as force option on data obj create in 4,2,9) - test validity of fixes designed to take effect for a future release, and do so prior to the corresponding version bump. 
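[Editor's note] A brief sketch of the override described above (connection details are placeholders). The value is parsed with ast.literal_eval in the patch below, so it should be written as a Python tuple literal:

    import os
    os.environ['IRODS_SERVER_VERSION'] = '(4, 2, 9)'   # pretend we are talking to 4.2.9

    from irods.session import iRODSSession
    with iRODSSession(host='localhost', port=1247, user='rods',
                      password='rods', zone='tempZone') as session:
        print(session.server_version)   # -> (4, 2, 9), regardless of what the server reports
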
--- irods/connection.py | 7 +++++-- irods/test/force_create.py | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/irods/connection.py b/irods/connection.py index bc96808..843d398 100644 --- a/irods/connection.py +++ b/irods/connection.py @@ -8,6 +8,8 @@ import ssl import datetime import irods.password_obfuscation as obf +from ast import literal_eval as safe_eval + from irods.message import ( iRODSMessage, StartupPack, AuthResponse, AuthChallenge, AuthPluginOut, @@ -63,8 +65,9 @@ def __init__(self, pool, account): @property def server_version(self): - return tuple(int(x) for x in self._server_version.relVersion.replace('rods', '').split('.')) - + detected = tuple(int(x) for x in self._server_version.relVersion.replace('rods', '').split('.')) + return (safe_eval(os.environ.get('IRODS_SERVER_VERSION','()')) + or detected) @property def client_signature(self): return self._client_signature diff --git a/irods/test/force_create.py b/irods/test/force_create.py index b373cfe..3510ebe 100644 --- a/irods/test/force_create.py +++ b/irods/test/force_create.py @@ -18,6 +18,8 @@ def tearDown(self): # This test should pass whether or not federation is configured: def test_force_create(self): + if self.sess.server_version > (4, 2, 8): + self.skipTest('force flag unneeded for create in iRODS > 4.2.8') session = self.sess FILE = '/{session.zone}/home/{session.username}/a.txt'.format(**locals()) try: From 6794aaf248e37cedb18c03fda638edc3d6289047 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Thu, 1 Apr 2021 22:53:25 -0400 Subject: [PATCH 79/96] [#260 irods/irods#5520] XML protocol will use BinBytesBuf in 4.2.9 Allow atomic metadata and parallel transfer to work in PRC with changes to server API numbers >= 20000 whose I/O's use possibly arbirary JSON text content. This encodes with base64 and prevents '&', '<', and '>' characters in the JSON from interfering with proper parsing of the XML packing instructions. 
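[Editor's note] A standalone illustration of why the base64 framing helps; this is not the library's actual wire code, and the payload below is a hypothetical atomic-metadata request. JSON text containing XML-special characters survives the XML packing instruction once encoded:

    import base64, json

    payload = json.dumps({"entity_name": "/tempZone/home/rods/a&b",
                          "operations": [{"operation": "add",
                                          "attribute": "note",
                                          "value": "<needs & escaping>"}]})
    framed = base64.b64encode(payload.encode())          # only [A-Za-z0-9+/=] goes on the wire
    restored = json.loads(base64.b64decode(framed).decode())
    assert restored["operations"][0]["value"] == "<needs & escaping>"
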
--- irods/manager/metadata_manager.py | 14 +++++---- irods/message/__init__.py | 47 ++++++++++++++++++++++++------- 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/irods/manager/metadata_manager.py b/irods/manager/metadata_manager.py index ed31580..6f4fe6e 100644 --- a/irods/manager/metadata_manager.py +++ b/irods/manager/metadata_manager.py @@ -4,7 +4,7 @@ from os.path import dirname, basename from irods.manager import Manager -from irods.message import MetadataRequest, iRODSMessage, JSONMessage +from irods.message import MetadataRequest, iRODSMessage, JSON_Message from irods.api_number import api_number from irods.models import (DataObject, Collection, Resource, User, DataObjectMeta, CollectionMeta, ResourceMeta, UserMeta) @@ -156,10 +156,12 @@ def apply_atomic_operations(self, model_cls, path, *avu_ops): } self._call_atomic_metadata_api(request) - def _call_atomic_metadata_api(self, request): - request = iRODSMessage("RODS_API_REQ", msg=JSONMessage(request), - int_info=api_number['ATOMIC_APPLY_METADATA_OPERATIONS_APN']) + def _call_atomic_metadata_api(self, request_text): with self.sess.pool.get_connection() as conn: - conn.send(request) + request_msg = iRODSMessage("RODS_API_REQ", JSON_Message( request_text, conn.server_version ), + int_info=api_number['ATOMIC_APPLY_METADATA_OPERATIONS_APN']) + conn.send( request_msg ) response = conn.recv() - logger.debug('atomic metadata api response = %s %s',response.int_info,repr(response.get_json_encoded_struct())) + response_msg = response.get_json_encoded_struct() + logger.debug("in atomic_metadata, server responded with: %r",response_msg) + diff --git a/irods/message/__init__.py b/irods/message/__init__.py index ceaf6d5..a25dcde 100644 --- a/irods/message/__init__.py +++ b/irods/message/__init__.py @@ -1,3 +1,4 @@ +import base64 import struct import logging import socket @@ -59,6 +60,15 @@ def _recv_message_into(sock, buffer, size): index += rsize return mv[:index] +#------------------------------------ + +class BinBytesBuf(Message): + _name = 'BinBytesBuf_PI' + buflen = IntegerProperty() + buf = BinaryProperty() + +class JSON_Binary_Response(BinBytesBuf): + pass class iRODSMessage(object): @@ -71,7 +81,12 @@ def __init__(self, msg_type=b'', msg=None, error=b'', bs=b'', int_info=0): def get_json_encoded_struct (self): Xml = ET.fromstring(self.msg.replace(b'\0',b'')) - return json.loads(Xml.find('buf').text) + json_str = Xml.find('buf').text + if Xml.tag == 'BinBytesBuf_PI': + mybin = JSON_Binary_Response() + mybin.unpack(Xml) + json_str = mybin.buf.replace(b'\0',b'').decode() + return json.loads( json_str ) @staticmethod def recv(sock): @@ -236,24 +251,36 @@ class AuthPluginOut(Message): # define InxIvalPair_PI "int iiLen; int *inx(iiLen); int *ivalue(iiLen);" - -class BinBytesBuf(Message): - _name = 'BinBytesBuf_PI' - buflen = IntegerProperty() - buf = BinaryProperty() +class JSON_Binary_Request(BinBytesBuf): + """A message body whose payload is BinBytesBuf containing JSON.""" + def __init__(self,msg_struct): + """Initialize with a Python data structure that will be converted to JSON.""" + super(JSON_Binary_Request,self).__init__() + string = json.dumps(msg_struct) + self.buf = string + self.buflen = len(string) class BytesBuf(Message): _name = 'BytesBuf_PI' buflen = IntegerProperty() buf = StringProperty() - -class JSONMessage(BytesBuf): - """A message body whose payload is just a BytesBuf containing JSON.""" + def __init__(self,string,*v,**kw): + super(BytesBuf,self).__init__(*v,**kw) + _buf = 
StringProperty.escape_xml_string( string ) + self.buf = string + self.buflen = len(self.buf) + +class JSON_XMLFramed_Request(BytesBuf): + """A message body whose payload is a BytesBuf containing JSON.""" def __init__(self, msg_struct): """Initialize with a Python data structure that will be converted to JSON.""" s = json.dumps(msg_struct) - super(JSONMessage,self).__init__(buf=s,buflen=len(s)) + super(JSON_XMLFramed_Request,self).__init__(s) +def JSON_Message( msg_struct , server_version = () ): + cls = JSON_XMLFramed_Request if server_version < (4,2,9) \ + else JSON_Binary_Request + return cls(msg_struct) class PluginAuthMessage(Message): _name = 'authPlugReqInp_PI' From 8f79e42255a66b556bdb4a16d16313d93919c3f4 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Mon, 26 Apr 2021 13:12:21 -0400 Subject: [PATCH 80/96] [#221] add infrastructure and update tests for Jenkins client CI Tests now take advantage of shared directories for registering files from client side and for shared resource situations (where the client needs local access to the vault directory of the shared iRODS resource). --- Dockerfile.prc_test.centos | 31 +++ Dockerfile.prc_test.ubuntu | 38 +++ MANIFEST.in | 2 +- docker-compose.yml | 36 +++ docker_build/Dockerfile.provider | 39 +++ docker_build/ICAT.sql | 3 + docker_build/build_deps_list | 22 ++ docker_build/iinit.py | 44 ++++ docker_build/pgpass | 1 + docker_build/wait_on_condition | 34 +++ irods/test/admin_test.py | 3 + irods/test/collection_test.py | 14 +- irods/test/data_obj_test.py | 235 ++++++++++-------- irods/test/helpers.py | 62 +++++ irods/test/pool_test.py | 53 +++- irods/test/query_test.py | 62 ++++- ...tium_continuous_integration_test_module.py | 25 ++ run_python_tests.sh | 13 + 18 files changed, 591 insertions(+), 126 deletions(-) create mode 100644 Dockerfile.prc_test.centos create mode 100644 Dockerfile.prc_test.ubuntu create mode 100644 docker-compose.yml create mode 100644 docker_build/Dockerfile.provider create mode 100644 docker_build/ICAT.sql create mode 100755 docker_build/build_deps_list create mode 100644 docker_build/iinit.py create mode 100644 docker_build/pgpass create mode 100755 docker_build/wait_on_condition create mode 100644 irods_consortium_continuous_integration_test_module.py create mode 100644 run_python_tests.sh diff --git a/Dockerfile.prc_test.centos b/Dockerfile.prc_test.centos new file mode 100644 index 0000000..bd4ef60 --- /dev/null +++ b/Dockerfile.prc_test.centos @@ -0,0 +1,31 @@ +ARG os_image +FROM ${os_image} +ARG log_output_dir=/tmp +ENV LOG_OUTPUT_DIR="$log_output_dir" +ARG py_N +ENV PY_N "$py_N" + +RUN yum install -y epel-release +RUN yum install -y git nmap-ncat sudo +RUN yum install -y python${py_N} python${py_N}-pip +RUN useradd -md /home/user -s /bin/bash user +RUN echo "user ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers +WORKDIR /home/user +COPY ./ ./repo/ +RUN chown -R user repo/ +USER user +RUN pip${py_N} install --user --upgrade pip==20.3.4 # - limit pip version for C7 system python2.7 +RUN cd repo && python${py_N} -m pip install --user . +RUN python${py_N} repo/docker_build/iinit.py \ + host irods-provider \ + port 1247 \ + user rods \ + zone tempZone \ + password rods +SHELL ["/bin/bash","-c"] +CMD echo "Waiting on iRODS server ::: " &&\ + repo/docker_build/wait_on_condition -v -i 10 -n 30 \ + 'echo -e "\x00\x00\x00\x33HEARTBEAT" | nc irods-provider 1247 | grep HEARTBEAT' &&\ + echo "::: iRODS server is up." 
|tee /tmp/irods_is_up &&\ + sudo groupadd -o -g $(stat -c%g /irods_shared) irods && sudo usermod -aG irods user && \ + newgrp irods < repo/run_python_tests.sh diff --git a/Dockerfile.prc_test.ubuntu b/Dockerfile.prc_test.ubuntu new file mode 100644 index 0000000..924696d --- /dev/null +++ b/Dockerfile.prc_test.ubuntu @@ -0,0 +1,38 @@ +ARG os_image +FROM ${os_image} +ARG log_output_dir=/tmp +ENV LOG_OUTPUT_DIR="$log_output_dir" +ARG py_N +ENV PY_N "$py_N" + +RUN apt update +RUN apt install -y git netcat-openbsd sudo +RUN apt install -y python${py_N} python${py_N}-pip +RUN useradd -md /home/user -s /bin/bash user +RUN echo "user ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers +WORKDIR /home/user +COPY ./ ./repo/ +RUN chown -R user repo/ +USER user +RUN pip${py_N} install --user --upgrade pip==20.3.4 # -- version specified for Ub16 +RUN cd repo && python${py_N} -m pip install --user . +RUN python${py_N} repo/docker_build/iinit.py \ + host irods-provider \ + port 1247 \ + user rods \ + zone tempZone \ + password rods + +SHELL ["/bin/bash","-c"] + +# -- At runtime: -- +# 1. wait for provider to run. +# 2. give user group permissions to access shared irods directories +# 3. run python tests as the new group + +CMD echo "Waiting on iRODS server ::: " &&\ + repo/docker_build/wait_on_condition -v -i 10 -n 30 \ + 'echo -e "\x00\x00\x00\x33HEARTBEAT" | nc irods-provider 1247 | grep HEARTBEAT' && \ + echo "::: iRODS server is up." |tee /tmp/irods_is_up && \ + sudo groupadd -o -g $(stat -c%g /irods_shared) irods && sudo usermod -aG irods user && \ + newgrp irods < repo/run_python_tests.sh diff --git a/MANIFEST.in b/MANIFEST.in index 3c469e1..7d5f943 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -include AUTHORS CHANGELOG.rst LICENSE.txt README.rst irods/test/README.rst irods/test/unicode_sampler.xml \ No newline at end of file +include AUTHORS CHANGELOG.rst LICENSE.txt README.rst irods/test/README.rst irods/test/unicode_sampler.xml irods/test/test-data/*.json \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..0aa372d --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,36 @@ +version: '3' +services: + + icat: + image: postgres:10 + environment: + - POSTGRES_HOST_AUTH_METHOD=md5 + - POSTGRES_PASSWORD=pg_password + + irods-provider: + hostname: irods-provider + build: + context: docker_build + dockerfile: Dockerfile.provider + volumes: + - "${irods_pkg_dir}:/irods_packages:ro" + - ./irods_shared:/irods_shared:rw + depends_on: + - icat + networks: + default: + aliases: + - irods-provider + + client-runner: + env_file: client-runner.env + volumes: + - ./irods_shared:/irods_shared:rw + build: + context: . 
+ dockerfile: Dockerfile.prc_test.${client_os_generic} + args: + os_image: "$client_os_image" + py_N: "$python_version" + depends_on: + - irods-provider diff --git a/docker_build/Dockerfile.provider b/docker_build/Dockerfile.provider new file mode 100644 index 0000000..5a45e84 --- /dev/null +++ b/docker_build/Dockerfile.provider @@ -0,0 +1,39 @@ +FROM ubuntu:18.04 + +ARG irods_pkg_dir + +RUN apt update +RUN apt install -y wget sudo lsb-release apt-transport-https gnupg2 postgresql-client +RUN wget -qO - https://packages.irods.org/irods-signing-key.asc | sudo apt-key add - +RUN echo "deb [arch=amd64] https://packages.irods.org/apt/ $(lsb_release -sc) main" | sudo tee /etc/apt/sources.list.d/renci-irods.list +RUN apt update + +SHELL [ "/bin/bash","-c" ] + +COPY ICAT.sql /tmp +COPY pgpass root/.pgpass +RUN chmod 600 root/.pgpass + +RUN apt install -y rsyslog +ADD build_deps_list wait_on_condition /tmp/ + +# At Runtime: 1. Install apt dependencies for the iRODS package files given. +# 2. Install the package files. +# 3. Wait on database container. +# 4. Configure iRODS provider and make sure it is running. +# 5. Configure shared folder for tests that need to register data objects. +# (We opt out if /irods_shared does not exist, ie is omitted in the docker-compose.yml). +# 6. Wait forever. + +CMD apt install -y $(/tmp/build_deps_list /irods_packages/irods*{serv,dev,icommand,runtime,database-*postgres}*.deb) && \ + dpkg -i /irods_packages/irods*{serv,dev,icommand,runtime,database-*postgres}*.deb && \ + /tmp/wait_on_condition -i 5 -n 12 "psql -h icat -U postgres -c '\\l' >/dev/null" && \ + psql -h icat -U postgres -f /tmp/ICAT.sql && \ + sed 's/localhost/icat/' < /var/lib/irods/packaging/localhost_setup_postgres.input \ + | python /var/lib/irods/scripts/setup_irods.py && \ + { pgrep -u irods irodsServer >/dev/null || su irods -c '~/irodsctl start'; } && \ + { [ ! -d /irods_shared ] || { mkdir -p /irods_shared/reg_resc && mkdir -p /irods_shared/tmp && \ + chown -R irods.irods /irods_shared && chmod g+ws /irods_shared/tmp && \ + chmod 777 /irods_shared/reg_resc ; } } && \ + echo $'*********\n' $'*********\n' $'*********\n' $'*********\n' $'*********\n' IRODS IS UP && \ + tail -f /dev/null diff --git a/docker_build/ICAT.sql b/docker_build/ICAT.sql new file mode 100644 index 0000000..abb706a --- /dev/null +++ b/docker_build/ICAT.sql @@ -0,0 +1,3 @@ +CREATE USER irods WITH PASSWORD 'testpassword'; +CREATE DATABASE "ICAT"; +GRANT ALL PRIVILEGES ON DATABASE "ICAT" TO irods; diff --git a/docker_build/build_deps_list b/docker_build/build_deps_list new file mode 100755 index 0000000..7bf3798 --- /dev/null +++ b/docker_build/build_deps_list @@ -0,0 +1,22 @@ +#!/bin/bash + +build_deps_list() +{ + local -A pkglist + local pkg + while [ $# -gt 0 ] + do + while read f + do + if [[ ! $f =~ \(.*\)\s*$ ]]; then # todo: include version-specific ? 
+ pkglist["$f"]="" + fi + done < <(dpkg -I "$1"|grep -i '^ *depends:'|tr ',:' \\n | tail -n +2) + shift + done + for pkg in "${!pkglist[@]}" # package list de-duped by associative array + do + echo "$pkg" + done +} +build_deps_list "$@" diff --git a/docker_build/iinit.py b/docker_build/iinit.py new file mode 100644 index 0000000..81365d8 --- /dev/null +++ b/docker_build/iinit.py @@ -0,0 +1,44 @@ +from getpass import getpass +from irods.password_obfuscation import encode +import json +import os +import sys +from os import chmod +from os.path import expanduser,exists,join +from getopt import getopt + + +home_env_path = expanduser('~/.irods') +env_file_path = join(home_env_path,'irods_environment.json') +auth_file_path = join(home_env_path,'.irodsA') + + +def do_iinit(host, port, user, zone, password): + if not exists(home_env_path): + os.makedirs(home_env_path) + else: + raise RuntimeError('~/.irods already exists') + + with open(env_file_path,'w') as env_file: + json.dump ( { "irods_host": host, + "irods_port": int(port), + "irods_user_name": user, + "irods_zone_name": zone }, env_file, indent=4) + with open(auth_file_path,'w') as auth_file: + auth_file.write(encode(password)) + chmod (auth_file_path,0o600) + + +def get_kv_pairs_from_cmdline(*args): + arglist = list(args) + while arglist: + k = arglist.pop(0) + v = arglist.pop(0) + yield k,v + + +if __name__ == '__main__': + import sys + args = sys.argv[1:] + dct = {k:v for k,v in get_kv_pairs_from_cmdline(*args)} + do_iinit(**dct) diff --git a/docker_build/pgpass b/docker_build/pgpass new file mode 100644 index 0000000..55a6bdf --- /dev/null +++ b/docker_build/pgpass @@ -0,0 +1 @@ +icat:5432:postgres:postgres:pg_password diff --git a/docker_build/wait_on_condition b/docker_build/wait_on_condition new file mode 100755 index 0000000..ce2c29b --- /dev/null +++ b/docker_build/wait_on_condition @@ -0,0 +1,34 @@ +#!/bin/bash + +# wait for a program to run with 0 return status + +interval=3; ntimes=20; verbose="" + +usage() { + echo "$0 [options] " + printf "\t options are: -i (default %d)\n" $interval + printf "\t -n (default %d)\n" $ntimes + printf "\t -v : for verbose reporting\n" + exit 1 +} >&2 + +while [[ "$1" = -* ]] ; do + case $1 in + -i) shift; interval=$1; shift ;; + -n) shift; ntimes=$1; shift ;; + -v) verbose=1 ; shift;; + *) usage;; + esac +done +[ $# -eq 0 ] && usage + +n=1 +while : ; do + eval "$@" + STATUS=$? 
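+  # a zero status here (condition met) or exhausting the allowed attempts ends the loop below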
+ [ -n "$verbose" ] && echo "$n:" 'STATUS =' $STATUS `date` + [ $((++n)) -gt $ntimes -o $STATUS -eq 0 ] && break + sleep $interval +done + +exit $STATUS diff --git a/irods/test/admin_test.py b/irods/test/admin_test.py index e188d99..4f1cb80 100644 --- a/irods/test/admin_test.py +++ b/irods/test/admin_test.py @@ -264,6 +264,9 @@ def test_resource_context_string(self): def test_make_ufs_resource(self): + RESC_PATH_BASE = helpers.irods_shared_tmp_dir() + if not(RESC_PATH_BASE) and not helpers.irods_session_host_local (self.sess): + self.skipTest('for non-local server with shared tmp dir missing') # test data resc_name = 'temporary_test_resource' if self.sess.server_version < (4, 0, 0): diff --git a/irods/test/collection_test.py b/irods/test/collection_test.py index fc811dd..d0f0030 100644 --- a/irods/test/collection_test.py +++ b/irods/test/collection_test.py @@ -241,13 +241,15 @@ def test_collection_metadata(self): def test_register_collection(self): - if self.sess.host not in ('localhost', socket.gethostname()): + tmp_dir = helpers.irods_shared_tmp_dir() + loc_server = self.sess.host in ('localhost', socket.gethostname()) + if not(tmp_dir) and not(loc_server): self.skipTest('Requires access to server-side file(s)') # test vars file_count = 10 dir_name = 'register_test_dir' - dir_path = os.path.join('/tmp', dir_name) + dir_path = os.path.join((tmp_dir or '/tmp'), dir_name) coll_path = '{}/{}'.format(self.test_coll.path, dir_name) # make test dir @@ -272,13 +274,15 @@ def test_register_collection(self): def test_register_collection_with_checksums(self): - if self.sess.host not in ('localhost', socket.gethostname()): + tmp_dir = helpers.irods_shared_tmp_dir() + loc_server = self.sess.host in ('localhost', socket.gethostname()) + if not(tmp_dir) and not(loc_server): self.skipTest('Requires access to server-side file(s)') # test vars file_count = 10 - dir_name = 'register_test_dir' - dir_path = os.path.join('/tmp', dir_name) + dir_name = 'register_test_dir_with_chksums' + dir_path = os.path.join((tmp_dir or '/tmp'), dir_name) coll_path = '{}/{}'.format(self.test_coll.path, dir_name) # make test dir diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index acf6837..b1e3523 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -17,17 +17,30 @@ import irods.test.helpers as helpers import irods.keywords as kw from datetime import datetime +from irods.test.helpers import (unique_name, my_function_name) + + +def make_ufs_resc_in_tmpdir(session, base_name, allow_local = False): + tmpdir = helpers.irods_shared_tmp_dir() + if not tmpdir and allow_local: + tmpdir = os.getenv('TMPDIR') or '/tmp' + if not tmpdir: + raise RuntimeError("Must have filesystem path shareable with server.") + full_phys_dir = os.path.join(tmpdir,base_name) + if not os.path.exists(full_phys_dir): os.mkdir(full_phys_dir) + session.resources.create(base_name,'unixfilesystem',session.host,full_phys_dir) + return full_phys_dir + class TestDataObjOps(unittest.TestCase): - def setUp(self): - self.sess = helpers.make_session() + def setUp(self): # Create test collection + self.sess = helpers.make_session() self.coll_path = '/{}/home/{}/test_dir'.format(self.sess.zone, self.sess.username) self.coll = helpers.make_collection(self.sess, self.coll_path) - def tearDown(self): '''Remove test data and close connections ''' @@ -528,7 +541,7 @@ def test_replica_number(self): # make ufs resources ufs_resources = [] for i in range(number_of_replicas): - resource_name = 'ufs{}'.format(i) + resource_name = 
unique_name(my_function_name(),i) resource_type = 'unixfilesystem' resource_host = session.host resource_path = '/tmp/' + resource_name @@ -614,7 +627,7 @@ def test_repave_replicas(self): # make ufs resources and replicate object ufs_resources = [] for i in range(number_of_replicas): - resource_name = 'ufs{}'.format(i) + resource_name = unique_name(my_function_name(),i) resource_type = 'unixfilesystem' resource_host = session.host resource_path = '/tmp/{}'.format(resource_name) @@ -652,6 +665,7 @@ def test_repave_replicas(self): for resource in ufs_resources: resource.remove() + def test_get_replica_size(self): session = self.sess @@ -673,7 +687,7 @@ def test_get_replica_size(self): # make ufs resources ufs_resources = [] for i in range(2): - resource_name = 'ufs{}'.format(i) + resource_name = unique_name(my_function_name(),i) resource_type = 'unixfilesystem' resource_host = session.host resource_path = '/tmp/{}'.format(resource_name) @@ -712,6 +726,7 @@ def test_get_replica_size(self): for resource in ufs_resources: resource.remove() + def test_obj_put_get(self): # Can't do one step open/create with older servers if self.sess.server_version <= (4, 1, 4): @@ -942,88 +957,27 @@ def test_force_get(self): os.remove(test_file) - def test_register(self): - # skip if server is remote - if self.sess.host not in ('localhost', socket.gethostname()): - self.skipTest('Requires access to server-side file(s)') - - # test vars - test_dir = '/tmp' - filename = 'register_test_file' - test_file = os.path.join(test_dir, filename) - collection = self.coll.path - obj_path = '{collection}/{filename}'.format(**locals()) - - # make random 4K binary file - with open(test_file, 'wb') as f: - f.write(os.urandom(1024 * 4)) - - # register file in test collection - self.sess.data_objects.register(test_file, obj_path) - - # confirm object presence - obj = self.sess.data_objects.get(obj_path) - - # in a real use case we would likely - # want to leave the physical file on disk - obj.unregister() - - # delete file - os.remove(test_file) - - - def test_register_with_checksum(self): - # skip if server is remote - if self.sess.host not in ('localhost', socket.gethostname()): - self.skipTest('Requires access to server-side file(s)') - - # test vars - test_dir = '/tmp' - filename = 'register_test_file' - test_file = os.path.join(test_dir, filename) - collection = self.coll.path - obj_path = '{collection}/{filename}'.format(**locals()) - - # make random 4K binary file - with open(test_file, 'wb') as f: - f.write(os.urandom(1024 * 4)) - - # register file in test collection - options = {kw.VERIFY_CHKSUM_KW: ''} - self.sess.data_objects.register(test_file, obj_path, **options) - - # confirm object presence and verify checksum - obj = self.sess.data_objects.get(obj_path) - - # don't use obj.path (aka logical path) - phys_path = obj.replicas[0].path - digest = helpers.compute_sha256_digest(phys_path) - self.assertEqual(obj.checksum, "sha2:{}".format(digest)) - - # leave physical file on disk - obj.unregister() - - # delete file - os.remove(test_file) - def test_modDataObjMeta(self): + test_dir = helpers.irods_shared_tmp_dir() # skip if server is remote - if self.sess.host not in ('localhost', socket.gethostname()): + loc_server = self.sess.host in ('localhost', socket.gethostname()) + if not(test_dir) and not (loc_server): self.skipTest('Requires access to server-side file(s)') # test vars - test_dir = '/tmp' + resc_name = 'testDataObjMetaResc' filename = 'register_test_file' - test_file = os.path.join(test_dir, filename) collection 
= self.coll.path obj_path = '{collection}/{filename}'.format(**locals()) + test_path = make_ufs_resc_in_tmpdir(self.sess, resc_name, allow_local = loc_server) + test_file = os.path.join(test_path, filename) # make random 4K binary file with open(test_file, 'wb') as f: f.write(os.urandom(1024 * 4)) # register file in test collection - self.sess.data_objects.register(test_file, obj_path) + self.sess.data_objects.register(test_file, obj_path, **{kw.RESC_NAME_KW:resc_name}) qu = self.sess.query(Collection.id).filter(Collection.name == collection) for res in qu: @@ -1045,34 +999,6 @@ def test_modDataObjMeta(self): # delete file os.remove(test_file) - def test_register_with_xml_special_chars(self): - # skip if server is remote - if self.sess.host not in ('localhost', socket.gethostname()): - self.skipTest('Requires access to server-side file(s)') - - # test vars - test_dir = '/tmp' - filename = '''aaa'"<&test&>"'_file''' - test_file = os.path.join(test_dir, filename) - collection = self.coll.path - obj_path = '{collection}/{filename}'.format(**locals()) - - # make random 4K binary file - with open(test_file, 'wb') as f: - f.write(os.urandom(1024 * 4)) - - # register file in test collection - self.sess.data_objects.register(test_file, obj_path) - - # confirm object presence - obj = self.sess.data_objects.get(obj_path) - - # in a real use case we would likely - # want to leave the physical file on disk - obj.unregister() - - # delete file - os.remove(test_file) def test_get_data_objects(self): # Can't do one step open/create with older servers @@ -1093,13 +1019,14 @@ def test_get_data_objects(self): # make ufs resources ufs_resources = [] for i in range(2): - resource_name = 'ufs{}'.format(i) + resource_name = unique_name(my_function_name(),i) resource_type = 'unixfilesystem' resource_host = self.sess.host resource_path = '/tmp/{}'.format(resource_name) ufs_resources.append(self.sess.resources.create( resource_name, resource_type, resource_host, resource_path)) + # make passthru resource and add ufs1 as a child passthru_resource = self.sess.resources.create('pt', 'passthru') self.sess.resources.add_child(passthru_resource.name, ufs_resources[1].name) @@ -1142,6 +1069,108 @@ def test_get_data_objects(self): resource.remove() + def test_register(self): + test_dir = helpers.irods_shared_tmp_dir() + loc_server = self.sess.host in ('localhost', socket.gethostname()) + if not(test_dir) and not(loc_server): + self.skipTest('data_obj register requires server has access to local or shared files') + + # test vars + resc_name = "testRegisterOpResc" + filename = 'register_test_file' + collection = self.coll.path + obj_path = '{collection}/{filename}'.format(**locals()) + + test_path = make_ufs_resc_in_tmpdir(self.sess,resc_name, allow_local = loc_server) + test_file = os.path.join(test_path, filename) + + # make random 4K binary file + with open(test_file, 'wb') as f: + f.write(os.urandom(1024 * 4)) + + # register file in test collection + self.sess.data_objects.register(test_file, obj_path) + + # confirm object presence + obj = self.sess.data_objects.get(obj_path) + + # in a real use case we would likely + # want to leave the physical file on disk + obj.unregister() + + # delete file + os.remove(test_file) + + + def test_register_with_checksum(self): + test_dir = helpers.irods_shared_tmp_dir() + loc_server = self.sess.host in ('localhost', socket.gethostname()) + if not(test_dir) and not(loc_server): + self.skipTest('data_obj register requires server has access to local or shared files') + + # test vars + 
resc_name= 'regWithChksumResc' + filename = 'register_test_file' + collection = self.coll.path + obj_path = '{collection}/{filename}'.format(**locals()) + + test_path = make_ufs_resc_in_tmpdir(self.sess, resc_name, allow_local = loc_server) + test_file = os.path.join(test_path, filename) + + # make random 4K binary file + with open(test_file, 'wb') as f: + f.write(os.urandom(1024 * 4)) + + # register file in test collection + options = {kw.VERIFY_CHKSUM_KW: '', kw.RESC_NAME_KW: resc_name} + self.sess.data_objects.register(test_file, obj_path, **options) + + # confirm object presence and verify checksum + obj = self.sess.data_objects.get(obj_path) + + # don't use obj.path (aka logical path) + phys_path = obj.replicas[0].path + digest = helpers.compute_sha256_digest(phys_path) + self.assertEqual(obj.checksum, "sha2:{}".format(digest)) + + # leave physical file on disk + obj.unregister() + + # delete file + os.remove(test_file) + + def test_register_with_xml_special_chars(self): + test_dir = helpers.irods_shared_tmp_dir() + loc_server = self.sess.host in ('localhost', socket.gethostname()) + if not(test_dir) and not(loc_server): + self.skipTest('data_obj register requires server has access to local or shared files') + + # test vars + resc_name = 'regWithXmlSpecialCharsResc' + collection = self.coll.path + filename = '''aaa'"<&test&>"'_file''' + test_path = make_ufs_resc_in_tmpdir(self.sess, resc_name, allow_local = loc_server) + test_file = os.path.join(test_path, filename) + obj_path = '{collection}/{filename}'.format(**locals()) + + # make random 4K binary file + with open(test_file, 'wb') as f: + f.write(os.urandom(1024 * 4)) + + # register file in test collection + self.sess.data_objects.register(test_file, obj_path, **{kw.RESC_NAME_KW: resc_name}) + + # confirm object presence + obj = self.sess.data_objects.get(obj_path) + + # in a real use case we would likely + # want to leave the physical file on disk + obj.unregister() + + # delete file + os.remove(test_file) + + if __name__ == '__main__': # let the tests find the parent irods lib sys.path.insert(0, os.path.abspath('../..')) diff --git a/irods/test/helpers.py b/irods/test/helpers.py index fc32fa9..f784f1b 100644 --- a/irods/test/helpers.py +++ b/irods/test/helpers.py @@ -7,12 +7,69 @@ import hashlib import base64 import math +import socket +import inspect +import threading +import random from pwd import getpwnam from irods.session import iRODSSession from irods.message import iRODSMessage from six.moves import range +def my_function_name(): + '''Returns the name of the calling function or method''' + return inspect.getframeinfo(inspect.currentframe().f_back).function + +_thrlocal = threading.local() + +def unique_name(*seed_tuple): + '''For deterministic pseudo-random identifiers based on function/method name + to prevent e.g. ICAT collisions within and between tests. 
Example use: + def f(session): + seq_num = 1 + a_name = unique_name( my_function_name(), seq_num # [, *optional_further_args] + ) + seq_num += 1 + session.resources.create( a_name, 'unixfilesystem', session.host, '/tmp/' + a_name ) + ''' + if not getattr(_thrlocal,"rand_gen",None) : _thrlocal.rand_gen = random.Random() + _thrlocal.rand_gen.seed(seed_tuple) + return '%016X' % _thrlocal.rand_gen.randint(0,(1<<64)-1) + + +IRODS_SHARED_DIR = os.path.join( os.path.sep, 'irods_shared' ) +IRODS_SHARED_TMP_DIR = os.path.join(IRODS_SHARED_DIR,'tmp') +IRODS_SHARED_REG_RESC_VAULT = os.path.join(IRODS_SHARED_DIR,'reg_resc') + +IRODS_REG_RESC = 'MyRegResc' +Reg_Resc_Name = '' + +def irods_shared_tmp_dir(): + pth = IRODS_SHARED_TMP_DIR + can_write = False + if os.path.exists(pth): + try: tempfile.NamedTemporaryFile(dir = pth) + except: pass + else: can_write = True + return pth if can_write else '' + +def irods_shared_reg_resc_vault() : + vault = IRODS_SHARED_REG_RESC_VAULT + if os.path.exists(vault): + return vault + else: + return None + +def get_register_resource(session): + vault_path = irods_shared_reg_resc_vault() + Reg_Resc_Name = '' + if vault_path: + session.resources.create(IRODS_REG_RESC, 'unixfilesystem', session.host, vault_path) + Reg_Resc_Name = IRODS_REG_RESC + return Reg_Resc_Name + + def make_session(**kwargs): try: @@ -161,3 +218,8 @@ def file_backed_up(filename): yield filename finally: shutil.copyfile(f.name, filename) + + +def irods_session_host_local (sess): + return socket.gethostbyname(sess.host) == \ + socket.gethostbyname(socket.gethostname()) diff --git a/irods/test/pool_test.py b/irods/test/pool_test.py index 0ecaa66..3b28da4 100644 --- a/irods/test/pool_test.py +++ b/irods/test/pool_test.py @@ -2,17 +2,50 @@ from __future__ import absolute_import import datetime import os +import re import sys import time +import json import unittest import irods.test.helpers as helpers +# Regular expression to match common synonyms for localhost. 
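+#   e.g. "127.0.0.1", "::1", "0:0:0:0:0:0:0:1", "localhost", "localhost.localdomain"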
+# + +LOCALHOST_REGEX = re.compile(r"""^(127(\.\d+){1,3}|[0:]+1|(.*-)?localhost(\.\w+)?)$""",re.IGNORECASE) +USE_ONLY_LOCALHOST = False class TestPool(unittest.TestCase): + config_extension = ".json" + test_extension = "" + preferred_parameters = {} + + @classmethod + def setUpClass(cls): # generate test env files using connect data from ~/.irods environment + if USE_ONLY_LOCALHOST: return + Nonlocal_Ext = ".test" + with helpers.make_session() as session: + cls.preferred_parameters = { 'irods_host':session.host, + 'irods_port':session.port, + 'irods_user_name':session.username, + 'irods_zone_name':session.zone } + test_configs_dir = os.path.join(irods_test_path(),"test-data") + for config in [os.path.join(test_configs_dir,f) for f in os.listdir(test_configs_dir) + if f.endswith(cls.config_extension)]: + with open(config,"r") as in_, open(config + Nonlocal_Ext,"w") as out_: + cf = json.load(in_) + cf.update(cls.preferred_parameters) + json.dump(cf, out_,indent=4) + cls.test_extension = Nonlocal_Ext + + def setUp(self): - self.sess = helpers.make_session(irods_env_file="./test-data/irods_environment.json") + self.sess = helpers.make_session( + irods_env_file=os.path.join(irods_test_path(),"test-data","irods_environment.json" + self.test_extension)) + if USE_ONLY_LOCALHOST and not LOCALHOST_REGEX.match (self.sess.host): + self.skipTest('for non-local server') def tearDown(self): '''Close connections @@ -204,26 +237,36 @@ def test_no_refresh_connection(self): self.assertEqual(0, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) + def test_get_connection_refresh_time_no_env_file_input_param(self): connection_refresh_time = self.sess.get_connection_refresh_time(first_name="Magic", last_name="Johnson") self.assertEqual(connection_refresh_time, -1) def test_get_connection_refresh_time_none_existant_env_file(self): - connection_refresh_time = self.sess.get_connection_refresh_time(irods_env_file="./test-data/irods_environment_non_existant.json") + connection_refresh_time = self.sess.get_connection_refresh_time( + irods_env_file=os.path.join(irods_test_path(),"test-data","irods_environment_non_existant.json" + self.test_extension)) self.assertEqual(connection_refresh_time, -1) def test_get_connection_refresh_time_no_connection_refresh_field(self): - connection_refresh_time = self.sess.get_connection_refresh_time(irods_env_file="./test-data/irods_environment_no_refresh_field.json") + connection_refresh_time = self.sess.get_connection_refresh_time( + irods_env_file=os.path.join(irods_test_path(),"test-data","irods_environment_no_refresh_field.json" + self.test_extension)) self.assertEqual(connection_refresh_time, -1) def test_get_connection_refresh_time_negative_connection_refresh_field(self): - connection_refresh_time = self.sess.get_connection_refresh_time(irods_env_file="./test-data/irods_environment_negative_refresh_field.json") + connection_refresh_time = self.sess.get_connection_refresh_time( + irods_env_file=os.path.join(irods_test_path(),"test-data","irods_environment_negative_refresh_field.json" + self.test_extension)) self.assertEqual(connection_refresh_time, -1) def test_get_connection_refresh_time(self): - connection_refresh_time = self.sess.get_connection_refresh_time(irods_env_file="./test-data/irods_environment.json") + default_path = os.path.join (irods_test_path(),"test-data","irods_environment.json" + self.test_extension) + connection_refresh_time = self.sess.get_connection_refresh_time(irods_env_file=default_path) self.assertEqual(connection_refresh_time, 3) + 
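+# Resolve test-data paths relative to this module, so the tests do not depend on the current working directory.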
+def irods_test_path(): + return os.path.dirname(__file__) + + if __name__ == '__main__': # let the tests find the parent irods lib sys.path.insert(0, os.path.abspath('../..')) diff --git a/irods/test/query_test.py b/irods/test/query_test.py index 9fdd555..e370a8c 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -1,5 +1,6 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- +from __future__ import print_function from __future__ import absolute_import import os import six @@ -22,11 +23,14 @@ from irods.meta import iRODSMeta from irods.rule import Rule from irods import MAX_SQL_ROWS +from irods.test.helpers import (irods_shared_reg_resc_vault, get_register_resource) import irods.test.helpers as helpers from six.moves import range as py3_range +import irods.keywords as kw IRODS_STATEMENT_TABLE_SIZE = 50 + def rows_returned(query): return len( list(query) ) @@ -38,6 +42,26 @@ class TestQuery(unittest.TestCase): More_than_one_batch = 2*MAX_SQL_ROWS # may need to increase if PRC default page # size is increased beyond 500 + register_resc = '' + + @classmethod + def setUpClass(cls): + with helpers.make_session() as sess: + resource_name = helpers.get_register_resource(sess) + if resource_name: + cls.register_resc = resource_name + + @classmethod + def tearDownClass(cls): + with helpers.make_session() as sess: + try: + resc = sess.resources.get(cls.register_resc) + resc.remove() + except Exception as e: + print( "Could not remove resc {!r} due to: {} ".format(cls.register_resc,e), + file=sys.stderr) + + def setUp(self): self.sess = helpers.make_session() @@ -316,19 +340,25 @@ def test_multiple_criteria_on_one_column_name(self): @unittest.skipIf(six.PY3, 'Test is for python2 only') def test_query_for_data_object_with_utf8_name_python2(self): + + if not helpers.irods_session_host_local (self.sess) and not( self.register_resc ): + self.skipTest('for non-local server - registering data objects requires a shared path') + filename_prefix = '_prefix_ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸ' self.assertEqual(self.FILENAME_PREFIX.encode('utf-8'), filename_prefix) - _,test_file = tempfile.mkstemp(prefix=filename_prefix) + dir_ = irods_shared_reg_resc_vault() + _,test_file = tempfile.mkstemp(dir=dir_,prefix=filename_prefix) obj_path = os.path.join(self.coll.path, os.path.basename(test_file)) + results = None try: - self.sess.data_objects.register(test_file, obj_path) + self.sess.data_objects.register(test_file, obj_path, **{kw.RESC_NAME_KW: self.register_resc}) results = self.sess.query(DataObject, Collection.name).filter(DataObject.path == test_file).first() result_logical_path = os.path.join(results[Collection.name], results[DataObject.name]) result_physical_path = results[DataObject.path] self.assertEqual(result_logical_path, obj_path.decode('utf8')) self.assertEqual(result_physical_path, test_file.decode('utf8')) finally: - self.sess.data_objects.unregister(obj_path) + if results: self.sess.data_objects.unregister(obj_path) os.remove(test_file) # view/change this line in text editors under own risk: @@ -336,6 +366,10 @@ def test_query_for_data_object_with_utf8_name_python2(self): @unittest.skipIf(six.PY2, 'Test is for python3 only') def test_query_for_data_object_with_utf8_name_python3(self): + + if not helpers.irods_session_host_local (self.sess) and not( self.register_resc ): + self.skipTest('for non-local server - registering data objects requires a shared path') + def python34_unicode_mkstemp( prefix, dir = None, open_mode = 0o777 ): file_path = os.path.join ((dir or os.environ.get('TMPDIR') 
or '/tmp'), prefix+'-'+str(uuid.uuid1())) encoded_file_path = file_path.encode('utf-8') @@ -345,21 +379,25 @@ def python34_unicode_mkstemp( prefix, dir = None, open_mode = 0o777 ): u'\u01e0\u01e1\u01e2\u01e3\u01e4\u01e5\u01e6\u01e7\u01e8\u01e9\u01ea\u01eb\u01ec\u01ed\u01ee\u01ef'\ u'\u01f0\u01f1\u01f2\u01f3\u01f4\u01f5\u01f6\u01f7\u01f8' # make more visible/changeable in VIM self.assertEqual(self.FILENAME_PREFIX, filename_prefix) - (fd,encoded_test_file) = tempfile.mkstemp(prefix=filename_prefix.encode('utf-8')) \ + dir_ = irods_shared_reg_resc_vault() + (fd,encoded_test_file) = tempfile.mkstemp(dir = dir_.encode('utf-8'),prefix=filename_prefix.encode('utf-8')) \ if sys.version_info >= (3,5) \ - else python34_unicode_mkstemp(prefix = filename_prefix) + else python34_unicode_mkstemp(dir = dir_, prefix = filename_prefix) self.assertTrue(os.path.exists(encoded_test_file)) test_file = encoded_test_file.decode('utf-8') obj_path = os.path.join(self.coll.path, os.path.basename(test_file)) + results = None try: - self.sess.data_objects.register(test_file, obj_path) - results = self.sess.query(DataObject, Collection.name).filter(DataObject.path == test_file).first() - result_logical_path = os.path.join(results[Collection.name], results[DataObject.name]) - result_physical_path = results[DataObject.path] - self.assertEqual(result_logical_path, obj_path) - self.assertEqual(result_physical_path, test_file) + self.sess.data_objects.register(test_file, obj_path, **{kw.RESC_NAME_KW: self.register_resc}) + results = list(self.sess.query(DataObject, Collection.name).filter(DataObject.path == test_file)) + if results: + results = results[0] + result_logical_path = os.path.join(results[Collection.name], results[DataObject.name]) + result_physical_path = results[DataObject.path] + self.assertEqual(result_logical_path, obj_path) + self.assertEqual(result_physical_path, test_file) finally: - self.sess.data_objects.unregister(obj_path) + if results: self.sess.data_objects.unregister(obj_path) if fd is not None: os.close(fd) os.remove(encoded_test_file) diff --git a/irods_consortium_continuous_integration_test_module.py b/irods_consortium_continuous_integration_test_module.py new file mode 100644 index 0000000..c8ef414 --- /dev/null +++ b/irods_consortium_continuous_integration_test_module.py @@ -0,0 +1,25 @@ +import json +import sys + +def run (CI): + + final_config = CI.store_config( + { + "yaml_substitutions": { # -> written to ".env" + "python_version" : "3", + "client_os_generic": "ubuntu", + "client_os_image": "ubuntu:18.04" + }, + "container_environments": { + "client-runner" : { # -> written to "client-runner.env" + "TESTS_TO_RUN": "" # run test subset, e.g. "irods.test.data_obj_test" + } + + } + } + ) + + print ('----------\nconfig after CI modify pass\n----------',file=sys.stderr) + print(json.dumps(final_config,indent=4),file=sys.stderr) + + return CI.run_and_wait_on_client_exit () diff --git a/run_python_tests.sh b/run_python_tests.sh new file mode 100644 index 0000000..2430364 --- /dev/null +++ b/run_python_tests.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -o pipefail +cd repo/irods/test + +export PYTHONUNBUFFERED="Y" + +if [ -z "${TESTS_TO_RUN}" ] ; then + python${PY_N} runner.py 2>&1 | tee ${LOG_OUTPUT_DIR}/prc_test_logs.txt +else + python${PY_N} -m unittest -v ${TESTS_TO_RUN} 2>&1 | tee ${LOG_OUTPUT_DIR}/prc_test_logs.txt +fi + From dcd1f67942c0f77cfc0331f98eb7fde2604f79d6 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Tue, 27 Apr 2021 12:59:07 +0000 Subject: [PATCH 81/96] [#270] Speed up tests. 
- create large collection only once, since tests are read-only - remove redundant creat( ) call from pre 4.1.4 --- irods/test/extended_test.py | 31 ++++++++++++++++++++++--------- irods/test/helpers.py | 12 ++++++++---- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/irods/test/extended_test.py b/irods/test/extended_test.py index 9a81dd4..e3b7051 100644 --- a/irods/test/extended_test.py +++ b/irods/test/extended_test.py @@ -1,4 +1,5 @@ #! /usr/bin/env python +from __future__ import print_function from __future__ import absolute_import import os import sys @@ -9,21 +10,33 @@ class TestContinueQuery(unittest.TestCase): + @classmethod + def setUpClass(cls): + # once only (before all tests), set up large collection + print ("Creating a large collection...", file = sys.stderr) + with helpers.make_session() as sess: + # Create test collection + cls.coll_path = '/{}/home/{}/test_dir'.format(sess.zone, sess.username) + cls.obj_count = 2500 + cls.coll = helpers.make_test_collection( sess, cls.coll_path, cls.obj_count) + def setUp(self): + # open the session (per-test) self.sess = helpers.make_session() - # Create test collection - self.coll_path = '/{}/home/{}/test_dir'.format(self.sess.zone, self.sess.username) - self.obj_count = 2500 - self.coll = helpers.make_test_collection( - self.sess, self.coll_path, self.obj_count) - def tearDown(self): - '''Remove test data and close connections - ''' - self.coll.remove(recurse=True, force=True) + # close the session (per-test) self.sess.cleanup() + @classmethod + def tearDownClass(cls): + '''Remove test data + ''' + # once only (after all tests), delete large collection + print ("Deleting the large collection...", file = sys.stderr) + with helpers.make_session() as sess: + sess.collections.remove(cls.coll_path, recurse=True, force=True) + def test_walk_large_collection(self): for current_coll, subcolls, objects in self.coll.walk(): # check number of objects diff --git a/irods/test/helpers.py b/irods/test/helpers.py index f784f1b..6b53178 100644 --- a/irods/test/helpers.py +++ b/irods/test/helpers.py @@ -95,10 +95,14 @@ def make_object(session, path, content=None, **options): content = iRODSMessage.encode_unicode(content) - # 2 step open-create necessary for iRODS 4.1.4 or older - obj = session.data_objects.create(path) - with obj.open('w', **options) as obj_desc: - obj_desc.write(content) + if session.server_version <= (4,1,4): + # 2 step open-create necessary for iRODS 4.1.4 or older + obj = session.data_objects.create(path) + with obj.open('w', **options) as obj_desc: + obj_desc.write(content) + else: + with session.data_objects.open(path, 'w', **options) as obj_desc: + obj_desc.write(content) # refresh object after write return session.data_objects.get(path) From 9a2469a7be73cac5b187fec6c533351566016c3d Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Wed, 13 Jan 2021 15:41:30 -0500 Subject: [PATCH 82/96] [#246] implementation of chksum for data object mgr --- irods/data_object.py | 3 +++ irods/manager/data_object_manager.py | 15 ++++++++++- irods/message/__init__.py | 13 ++++++++++ irods/test/data_obj_test.py | 37 ++++++++++++++++++++++++++++ irods/test/helpers.py | 16 ++++++++++++ 5 files changed, 83 insertions(+), 1 deletion(-) diff --git a/irods/data_object.py b/irods/data_object.py index 2bc823e..d0c2e26 100644 --- a/irods/data_object.py +++ b/irods/data_object.py @@ -84,6 +84,9 @@ def open(self, mode='r', **options): return self.manager.open(self.path, mode, **options) + def chksum(self, **options): + return 
self.manager.chksum(self.path, **options) + def unlink(self, force=False, **options): self.manager.unlink(self.path, force, **options) diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index 3c5bf11..e4bd60d 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -4,7 +4,8 @@ from irods.models import DataObject from irods.manager import Manager from irods.message import ( - iRODSMessage, FileOpenRequest, ObjCopyRequest, StringStringMap, DataObjInfo, ModDataObjMeta) + iRODSMessage, FileOpenRequest, ObjCopyRequest, StringStringMap, DataObjInfo, ModDataObjMeta, + DataObjChksumRequest, DataObjChksumResponse) import irods.exception as ex from irods.api_number import api_number from irods.data_object import ( @@ -81,6 +82,18 @@ def put(self, file, irods_path, return_data_object=False, **options): return self.get(obj) + def chksum(self, path, **options): + message_body = DataObjChksumRequest(path, **options) + message = iRODSMessage('RODS_API_REQ', msg=message_body, + int_info=api_number['DATA_OBJ_CHKSUM_AN']) + checksum = None + with self.sess.pool.get_connection() as conn: + conn.send(message) + response = conn.recv() + results = response.get_main_message(DataObjChksumResponse) + checksum = results.myStr + return checksum + def create(self, path, resource=None, force=False, **options): options[kw.DATA_TYPE_KW] = 'generic' diff --git a/irods/message/__init__.py b/irods/message/__init__.py index a25dcde..5b09e0f 100644 --- a/irods/message/__init__.py +++ b/irods/message/__init__.py @@ -406,6 +406,19 @@ class FileOpenRequest(Message): oprType = IntegerProperty() KeyValPair_PI = SubmessageProperty(StringStringMap) +class DataObjChksumRequest(FileOpenRequest): + def __init__(self,path,**chksumOptions): + super(DataObjChksumRequest,self).__init__() + for attr,prop in vars(FileOpenRequest).items(): + if isinstance(prop, (IntegerProperty,LongProperty)): + setattr(self, attr, 0) + self.objPath = path + self.KeyValPair_PI = StringStringMap(chksumOptions) + +class DataObjChksumResponse(Message): + name = 'Str_PI' + myStr = StringProperty() + # define OpenedDataObjInp_PI "int l1descInx; int len; int whence; int # oprType; double offset; double bytesWritten; struct KeyValPair_PI;" diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index b1e3523..f752308 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -9,6 +9,7 @@ import random import string import unittest +import tempfile from irods.models import Collection, DataObject from irods.session import iRODSSession import irods.exception as ex @@ -20,6 +21,7 @@ from irods.test.helpers import (unique_name, my_function_name) + def make_ufs_resc_in_tmpdir(session, base_name, allow_local = False): tmpdir = helpers.irods_shared_tmp_dir() if not tmpdir and allow_local: @@ -35,6 +37,8 @@ def make_ufs_resc_in_tmpdir(session, base_name, allow_local = False): class TestDataObjOps(unittest.TestCase): + from irods.test.helpers import (create_simple_resc) + def setUp(self): # Create test collection self.sess = helpers.make_session() @@ -69,6 +73,39 @@ def sha256_checksum(self, filename, block_size=65536): return sha256.hexdigest() + def test_compute_chksum( self ): + + with self.create_simple_resc() as R, tempfile.NamedTemporaryFile(mode = 'wb') as f: + coll_path = '/{0.zone}/home/{0.username}' .format(self.sess) + dobj_path = coll_path + '/' + os.path.basename(f.name) + Data = self.sess.data_objects + try: + f.write(b'some content bytes ...\n') + 
f.flush() + Data.put( f.name, dobj_path ) + + # get original checksum and resource name + my_object = Data.get(dobj_path) + orig_resc = my_object.replicas[0].resource_name + chk1 = my_object.chksum() + + # repl to new resource and iput to that new replica + Data.replicate( dobj_path, resource = R) + f.write(b'...added bytes\n') + f.flush() + Data.put( f.name, dobj_path, **{kw.DEST_RESC_NAME_KW: R, + kw.FORCE_FLAG_KW: '1'}) + # compare checksums + my_object = Data.get(dobj_path) + chk2 = my_object.chksum( **{kw.RESC_NAME_KW : R} ) + chk1b = my_object.chksum( **{kw.RESC_NAME_KW : orig_resc} ) + self.assertEqual (chk1, chk1b) + self.assertNotEqual (chk1, chk2) + + finally: + if Data.exists (dobj_path): Data.unlink (dobj_path, force = True) + + def test_obj_exists(self): obj_name = 'this_object_will_exist_once_made' exists_path = '{}/{}'.format(self.coll_path, obj_name) diff --git a/irods/test/helpers.py b/irods/test/helpers.py index 6b53178..7c99a41 100644 --- a/irods/test/helpers.py +++ b/irods/test/helpers.py @@ -11,6 +11,7 @@ import inspect import threading import random +import datetime from pwd import getpwnam from irods.session import iRODSSession from irods.message import iRODSMessage @@ -171,6 +172,21 @@ def make_flat_test_dir(dir_path, file_count=10, file_size=1024): with open(file_path, 'wb') as f: f.write(os.urandom(file_size)) +@contextlib.contextmanager +def create_simple_resc (self, rescName = None): + if not rescName: + rescName = 'simple_resc_' + unique_name (my_function_name() + '_simple_resc', datetime.datetime.now()) + created = False + try: + self.sess.resources.create(rescName, + 'unixfilesystem', + host = self.sess.host, + path = '/tmp/' + rescName) + created = True + yield rescName + finally: + if created: + self.sess.resources.remove(rescName) @contextlib.contextmanager def create_simple_resc_hierarchy (self, Root, Leaf): From 38072e9df237196cde86cf0153898e13d9019029 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Fri, 20 Nov 2020 00:26:15 +0000 Subject: [PATCH 83/96] [#233] add null handler for irods package root --- irods/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/irods/__init__.py b/irods/__init__.py index 7520648..132453e 100644 --- a/irods/__init__.py +++ b/irods/__init__.py @@ -1,5 +1,8 @@ from .version import __version__ +import logging +logging.getLogger(__name__).addHandler(logging.NullHandler()) + # Magic Numbers MAX_PASSWORD_LENGTH = 50 MAX_SQL_ATTR = 50 From 75f6f0c10487a2471a7842718863b3bc0d9f80c6 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Thu, 19 Nov 2020 22:35:19 +0000 Subject: [PATCH 84/96] [#232] do not arbitrarily pick first replica for DEST RESC We also add the create argument in the data object open() call which defaults to old behavior, ie letting it default True means we create and open a new replica on the default or otherwise targeted resource (DEST_RESC_NAME_KW if specified) if none already exists. 
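
A minimal sketch of the resulting behavior (`session` and `logical_path` are assumed to
already exist; the object may live under a resource hierarchy):

    obj = session.data_objects.get(logical_path)
    with obj.open('a') as f:              # DEST_RESC_NAME_KW is no longer injected from
        f.write(b'more bytes\n')          # replica 0, so hierarchies resolve on the server

    # create=False omits O_CREAT: 'w' then opens only an already existing replica instead
    # of creating one on the default (or DEST_RESC_NAME_KW-targeted) resource.
    with session.data_objects.open(logical_path, 'w', create=False) as f:
        f.write(b'overwritten content\n')
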
--- irods/data_object.py | 3 -- irods/manager/data_object_manager.py | 12 ++++---- irods/test/data_obj_test.py | 46 ++++++++++++++++++++++++++-- 3 files changed, 49 insertions(+), 12 deletions(-) diff --git a/irods/data_object.py b/irods/data_object.py index d0c2e26..8935cee 100644 --- a/irods/data_object.py +++ b/irods/data_object.py @@ -79,9 +79,6 @@ def metadata(self): return self._meta def open(self, mode='r', **options): - if kw.DEST_RESC_NAME_KW not in options: - options[kw.DEST_RESC_NAME_KW] = self.replicas[0].resource_name - return self.manager.open(self.path, mode, **options) def chksum(self, **options): diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index e4bd60d..92b7599 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -131,21 +131,21 @@ def create(self, path, resource=None, force=False, **options): return self.get(path) - def open(self, path, mode, **options): + def open(self, path, mode, create = True, **options): if kw.DEST_RESC_NAME_KW not in options: # Use client-side default resource if available try: options[kw.DEST_RESC_NAME_KW] = self.sess.default_resource except AttributeError: pass - + createFlag = self.O_CREAT if create else 0 flags, seek_to_end = { 'r': (self.O_RDONLY, False), 'r+': (self.O_RDWR, False), - 'w': (self.O_WRONLY | self.O_CREAT | self.O_TRUNC, False), - 'w+': (self.O_RDWR | self.O_CREAT | self.O_TRUNC, False), - 'a': (self.O_WRONLY | self.O_CREAT, True), - 'a+': (self.O_RDWR | self.O_CREAT, True), + 'w': (self.O_WRONLY | createFlag | self.O_TRUNC, False), + 'w+': (self.O_RDWR | createFlag | self.O_TRUNC, False), + 'a': (self.O_WRONLY | createFlag, True), + 'a+': (self.O_RDWR | createFlag, True), }[mode] # TODO: Use seek_to_end diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index f752308..390fe2e 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -9,7 +9,7 @@ import random import string import unittest -import tempfile +import contextlib # check if redundant from irods.models import Collection, DataObject from irods.session import iRODSSession import irods.exception as ex @@ -18,10 +18,12 @@ import irods.test.helpers as helpers import irods.keywords as kw from datetime import datetime +from tempfile import NamedTemporaryFile, mkdtemp +# used only in create_resc_hierarchy which may be redundant - see later comment +import shutil from irods.test.helpers import (unique_name, my_function_name) - def make_ufs_resc_in_tmpdir(session, base_name, allow_local = False): tmpdir = helpers.irods_shared_tmp_dir() if not tmpdir and allow_local: @@ -51,6 +53,44 @@ def tearDown(self): self.coll.remove(recurse=True, force=True) self.sess.cleanup() +#-- probably redundant ( see helpers.create_simple_resc (self, rescName = None)) + + @contextlib.contextmanager + def create_resc_hierarchy (self, Root, Leaf): + d = mkdtemp() + self.sess.resources.create(Leaf,'unixfilesystem', + host = self.sess.host, + path=d) + self.sess.resources.create(Root,'passthru') + self.sess.resources.add_child(Root,Leaf) + try: + yield ';'.join([Root,Leaf]) + finally: + self.sess.resources.remove_child(Root,Leaf) + self.sess.resources.remove(Leaf) + self.sess.resources.remove(Root) + shutil.rmtree(d) + + + def test_open_existing_dataobj_in_resource_hierarchy__232(self): + Root = 'pt1' + Leaf = 'resc1' + with self.create_resc_hierarchy(Root,Leaf) as hier_str: + obj = None + try: + datafile = NamedTemporaryFile (prefix='getfromhier_232_',delete=True) + 
datafile.write(b'abc\n') + datafile.flush() + fname = datafile.name + bname = os.path.basename(fname) + LOGICAL = self.coll_path + '/' + bname + self.sess.data_objects.put(fname,LOGICAL, **{kw.DEST_RESC_NAME_KW:Root}) + self.assertEqual([bname], [res[DataObject.name] for res in + self.sess.query(DataObject.name).filter(DataObject.resc_hier == hier_str)]) + obj = self.sess.data_objects.get(LOGICAL) + obj.open('a') # prior to #232 fix, raises DIRECT_CHILD_ACCESS + finally: + if obj: obj.unlink(force=True) def make_new_server_config_json(self, server_config_filename): # load server_config.json to inject a new rule base @@ -75,7 +115,7 @@ def sha256_checksum(self, filename, block_size=65536): def test_compute_chksum( self ): - with self.create_simple_resc() as R, tempfile.NamedTemporaryFile(mode = 'wb') as f: + with self.create_simple_resc() as R, NamedTemporaryFile(mode = 'wb') as f: coll_path = '/{0.zone}/home/{0.username}' .format(self.sess) dobj_path = coll_path + '/' + os.path.basename(f.name) Data = self.sess.data_objects From 67c228eee0156953eec640055399d58c10f347bb Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Wed, 18 Nov 2020 12:43:47 +0000 Subject: [PATCH 85/96] [#235] multithreaded parallel transfer for PUT and GET The parallel_demo.py script is not included but can be seen in the 4.2.8 backport. --- README.rst | 25 ++ irods/__init__.py | 24 +- irods/api_number.py | 4 +- irods/data_object.py | 57 +++- irods/keywords.py | 1 + irods/manager/__init__.py | 11 + irods/manager/data_object_manager.py | 128 +++++-- irods/message/__init__.py | 1 + irods/parallel.py | 493 +++++++++++++++++++++++++++ irods/pool.py | 18 +- irods/test/admin_test.py | 39 ++- irods/test/data_obj_test.py | 82 ++++- setup.py | 6 +- 13 files changed, 831 insertions(+), 58 deletions(-) create mode 100644 irods/parallel.py diff --git a/README.rst b/README.rst index ba9b05e..b29b683 100644 --- a/README.rst +++ b/README.rst @@ -49,6 +49,9 @@ or:: pip install git+https://github.com/irods/python-irodsclient.git[@branch|@commit|@tag] +See also [these instructions](PYTHON_install_caveats.rst), with hints about pip and +virtualenv, relevant to installation on older Linux distributions. + Uninstalling ------------ @@ -127,6 +130,28 @@ This can be overridden by changing the session `connection_timeout` immediately This will set the timeout to five minutes for any associated connections. +Simple PUTs and GETs +-------------------- + +We can use the just-created session object to put files to (or get them from) iRODS. + +>>> logical_path = "/{0.zone}/home/{0.username}/{1}".format(session,"myfile.dat") +>>> session.data_objects.put( "myfile.dat", logical_path) +>>> session.data_objects.get( logical_path, "/tmp/myfile.dat.copy" ) + +Note that local file paths may be relative, but iRODS data objects must always be referred to by +their absolute paths. This is in contrast to the ``iput`` and ``iget`` icommands, which keep +track of the current working collection (as modified by ``icd``) for the unix shell. + + +Parallel Transfer +----------------- + +Starting with release 0.9.0, data object transfers using put() and get() will spawn a number +of threads in order to optimize performance for iRODS server versions 4.2.9+ and file sizes +larger than a default threshold value of 32 Megabytes. 
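+
+For example (a sketch assuming a local file ``my_large_file.dat``; ``num_threads`` is
+optional, and by default the client chooses a thread count no larger than the number of
+available CPUs):
+
+>>> big_path = "/{0.zone}/home/{0.username}/{1}".format(session,"my_large_file.dat")
+>>> session.data_objects.put( "my_large_file.dat", big_path, num_threads = 4 )
+>>> session.data_objects.get( big_path, "/tmp/my_large_file.dat.copy", num_threads = 4 )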
+ + Working with collections ------------------------ diff --git a/irods/__init__.py b/irods/__init__.py index 132453e..0a7ffb5 100644 --- a/irods/__init__.py +++ b/irods/__init__.py @@ -1,7 +1,29 @@ from .version import __version__ import logging -logging.getLogger(__name__).addHandler(logging.NullHandler()) +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) +gHandler = None + +def client_logging(flag=True,handler=None): + """ + import irods + # Enable / Disable general client logging + irods.client_logging(True[,handler]) -> handler + # (handler is a StreamHandler to stderr by default) + irods.client_logging(False) # - disable irods client logging + """ + global gHandler + if flag: + if handler is not None: + if gHandler: logger.removeHandler(gHandler) + if not handler: handler = logging.StreamHandler() + gHandler = handler + logger.addHandler(handler) + else: + if gHandler: logger.removeHandler(gHandler) + gHandler = None + return gHandler # Magic Numbers MAX_PASSWORD_LENGTH = 50 diff --git a/irods/api_number.py b/irods/api_number.py index 41545de..91bb432 100644 --- a/irods/api_number.py +++ b/irods/api_number.py @@ -176,5 +176,7 @@ # 1100 - 1200 - SSL API calls "SSL_START_AN": 1100, "SSL_END_AN": 1101, - "ATOMIC_APPLY_METADATA_OPERATIONS_APN": 20002 + "ATOMIC_APPLY_METADATA_OPERATIONS_APN": 20002, + "GET_FILE_DESCRIPTOR_INFO_APN": 20000, + "REPLICA_CLOSE_APN": 20004 } diff --git a/irods/data_object.py b/irods/data_object.py index 8935cee..c67dd01 100644 --- a/irods/data_object.py +++ b/irods/data_object.py @@ -3,13 +3,20 @@ import sys import logging import six +import json +import os +import ast +import xml.etree.ElementTree as ET from irods.models import DataObject from irods.meta import iRODSMetaCollection import irods.keywords as kw +from irods.api_number import api_number +from irods.message import (StringStringMap, FileOpenRequest, JSON_Message, iRODSMessage) logger = logging.getLogger(__name__) +IRODS_SERVER_WITH_CLOSE_REPLICA_API = (4,2,9) def chunks(f, chunksize=io.DEFAULT_BUFFER_SIZE): return iter(lambda: f.read(chunksize), b'') @@ -68,6 +75,9 @@ def __init__(self, manager, parent=None, results=None): ) for r in replicas] self._meta = None + + + def __repr__(self): return "".format(**vars(self)) @@ -78,8 +88,8 @@ def metadata(self): self.manager.sess.metadata, DataObject, self.path) return self._meta - def open(self, mode='r', **options): - return self.manager.open(self.path, mode, **options) + def open(self, mode='r', finalize_on_close = True, **options): + return self.manager.open(self.path, mode, finalize_on_close = finalize_on_close, **options) def chksum(self, **options): return self.manager.chksum(self.path, **options) @@ -101,13 +111,52 @@ def replicate(self, resource=None, **options): class iRODSDataObjectFileRaw(io.RawIOBase): - def __init__(self, conn, descriptor, **options): + def __init__(self, conn, descriptor, finalize_on_close = True, **options): self.conn = conn self.desc = descriptor self.options = options + self.finalize_on_close = finalize_on_close + + def replica_access_info(self): + message_body = JSON_Message( {'fd': self.desc}, + server_version = self.conn.server_version ) + message = iRODSMessage('RODS_API_REQ', msg = message_body, + int_info=api_number['GET_FILE_DESCRIPTOR_INFO_APN']) + self.conn.send(message) + result = None + try: + result = self.conn.recv() + except Exception as e: + logger.warning('''Couldn't receive or process response to GET_FILE_DESCRIPTOR_INFO_APN -- ''' + '''caught: {0!r}'''.format(e)) + raise + 
dobj_info = result.get_json_encoded_struct() + replica_token = dobj_info.get("replica_token","") + resc_hier = ( dobj_info.get("data_object_info") or {} ).get("resource_hierarchy","") + return (replica_token, resc_hier) + + def _close_replica(self): + server_version = ast.literal_eval(os.environ.get('IRODS_VERSION_OVERRIDE', '()' )) + if (server_version or self.conn.server_version) < IRODS_SERVER_WITH_CLOSE_REPLICA_API: return False + message_body = JSON_Message( { "fd": self.desc, + "send_notification": False, + "update_size": False, + "update_status": False, + "send_notification": False, + "compute_checksum": False }, + server_version = self.conn.server_version ) + self.conn.send( iRODSMessage('RODS_API_REQ', msg = message_body, + int_info=api_number['REPLICA_CLOSE_APN']) ) + try: + self.conn.recv().int_info + except Exception as e: + logger.warning ('** ERROR on closing replica **') + raise + return True def close(self): - self.conn.close_file(self.desc, **self.options) + if self.finalize_on_close or not self._close_replica(): + self.conn.close_file(self.desc, **self.options) self.conn.release() super(iRODSDataObjectFileRaw, self).close() return None diff --git a/irods/keywords.py b/irods/keywords.py index 6880bfe..c49e907 100644 --- a/irods/keywords.py +++ b/irods/keywords.py @@ -210,6 +210,7 @@ # =-=-=-=-=-=-=- # irods general keywords definitions RESC_HIER_STR_KW = "resc_hier" +REPLICA_TOKEN_KW = "replicaToken" DEST_RESC_HIER_STR_KW = "dest_resc_hier" IN_PDMO_KW = "in_pdmo" STAGE_OBJ_KW = "stage_object" diff --git a/irods/manager/__init__.py b/irods/manager/__init__.py index 9ad1dcf..09c184c 100644 --- a/irods/manager/__init__.py +++ b/irods/manager/__init__.py @@ -1,4 +1,15 @@ class Manager(object): + __server_version = () + + @property + def server_version(self): + if not self.__server_version: + p = self.sess.pool + if p is None : raise RuntimeError ("session not configured") + conn = getattr(p,"_conn",None) or p.get_connection() + if conn: self.__server_version = conn.server_version + return tuple( self.__server_version ) + def __init__(self, sess): self.sess = sess diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index 92b7599..11c1337 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -11,8 +11,18 @@ from irods.data_object import ( iRODSDataObject, iRODSDataObjectFileRaw, chunks, irods_dirname, irods_basename) import irods.keywords as kw +import irods.parallel as parallel +import six +import ast +MAXIMUM_SINGLE_THREADED_TRANSFER_SIZE = 32 * ( 1024 ** 2) + +DEFAULT_NUMBER_OF_THREADS = 0 # Defaults for reasonable number of threads -- optimized to be + # performant but allow no more worker threads than available CPUs. + # Setting this to 1 disables automatic use of parallel transfer. +DEFAULT_QUEUE_DEPTH = 32 + class DataObjectManager(Manager): READ_BUFFER_SIZE = 1024 * io.DEFAULT_BUFFER_SIZE @@ -27,27 +37,58 @@ class DataObjectManager(Manager): O_EXCL = 128 O_TRUNC = 512 - def _download(self, obj, local_path, **options): + + def should_parallelize_transfer( self, + num_threads = 0, + obj_sz = 1+MAXIMUM_SINGLE_THREADED_TRANSFER_SIZE, + server_version_hint = ()): + # Allow an environment variable to override the detection of the server version. + # Example: $ export IRODS_VERSION_OVERRIDE="4,2,9" ; python -m irods.parallel ... 
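+        # (when set, the override takes precedence over server_version_hint and the detected version)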
+ server_version = ( ast.literal_eval(os.environ.get('IRODS_VERSION_OVERRIDE', '()' )) or server_version_hint or + self.server_version ) + if num_threads == 1 or ( server_version < parallel.MINIMUM_SERVER_VERSION ): + return False + if getattr(obj_sz,'seek',None) : + pos = obj_sz.tell() + size = obj_sz.seek(0,os.SEEK_END) + if not isinstance(size,six.integer_types): + size = obj_sz.tell() + obj_sz.seek(pos,os.SEEK_SET) + else: + size = obj_sz + assert (size > -1) + return size > MAXIMUM_SINGLE_THREADED_TRANSFER_SIZE + + + def _download(self, obj, local_path, num_threads, **options): + if os.path.isdir(local_path): - file = os.path.join(local_path, irods_basename(obj)) + local_file = os.path.join(local_path, irods_basename(obj)) else: - file = local_path + local_file = local_path - # Check for force flag if file exists - if os.path.exists(file) and kw.FORCE_FLAG_KW not in options: + # Check for force flag if local_file exists + if os.path.exists(local_file) and kw.FORCE_FLAG_KW not in options: raise ex.OVERWRITE_WITHOUT_FORCE_FLAG - with open(file, 'wb') as f, self.open(obj, 'r', **options) as o: - for chunk in chunks(o, self.READ_BUFFER_SIZE): - f.write(chunk) + with open(local_file, 'wb') as f, self.open(obj, 'r', **options) as o: + if self.should_parallelize_transfer (num_threads, o): + f.close() + if not self.parallel_get( (obj,o), local_path, num_threads = num_threads, + target_resource_name = options.get(kw.RESC_NAME_KW,'')): + raise RuntimeError("parallel get failed") + else: + for chunk in chunks(o, self.READ_BUFFER_SIZE): + f.write(chunk) - def get(self, path, file=None, **options): + + def get(self, path, local_path = None, num_threads = DEFAULT_NUMBER_OF_THREADS, **options): parent = self.sess.collections.get(irods_dirname(path)) # TODO: optimize - if file: - self._download(path, file, **options) + if local_path: + self._download(path, local_path, num_threads = num_threads, **options) query = self.sess.query(DataObject)\ .filter(DataObject.name == irods_basename(path))\ @@ -60,19 +101,27 @@ def get(self, path, file=None, **options): return iRODSDataObject(self, parent, results) - def put(self, file, irods_path, return_data_object=False, **options): + def put(self, local_path, irods_path, return_data_object = False, num_threads = DEFAULT_NUMBER_OF_THREADS, **options): + if irods_path.endswith('/'): - obj = irods_path + os.path.basename(file) + obj = irods_path + os.path.basename(local_path) else: obj = irods_path - # Set operation type to trigger acPostProcForPut - if kw.OPR_TYPE_KW not in options: - options[kw.OPR_TYPE_KW] = 1 # PUT_OPR - - with open(file, 'rb') as f, self.open(obj, 'w', **options) as o: - for chunk in chunks(f, self.WRITE_BUFFER_SIZE): - o.write(chunk) + with open(local_path, 'rb') as f, self.open(obj, 'w', **options) as o: + + if self.should_parallelize_transfer (num_threads, f): + f.close(); + if not self.parallel_put( local_path, (obj,o), num_threads = num_threads, + target_resource_name = options.get(kw.RESC_NAME_KW,'') or + options.get(kw.DEST_RESC_NAME_KW,'')): + raise RuntimeError("parallel put failed") + else: + # Set operation type to trigger acPostProcForPut + if kw.OPR_TYPE_KW not in options: + options[kw.OPR_TYPE_KW] = 1 # PUT_OPR + for chunk in chunks(f, self.WRITE_BUFFER_SIZE): + o.write(chunk) if kw.ALL_KW in options: options[kw.UPDATE_REPL_KW] = '' @@ -81,7 +130,6 @@ def put(self, file, irods_path, return_data_object=False, **options): if return_data_object: return self.get(obj) - def chksum(self, path, **options): message_body = 
DataObjChksumRequest(path, **options) message = iRODSMessage('RODS_API_REQ', msg=message_body, @@ -94,6 +142,32 @@ def chksum(self, path, **options): checksum = results.myStr return checksum + + def parallel_get(self, + data_or_path_ , + file_ , + async_ = False, + num_threads = 0, + target_resource_name = '', + progressQueue = False): + + return parallel.io_main( self.sess, data_or_path_, parallel.Oper.GET | (parallel.Oper.NONBLOCKING if async_ else 0), file_, + num_threads = num_threads, target_resource_name = target_resource_name, + queueLength = (DEFAULT_QUEUE_DEPTH if progressQueue else 0)) + + def parallel_put(self, + file_ , + data_or_path_ , + async_ = False, + num_threads = 0, + target_resource_name = '', + progressQueue = False): + + return parallel.io_main( self.sess, data_or_path_, parallel.Oper.PUT | (parallel.Oper.NONBLOCKING if async_ else 0), file_, + num_threads = num_threads, target_resource_name = target_resource_name, + queueLength = (DEFAULT_QUEUE_DEPTH if progressQueue else 0)) + + def create(self, path, resource=None, force=False, **options): options[kw.DATA_TYPE_KW] = 'generic' @@ -131,7 +205,13 @@ def create(self, path, resource=None, force=False, **options): return self.get(path) - def open(self, path, mode, create = True, **options): + def open_with_FileRaw(self, *arg, **kw): + holder = [] + handle = self.open(*arg,_raw_fd_holder=holder,**kw) + return (handle, holder[-1]) + + def open(self, path, mode, create = True, finalize_on_close = True, **options): + _raw_fd_holder = options.get('_raw_fd_holder',[]) if kw.DEST_RESC_NAME_KW not in options: # Use client-side default resource if available try: @@ -171,7 +251,9 @@ def open(self, path, mode, create = True, **options): conn.send(message) desc = conn.recv().int_info - return io.BufferedRandom(iRODSDataObjectFileRaw(conn, desc, **options)) + raw = iRODSDataObjectFileRaw(conn, desc, finalize_on_close = finalize_on_close, **options) + (_raw_fd_holder).append(raw) + return io.BufferedRandom(raw) def unlink(self, path, force=False, **options): diff --git a/irods/message/__init__.py b/irods/message/__init__.py index 5b09e0f..53e86df 100644 --- a/irods/message/__init__.py +++ b/irods/message/__init__.py @@ -282,6 +282,7 @@ def JSON_Message( msg_struct , server_version = () ): else JSON_Binary_Request return cls(msg_struct) + class PluginAuthMessage(Message): _name = 'authPlugReqInp_PI' auth_scheme_ = StringProperty() diff --git a/irods/parallel.py b/irods/parallel.py new file mode 100644 index 0000000..c0a1f3c --- /dev/null +++ b/irods/parallel.py @@ -0,0 +1,493 @@ +#!/usr/bin/env python +from __future__ import print_function + +import os +import ssl +import json +import time +import sys +import logging +import contextlib +import concurrent.futures +import threading +import multiprocessing +import six + +import irods +import irods.data_object +from irods.data_object import iRODSDataObjectFileRaw, iRODSDataObject +from irods.exception import DataObjectDoesNotExist +from irods.message import ( StringStringMap, FileOpenRequest, iRODSMessage ) +from irods.api_number import api_number +import irods.keywords as kw +from collections import OrderedDict +from six.moves.queue import Queue,Full,Empty + + +logger = logging.getLogger( __name__ ) +_nullh = logging.NullHandler() +logger.addHandler( _nullh ) + + +MINIMUM_SERVER_VERSION = (4,2,9) + + +try: + from threading import Barrier # Use 'Barrier' class if included (as in Python >= 3.2) ... +except ImportError: # ... 
but otherwise, use this ad hoc: + class Barrier(object): + def __init__(self, n): + """Initialize a Barrier to wait on n threads.""" + self.n = n + self.count = 0 + self.mutex = threading.Semaphore(1) + self.barrier = threading.Semaphore(0) + def wait(self): + """Per-thread wait function. + As in Python3.2 threading, returns 0 <= wait_serial_int < n + """ + self.mutex.acquire() + self.count += 1 + count = self.count + self.mutex.release() + if count == self.n: self.barrier.release() + self.barrier.acquire() + self.barrier.release() + return count - 1 + +@contextlib.contextmanager +def enableLogging(handlerType,args,level_ = logging.INFO): + """Context manager for temporarily enabling a logger. For debug or test. + Usage Example - + with irods.parallel.enableLogging(logging.FileHandler,('/tmp/logfile.txt',)): + # parallel put/get code here + """ + h = None + saveLevel = logger.level + try: + logger.setLevel(level_) + h = handlerType(*args) + h.setLevel( level_ ) + logger.addHandler(h) + yield + finally: + logger.setLevel(saveLevel) + if h in logger.handlers: + logger.removeHandler(h) + + +RECOMMENDED_NUM_THREADS_PER_TRANSFER = 3 + +verboseConnection = False + +class BadCallbackTarget(TypeError): pass + +class AsyncNotify (object): + + """A type returned when the PUT or GET operation passed includes NONBLOCKING. + If enabled, the callback function (or callable object) will be triggered + when all parts of the parallel transfer are complete. It should accept + exactly one argument, the irods.parallel.AsyncNotify instance that + is calling it. + """ + + def set_transfer_done_callback( self, callback ): + if callback is not None: + if not callable(callback): + raise BadCallbackTarget( '"callback" must be a callable accepting at least 1 argument' ) + self.done_callback = callback + + def __init__(self, futuresList, callback = None, progress_Queue = None, total = None, keep_ = ()): + self._futures = set(futuresList) + self._futures_done = dict() + self.keep = dict(keep_) + self._lock = threading.Lock() + self.set_transfer_done_callback (callback) + self.__done = False + if self._futures: + for future in self._futures: future.add_done_callback( self ) + else: + self.__invoke_done_callback() + + self.progress = [0, 0] + if (progress_Queue) and (total is not None): + self.progress[1] = total + def _progress(Q,this): # - thread to update progress indicator + while this.progress[0] < this.progress[1]: + i = None + try: + i = Q.get(timeout=0.1) + except Empty: + pass + if i is not None: + if isinstance(i,six.integer_types) and i >= 0: this.progress[0] += i + else: break + self._progress_fn = _progress + self._progress_thread = threading.Thread( target = self._progress_fn, args = (progress_Queue, self)) + self._progress_thread.start() + + @staticmethod + def asciiBar( lst, memo = [1] ): + memo[0] += 1 + spinner = "|/-\\"[memo[0]%4] + percent = "%5.1f%%"%(lst[0]*100.0/lst[1]) + mbytes = "%9.1f MB / %9.1f MB"%(lst[0]/1e6,lst[1]/1e6) + if lst[1] != 0: + s = " {spinner} {percent} [ {mbytes} ] " + else: + s = " {spinner} " + return s.format(**locals()) + + def wait_until_transfer_done (self, timeout=float('inf'), progressBar = False): + carriageReturn = '\r' + begin = t = time.time() + end = begin + timeout + while not self.__done: + time.sleep(min(0.1, max(0.0, end - t))) + t = time.time() + if t >= end: break + if progressBar: + print (' ' + self.asciiBar( self.progress ) + carriageReturn, end='', file=sys.stderr) + sys.stderr.flush() + return self.__done + + def __call__(self,future): # Our instance is 
called by each future (individual file part) when done. + # When all futures are done, we invoke the configured callback. + with self._lock: + self._futures_done[future] = future.result() + if len(self._futures) == len(self._futures_done): self.__invoke_done_callback() + + def __invoke_done_callback(self): + try: + if callable(self.done_callback): self.done_callback(self) + finally: + self.keep.pop('mgr',None) + self.__done = True + self.set_transfer_done_callback(None) + + @property + def futures(self): return list(self._futures) + + @property + def futures_done(self): return dict(self._futures_done) + + +class Oper(object): + + """A custom enum-type class with utility methods. """ + + GET = 0 + PUT = 1 + NONBLOCKING = 2 + + def __int__(self): return self._opr + def __init__(self, rhs): self._opr = int(rhs) + def isPut(self): return 0 != (self._opr & self.PUT) + def isGet(self): return not self.isPut() + def isNonBlocking(self): return 0 != (self._opr & self.NONBLOCKING) + + def data_object_mode(self, initial_open = False): + if self.isPut(): + return 'w' if initial_open else 'a' + else: + return 'r' + + def disk_file_mode(self, initial_open = False, binary = True): + if self.isPut(): + mode = 'r' + else: + mode = 'w' if initial_open else 'r+' + return ((mode + 'b') if binary else mode) + + +def _io_send_bytes_progress (queueObject, item): + try: + queueObject.put(item) + return True + except Full: + return False + +COPY_BUF_SIZE = (1024 ** 2) * 4 + +def _copy_part( src, dst, length, queueObject, debug_info, mgr): + + bytecount = 0 + accum = 0 + while True and bytecount < length: + buf = src.read(min(COPY_BUF_SIZE, length - bytecount)) + buf_len = len(buf) + if 0 == buf_len: break + dst.write(buf) + bytecount += buf_len + accum += buf_len + if queueObject and accum and _io_send_bytes_progress(queueObject,accum): accum = 0 + if verboseConnection: + print ("("+debug_info+")",end='',file=sys.stderr) + sys.stderr.flush() + + # In a put or get, exactly one of (src,dst) is a file. Find which and close that one first. + (file_,obj_) = (src,dst) if dst in mgr else (dst,src) + file_.close() + mgr.remove_io( obj_ ) # 1. closes obj if it is not the mgr's initial descriptor + # 2. blocks at barrier until all transfer threads are done copying + # 3. closes with finalize if obj is mgr's initial descriptor + return bytecount + + +class _Multipart_close_manager: + + def __init__(self, initial_io_, exit_barrier_): + self.exit_barrier = exit_barrier_ + self.initial_io = initial_io_ + self.__lock = threading.Lock() + self.aux = [] + + def __contains__(self,Io): + with self.__lock: + return Io is self.initial_io or \ + Io in self.aux + + # `add_io' - add an i/o object to be managed + # note: `remove_io' should only be called for managed i/o objects + + def add_io(self,Io): + with self.__lock: + if Io is not self.initial_io: + self.aux.append(Io) + + # `remove_io' is for closing a channel of parallel i/o and allowing the + # data object to flush write operations (if any) in a timely fashion. 
It also + # synchronizes all of the parallel threads just before exit, so that we know + # exactly when to perform a finalizing close on the data object + + def remove_io(self,Io): + is_initial = True + with self.__lock: + if Io is not self.initial_io: + Io.close() + self.aux.remove(Io) + is_initial = False + self.exit_barrier.wait() + if is_initial: self.finalize() + + def finalize(self): + self.initial_io.close() + + +def _io_part (objHandle, range_, file_, opr_, mgr_, thread_debug_id = '', queueObject = None ): + if 0 == len(range_): return 0 + Operation = Oper(opr_) + (offset,length) = (range_[0], len(range_)) + objHandle.seek(offset) + file_.seek(offset) + if thread_debug_id == '': + thread_debug_id = str(threading.currentThread().ident) + return ( _copy_part (file_, objHandle, length, queueObject, thread_debug_id, mgr_) if Operation.isPut() + else _copy_part (objHandle, file_, length, queueObject, thread_debug_id, mgr_) ) + + +def _io_multipart_threaded(operation_ , dataObj_and_IO, replica_token, hier_str, session, fname, + total_size, num_threads = 0, **extra_options): + """Called by _io_main. + Carve up (0,total_size) range into `num_threads` parts and initiate a transfer thread for each one.""" + + (D, Io) = dataObj_and_IO + Operation = Oper( operation_ ) + + if num_threads < 1: + num_threads = RECOMMENDED_NUM_THREADS_PER_TRANSFER + num_threads = max(1, min(multiprocessing.cpu_count(), num_threads)) + + P = 1 + (total_size // num_threads) + logger.info("num_threads = %s ; (P)artitionSize = %s", num_threads, P) + ranges = [six.moves.range(i*P,min(i*P+P,total_size)) for i in range(num_threads) if i*P < total_size] + + _queueLength = extra_options.get('_queueLength',0) + if _queueLength > 0: + queueObject = Queue(_queueLength) + else: + queueObject = None + + futures = [] + executor = concurrent.futures.ThreadPoolExecutor(max_workers = num_threads) + num_threads = min(num_threads, len(ranges)) + mgr = _Multipart_close_manager(Io, Barrier(num_threads)) + counter = 1 + gen_file_handle = lambda: open(fname, Operation.disk_file_mode(initial_open = (counter == 1))) + File = gen_file_handle() + for r in ranges: + if Io is None: + Io = session.data_objects.open( D.path, Operation.data_object_mode(initial_open = False), + create = False, finalize_on_close = False, + **{kw.RESC_HIER_STR_KW: hier_str, kw.REPLICA_TOKEN_KW: replica_token} ) + mgr.add_io( Io ) + if File is None: File = gen_file_handle() + futures.append(executor.submit( _io_part, Io, r, File, Operation, mgr, str(counter), queueObject)) + counter += 1 + Io = File = None + + if Operation.isNonBlocking(): + if _queueLength: + return futures, queueObject, mgr + else: + return futures + else: + bytecounts = [ f.result() for f in futures ] + return sum(bytecounts), total_size + + +# _io_main +# * Entry point for parallel transfers (multithreaded PUT and GET operations) +# * determine replica information +# * call multithread manager + +def io_main( session, Data, opr_, fname, R='', **kwopt): + + Operation = Oper(opr_) + d_path = None + Io = None + if isinstance(Data,tuple): + (Data, Io) = Data[:2] + if isinstance (Data, six.string_types): + d_path = Data + try: + Data = session.data_objects.get( Data ) + d_path = Data.path + except DataObjectDoesNotExist: + if Operation.isGet(): raise + + R_via_libcall = kwopt.pop( 'target_resource_name', '') + if R_via_libcall: + R = R_via_libcall + + resc_options = {} + if Operation.isPut(): + if R: + resc_options [kw.RESC_NAME_KW] = R + resc_options [kw.DEST_RESC_NAME_KW] = R + + if (not Io): + (Io, 
rawfile) = session.data_objects.open_with_FileRaw( (d_path or Data.path), + Operation.data_object_mode(initial_open = True), + finalize_on_close = True, **resc_options ) + else: + rawfile = Io.raw + + # data object should now exist + if not isinstance(Data,iRODSDataObject): + Data = session.data_objects.get(d_path) + + if Operation.isGet(): + total_bytes = Io.seek(0,os.SEEK_END) + Io.seek(0,os.SEEK_SET) + else: + with open(fname, 'rb') as f: + f.seek(0,os.SEEK_END) + total_bytes = f.tell() + + (replica_token , resc_hier) = rawfile.replica_access_info() + + num_threads = kwopt.pop( 'num_threads', None) + + if num_threads is None: num_threads = int(kwopt.get('N','0')) + + queueLength = kwopt.get('queueLength',0) + retval = _io_multipart_threaded (Operation, (Data, Io), replica_token, resc_hier, session, fname, total_bytes, + num_threads = num_threads, + _queueLength = queueLength) + + # SessionObject.data_objects.parallel_{put,get} will return: + # - immediately with an AsyncNotify instance, if Oper.NONBLOCKING flag is used. + # - upon completion with a boolean completion status, otherwise. + + if Operation.isNonBlocking(): + + if queueLength > 0: + (futures, chunk_notify_queue, mgr) = retval + else: + futures = retval + chunk_notify_queue = total_bytes = None + + return AsyncNotify( futures, # individual futures, one per transfer thread + progress_Queue = chunk_notify_queue, # for notifying the progress indicator thread + total = total_bytes, # total number of bytes for parallel transfer + keep_ = {'mgr': mgr} ) # an open raw i/o object needing to be persisted, if any + else: + (_bytes_transferred, _bytes_total) = retval + return (_bytes_transferred == _bytes_total) + +if __name__ == '__main__': + + import getopt + import atexit + from irods.session import iRODSSession + + def setupLoggingWithDateTimeHeader(name,level = logging.DEBUG): + if _nullh in logger.handlers: + logger.removeHandler(_nullh) + if name: + handler = logging.FileHandler(name) + else: + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter('%(asctime)-15s - %(message)s')) + logger.addHandler(handler) + logger.setLevel( level ) + + try: + env_file = os.environ['IRODS_ENVIRONMENT_FILE'] + except KeyError: + env_file = os.path.expanduser('~/.irods/irods_environment.json') + ssl_context = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH, cafile=None, capath=None, cadata=None) + ssl_settings = {'ssl_context': ssl_context} + sess = iRODSSession(irods_env_file=env_file, **ssl_settings) + atexit.register(lambda : sess.cleanup()) + + opt,arg = getopt.getopt( sys.argv[1:], 'vL:l:aR:N:') + + opts = dict(opt) + + logFilename = opts.pop('-L',None) # '' for console, non-empty for filesystem destination + logLevel = (logging.INFO if logFilename is None else logging.DEBUG) + logFilename = logFilename or opts.pop('-l',None) + + if logFilename is not None: + setupLoggingWithDateTimeHeader(logFilename, logLevel) + + verboseConnection = (opts.pop('-v',None) is not None) + + async_xfer = opts.pop('-a',None) + + kwarg = { k.lstrip('-'):v for k,v in opts.items() } + + arg[1] = Oper.PUT if arg[1].lower() in ('w','put','a') \ + else Oper.GET + if async_xfer is not None: + arg[1] |= Oper.NONBLOCKING + + ret = io_main(sess, *arg, **kwarg) # arg[0] = data object or path + # arg[1] = operation: or'd flags : [PUT|GET] NONBLOCKING + # arg[2] = file path on local filesystem + # kwarg['queueLength'] sets progress-queue length (0 if no progress indication needed) + # kwarg options 'N' (num threads) and 'R' (target resource 
name) are via command-line + # kwarg['num_threads'] (overrides 'N' when called as a library) + # kwarg['target_resource_name'] (overrides 'R' when called as a library) + if isinstance( ret, AsyncNotify ): + print('waiting on completion...',file=sys.stderr) + ret.set_transfer_done_callback(lambda r: print('Async transfer done for:',r,file=sys.stderr)) + done = ret.wait_until_transfer_done (timeout=10.0) # - or do other useful work here + if done: + bytes_transferred = sum(ret.futures_done.values()) + print ('Asynch transfer complete. Total bytes transferred:', bytes_transferred,file=sys.stderr) + else: + print ('Asynch transfer was not completed before timeout expired.',file=sys.stderr) + else: + print('Synchronous transfer {}'.format('succeeded' if ret else 'failed'),file=sys.stderr) + +# Note : This module requires concurrent.futures, included in Python3.x. +# On Python2.7, this dependency must be installed using 'pip install futures'. +# Demonstration : +# +# $ dd if=/dev/urandom bs=1k count=150000 of=$HOME/puttest +# $ time python -m irods.parallel -R demoResc -N 3 `ipwd`/test.dat put $HOME/puttest # add -v,-a for verbose, asynch +# $ time python -m irods.parallel -R demoResc -N 3 `ipwd`/test.dat get $HOME/gettest # add -v,-a for verbose, asynch +# $ diff puttest gettest diff --git a/irods/pool.py b/irods/pool.py index c8b2732..eb02084 100644 --- a/irods/pool.py +++ b/irods/pool.py @@ -9,10 +9,17 @@ logger = logging.getLogger(__name__) +def attribute_from_return_value(attrname): + def deco(method): + def method_(self,*s,**kw): + ret = method(self,*s,**kw) + setattr(self,attrname,ret) + return ret + return method_ + return deco DEFAULT_APPLICATION_NAME = 'python-irodsclient' - class Pool(object): def __init__(self, account, application_name='', connection_refresh_time=-1): @@ -21,6 +28,8 @@ def __init__(self, account, application_name='', connection_refresh_time=-1): Create an iRODS connection pool; 'account' is an irods.account.iRODSAccount instance and 'application_name' specifies the application name as it should appear in an 'ips' listing. 
''' + + self._thread_local = threading.local() self.account = account self._lock = threading.RLock() self.active = set() @@ -37,6 +46,13 @@ def __init__(self, account, application_name='', connection_refresh_time=-1): self.refresh_connection = False self.connection_refresh_time = None + @property + def _conn(self): return getattr( self._thread_local, "_conn", None) + + @_conn.setter + def _conn(self, conn_): setattr( self._thread_local, "_conn", conn_) + + @attribute_from_return_value("_conn") def get_connection(self): with self._lock: try: diff --git a/irods/test/admin_test.py b/irods/test/admin_test.py index 4f1cb80..6f57508 100644 --- a/irods/test/admin_test.py +++ b/irods/test/admin_test.py @@ -154,30 +154,33 @@ def test_make_compound_resource(self): session.resources.add_child(comp.name, ufs1.name, 'archive') session.resources.add_child(comp.name, ufs2.name, 'cache') - # create object on compound resource - obj = session.data_objects.create(obj_path, comp.name) + obj = None - # write to object - with obj.open('w+',**{kw.DEST_RESC_NAME_KW:comp.name}) as obj_desc: - obj_desc.write(dummy_str) + try: + # create object on compound resource + obj = session.data_objects.create(obj_path, resource = comp.name) - # refresh object - obj = session.data_objects.get(obj_path) + # write to object + with obj.open('w+',**{kw.DEST_RESC_NAME_KW:comp.name}) as obj_desc: + obj_desc.write(dummy_str) - # check that we have 2 replicas - self.assertEqual(len(obj.replicas), 2) + # refresh object + obj = session.data_objects.get(obj_path) - # remove object - obj.unlink(force=True) + # check that we have 2 replicas + self.assertEqual(len(obj.replicas), 2) + finally: + # remove object + if obj: obj.unlink(force=True) - # remove children from compound resource - session.resources.remove_child(comp.name, ufs1.name) - session.resources.remove_child(comp.name, ufs2.name) + # remove children from compound resource + session.resources.remove_child(comp.name, ufs1.name) + session.resources.remove_child(comp.name, ufs2.name) - # remove resources - ufs1.remove() - ufs2.remove() - comp.remove() + # remove resources + ufs1.remove() + ufs2.remove() + comp.remove() def test_get_resource_children(self): diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index 390fe2e..6e2a8c4 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -10,18 +10,21 @@ import string import unittest import contextlib # check if redundant +import logging +import io +import re + from irods.models import Collection, DataObject -from irods.session import iRODSSession import irods.exception as ex from irods.column import Criterion from irods.data_object import chunks import irods.test.helpers as helpers import irods.keywords as kw +from irods.manager import data_object_manager from datetime import datetime from tempfile import NamedTemporaryFile, mkdtemp -# used only in create_resc_hierarchy which may be redundant - see later comment -import shutil from irods.test.helpers import (unique_name, my_function_name) +import irods.parallel def make_ufs_resc_in_tmpdir(session, base_name, allow_local = False): @@ -46,6 +49,8 @@ def setUp(self): self.sess = helpers.make_session() self.coll_path = '/{}/home/{}/test_dir'.format(self.sess.zone, self.sess.username) self.coll = helpers.make_collection(self.sess, self.coll_path) + with self.sess.pool.get_connection() as conn: + self.SERVER_VERSION = conn.server_version def tearDown(self): '''Remove test data and close connections @@ -53,24 +58,83 @@ def tearDown(self): 
self.coll.remove(recurse=True, force=True) self.sess.cleanup() -#-- probably redundant ( see helpers.create_simple_resc (self, rescName = None)) + @staticmethod + def In_Memory_Stream(): + return io.BytesIO() if sys.version_info < (3,) else io.StringIO() + @contextlib.contextmanager - def create_resc_hierarchy (self, Root, Leaf): - d = mkdtemp() + def create_resc_hierarchy (self, Root, Leaf = None): + if not Leaf: + Leaf = 'simple_leaf_resc_' + unique_name (my_function_name(), datetime.now()) + y_value = (Root,Leaf) + else: + y_value = ';'.join([Root,Leaf]) self.sess.resources.create(Leaf,'unixfilesystem', host = self.sess.host, - path=d) + path='/tmp/' + Leaf) self.sess.resources.create(Root,'passthru') self.sess.resources.add_child(Root,Leaf) try: - yield ';'.join([Root,Leaf]) + yield y_value finally: self.sess.resources.remove_child(Root,Leaf) self.sess.resources.remove(Leaf) self.sess.resources.remove(Root) - shutil.rmtree(d) + def test_put_get_parallel_autoswitch_A__235(self): + if not self.sess.data_objects.should_parallelize_transfer(server_version_hint = self.SERVER_VERSION): + self.skipTest('Skip unless detected server version is 4.2.9') + if getattr(data_object_manager,'DEFAULT_NUMBER_OF_THREADS',None) in (1, None): + self.skipTest('Data object manager not configured for parallel puts and gets') + Root = 'pt235' + Leaf = 'resc235' + files_to_delete = [] + # This test does the following: + # - set up a small resource hierarchy and generate a file large enough to trigger parallel transfer + # - `put' the file to iRODS, then `get' it back, comparing the resulting two disk files and making + # sure that the parallel routines were invoked to do both transfers + + with self.create_resc_hierarchy(Root) as (Root_ , Leaf): + self.assertEqual(Root , Root_) + self.assertIsInstance( Leaf, str) + datafile = NamedTemporaryFile (prefix='getfromhier_235_',delete=True) + datafile.write( os.urandom( data_object_manager.MAXIMUM_SINGLE_THREADED_TRANSFER_SIZE + 1 )) + datafile.flush() + base_name = os.path.basename(datafile.name) + data_obj_name = '/{0.zone}/home/{0.username}/{1}'.format(self.sess, base_name) + options = { kw.DEST_RESC_NAME_KW:Root, + kw.RESC_NAME_KW:Root } + + PUT_LOG = self.In_Memory_Stream() + GET_LOG = self.In_Memory_Stream() + NumThreadsRegex = re.compile('^num_threads\s*=\s*(\d+)',re.MULTILINE) + + try: + with irods.parallel.enableLogging( logging.StreamHandler, (PUT_LOG,), level_=logging.INFO): + self.sess.data_objects.put(datafile.name, data_obj_name, num_threads = 0, **options) # - PUT + match = NumThreadsRegex.search (PUT_LOG.getvalue()) + self.assertTrue (match is not None and int(match.group(1)) >= 1) # - PARALLEL code path taken? + + with irods.parallel.enableLogging( logging.StreamHandler, (GET_LOG,), level_=logging.INFO): + self.sess.data_objects.get(data_obj_name, datafile.name+".get", num_threads = 0, **options) # - GET + match = NumThreadsRegex.search (GET_LOG.getvalue()) + self.assertTrue (match is not None and int(match.group(1)) >= 1) # - PARALLEL code path taken? 
+ + files_to_delete += [datafile.name + ".get"] + + with open(datafile.name, "rb") as f1, open(datafile.name + ".get", "rb") as f2: + self.assertEqual ( f1.read(), f2.read() ) + + q = self.sess.query (DataObject.name,DataObject.resc_hier).filter( DataObject.name == base_name, + DataObject.resource_name == Leaf) + replicas = list(q) + self.assertEqual( len(replicas), 1 ) + self.assertEqual( replicas[0][DataObject.resc_hier] , ';'.join([Root,Leaf]) ) + + finally: + self.sess.data_objects.unlink( data_obj_name, force = True) + for n in files_to_delete: os.unlink(n) def test_open_existing_dataobj_in_resource_hierarchy__232(self): Root = 'pt1' diff --git a/setup.py b/setup.py index 687f105..5d1eae4 100644 --- a/setup.py +++ b/setup.py @@ -40,5 +40,9 @@ 'six>=1.10.0', 'PrettyTable>=0.7.2', 'xmlrunner>=1.7.7' - ] + # - the new syntax: + #'futures; python_version == "2.7"' + ], + # - the old syntax: + extras_require={ ':python_version == "2.7"': ['futures'] } ) From 682bc7e47eb904552dd3102526814ce17ffb133e Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Sat, 8 May 2021 18:57:01 -0400 Subject: [PATCH 86/96] [#3] codacy corrections --- irods/__init__.py | 2 ++ irods/data_object.py | 19 +++++++++++++------ irods/manager/data_object_manager.py | 6 +++--- irods/message/__init__.py | 19 ++++++++++++++----- irods/parallel.py | 27 ++++++++++++++++----------- irods/test/data_obj_test.py | 2 +- irods/test/extended_test.py | 3 +-- irods/test/force_create.py | 2 +- irods/test/helpers.py | 4 ++-- irods/test/query_test.py | 2 +- run_python_tests.sh | 4 ++-- 11 files changed, 56 insertions(+), 34 deletions(-) diff --git a/irods/__init__.py b/irods/__init__.py index 0a7ffb5..d88d0d4 100644 --- a/irods/__init__.py +++ b/irods/__init__.py @@ -7,6 +7,8 @@ def client_logging(flag=True,handler=None): """ + Example of use: + import irods # Enable / Disable general client logging irods.client_logging(True[,handler]) -> handler diff --git a/irods/data_object.py b/irods/data_object.py index c67dd01..0760f27 100644 --- a/irods/data_object.py +++ b/irods/data_object.py @@ -3,16 +3,14 @@ import sys import logging import six -import json import os import ast -import xml.etree.ElementTree as ET from irods.models import DataObject from irods.meta import iRODSMetaCollection import irods.keywords as kw from irods.api_number import api_number -from irods.message import (StringStringMap, FileOpenRequest, JSON_Message, iRODSMessage) +from irods.message import (JSON_Message, iRODSMessage) logger = logging.getLogger(__name__) @@ -111,7 +109,17 @@ def replicate(self, resource=None, **options): class iRODSDataObjectFileRaw(io.RawIOBase): + """The raw object supporting file-like operations (read/write/seek) for the + iRODSDataObject.""" + def __init__(self, conn, descriptor, finalize_on_close = True, **options): + """ + Constructor needs a connection and an iRODS data object descriptor. If the + finalize_on_close flag evaluates False, close() will invoke the REPLICA_CLOSE + API instead of closing and finalizing the object (useful for parallel + transfers using multiple threads). 
+ """ + super(iRODSDataObjectFileRaw,self).__init__() self.conn = conn self.desc = descriptor self.options = options @@ -128,7 +136,7 @@ def replica_access_info(self): result = self.conn.recv() except Exception as e: logger.warning('''Couldn't receive or process response to GET_FILE_DESCRIPTOR_INFO_APN -- ''' - '''caught: {0!r}'''.format(e)) + '''caught: %r''',e) raise dobj_info = result.get_json_encoded_struct() replica_token = dobj_info.get("replica_token","") @@ -142,14 +150,13 @@ def _close_replica(self): "send_notification": False, "update_size": False, "update_status": False, - "send_notification": False, "compute_checksum": False }, server_version = self.conn.server_version ) self.conn.send( iRODSMessage('RODS_API_REQ', msg = message_body, int_info=api_number['REPLICA_CLOSE_APN']) ) try: self.conn.recv().int_info - except Exception as e: + except Exception: logger.warning ('** ERROR on closing replica **') raise return True diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index 11c1337..709bd69 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -111,7 +111,7 @@ def put(self, local_path, irods_path, return_data_object = False, num_threads = with open(local_path, 'rb') as f, self.open(obj, 'w', **options) as o: if self.should_parallelize_transfer (num_threads, f): - f.close(); + f.close() if not self.parallel_put( local_path, (obj,o), num_threads = num_threads, target_resource_name = options.get(kw.RESC_NAME_KW,'') or options.get(kw.DEST_RESC_NAME_KW,'')): @@ -205,9 +205,9 @@ def create(self, path, resource=None, force=False, **options): return self.get(path) - def open_with_FileRaw(self, *arg, **kw): + def open_with_FileRaw(self, *arg, **kw_options): holder = [] - handle = self.open(*arg,_raw_fd_holder=holder,**kw) + handle = self.open(*arg,_raw_fd_holder=holder,**kw_options) return (handle, holder[-1]) def open(self, path, mode, create = True, finalize_on_close = True, **options): diff --git a/irods/message/__init__.py b/irods/message/__init__.py index 53e86df..ec9b5e0 100644 --- a/irods/message/__init__.py +++ b/irods/message/__init__.py @@ -1,4 +1,5 @@ -import base64 +"""Define objects related to communication with iRODS server API endpoints.""" + import struct import logging import socket @@ -252,7 +253,9 @@ class AuthPluginOut(Message): # define InxIvalPair_PI "int iiLen; int *inx(iiLen); int *ivalue(iiLen);" class JSON_Binary_Request(BinBytesBuf): + """A message body whose payload is BinBytesBuf containing JSON.""" + def __init__(self,msg_struct): """Initialize with a Python data structure that will be converted to JSON.""" super(JSON_Binary_Request,self).__init__() @@ -261,16 +264,19 @@ def __init__(self,msg_struct): self.buflen = len(string) class BytesBuf(Message): + + """A generic structure carrying text content""" + _name = 'BytesBuf_PI' buflen = IntegerProperty() buf = StringProperty() def __init__(self,string,*v,**kw): - super(BytesBuf,self).__init__(*v,**kw) - _buf = StringProperty.escape_xml_string( string ) - self.buf = string - self.buflen = len(self.buf) + super(BytesBuf,self).__init__(*v,**kw) + self.buf = string + self.buflen = len(self.buf) class JSON_XMLFramed_Request(BytesBuf): + """A message body whose payload is a BytesBuf containing JSON.""" def __init__(self, msg_struct): """Initialize with a Python data structure that will be converted to JSON.""" @@ -408,7 +414,10 @@ class FileOpenRequest(Message): KeyValPair_PI = SubmessageProperty(StringStringMap) class 
DataObjChksumRequest(FileOpenRequest): + """Report and/or generate a data object's checksum.""" + def __init__(self,path,**chksumOptions): + """Construct the request using the path of a data object.""" super(DataObjChksumRequest,self).__init__() for attr,prop in vars(FileOpenRequest).items(): if isinstance(prop, (IntegerProperty,LongProperty)): diff --git a/irods/parallel.py b/irods/parallel.py index c0a1f3c..52843dc 100644 --- a/irods/parallel.py +++ b/irods/parallel.py @@ -3,7 +3,6 @@ import os import ssl -import json import time import sys import logging @@ -13,14 +12,9 @@ import multiprocessing import six -import irods -import irods.data_object -from irods.data_object import iRODSDataObjectFileRaw, iRODSDataObject +from irods.data_object import iRODSDataObject from irods.exception import DataObjectDoesNotExist -from irods.message import ( StringStringMap, FileOpenRequest, iRODSMessage ) -from irods.api_number import api_number import irods.keywords as kw -from collections import OrderedDict from six.moves.queue import Queue,Full,Empty @@ -44,6 +38,7 @@ def __init__(self, n): self.barrier = threading.Semaphore(0) def wait(self): """Per-thread wait function. + As in Python3.2 threading, returns 0 <= wait_serial_int < n """ self.mutex.acquire() @@ -58,6 +53,7 @@ def wait(self): @contextlib.contextmanager def enableLogging(handlerType,args,level_ = logging.INFO): """Context manager for temporarily enabling a logger. For debug or test. + Usage Example - with irods.parallel.enableLogging(logging.FileHandler,('/tmp/logfile.txt',)): # parallel put/get code here @@ -98,6 +94,10 @@ def set_transfer_done_callback( self, callback ): self.done_callback = callback def __init__(self, futuresList, callback = None, progress_Queue = None, total = None, keep_ = ()): + """AsyncNotify initialization (used internally to the io.parallel library). + The casual user will only be concerned with the callback parameter, called when all threads + of the parallel PUT or GET have been terminated and the data object closed. + """ self._futures = set(futuresList) self._futures_done = dict() self.keep = dict(keep_) @@ -173,15 +173,21 @@ def futures_done(self): return dict(self._futures_done) class Oper(object): - """A custom enum-type class with utility methods. """ GET = 0 PUT = 1 NONBLOCKING = 2 - def __int__(self): return self._opr - def __init__(self, rhs): self._opr = int(rhs) + def __int__(self): + """Return the stored flags as an integer bitmask. """ + return self._opr + + def __init__(self, rhs): + """Initialize with a bit mask of flags ie. whether Operation PUT or GET, + and whether NONBLOCKING.""" + self._opr = int(rhs) + def isPut(self): return 0 != (self._opr & self.PUT) def isGet(self): return not self.isPut() def isNonBlocking(self): return 0 != (self._opr & self.NONBLOCKING) @@ -290,7 +296,6 @@ def _io_multipart_threaded(operation_ , dataObj_and_IO, replica_token, hier_str, total_size, num_threads = 0, **extra_options): """Called by _io_main. 
Carve up (0,total_size) range into `num_threads` parts and initiate a transfer thread for each one.""" - (D, Io) = dataObj_and_IO Operation = Oper( operation_ ) diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index 6e2a8c4..ef03582 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -22,7 +22,7 @@ import irods.keywords as kw from irods.manager import data_object_manager from datetime import datetime -from tempfile import NamedTemporaryFile, mkdtemp +from tempfile import NamedTemporaryFile from irods.test.helpers import (unique_name, my_function_name) import irods.parallel diff --git a/irods/test/extended_test.py b/irods/test/extended_test.py index e3b7051..884a4f1 100644 --- a/irods/test/extended_test.py +++ b/irods/test/extended_test.py @@ -30,8 +30,7 @@ def tearDown(self): @classmethod def tearDownClass(cls): - '''Remove test data - ''' + """Remove test data.""" # once only (after all tests), delete large collection print ("Deleting the large collection...", file = sys.stderr) with helpers.make_session() as sess: diff --git a/irods/test/force_create.py b/irods/test/force_create.py index 3510ebe..5fd0a85 100644 --- a/irods/test/force_create.py +++ b/irods/test/force_create.py @@ -19,7 +19,7 @@ def tearDown(self): # This test should pass whether or not federation is configured: def test_force_create(self): if self.sess.server_version > (4, 2, 8): - self.skipTest('force flag unneeded for create in iRODS > 4.2.8') + self.skipTest('force flag unneeded for create in iRODS > 4.2.8') session = self.sess FILE = '/{session.zone}/home/{session.username}/a.txt'.format(**locals()) try: diff --git a/irods/test/helpers.py b/irods/test/helpers.py index 7c99a41..5f18088 100644 --- a/irods/test/helpers.py +++ b/irods/test/helpers.py @@ -19,7 +19,7 @@ def my_function_name(): - '''Returns the name of the calling function or method''' + """Returns the name of the calling function or method""" return inspect.getframeinfo(inspect.currentframe().f_back).function _thrlocal = threading.local() @@ -27,6 +27,7 @@ def my_function_name(): def unique_name(*seed_tuple): '''For deterministic pseudo-random identifiers based on function/method name to prevent e.g. ICAT collisions within and between tests. 
Example use: + def f(session): seq_num = 1 a_name = unique_name( my_function_name(), seq_num # [, *optional_further_args] @@ -44,7 +45,6 @@ def f(session): IRODS_SHARED_REG_RESC_VAULT = os.path.join(IRODS_SHARED_DIR,'reg_resc') IRODS_REG_RESC = 'MyRegResc' -Reg_Resc_Name = '' def irods_shared_tmp_dir(): pth = IRODS_SHARED_TMP_DIR diff --git a/irods/test/query_test.py b/irods/test/query_test.py index e370a8c..d520306 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -23,7 +23,7 @@ from irods.meta import iRODSMeta from irods.rule import Rule from irods import MAX_SQL_ROWS -from irods.test.helpers import (irods_shared_reg_resc_vault, get_register_resource) +from irods.test.helpers import irods_shared_reg_resc_vault import irods.test.helpers as helpers from six.moves import range as py3_range import irods.keywords as kw diff --git a/run_python_tests.sh b/run_python_tests.sh index 2430364..5ec2207 100644 --- a/run_python_tests.sh +++ b/run_python_tests.sh @@ -6,8 +6,8 @@ cd repo/irods/test export PYTHONUNBUFFERED="Y" if [ -z "${TESTS_TO_RUN}" ] ; then - python${PY_N} runner.py 2>&1 | tee ${LOG_OUTPUT_DIR}/prc_test_logs.txt + python"${PY_N}" runner.py 2>&1 | tee "${LOG_OUTPUT_DIR}"/prc_test_logs.txt else - python${PY_N} -m unittest -v ${TESTS_TO_RUN} 2>&1 | tee ${LOG_OUTPUT_DIR}/prc_test_logs.txt + python"${PY_N}" -m unittest -v ${TESTS_TO_RUN} 2>&1 | tee "${LOG_OUTPUT_DIR}"/prc_test_logs.txt fi From 75684aa2c833dcf0eed62db73a6b7ae4eb7d885a Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Mon, 10 May 2021 09:20:48 -0400 Subject: [PATCH 87/96] [#21] Update README.rst regarding outdated versions of Python --- README.rst | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index b29b683..53d0a10 100644 --- a/README.rst +++ b/README.rst @@ -41,7 +41,7 @@ Installing ---------- PRC requires Python 2.7 or 3.4+. -To install with pip:: +Canonically, to install with pip:: pip install python-irodsclient @@ -49,10 +49,6 @@ or:: pip install git+https://github.com/irods/python-irodsclient.git[@branch|@commit|@tag] -See also [these instructions](PYTHON_install_caveats.rst), with hints about pip and -virtualenv, relevant to installation on older Linux distributions. - - Uninstalling ------------ @@ -60,6 +56,21 @@ Uninstalling pip uninstall python-irodsclient +Hazard: Outdated Python +-------------------------- +With older versions of Python (as of this writing, the aforementioned 2.7 and 3.4), we +can take preparatory steps toward securing workable versions of pip and virtualenv by +using these commands:: + + $ pip install --upgrade --user pip'<21.0' + $ python -m pip install --user virtualenv + +We are then ready to use any of the following commands relevant to and required for the +installation:: + + $ python -m virtualenv ... + $ python -m pip install ... + Establishing a (secure) connection ---------------------------------- From 776e9dcbc1a2b3df87dbd38e92624327bf3bf31b Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Thu, 14 Jan 2021 19:36:51 -0500 Subject: [PATCH 88/96] [#269] cleanup() is now automatic with session destruct. Also: In cleanup( ), the session pool's connections will now be properly shut down and the pool itself re-initialized to contain no active or idle connections. This is important in case the session is used again, as happens in certain tests (see the admin_test) which deliberately do this kind of refresh to update information from the catalog. 
When called from iRODSSession's __del__ destructor, an exception to that rule is made, and the pool reinitialization does not happen. --- irods/session.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/irods/session.py b/irods/session.py index bf31479..d227707 100644 --- a/irods/session.py +++ b/irods/session.py @@ -24,8 +24,10 @@ def __init__(self, configure=True, **kwargs): self.pool = None self.numThreads = 0 + self.do_configure = (kwargs if configure else {}) + self.__configured = None if configure: - self.configure(**kwargs) + self.__configured = self.configure(**kwargs) self.collections = CollectionManager(self) self.data_objects = DataObjectManager(self) @@ -42,6 +44,10 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): self.cleanup() + def __del__(self): + self.do_configure = {} + self.cleanup() + def cleanup(self): for conn in self.pool.active | self.pool.idle: try: @@ -49,8 +55,10 @@ def cleanup(self): except NetworkException: pass conn.release(True) + if self.do_configure: self.configure(**self.do_configure) def _configure_account(self, **kwargs): + try: env_file = kwargs['irods_env_file'] @@ -99,10 +107,13 @@ def _configure_account(self, **kwargs): return iRODSAccount(**creds) def configure(self, **kwargs): - account = self._configure_account(**kwargs) + account = self.__configured + if not account: + account = self._configure_account(**kwargs) connection_refresh_time = self.get_connection_refresh_time(**kwargs) logger.debug("In iRODSSession's configure(). connection_refresh_time set to {}".format(connection_refresh_time)) self.pool = Pool(account, application_name=kwargs.pop('application_name',''), connection_refresh_time=connection_refresh_time) + return account def query(self, *args): return Query(self, *args) From c168fcc7df4d9c5f2d849ac56e26eed31dc1c701 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Wed, 12 May 2021 13:25:31 -0400 Subject: [PATCH 89/96] [#221] new way of syncing containers avoids false starts Tests were failing against 4-2-stable and master branches of iRODS server because the PUT test during server installation lasted long enough to satisfy the client's HEARTBEAT requirement, but the new default is to shut down the running server just after installation. So instead of HEARTBEAT we now use communication on a specified (but easily reconfigurable) port 8888 to signal the client that the server is up "for good." --- Dockerfile.prc_test.centos | 6 ++---- Dockerfile.prc_test.ubuntu | 6 ++---- docker_build/Dockerfile.provider | 12 ++++++----- docker_build/recv_oneshot | 35 ++++++++++++++++++++++++++++++++ docker_build/send_oneshot | 6 ++++++ 5 files changed, 52 insertions(+), 13 deletions(-) create mode 100755 docker_build/recv_oneshot create mode 100755 docker_build/send_oneshot diff --git a/Dockerfile.prc_test.centos b/Dockerfile.prc_test.centos index bd4ef60..4c8668e 100644 --- a/Dockerfile.prc_test.centos +++ b/Dockerfile.prc_test.centos @@ -23,9 +23,7 @@ RUN python${py_N} repo/docker_build/iinit.py \ zone tempZone \ password rods SHELL ["/bin/bash","-c"] -CMD echo "Waiting on iRODS server ::: " &&\ - repo/docker_build/wait_on_condition -v -i 10 -n 30 \ - 'echo -e "\x00\x00\x00\x33HEARTBEAT" | nc irods-provider 1247 | grep HEARTBEAT' &&\ - echo "::: iRODS server is up." |tee /tmp/irods_is_up &&\ +CMD echo "Waiting on iRODS server... 
" ; \ + python${PY_N} repo/docker_build/recv_oneshot -h irods-provider -p 8888 -t 360 && \ sudo groupadd -o -g $(stat -c%g /irods_shared) irods && sudo usermod -aG irods user && \ newgrp irods < repo/run_python_tests.sh diff --git a/Dockerfile.prc_test.ubuntu b/Dockerfile.prc_test.ubuntu index 924696d..3d42d5a 100644 --- a/Dockerfile.prc_test.ubuntu +++ b/Dockerfile.prc_test.ubuntu @@ -30,9 +30,7 @@ SHELL ["/bin/bash","-c"] # 2. give user group permissions to access shared irods directories # 3. run python tests as the new group -CMD echo "Waiting on iRODS server ::: " &&\ - repo/docker_build/wait_on_condition -v -i 10 -n 30 \ - 'echo -e "\x00\x00\x00\x33HEARTBEAT" | nc irods-provider 1247 | grep HEARTBEAT' && \ - echo "::: iRODS server is up." |tee /tmp/irods_is_up && \ +CMD echo "Waiting on iRODS server... " ; \ + python${PY_N} repo/docker_build/recv_oneshot -h irods-provider -p 8888 -t 360 && \ sudo groupadd -o -g $(stat -c%g /irods_shared) irods && sudo usermod -aG irods user && \ newgrp irods < repo/run_python_tests.sh diff --git a/docker_build/Dockerfile.provider b/docker_build/Dockerfile.provider index 5a45e84..f901dc1 100644 --- a/docker_build/Dockerfile.provider +++ b/docker_build/Dockerfile.provider @@ -14,16 +14,17 @@ COPY ICAT.sql /tmp COPY pgpass root/.pgpass RUN chmod 600 root/.pgpass -RUN apt install -y rsyslog -ADD build_deps_list wait_on_condition /tmp/ +RUN apt install -y rsyslog gawk +ADD build_deps_list wait_on_condition send_oneshot /tmp/ # At Runtime: 1. Install apt dependencies for the iRODS package files given. # 2. Install the package files. # 3. Wait on database container. # 4. Configure iRODS provider and make sure it is running. -# 5. Configure shared folder for tests that need to register data objects. +# 5. Open a server port, informing the client to start tests now that iRODS is up. +# 6. Configure shared folder for tests that need to register data objects. # (We opt out if /irods_shared does not exist, ie is omitted in the docker-compose.yml). -# 6. Wait forever. +# 7. Wait forever. CMD apt install -y $(/tmp/build_deps_list /irods_packages/irods*{serv,dev,icommand,runtime,database-*postgres}*.deb) && \ dpkg -i /irods_packages/irods*{serv,dev,icommand,runtime,database-*postgres}*.deb && \ @@ -31,7 +32,8 @@ CMD apt install -y $(/tmp/build_deps_list /irods_packages/irods*{serv,dev,icomma psql -h icat -U postgres -f /tmp/ICAT.sql && \ sed 's/localhost/icat/' < /var/lib/irods/packaging/localhost_setup_postgres.input \ | python /var/lib/irods/scripts/setup_irods.py && \ - { pgrep -u irods irodsServer >/dev/null || su irods -c '~/irodsctl start'; } && \ + { pgrep -u irods irodsServer >/dev/null || su irods -c '~/irodsctl start'; \ + env PORT=8888 /tmp/send_oneshot "iRODS is running..." & } && \ { [ ! 
-d /irods_shared ] || { mkdir -p /irods_shared/reg_resc && mkdir -p /irods_shared/tmp && \ chown -R irods.irods /irods_shared && chmod g+ws /irods_shared/tmp && \ chmod 777 /irods_shared/reg_resc ; } } && \ diff --git a/docker_build/recv_oneshot b/docker_build/recv_oneshot new file mode 100755 index 0000000..47e2bdd --- /dev/null +++ b/docker_build/recv_oneshot @@ -0,0 +1,35 @@ +#!/usr/bin/env python +from __future__ import print_function +import sys, os, time +from socket import * +import getopt + +def try_connect(host,port): + try: + s=socket(AF_INET,SOCK_STREAM) + s.connect((host,port)) + return s + except: + s.close() + return None + +# Options: +# +# -t timeout +# -h host +# -p port + +t = now = time.time() +opts = dict(getopt.getopt(sys.argv[1:],'t:h:p:')[0]) + +host = opts['-h'] +port = int(opts['-p']) +timeout = float(opts['-t']) + +while time.time() < now + timeout: + time.sleep(1) + s = try_connect(host, port) + if s: + print(s.recv(32767).decode('utf-8'),end='') + exit(0) +exit(1) diff --git a/docker_build/send_oneshot b/docker_build/send_oneshot new file mode 100755 index 0000000..b265af1 --- /dev/null +++ b/docker_build/send_oneshot @@ -0,0 +1,6 @@ +#!/usr/bin/gawk -f +BEGIN { + SERVER = "/inet/tcp/"ENVIRON["PORT"]"/0/0" + print ARGV[1] " - " strftime() |& SERVER + close(SERVER) +} From 193a18b45273dd43dc1b0cc282c2ec1e39a61fd3 Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Fri, 14 May 2021 21:28:36 -0400 Subject: [PATCH 90/96] [#3] v0.9.0 and update changelog --- CHANGELOG.rst | 17 +++++++++++++++++ README.rst | 2 +- irods/version.py | 2 +- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4f5e85a..8e7e857 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,23 @@ Changelog ========= +v0.9.0 (2021-05-14) +------------------- +[#269] cleanup() is now automatic with session destruct [Daniel Moore] +[#235] multithreaded parallel transfer for PUT and GET [Daniel Moore] +[#232] do not arbitrarily pick first replica for DEST RESC [Daniel Moore] +[#233] add null handler for irods package root [Daniel Moore] +[#246] implementation of checksum for data object manager [Daniel Moore] +[#270] speed up tests [Daniel Moore] +[#260] [irods/irods#5520] XML protocol will use BinBytesBuf in 4.2.9 [Daniel Moore] +[#221] prepare test suite for CI [Daniel Moore] +[#267] add RuleExec model for genquery [Daniel Moore] +[#263] update documentation for connection_timeout [Terrell Russell] +[#261] add temporary password support [Paul van Schayck] +[#257] better SSL examples [Terrell Russell] +[#255] make results of atomic metadata operations visible [Daniel Moore] +[#250] add exception for SYS_INVALID_INPUT_PARAM [Daniel Moore] + v0.8.6 (2021-01-22) ------------------- [#244] added capability to add/remove atomic metadata [Daniel Moore] diff --git a/README.rst b/README.rst index 53d0a10..eca6b71 100644 --- a/README.rst +++ b/README.rst @@ -16,7 +16,7 @@ Currently supported: - Execute direct SQL queries - Execute iRODS rules - Support read, write, and seek operations for files -- PUT/GET data objects +- Parallel PUT/GET data objects - Create collections - Rename collections - Delete collections diff --git a/irods/version.py b/irods/version.py index 37e9d6d..e4e49b3 100644 --- a/irods/version.py +++ b/irods/version.py @@ -1 +1 @@ -__version__ = '0.8.6' +__version__ = '0.9.0' From f49368b2a5fd479e3ce49fb25f76dc98f2904673 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Sun, 16 May 2021 17:08:58 -0400 Subject: [PATCH 91/96] [#269] maintain account 
info in session cleanup --- irods/session.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/irods/session.py b/irods/session.py index d227707..18b1f50 100644 --- a/irods/session.py +++ b/irods/session.py @@ -55,7 +55,8 @@ def cleanup(self): except NetworkException: pass conn.release(True) - if self.do_configure: self.configure(**self.do_configure) + if self.do_configure: + self.__configured = self.configure(**self.do_configure) def _configure_account(self, **kwargs): From d50614591cec72cf5c08a2dc4737f15e1b90e56e Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Sun, 16 May 2021 18:02:07 -0400 Subject: [PATCH 92/96] [#269] Update README.rst with regard to session cleanup --- README.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.rst b/README.rst index eca6b71..116c390 100644 --- a/README.rst +++ b/README.rst @@ -140,6 +140,19 @@ This can be overridden by changing the session `connection_timeout` immediately This will set the timeout to five minutes for any associated connections. +Session objects and cleanup +--------------------------- + +When iRODSSession objects are kept as state in an application, spurious SYS_HEADER_READ_LEN_ERR errors +can sometimes be seen in the connected iRODS server's log file. This is frequently seen at program exit +because socket connections are terminated without having been closed out by the session object's +cleanup() method. + +Starting with PRC Release 0.9.0, code has been included in the session object's __del__ method to call +cleanup(), properly closing out network connections. However, __del__ cannot be relied to run under all +circumstances (Python2 being more problematic), so an alternative may be to call session.cleanup() on +any session variable which might not be used again. + Simple PUTs and GETs -------------------- From e1bdce503b788169913b8a5c90dfe14072dcb645 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Thu, 27 May 2021 04:48:39 +0000 Subject: [PATCH 93/96] [#274] calculate common vault dir for unicode query tests Two tests querying for unicode object names got minor changes / refactor. 
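The session-cleanup guidance added to README.rst in the patch above lends itself to a short illustration. The following sketch is not part of the patch series; it assumes a reachable iRODS server and a default irods_environment.json, and shows both the context-manager form (which invokes cleanup() automatically) and an explicit cleanup() call for a session kept as long-lived state:

    import os
    from irods.session import iRODSSession

    env_file = os.path.expanduser('~/.irods/irods_environment.json')

    # Preferred: the context manager calls cleanup() on exit, so no
    # pooled connections linger past the 'with' block.
    with iRODSSession(irods_env_file=env_file) as session:
        print(session.server_version)

    # For a session held as long-lived application state, close the
    # pooled connections explicitly rather than relying on __del__.
    session = iRODSSession(irods_env_file=env_file)
    try:
        print(session.server_version)
    finally:
        session.cleanup()
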
--- irods/test/query_test.py | 41 ++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/irods/test/query_test.py b/irods/test/query_test.py index d520306..78fe864 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -55,8 +55,8 @@ def setUpClass(cls): def tearDownClass(cls): with helpers.make_session() as sess: try: - resc = sess.resources.get(cls.register_resc) - resc.remove() + if cls.register_resc: + sess.resources.get(cls.register_resc).remove() except Exception as e: print( "Could not remove resc {!r} due to: {} ".format(cls.register_resc,e), file=sys.stderr) @@ -338,20 +338,34 @@ def test_multiple_criteria_on_one_column_name(self): results = [r[DataObject.name] for r in q] self.assertTrue(len(results) == len(dummy_test)) - @unittest.skipIf(six.PY3, 'Test is for python2 only') - def test_query_for_data_object_with_utf8_name_python2(self): - if not helpers.irods_session_host_local (self.sess) and not( self.register_resc ): - self.skipTest('for non-local server - registering data objects requires a shared path') + def common_dir_or_vault_info(self): + register_opts= {} + dir_ = None + if self.register_resc: + dir_ = irods_shared_reg_resc_vault() + register_opts[ kw.RESC_NAME_KW ] = self.register_resc + if not(dir_) and helpers.irods_session_host_local (self.sess): + dir_ = tempfile.gettempdir() + if not dir_: + return () + else: + return (dir_ , register_opts) + + @unittest.skipIf(six.PY3, 'Test is for python2 only') + def test_query_for_data_object_with_utf8_name_python2(self): + reg_info = self.common_dir_or_vault_info() + if not reg_info: + self.skipTest('server is non-localhost and no common path exists for object registration') + (dir_,resc_option) = reg_info filename_prefix = '_prefix_ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸ' self.assertEqual(self.FILENAME_PREFIX.encode('utf-8'), filename_prefix) - dir_ = irods_shared_reg_resc_vault() _,test_file = tempfile.mkstemp(dir=dir_,prefix=filename_prefix) obj_path = os.path.join(self.coll.path, os.path.basename(test_file)) results = None try: - self.sess.data_objects.register(test_file, obj_path, **{kw.RESC_NAME_KW: self.register_resc}) + self.sess.data_objects.register(test_file, obj_path, **resc_option) results = self.sess.query(DataObject, Collection.name).filter(DataObject.path == test_file).first() result_logical_path = os.path.join(results[Collection.name], results[DataObject.name]) result_physical_path = results[DataObject.path] @@ -366,10 +380,10 @@ def test_query_for_data_object_with_utf8_name_python2(self): @unittest.skipIf(six.PY2, 'Test is for python3 only') def test_query_for_data_object_with_utf8_name_python3(self): - - if not helpers.irods_session_host_local (self.sess) and not( self.register_resc ): - self.skipTest('for non-local server - registering data objects requires a shared path') - + reg_info = self.common_dir_or_vault_info() + if not reg_info: + self.skipTest('server is non-localhost and no common path exists for object registration') + (dir_,resc_option) = reg_info def python34_unicode_mkstemp( prefix, dir = None, open_mode = 0o777 ): file_path = os.path.join ((dir or os.environ.get('TMPDIR') or '/tmp'), prefix+'-'+str(uuid.uuid1())) encoded_file_path = file_path.encode('utf-8') @@ -379,7 +393,6 @@ def python34_unicode_mkstemp( prefix, dir = None, open_mode = 0o777 ): u'\u01e0\u01e1\u01e2\u01e3\u01e4\u01e5\u01e6\u01e7\u01e8\u01e9\u01ea\u01eb\u01ec\u01ed\u01ee\u01ef'\ u'\u01f0\u01f1\u01f2\u01f3\u01f4\u01f5\u01f6\u01f7\u01f8' # make more visible/changeable in 
VIM self.assertEqual(self.FILENAME_PREFIX, filename_prefix) - dir_ = irods_shared_reg_resc_vault() (fd,encoded_test_file) = tempfile.mkstemp(dir = dir_.encode('utf-8'),prefix=filename_prefix.encode('utf-8')) \ if sys.version_info >= (3,5) \ else python34_unicode_mkstemp(dir = dir_, prefix = filename_prefix) @@ -388,7 +401,7 @@ def python34_unicode_mkstemp( prefix, dir = None, open_mode = 0o777 ): obj_path = os.path.join(self.coll.path, os.path.basename(test_file)) results = None try: - self.sess.data_objects.register(test_file, obj_path, **{kw.RESC_NAME_KW: self.register_resc}) + self.sess.data_objects.register(test_file, obj_path, **resc_option) results = list(self.sess.query(DataObject, Collection.name).filter(DataObject.path == test_file)) if results: results = results[0] From 232a2d73d496f77d11060495fbb4fac7ab099f56 Mon Sep 17 00:00:00 2001 From: Terrell Russell Date: Wed, 2 Jun 2021 22:11:09 -0400 Subject: [PATCH 94/96] [#3] v1.0.0 and update changelog --- CHANGELOG.rst | 5 +++++ irods/version.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 8e7e857..f77687b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,11 @@ Changelog ========= +v1.0.0 (2021-06-03) +------------------- +[#274] calculate common vault dir for unicode query tests [Daniel Moore] +[#269] better session cleanup [Daniel Moore] + v0.9.0 (2021-05-14) ------------------- [#269] cleanup() is now automatic with session destruct [Daniel Moore] diff --git a/irods/version.py b/irods/version.py index e4e49b3..1f356cc 100644 --- a/irods/version.py +++ b/irods/version.py @@ -1 +1 @@ -__version__ = '0.9.0' +__version__ = '1.0.0' From ef823d7db6cc369147618725eb7070c714879e3f Mon Sep 17 00:00:00 2001 From: root Date: Mon, 28 Jun 2021 09:41:35 +0200 Subject: [PATCH 95/96] Fix missing comma. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 92102a1..6108534 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ 'six>=1.10.0', 'PrettyTable>=0.7.2', 'xmlrunner>=1.7.7', - 'humanize' + 'humanize', 'xmlrunner>=1.7.7' # - the new syntax: #'futures; python_version == "2.7"' From 532c2eb0f11df4e91fb66506d68b7f0bab1e1765 Mon Sep 17 00:00:00 2001 From: Functional Account for irods Date: Wed, 27 Oct 2021 12:08:47 +0200 Subject: [PATCH 96/96] Expose creation time of collection; comments regarding other fields probably worth exposing. --- irods/collection.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/irods/collection.py b/irods/collection.py index c750f29..23982c3 100644 --- a/irods/collection.py +++ b/irods/collection.py @@ -15,6 +15,11 @@ def __init__(self, manager, result=None): self.id = result[Collection.id] self.path = result[Collection.name] self.name = irods_basename(result[Collection.name]) + self.create_time = result[Collection.create_time] + #self.modify_time = result[Collection.modify_time] + #self.inheritance = result[Collection.inheritance] + #self.owner_name = result[Collection.owner_name] + #self.owner_zone = result[Collection.owner_zone] self._meta = None @property
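
The create_time attribute exposed by the final patch can be read directly from a fetched collection. The snippet below is a hypothetical usage sketch, not part of the series; the session setup and collection path are assumptions made only for illustration:

    import os
    from irods.session import iRODSSession

    env_file = os.path.expanduser('~/.irods/irods_environment.json')
    with iRODSSession(irods_env_file=env_file) as session:
        home = '/{0.zone}/home/{0.username}'.format(session)
        coll = session.collections.get(home)
        # create_time is populated from the Collection.create_time column
        # now passed through the iRODSCollection constructor.
        print(coll.name, coll.create_time)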