From 59402c0d2ec83d3a54d7087a027b57f5eaadf0bd Mon Sep 17 00:00:00 2001 From: jm Date: Fri, 13 Feb 2026 23:39:19 +0100 Subject: [PATCH 1/2] fix: upstream bugfixes and proxy support Backports critical bugfixes and proxy support from upstream the-library-code/dspace-rest-python. Bugfixes: - Fix User model trailing commas that turned fields into tuples - Fix get_items() using wrong embedded key ('collections' -> 'items') - Fix InProgressSubmission step assigned from lastModified instead of step - Fix InProgressSubmission type assigned from lastModified instead of type - Fix EntityType type field overwriting label - Fix parse_json to handle None response safely Improvements: - Add proxy support via PROXY_URL env var and proxies constructor param - Add proxies to all HTTP methods (GET, POST, PUT, DELETE, PATCH, send) - Add proxies to authenticate status check GET - Add params parameter to api_patch method - Add embedded attribute to HALResource base class - Add ITER_PAGE_SIZE class variable (preparation for pagination) - Add upstream_ref/ to .gitignore --- .gitignore | 1 + dspace_rest_client/client.py | 51 ++++++++++++++++++++++++------------ dspace_rest_client/models.py | 22 +++++++++------- 3 files changed, 48 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 0dc7c57..f0de7dc 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ __pypackages__/ env/ venv/ .idea/ +upstream_ref/ diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 9e3196d..80408b7 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -16,11 +16,12 @@ """ import json import logging +import os +from uuid import UUID import requests from requests import Request -import os -from uuid import UUID + from .models import * __all__ = ['DSpaceClient'] @@ -37,9 +38,13 @@ def parse_json(response): """ response_json = None try: - response_json = response.json() + if response is not None: + response_json = response.json() except ValueError as err: - _logger.error(f'Error parsing response JSON: {err}. Body text: {response.text}') + if response is not None: + _logger.error(f'Error parsing response JSON: {err}. Body text: {response.text}') + else: + _logger.error(f'Error parsing response JSON: {err}. Response is None') return response_json @@ -73,6 +78,8 @@ class DSpaceClient: if 'USER_AGENT' in os.environ: USER_AGENT = os.environ['USER_AGENT'] verbose = False + ITER_PAGE_SIZE = 20 + PROXY_DICT = dict(http=os.environ["PROXY_URL"],https=os.environ["PROXY_URL"]) if "PROXY_URL" in os.environ else dict() # Simple enum for patch operation types class PatchOperation: @@ -82,7 +89,7 @@ class PatchOperation: MOVE = 'move' def __init__(self, api_endpoint=API_ENDPOINT, username=USERNAME, password=PASSWORD, solr_endpoint=SOLR_ENDPOINT, - solr_auth=SOLR_AUTH, fake_user_agent=False): + solr_auth=SOLR_AUTH, fake_user_agent=False, proxies=PROXY_DICT): """ Accept optional API endpoint, username, password arguments using the OS environment variables as defaults :param api_endpoint: base path to DSpace REST API, eg. http://localhost:8080/server/api @@ -95,6 +102,7 @@ def __init__(self, api_endpoint=API_ENDPOINT, username=USERNAME, password=PASSWO self.USERNAME = username self.PASSWORD = password self.SOLR_ENDPOINT = solr_endpoint + self.proxies = proxies self.solr = None self._last_err = None try: @@ -128,7 +136,8 @@ def authenticate(self, retry=False): # Set headers for requests made during authentication # Get and update CSRF token r = self.session.post(self.LOGIN_URL, data={'user': self.USERNAME, 'password': self.PASSWORD}, - headers=self.auth_request_headers) + headers=self.auth_request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -154,7 +163,8 @@ def authenticate(self, retry=False): self.session.headers.update({'Authorization': r.headers.get('Authorization')}) # Get and check authentication status - r = self.session.get(f'{self.API_ENDPOINT}/authn/status', headers=self.request_headers) + r = self.session.get(f'{self.API_ENDPOINT}/authn/status', headers=self.request_headers, + proxies=self.proxies) if r.status_code == 200: r_json = parse_json(r) if 'authenticated' in r_json and r_json['authenticated'] is True: @@ -203,7 +213,8 @@ def api_get(self, url, params=None, data=None, headers=None): self._last_err = None if headers is None: headers = self.request_headers - r = self.session.get(url, params=params, data=data, headers=headers) + r = self.session.get(url, params=params, data=data, headers=headers, + proxies=self.proxies) self.update_token(r) return r @@ -218,7 +229,8 @@ def api_post(self, url, params, json, retry=False): @return: Response from API """ self._last_err = None - r = self.session.post(url, json=json, params=params, headers=self.request_headers) + r = self.session.post(url, json=json, params=params, headers=self.request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -262,7 +274,8 @@ def api_post_uri(self, url, params, uri_list, retry=False): @return: Response from API """ self._last_err = None - r = self.session.post(url, data=uri_list, params=params, headers=self.list_request_headers) + r = self.session.post(url, data=uri_list, params=params, headers=self.list_request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -291,7 +304,8 @@ def api_put(self, url, params, json, retry=False): @return: Response from API """ self._last_err = None - r = self.session.put(url, params=params, json=json, headers=self.request_headers) + r = self.session.put(url, params=params, json=json, headers=self.request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -321,7 +335,8 @@ def api_delete(self, url, params, retry=False): @return: Response from API """ self._last_err = None - r = self.session.delete(url, params=params, headers=self.request_headers) + r = self.session.delete(url, params=params, headers=self.request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -341,12 +356,13 @@ def api_delete(self, url, params, retry=False): return r - def api_patch(self, url, operation, path, value, retry=False): + def api_patch(self, url, operation, path, value, params=None, retry=False): """ @param url: DSpace REST API URL @param operation: 'add', 'remove', 'replace', or 'move' (see PatchOperation enumeration) @param path: path to perform operation - eg, metadata, withdrawn, etc. @param value: new value for add or replace operations, or 'original' path for move operations + @param params: Optional parameters @param retry: Has this method already been retried? Used if we need to refresh XSRF. @return: @see https://github.com/DSpace/RestContract/blob/main/metadata-patch.md @@ -377,7 +393,8 @@ def api_patch(self, url, operation, path, value, retry=False): # set headers # perform patch request - r = self.session.patch(url, json=[data], headers=self.request_headers) + r = self.session.patch(url, json=[data], params=params, headers=self.request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -392,7 +409,7 @@ def api_patch(self, url, operation, path, value, retry=False): _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: _logger.debug("Retrying request with updated CSRF token") - return self.api_patch(url, operation, path, value, True) + return self.api_patch(url, operation, path, value, params, True) elif r.status_code == 200: # 200 Success _logger.info(f'successful patch update to {r.json()["type"]} {r.json()["id"]}') @@ -727,7 +744,7 @@ def create_bitstream(self, bundle=None, name=None, path=None, mime=None, metadat h.update({'Content-Encoding': 'gzip', 'User-Agent': self.USER_AGENT}) req = Request('POST', url, data=payload, headers=h, files=files) prepared_req = self.session.prepare_request(req) - r = self.session.send(prepared_req) + r = self.session.send(prepared_req, proxies=self.proxies) if 'DSPACE-XSRF-TOKEN' in r.headers: t = r.headers['DSPACE-XSRF-TOKEN'] _logger.debug('Updating token to ' + t) @@ -922,7 +939,7 @@ def get_items(self, page=0, size=20): r = self.api_get(url, params=params) r_json = parse_json(response=r) if '_embedded' in r_json: - if 'collections' in r_json['_embedded']: + if 'items' in r_json['_embedded']: for item_resource in r_json['_embedded']['items']: items.append(Item(item_resource)) elif 'uuid' in r_json: diff --git a/dspace_rest_client/models.py b/dspace_rest_client/models.py index 903dd8a..f1c8550 100644 --- a/dspace_rest_client/models.py +++ b/dspace_rest_client/models.py @@ -37,6 +37,10 @@ def __init__(self, api_resource=None): self.links = api_resource['_links'].copy() else: self.links = {'self': {'href': None}} + if '_embedded' in api_resource: + self.embedded = api_resource['_embedded'].copy() + else: + self.embedded = {} class AddressableHALResource(HALResource): id = None @@ -421,12 +425,12 @@ class User(SimpleDSpaceObject): Extends DSpaceObject to implement specific attributes and methods for users (aka. EPersons) """ type = 'user' - name = None, - netid = None, - lastActive = None, - canLogIn = False, - email = None, - requireCertificate = False, + name = None + netid = None + lastActive = None + canLogIn = False + email = None + requireCertificate = False selfRegistered = False def __init__(self, api_resource=None): @@ -473,11 +477,11 @@ def __init__(self, api_resource): if 'lastModified' in api_resource: self.lastModified = api_resource['lastModified'] if 'step' in api_resource: - self.step = api_resource['lastModified'] + self.step = api_resource['step'] if 'sections' in api_resource: self.sections = api_resource['sections'].copy() if 'type' in api_resource: - self.lastModified = api_resource['lastModified'] + self.type = api_resource['type'] def as_dict(self): parent_dict = super(InProgressSubmission, self).as_dict() @@ -508,7 +512,7 @@ def __init__(self, api_resource): if 'label' in api_resource: self.label = api_resource['label'] if 'type' in api_resource: - self.label = api_resource['type'] + self.type = api_resource['type'] class RelationshipType(AddressableHALResource): """ From c7ca664cc9d748188d1550a6943380a4f756a234 Mon Sep 17 00:00:00 2001 From: Jozef Misutka <332350+vidiecan@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:57:18 +0100 Subject: [PATCH 2/2] Remove upstream_ref from .gitignore --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index f0de7dc..0dc7c57 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,3 @@ __pypackages__/ env/ venv/ .idea/ -upstream_ref/