From 19ded80c53f2174f543c098fd68ba20943b95bab Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Apr 2026 17:23:36 +0000 Subject: [PATCH 1/4] Initial plan From 14921d7f35ab5c2fb029802379faf47fbdc8d235 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Apr 2026 17:27:36 +0000 Subject: [PATCH 2/4] Fix TypeError: Object of type __proxy__ is not JSON serializable Update sanitize_for_json to handle Django lazy translation objects (__proxy__) and any other non-JSON-serializable types by converting them to their string representation. Previously, the function only handled str, dict, list/tuple and returned all other types unchanged, causing JSON serialization to fail when Django's gettext_lazy strings were added to task events. Add tests covering the new behavior with lazy proxy objects. Agent-Logs-Url: https://github.com/scieloorg/scms-upload/sessions/628ffd7d-9c8b-4b44-9f27-e162ad029666 Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- core/utils/sanitize.py | 18 +++++++++++++----- proc/tests.py | 43 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/core/utils/sanitize.py b/core/utils/sanitize.py index c23913b47..b48b51ef6 100644 --- a/core/utils/sanitize.py +++ b/core/utils/sanitize.py @@ -1,9 +1,13 @@ def sanitize_for_json(obj): - """Recursively sanitize data to remove Unicode surrogate characters. + """Recursively sanitize data to make it JSON-serializable. - Surrogate characters (U+D800-U+DFFF) are invalid in JSON and rejected by - PostgreSQL. They can appear in file paths read from filesystems using - Python's 'surrogateescape' error handler. + Handles: + - Unicode surrogate characters (U+D800-U+DFFF), which are invalid in JSON + and rejected by PostgreSQL. They can appear in file paths read from + filesystems using Python's 'surrogateescape' error handler. + - Django lazy translation objects (``__proxy__``) and any other + non-JSON-serializable types, which are converted to their string + representation. """ if isinstance(obj, str): # Encode using surrogateescape to recover original bytes from surrogates, @@ -17,4 +21,8 @@ def sanitize_for_json(obj): return {sanitize_for_json(k): sanitize_for_json(v) for k, v in obj.items()} if isinstance(obj, (list, tuple)): return [sanitize_for_json(item) for item in obj] - return obj + if isinstance(obj, (int, float, bool, type(None))): + return obj + # Convert any other non-JSON-serializable type (e.g. Django lazy __proxy__ + # objects from gettext_lazy) to its string representation. + return str(obj) diff --git a/proc/tests.py b/proc/tests.py index d0af74de6..6915773d9 100644 --- a/proc/tests.py +++ b/proc/tests.py @@ -97,6 +97,49 @@ def test_high_surrogate_handled(self): json.dumps(result) # Must not raise self.assertNotIn("\ud800", result) + def test_django_lazy_proxy_converted_to_string(self): + """Django lazy translation objects (__proxy__) must be converted to str.""" + from django.utils.translation import gettext_lazy as _ + + lazy_text = _("Select journals by collection") + result = sanitize_for_json(lazy_text) + self.assertIsInstance(result, str) + json.dumps(result) # Must not raise + + def test_list_with_lazy_proxy_converted(self): + """A list containing a lazy proxy object must be fully serializable.""" + from django.utils.translation import gettext_lazy as _ + + data = [_("Select journals by collection"), "normal string", 42] + result = sanitize_for_json(data) + json_str = json.dumps(result) # Must not raise + parsed = json.loads(json_str) + self.assertIsInstance(parsed[0], str) + self.assertEqual(parsed[1], "normal string") + self.assertEqual(parsed[2], 42) + + def test_dict_with_lazy_proxy_value_converted(self): + """A dict containing a lazy proxy value must be fully serializable.""" + from django.utils.translation import gettext_lazy as _ + + data = {"events": [_("Select journals by collection")], "count": 1} + result = sanitize_for_json(data) + json_str = json.dumps(result) # Must not raise + parsed = json.loads(json_str) + self.assertIsInstance(parsed["events"][0], str) + self.assertEqual(parsed["count"], 1) + + def test_unknown_object_converted_to_string(self): + """Any unknown non-JSON-serializable object is converted to its str repr.""" + + class CustomObj: + def __str__(self): + return "custom" + + result = sanitize_for_json(CustomObj()) + self.assertEqual(result, "custom") + json.dumps(result) # Must not raise + if __name__ == "__main__": unittest.main() From 10fd2dca8b4fdbbac54eaa114076047d4f063012 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Apr 2026 17:29:39 +0000 Subject: [PATCH 3/4] Address code review: add clarifying comment and assertion to test Agent-Logs-Url: https://github.com/scieloorg/scms-upload/sessions/628ffd7d-9c8b-4b44-9f27-e162ad029666 Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- core/utils/sanitize.py | 3 +++ proc/tests.py | 1 + 2 files changed, 4 insertions(+) diff --git a/core/utils/sanitize.py b/core/utils/sanitize.py index b48b51ef6..1093716e9 100644 --- a/core/utils/sanitize.py +++ b/core/utils/sanitize.py @@ -21,6 +21,9 @@ def sanitize_for_json(obj): return {sanitize_for_json(k): sanitize_for_json(v) for k, v in obj.items()} if isinstance(obj, (list, tuple)): return [sanitize_for_json(item) for item in obj] + # Preserve native JSON scalar types as-is. This guard is required for + # booleans in particular: without it, True/False would be converted to the + # strings 'True'/'False' instead of the JSON literals true/false. if isinstance(obj, (int, float, bool, type(None))): return obj # Convert any other non-JSON-serializable type (e.g. Django lazy __proxy__ diff --git a/proc/tests.py b/proc/tests.py index 6915773d9..b7a0f3262 100644 --- a/proc/tests.py +++ b/proc/tests.py @@ -104,6 +104,7 @@ def test_django_lazy_proxy_converted_to_string(self): lazy_text = _("Select journals by collection") result = sanitize_for_json(lazy_text) self.assertIsInstance(result, str) + self.assertEqual(result, "Select journals by collection") json.dumps(result) # Must not raise def test_list_with_lazy_proxy_converted(self): From fc84f73df74e26e955f7e965458a73e26ca777c1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 26 Apr 2026 22:11:43 +0000 Subject: [PATCH 4/4] Refactor sanitize_for_json to use try/except json.dumps for serializability check - Use json.dumps in a try/except to test whether an object is already JSON-serializable, returning it unchanged if it is. For non-serializable objects, convert to string and recursively re-sanitize to also handle surrogate code points that __str__() might produce. - Add `import json` required by the new try/except block. - Fix brittle test assertion: compare against str(lazy_text) instead of hard-coded English literal, so the test stays valid under any active locale. - Remove __main__ block from proc/tests.py since the tests now depend on Django being configured and must be run via pytest. Agent-Logs-Url: https://github.com/scieloorg/scms-upload/sessions/1b89ffc2-7f29-4874-963f-f1394ce02dbc Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- core/utils/sanitize.py | 18 +++++++++++------- proc/tests.py | 5 ++--- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/core/utils/sanitize.py b/core/utils/sanitize.py index 1093716e9..ebaa4f774 100644 --- a/core/utils/sanitize.py +++ b/core/utils/sanitize.py @@ -1,3 +1,6 @@ +import json + + def sanitize_for_json(obj): """Recursively sanitize data to make it JSON-serializable. @@ -21,11 +24,12 @@ def sanitize_for_json(obj): return {sanitize_for_json(k): sanitize_for_json(v) for k, v in obj.items()} if isinstance(obj, (list, tuple)): return [sanitize_for_json(item) for item in obj] - # Preserve native JSON scalar types as-is. This guard is required for - # booleans in particular: without it, True/False would be converted to the - # strings 'True'/'False' instead of the JSON literals true/false. - if isinstance(obj, (int, float, bool, type(None))): + # For any other type, test JSON serializability directly. If the object + # serializes fine (int, float, bool, None, …) return it unchanged. + # Otherwise convert to string and re-sanitize to also catch surrogate code + # points that __str__() might produce (e.g. Django lazy __proxy__). + try: + json.dumps(obj) return obj - # Convert any other non-JSON-serializable type (e.g. Django lazy __proxy__ - # objects from gettext_lazy) to its string representation. - return str(obj) + except (TypeError, ValueError): + return sanitize_for_json(str(obj)) diff --git a/proc/tests.py b/proc/tests.py index b7a0f3262..49ab0d4d4 100644 --- a/proc/tests.py +++ b/proc/tests.py @@ -104,7 +104,7 @@ def test_django_lazy_proxy_converted_to_string(self): lazy_text = _("Select journals by collection") result = sanitize_for_json(lazy_text) self.assertIsInstance(result, str) - self.assertEqual(result, "Select journals by collection") + self.assertEqual(result, str(lazy_text)) json.dumps(result) # Must not raise def test_list_with_lazy_proxy_converted(self): @@ -142,5 +142,4 @@ def __str__(self): json.dumps(result) # Must not raise -if __name__ == "__main__": - unittest.main() +