1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
|
From 36be975b3aa8a3ddae16462d1e1f55a76a61a8ea Mon Sep 17 00:00:00 2001
From: Zachary Dremann <dremann@gmail.com>
Date: Tue, 25 Mar 2025 21:44:56 -0400
Subject: [PATCH] fix: convert to cstrings in PyString::from_object
fixes #5005
This only fixes the API and adds a test of it; it does not
deprecate the API or introduce a version which takes `&CStr` directly.
That can be done later.
Backported from v0.24.1
Reviewed-by: NoisyCoil <noisycoil@tutanota.com>
---
newsfragments/5008.fixed.md | 1 +
src/types/string.rs | 28 ++++++++++++++++++++++++++++
2 files changed, 29 insertions(+)
create mode 100644 newsfragments/5008.fixed.md
--- /dev/null
+++ b/newsfragments/5008.fixed.md
@@ -0,0 +1 @@
+Fix `PyString::from_object`: avoid out-of-bounds reads by null-terminating the `encoding` and `errors` parameters
\ No newline at end of file
--- a/src/types/string.rs
+++ b/src/types/string.rs
@@ -10,6 +10,7 @@
use crate::PyNativeType;
use crate::{ffi, Bound, IntoPy, Py, PyAny, PyResult, Python};
use std::borrow::Cow;
+use std::ffi::CString;
use std::str;
/// Represents raw data backing a Python `str`.
@@ -198,6 +199,8 @@
encoding: &str,
errors: &str,
) -> PyResult<Bound<'py, PyString>> {
+ let encoding = CString::new(encoding)?;
+ let errors = CString::new(errors)?;
unsafe {
ffi::PyUnicode_FromEncodedObject(
src.as_ptr(),
@@ -718,6 +721,31 @@
}
#[test]
+ fn test_string_from_object() {
+ Python::with_gil(|py| {
+ // `\xFF` is not valid UTF-8; with the "ignore" handler the decoded text omits it.
+ let raw = PyBytes::new_bound(py, b"ab\xFFcd");
+ let decoded = PyString::from_object_bound(&raw, "utf-8", "ignore").unwrap();
+
+ let text = decoded.to_cow().unwrap();
+ assert_eq!(text, "abcd");
+ });
+ }
+
+ #[test]
+ fn test_string_from_object_with_invalid_encoding_errors() {
+ Python::with_gil(|py| {
+ let py_bytes = PyBytes::new_bound(py, b"abcd");
+ // An interior NUL cannot be turned into a C string, so this `encoding` must be rejected.
+ let result = PyString::from_object_bound(&py_bytes, "utf\0-8", "ignore");
+ assert!(result.is_err());
+ // The same applies to an interior NUL in the `errors` argument.
+ let result = PyString::from_object_bound(&py_bytes, "utf-8", "ign\0ore");
+ assert!(result.is_err());
+ });
+ }
+
+ #[test]
#[cfg(not(any(Py_LIMITED_API, PyPy)))]
fn test_string_data_ucs1() {
Python::with_gil(|py| {
|