1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
|
Description: workaround overflow errors with numpy 2.
These changes fix various overflow errors raised since numpy 2 by
emulating the former behavior, in which overflows weren't trapped.
The -1 error code in unsigned types is handled by setting the value
to the maximum unsigned integer instead, and large index values not
fitting in 32 bits are reduced with a modulo operation.
.
The change also includes a slight modification to accept unusual
base-16 read group IDs containing dashes, as reported by an end user
upstream, resolving the following error:
.
File "/home/shuaiw/miniconda3/envs/methy2/lib/python3.11/site-packages/pbcore/io/align/BamIO.py", line 218, in __init__
self._loadReadGroupInfo()
File "/home/shuaiw/miniconda3/envs/methy2/lib/python3.11/site-packages/pbcore/io/align/BamIO.py", line 89, in _loadReadGroupInfo
rgID = rgAsInt(rg["ID"])
^^^^^^^^^^^^^^^^^
File "/home/shuaiw/miniconda3/envs/methy2/lib/python3.11/site-packages/pbcore/io/align/_BamSupport.py", line 70, in rgAsInt
return np.int32(int(rgIdString.split("/")[0], 16))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: invalid literal for int() with base 16: 'cb4d472d-100C60F6'
.
I'm reluctant to formally send this patch upstream, as it feels like a
workaround where the right answer would probably be to bump the type
sizes to 64-bit. However, I do not know what the actually correct
approach is.
Author: Étienne Mollier <emollier@debian.org>
Bug: https://github.com/PacificBiosciences/pbcore/issues/127
Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1095090
Last-Update: 2025-02-27
---
This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
--- python-pbcore.orig/pbcore/io/align/BamAlignment.py
+++ python-pbcore/pbcore/io/align/BamAlignment.py
@@ -532,7 +532,11 @@
if data.dtype == np.int8:
gapCode = ord("-")
else:
- gapCode = data.dtype.type(-1)
+ try:
+ gapCode = data.dtype.type(-1)
+ except OverflowError:
+ # Hack: accommodate unsigned types with numpy 2+.
+ gapCode = data.dtype.type(np.iinfo(data.dtype).max)
uc = self.unrolledCigar(orientation=orientation)
alnData = np.repeat(np.array(gapCode, dtype=data.dtype), len(uc))
gapMask = (uc == gapOp)
--- python-pbcore.orig/pbcore/io/align/_BamSupport.py
+++ python-pbcore/pbcore/io/align/_BamSupport.py
@@ -1,6 +1,7 @@
# Author: David Alexander
import numpy as np
+import re
class UnavailableFeature(Exception):
@@ -63,7 +64,8 @@
# qId calculation from RG ID string
#
def rgAsInt(rgIdString):
- return np.int32(int(rgIdString.split("/")[0], 16))
+ return np.int32(int(re.sub("-", "", rgIdString.split("/")[0]), 16)
+ % (np.iinfo(np.int32).max+1))
#
# Kinetics: decode the scheme we are using to encode approximate frame
|