X Tutup
Skip to content

Commit 7d389ce

Browse files
einstein95bluegr
authored and committed
DEVTOOLS: PYCDLIB: Allow None encoding in _get_*_entry functions, add encoding fallback in walk
1 parent 8a194ad commit 7d389ce

File tree

1 file changed

+39
-42
lines changed

1 file changed

+39
-42
lines changed

devtools/contrib/pycdlib/pycdlib/pycdlib.py

Lines changed: 39 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1616

1717
"""Main PyCdlib class and support classes and utilities."""
18+
# fmt: off
1819

1920
import bisect
2021
import collections
@@ -39,8 +40,7 @@
3940
from pycdlib import utils
4041

4142
# For mypy annotations
42-
if False: # pylint: disable=using-constant-test
43-
from typing import Any, BinaryIO, Callable, Deque, Dict, Generator, IO, List, Optional, Tuple, Union # NOQA pylint: disable=unused-import
43+
from typing import Any, BinaryIO, Callable, Deque, Dict, Generator, IO, List, Optional, Tuple, Union, cast
4444

4545
# There are a number of specific ways that numerical data is stored in the
4646
# ISO9660/Ecma-119 standard. In the text these are referenced by the section
@@ -1685,6 +1685,7 @@ def _add_child_to_dr(self, child):
16851685
raise pycdlibexception.PyCdlibInternalError('Trying to add child without a parent')
16861686

16871687
try_long_entry = False
1688+
ret = -1
16881689
try:
16891690
ret = child.parent.add_child(child, self.logical_block_size)
16901691
except pycdlibexception.PyCdlibInvalidInput:
@@ -2415,8 +2416,8 @@ def _udf_get_file_from_iso_fp(self, outfp, blocksize, udf_path):
24152416
utils.copy_data(data_len, blocksize, data_fp, outfp)
24162417

24172418
def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
2418-
joliet_path, encoding=''):
2419-
# type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes], str) -> None
2419+
joliet_path, encoding=None):
2420+
# type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes], str | None) -> None
24202421
"""
24212422
An internal method to fetch a single file from the ISO and write it out
24222423
to the file object.
@@ -2437,16 +2438,13 @@ def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
24372438
if joliet_path is not None:
24382439
if self.joliet_vd is None:
24392440
raise pycdlibexception.PyCdlibInvalidInput('Cannot fetch a joliet_path from a non-Joliet ISO')
2440-
encoding = encoding or 'utf-16_be'
2441-
found_record = self._find_joliet_record(joliet_path, encoding)
2441+
found_record = self._find_joliet_record(joliet_path, encoding or 'utf-16_be')
24422442
elif rr_path is not None:
24432443
if not self.rock_ridge:
24442444
raise pycdlibexception.PyCdlibInvalidInput('Cannot fetch a rr_path from a non-Rock Ridge ISO')
2445-
encoding = encoding or 'utf-8'
2446-
found_record = self._find_rr_record(rr_path, encoding)
2445+
found_record = self._find_rr_record(rr_path, encoding or 'utf-8')
24472446
elif iso_path is not None:
2448-
encoding = encoding or 'utf-8'
2449-
found_record = self._find_iso_record(iso_path, encoding)
2447+
found_record = self._find_iso_record(iso_path, encoding or 'utf-8')
24502448
else:
24512449
raise pycdlibexception.PyCdlibInternalError('Invalid path passed to get_file_from_iso_fp')
24522450

@@ -3233,6 +3231,7 @@ def _add_fp(self, fp, length, manage_fp, iso_path, rr_name,
32333231
offset = 0
32343232
done = False
32353233
num_bytes_to_add = 0
3234+
ino = None
32363235
while not done:
32373236
# The maximum length we allow in one directory record is 0xfffff800
32383237
# (this is taken from xorriso, though I don't really know why).
@@ -3479,8 +3478,8 @@ def _rm_joliet_dir(self, joliet_path):
34793478

34803479
return num_bytes_to_remove
34813480

3482-
def _get_iso_entry(self, iso_path, encoding='utf-8'):
3483-
# type: (bytes, str) -> dr.DirectoryRecord
3481+
def _get_iso_entry(self, iso_path, encoding=None):
3482+
# type: (bytes, str | None) -> dr.DirectoryRecord
34843483
"""
34853484
Internal method to get the directory record for an ISO path.
34863485
@@ -3490,13 +3489,14 @@ def _get_iso_entry(self, iso_path, encoding='utf-8'):
34903489
Returns:
34913490
A dr.DirectoryRecord object representing the path.
34923491
"""
3492+
encoding = encoding or 'utf-8'
34933493
if self._needs_reshuffle:
34943494
self._reshuffle_extents()
34953495

34963496
return self._find_iso_record(iso_path, encoding)
34973497

3498-
def _get_rr_entry(self, rr_path, encoding='utf-8'):
3499-
# type: (bytes, str) -> dr.DirectoryRecord
3498+
def _get_rr_entry(self, rr_path, encoding=None):
3499+
# type: (bytes, str | None) -> dr.DirectoryRecord
35003500
"""
35013501
Internal method to get the directory record for a Rock Ridge path.
35023502
@@ -3507,13 +3507,14 @@ def _get_rr_entry(self, rr_path, encoding='utf-8'):
35073507
Returns:
35083508
A dr.DirectoryRecord object representing the path.
35093509
"""
3510+
encoding = encoding or 'utf-8'
35103511
if self._needs_reshuffle:
35113512
self._reshuffle_extents()
35123513

35133514
return self._find_rr_record(rr_path, encoding)
35143515

3515-
def _get_joliet_entry(self, joliet_path, encoding='utf-16_be'):
3516-
# type: (bytes, str) -> dr.DirectoryRecord
3516+
def _get_joliet_entry(self, joliet_path, encoding=None):
3517+
# type: (bytes, str | None) -> dr.DirectoryRecord
35173518
"""
35183519
Internal method to get the directory record for a Joliet path.
35193520
@@ -3524,6 +3525,7 @@ def _get_joliet_entry(self, joliet_path, encoding='utf-16_be'):
35243525
Returns:
35253526
A dr.DirectoryRecord object representing the path.
35263527
"""
3528+
encoding = encoding or 'utf-16_be'
35273529
if self._needs_reshuffle:
35283530
self._reshuffle_extents()
35293531

@@ -5443,18 +5445,15 @@ def list_dir(self, iso_path, joliet=False):
54435445
if not self._initialized:
54445446
raise pycdlibexception.PyCdlibInvalidInput('This object is not initialized; call either open() or new() to create an ISO')
54455447

5448+
rec = None
54465449
use_rr = False
54475450
if joliet:
54485451
rec = self._get_joliet_entry(self._normalize_joliet_path(iso_path))
54495452
else:
54505453
normpath = utils.normpath(iso_path)
5451-
try_rr = False
54525454
try:
54535455
rec = self._get_iso_entry(normpath)
54545456
except pycdlibexception.PyCdlibInvalidInput:
5455-
try_rr = True
5456-
5457-
if try_rr:
54585457
rec = self._get_rr_entry(normpath)
54595458
use_rr = True
54605459

@@ -5486,10 +5485,10 @@ def list_children(self, **kwargs):
54865485
if key in ('joliet_path', 'rr_path', 'iso_path', 'udf_path'):
54875486
if value is not None:
54885487
num_paths += 1
5489-
elif key in ('encoding'):
5488+
elif key == 'encoding':
54905489
continue
54915490
else:
5492-
raise pycdlibexception.PyCdlibInvalidInput("Invalid keyword, must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'")
5491+
raise pycdlibexception.PyCdlibInvalidInput(f"Invalid keyword {key}, must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'")
54935492

54945493
if num_paths != 1:
54955494
raise pycdlibexception.PyCdlibInvalidInput("Must specify one, and only one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'")
@@ -5505,15 +5504,12 @@ def list_children(self, **kwargs):
55055504
else:
55065505
use_rr = False
55075506
if 'joliet_path' in kwargs:
5508-
kwargs['encoding'] = kwargs.get('encoding') or 'utf-16_be'
5509-
rec = self._get_joliet_entry(self._normalize_joliet_path(kwargs['joliet_path']), kwargs['encoding'])
5507+
rec = self._get_joliet_entry(self._normalize_joliet_path(kwargs['joliet_path']), kwargs.get('encoding'))
55105508
elif 'rr_path' in kwargs:
5511-
kwargs['encoding'] = kwargs.get('encoding') or 'utf-8'
5512-
rec = self._get_rr_entry(utils.normpath(kwargs['rr_path']), kwargs['encoding'])
5509+
rec = self._get_rr_entry(utils.normpath(kwargs['rr_path']), kwargs.get('encoding'))
55135510
use_rr = True
55145511
else:
5515-
kwargs['encoding'] = kwargs.get('encoding') or 'utf-8'
5516-
rec = self._get_iso_entry(utils.normpath(kwargs['iso_path']), kwargs['encoding'])
5512+
rec = self._get_iso_entry(utils.normpath(kwargs['iso_path']), kwargs.get('encoding'))
55175513

55185514
for c in _yield_children(rec, use_rr): # pylint: disable=use-yield-from
55195515
yield c
@@ -5569,17 +5565,16 @@ def get_record(self, **kwargs):
55695565
else:
55705566
raise pycdlibexception.PyCdlibInvalidInput(f"Invalid keyword {key}, must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'")
55715567

5572-
encoding = {"encoding": kwargs["encoding"]} if "encoding" in kwargs else {}
55735568
if num_paths != 1:
55745569
raise pycdlibexception.PyCdlibInvalidInput("Must specify one, and only one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'")
55755570

55765571
if 'joliet_path' in kwargs:
5577-
return self._get_joliet_entry(self._normalize_joliet_path(kwargs['joliet_path']), **encoding)
5572+
return self._get_joliet_entry(self._normalize_joliet_path(kwargs['joliet_path']), kwargs.get("encoding"))
55785573
if 'rr_path' in kwargs:
5579-
return self._get_rr_entry(utils.normpath(kwargs['rr_path']), **encoding)
5574+
return self._get_rr_entry(utils.normpath(kwargs['rr_path']), kwargs.get("encoding"))
55805575
if 'udf_path' in kwargs:
5581-
return self._get_udf_entry(kwargs['udf_path'], **encoding)
5582-
return self._get_iso_entry(utils.normpath(kwargs['iso_path']), **encoding)
5576+
return self._get_udf_entry(kwargs['udf_path'])
5577+
return self._get_iso_entry(utils.normpath(kwargs['iso_path']), kwargs.get("encoding"))
55835578

55845579
def add_isohybrid(self, part_entry=1, mbr_id=None, part_offset=0,
55855580
geometry_sectors=32, geometry_heads=64, part_type=None,
@@ -5921,7 +5916,7 @@ def walk(self, **kwargs):
59215916
if rec is None:
59225917
raise pycdlibexception.PyCdlibInvalidInput('Cannot get entry for empty UDF File Entry')
59235918
path_type = 'udf_path'
5924-
default_encoding = ''
5919+
default_encoding = None
59255920
elif 'rr_path' in kwargs:
59265921
if not self.rock_ridge:
59275922
raise pycdlibexception.PyCdlibInvalidInput('Cannot fetch a rr_path from a non-Rock Ridge ISO')
@@ -5943,23 +5938,25 @@ def walk(self, **kwargs):
59435938
filelist = []
59445939
dirdict = {}
59455940

5946-
for child in reversed(list(self.list_children(**{path_type: relpath, 'encoding': user_encoding or default_encoding}))):
5941+
fallback_encoding = default_encoding or 'utf-8'
5942+
encoding = user_encoding or fallback_encoding
5943+
for child in reversed(list(self.list_children(**{path_type: relpath, 'encoding': encoding}))):
59475944
if child is None or child.is_dot() or child.is_dotdot():
59485945
continue
59495946

5950-
if user_encoding != '':
5951-
encoding = user_encoding
5952-
elif isinstance(child, udfmod.UDFFileEntry) and child.file_ident is not None:
5947+
if isinstance(child, udfmod.UDFFileEntry) and child.file_ident is not None:
59535948
encoding = child.file_ident.encoding
5954-
else:
5955-
encoding = default_encoding or 'utf-8'
59565949

59575950
if path_type == 'rr_path':
5958-
name = child.rock_ridge.name()
5951+
name = cast(dr.DirectoryRecord, child).rock_ridge.name()
59595952
else:
59605953
name = child.file_identifier()
59615954

5962-
encoded = name.decode(encoding)
5955+
# If the user-specified encoding is wrong, use the fallback encoding
5956+
try:
5957+
encoded = name.decode(encoding)
5958+
except UnicodeDecodeError:
5959+
encoded = name.decode(fallback_encoding)
59635960

59645961
if child.is_dir():
59655962
dirlist.append(encoded)

0 commit comments

Comments
 (0)
X Tutup