1515# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1616
1717"""Main PyCdlib class and support classes and utilities."""
18+ # fmt: off
1819
1920import bisect
2021import collections
3940from pycdlib import utils
4041
4142# For mypy annotations
42- if False : # pylint: disable=using-constant-test
43- from typing import Any , BinaryIO , Callable , Deque , Dict , Generator , IO , List , Optional , Tuple , Union # NOQA pylint: disable=unused-import
43+ from typing import Any , BinaryIO , Callable , Deque , Dict , Generator , IO , List , Optional , Tuple , Union , cast
4444
4545# There are a number of specific ways that numerical data is stored in the
4646# ISO9660/Ecma-119 standard. In the text these are referenced by the section
@@ -1685,6 +1685,7 @@ def _add_child_to_dr(self, child):
16851685 raise pycdlibexception .PyCdlibInternalError ('Trying to add child without a parent' )
16861686
16871687 try_long_entry = False
1688+ ret = - 1
16881689 try :
16891690 ret = child .parent .add_child (child , self .logical_block_size )
16901691 except pycdlibexception .PyCdlibInvalidInput :
@@ -2415,8 +2416,8 @@ def _udf_get_file_from_iso_fp(self, outfp, blocksize, udf_path):
24152416 utils .copy_data (data_len , blocksize , data_fp , outfp )
24162417
24172418 def _get_file_from_iso_fp (self , outfp , blocksize , iso_path , rr_path ,
2418- joliet_path , encoding = '' ):
2419- # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes], str) -> None
2419+ joliet_path , encoding = None ):
2420+ # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes], str | None ) -> None
24202421 """
24212422 An internal method to fetch a single file from the ISO and write it out
24222423 to the file object.
@@ -2437,16 +2438,13 @@ def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
24372438 if joliet_path is not None :
24382439 if self .joliet_vd is None :
24392440 raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a joliet_path from a non-Joliet ISO' )
2440- encoding = encoding or 'utf-16_be'
2441- found_record = self ._find_joliet_record (joliet_path , encoding )
2441+ found_record = self ._find_joliet_record (joliet_path , encoding or 'utf-16_be' )
24422442 elif rr_path is not None :
24432443 if not self .rock_ridge :
24442444 raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a rr_path from a non-Rock Ridge ISO' )
2445- encoding = encoding or 'utf-8'
2446- found_record = self ._find_rr_record (rr_path , encoding )
2445+ found_record = self ._find_rr_record (rr_path , encoding or 'utf-8' )
24472446 elif iso_path is not None :
2448- encoding = encoding or 'utf-8'
2449- found_record = self ._find_iso_record (iso_path , encoding )
2447+ found_record = self ._find_iso_record (iso_path , encoding or 'utf-8' )
24502448 else :
24512449 raise pycdlibexception .PyCdlibInternalError ('Invalid path passed to get_file_from_iso_fp' )
24522450
@@ -3233,6 +3231,7 @@ def _add_fp(self, fp, length, manage_fp, iso_path, rr_name,
32333231 offset = 0
32343232 done = False
32353233 num_bytes_to_add = 0
3234+ ino = None
32363235 while not done :
32373236 # The maximum length we allow in one directory record is 0xfffff800
32383237 # (this is taken from xorriso, though I don't really know why).
@@ -3479,8 +3478,8 @@ def _rm_joliet_dir(self, joliet_path):
34793478
34803479 return num_bytes_to_remove
34813480
3482- def _get_iso_entry (self , iso_path , encoding = 'utf-8' ):
3483- # type: (bytes, str) -> dr.DirectoryRecord
3481+ def _get_iso_entry (self , iso_path , encoding = None ):
3482+ # type: (bytes, str | None ) -> dr.DirectoryRecord
34843483 """
34853484 Internal method to get the directory record for an ISO path.
34863485
@@ -3490,13 +3489,14 @@ def _get_iso_entry(self, iso_path, encoding='utf-8'):
34903489 Returns:
34913490 A dr.DirectoryRecord object representing the path.
34923491 """
3492+ encoding = encoding or 'utf-8'
34933493 if self ._needs_reshuffle :
34943494 self ._reshuffle_extents ()
34953495
34963496 return self ._find_iso_record (iso_path , encoding )
34973497
3498- def _get_rr_entry (self , rr_path , encoding = 'utf-8' ):
3499- # type: (bytes, str) -> dr.DirectoryRecord
3498+ def _get_rr_entry (self , rr_path , encoding = None ):
3499+ # type: (bytes, str | None ) -> dr.DirectoryRecord
35003500 """
35013501 Internal method to get the directory record for a Rock Ridge path.
35023502
@@ -3507,13 +3507,14 @@ def _get_rr_entry(self, rr_path, encoding='utf-8'):
35073507 Returns:
35083508 A dr.DirectoryRecord object representing the path.
35093509 """
3510+ encoding = encoding or 'utf-8'
35103511 if self ._needs_reshuffle :
35113512 self ._reshuffle_extents ()
35123513
35133514 return self ._find_rr_record (rr_path , encoding )
35143515
3515- def _get_joliet_entry (self , joliet_path , encoding = 'utf-16_be' ):
3516- # type: (bytes, str) -> dr.DirectoryRecord
3516+ def _get_joliet_entry (self , joliet_path , encoding = None ):
3517+ # type: (bytes, str | None ) -> dr.DirectoryRecord
35173518 """
35183519 Internal method to get the directory record for a Joliet path.
35193520
@@ -3524,6 +3525,7 @@ def _get_joliet_entry(self, joliet_path, encoding='utf-16_be'):
35243525 Returns:
35253526 A dr.DirectoryRecord object representing the path.
35263527 """
3528+ encoding = encoding or 'utf-16_be'
35273529 if self ._needs_reshuffle :
35283530 self ._reshuffle_extents ()
35293531
@@ -5443,18 +5445,15 @@ def list_dir(self, iso_path, joliet=False):
54435445 if not self ._initialized :
54445446 raise pycdlibexception .PyCdlibInvalidInput ('This object is not initialized; call either open() or new() to create an ISO' )
54455447
5448+ rec = None
54465449 use_rr = False
54475450 if joliet :
54485451 rec = self ._get_joliet_entry (self ._normalize_joliet_path (iso_path ))
54495452 else :
54505453 normpath = utils .normpath (iso_path )
5451- try_rr = False
54525454 try :
54535455 rec = self ._get_iso_entry (normpath )
54545456 except pycdlibexception .PyCdlibInvalidInput :
5455- try_rr = True
5456-
5457- if try_rr :
54585457 rec = self ._get_rr_entry (normpath )
54595458 use_rr = True
54605459
@@ -5486,10 +5485,10 @@ def list_children(self, **kwargs):
54865485 if key in ('joliet_path' , 'rr_path' , 'iso_path' , 'udf_path' ):
54875486 if value is not None :
54885487 num_paths += 1
5489- elif key in ( 'encoding' ) :
5488+ elif key == 'encoding' :
54905489 continue
54915490 else :
5492- raise pycdlibexception .PyCdlibInvalidInput ("Invalid keyword, must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'" )
5491+ raise pycdlibexception .PyCdlibInvalidInput (f "Invalid keyword { key } , must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'" )
54935492
54945493 if num_paths != 1 :
54955494 raise pycdlibexception .PyCdlibInvalidInput ("Must specify one, and only one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'" )
@@ -5505,15 +5504,12 @@ def list_children(self, **kwargs):
55055504 else :
55065505 use_rr = False
55075506 if 'joliet_path' in kwargs :
5508- kwargs ['encoding' ] = kwargs .get ('encoding' ) or 'utf-16_be'
5509- rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]), kwargs ['encoding' ])
5507+ rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]), kwargs .get ('encoding' ))
55105508 elif 'rr_path' in kwargs :
5511- kwargs ['encoding' ] = kwargs .get ('encoding' ) or 'utf-8'
5512- rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]), kwargs ['encoding' ])
5509+ rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]), kwargs .get ('encoding' ))
55135510 use_rr = True
55145511 else :
5515- kwargs ['encoding' ] = kwargs .get ('encoding' ) or 'utf-8'
5516- rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]), kwargs ['encoding' ])
5512+ rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]), kwargs .get ('encoding' ))
55175513
55185514 for c in _yield_children (rec , use_rr ): # pylint: disable=use-yield-from
55195515 yield c
@@ -5569,17 +5565,16 @@ def get_record(self, **kwargs):
55695565 else :
55705566 raise pycdlibexception .PyCdlibInvalidInput (f"Invalid keyword { key } , must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'" )
55715567
5572- encoding = {"encoding" : kwargs ["encoding" ]} if "encoding" in kwargs else {}
55735568 if num_paths != 1 :
55745569 raise pycdlibexception .PyCdlibInvalidInput ("Must specify one, and only one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'" )
55755570
55765571 if 'joliet_path' in kwargs :
5577- return self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]), ** encoding )
5572+ return self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]), kwargs . get ( " encoding" ) )
55785573 if 'rr_path' in kwargs :
5579- return self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]), ** encoding )
5574+ return self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]), kwargs . get ( " encoding" ) )
55805575 if 'udf_path' in kwargs :
5581- return self ._get_udf_entry (kwargs ['udf_path' ], ** encoding )
5582- return self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]), ** encoding )
5576+ return self ._get_udf_entry (kwargs ['udf_path' ])
5577+ return self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]), kwargs . get ( " encoding" ) )
55835578
55845579 def add_isohybrid (self , part_entry = 1 , mbr_id = None , part_offset = 0 ,
55855580 geometry_sectors = 32 , geometry_heads = 64 , part_type = None ,
@@ -5921,7 +5916,7 @@ def walk(self, **kwargs):
59215916 if rec is None :
59225917 raise pycdlibexception .PyCdlibInvalidInput ('Cannot get entry for empty UDF File Entry' )
59235918 path_type = 'udf_path'
5924- default_encoding = ''
5919+ default_encoding = None
59255920 elif 'rr_path' in kwargs :
59265921 if not self .rock_ridge :
59275922 raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a rr_path from a non-Rock Ridge ISO' )
@@ -5943,23 +5938,25 @@ def walk(self, **kwargs):
59435938 filelist = []
59445939 dirdict = {}
59455940
5946- for child in reversed (list (self .list_children (** {path_type : relpath , 'encoding' : user_encoding or default_encoding }))):
5941+ fallback_encoding = default_encoding or 'utf-8'
5942+ encoding = user_encoding or fallback_encoding
5943+ for child in reversed (list (self .list_children (** {path_type : relpath , 'encoding' : encoding }))):
59475944 if child is None or child .is_dot () or child .is_dotdot ():
59485945 continue
59495946
5950- if user_encoding != '' :
5951- encoding = user_encoding
5952- elif isinstance (child , udfmod .UDFFileEntry ) and child .file_ident is not None :
5947+ if isinstance (child , udfmod .UDFFileEntry ) and child .file_ident is not None :
59535948 encoding = child .file_ident .encoding
5954- else :
5955- encoding = default_encoding or 'utf-8'
59565949
59575950 if path_type == 'rr_path' :
5958- name = child .rock_ridge .name ()
5951+ name = cast ( dr . DirectoryRecord , child ) .rock_ridge .name ()
59595952 else :
59605953 name = child .file_identifier ()
59615954
5962- encoded = name .decode (encoding )
5955+ # If the user-specified encoding is wrong, use the fallback encoding
5956+ try :
5957+ encoded = name .decode (encoding )
5958+ except UnicodeDecodeError :
5959+ encoded = name .decode (fallback_encoding )
59635960
59645961 if child .is_dir ():
59655962 dirlist .append (encoded )
0 commit comments