@@ -705,8 +705,8 @@ def _seek_to_extent(self, extent):
705705 self ._cdfp .seek (extent * self .logical_block_size )
706706
707707 @functools .lru_cache (maxsize = 256 )
708- def _find_iso_record (self , iso_path ):
709- # type: (bytes) -> dr.DirectoryRecord
708+ def _find_iso_record (self , iso_path , encoding = 'utf-8' ):
709+ # type: (bytes, str ) -> dr.DirectoryRecord
710710 """
711711 An internal method to find a directory record on the ISO given an ISO
712712 path. If the entry is found, it returns the directory record object
@@ -715,14 +715,15 @@ def _find_iso_record(self, iso_path):
715715
716716 Parameters:
717717 iso_path - The ISO9660 path to lookup.
718+ encoding - The string encoding used for the path.
718719 Returns:
719720 The directory record entry representing the entry on the ISO.
720721 """
721- return _find_dr_record_by_name (self .pvd , iso_path , 'utf-8' )
722+ return _find_dr_record_by_name (self .pvd , iso_path , encoding )
722723
723724 @functools .lru_cache (maxsize = 256 )
724- def _find_rr_record (self , rr_path ):
725- # type: (bytes) -> dr.DirectoryRecord
725+ def _find_rr_record (self , rr_path , encoding = 'utf-8' ):
726+ # type: (bytes, str ) -> dr.DirectoryRecord
726727 """
727728 An internal method to find a directory record on the ISO given a Rock
728729 Ridge path. If the entry is found, it returns the directory record
@@ -731,6 +732,7 @@ def _find_rr_record(self, rr_path):
731732
732733 Parameters:
733734 rr_path - The Rock Ridge path to lookup.
735+ encoding - The string encoding used for the path.
734736 Returns:
735737 The directory record entry representing the entry on the ISO.
736738 """
@@ -742,7 +744,7 @@ def _find_rr_record(self, rr_path):
742744
743745 splitpath = utils .split_path (rr_path )
744746
745- currpath = splitpath .pop (0 ).decode ('utf-8' ).encode ('utf-8' )
747+ currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
746748
747749 entry = root_dir_record
748750
@@ -793,13 +795,13 @@ def _find_rr_record(self, rr_path):
793795 if not child .is_dir ():
794796 break
795797 entry = child
796- currpath = splitpath .pop (0 ).decode ('utf-8' ).encode ('utf-8' )
798+ currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
797799
798800 raise pycdlibexception .PyCdlibInvalidInput ('Could not find path' )
799801
800802 @functools .lru_cache (maxsize = 256 )
801- def _find_joliet_record (self , joliet_path ):
802- # type: (bytes) -> dr.DirectoryRecord
803+ def _find_joliet_record (self , joliet_path , encoding = 'utf-16_be' ):
804+ # type: (bytes, str ) -> dr.DirectoryRecord
803805 """
804806 An internal method to find a directory record on the ISO given a Joliet
805807 path. If the entry is found, it returns the directory record object
@@ -808,12 +810,13 @@ def _find_joliet_record(self, joliet_path):
808810
809811 Parameters:
810812 joliet_path - The Joliet path to lookup.
813+ encoding - The string encoding used for the path.
811814 Returns:
812815 The directory record entry representing the entry on the ISO.
813816 """
814817 if self .joliet_vd is None :
815818 raise pycdlibexception .PyCdlibInternalError ('Joliet path requested on non-Joliet ISO' )
816- return _find_dr_record_by_name (self .joliet_vd , joliet_path , 'utf-16_be' )
819+ return _find_dr_record_by_name (self .joliet_vd , joliet_path , encoding )
817820
818821 @functools .lru_cache (maxsize = 256 )
819822 def _find_udf_record (self , udf_path ):
@@ -2412,8 +2415,8 @@ def _udf_get_file_from_iso_fp(self, outfp, blocksize, udf_path):
24122415 utils .copy_data (data_len , blocksize , data_fp , outfp )
24132416
24142417 def _get_file_from_iso_fp (self , outfp , blocksize , iso_path , rr_path ,
2415- joliet_path ):
2416- # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes]) -> None
2418+ joliet_path , encoding = '' ):
2419+ # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes], str ) -> None
24172420 """
24182421 An internal method to fetch a single file from the ISO and write it out
24192422 to the file object.
@@ -2427,19 +2430,23 @@ def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
24272430 with iso_path and joliet_path).
24282431 joliet_path - The absolute Joliet path to lookup on the ISO (exclusive
24292432 with iso_path and rr_path).
2433+ encoding - The string encoding used for the path.
24302434 Returns:
24312435 Nothing.
24322436 """
24332437 if joliet_path is not None :
24342438 if self .joliet_vd is None :
24352439 raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a joliet_path from a non-Joliet ISO' )
2436- found_record = self ._find_joliet_record (joliet_path )
2440+ encoding = encoding or 'utf-16_be'
2441+ found_record = self ._find_joliet_record (joliet_path , encoding )
24372442 elif rr_path is not None :
24382443 if not self .rock_ridge :
24392444 raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a rr_path from a non-Rock Ridge ISO' )
2440- found_record = self ._find_rr_record (rr_path )
2445+ encoding = encoding or 'utf-8'
2446+ found_record = self ._find_rr_record (rr_path , encoding )
24412447 elif iso_path is not None :
2442- found_record = self ._find_iso_record (iso_path )
2448+ encoding = encoding or 'utf-8'
2449+ found_record = self ._find_iso_record (iso_path , encoding )
24432450 else :
24442451 raise pycdlibexception .PyCdlibInternalError ('Invalid path passed to get_file_from_iso_fp' )
24452452
@@ -3472,52 +3479,55 @@ def _rm_joliet_dir(self, joliet_path):
34723479
34733480 return num_bytes_to_remove
34743481
3475- def _get_iso_entry (self , iso_path ):
3476- # type: (bytes) -> dr.DirectoryRecord
3482+ def _get_iso_entry (self , iso_path , encoding = 'utf-8' ):
3483+ # type: (bytes, str ) -> dr.DirectoryRecord
34773484 """
34783485 Internal method to get the directory record for an ISO path.
34793486
34803487 Parameters:
34813488 iso_path - The path on the ISO filesystem to look up the record for.
3489+ encoding - The string encoding used for the path.
34823490 Returns:
34833491 A dr.DirectoryRecord object representing the path.
34843492 """
34853493 if self ._needs_reshuffle :
34863494 self ._reshuffle_extents ()
34873495
3488- return self ._find_iso_record (iso_path )
3496+ return self ._find_iso_record (iso_path , encoding )
34893497
3490- def _get_rr_entry (self , rr_path ):
3491- # type: (bytes) -> dr.DirectoryRecord
3498+ def _get_rr_entry (self , rr_path , encoding = 'utf-8' ):
3499+ # type: (bytes, str ) -> dr.DirectoryRecord
34923500 """
34933501 Internal method to get the directory record for a Rock Ridge path.
34943502
34953503 Parameters:
34963504 rr_path - The Rock Ridge path on the ISO filesystem to look up the
34973505 record for.
3506+ encoding - The string encoding used for the path.
34983507 Returns:
34993508 A dr.DirectoryRecord object representing the path.
35003509 """
35013510 if self ._needs_reshuffle :
35023511 self ._reshuffle_extents ()
35033512
3504- return self ._find_rr_record (rr_path )
3513+ return self ._find_rr_record (rr_path , encoding )
35053514
3506- def _get_joliet_entry (self , joliet_path ):
3507- # type: (bytes) -> dr.DirectoryRecord
3515+ def _get_joliet_entry (self , joliet_path , encoding = 'utf-16_be' ):
3516+ # type: (bytes, str ) -> dr.DirectoryRecord
35083517 """
35093518 Internal method to get the directory record for a Joliet path.
35103519
35113520 Parameters:
35123521 joliet_path - The path on the Joliet filesystem to look up the record
35133522 for.
3523+ encoding - The string encoding used for the path.
35143524 Returns:
35153525 A dr.DirectoryRecord object representing the path.
35163526 """
35173527 if self ._needs_reshuffle :
35183528 self ._reshuffle_extents ()
35193529
3520- return self ._find_joliet_record (joliet_path )
3530+ return self ._find_joliet_record (joliet_path , encoding )
35213531
35223532 def _get_udf_entry (self , udf_path ):
35233533 # type: (str) -> udfmod.UDFFileEntry
@@ -4103,6 +4113,7 @@ def get_file_from_iso(self, local_path, **kwargs):
41034113 with iso_path, rr_path, and udf_path).
41044114 udf_path - The absolute UDF path to lookup on the ISO (exclusive with
41054115 iso_path, rr_path, and joliet_path).
4116+ encoding - The encoding to use for parsing the filenames.
41064117 Returns:
41074118 Nothing.
41084119 """
@@ -4114,6 +4125,7 @@ def get_file_from_iso(self, local_path, **kwargs):
41144125 iso_path = None
41154126 rr_path = None
41164127 udf_path = None
4128+ encoding = ''
41174129 num_paths = 0
41184130 for key , value in kwargs .items ():
41194131 if key == 'blocksize' :
@@ -4144,6 +4156,10 @@ def get_file_from_iso(self, local_path, **kwargs):
41444156 num_paths += 1
41454157 elif value is not None :
41464158 raise pycdlibexception .PyCdlibInvalidInput ('iso_path must be a string' )
4159+ elif key == 'encoding' :
4160+ if not isinstance (value , str ):
4161+ raise pycdlibexception .PyCdlibInvalidInput ('encoding must be a string' )
4162+ encoding = value
41474163 else :
41484164 raise pycdlibexception .PyCdlibInvalidInput ('Unknown keyword %s' % (key ))
41494165
@@ -4155,7 +4171,7 @@ def get_file_from_iso(self, local_path, **kwargs):
41554171 self ._udf_get_file_from_iso_fp (fp , blocksize , udf_path )
41564172 else :
41574173 self ._get_file_from_iso_fp (fp , blocksize , iso_path , rr_path ,
4158- joliet_path )
4174+ joliet_path , encoding )
41594175
41604176 def get_file_from_iso_fp (self , outfp , ** kwargs ):
41614177 # type: (BinaryIO, Union[str, int]) -> None
@@ -4173,6 +4189,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
41734189 with iso_path, rr_path, and udf_path).
41744190 udf_path - The absolute UDF path to lookup on the ISO (exclusive with
41754191 iso_path, rr_path, and joliet_path).
4192+ encoding - The encoding to use for parsing the filenames.
41764193 Returns:
41774194 Nothing.
41784195 """
@@ -4184,6 +4201,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
41844201 iso_path = None
41854202 rr_path = None
41864203 udf_path = None
4204+ encoding = ''
41874205 num_paths = 0
41884206 for key , value in kwargs .items ():
41894207 if key == 'blocksize' :
@@ -4214,6 +4232,10 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
42144232 num_paths += 1
42154233 elif value is not None :
42164234 raise pycdlibexception .PyCdlibInvalidInput ('udf_path must be a string' )
4235+ elif key == 'encoding' :
4236+ if not isinstance (value , str ):
4237+ raise pycdlibexception .PyCdlibInvalidInput ('encoding must be a string' )
4238+ encoding = value
42174239 else :
42184240 raise pycdlibexception .PyCdlibInvalidInput ('Unknown keyword %s' % (key ))
42194241
@@ -4224,7 +4246,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
42244246 self ._udf_get_file_from_iso_fp (outfp , blocksize , udf_path )
42254247 else :
42264248 self ._get_file_from_iso_fp (outfp , blocksize , iso_path , rr_path ,
4227- joliet_path )
4249+ joliet_path , encoding )
42284250
42294251 def get_and_write (self , iso_path , local_path , blocksize = 8192 ):
42304252 # type: (str, str, int) -> None
@@ -5450,6 +5472,7 @@ def list_children(self, **kwargs):
54505472 rr_path - The absolute Rock Ridge path on the ISO to list the children for.
54515473 joliet_path - The absolute Joliet path on the ISO to list the children for.
54525474 udf_path - The absolute UDF path on the ISO to list the children for.
5475+ encoding - The string encoding used for the path; defaults to 'utf-16_be' for joliet_path and 'utf-8' for rr_path and iso_path.
54535476 Yields:
54545477 Children of this path.
54555478 Returns:
@@ -5463,6 +5486,8 @@ def list_children(self, **kwargs):
54635486 if key in ('joliet_path' , 'rr_path' , 'iso_path' , 'udf_path' ):
54645487 if value is not None :
54655488 num_paths += 1
5489+ elif key == 'encoding' :
5490+ continue
54665491 else :
54675492 raise pycdlibexception .PyCdlibInvalidInput ("Invalid keyword, must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'" )
54685493
@@ -5480,12 +5505,15 @@ def list_children(self, **kwargs):
54805505 else :
54815506 use_rr = False
54825507 if 'joliet_path' in kwargs :
5483- rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]))
5508+ kwargs ['encoding' ] = kwargs .get ('encoding' ) or 'utf-16_be'
5509+ rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]), kwargs ['encoding' ])
54845510 elif 'rr_path' in kwargs :
5485- rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]))
5511+ kwargs ['encoding' ] = kwargs .get ('encoding' ) or 'utf-8'
5512+ rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]), kwargs ['encoding' ])
54865513 use_rr = True
54875514 else :
5488- rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]))
5515+ kwargs ['encoding' ] = kwargs .get ('encoding' ) or 'utf-8'
5516+ rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]), kwargs ['encoding' ])
54895517
54905518 for c in _yield_children (rec , use_rr ): # pylint: disable=use-yield-from
54915519 yield c
@@ -5630,14 +5658,15 @@ def rm_isohybrid(self):
56305658
56315659 self .isohybrid_mbr = None
56325660
5633- def full_path_from_dirrecord (self , rec , rockridge = False ):
5634- # type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool) -> str
5661+ def full_path_from_dirrecord (self , rec , rockridge = False , user_encoding = '' ):
5662+ # type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool, str ) -> str
56355663 """
56365664 Get the absolute path of a directory record.
56375665
56385666 Parameters:
56395667 rec - The directory record to get the full path for.
56405668 rockridge - Whether to get the rock ridge full path.
5669+ user_encoding - The string encoding used for the path as determined by the user.
56415670 Returns:
56425671 A string representing the absolute path to the file on the ISO.
56435672 """
@@ -5650,6 +5679,9 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
56505679 if self .joliet_vd is not None and id (rec .vd ) == id (self .joliet_vd ):
56515680 encoding = 'utf-16_be'
56525681
5682+ if user_encoding :
5683+ encoding = user_encoding
5684+
56535685 # A root entry has no Rock Ridge entry, even on a Rock Ridge ISO.
56545686 # Always return / here.
56555687 if rec .is_root :
@@ -5689,6 +5721,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
56895721 encoding = rec .file_ident .encoding
56905722 else :
56915723 encoding = 'utf-8'
5724+ if user_encoding :
5725+ encoding = user_encoding
56925726 udf_rec = rec # type: Optional[udfmod.UDFFileEntry]
56935727 while udf_rec is not None :
56945728 ident = udf_rec .file_identifier ()
@@ -5859,12 +5893,11 @@ def walk(self, **kwargs):
58595893 raise pycdlibexception .PyCdlibInvalidInput ('This object is not initialized; call either open() or new() to create an ISO' )
58605894
58615895 num_paths = 0
5862- user_encoding = None
5896+ user_encoding = ''
58635897 for key , value in kwargs .items ():
5864- if key in ('joliet_path' , 'rr_path' , 'iso_path' , 'udf_path' ):
5865- if value is not None :
5866- num_paths += 1
5867- elif key == 'encoding' :
5898+ if key in ('joliet_path' , 'rr_path' , 'iso_path' , 'udf_path' ):
5899+ num_paths += int (value is not None )
5900+ elif key == 'encoding' and value :
58685901 user_encoding = value
58695902 else :
58705903 raise pycdlibexception .PyCdlibInvalidInput ("Invalid keyword, must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'" )
@@ -5901,23 +5934,22 @@ def walk(self, **kwargs):
59015934 while dirs :
59025935 dir_record = dirs .popleft ()
59035936
5904- relpath = self .full_path_from_dirrecord (dir_record ,
5905- rockridge = path_type == 'rr_path' )
5937+ relpath = self .full_path_from_dirrecord (dir_record , rockridge = path_type == 'rr_path' ,
5938+ user_encoding = user_encoding )
59065939 dirlist = []
59075940 filelist = []
59085941 dirdict = {}
59095942
5910- for child in reversed (list (self .list_children (** {path_type : relpath }))):
5943+ for child in reversed (list (self .list_children (** {path_type : relpath , 'encoding' : user_encoding or default_encoding }))):
59115944 if child is None or child .is_dot () or child .is_dotdot ():
59125945 continue
59135946
5914- if user_encoding is not None :
5947+ if user_encoding != '' :
59155948 encoding = user_encoding
5949+ elif isinstance (child , udfmod .UDFFileEntry ) and child .file_ident is not None :
5950+ encoding = child .file_ident .encoding
59165951 else :
5917- if isinstance (child , udfmod .UDFFileEntry ) and child .file_ident is not None :
5918- encoding = child .file_ident .encoding
5919- else :
5920- encoding = default_encoding
5952+ encoding = default_encoding or 'utf-8'
59215953
59225954 if path_type == 'rr_path' :
59235955 name = child .rock_ridge .name ()
0 commit comments