X Tutup
Skip to content

Commit 66554c6

Browse files
einstein95 authored and sev- committed
DEVTOOLS: PYCDLIB: Allow user to specify encoding
1 parent 09cff5e commit 66554c6

File tree

1 file changed

+76
-44
lines changed

1 file changed

+76
-44
lines changed

devtools/contrib/pycdlib/pycdlib/pycdlib.py

Lines changed: 76 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -705,8 +705,8 @@ def _seek_to_extent(self, extent):
705705
self._cdfp.seek(extent * self.logical_block_size)
706706

707707
@functools.lru_cache(maxsize=256)
708-
def _find_iso_record(self, iso_path):
709-
# type: (bytes) -> dr.DirectoryRecord
708+
def _find_iso_record(self, iso_path, encoding='utf-8'):
709+
# type: (bytes, str) -> dr.DirectoryRecord
710710
"""
711711
An internal method to find a directory record on the ISO given an ISO
712712
path. If the entry is found, it returns the directory record object
@@ -715,14 +715,15 @@ def _find_iso_record(self, iso_path):
715715
716716
Parameters:
717717
iso_path - The ISO9660 path to lookup.
718+
encoding - The string encoding used for the path.
718719
Returns:
719720
The directory record entry representing the entry on the ISO.
720721
"""
721-
return _find_dr_record_by_name(self.pvd, iso_path, 'utf-8')
722+
return _find_dr_record_by_name(self.pvd, iso_path, encoding)
722723

723724
@functools.lru_cache(maxsize=256)
724-
def _find_rr_record(self, rr_path):
725-
# type: (bytes) -> dr.DirectoryRecord
725+
def _find_rr_record(self, rr_path, encoding='utf-8'):
726+
# type: (bytes, str) -> dr.DirectoryRecord
726727
"""
727728
An internal method to find a directory record on the ISO given a Rock
728729
Ridge path. If the entry is found, it returns the directory record
@@ -731,6 +732,7 @@ def _find_rr_record(self, rr_path):
731732
732733
Parameters:
733734
rr_path - The Rock Ridge path to lookup.
735+
encoding - The string encoding used for the path.
734736
Returns:
735737
The directory record entry representing the entry on the ISO.
736738
"""
@@ -742,7 +744,7 @@ def _find_rr_record(self, rr_path):
742744

743745
splitpath = utils.split_path(rr_path)
744746

745-
currpath = splitpath.pop(0).decode('utf-8').encode('utf-8')
747+
currpath = splitpath.pop(0).decode('utf-8').encode(encoding)
746748

747749
entry = root_dir_record
748750

@@ -793,13 +795,13 @@ def _find_rr_record(self, rr_path):
793795
if not child.is_dir():
794796
break
795797
entry = child
796-
currpath = splitpath.pop(0).decode('utf-8').encode('utf-8')
798+
currpath = splitpath.pop(0).decode('utf-8').encode(encoding)
797799

798800
raise pycdlibexception.PyCdlibInvalidInput('Could not find path')
799801

800802
@functools.lru_cache(maxsize=256)
801-
def _find_joliet_record(self, joliet_path):
802-
# type: (bytes) -> dr.DirectoryRecord
803+
def _find_joliet_record(self, joliet_path, encoding='utf-16_be'):
804+
# type: (bytes, str) -> dr.DirectoryRecord
803805
"""
804806
An internal method to find a directory record on the ISO given a Joliet
805807
path. If the entry is found, it returns the directory record object
@@ -808,12 +810,13 @@ def _find_joliet_record(self, joliet_path):
808810
809811
Parameters:
810812
joliet_path - The Joliet path to lookup.
813+
encoding - The string encoding used for the path.
811814
Returns:
812815
The directory record entry representing the entry on the ISO.
813816
"""
814817
if self.joliet_vd is None:
815818
raise pycdlibexception.PyCdlibInternalError('Joliet path requested on non-Joliet ISO')
816-
return _find_dr_record_by_name(self.joliet_vd, joliet_path, 'utf-16_be')
819+
return _find_dr_record_by_name(self.joliet_vd, joliet_path, encoding)
817820

818821
@functools.lru_cache(maxsize=256)
819822
def _find_udf_record(self, udf_path):
@@ -2412,8 +2415,8 @@ def _udf_get_file_from_iso_fp(self, outfp, blocksize, udf_path):
24122415
utils.copy_data(data_len, blocksize, data_fp, outfp)
24132416

24142417
def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
2415-
joliet_path):
2416-
# type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes]) -> None
2418+
joliet_path, encoding=''):
2419+
# type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes], str) -> None
24172420
"""
24182421
An internal method to fetch a single file from the ISO and write it out
24192422
to the file object.
@@ -2427,19 +2430,23 @@ def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
24272430
with iso_path and joliet_path).
24282431
joliet_path - The absolute Joliet path to lookup on the ISO (exclusive
24292432
with iso_path and rr_path).
2433+
encoding - The string encoding used for the path.
24302434
Returns:
24312435
Nothing.
24322436
"""
24332437
if joliet_path is not None:
24342438
if self.joliet_vd is None:
24352439
raise pycdlibexception.PyCdlibInvalidInput('Cannot fetch a joliet_path from a non-Joliet ISO')
2436-
found_record = self._find_joliet_record(joliet_path)
2440+
encoding = encoding or 'utf-16_be'
2441+
found_record = self._find_joliet_record(joliet_path, encoding)
24372442
elif rr_path is not None:
24382443
if not self.rock_ridge:
24392444
raise pycdlibexception.PyCdlibInvalidInput('Cannot fetch a rr_path from a non-Rock Ridge ISO')
2440-
found_record = self._find_rr_record(rr_path)
2445+
encoding = encoding or 'utf-8'
2446+
found_record = self._find_rr_record(rr_path, encoding)
24412447
elif iso_path is not None:
2442-
found_record = self._find_iso_record(iso_path)
2448+
encoding = encoding or 'utf-8'
2449+
found_record = self._find_iso_record(iso_path, encoding)
24432450
else:
24442451
raise pycdlibexception.PyCdlibInternalError('Invalid path passed to get_file_from_iso_fp')
24452452

@@ -3472,52 +3479,55 @@ def _rm_joliet_dir(self, joliet_path):
34723479

34733480
return num_bytes_to_remove
34743481

3475-
def _get_iso_entry(self, iso_path):
3476-
# type: (bytes) -> dr.DirectoryRecord
3482+
def _get_iso_entry(self, iso_path, encoding='utf-8'):
3483+
# type: (bytes, str) -> dr.DirectoryRecord
34773484
"""
34783485
Internal method to get the directory record for an ISO path.
34793486
34803487
Parameters:
34813488
iso_path - The path on the ISO filesystem to look up the record for.
3489+
encoding - The string encoding used for the path.
34823490
Returns:
34833491
A dr.DirectoryRecord object representing the path.
34843492
"""
34853493
if self._needs_reshuffle:
34863494
self._reshuffle_extents()
34873495

3488-
return self._find_iso_record(iso_path)
3496+
return self._find_iso_record(iso_path, encoding)
34893497

3490-
def _get_rr_entry(self, rr_path):
3491-
# type: (bytes) -> dr.DirectoryRecord
3498+
def _get_rr_entry(self, rr_path, encoding='utf-8'):
3499+
# type: (bytes, str) -> dr.DirectoryRecord
34923500
"""
34933501
Internal method to get the directory record for a Rock Ridge path.
34943502
34953503
Parameters:
34963504
rr_path - The Rock Ridge path on the ISO filesystem to look up the
34973505
record for.
3506+
encoding - The string encoding used for the path.
34983507
Returns:
34993508
A dr.DirectoryRecord object representing the path.
35003509
"""
35013510
if self._needs_reshuffle:
35023511
self._reshuffle_extents()
35033512

3504-
return self._find_rr_record(rr_path)
3513+
return self._find_rr_record(rr_path, encoding)
35053514

3506-
def _get_joliet_entry(self, joliet_path):
3507-
# type: (bytes) -> dr.DirectoryRecord
3515+
def _get_joliet_entry(self, joliet_path, encoding='utf-16_be'):
3516+
# type: (bytes, str) -> dr.DirectoryRecord
35083517
"""
35093518
Internal method to get the directory record for a Joliet path.
35103519
35113520
Parameters:
35123521
joliet_path - The path on the Joliet filesystem to look up the record
35133522
for.
3523+
encoding - The string encoding used for the path.
35143524
Returns:
35153525
A dr.DirectoryRecord object representing the path.
35163526
"""
35173527
if self._needs_reshuffle:
35183528
self._reshuffle_extents()
35193529

3520-
return self._find_joliet_record(joliet_path)
3530+
return self._find_joliet_record(joliet_path, encoding)
35213531

35223532
def _get_udf_entry(self, udf_path):
35233533
# type: (str) -> udfmod.UDFFileEntry
@@ -4103,6 +4113,7 @@ def get_file_from_iso(self, local_path, **kwargs):
41034113
with iso_path, rr_path, and udf_path).
41044114
udf_path - The absolute UDF path to lookup on the ISO (exclusive with
41054115
iso_path, rr_path, and joliet_path).
4116+
encoding - The encoding to use for parsing the filenames.
41064117
Returns:
41074118
Nothing.
41084119
"""
@@ -4114,6 +4125,7 @@ def get_file_from_iso(self, local_path, **kwargs):
41144125
iso_path = None
41154126
rr_path = None
41164127
udf_path = None
4128+
encoding = ''
41174129
num_paths = 0
41184130
for key, value in kwargs.items():
41194131
if key == 'blocksize':
@@ -4144,6 +4156,10 @@ def get_file_from_iso(self, local_path, **kwargs):
41444156
num_paths += 1
41454157
elif value is not None:
41464158
raise pycdlibexception.PyCdlibInvalidInput('iso_path must be a string')
4159+
elif key == 'encoding':
4160+
if not isinstance(value, str):
4161+
raise pycdlibexception.PyCdlibInvalidInput('encoding must be a string')
4162+
encoding = value
41474163
else:
41484164
raise pycdlibexception.PyCdlibInvalidInput('Unknown keyword %s' % (key))
41494165

@@ -4155,7 +4171,7 @@ def get_file_from_iso(self, local_path, **kwargs):
41554171
self._udf_get_file_from_iso_fp(fp, blocksize, udf_path)
41564172
else:
41574173
self._get_file_from_iso_fp(fp, blocksize, iso_path, rr_path,
4158-
joliet_path)
4174+
joliet_path, encoding)
41594175

41604176
def get_file_from_iso_fp(self, outfp, **kwargs):
41614177
# type: (BinaryIO, Union[str, int]) -> None
@@ -4173,6 +4189,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
41734189
with iso_path, rr_path, and udf_path).
41744190
udf_path - The absolute UDF path to lookup on the ISO (exclusive with
41754191
iso_path, rr_path, and joliet_path).
4192+
encoding - The encoding to use for parsing the filenames.
41764193
Returns:
41774194
Nothing.
41784195
"""
@@ -4184,6 +4201,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
41844201
iso_path = None
41854202
rr_path = None
41864203
udf_path = None
4204+
encoding = None
41874205
num_paths = 0
41884206
for key, value in kwargs.items():
41894207
if key == 'blocksize':
@@ -4214,6 +4232,10 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
42144232
num_paths += 1
42154233
elif value is not None:
42164234
raise pycdlibexception.PyCdlibInvalidInput('udf_path must be a string')
4235+
elif key == 'encoding':
4236+
if not isinstance(value, str):
4237+
raise pycdlibexception.PyCdlibInvalidInput('encoding must be a string')
4238+
encoding = value
42174239
else:
42184240
raise pycdlibexception.PyCdlibInvalidInput('Unknown keyword %s' % (key))
42194241

@@ -4224,7 +4246,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
42244246
self._udf_get_file_from_iso_fp(outfp, blocksize, udf_path)
42254247
else:
42264248
self._get_file_from_iso_fp(outfp, blocksize, iso_path, rr_path,
4227-
joliet_path)
4249+
joliet_path, encoding)
42284250

42294251
def get_and_write(self, iso_path, local_path, blocksize=8192):
42304252
# type: (str, str, int) -> None
@@ -5450,6 +5472,7 @@ def list_children(self, **kwargs):
54505472
rr_path - The absolute Rock Ridge path on the ISO to list the children for.
54515473
joliet_path - The absolute Joliet path on the ISO to list the children for.
54525474
udf_path - The absolute UDF path on the ISO to list the children for.
5475+
encoding - The string encoding used for the path; defaults to 'utf-8' or 'utf-16_be'
54535476
Yields:
54545477
Children of this path.
54555478
Returns:
@@ -5463,6 +5486,8 @@ def list_children(self, **kwargs):
54635486
if key in ('joliet_path', 'rr_path', 'iso_path', 'udf_path'):
54645487
if value is not None:
54655488
num_paths += 1
5489+
elif key in ('encoding'):
5490+
continue
54665491
else:
54675492
raise pycdlibexception.PyCdlibInvalidInput("Invalid keyword, must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'")
54685493

@@ -5480,12 +5505,15 @@ def list_children(self, **kwargs):
54805505
else:
54815506
use_rr = False
54825507
if 'joliet_path' in kwargs:
5483-
rec = self._get_joliet_entry(self._normalize_joliet_path(kwargs['joliet_path']))
5508+
kwargs['encoding'] = kwargs.get('encoding') or 'utf-16_be'
5509+
rec = self._get_joliet_entry(self._normalize_joliet_path(kwargs['joliet_path']), kwargs['encoding'])
54845510
elif 'rr_path' in kwargs:
5485-
rec = self._get_rr_entry(utils.normpath(kwargs['rr_path']))
5511+
kwargs['encoding'] = kwargs.get('encoding') or 'utf-8'
5512+
rec = self._get_rr_entry(utils.normpath(kwargs['rr_path']), kwargs['encoding'])
54865513
use_rr = True
54875514
else:
5488-
rec = self._get_iso_entry(utils.normpath(kwargs['iso_path']))
5515+
kwargs['encoding'] = kwargs.get('encoding') or 'utf-8'
5516+
rec = self._get_iso_entry(utils.normpath(kwargs['iso_path']), kwargs['encoding'])
54895517

54905518
for c in _yield_children(rec, use_rr): # pylint: disable=use-yield-from
54915519
yield c
@@ -5630,14 +5658,15 @@ def rm_isohybrid(self):
56305658

56315659
self.isohybrid_mbr = None
56325660

5633-
def full_path_from_dirrecord(self, rec, rockridge=False):
5634-
# type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool) -> str
5661+
def full_path_from_dirrecord(self, rec, rockridge=False, user_encoding=''):
5662+
# type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool, str) -> str
56355663
"""
56365664
Get the absolute path of a directory record.
56375665
56385666
Parameters:
56395667
rec - The directory record to get the full path for.
56405668
rockridge - Whether to get the rock ridge full path.
5669+
user_encoding - The string encoding used for the path as determined by the user.
56415670
Returns:
56425671
A string representing the absolute path to the file on the ISO.
56435672
"""
@@ -5650,6 +5679,9 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
56505679
if self.joliet_vd is not None and id(rec.vd) == id(self.joliet_vd):
56515680
encoding = 'utf-16_be'
56525681

5682+
if user_encoding:
5683+
encoding = user_encoding
5684+
56535685
# A root entry has no Rock Ridge entry, even on a Rock Ridge ISO.
56545686
# Always return / here.
56555687
if rec.is_root:
@@ -5689,6 +5721,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
56895721
encoding = rec.file_ident.encoding
56905722
else:
56915723
encoding = 'utf-8'
5724+
if user_encoding:
5725+
encoding = user_encoding
56925726
udf_rec = rec # type: Optional[udfmod.UDFFileEntry]
56935727
while udf_rec is not None:
56945728
ident = udf_rec.file_identifier()
@@ -5859,12 +5893,11 @@ def walk(self, **kwargs):
58595893
raise pycdlibexception.PyCdlibInvalidInput('This object is not initialized; call either open() or new() to create an ISO')
58605894

58615895
num_paths = 0
5862-
user_encoding = None
5896+
user_encoding = ''
58635897
for key, value in kwargs.items():
5864-
if key in ('joliet_path', 'rr_path', 'iso_path', 'udf_path'):
5865-
if value is not None:
5866-
num_paths += 1
5867-
elif key == 'encoding':
5898+
if key in ('joliet_path', 'rr_path', 'iso_path', 'udf_path') and value is not None:
5899+
num_paths += 1
5900+
elif key == 'encoding' and value:
58685901
user_encoding = value
58695902
else:
58705903
raise pycdlibexception.PyCdlibInvalidInput("Invalid keyword, must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'")
@@ -5901,23 +5934,22 @@ def walk(self, **kwargs):
59015934
while dirs:
59025935
dir_record = dirs.popleft()
59035936

5904-
relpath = self.full_path_from_dirrecord(dir_record,
5905-
rockridge=path_type == 'rr_path')
5937+
relpath = self.full_path_from_dirrecord(dir_record, rockridge=path_type == 'rr_path',
5938+
user_encoding=user_encoding)
59065939
dirlist = []
59075940
filelist = []
59085941
dirdict = {}
59095942

5910-
for child in reversed(list(self.list_children(**{path_type: relpath}))):
5943+
for child in reversed(list(self.list_children(**{path_type: relpath, 'encoding': user_encoding or default_encoding}))):
59115944
if child is None or child.is_dot() or child.is_dotdot():
59125945
continue
59135946

5914-
if user_encoding is not None:
5947+
if user_encoding != '':
59155948
encoding = user_encoding
5949+
elif isinstance(child, udfmod.UDFFileEntry) and child.file_ident is not None:
5950+
encoding = child.file_ident.encoding
59165951
else:
5917-
if isinstance(child, udfmod.UDFFileEntry) and child.file_ident is not None:
5918-
encoding = child.file_ident.encoding
5919-
else:
5920-
encoding = default_encoding
5952+
encoding = default_encoding or 'utf-8'
59215953

59225954
if path_type == 'rr_path':
59235955
name = child.rock_ridge.name()

0 commit comments

Comments
 (0)
X Tutup