@@ -99,6 +99,7 @@ def __init__(self) -> None:
9999 self .genre : str | None = None
100100 self ._genre_text : str | None = None # From ©gen text atom
101101 self ._genre_binary : str | None = None # From gnre binary atom
102+ self ._xmp_data : bytes | None = None # XMP data from uuid atoms
102103 self .year : str | None = None
103104 self .comment : str | None = None
104105
@@ -280,6 +281,9 @@ def _cleanup_internal_fields(self) -> None:
280281 delattr (self , '_genre_text' )
281282 if hasattr (self , '_genre_binary' ):
282283 delattr (self , '_genre_binary' )
284+ # Remove XMP processing fields
285+ if hasattr (self , '_xmp_data' ):
286+ delattr (self , '_xmp_data' )
283287
284288 def _set_field (self , fieldname : str , value : str | float ,
285289 check_conflict : bool = True ) -> None :
@@ -524,7 +528,9 @@ def _parse_tag(self, fh: BinaryIO) -> None:
524528 # the atom data. Callables return {fieldname: value}, which updates
525529 # the TinyTag.
526530 if _MP4 ._meta_data_tree is None :
527- _MP4 ._meta_data_tree = {b'moov' : {b'udta' : {b'meta' : {b'ilst' : {
531+ _MP4 ._meta_data_tree = {
532+ b'uuid' : lambda atom_data : {}, # handled by special case
533+ b'moov' : {b'udta' : {b'meta' : {b'ilst' : {
528534 # http://atomicparsley.sourceforge.net/mpeg-4files.html
529535 # https://metacpan.org/dist/Image-ExifTool/source/lib/Image/ExifTool/QuickTime.pm#L3093
530536 b'\xa9 ART' : {b'data' : _MP4 ._data_parser ('artist' )},
@@ -551,10 +557,13 @@ def _parse_tag(self, fh: BinaryIO) -> None:
551557 b'tmpo' : {b'data' : _MP4 ._data_parser ('other.bpm' )},
552558 b'covr' : {b'data' : _MP4 ._parse_cover_image },
553559 b'----' : _MP4 ._parse_custom_field ,
554- }}}}}
560+ }}}},
561+ }
555562 self ._traverse_atoms (fh , path = _MP4 ._meta_data_tree )
556563 # Apply genre priority: prefer ©gen text over gnre binary
557564 self ._resolve_mp4_genre ()
565+ # Apply XMP metadata as fallback for missing fields
566+ self ._apply_xmp_metadata_fallback ()
558567
559568 def _resolve_mp4_genre (self ) -> None :
560569 """Apply MP4 genre priority: prefer ©gen text over gnre binary."""
@@ -568,6 +577,45 @@ def _resolve_mp4_genre(self) -> None:
568577 self ._genre_text = None
569578 self ._genre_binary = None
570579
580+ def _apply_xmp_metadata_fallback (self ) -> None :
581+ """Apply XMP metadata as fallback for missing MP4 fields."""
582+ # Only process XMP if we have missing metadata and captured XMP data
583+ if (self ._xmp_data and
584+ (not self .title or not self .artist or not self .album or
585+ not self .year or not self .comment )):
586+
587+ # Process the stored XMP data
588+ xmp_metadata = self ._parse_xmp_metadata (self ._xmp_data )
589+
590+ # Only use XMP data if TinyTag didn't find the corresponding fields
591+ title = xmp_metadata .get ("title" )
592+ if title and isinstance (title , str ) and not self .title :
593+ self .title = title
594+ artist = xmp_metadata .get ("artist" )
595+ if artist and isinstance (artist , str ) and not self .artist :
596+ self .artist = artist
597+ album = xmp_metadata .get ("album" )
598+ if album and isinstance (album , str ) and not self .album :
599+ self .album = album
600+ track = xmp_metadata .get ("track" )
601+ if track and not self .track :
602+ try :
603+ if isinstance (track , str ):
604+ self .track = int (track )
605+ elif isinstance (track , int ):
606+ self .track = track
607+ except (ValueError , TypeError ):
608+ pass
609+ year = xmp_metadata .get ("year" )
610+ if year and isinstance (year , str ) and not self .year :
611+ self .year = year
612+ comment = xmp_metadata .get ("comment" )
613+ if comment and isinstance (comment , str ) and not self .comment :
614+ self .comment = comment
615+
616+ # Clear XMP data after processing
617+ self ._xmp_data = None
618+
571619 def _traverse_atoms (self ,
572620 fh : BinaryIO ,
573621 path : _DataTreeDict ,
@@ -621,7 +669,15 @@ def _traverse_atoms(self,
621669 curr_path = curr_path + [atom_type ])
622670 # if the path-leaf is a callable, call it on the atom data
623671 elif callable (sub_path ):
624- for fieldname , value in sub_path (fh .read (atom_size )).items ():
672+ atom_data = fh .read (atom_size )
673+ # Special handling for UUID atoms to capture XMP data
674+ if atom_type == b'uuid' :
675+ self ._parse_uuid_atom (atom_data )
676+ result_dict = {}
677+ else :
678+ result_dict = sub_path (atom_data )
679+
680+ for fieldname , value in result_dict .items ():
625681 if _DEBUG :
626682 print (' ' * 4 * len (curr_path ), 'FIELD: ' , fieldname )
627683 if isinstance (value , Image ):
@@ -699,6 +755,160 @@ def _parse_cover_image(cls, data_atom: bytes) -> dict[str, Image]:
699755 'front_cover' , data_atom [8 :], cls ._IMAGE_MIME_TYPES .get (data_type ))
700756 return {'images.front_cover' : image }
701757
758+ def _parse_uuid_atom (
759+ self , atom_data : bytes
760+ ) -> dict [str , str | int | bool | list [str ]]:
761+ """Parse uuid atoms and capture XMP data."""
762+ if len (atom_data ) >= 16 :
763+ uuid_bytes = atom_data [:16 ]
764+ content = atom_data [16 :]
765+
766+ # Standard XMP UUID: BE7ACFCB-97A9-42E8-9C71-999491E3AFAC
767+ xmp_uuid = bytes .fromhex ("BE7ACFCB97A942E89C71999491E3AFAC" )
768+
769+ if (uuid_bytes == xmp_uuid or
770+ content .startswith (b"<?xpacket begin=" ) or
771+ b"<x:xmpmeta" in content or
772+ b'xmlns:x="adobe:ns:meta/"' in content ):
773+ # Store XMP data for later processing
774+ self ._xmp_data = content
775+
776+ return {} # uuid atoms don't directly set metadata fields
777+
778+ @classmethod
779+ def _parse_filename_metadata (
780+ cls , filename : str
781+ ) -> dict [str , str | int | bool | list [str ]]:
782+ """Parse metadata from structured filename patterns."""
783+ import re
784+ metadata : dict [str , str | int | bool | list [str ]] = {}
785+
786+ # Parse different filename patterns
787+ parts = [part .strip () for part in filename .split (" - " )]
788+ if len (parts ) >= 4 :
789+ # Format: artist - album - track_number - title
790+ metadata ["artist" ] = parts [0 ]
791+ metadata ["album" ] = parts [1 ]
792+ metadata ["track" ] = parts [2 ]
793+ metadata ["title" ] = parts [3 ]
794+ elif len (parts ) == 3 :
795+ # Format: artist - album - title or artist - album - track_number
796+ metadata ["artist" ] = parts [0 ]
797+ metadata ["album" ] = parts [1 ]
798+ third_part = parts [2 ]
799+
800+ # Check if third part is just a number (track number)
801+ if third_part .isdigit ():
802+ metadata ["track" ] = third_part
803+ else :
804+ # Check if it starts with a track number
805+ track_match = re .match (r"^(\d+)\s*[-.]?\s*(.+)" , third_part )
806+ if track_match :
807+ metadata ["track" ] = track_match .group (1 )
808+ if track_match .group (2 ):
809+ metadata ["title" ] = track_match .group (2 )
810+ else :
811+ metadata ["title" ] = third_part
812+ elif len (parts ) == 2 :
813+ # Format: artist - title
814+ metadata ["artist" ] = parts [0 ]
815+ title_part = parts [1 ]
816+
817+ # Check if title starts with track number
818+ track_match = re .match (r"^(\d+)\s*[-.]?\s*(.+)" , title_part )
819+ if track_match :
820+ metadata ["track" ] = track_match .group (1 )
821+ if track_match .group (2 ):
822+ metadata ["title" ] = track_match .group (2 )
823+ else :
824+ metadata ["title" ] = title_part
825+
826+ return metadata
827+
828+ @classmethod
829+ def _parse_dublin_core_metadata (
830+ cls , xmp_text : str
831+ ) -> dict [str , str | int | bool | list [str ]]:
832+ """Parse Dublin Core metadata from XMP text."""
833+ import re
834+ metadata : dict [str , str | int | bool | list [str ]] = {}
835+
836+ # Look for standard Dublin Core metadata
837+ # dc:title → title
838+ dc_title_pattern = r"<dc:title[^>]*>.*?<rdf:li[^>]*>(.*?)</rdf:li>"
839+ title_matches = re .findall (dc_title_pattern , xmp_text , re .DOTALL )
840+ if title_matches :
841+ metadata ["title" ] = title_matches [0 ].strip ()
842+
843+ # dc:creator → artist
844+ dc_creator_pattern = r"<dc:creator[^>]*>.*?<rdf:li[^>]*>(.*?)</rdf:li>"
845+ creator_matches = re .findall (dc_creator_pattern , xmp_text , re .DOTALL )
846+ if creator_matches :
847+ metadata ["artist" ] = creator_matches [0 ].strip ()
848+
849+ # dc:date → year
850+ dc_date_pattern = r"<dc:date[^>]*>.*?<rdf:li[^>]*>(.*?)</rdf:li>"
851+ date_matches = re .findall (dc_date_pattern , xmp_text , re .DOTALL )
852+ if date_matches :
853+ year_match = re .match (r"(\d{4})" , date_matches [0 ].strip ())
854+ if year_match :
855+ metadata ["year" ] = year_match .group (1 )
856+
857+ # dc:subject → comments
858+ dc_subject_pattern = r"<dc:subject[^>]*>.*?<rdf:li[^>]*>(.*?)</rdf:li>"
859+ subject_matches = re .findall (dc_subject_pattern , xmp_text , re .DOTALL )
860+ if subject_matches :
861+ metadata ["comment" ] = subject_matches [0 ].strip ()
862+
863+ # dc:description → comments (overwrites subject)
864+ desc_pattern = r"<dc:description[^>]*>.*?<rdf:li[^>]*>(.*?)</rdf:li>"
865+ description_matches = re .findall (desc_pattern , xmp_text , re .DOTALL )
866+ if description_matches :
867+ metadata ["comment" ] = description_matches [0 ].strip ()
868+
869+ return metadata
870+
871+ @classmethod
872+ def _parse_xmp_metadata (
873+ cls , xmp_content : bytes
874+ ) -> dict [str , str | int | bool | list [str ]]:
875+ """Parse XMP content for music metadata fields."""
876+ if not xmp_content :
877+ return {}
878+
879+ try :
880+ # Decode XMP content
881+ xmp_text = xmp_content .decode ("utf-8" , errors = "ignore" )
882+ metadata : dict [str , str | int | bool | list [str ]] = {}
883+
884+ # Look for file references that contain music metadata
885+ import re
886+ file_path_pattern = r'stRef:filePath="([^"]+)"'
887+ file_paths = re .findall (file_path_pattern , xmp_text )
888+
889+ for file_path in file_paths :
890+ # Check if it's an audio file
891+ audio_exts = [".wav" , ".mp3" , ".flac" , ".aac" , ".m4a" ]
892+ if file_path and any (
893+ file_path .lower ().endswith (ext ) for ext in audio_exts
894+ ):
895+ # Parse metadata from filename
896+ filename = file_path .split ("/" )[- 1 ] # Get filename
897+ filename = re .sub (r"\.[^.]+$" , "" , filename ) # Remove ext
898+ metadata .update (cls ._parse_filename_metadata (filename ))
899+ break # Use first audio file found
900+
901+ # Add Dublin Core metadata if filename parsing didn't work
902+ dublin_metadata = cls ._parse_dublin_core_metadata (xmp_text )
903+ for key , value in dublin_metadata .items ():
904+ if key not in metadata :
905+ metadata [key ] = value
906+
907+ return metadata
908+
909+ except Exception :
910+ return {}
911+
702912 @classmethod
703913 def _read_extended_descriptor (cls , esds_atom : BinaryIO ) -> None :
704914 for _i in range (4 ):
0 commit comments