This commit is contained in:
Ignacio Serantes
2026-05-09 10:26:57 +02:00
parent 3fb55ee4f3
commit 6207cab27a
7 changed files with 622 additions and 177 deletions

View File

@@ -1,7 +1,19 @@
from .baloo_tools import BalooTools
def get_info(id):
    """Simplified library entry point: delegate to BalooTools.get_info.

    A fresh BalooTools instance is created for every call.
    """
    return BalooTools().get_info(id)
def get_resolution(id):
    """Simplified library entry point: delegate to BalooTools.get_resolution.

    A fresh BalooTools instance is created for every call.
    """
    return BalooTools().get_resolution(id)
def get_tags(id):
    """Simplified library entry point: delegate to BalooTools.get_tags.

    A fresh BalooTools instance is created for every call.
    """
    return BalooTools().get_tags(id)

View File

@@ -8,9 +8,97 @@ Helper functions to interact directly with the Baloo LMDB index.
import json
import lmdb
import os
import re
import sys
from typing import Tuple
# Readable property names in ascending numeric-ID order: the position of a
# name in this tuple is the ID used as a (string) key in the metadata JSON
# read from the Baloo index, e.g. position 26 is 'Width'.
# NOTE(review): presumably mirrors the KFileMetaData property enum -- confirm
# against the KFileMetaData version in use before adding or reordering names.
_PROPERTY_NAMES_BY_ID = (
    'Empty',  # 0
    'BitRate',  # 1
    'Channels',  # 2
    'Duration',  # 3
    'Genre',  # 4
    'SampleRate',  # 5
    'TrackNumber',  # 6
    'ReleaseYear',  # 7
    'Comment',  # 8
    'Artist',  # 9
    'Album',  # 10
    'AlbumArtist',  # 11
    'Composer',  # 12
    'Lyricist',  # 13
    'Author',  # 14
    'Title',  # 15
    'Subject',  # 16
    'Generator',  # 17
    'PageCount',  # 18
    'WordCount',  # 19
    'LineCount',  # 20
    'Language',  # 21
    'Copyright',  # 22
    'Publisher',  # 23
    'CreationDate',  # 24
    'Keywords',  # 25
    'Width',  # 26
    'Height',  # 27
    'AspectRatio',  # 28
    'FrameRate',  # 29
    'Manufacturer',  # 30
    'Model',  # 31
    'ImageDateTime',  # 32
    'ImageOrientation',  # 33
    'PhotoFlash',  # 34
    'PhotoPixelXDimension',  # 35
    'PhotoPixelYDimension',  # 36
    'PhotoDateTimeOriginal',  # 37
    'PhotoFocalLength',  # 38
    'PhotoFocalLengthIn35mmFilm',  # 39
    'PhotoExposureTime',  # 40
    'PhotoFNumber',  # 41
    'PhotoApertureValue',  # 42
    'PhotoExposureBiasValue',  # 43
    'PhotoWhiteBalance',  # 44
    'PhotoMeteringMode',  # 45
    'PhotoISOSpeedRatings',  # 46
    'PhotoSaturation',  # 47
    'PhotoSharpness',  # 48
    'PhotoGpsLatitude',  # 49
    'PhotoGpsLongitude',  # 50
    'PhotoGpsAltitude',  # 51
    'TranslationUnitsTotal',  # 52
    'TranslationUnitsWithTranslation',  # 53
    'TranslationUnitsWithDraftTranslation',  # 54
    'TranslationLastAuthor',  # 55
    'TranslationLastUpDate',  # 56
    'TranslationTemplateDate',  # 57
    'OriginUrl',  # 58
    'OriginEmailSubject',  # 59
    'OriginEmailSender',  # 60
    'OriginEmailMessageId',  # 61
    'DiscNumber',  # 62
    'Location',  # 63
    'Performer',  # 64
    'Ensemble',  # 65
    'Arranger',  # 66
    'Conductor',  # 67
    'Opus',  # 68
    'Label',  # 69
    'Compilation',  # 70
    'License',  # 71
    'Rating',  # 72
    'Lyrics',  # 73
    'ReplayGainAlbumPeak',  # 74
    'ReplayGainAlbumGain',  # 75
    'ReplayGainTrackPeak',  # 76
    'ReplayGainTrackGain',  # 77
    'Description',  # 78
    'VideoCodec',  # 79
    'AudioCodec',  # 80
    'PixelFormat',  # 81
    'ColorSpace',  # 82
    'AssistiveAlternateDescription',  # 83
)

# Lookup table from the stringified numeric ID ('0' .. '83') to its name.
PROPERTIES_ID_MAP = {
    str(property_id): property_name
    for property_id, property_name in enumerate(_PROPERTY_NAMES_BY_ID)
}
class BalooTools:
"""Class to interact directly with the Baloo LMDB index."""
@@ -21,16 +109,15 @@ class BalooTools:
os.path.expanduser("~"), ".local/share/baloo/index"
)
def get_resolution(self, file_id: int, sep: str = 'x') -> Tuple[int, int]:
def get_info(self, file_id: int) -> json:
"""
Retrieves the width and height of an image/video from the Baloo index.
Retrieves file metadata from the Baloo index.
Args:
file_id: The integer ID of the file.
sep: Separator used (unused currently, kept for compatibility).
Returns:
A tuple of (width, height) integers. Returns (-1, -1) if not found.
A json with all file metadata fields.
"""
try:
# Using context manager ensures the environment is closed properly
@@ -58,15 +145,85 @@ class BalooTools:
try:
jvalue = json.loads(value.decode())
# Baloo stores width in '26' and height in '27'
return jvalue.get('26', -1), jvalue.get('27', -1)
return {PROPERTIES_ID_MAP.get(k, k):
v for k, v in jvalue.items()}
except (json.JSONDecodeError, KeyError):
return -1, -1
return {}
except lmdb.Error as e:
print(f"Warning: Failed to access Baloo LMDB index: {e}", file=sys.stderr)
return -1, -1
return {}
def get_resolution(self, file_id: int, sep: str = 'x') -> Tuple[int, int]:
    """
    Retrieve the width and height of an image/video from the Baloo index.

    The lookup is delegated to ``get_info``, which returns metadata keyed
    by readable property names, so the dimensions are read from 'Width'
    and 'Height'. The raw Baloo IDs '26' and '27' are still accepted as a
    fallback for metadata whose keys were not translated.

    Args:
        file_id: The integer ID of the file.
        sep: Separator used (unused currently, kept for compatibility).

    Returns:
        A tuple of (width, height). A dimension that is not present is
        reported as -1; (-1, -1) is returned when no metadata is available.
    """
    file_info = self.get_info(file_id)

    # Covers a lookup that produced no metadata object at all (e.g. None).
    if not isinstance(file_info, dict):
        return -1, -1

    # Prefer the translated names; looking up only '26'/'27' would always
    # miss because get_info renames those keys to 'Width'/'Height'.
    width = file_info.get('Width', file_info.get('26', -1))
    height = file_info.get('Height', file_info.get('27', -1))
    return width, height
def get_tags(self, file_id: int) -> dict:
    """
    Retrieve the tags stored for a file in the Baloo index.

    Args:
        file_id: The integer ID of the file.

    Returns:
        A dict with a single 'tags' key holding the list of tag strings
        found in the file's record. An empty dict is returned when the
        file has no record or the index cannot be read.
    """
    try:
        # Using context manager ensures the environment is closed properly
        with lmdb.Environment(
            self.baloo_db_path,
            subdir=False,
            readonly=True,
            lock=False,
            max_dbs=20,
        ) as env:
            # NOTE(review): sub-database name kept exactly as written;
            # confirm the spelling against the Baloo schema before changing.
            xattr_terms_db = env.open_db(b'docxatrrterms')
            with env.begin() as txn:
                cursor = txn.cursor(xattr_terms_db)
                # Convert ID to 8-byte little-endian format
                file_id_bytes = int.to_bytes(
                    file_id, length=8, byteorder='little', signed=False
                )
                # set_range() may land on a later key when the exact one
                # is absent, so each key is compared before it is used.
                if cursor.set_range(file_id_bytes):
                    for key, value in cursor:
                        if key != file_id_bytes:
                            break
                        text = value.decode('utf-8', errors='replace')
                        # Drop NUL bytes unless they are followed by 'T',
                        # then split the remainder on NUL / SOH separators.
                        text = re.sub(r'\x00(?![T])', '', text)
                        tags = []
                        for term in re.split(r'[\x00\x01]', text):
                            term = term.strip()
                            if not term:
                                continue
                            # Strip exactly one marker. Chaining both
                            # removals would also eat a leading 'TA' that
                            # belongs to the tag itself
                            # ('TAG-TARGET' -> 'RGET').
                            if term.startswith('TAG-'):
                                term = term[len('TAG-'):]
                            elif term.startswith('TA'):
                                term = term[len('TA'):]
                            tags.append(term)
                        return {'tags': tags}
    except lmdb.Error as e:
        print(f"Warning: Failed to access Baloo LMDB index: {e}", file=sys.stderr)
    # Reached when the file has no record or the index could not be read.
    return {}
# Helper function to maintain compatibility with bagheera_search_lib.py