v1.1.0
This commit is contained in:
@@ -33,31 +33,42 @@ def expression_contains_tags(text):
|
|||||||
|
|
||||||
class EvaluateExpression:
|
class EvaluateExpression:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
# Pre-define the grammar structure during initialization
|
|
||||||
self.grammar = self._build_grammar()
|
self.grammar = self._build_grammar()
|
||||||
|
|
||||||
def _compare_single(self, l_val, op, r_val):
|
def _compare_single(self, l_val, op, r_val):
|
||||||
"""
|
# 1. CASE SENSITIVE (Strict)
|
||||||
Atomic comparison logic for individual values.
|
if op == "==":
|
||||||
Handles numeric conversion and standard operators.
|
return str(l_val) == str(r_val)
|
||||||
"""
|
|
||||||
# Numeric conversion for mathematical operators
|
# 2. NUMERIC LOGIC
|
||||||
if op in (">", "<", ">=", "<="):
|
if op in (">", "<", ">=", "<="):
|
||||||
try:
|
try:
|
||||||
# Attempt to treat both sides as floats
|
# We use float for numeric magnitude
|
||||||
curr_l, curr_r = float(l_val), float(r_val)
|
curr_l, curr_r = float(l_val), float(r_val)
|
||||||
|
if op == ">":
|
||||||
|
return curr_l > curr_r
|
||||||
|
if op == "<":
|
||||||
|
return curr_l < curr_r
|
||||||
|
if op == ">=":
|
||||||
|
return curr_l >= curr_r
|
||||||
|
if op == "<=":
|
||||||
|
return curr_l <= curr_r
|
||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
# Fallback to string comparison if conversion fails
|
# Fallback to case-insensitive string if not numeric
|
||||||
curr_l, curr_r = str(l_val), str(r_val)
|
pass
|
||||||
else:
|
|
||||||
# Default to string representation for other operators
|
# 3. CASE INSENSITIVE (Default for =, !=, :)
|
||||||
curr_l, curr_r = str(l_val), str(r_val)
|
curr_l = str(l_val).lower()
|
||||||
|
curr_r = str(r_val).lower()
|
||||||
|
|
||||||
# Standard operator logic
|
|
||||||
if op == "=":
|
if op == "=":
|
||||||
return l_val == r_val
|
return curr_l == curr_r
|
||||||
if op == "!=":
|
if op == "!=":
|
||||||
return l_val != r_val
|
return curr_l != curr_r
|
||||||
|
if op == ":":
|
||||||
|
return curr_r in curr_l
|
||||||
|
|
||||||
|
# String fallback for magnitude if numeric failed
|
||||||
if op == ">":
|
if op == ">":
|
||||||
return curr_l > curr_r
|
return curr_l > curr_r
|
||||||
if op == "<":
|
if op == "<":
|
||||||
@@ -66,44 +77,39 @@ class EvaluateExpression:
|
|||||||
return curr_l >= curr_r
|
return curr_l >= curr_r
|
||||||
if op == "<=":
|
if op == "<=":
|
||||||
return curr_l <= curr_r
|
return curr_l <= curr_r
|
||||||
if op == ":":
|
|
||||||
return str(r_val).lower() in str(l_val).lower()
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _compare(self, data, left_key, op, right_val):
|
def _compare(self, data, left_key, op, right_val):
|
||||||
"""
|
# Normalizing keys for lookup, but KEEPING the values intact
|
||||||
Main comparison router. Checks if the field is a list or a single value.
|
|
||||||
"""
|
|
||||||
# Normalize data keys to lowercase for case-insensitive lookup
|
|
||||||
normalized_data = {k.lower(): v for k, v in data.items()}
|
normalized_data = {k.lower(): v for k, v in data.items()}
|
||||||
|
|
||||||
# Extract the left-hand value (the field from the JSON)
|
# Get left value from data or use as literal
|
||||||
l_val = normalized_data.get(left_key.lower(), left_key)
|
l_val = normalized_data.get(left_key.lower(), left_key)
|
||||||
|
|
||||||
# Extract the right-hand value (check if it's a literal or another field)
|
# Resolve right value: if it's a key in data, use its value.
|
||||||
r_val = normalized_data.get(str(right_val).lower(), right_val)
|
# Important: use lower() only for the KEY lookup, not the value itself.
|
||||||
|
r_key_lookup = str(right_val).lower()
|
||||||
|
if r_key_lookup in normalized_data:
|
||||||
|
r_val = normalized_data[r_key_lookup]
|
||||||
|
else:
|
||||||
|
r_val = right_val
|
||||||
|
|
||||||
# IF THE FIELD VALUE IS A LIST
|
|
||||||
if isinstance(l_val, list):
|
if isinstance(l_val, list):
|
||||||
# Return True if ANY item in the list satisfies the condition
|
|
||||||
return any(self._compare_single(item, op, r_val) for item in l_val)
|
return any(self._compare_single(item, op, r_val) for item in l_val)
|
||||||
|
|
||||||
# IF THE FIELD VALUE IS A SINGLE DATA POINT
|
|
||||||
return self._compare_single(l_val, op, r_val)
|
return self._compare_single(l_val, op, r_val)
|
||||||
|
|
||||||
def _build_grammar(self):
|
def _build_grammar(self):
|
||||||
"""
|
# CRITICAL: '==' must come BEFORE '=' in the list
|
||||||
Defines the pyparsing grammar for the expression engine.
|
# We use a list to ensure explicit priority in the parser
|
||||||
"""
|
operators = one_of(["==", ">=", "<=", "!=", "=", ">", "<", ":"])
|
||||||
operators = one_of(">= <= != = > < :")
|
|
||||||
identifier = Word(alphanums + "_./\\")
|
identifier = Word(alphanums + "_./\\")
|
||||||
quoted_string = QuotedString("'") | QuotedString('"')
|
quoted_string = QuotedString("'") | QuotedString('"')
|
||||||
operand = quoted_string | identifier
|
operand = quoted_string | identifier
|
||||||
|
|
||||||
# Define basic condition (e.g., "width > 100" or "word")
|
|
||||||
condition = Group((operand + operators + operand) | operand)
|
condition = Group((operand + operators + operand) | operand)
|
||||||
|
|
||||||
# Attach the parse action to convert tokens into executable functions (lambdas)
|
|
||||||
condition.set_parse_action(lambda t: self._create_evaluator_func(t[0]))
|
condition.set_parse_action(lambda t: self._create_evaluator_func(t[0]))
|
||||||
|
|
||||||
return infix_notation(
|
return infix_notation(
|
||||||
@@ -119,25 +125,16 @@ class EvaluateExpression:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def _create_evaluator_func(self, tokens):
|
def _create_evaluator_func(self, tokens):
|
||||||
"""
|
|
||||||
Creates a closure that captures tokens and waits for the data dictionary.
|
|
||||||
"""
|
|
||||||
if len(tokens) == 1:
|
if len(tokens) == 1:
|
||||||
# Rule: Single term -> path CONTAINS term
|
|
||||||
return lambda data: self._compare(data, 'path', ':', tokens[0])
|
return lambda data: self._compare(data, 'path', ':', tokens[0])
|
||||||
else:
|
else:
|
||||||
# Rule: Explicit triplet (key, operator, value)
|
|
||||||
return lambda data: self._compare(data, tokens[0], tokens[1], tokens[2])
|
return lambda data: self._compare(data, tokens[0], tokens[1], tokens[2])
|
||||||
|
|
||||||
def compile(self, expression):
|
def compile(self, expression):
|
||||||
"""
|
|
||||||
Parses the expression once and returns a reusable function.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
return self.grammar.parse_string(expression, parse_all=True)[0]
|
return self.grammar.parse_string(expression, parse_all=True)[0]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Compilation Error: {e}")
|
print(f"Compilation Error: {e}")
|
||||||
# Fallback: return a function that always fails gracefully
|
|
||||||
return lambda data: False
|
return lambda data: False
|
||||||
|
|
||||||
|
|
||||||
@@ -250,7 +247,7 @@ class BagheeraSearcher:
|
|||||||
self.ids_processed.add(file_id)
|
self.ids_processed.add(file_id)
|
||||||
|
|
||||||
if exclude_evaluator:
|
if exclude_evaluator:
|
||||||
file_info = {'path': item["path"]}
|
file_info = {'path': item["path"], 'filename': Path(item["path"]).name}
|
||||||
if exclude_sources.get('properties'):
|
if exclude_sources.get('properties'):
|
||||||
file_info = file_info | get_info(file_id)
|
file_info = file_info | get_info(file_id)
|
||||||
if exclude_sources.get('tags'):
|
if exclude_sources.get('tags'):
|
||||||
@@ -325,7 +322,7 @@ class BagheeraSearcher:
|
|||||||
self.ids_processed.add(file_id)
|
self.ids_processed.add(file_id)
|
||||||
|
|
||||||
if exclude_evaluator:
|
if exclude_evaluator:
|
||||||
file_info = {'path': item["path"]}
|
file_info = {'path': item["path"], 'filename': Path(item["path"]).name}
|
||||||
if exclude_sources.get('properties'):
|
if exclude_sources.get('properties'):
|
||||||
file_info = file_info | get_info(file_id)
|
file_info = file_info | get_info(file_id)
|
||||||
if exclude_sources.get('tags'):
|
if exclude_sources.get('tags'):
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ __status__ = "Production"
|
|||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
import signal
|
import os
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
# from baloo_tools import get_resolution
|
# from baloo_tools import get_resolution
|
||||||
@@ -26,7 +26,7 @@ PROG_NAME = "Bagheera Search Tool"
|
|||||||
PROG_ID = "bagheerasearch"
|
PROG_ID = "bagheerasearch"
|
||||||
PROG_VERSION = __version__
|
PROG_VERSION = __version__
|
||||||
PROG_BY = __author__
|
PROG_BY = __author__
|
||||||
PROG_DATE = "2026-05-09"
|
PROG_DATE = "2026-05-10"
|
||||||
|
|
||||||
CONFIG_DIR = Path.home() / ".config" / PROG_ID
|
CONFIG_DIR = Path.home() / ".config" / PROG_ID
|
||||||
CONFIG_FILE = CONFIG_DIR / "config.json"
|
CONFIG_FILE = CONFIG_DIR / "config.json"
|
||||||
@@ -55,7 +55,7 @@ def save_config(config: dict) -> None:
|
|||||||
|
|
||||||
def print_help_query() -> None:
|
def print_help_query() -> None:
|
||||||
"""Prints the detailed help for query syntax."""
|
"""Prints the detailed help for query syntax."""
|
||||||
help_query = f"""Help updated to 2025-01-01.
|
help_query = f"""{PROG_NAME} uses the Baloo search engine, which is part of the KDE ecosystem, to perform file searches. The help below was taken from the Baloo documentation on 2025-01-01, with some additional information, and it may not be up to date with the latest features or changes in Baloo. For the most current information, please refer to the official Baloo documentation or resources.
|
||||||
|
|
||||||
Baloo offers a rich syntax for searching through your files. Certain attributes of a file can be searched through.
|
Baloo offers a rich syntax for searching through your files. Certain attributes of a file can be searched through.
|
||||||
|
|
||||||
@@ -63,7 +63,7 @@ For example 'type' can be used to filter for files based on their general type:
|
|||||||
|
|
||||||
type:Audio OR type:Document
|
type:Audio OR type:Document
|
||||||
|
|
||||||
The following comparison operators are supported, but note that 'not equal' (!=) operator is not available.
|
The following comparison operators are supported, but note that the 'not equal' (!=) operator is not available in the Baloo search engine.
|
||||||
· : - contains (only for text comparison)
|
· : - contains (only for text comparison)
|
||||||
· = - equal
|
· = - equal
|
||||||
· > - greater than
|
· > - greater than
|
||||||
@@ -71,7 +71,7 @@ The following comparison operators are supported, but note that 'not equal' (!=)
|
|||||||
· < - less than
|
· < - less than
|
||||||
· <= - less than or equal to
|
· <= - less than or equal to
|
||||||
|
|
||||||
Currently the following types are supported:
|
Currently the following types, for use with the --type option, are supported:
|
||||||
· Archive
|
· Archive
|
||||||
· Folder
|
· Folder
|
||||||
· Audio
|
· Audio
|
||||||
@@ -90,7 +90,7 @@ The full list of properties which can be searched is listed below. They are grou
|
|||||||
All Files
|
All Files
|
||||||
· filename
|
· filename
|
||||||
· mimetype
|
· mimetype
|
||||||
· modified
|
· modified (formatted as yyyy-MM-dd[ hh[:mm[:ss]]])
|
||||||
· rating
|
· rating
|
||||||
· tags
|
· tags
|
||||||
· userComment
|
· userComment
|
||||||
@@ -103,7 +103,7 @@ Audio
|
|||||||
· Channels
|
· Channels
|
||||||
· Comment
|
· Comment
|
||||||
· Composer
|
· Composer
|
||||||
· Duration
|
· Duration (this value must be in seconds, for example use 'duration > 300' to find files longer than 5 minutes)
|
||||||
· Genre
|
· Genre
|
||||||
· Lyricist
|
· Lyricist
|
||||||
· ReleaseYear
|
· ReleaseYear
|
||||||
@@ -113,7 +113,7 @@ Audio
|
|||||||
Documents
|
Documents
|
||||||
· Author
|
· Author
|
||||||
· Copyright
|
· Copyright
|
||||||
· CreationDate
|
· CreationDate (formatted as yyyy-MM-dd[ hh[:mm[:ss]]])
|
||||||
· Generator
|
· Generator
|
||||||
· Keywords
|
· Keywords
|
||||||
· Language
|
· Language
|
||||||
@@ -153,8 +153,44 @@ Media
|
|||||||
· PhotoWhiteBalance
|
· PhotoWhiteBalance
|
||||||
· Width
|
· Width
|
||||||
|
|
||||||
|
The following properties are undocumented but available in the source code; they may or may not work, but are worth trying:
|
||||||
|
· AssistiveAlternateDescription
|
||||||
|
· Arranger
|
||||||
|
· AudioCodec
|
||||||
|
· ColorSpace
|
||||||
|
· Compilation
|
||||||
|
· Conductor
|
||||||
|
· Description
|
||||||
|
· DiscNumber
|
||||||
|
· Ensemble
|
||||||
|
· Label
|
||||||
|
· License
|
||||||
|
· Location
|
||||||
|
· Lyrics
|
||||||
|
· Manufacturer
|
||||||
|
· Model
|
||||||
|
· Opus
|
||||||
|
· OriginUrl
|
||||||
|
· OriginEmailSubject
|
||||||
|
· OriginEmailSender
|
||||||
|
· OriginEmailMessageId
|
||||||
|
· Performer
|
||||||
|
· PixelFormat
|
||||||
|
· ReplayGainAlbumPeak
|
||||||
|
· ReplayGainAlbumGain
|
||||||
|
· ReplayGainTrackPeak
|
||||||
|
· ReplayGainTrackGain
|
||||||
|
· TranslationUnitsTotal
|
||||||
|
· TranslationUnitsWithTranslation
|
||||||
|
· TranslationUnitsWithDraftTranslation
|
||||||
|
· TranslationLastAuthor
|
||||||
|
· TranslationLastUpDate
|
||||||
|
· TranslationTemplateDate
|
||||||
|
· VideoCodec
|
||||||
|
|
||||||
{PROG_NAME} recognizes some natural language sentences in English, as long as they are capitalized, and transforms them into queries that can be interpreted by the search engine.
|
Baloo documentation ends here, but {PROG_NAME} adds some extra features on top of it.
|
||||||
|
|
||||||
|
Search engine recognizes some natural language sentences in English, as long as they are capitalized, and transforms them into queries that can be interpreted by the search engine.
|
||||||
|
|
||||||
Supported natural language sentences and patterns for queries are:
|
Supported natural language sentences and patterns for queries are:
|
||||||
· MODIFIED TODAY
|
· MODIFIED TODAY
|
||||||
@@ -166,12 +202,18 @@ Supported natural language sentences and patterns for queries are:
|
|||||||
<NUMBER> can be any number or a number text from ONE to TWENTY.
|
<NUMBER> can be any number or a number text from ONE to TWENTY.
|
||||||
|
|
||||||
|
|
||||||
The --exclude and --recursive-exclude options allow you to filter files out of the results. The syntax for both options supports parentheses and logical operators (AND, OR, and NOT) to combine multiple patterns.
|
The --exclude and --recursive-exclude options allow you to filter files out of the results.
|
||||||
|
The syntax for both options supports parentheses and logical operators (AND, OR, and NOT) to combine multiple patterns.
|
||||||
In addition to standard query comparison operators, the not equal (!=) operator is available for comparing properties against specific values. Furthermore, you can compare two properties directly; for example, 'width > height' is a valid expression.
|
In addition to standard query comparison operators, the not equal (!=) operator is available for comparing properties against specific values. Furthermore, you can compare two properties directly; for example, 'width > height' is a valid expression.
|
||||||
|
|
||||||
Remarks:
|
Remarks:
|
||||||
· All text comparison are case insensitive.
|
· Text comparisons are case sensitive with the '==' operator but case insensitive with the '=' and ':' operators. For example, 'filename:report' would match 'report.docx', 'Report.docx', and 'REPORT.docx', while 'filename==report.docx' would only match 'report.docx'.
|
||||||
· Tags comparisons are performed against both individual full tag string (using the '/' character as a level separator) and each individual level. All individual level values are normalized to lowercase and stripped of accents or diacritics. For example, a file tagged as 'Opera,Person/María Callas,Singer' would match any of the following elements: ['Opera', 'Person/María Callas', 'Singer', 'callas', 'maria', 'opera', 'person', 'singer']."
|
· Tag comparisons are performed against both each full tag string (using the '/' character as a level separator) and each individual level. All individual level values are normalized to lowercase and stripped of accents or diacritics. For example, a file tagged as 'Opera,Person/María Callas,Singer' would match any of the following elements: ['Opera', 'Person/María Callas', 'Singer', 'callas', 'maria', 'opera', 'person', 'singer'].
|
||||||
· Only text and numeric data are supported."""
|
· Only text and numeric data are supported; dates are not supported as of now.
|
||||||
|
· Baloo's minimum of three characters for property values does not apply to the --exclude and --recursive-exclude options, so you can use shorter values in those options.
|
||||||
|
|
||||||
|
For example, if you have a tag named 'Science' and another one named 'Science Fiction', you can't obtain only the results tagged with 'Science', because the Baloo search engine will match both the 'Science' and 'Science Fiction' tags when you use 'tags:Science' in your query. To exclude results tagged with 'Science Fiction' you can use the following query:
|
||||||
|
{PROG_ID} --exclude tags:Fiction tags:Science"""
|
||||||
print(help_query)
|
print(help_query)
|
||||||
|
|
||||||
|
|
||||||
@@ -183,19 +225,12 @@ def print_version() -> None:
|
|||||||
"the good people at KDE"
|
"the good people at KDE"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def signal_handler(sig, frame) -> None:
|
|
||||||
"""Handles Ctrl+C gracefully."""
|
|
||||||
print("\nSearch canceled at user request.")
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="An improved search tool for Baloo"
|
description="An improved search tool for Baloo"
|
||||||
)
|
)
|
||||||
parser.add_argument("query", nargs="?", help="list of words to query for")
|
parser.add_argument("query", nargs="?", help="list of words to query for")
|
||||||
parser.add_argument("-d", "--directory", help="limit search to specified directory")
|
parser.add_argument("-d", "--directory", help="limit search to specified directory tree")
|
||||||
parser.add_argument("-e", "--exclude", help="Search exclude pattern")
|
parser.add_argument("-e", "--exclude", help="Search exclude pattern")
|
||||||
parser.add_argument("-i", "--id", action="store_true", help="show document IDs")
|
parser.add_argument("-i", "--id", action="store_true", help="show document IDs")
|
||||||
parser.add_argument("-k", "--konsole", action="store_true", help="show files using file:/ and quotes")
|
parser.add_argument("-k", "--konsole", action="store_true", help="show files using file:/ and quotes")
|
||||||
@@ -203,7 +238,7 @@ def main():
|
|||||||
parser.add_argument("-o", "--offset", type=int, help="offset from which to start the search")
|
parser.add_argument("-o", "--offset", type=int, help="offset from which to start the search")
|
||||||
parser.add_argument("-r", "--recursive", nargs="?", const="", default=None, help="enable recurse with or without a query")
|
parser.add_argument("-r", "--recursive", nargs="?", const="", default=None, help="enable recurse with or without a query")
|
||||||
parser.add_argument("-n", "--recursive-indent", help="recursive indent character")
|
parser.add_argument("-n", "--recursive-indent", help="recursive indent character")
|
||||||
parser.add_argument("-x", "--recursive-exclude", help="recursion exclude pattern")
|
parser.add_argument("-x", "--recursive-exclude", help="recursion exclude query")
|
||||||
parser.add_argument("-s", "--sort", help="sorting criteria <auto|none>")
|
parser.add_argument("-s", "--sort", help="sorting criteria <auto|none>")
|
||||||
parser.add_argument("-t", "--type", help="type of Baloo data to be searched")
|
parser.add_argument("-t", "--type", help="type of Baloo data to be searched")
|
||||||
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose mode")
|
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose mode")
|
||||||
@@ -318,16 +353,30 @@ def main():
|
|||||||
except FileNotFoundError as e:
|
except FileNotFoundError as e:
|
||||||
print(e)
|
print(e)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
# Catch Ctrl+C inside main for an immediate, clean exit
|
||||||
|
print("\nSearch canceled at user request.")
|
||||||
|
sys.exit(0)
|
||||||
|
except BrokenPipeError:
|
||||||
|
# Silence errors when output is piped to 'head' or 'less' and the pipe is closed
|
||||||
|
devnull = os.open(os.devnull, os.O_WRONLY)
|
||||||
|
os.dup2(devnull, sys.stdout.fileno())
|
||||||
|
sys.exit(1)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error executing search: {e}")
|
print(f"Error executing search: {e}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
signal.signal(signal.SIGINT, signal_handler)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
main()
|
main()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
# Fallback in case the interrupt happens outside the main block of main()
|
||||||
|
print("\nSearch canceled at user request.")
|
||||||
|
try:
|
||||||
|
sys.exit(0)
|
||||||
|
except SystemExit:
|
||||||
|
os._exit(0)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Critical error: {e}")
|
print(f"Critical error: {e}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import lmdb
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
import unicodedata
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
|
|
||||||
PROPERTIES_ID_MAP = {
|
PROPERTIES_ID_MAP = {
|
||||||
@@ -100,6 +101,18 @@ PROPERTIES_ID_MAP = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_text(text):
|
||||||
|
"""
|
||||||
|
Remove accents/diacritics for string comparison.
|
||||||
|
"""
|
||||||
|
if not text:
|
||||||
|
return ""
|
||||||
|
text = unicodedata.normalize('NFD', text)
|
||||||
|
text = "".join(c for c in text if unicodedata.category(c) != 'Mn')
|
||||||
|
# return text.lower().strip()
|
||||||
|
return text.strip()
|
||||||
|
|
||||||
|
|
||||||
class BalooTools:
|
class BalooTools:
|
||||||
"""Class to interact directly with the Baloo LMDB index."""
|
"""Class to interact directly with the Baloo LMDB index."""
|
||||||
|
|
||||||
@@ -214,11 +227,46 @@ class BalooTools:
|
|||||||
for p in parts:
|
for p in parts:
|
||||||
p = p.strip()
|
p = p.strip()
|
||||||
if p:
|
if p:
|
||||||
tag = p.removeprefix('TAG-').removeprefix('TA')
|
""" 'TA' elements are tags normalized to lowercase
|
||||||
tags.append(tag)
|
and stripped of accents/diacritics, while 'TAG'
|
||||||
|
elements are the original tags as they were added by
|
||||||
|
the user. We need to process both to ensure we can
|
||||||
|
match tags in a case-insensitive and
|
||||||
|
accent-insensitive way. But we only want to add the
|
||||||
|
original tags to the final result, not the
|
||||||
|
normalized ones, because the normalized ones are
|
||||||
|
not handle correctly tags with spaces and words with
|
||||||
|
less than three characters.
|
||||||
|
"""
|
||||||
|
if p.startswith('TAG-'):
|
||||||
|
tag = p.removeprefix('TAG-')
|
||||||
|
tags.append(tag)
|
||||||
|
|
||||||
return {'tags': tags}
|
result_set = set(tags)
|
||||||
# return {'tags': ",".join(tags)}
|
|
||||||
|
""" Must add individual parts of the tags to the result set
|
||||||
|
to be able to match them with queries like 'tags:callas'
|
||||||
|
or 'tags:maria' for tags "María Callas" or "Person/María
|
||||||
|
Callas". To maintain Baloo tag behaviour with spaces, it's
|
||||||
|
not possible to search for tags="María Callas" and must
|
||||||
|
search for tags=María tags:Callas, items with spaces are
|
||||||
|
not added to avoid confusion."""
|
||||||
|
for item in tags:
|
||||||
|
parts = re.split(r'[ /\n\t]+', item)
|
||||||
|
|
||||||
|
for part in parts:
|
||||||
|
if part:
|
||||||
|
result_set.add(part)
|
||||||
|
normalize_part = normalize_text(part)
|
||||||
|
if normalize_part:
|
||||||
|
result_set.add(normalize_part)
|
||||||
|
|
||||||
|
tags = sorted(list(result_set))
|
||||||
|
|
||||||
|
if not tags:
|
||||||
|
return {}
|
||||||
|
else:
|
||||||
|
return {'tags': tags}
|
||||||
|
|
||||||
except lmdb.Error as e:
|
except lmdb.Error as e:
|
||||||
print(f"Warning: Failed to access Baloo LMDB index: {e}", file=sys.stderr)
|
print(f"Warning: Failed to access Baloo LMDB index: {e}", file=sys.stderr)
|
||||||
|
|||||||
Reference in New Issue
Block a user