From 3fb55ee4f32faa68119ec36f22fc53ddf1bd832c Mon Sep 17 00:00:00 2001 From: Ignacio Serantes Date: Sun, 22 Mar 2026 18:13:22 +0100 Subject: [PATCH] First commit --- README.md | 88 +++++ bagheera_query_parser_lib/__init__.py | 6 + .../bagheera_query_parser.py | 224 +++++++++++++ .../bagheera_query_parser.py.sav | 170 ++++++++++ bagheera_search_lib/__init__.py | 7 + bagheera_search_lib/bagheera_search.py | 313 ++++++++++++++++++ .../bagheera_search_lib.py.sav | 284 ++++++++++++++++ bagheerasearch | 25 ++ bagheerasearch.py | 269 +++++++++++++++ baloo_tools/__init__.py | 7 + baloo_tools/baloo_tools.py | 89 +++++ baloo_wrapper/CMakeLists.txt | 28 ++ baloo_wrapper/__init__.py | 9 + baloo_wrapper/baloo_wrapper.cpp | 114 +++++++ build.sh | 48 +++ build_baloo_wrappers.sh | 29 ++ requirements.txt | 1 + setup.py | 127 +++++++ syntax.txt | 90 +++++ 19 files changed, 1928 insertions(+) create mode 100644 README.md create mode 100644 bagheera_query_parser_lib/__init__.py create mode 100644 bagheera_query_parser_lib/bagheera_query_parser.py create mode 100644 bagheera_query_parser_lib/bagheera_query_parser.py.sav create mode 100644 bagheera_search_lib/__init__.py create mode 100644 bagheera_search_lib/bagheera_search.py create mode 100644 bagheera_search_lib/bagheera_search_lib.py.sav create mode 100755 bagheerasearch create mode 100755 bagheerasearch.py create mode 100644 baloo_tools/__init__.py create mode 100644 baloo_tools/baloo_tools.py create mode 100644 baloo_wrapper/CMakeLists.txt create mode 100644 baloo_wrapper/__init__.py create mode 100644 baloo_wrapper/baloo_wrapper.cpp create mode 100755 build.sh create mode 100755 build_baloo_wrappers.sh create mode 100644 requirements.txt create mode 100644 setup.py create mode 100644 syntax.txt diff --git a/README.md b/README.md new file mode 100644 index 0000000..6897e75 --- /dev/null +++ b/README.md @@ -0,0 +1,88 @@ +# Bagheera Search Tool + +Bagheera is an advanced search utility and library for the **KDE Baloo** indexing 
service. It provides a flexible Python interface to perform logical queries, recursive searches, and metadata retrieval, specifically optimized for **KDE Frameworks 6 (KF6)**. + +## Features + +* **Modular Architecture**: Use it as a standalone CLI tool or as a Python library (`BagheeraSearcher`). +* **KF6 Native**: Compiled against `KF6Baloo` and `KF6CoreAddons` using `pkg-config` for robust path detection. +* **Natural Language Dates**: Supports queries like `MODIFIED YESTERDAY` or `MODIFIED LAST THREE WEEKS`. +* **Advanced Logic**: Complex filtering with `AND`, `OR`, and parentheses, plus image dimension checks (`PORTRAIT`, `LANDSCAPE`, `SQUARE`). +* **Persistence**: Automatically remembers the last-used `--sort` order in the user configuration. + +## Prerequisites + +Before installing, ensure your system has the following KF6 development libraries and tools: + +### Arch Linux + +```bash +sudo pacman -S baloo6 kcoreaddons6 pkgconf gcc +``` + +### Fedora + +```bash +sudo dnf install kf6-baloo-devel kf6-kcoreaddons-devel pkgconf-pkg-config gcc +``` + +### openSUSE + +```bash +sudo zypper install baloo6-6 kcoreaddons6-dev pkgconf gcc +``` + +### Ubuntu / Debian (Testing/Unstable) + +```bash +sudo apt install libkf6baloo-dev libkf6coreaddons-dev pkg-config gcc +``` + +## Installation + +1. Clone the repository and ensure the C++ wrapper source (`baloo_wrapper/baloo_wrapper.cpp`) is present. + +2. Install via pip: + This will automatically trigger the compilation of the C++ wrapper and install Python dependencies like lmdb. + +```bash +pip install . +``` + +## CLI Usage + +The `bagheerasearch` command will be available after installation. 
+ +```bash +# Search for images modified this week +bagheerasearch --type image "MODIFIED THIS WEEK" + +# Recursive search in a specific directory excluding portraits +bagheerasearch -d ~/Pictures -r "nature" --exclude "PORTRAIT" + +# Show file IDs and use Konsole-friendly output +bagheerasearch "vacation" -i -k +``` + +## Library Usage + +You can integrate Bagheera into your own projects: + +```python +from bagheera_search_lib import BagheeraSearcher + +searcher = BagheeraSearcher() +results = searcher.search("MODIFIED TODAY", {"limit": 10}, {"limit": 10, "recursive": None}) + +for item in results: + print(f"Found: {item['path']}") +``` + +## Configuration + +User settings (like the last-used sort order) are stored in: +`~/.config/bagheerasearch/config.json` + +## License + +Copyright (C) 2026 by Ignacio Serantes. diff --git a/bagheera_query_parser_lib/__init__.py b/bagheera_query_parser_lib/__init__.py new file mode 100644 index 0000000..b13d974 --- /dev/null +++ b/bagheera_query_parser_lib/__init__.py @@ -0,0 +1,6 @@ +from .bagheera_query_parser import BagheeraQueryParser + + +def parse_date(query): + parser = BagheeraQueryParser() + return parser.parse_date(query) diff --git a/bagheera_query_parser_lib/bagheera_query_parser.py b/bagheera_query_parser_lib/bagheera_query_parser.py new file mode 100644 index 0000000..aae4f9a --- /dev/null +++ b/bagheera_query_parser_lib/bagheera_query_parser.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python + +""" +Bagheera Query Parser +Converts natural language English date expressions into Baloo-compatible queries. 
class BagheeraQueryParser:
    """Rewrite English ``MODIFIED ...`` date phrases as Baloo date filters.

    Recognised phrases (case-insensitive; a count may be digits or one of
    the words ONE..TWENTY):

    * ``MODIFIED TODAY`` / ``MODIFIED YESTERDAY``
    * ``MODIFIED THIS|LAST YEAR|MONTH|WEEK``
    * ``MODIFIED LAST <n> YEAR|MONTH|WEEK|DAY[S]``  (rolling window)
    * ``MODIFIED <n> YEAR|MONTH|WEEK|DAY[S] AGO``   (whole calendar unit)

    Any other text in the query is passed through unchanged, so ordinary
    search terms such as "two towers" are never rewritten.
    """

    NUMBER_MAP: Dict[str, int] = {
        'ONE': 1, 'TWO': 2, 'THREE': 3, 'FOUR': 4, 'FIVE': 5,
        'SIX': 6, 'SEVEN': 7, 'EIGHT': 8, 'NINE': 9, 'TEN': 10,
        'ELEVEN': 11, 'TWELVE': 12, 'THIRTEEN': 13, 'FOURTEEN': 14,
        'FIFTEEN': 15, 'SIXTEEN': 16, 'SEVENTEEN': 17, 'EIGHTEEN': 18,
        'NINETEEN': 19, 'TWENTY': 20,
    }

    # All patterns are compiled once, at class-creation time.
    _DATE_FMT = '%Y-%m-%d'
    _NUMBER_WORDS = '|'.join(NUMBER_MAP)
    _COUNT = r'(\d+|' + _NUMBER_WORDS + r')'
    _UNITS = r'(YEAR|MONTH|WEEK|DAY)S?'
    _NUMBER_RE = re.compile(r'\b(' + _NUMBER_WORDS + r')\b', re.IGNORECASE)
    _TODAY_RE = re.compile(r'\bMODIFIED\s+TODAY\b', re.IGNORECASE)
    _YESTERDAY_RE = re.compile(r'\bMODIFIED\s+YESTERDAY\b', re.IGNORECASE)
    _SIMPLE_RE = re.compile(
        r'\bMODIFIED\s+(LAST|THIS)\s+(YEAR|MONTH|WEEK)\b', re.IGNORECASE)
    _LAST_N_RE = re.compile(
        r'\bMODIFIED\s+LAST\s+' + _COUNT + r'\s+' + _UNITS + r'\b',
        re.IGNORECASE)
    _AGO_RE = re.compile(
        r'\bMODIFIED\s+' + _COUNT + r'\s+' + _UNITS + r'\s+AGO\b',
        re.IGNORECASE)

    def __init__(self):
        # Refreshed on every parse_date() call so long-lived processes
        # never work with a stale date.
        self.today = datetime.now().replace(hour=0, minute=0, second=0,
                                            microsecond=0)

    def _convert_numbers(self, query: str) -> str:
        """Replace every number word (ONE..TWENTY, any case) with its digits.

        Kept for backward compatibility. ``parse_date`` does not call it any
        more because it would also rewrite plain search terms.
        """
        return self._NUMBER_RE.sub(
            lambda m: str(self.NUMBER_MAP[m.group(0).upper()]), query)

    def _to_count(self, token: str) -> int:
        """Return the integer for a digit string or a number word."""
        word = token.upper()
        if word in self.NUMBER_MAP:
            return self.NUMBER_MAP[word]
        return int(token)

    def _window(self, start: datetime, end: datetime) -> str:
        """Format a half-open ``[start, end)`` range as a Baloo expression."""
        return (f"(modified>={start.strftime(self._DATE_FMT)}"
                f" AND modified<{end.strftime(self._DATE_FMT)})")

    def _safe_replace_date(self, dt: datetime, year: Optional[int] = None,
                           month: Optional[int] = None,
                           day: Optional[int] = None) -> datetime:
        """Like ``dt.replace`` but falls back to day 28 on an invalid date.

        The fallback covers e.g. Feb 29 moved to a non-leap year.
        """
        new_year = dt.year if year is None else year
        new_month = dt.month if month is None else month
        new_day = dt.day if day is None else day
        try:
            return dt.replace(year=new_year, month=new_month, day=new_day)
        except ValueError:
            return dt.replace(year=new_year, month=new_month, day=28)

    def _add_months(self, dt: datetime, months: int) -> datetime:
        """Add (or, if negative, subtract) months, clamping the day."""
        zero_based = dt.month - 1 + months
        year = dt.year + zero_based // 12
        month = zero_based % 12 + 1
        day = min(dt.day, calendar.monthrange(year, month)[1])
        return dt.replace(year=year, month=month, day=day)

    def _get_start_of_unit(self, dt: datetime, unit: str,
                           offset: int = 0) -> datetime:
        """Return the first day of the unit lying ``offset`` units before dt.

        Weeks start on Monday. An unknown unit returns ``dt`` unchanged.
        """
        if unit == 'YEAR':
            return dt.replace(year=dt.year - offset, month=1, day=1)
        if unit == 'MONTH':
            return self._add_months(dt, -offset).replace(day=1)
        if unit == 'WEEK':
            return dt - timedelta(days=dt.weekday() + offset * 7)
        if unit == 'DAY':
            return dt - timedelta(days=offset)
        return dt

    def _subtract_units(self, dt: datetime, unit: str, n: int) -> datetime:
        """Return ``dt`` moved ``n`` units into the past.

        Raises:
            ValueError: if ``unit`` is not YEAR, MONTH, WEEK or DAY
                (previously this silently returned ``None``).
        """
        if unit == 'YEAR':
            return self._safe_replace_date(dt, year=dt.year - n)
        if unit == 'MONTH':
            return self._add_months(dt, -n)
        if unit == 'WEEK':
            return dt - timedelta(weeks=n)
        if unit == 'DAY':
            return dt - timedelta(days=n)
        raise ValueError(f"Unsupported date unit: {unit!r}")

    def parse_date(self, query: str, today: Optional[datetime] = None) -> str:
        """Return ``query`` with every date phrase replaced by a Baloo filter.

        Args:
            query: Raw user query.
            today: Reference date. Defaults to the current local date;
                pass a value for reproducible results.
        """
        reference = datetime.now() if today is None else today
        self.today = reference.replace(hour=0, minute=0, second=0,
                                       microsecond=0)
        now = self.today
        tomorrow = now + timedelta(days=1)

        # 1. TODAY / YESTERDAY become an exact-date match.
        result = self._TODAY_RE.sub(
            f"modified={now.strftime(self._DATE_FMT)}", query)
        yesterday = now - timedelta(days=1)
        result = self._YESTERDAY_RE.sub(
            f"modified={yesterday.strftime(self._DATE_FMT)}", result)

        # 2. THIS/LAST <unit>: THIS runs up to and including today,
        #    LAST is the complete previous calendar unit.
        def replace_simple(m):
            which, unit = (g.upper() for g in m.groups())
            current_start = self._get_start_of_unit(now, unit)
            if which == "THIS":
                return self._window(current_start, tomorrow)
            previous_start = self._get_start_of_unit(now, unit, offset=1)
            return self._window(previous_start, current_start)

        result = self._SIMPLE_RE.sub(replace_simple, result)

        # 3. LAST <n> <unit>: rolling window that ends today (inclusive).
        def replace_last_n(m):
            count = self._to_count(m.group(1))
            unit = m.group(2).upper()
            if unit == 'DAY':
                start = now - timedelta(days=max(0, count - 1))
            elif unit == 'WEEK':
                start = now - timedelta(days=max(0, count * 7 - 1))
            else:
                start = self._subtract_units(now, unit, count)
            return self._window(start, tomorrow)

        result = self._LAST_N_RE.sub(replace_last_n, result)

        # 4. <n> <unit> AGO: the whole calendar unit n units back.
        def replace_ago(m):
            count = self._to_count(m.group(1))
            unit = m.group(2).upper()
            base = self._get_start_of_unit(now, unit, offset=0)
            start = self._subtract_units(base, unit, count)
            end = self._subtract_units(base, unit, count - 1)
            return self._window(start, end)

        return self._AGO_RE.sub(replace_ago, result)


if __name__ == '__main__':
    # Quick manual smoke test of the date parsing.
    demo_groups = [
        [
            "MODIFIED TODAY",
            "first MODIFIED YESTERDAY last",
            "MODIFIED ONE DAY AGO",
            "MODIFIED TWO DAYS AGO",
            "MODIFIED THREE DAYS AGO",
            "MODIFIED LAST TWO DAYS",
            "MODIFIED THIS WEEK",
            "MODIFIED LAST WEEK",
            "MODIFIED LAST TWO WEEKS",
            "MODIFIED ONE WEEK AGO",
            "MODIFIED TWO WEEKS AGO",
            "MODIFIED THREE WEEKS AGO",
            "MODIFIED THIS MONTH",
            "MODIFIED LAST MONTH",
            "MODIFIED LAST TWO MONTHS",
            "MODIFIED ONE MONTH AGO",
            "MODIFIED TWO MONTHS AGO",
            "MODIFIED THREE MONTHS AGO",
            "MODIFIED THIS YEAR",
            "MODIFIED LAST YEAR",
            "MODIFIED LAST TWO YEARS",
            "MODIFIED ONE YEAR AGO",
            "MODIFIED TWO YEARS AGO",
            "MODIFIED THREE YEARS AGO",
            "foto MODIFIED LAST 2 YEARS",
        ],
        [
            "MODIFIED TODAYMODIFIED TODAY",
            "MODIFIED yesterday",
            "MODIFIED THIS MONTHMODIFIED THIS WEEK",
            "MODIFIED LAST YEARMODIFIED YESTERDAY",
            "modified TODAY",
            "modified today",
        ],
    ]

    for demo_queries in demo_groups:
        demo_parser = BagheeraQueryParser()
        print(f"Testing {__file__}:")
        for demo_query in demo_queries:
            print(f" Input: '{demo_query}'")
            print(f" Output: '{demo_parser.parse_date(demo_query)}'")
            print("-" * 20)
print("-" * 20) diff --git a/bagheera_query_parser_lib/bagheera_query_parser.py.sav b/bagheera_query_parser_lib/bagheera_query_parser.py.sav new file mode 100644 index 0000000..d02ca16 --- /dev/null +++ b/bagheera_query_parser_lib/bagheera_query_parser.py.sav @@ -0,0 +1,170 @@ +#!/usr/bin/env python + +""" +Bagheera Query Parser +Converts natural language English date expressions into Baloo-compatible queries. +""" + +import re +from datetime import datetime, timedelta +from typing import Dict + + +class BagheeraQueryParser: + def __init__(self): + # Actualizamos 'today' cada vez que se usa para evitar problemas si el proceso + # queda abierto días + self.today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + + def _convert_numbers(self, query: str) -> str: + """ + Replaces written numbers (ONE to TWENTY) with their numeric string equivalent. + Only processes uppercase words. + """ + # Mapping for numbers as requested (ONE to TWENTY) + number_map: Dict[str, int] = { + 'ONE': 1, 'TWO': 2, 'THREE': 3, 'FOUR': 4, 'FIVE': 5, + 'SIX': 6, 'SEVEN': 7, 'EIGHT': 8, 'NINE': 9, 'TEN': 10, + 'ELEVEN': 11, 'TWELVE': 12, 'THIRTEEN': 13, 'FOURTEEN': 14, + 'FIFTEEN': 15, 'SIXTEEN': 16, 'SEVENTEEN': 17, 'EIGHTEEN': 18, + 'NINETEEN': 19, 'TWENTY': 20 + } + + words = query.split() + for i, word in enumerate(words): + # Solo intentamos convertir si la palabra está en mayúsculas + if word.isupper() and word in number_map: + words[i] = str(number_map[word]) + + return " ".join(words) + + def _get_start_of_unit(self, dt, unit, offset=0): + if unit == 'YEAR': + return dt.replace(year=dt.year - offset, month=1, day=1) + if unit == 'MONTH': + month = dt.month - offset + year = dt.year + while month <= 0: + month += 12 + year -= 1 + return dt.replace(year=year, month=month, day=1) + if unit == 'WEEK': + return dt - timedelta(days=dt.weekday() + (offset * 7)) + if unit == 'DAY': + return dt - timedelta(days=offset) + + def _subtract_units(self, dt, unit, n): + if unit == 
'YEAR': + return dt.replace(year=dt.year - n) + if unit == 'MONTH': + return self._get_start_of_unit(dt, 'MONTH', offset=n) + if unit == 'WEEK': + return dt - timedelta(weeks=n) + if unit == 'DAY': + return dt - timedelta(days=n) + + def parse_date(self, query): + self.today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + q = self._convert_numbers(query) + + # 1. Reemplazo de TODAY / YESTERDAY + q = re.sub(r'\bMODIFIED TODAY\b', f"modified={self.today.strftime('%Y-%m-%d')}", + q) + yest = self.today - timedelta(days=1) + q = re.sub(r'\bMODIFIED YESTERDAY\b', f"modified={yest.strftime('%Y-%m-%d')}", + q) + + # 2. Reemplazo de (LAST/THIS) (YEAR/MONTH/WEEK) + # Usamos re.sub para encontrar el patrón en cualquier parte y reemplazarlo + def replace_simple(m): + mod, unit = m.groups() + if mod == "THIS": + start = self._get_start_of_unit(self.today, unit).strftime('%Y-%m-%d') + end = (self.today + timedelta(days=1)).strftime('%Y-%m-%d') + else: + start = self._get_start_of_unit(self.today, unit, + offset=1).strftime('%Y-%m-%d') + # end = (self._get_start_of_unit(self.today, unit) - + # timedelta(seconds=1)).strftime('%Y-%m-%d') + end = (self._get_start_of_unit(self.today, unit)).strftime('%Y-%m-%d') + return f"(modified>={start} AND modified<{end})" + + q = re.sub(r"\bMODIFIED (LAST|THIS) (YEAR|MONTH|WEEK)\b", replace_simple, q) + + # 3. Reemplazo de LAST (YEAR/MONTH/WEEK/DAY) + def replace_last_n(m): + n, unit = m.groups() + start = self._subtract_units(self.today, unit, int(n)).strftime('%Y-%m-%d') + end = (self.today + timedelta(days=1)).strftime('%Y-%m-%d') + return f"(modified>={start} AND modified<{end})" + + q = re.sub(r"\bMODIFIED LAST (\d+) (YEAR|MONTH|WEEK|DAY)S?\b", + replace_last_n, q) + + # 4. 
Reemplazo de AGO + def replace_ago(m): + n, unit = m.groups() + start = self._subtract_units(self.today, unit, int(n)) + end = (start + timedelta(days=1)).strftime('%Y-%m-%d') + + start = self._get_start_of_unit(self.today, unit, offset=1) + end = self._subtract_units(start, unit, int(n)-1).strftime('%Y-%m-%d') + start = self._subtract_units(start, unit, int(n)) + return f"(modified>={start.strftime('%Y-%m-%d')} AND modified<{end})" + + q = re.sub(r"\bMODIFIED (\d+) (YEAR|MONTH|WEEK|DAY)S? AGO\b", replace_ago, q) + + return q + + +if __name__ == '__main__': + # Pruebas unitarias básicas para el parseo de fechas + test_queries = [ + "MODIFIED TODAY", + "first MODIFIED YESTERDAY last", + "MODIFIED ONE DAY AGO", + "MODIFIED TWO DAYS AGO", + "MODIFIED THREE DAYS AGO", + "MODIFIED LAST TWO DAYS", + "MODIFIED THIS WEEK", + "MODIFIED LAST WEEK", + "MODIFIED LAST TWO WEEKS", + "MODIFIED ONE WEEK AGO", + "MODIFIED TWO WEEKS AGO", + "MODIFIED THREE WEEKS AGO", + "MODIFIED THIS MONTH", + "MODIFIED LAST MONTH", + "MODIFIED LAST TWO MONTHS", + "MODIFIED ONE MONTH AGO", + "MODIFIED TWO MONTHS AGO", + "MODIFIED THREE MONTHS AGO", + "MODIFIED THIS YEAR", + "MODIFIED LAST YEAR", + "MODIFIED LAST TWO YEARS", + "MODIFIED ONE YEAR AGO", + "MODIFIED TWO YEARS AGO", + "MODIFIED THREE YEARS AGO", + "foto MODIFIED LAST 2 YEARS" + ] + + parser = BagheeraQueryParser() + print(f"Testing {__file__}:") + for q in test_queries: + print(f" Input: '{q}'") + print(f" Output: '{parser.parse_date(q)}'") + print("-" * 20) + + test_queries = [ + "MODIFIED TODAYMODIFIED TODAY", + "MODIFIED yesterday", + "MODIFIED THIS MONTHMODIFIED THIS WEEK", + "MODIFIED LAST YEARMODIFIED YESTERDAY", + "modified TODAY", + "modified today" + ] + parser = BagheeraQueryParser() + print(f"Testing {__file__}:") + for q in test_queries: + print(f" Input: '{q}'") + print(f" Output: '{parser.parse_date(q)}'") + print("-" * 20) diff --git a/bagheera_search_lib/__init__.py b/bagheera_search_lib/__init__.py new file mode 100644 
class BagheeraSearcher:
    """Run Baloo searches through the native ``libbaloo_wrapper.so`` bridge.

    Instances remember every file ID they have yielded (``ids_processed``)
    so that consecutive searches never repeat a file. Call ``reset_state``
    to start afresh.
    """

    def __init__(self, lib_path: Optional[Union[str, Path]] = None) -> None:
        self.ids_processed: set[int] = set()
        self.baloo_lib = self._load_baloo_wrapper(lib_path)

    def _load_baloo_wrapper(self, custom_path: Optional[Union[str, Path]]) \
            -> ctypes.CDLL:
        """Locate the wrapper library, load it and declare its C signatures.

        Args:
            custom_path: Explicit path to the shared object. When omitted,
                the module directory (or the PyInstaller bundle), the active
                virtualenv and the usual system library directories are
                probed in that order.

        Raises:
            FileNotFoundError: if the library cannot be found.
        """
        lib_name = "libbaloo_wrapper.so"

        if custom_path:
            lib_path = Path(custom_path)
            # Fail with the documented exception; the old code referenced
            # names only defined in the auto-discovery branch.
            if not lib_path.exists():
                raise FileNotFoundError(
                    f"ERROR: Baloo wrapper '{lib_path.name}' "
                    f"not found at {lib_path}"
                )
        else:
            if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
                base_dir = Path(getattr(sys, '_MEIPASS')) / 'lib'
            else:
                base_dir = Path(__file__).parent.absolute()

            search_paths = [base_dir]

            # Inside a virtualenv the wrapper may be installed under it.
            if sys.prefix != sys.base_prefix:
                venv_base = Path(sys.prefix)
                search_paths += [venv_base / "lib64", venv_base / "lib"]

            search_paths += [
                Path("/lib64"),
                Path("/lib"),
                Path("/usr/lib64"),
                Path("/usr/lib"),
                Path("/usr/local/lib64"),
                Path("/usr/local/lib"),
            ]

            lib_path = next(
                (d / lib_name for d in search_paths
                 if (d / lib_name).exists()),
                None,
            )
            if lib_path is None:
                raise FileNotFoundError(
                    f"ERROR: Baloo wrapper '{lib_name}' "
                    f"not found at {search_paths}"
                )

        lib = ctypes.CDLL(str(lib_path))
        lib.execute_baloo_query.argtypes = [ctypes.c_char_p]
        lib.execute_baloo_query.restype = ctypes.c_char_p
        lib.get_file_properties.argtypes = [ctypes.c_char_p]
        lib.get_file_properties.restype = ctypes.c_char_p

        return lib

    def check_keywords(
        self, text: str, query: str, file_path: str = "", file_id: int = 0
    ) -> bool:
        """Tell whether ``text`` satisfies the logical ``query``.

        Adjacent words are implicitly AND-ed; uppercase ``OR`` / ``AND`` and
        parentheses are operators; matching is case-insensitive substring
        matching. When ``file_path`` is given, ``PORTRAIT``, ``LANDSCAPE``,
        ``SQUARE`` and ``width <op> height`` are evaluated against the image
        resolution looked up by ``file_id``.

        Returns:
            True if the query matches; False otherwise, including when the
            query is malformed (e.g. unbalanced parentheses).
        """
        if file_path:
            try:
                w, h = get_resolution(file_id)
            except Exception:
                # Best effort: an unknown resolution makes every dimension
                # test evaluate to false.
                w, h = -1, -1

            def replace_dim(match: re.Match) -> str:
                if w <= 0 or h <= 0:
                    return "__false__"

                matched = match.group(0).upper()
                if "PORTRAIT" in matched:
                    verdict = w < h
                elif "LANDSCAPE" in matched:
                    verdict = w > h
                elif "SQUARE" in matched:
                    verdict = w == h
                else:
                    comparisons = {
                        "=": w == h,
                        ">": w > h,
                        "<": w < h,
                        ">=": w >= h,
                        "<=": w <= h,
                        "!=": w != h,
                    }
                    verdict = comparisons.get(match.group(1), False)
                return "__true__" if verdict else "__false__"

            query = re.sub(
                r"\b(PORTRAIT|LANDSCAPE|SQUARE)\b",
                replace_dim,
                query,
                flags=re.IGNORECASE,
            )
            query = re.sub(
                r"\bwidth\s*(<=|>=|!=|<|>|=)\s*height\b",
                replace_dim,
                query,
                flags=re.IGNORECASE,
            )

        text = text.lower()
        query = re.sub(r"(?<=\w)\s+(?=\w)", " AND ", query)

        # Word boundaries keep words such as ORANGE or ANDROID in one piece;
        # without them "ORANGE" was split into the operator OR plus "ANGE".
        tokens = re.findall(r"\(|\)|\bOR\b|\bAND\b|[^\s()]+", query)
        regex_parts = []

        for token in tokens:
            if token in ("(", ")"):
                regex_parts.append(token)
            elif token == "OR":
                regex_parts.append("|")
            elif token == "AND":
                continue  # consecutive lookaheads are already AND-ed
            elif token == "__true__":
                regex_parts.append("(?=.*)")
            elif token == "__false__":
                regex_parts.append("(?!)")
            else:
                regex_parts.append(rf"(?=.*{re.escape(token.lower())})")

        final_regex = "".join(regex_parts)

        try:
            # The group keeps a top-level "|" from escaping the anchor.
            return bool(re.match(f"(?:{final_regex})", text, re.DOTALL))
        except re.error:
            return False

    def get_baloo_info(self, file_path: str) -> Dict[str, str]:
        """Return the properties the wrapper reports for ``file_path``.

        The wrapper answers with ``key:value`` pairs separated by ``|``.
        Entries without a colon are ignored. An empty answer gives ``{}``.
        """
        result = self.baloo_lib.get_file_properties(file_path.encode("utf-8"))
        if not result:
            return {}

        properties = {}
        for entry in result.decode("utf-8").split("|"):
            key, sep, value = entry.partition(":")
            if sep:
                properties[key] = value

        return properties

    def _execute_query(self, options: Dict[str, Any]) -> list:
        """Send ``options`` as JSON to the wrapper and decode its JSON reply.

        Returns an empty list when the wrapper returns nothing or invalid
        JSON. The decode error is reported on stderr.
        """
        query_json = json.dumps(options).encode("utf-8")
        result_ptr = self.baloo_lib.execute_baloo_query(query_json)

        if not result_ptr:
            return []

        try:
            return json.loads(result_ptr.decode("utf-8"))
        except json.JSONDecodeError as e:
            print(f"JSON decode error from Baloo wrapper: {e}",
                  file=sys.stderr)
            return []

    def search_recursive(
        self,
        query_text: str,
        options: Dict[str, Any],
        search_opts: Dict[str, Any],
        files_count: int,
    ) -> Iterator[Dict[str, Any]]:
        """Run ``query_text`` with ``options`` and yield the accepted hits.

        ``search_opts["limit"]`` is decremented in place for every yielded
        item, which is how the remaining budget is shared with the caller.
        ``options["query"]`` is overwritten.
        """
        options["query"] = query_text
        files = self._execute_query(options)
        rec_exclude = search_opts.get("recursive_exclude")

        for item in files:
            if search_opts.get("limit", 0) <= 0:
                break

            file_id = int(item["id"], 16)
            if file_id in self.ids_processed:
                continue
            self.ids_processed.add(file_id)

            if rec_exclude and self.check_keywords(
                item["path"], rec_exclude, item["path"], file_id
            ):
                continue

            # NOTE(review): files_count is a by-value copy, so the offset is
            # applied per directory rather than across the whole search.
            # Confirm whether that is intended.
            if files_count >= search_opts.get("offset", 0):
                search_opts["limit"] -= 1
                yield item

            files_count += 1

    def search(
        self,
        query_text: str,
        main_options: Optional[Dict[str, Any]] = None,
        search_opts: Optional[Dict[str, Any]] = None,
    ) -> Iterator[Dict[str, Any]]:
        """Main search generator. Yields file dictionaries.

        Args:
            query_text: User query; date phrases are translated first.
            main_options: Options forwarded to the wrapper (e.g. ``limit``).
            search_opts: Post-filtering options: ``limit``, ``offset``,
                ``exclude``, ``recursive``, ``recursive_exclude``, ``type``.
                A missing or ``None`` ``limit`` means "no limit".

        Neither dictionary is modified; the generator works on copies.
        """
        main_options = dict(main_options or {})
        search_opts = dict(search_opts or {})
        if search_opts.get("limit") is None:
            # Previously a missing limit counted as 0, so nothing was yielded.
            search_opts["limit"] = sys.maxsize

        main_options["query"] = parse_date(query_text)
        files = self._execute_query(main_options)

        if not files:
            return

        is_recursive = search_opts.get("recursive") is not None
        if is_recursive:
            # The inner queries use the recursive type filter, or none.
            if search_opts.get("type"):
                main_options["type"] = search_opts["type"]
            else:
                main_options.pop("type", None)

            rec_query = search_opts.get("recursive")
            query_text = parse_date(rec_query) if rec_query else ""

        exclude_pattern = search_opts.get("exclude")
        files_count = 0
        for item in files:
            if search_opts["limit"] <= 0:
                break

            file_id = int(item["id"], 16)
            if file_id in self.ids_processed:
                continue
            self.ids_processed.add(file_id)

            if exclude_pattern and self.check_keywords(
                item["path"], exclude_pattern, item["path"], file_id
            ):
                continue

            if is_recursive:
                main_options["directory"] = item["path"]
                yield from self.search_recursive(
                    query_text, main_options, search_opts, files_count
                )
            else:
                yield item
                files_count += 1

    def reset_state(self) -> None:
        """Clears the processed IDs to allow for fresh consecutive searches."""
        self.ids_processed.clear()


if __name__ == "__main__":
    # Quick integration test (needs the compiled wrapper and a Baloo index).
    print(f"Testing {__file__} integration:")
    try:
        searcher = BagheeraSearcher()
        print("✔ Library and wrapper loaded successfully.")

        # Test search (limited to 1 result for today)
        test_main_opts = {"limit": 1}
        test_search_opts = {"limit": 1}

        print("Searching for recent files...")
        results = list(searcher.search(
            "MODIFIED TODAY", test_main_opts, test_search_opts
        ))

        if results:
            print(f"✔ Found: {results[0].get('path')}")
        else:
            print("? No files found for today, but search executed correctly.")

    except FileNotFoundError as e:
        print(f"✘ Setup error: {e}")
    except Exception as e:
        print(f"✘ Unexpected error: {e}")
+""" + +import ctypes +import json +import re +import sys +from pathlib import Path +from typing import Dict, Any, Iterator, Optional, Union + +from baloo_tools import get_resolution +from date_query_parser import parse_date + + +class BagheeraSearcher: + """Class to handle Baloo searches and interact with the C wrapper.""" + + def __init__(self, lib_path: Optional[Union[str, Path]] = None) -> None: + self.ids_processed: set[int] = set() + self.baloo_lib = self._load_baloo_wrapper(lib_path) + + def _load_baloo_wrapper(self, custom_path: Optional[Union[str, Path]]) \ + -> ctypes.CDLL: + """Loads and configures the Baloo C wrapper library.""" + if custom_path: + lib_path = Path(custom_path) + else: + if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'): + current_dir = Path(getattr(sys, '_MEIPASS')) / 'lib' + else: + current_dir = Path(__file__).parent.absolute() + + lib_name = "libbaloo_wrapper.so" + lib_path = current_dir / lib_name + + if not lib_path.exists(): + raise FileNotFoundError( + f"ERROR: Baloo wrapper '{lib_path.name}' not found at {lib_path}" + ) + + lib = ctypes.CDLL(str(lib_path)) + lib.execute_baloo_query.argtypes = [ctypes.c_char_p] + lib.execute_baloo_query.restype = ctypes.c_char_p + lib.get_file_properties.argtypes = [ctypes.c_char_p] + lib.get_file_properties.restype = ctypes.c_char_p + + return lib + + def check_keywords( + self, text: str, query: str, file_path: str = "", file_id: int = 0 + ) -> bool: + """ + Evaluates if a text meets a logical query. + Supports: AND, OR, ( ), dimensions (width=height, etc.), and shapes. 
+ """ + if file_path: + try: + w, h = get_resolution(file_id) + except Exception: + w, h = -1, -1 + + def replace_dim(match: re.Match) -> str: + if w <= 0 or h <= 0: + return "__false__" + + s = match.group(0).upper() + if "PORTRAIT" in s: + return "__true__" if w < h else "__false__" + if "LANDSCAPE" in s: + return "__true__" if w > h else "__false__" + if "SQUARE" in s: + return "__true__" if w == h else "__false__" + + op = match.group(1) + ops_map = { + "=": w == h, + ">": w > h, + "<": w < h, + ">=": w >= h, + "<=": w <= h, + "!=": w != h, + } + return "__true__" if ops_map.get(op, False) else "__false__" + + query = re.sub( + r"\b(PORTRAIT|LANDSCAPE|SQUARE)\b", + replace_dim, + query, + flags=re.IGNORECASE, + ) + query = re.sub( + r"\bwidth\s*(<=|>=|!=|<|>|=)\s*height\b", + replace_dim, + query, + flags=re.IGNORECASE, + ) + + text = text.lower() + query = re.sub(r"(?<=\w)\s+(?=\w)", " AND ", query) + + tokens = re.findall(r"\(|\)|OR|AND|[^\s()]+", query) + regex_parts = [] + + for t in tokens: + if t in ("(", ")"): + regex_parts.append(t) + elif t == "OR": + regex_parts.append("|") + elif t == "AND": + continue + elif t == "__true__": + regex_parts.append("(?=.*)") + elif t == "__false__": + regex_parts.append("(?!)") + else: + regex_parts.append(rf"(?=.*{re.escape(t)})") + + final_regex = "".join(regex_parts) + + try: + return bool(re.search(f"^{final_regex}.*", text, re.DOTALL)) + except re.error: + return False + + def get_baloo_info(self, file_path: str) -> Dict[str, str]: + """Retrieves properties for a specific file from Baloo.""" + result = self.baloo_lib.get_file_properties(file_path.encode("utf-8")) + if not result: + return {} + + data_raw = result.decode("utf-8") + properties = {} + for entry in data_raw.split("|"): + if ":" in entry: + k, v = entry.split(":", 1) + properties[k] = v + + return properties + + def _execute_query(self, options: Dict[str, Any]) -> list: + """Helper method to execute the query against the C wrapper.""" + query_json = 
json.dumps(options).encode("utf-8") + result_ptr = self.baloo_lib.execute_baloo_query(query_json) + + if not result_ptr: + return [] + + try: + raw_results = result_ptr.decode("utf-8") + return json.loads(raw_results) + except json.JSONDecodeError as e: + print(f"JSON decode error from Baloo wrapper: {e}") + return [] + + def search_recursive( + self, + query_text: str, + options: Dict[str, Any], + search_opts: Dict[str, Any], + files_count: int, + ) -> Iterator[Dict[str, Any]]: + """Executes a recursive search yielded item by item.""" + options["query"] = query_text + files = self._execute_query(options) + + for item in files: + if search_opts.get("limit", 0) <= 0: + break + + file_id = int(item["id"], 16) + if file_id in self.ids_processed: + continue + + self.ids_processed.add(file_id) + rec_exclude = search_opts.get("recursive_exclude") + + if not rec_exclude or not self.check_keywords( + item["path"], rec_exclude, item["path"], file_id + ): + if files_count >= search_opts.get("offset", 0): + search_opts["limit"] -= 1 + yield item + + files_count += 1 + + def search( + self, + query_text: str, + main_options: Dict[str, Any], + search_opts: Dict[str, Any], + ) -> Iterator[Dict[str, Any]]: + """ + Main search generator. Yields file dictionaries. 
+ """ + main_options["query"] = parse_date(query_text) + files = self._execute_query(main_options) + + if not files: + return + + is_recursive = search_opts.get("recursive") is not None + if is_recursive: + if search_opts.get("type"): + main_options["type"] = search_opts["type"] + elif "type" in main_options: + main_options.pop("type") + + rec_query = search_opts.get("recursive") + query_text = parse_date(rec_query) if rec_query else "" + + files_count = 0 + for item in files: + if search_opts.get("limit", 0) <= 0: + break + + file_id = int(item["id"], 16) + if file_id in self.ids_processed: + continue + + self.ids_processed.add(file_id) + exclude_pattern = search_opts.get("exclude") + + if not exclude_pattern or not self.check_keywords( + item["path"], exclude_pattern, item["path"], file_id + ): + if is_recursive: + main_options["directory"] = item["path"] + yield from self.search_recursive( + query_text, main_options, search_opts, files_count + ) + else: + yield item + files_count += 1 + + def reset_state(self) -> None: + """Clears the processed IDs to allow for fresh consecutive searches.""" + self.ids_processed.clear() + + +# from bagheera_search_lib import BagheeraSearcher +# +# def main(): +# # ... tu lógica de argparse existente ... 
#!/usr/bin/env bash
#
# Launcher for bagheerasearch.py.
#
# Resolves the real location of this script (following symlinks), activates
# the project's .venv when one exists, runs bagheerasearch.py with all the
# arguments received and exits with the same status as the Python program.

# From https://stackoverflow.com/questions/59895/getting-the-source-directory-of-a-bash-script-from-within
SOURCE="${BASH_SOURCE[0]}"
while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink
    DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
    SOURCE="$(readlink "$SOURCE")"
    # If $SOURCE was a relative symlink, resolve it relative to the directory
    # where the symlink file was located.
    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
done
APPPATH="$( cd -P "$( dirname "$SOURCE" )" && pwd )"

pushd . >/dev/null
cd "$APPPATH" || exit

if [ -d "$APPPATH/.venv" ]; then
    source "$APPPATH/.venv/bin/activate"
fi

"$APPPATH/bagheerasearch.py" "$@"
# Remember the program's status: the commands below would overwrite $?.
STATUS=$?

# VIRTUAL_ENV may be inherited from the caller's environment while the
# `deactivate` shell function is not, so test for the function itself.
if declare -F deactivate >/dev/null; then
    deactivate
fi

popd >/dev/null || exit
exit "$STATUS"
Certain attributes of a file can be searched through. + +For example 'type' can be used to filter for files based on their general type: + + type:Audio or type:Document + +The following comparison operators are supported, but note that 'not equal' operator is not available. + · : - contains (only for text comparison) + · = - equal + · > - greater than + · >= - greater than or equal to + · < - less than + · <= - less than or equal to + +Currently the following types are supported: + + · Archive + · Folder + · Audio + · Video + · Image + · Document + · Spreadsheet + · Presentation + · Text + +These expressions can be combined using AND or OR and additional parenthesis, but note that 'NOT' logical operator is not available. + +[... omitted for brevity, but includes the full list of searchable properties as in your original script ...] + +{PROG_NAME} recognizes some natural language sentences in English, as long as they are capitalized, and transforms them into queries that can be interpreted by the search engine. + +Supported natural language sentences and patterns for queries are: + · MODIFIED TODAY + · MODIFIED YESTERDAY + · MODIFIED THIS [ DAY | WEEK | MONTH | YEAR ] + · LAST [ DAYS | WEEKS | MONTHS | YEARS ] + · [ DAYS | WEEKS | MONTHS | YEARS ] AGO + + can be any number or a number text from ONE to TWENTY. + +Remarks: LAST DAY, if used, is interpreted as YESTERDAY. 
def main() -> None:
    """Command-line entry point: parse arguments, run the search, print results.

    Positional and unrecognised arguments are joined into the query text.
    The last ``--sort`` value is persisted in the user configuration and
    reused when ``--sort`` is omitted. Result paths are written to stdout;
    error messages go to stderr so they do not mix with the results.

    Exits with status 2 (argparse usage error) on inconsistent date filters
    and with status 1 when the search itself fails.
    """
    parser = argparse.ArgumentParser(
        description="An improved search tool for Baloo"
    )
    parser.add_argument("query", nargs="?", help="list of words to query for")
    parser.add_argument("-d", "--directory", help="limit search to specified directory")
    parser.add_argument("-e", "--exclude", help="Search exclude pattern")
    parser.add_argument("-i", "--id", action="store_true", help="show document IDs")
    parser.add_argument("-k", "--konsole", action="store_true", help="show files using file:/ and quotes")
    parser.add_argument("-l", "--limit", type=int, help="the maximum number of results")
    parser.add_argument("-o", "--offset", type=int, help="offset from which to start the search")
    parser.add_argument("-r", "--recursive", nargs="?", const="", default=None, help="enable recurse with or without a query")
    parser.add_argument("-n", "--recursive-indent", help="recursive indent character")
    parser.add_argument("-x", "--recursive-exclude", help="recursion exclude pattern")
    parser.add_argument("-s", "--sort", help="sorting criteria ")
    parser.add_argument("-t", "--type", help="type of Baloo data to be searched")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose mode")

    parser.add_argument("--day", type=int, help="day fixed filter, --month is required")
    parser.add_argument("--month", type=int, help="month fixed filter, --year is required")
    parser.add_argument("--year", type=int, help="year filter fixed filter")

    parser.add_argument("--help-query", action="store_true", help="show query syntax help")
    parser.add_argument("--version", action="store_true", help="show version information")

    args, unknown_args = parser.parse_known_args()

    # Informational flags win over everything else, including validation.
    if args.help_query:
        print_help_query()
        return

    if args.version:
        print_version()
        return

    # parser.error() prints the usage plus the message to stderr and exits
    # with status 2, which also works when main() is invoked directly through
    # the console_scripts entry point (no surrounding exception handler).
    if args.day is not None and args.month is None:
        parser.error("Missing --month (required when --day is used)")

    if args.month is not None and args.year is None:
        parser.error("Missing --year (required when --month is used)")

    query_parts = [args.query] if args.query else []
    if unknown_args:
        query_parts.extend(unknown_args)

    query_text = " ".join(query_parts)

    if not query_text and not args.recursive and not args.type and not args.directory:
        parser.print_help()
        return

    # Persist the sort order, or restore the last one used.
    user_config = load_config()
    if args.sort:
        user_config["last_sort_order"] = args.sort
        save_config(user_config)
    elif "last_sort_order" in user_config:
        args.sort = user_config["last_sort_order"]

    recursive_mode = args.recursive is not None

    # Options sent to Baloo for the main query. In recursive mode the main
    # query only looks for folders; limit, offset and type are then applied
    # client-side to the recursive results (see other_options below).
    main_options = {}
    if recursive_mode:
        main_options["type"] = "folder"
    else:
        if args.limit is not None:
            main_options["limit"] = args.limit
        if args.offset is not None:
            main_options["offset"] = args.offset
        if args.type:
            main_options["type"] = args.type

    if args.directory:
        main_options["directory"] = args.directory
    if args.year is not None:
        main_options["year"] = args.year
    if args.month is not None:
        main_options["month"] = args.month
    if args.day is not None:
        main_options["day"] = args.day
    if args.sort:
        main_options["sort"] = args.sort

    other_options = {
        "exclude": args.exclude,
        "id": args.id,
        "konsole": args.konsole,
        "limit": args.limit if args.limit and recursive_mode else 99999999999,
        "offset": args.offset if args.offset and recursive_mode else 0,
        "recursive": args.recursive,
        "recursive_indent": args.recursive_indent or "",
        "recursive_exclude": args.recursive_exclude,
        "sort": args.sort,
        "type": args.type if recursive_mode else None,
        "verbose": args.verbose,
    }

    if other_options["verbose"]:
        print(f"Query: '{query_text}'")
        print(f"Main Options: {main_options}")
        print(f"Other Options: {other_options}")
        print("-" * 30)

    try:
        searcher = BagheeraSearcher()
        files_count = 0

        # Consume the library's generator.
        for item in searcher.search(query_text, main_options, other_options):
            if other_options["konsole"]:
                output = f"file:/'{item['path']}'"
            else:
                output = item["path"]

            if other_options["id"]:
                output += f" [ID: {item['id']}]"

            print(output)
            files_count += 1

        if other_options["verbose"]:
            if files_count == 0:
                print("No results found.")
            else:
                print(f"Total: {files_count} files found.")

    except FileNotFoundError as e:
        print(e, file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error executing search: {e}", file=sys.stderr)
        sys.exit(1)
class BalooTools:
    """Read-only helper to query the Baloo LMDB index directly."""

    def __init__(self) -> None:
        """Compute the path of the Baloo index file.

        The index lives in ``baloo/index`` under the XDG data directory:
        ``$XDG_DATA_HOME`` when it is set to an absolute path, otherwise
        ``~/.local/share``.
        """
        data_home = os.environ.get("XDG_DATA_HOME", "")
        if not os.path.isabs(data_home):
            # Unset, empty or relative values fall back to the default.
            data_home = os.path.join(
                os.path.expanduser("~"), ".local", "share"
            )
        self.baloo_db_path = os.path.join(data_home, "baloo", "index")

    def get_resolution(self, file_id: int, sep: str = 'x') -> Tuple[int, int]:
        """Retrieve the width and height stored for a file in the Baloo index.

        Args:
            file_id: The integer ID of the file.
            sep: Unused; kept for backward compatibility.

        Returns:
            A ``(width, height)`` tuple. ``(-1, -1)`` is returned when the ID
            cannot be encoded as an unsigned 64-bit key, the document is not
            in the index, its data cannot be decoded or the index cannot be
            opened. A dimension missing from the stored data is reported
            as ``-1``.
        """
        not_found = (-1, -1)

        try:
            # Keys are looked up as 8-byte little-endian unsigned integers.
            file_id_bytes = file_id.to_bytes(8, byteorder='little', signed=False)
        except OverflowError:
            # Negative or larger than 64 bits: cannot be a valid key.
            return not_found

        try:
            # The context managers close the environment and transaction.
            with lmdb.Environment(
                self.baloo_db_path,
                subdir=False,
                readonly=True,
                lock=False,
                max_dbs=20
            ) as env:
                document_data_db = env.open_db(b'documentdatadb')

                with env.begin() as txn:
                    value = txn.get(file_id_bytes, db=document_data_db)

        except lmdb.Error as e:
            print(f"Warning: Failed to access Baloo LMDB index: {e}", file=sys.stderr)
            return not_found

        if value is None:
            return not_found

        try:
            jvalue = json.loads(value.decode())
        except ValueError:
            # Covers both json.JSONDecodeError and UnicodeDecodeError.
            return not_found

        if not isinstance(jvalue, dict):
            return not_found

        # Baloo stores width in '26' and height in '27'
        return jvalue.get('26', -1), jvalue.get('27', -1)
"""Load the compiled Baloo wrapper shared library next to this package.

When a matching shared object is found, it is exposed as ``baloo_lib``.
When none is found the name is left undefined, exactly as before, so
``from baloo_wrapper import baloo_lib`` keeps raising ``ImportError``.
"""

import ctypes
import glob
import os

_current_dir = os.path.dirname(__file__)

# Accept both the bare name and the ``lib`` prefixed one produced by the
# build scripts. sorted() makes the pick deterministic when several builds
# are present; the bare name keeps priority.
_so_files = sorted(
    glob.glob(os.path.join(_current_dir, "baloo_wrapper*.so"))
) + sorted(
    glob.glob(os.path.join(_current_dir, "libbaloo_wrapper*.so"))
)

if _so_files:
    baloo_lib = ctypes.CDLL(_so_files[0])

    # Both exported functions take and return ``const char *``. ctypes
    # assumes an ``int`` return type by default, which would truncate the
    # returned pointer on 64-bit platforms.
    for _name in ("execute_baloo_query", "get_file_properties"):
        _func = getattr(baloo_lib, _name, None)
        if _func is not None:
            _func.argtypes = [ctypes.c_char_p]
            _func.restype = ctypes.c_char_p
b/baloo_wrapper/baloo_wrapper.cpp @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Usamos extern "C" para evitar el "name mangling" de C++ +extern "C" { + + // Añadimos el atributo para forzar la visibilidad pública del símbolo + __attribute__((visibility("default"))) + const char* execute_baloo_query(const char* options_json) { + // Parse JSON options + QJsonDocument doc = QJsonDocument::fromJson(options_json); + QJsonObject options = doc.object(); + + Baloo::Query q; + + if (options.contains("query")) { + q.setSearchString(options["query"].toString()); + } + if (options.contains("limit")) { + q.setLimit(options["limit"].toInt()); + } + if (options.contains("offset")) { + q.setOffset(options["offset"].toInt()); + } + if (options.contains("type")) { + q.addType(options["type"].toString()); + } + if (options.contains("directory")) { + q.setIncludeFolder(options["directory"].toString()); + } + if (options.contains("year")) { + if (options.contains("month")) { + if (options.contains("day")) { + q.setDateFilter(options["year"].toInt(), options["month"].toInt(), options["day"].toInt()); + } else { + q.setDateFilter(options["year"].toInt(), options["month"].toInt(), 0); + } + } else { + q.setDateFilter(options["year"].toInt(), 0, 0); + } + } + if (options.contains("sort")) { + if (options["sort"].toString() == QStringLiteral("auto")) { + q.setSortingOption(Baloo::Query::SortNone); + } else if (options["sort"].toString() == QStringLiteral("none")) { + q.setSortingOption(Baloo::Query::SortAuto); + } + } + + Baloo::ResultIterator it = q.exec(); + QJsonArray results; + + while (it.next()) { + QJsonObject result; + result["path"] = it.filePath(); + result["id"] = QString::fromUtf8(it.documentId()); + results.append(result); + } + + QJsonDocument responseDoc(results); + static std::string output; + output = responseDoc.toJson(QJsonDocument::Compact).toStdString(); + + 
return output.c_str(); + } +} + +extern "C" { + // Forzamos visibilidad para que ctypes lo vea + __attribute__((visibility("default"))) + const char* get_file_properties(const char* path) { + QString filePath = QString::fromUtf8(path); + + // Detectar tipo MIME + QMimeDatabase mimeDb; + QString mimeType = mimeDb.mimeTypeForFile(filePath).name(); + + // Obtener extractores para ese tipo + KFileMetaData::ExtractorCollection extractors; + QList exList = extractors.fetchExtractors(mimeType); + + // Extraer metadatos + KFileMetaData::SimpleExtractionResult result(filePath, mimeType); + for (KFileMetaData::Extractor* ex : exList) { + ex->extract(&result); + } + + const auto props = result.properties(); + if (props.isEmpty()) return ""; + + static std::string output; + output = ""; + + // Formateamos las propiedades como un string simple: "Clave:Valor|Clave:Valor" + for (auto it = props.constBegin(); it != props.constEnd(); ++it) { + KFileMetaData::PropertyInfo pi(it.key()); + + output += pi.name().toStdString() + ":" + it.value().toString().toStdString() + "|"; + } + + return output.c_str(); + } +} diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..29f951c --- /dev/null +++ b/build.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +source .venv/bin/activate + +case $1 in + -v) + shift + case $1 in + 3.8) PYINSTALLER=pyinstaller-3.8;; + 3.9) PYINSTALLER=pyinstaller-3.9;; + 3.10) PYINSTALLER=pyinstaller-3.10;; + 3.11) PYINSTALLER=pyinstaller-3.11;; + 3.12) PYINSTALLER=pyinstaller-3.12;; + 3.13) PYINSTALLER=pyinstaller-3.13;; + 3.14) PYINSTALLER=pyinstaller-3.14;; + *) PYINSTALLER=pyinstaller;; + esac + ;; + --version=3.8) PYINSTALLER=pyinstaller-3.8;; + --version=3.9) PYINSTALLER=pyinstaller-3.9;; + --version=3.10) PYINSTALLER=pyinstaller-3.10;; + --version=3.11) PYINSTALLER=pyinstaller-3.11;; + --version=3.12) PYINSTALLER=pyinstaller-3.12;; + --version=3.13) PYINSTALLER=pyinstaller-3.13;; + --version=3.14) PYINSTALLER=pyinstaller-3.14;; + *) 
PYINSTALLER=pyinstaller;; +esac + +# $PYINSTALLER \ +# --add-binary 'desktop/Desktogram.png:desktop' \ +# --add-binary 'locale/en/LC_MESSAGES/messages.mo:locale/en/LC_MESSAGES' \ +# --add-binary 'locale/es/LC_MESSAGES/messages.mo:locale/es/LC_MESSAGES' \ +# --add-binary 'locale/gl/LC_MESSAGES/messages.mo:locale/gl/LC_MESSAGES' \ +# --add-data 'js/downloader.js:js' \ +# --noconsole \ +# -F tagmanager.py + +# Sólo en windows. +# --icon=desktop/TagsManager.png \ +# --hidden-import=imagesize \ +# --hidden-import=word2number \ + +$PYINSTALLER \ + --add-binary="baloo_wrapper/build/libbaloo_wrapper.so:lib" \ + --onefile \ + -F bagheerasearch.py + +deactivate diff --git a/build_baloo_wrappers.sh b/build_baloo_wrappers.sh new file mode 100755 index 0000000..52b1c79 --- /dev/null +++ b/build_baloo_wrappers.sh @@ -0,0 +1,29 @@ +#!/usr/bin/bash + +OVERWRITE=0 + +while [ -n "$1" ]; do + + case "$1" in + -o|--overwrite) + OVERWRITE=1 + ;; + esac + shift + +done + +MAIN_PATH="$PWD" +DEST_PATH="$MAIN_PATH/bagheera_search_lib" + +LIB_NAME="libbaloo_wrapper.so" +WORK_PATH="$MAIN_PATH/baloo_wrapper" +FUNC_NAME1="execute_baloo_query" +FUNC_NAME2="get_file_properties" +if (( OVERWRITE )) || ! [ -f "$DEST_PATH/$LIB_NAME" ] && [ -d "$WORK_PATH" ]; then + BUILD_PATH="$WORK_PATH/build" + rm -Rf "$BUILD_PATH" && mkdir -p "$BUILD_PATH" && cd "$BUILD_PATH" && cmake .. 
def compile_wrapper():
    """Compile ``libbaloo_wrapper.so`` from ``baloo_wrapper/baloo_wrapper.cpp``.

    Compiler and linker flags are collected with ``pkg-config`` for every KF6
    / Qt6 package that can be found, and completed with a set of fallback
    include directories. The library is written next to this file.

    Exits the process with status 1 when the source file is missing, the
    compiler cannot be started or the compilation fails.

    Raises:
        FileNotFoundError: The compiler reported success but the shared
            object was not produced.
    """
    base_path = os.path.abspath(os.path.dirname(__file__))
    source_file = os.path.join(base_path, 'baloo_wrapper', 'baloo_wrapper.cpp')
    output_lib = os.path.join(base_path, 'libbaloo_wrapper.so')

    if not os.path.exists(source_file):
        print(f"✘ Error: Source file not found at {source_file}")
        sys.exit(1)

    # pkg-config package names (common names in KF6)
    packages = [
        'KF6Baloo',
        'KF6BalooEngine',
        'KF6FileMetadata',
        'KF6CoreAddons',
        'Qt6Core'
    ]

    cflags = []
    libs = []

    print("Detecting KF6 dependencies...")
    for pkg in packages:
        try:
            cf = subprocess.check_output(['pkg-config', '--cflags', pkg],
                                         text=True).split()
            lb = subprocess.check_output(['pkg-config', '--libs', pkg],
                                         text=True).split()
        except FileNotFoundError:
            # pkg-config itself is not installed: asking again is pointless.
            print("  [!] Warning: pkg-config is not installed")
            break
        except subprocess.CalledProcessError:
            print(f"  [!] Warning: pkg-config could not find {pkg}")
        else:
            cflags.extend(cf)
            libs.extend(lb)
            print(f"  [OK] {pkg}")

    # Fallback include directories, including the intermediate level needed
    # when the headers live in /usr/include/KF6/KFileMetaData/KFileMetaData/.
    extra_includes = [
        '-I/usr/include/KF6',
        '-I/usr/include/KF6/KFileMetaData',
        '-I/usr/include/qt6',
        '-I/usr/include/qt6/QtCore'
    ]

    # Remove duplicates while keeping the original order: the order of -I,
    # -L and -l flags is significant and a set() would scramble it.
    cflags = list(dict.fromkeys(cflags + extra_includes))
    libs = list(dict.fromkeys(libs))

    # C++17 build command, mirroring CMakeLists.txt
    compile_cmd = [
        'g++', '-shared', '-o', output_lib,
        '-fPIC', '-std=c++17',
        source_file
    ] + cflags + libs

    print(f"Executing compilation:\n{' '.join(compile_cmd)}")
    try:
        subprocess.check_call(compile_cmd)
    except FileNotFoundError:
        print("\n✘ Compilation failed: g++ was not found.")
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"\n✘ Compilation failed (Exit code {e.returncode}).")
        sys.exit(1)

    if not os.path.exists(output_lib):
        raise FileNotFoundError("Compilation finished but .so file is missing.")

    print(f"✔ Successfully compiled: {output_lib}")
include_package_data=True, + zip_safe=False, +) diff --git a/syntax.txt b/syntax.txt new file mode 100644 index 0000000..6693ebc --- /dev/null +++ b/syntax.txt @@ -0,0 +1,90 @@ +bagheera [options] query [--recursive [query]] [options] + +options: + -a | --date Date filter. Format year[-month[-day]] + -d | --directory Base directory to execute main query. + -h | --help [help] Print help. Optional help parameter can be 'attributes', 'dates', 'examples', 'having', 'query', 'recursive' or 'types'. + -g | --having Results not matching expression are ignored. + -i | --ids Add file ID prior to file name. + -l | --limit Maximum number of results to print. + -o | --offset Offset to first result to print. + -r | --recursive [query] Enable directory results recursion. An optional query for recursive results could be used. + -e | --recursive-depth Maximum directory recursion depth. + -c | --recursive-having Recursive results not matching expression are ignored. + -y | --recursive-type File type filter for recursive results, use --help types to obtain information about available types. + -t | --silent Print basic info only, aka only print summary. + -s | --sort < sort [ < ASC | DES > ] > Sort obtained results, by default results are not sorted. Sort types available are: automatic, date, default and name. + -t | --type File type filter for results, use --help types to obtain information about available types. + -v | --verbose More verbosity. + --version Print version information. + +Remarks: + · query searches only for independent words with size greater than 3, even if phrase is quoted or double quoted. + · having filters only in results obtained by queries, but supports quoted or double quoted phrases and is not limited by word size. + · the NOT logical operator has a high processing and memory cost, so it is preferable to limit its use. + · comparing two attributes is possible but is more expensive than comparing against a constant value. The same remark with NOT applies here. 
+ · recursive mode recurses over all directories found, but without a recursive query, or recursive-depth param, could obtain many results if there are many trees and leaves in directory arborescence. + · when there are duplicate parameters only the last parameter value is used and previous parameter values are ignored. + · sort parameter must process all results first, so there is a performance impact that depends on the size of the result set. + +Syntax reference: + +query ::= | [ NOT ] ( ) + +expression ::= [ NOT ] [ [ < AND | OR > ] [ NOT ] ] + + ::= | + + ::= | | + + ::= | [ [ comp_value ] ] | | + + ::= + + ::= + + ::= | + + ::= | | | | + + ::= album | albumartist | artist | author | composer | lyricist | height | person | title | width | ... + + ::= yyyymmdd | yyyy-mm-dd | dd-mm-yyyy | dd/mm/yyyy | [ + | - ] [ d | m | w | y ] + + ::= + + ::= [ . ] + + ::= <1..10> + + ::= < | [ ] | [ ] >... + + ::= < any utf-8 character except or > + + ::= < any utf-8 character except > + + ::= < any utf-8 character except > + + ::= < > + + ::= < > + + ::= <'> + + ::= <"> + + ::= = | : | != | <> | >= | <= | > | < + + ::= MODIFIED < TODAY | YESTERDAY | LAST [date_length] | [date_length] AGO > + + ::= | ONE | TWO | THREE | ... + + ::= DAYS | DAY | MONTHS | MONTH | WEEKS | WEEK | YEARS | YEAR + + ::= LANDSCAPE | PORTRAIT | SQUARE + + ::= | [ NOT ] ( ) + + ::= [ NOT ] [ [ < AND | OR > ] [ NOT ] ] + + ::= | |