From 4e324a70e87ec384cf98e8acb07510be5dbba5f7 Mon Sep 17 00:00:00 2001 From: Thorsten Date: Tue, 3 Feb 2026 16:34:04 +0100 Subject: [PATCH] . --- bin/auto_add_sub_folder.sh | 328 +++++++++++++++++++++++++------------ 1 file changed, 219 insertions(+), 109 deletions(-) diff --git a/bin/auto_add_sub_folder.sh b/bin/auto_add_sub_folder.sh index 346e864..a40850b 100755 --- a/bin/auto_add_sub_folder.sh +++ b/bin/auto_add_sub_folder.sh @@ -19,8 +19,15 @@ shopt -s nullglob # Subs, Sub, Subtitles, Subtitle # then muxes ALL .idx/.sub pairs found inside. # -# - Does NOT override language (VobSub language usually lives in the .idx) -# - Sets track-name (heuristic) + forced flag based on idx filename only +# - Language naming strategy: +# 1) Prefer filename tokens (more reliable for scene releases) +# 2) If filename yields no hint -> read .idx "id: xx" and map to German display names +# 3) If still unknown -> do not set --track-name +# +# - Subtitle ordering is configurable via settings.ini [subtitle_order] (read ONCE): +# order=de:forced,de,en:forced,en +# unknown=end +# unknown_forced_first=1 # # mkvmerge: # - auto-detect native OR flatpak (org.bunkus.mkvtoolnix-gui etc.) 
@@ -46,8 +53,47 @@ VERBOSE=1 # Robust subtitle folder detection (case-insensitive) SUBDIR_CANDIDATES=("Subs" "Sub" "Subtitles" "Subtitle") +# Defaults for subtitle order (can be overridden in settings.ini) +SUB_ORDER_RAW_DEFAULT="de:forced,de,en:forced,en" +SUB_UNKNOWN_MODE_DEFAULT="end" # end | keep +SUB_UNKNOWN_FORCED_FIRST_DEFAULT="1" # 1/0 + +# Will be filled in main() once +SUB_ORDER_RAW="" +SUB_UNKNOWN_MODE="" +SUB_UNKNOWN_FORCED_FIRST="" + +# mkvmerge command (native or flatpak) MKVMERGE_CMD=() +# Order rank map built once from SUB_ORDER_RAW +declare -A SUB_ORDER_RANK=() + +# ---------------- language "database" for display names (GERMAN) ---------------- +# Maps ISO 639-1/2 codes to German display names +declare -A LANG_DB=( + [de]="Deutsch" [deu]="Deutsch" [ger]="Deutsch" + [en]="Englisch" [eng]="Englisch" + [fr]="Französisch" [fra]="Französisch" [fre]="Französisch" + [es]="Spanisch" [spa]="Spanisch" + [it]="Italienisch" [ita]="Italienisch" + [nl]="Niederländisch" [nld]="Niederländisch" [dut]="Niederländisch" + [pl]="Polnisch" [pol]="Polnisch" + [ru]="Russisch" [rus]="Russisch" + [pt]="Portugiesisch" [por]="Portugiesisch" + [tr]="Türkisch" [tur]="Türkisch" + [sv]="Schwedisch" [swe]="Schwedisch" + [no]="Norwegisch" [nor]="Norwegisch" + [da]="Dänisch" [dan]="Dänisch" + [fi]="Finnisch" [fin]="Finnisch" + [cs]="Tschechisch" [ces]="Tschechisch" [cze]="Tschechisch" + [hu]="Ungarisch" [hun]="Ungarisch" + [ro]="Rumänisch" [ron]="Rumänisch" [rum]="Rumänisch" + [bg]="Bulgarisch" [bul]="Bulgarisch" + [el]="Griechisch" [ell]="Griechisch" [gre]="Griechisch" + [he]="Hebräisch" [heb]="Hebräisch" +) + usage() { cat <<'EOF' Usage: @@ -69,9 +115,6 @@ dbg() { (( VERBOSE == 1 )) && printf ' %s\n' "$*"; } die() { printf 'ERROR: %s\n' "$*" >&2; exit 1; } # ---------------- INI parsing (minimal, safe for values with spaces) ---------------- -# Reads first matching key in a section: -# [section] -# key=value ini_get() { local ini_file="$1" local section="$2" @@ -84,14 +127,11 @@ 
ini_get() { BEGIN{ insec=0 } { line=$0 - # strip BOM if present - sub(/^\xef\xbb\xbf/, "", line) + sub(/^\xef\xbb\xbf/, "", line) # strip BOM if present - # ignore comments (full-line) if (line ~ /^[ \t]*#/) next if (line ~ /^[ \t]*;/) next - # section header if (match(line, /^[ \t]*\[[^]]+\][ \t]*$/)) { insec = (trim(line) == sec) next @@ -99,7 +139,6 @@ ini_get() { if (!insec) next - # key=value, keep everything after first '=' (may contain spaces) if (match(line, /^[ \t]*[^=]+=/)) { split(line, a, "=") k = trim(a[1]) @@ -109,7 +148,6 @@ ini_get() { v = substr(line, pos+1) v = trim(v) - # remove optional surrounding quotes if (v ~ /^".*"$/) { sub(/^"/, "", v); sub(/"$/, "", v) } print v exit @@ -157,7 +195,6 @@ detect_mkvmerge() { } # ---------------- sample exclusion ---------------- -# Returns 0 if path should be excluded as sample; otherwise 1. is_sample_mkv() { local f="$1" local base bn lc @@ -165,17 +202,14 @@ is_sample_mkv() { bn="${base%.*}" lc="${bn,,}" - # 1) exactly "sample" if [[ "$lc" == "sample" ]]; then return 0 fi - # 2) token "sample" in basename (separators: dot, underscore, hyphen, space) if [[ "$lc" =~ (^|[._\ \-])sample([._\ \-]|$) ]]; then return 0 fi - # 3) any directory component equals "sample" (case-insensitive) local d seg d="$(dirname "$f")" while [[ "$d" != "/" && -n "$d" ]]; do @@ -189,7 +223,7 @@ is_sample_mkv() { return 1 } -# ---------------- subtitle folder finder (case-insensitive candidates) ---------------- +# ---------------- subtitle folder finder ---------------- find_subs_dir() { local mkv="$1" local dir cand hit @@ -206,55 +240,13 @@ find_subs_dir() { return 1 } - -# ---------------- language "database" for display names (GERMAN) ---------------- -# Maps ISO 639-1/2 codes to German display names -declare -A LANG_DB=( - # Deutsch - [de]="Deutsch" [deu]="Deutsch" [ger]="Deutsch" - # Englisch - [en]="Englisch" [eng]="Englisch" - # Französisch - [fr]="Französisch" [fra]="Französisch" [fre]="Französisch" - # Spanisch - 
# ---------------- language helpers ----------------
#######################################
# Look up the German display name for a language code.
# Globals:   LANG_DB (read)
# Arguments: $1 - language code; it is lower-cased before the lookup
# Outputs:   the display name on stdout, without a trailing newline
# Returns:   0 if LANG_DB has a non-empty entry for the code,
#            1 otherwise (nothing is printed)
#######################################
lang_name_from_code() {
  local code="${1:-}"
  code="${code,,}"

  # An empty string is not a usable associative-array key, so stop before
  # indexing LANG_DB with it.
  [[ -n "$code" ]] || return 1

  local name="${LANG_DB[$code]:-}"
  [[ -n "$name" ]] || return 1

  printf '%s' "$name"
}
#######################################
# Infer a language code from filename tokens (best effort).
#
# The stem is lower-cased and tested against one rule per language, in the
# order listed below; the first rule that matches wins. A rule matches when
# one of its short tokens appears as a whole token (delimited by '.', '_',
# ' ', '-' or the start/end of the stem) or when its full word appears
# anywhere in the stem.
#
# Arguments: $1 - filename stem (no directory, no extension)
# Outputs:   the two-letter code on stdout, without a trailing newline
# Returns:   0 if a language was recognised, 1 otherwise (nothing printed)
#######################################
lang_code_from_filename() {
  local stem="${1:-}"
  local lc="${stem,,}"
  local sep='[._ -]'

  # Rule format: <code>:<token alternation>:<full word>
  # NOTE(review): the full-word patterns for German and French are not
  # visible in this view; "german" / "french" are assumed by analogy with
  # the other rows — confirm against the complete script.
  local -a rules=(
    'en:eng|en:english'
    'de:deu|ger|de:german'
    'fr:fra|fre|fr:french'
    'es:spa|es:spanish'
    'it:ita|it:italian'
    'nl:nld|dut|nl:dutch'
    'pl:pol|pl:polish'
    'ru:rus|ru:russian'
    'pt:por|pt:portuguese'
    'tr:tur|tr:turkish'
    'he:heb|he:hebrew'
  )

  local rule code tokens word re
  for rule in "${rules[@]}"; do
    IFS=':' read -r code tokens word <<<"$rule"
    # Keep the regex in a variable so bash treats it as a pattern, not as a
    # quoted literal.
    re="(^|${sep})(${tokens})(${sep}|\$)"
    if [[ "$lc" =~ $re || "$lc" == *"$word"* ]]; then
      printf '%s' "$code"
      return 0
    fi
  done

  return 1
}

#######################################
# Infer the language code used for sorting one .idx file.
# Resolution order:
#   1) tokens in the file name (lang_code_from_filename)
#   2) the code reported by idx_lang_from_file for the file
#   3) the literal "und"
# Arguments: $1 - path to the .idx file
# Outputs:   the language code on stdout, without a trailing newline
# Returns:   0
#######################################
infer_lang_code_for_idx() {
  local idx="$1"
  local stem code

  # Strip directory and extension with parameter expansion; no fork needed.
  stem="${idx##*/}"
  stem="${stem%.[iI][dD][xX]}"

  # Both helpers signal "unknown" with a non-zero status; "|| true" keeps
  # that from being fatal when the script runs under errexit.
  code="$(lang_code_from_filename "$stem" || true)"
  if [[ -z "$code" ]]; then
    code="$(idx_lang_from_file "$idx" || true)"
  fi

  printf '%s' "${code:-und}"
}
# ---------------- subtitle order helpers ----------------
#######################################
# Rebuild the SUB_ORDER_RANK lookup table from a comma-separated order list.
# Each non-empty item (for example "de:forced" or "en") is stripped of
# whitespace, lower-cased and mapped to its zero-based position in the list.
# An item listed more than once keeps the position of its last occurrence.
# Globals:   SUB_ORDER_RANK (written; must already be declared with -A)
# Arguments: $1 - order list, e.g. "de:forced,de,en:forced,en"
# Returns:   0
#######################################
build_sub_order_rank() {
  local raw="${1:-}"
  local -a items=()
  local item
  local pos=0

  SUB_ORDER_RANK=() # reset

  IFS=',' read -r -a items <<<"$raw"
  (( ${#items[@]} > 0 )) || return 0

  for item in "${items[@]}"; do
    item="${item//[[:space:]]/}"
    item="${item,,}"
    [[ -n "$item" ]] || continue
    SUB_ORDER_RANK["$item"]=$pos
    # Plain arithmetic expansion on purpose: "((pos++))" yields exit status 1
    # while pos is 0, which aborts a script running under errexit.
    pos=$((pos + 1))
  done

  return 0
}

#######################################
# Print the zero-padded sort rank for one subtitle track.
# Lookup order in SUB_ORDER_RANK:
#   1) "<lang>:forced" or "<lang>:normal", depending on the forced flag
#   2) "<lang>"
# Tracks without an entry rank at 90000. When SUB_UNKNOWN_MODE is "end" and
# SUB_UNKNOWN_FORCED_FIRST is "1", non-forced unknown tracks rank at 90001
# so that forced unknown tracks sort ahead of them.
# Globals:   SUB_ORDER_RANK, SUB_UNKNOWN_MODE, SUB_UNKNOWN_FORCED_FIRST (read)
# Arguments: $1 - language code (lower-cased here; empty is treated as "und")
#            $2 - "yes" if the track is forced, anything else otherwise
# Outputs:   five-digit rank on stdout, without a trailing newline
# Returns:   0
#######################################
subtitle_sort_rank() {
  local lang="${1:-und}"
  local forced="${2:-no}"
  local flag_token="normal"
  local key rank

  lang="${lang,,}"
  if [[ "$forced" == "yes" ]]; then
    flag_token="forced"
  fi

  # The exact "<lang>:<flag>" entry wins over the bare "<lang>" entry.
  for key in "${lang}:${flag_token}" "$lang"; do
    if [[ -n "${SUB_ORDER_RANK[$key]+x}" ]]; then
      printf '%05d' "${SUB_ORDER_RANK[$key]}"
      return 0
    fi
  done

  # Unknown language: sort behind every configured entry.
  rank=90000
  if [[ "${SUB_UNKNOWN_MODE:-end}" == "end" \
    && "${SUB_UNKNOWN_FORCED_FIRST:-1}" == "1" \
    && "$forced" != "yes" ]]; then
    rank=90001
  fi

  printf '%05d' "$rank"
}
-f "$sub" ]]; then @@ -425,24 +526,24 @@ mux_one_mkv() { continue fi - local meta name forcedFlag + local meta name forcedFlag2 meta="$(infer_meta_for_idx "$idx")" - IFS='|' read -r name forcedFlag <<<"$meta" + IFS='|' read -r name forcedFlag2 <<<"$meta" - dbg "Add VobSub: $(basename "$idx") -> name='$name', forced=$forcedFlag" + dbg "Add VobSub: $(basename "$idx") -> name='${name:-}', forced=$forcedFlag2" - # IMPORTANT: do NOT override language; keep what is in the .idx + # Do NOT override language; keep what is in the .idx if [[ -n "$name" ]]; then cmd+=( --track-name 0:"$name" ) fi - if [[ "$forcedFlag" == "yes" ]]; then + if [[ "$forcedFlag2" == "yes" ]]; then cmd+=( --forced-track 0:yes --default-track 0:no ) else cmd+=( --forced-track 0:no --default-track 0:no ) fi - cmd+=( "$idx" ) + cmd+=( "$idx" ) ((++added)) done @@ -493,12 +594,9 @@ process_root() { done < <(find "$root" -type f -iname "*.mkv" -print0) log "Found MKVs: ${#mkvs[@]} under $root" - if [[ ${#mkvs[@]} -eq 0 ]]; then - return 0 - fi + [[ ${#mkvs[@]} -eq 0 ]] && return 0 - local processed=0 - local mkv="" + local processed=0 mkv="" for mkv in "${mkvs[@]}"; do mux_one_mkv "$mkv" ((++processed)) @@ -509,7 +607,7 @@ process_root() { # ---------------- main ---------------- main() { - ROOTS=() + local -a ROOTS=() while [[ $# -gt 0 ]]; do case "$1" in -n|--dry-run) DRY_RUN=1; shift ;; @@ -527,7 +625,7 @@ main() { base_dir="$(cd "$script_dir/.." && pwd)" ini_file="$base_dir/settings.ini" - # If no CLI roots: read input_folder from settings.ini + # settings.ini only needed if no CLI roots if [[ ${#ROOTS[@]} -eq 0 ]]; then if [[ ! 
-f "$ini_file" ]]; then echo @@ -539,7 +637,6 @@ main() { local input_folder input_folder="$(ini_get "$ini_file" "pathes" "input_folder" || true)" - if [[ -z "${input_folder:-}" ]]; then echo echo -e "${WHITE_ON_RED} input_folder fehlt in settings.ini ([pathes]) ${NC}" @@ -547,10 +644,22 @@ main() { read -n 1 -s -r -p "Press any key to exit" exit 1 fi - ROOTS=("$input_folder") fi + # Read subtitle order ONCE (only if ini exists; if script is run with CLI roots and no ini, use defaults) + if [[ -f "$ini_file" ]]; then + SUB_ORDER_RAW="$(ini_get "$ini_file" "subtitle_order" "order" || true)" + SUB_UNKNOWN_MODE="$(ini_get "$ini_file" "subtitle_order" "unknown" || true)" + SUB_UNKNOWN_FORCED_FIRST="$(ini_get "$ini_file" "subtitle_order" "unknown_forced_first" || true)" + fi + + SUB_ORDER_RAW="${SUB_ORDER_RAW:-$SUB_ORDER_RAW_DEFAULT}" + SUB_UNKNOWN_MODE="${SUB_UNKNOWN_MODE:-$SUB_UNKNOWN_MODE_DEFAULT}" + SUB_UNKNOWN_FORCED_FIRST="${SUB_UNKNOWN_FORCED_FIRST:-$SUB_UNKNOWN_FORCED_FIRST_DEFAULT}" + + build_sub_order_rank "$SUB_ORDER_RAW" + detect_mkvmerge echo @@ -558,11 +667,12 @@ main() { echo -e "${BLACK_ON_WHITE} Auto Add Sub Folder (VobSub) ${NC}" echo "────────────────────────────────────────────────────────────────" echo " " - echo -e "${WHITE_ON_GRAY} Input Folder ${NC} ${ROOTS[*]}" - echo -e "${WHITE_ON_GRAY} Subs Folders ${NC} ${SUBDIR_CANDIDATES[*]}" - echo -e "${WHITE_ON_GRAY} MKVMerge ${NC} ${MKVMERGE_CMD[*]}" - echo -e "${WHITE_ON_GRAY} Dry Run ${NC} ${DRY_RUN}" - echo -e "${WHITE_ON_GRAY} Keep Backup ${NC} ${KEEP_BAK}" + echo -e "${WHITE_ON_GRAY} Input Folder ${NC} ${ROOTS[*]}" + echo -e "${WHITE_ON_GRAY} Subs Folders ${NC} ${SUBDIR_CANDIDATES[*]}" + echo -e "${WHITE_ON_GRAY} Reihenfolge ${NC} ${SUB_ORDER_RAW}" + echo -e "${WHITE_ON_GRAY} MKVMerge ${NC} ${MKVMERGE_CMD[*]}" + echo -e "${WHITE_ON_GRAY} Dry Run ${NC} ${DRY_RUN}" + echo -e "${WHITE_ON_GRAY} Keep Backup ${NC} ${KEEP_BAK}" echo " " echo 
"────────────────────────────────────────────────────────────────" echo