This commit is contained in:
2026-02-03 16:34:04 +01:00
parent 16ec7703cd
commit 4e324a70e8

View File

@@ -19,8 +19,15 @@ shopt -s nullglob
# Subs, Sub, Subtitles, Subtitle
# then muxes ALL .idx/.sub pairs found inside.
#
# - Does NOT override language (VobSub language usually lives in the .idx)
# - Sets track-name (heuristic) + forced flag based on idx filename only
# - Language naming strategy:
# 1) Prefer filename tokens (more reliable for scene releases)
# 2) If filename yields no hint -> read .idx "id: xx" and map to German display names
# 3) If still unknown -> do not set --track-name
#
# - Subtitle ordering is configurable via settings.ini [subtitle_order] (read ONCE):
# order=de:forced,de,en:forced,en
# unknown=end
# unknown_forced_first=1
#
# mkvmerge:
# - auto-detect native OR flatpak (org.bunkus.mkvtoolnix-gui etc.)
@@ -46,8 +53,47 @@ VERBOSE=1
# Robust subtitle folder detection (case-insensitive)
SUBDIR_CANDIDATES=("Subs" "Sub" "Subtitles" "Subtitle")
# Defaults for subtitle order (can be overridden in settings.ini).
# Order spec: comma-separated entries, either "<lang>" or "<lang>:forced";
# earlier entries sort first.
SUB_ORDER_RAW_DEFAULT="de:forced,de,en:forced,en"
# How subtitles whose language is not listed in the order spec are ranked.
SUB_UNKNOWN_MODE_DEFAULT="end" # end | keep
# In "end" mode: 1 = forced unknown subtitles rank before non-forced ones.
SUB_UNKNOWN_FORCED_FIRST_DEFAULT="1" # 1/0
# Effective values; filled in main() once (settings.ini value if present,
# otherwise the *_DEFAULT above).
SUB_ORDER_RAW=""
SUB_UNKNOWN_MODE=""
SUB_UNKNOWN_FORCED_FIRST=""
# mkvmerge command (native or flatpak), stored as an array so it can be
# used as the prefix of the final mkvmerge command line.
MKVMERGE_CMD=()
# Order rank map built once from SUB_ORDER_RAW
# (key: order entry such as "de:forced" or "de", value: 0-based position).
declare -A SUB_ORDER_RANK=()
# ---------------- language "database" for display names (GERMAN) ----------------
# Lookup table: ISO 639-1 / 639-2 language code -> German display name.
# Every language is listed with all of its code variants.
declare -A LANG_DB=(
  # German
  [de]="Deutsch"
  [deu]="Deutsch"
  [ger]="Deutsch"
  # English
  [en]="Englisch"
  [eng]="Englisch"
  # French
  [fr]="Französisch"
  [fra]="Französisch"
  [fre]="Französisch"
  # Spanish
  [es]="Spanisch"
  [spa]="Spanisch"
  # Italian
  [it]="Italienisch"
  [ita]="Italienisch"
  # Dutch
  [nl]="Niederländisch"
  [nld]="Niederländisch"
  [dut]="Niederländisch"
  # Polish
  [pl]="Polnisch"
  [pol]="Polnisch"
  # Russian
  [ru]="Russisch"
  [rus]="Russisch"
  # Portuguese
  [pt]="Portugiesisch"
  [por]="Portugiesisch"
  # Turkish
  [tr]="Türkisch"
  [tur]="Türkisch"
  # Swedish
  [sv]="Schwedisch"
  [swe]="Schwedisch"
  # Norwegian
  [no]="Norwegisch"
  [nor]="Norwegisch"
  # Danish
  [da]="Dänisch"
  [dan]="Dänisch"
  # Finnish
  [fi]="Finnisch"
  [fin]="Finnisch"
  # Czech
  [cs]="Tschechisch"
  [ces]="Tschechisch"
  [cze]="Tschechisch"
  # Hungarian
  [hu]="Ungarisch"
  [hun]="Ungarisch"
  # Romanian
  [ro]="Rumänisch"
  [ron]="Rumänisch"
  [rum]="Rumänisch"
  # Bulgarian
  [bg]="Bulgarisch"
  [bul]="Bulgarisch"
  # Greek
  [el]="Griechisch"
  [ell]="Griechisch"
  [gre]="Griechisch"
  # Hebrew
  [he]="Hebräisch"
  [heb]="Hebräisch"
)
usage() {
cat <<'EOF'
Usage:
@@ -69,9 +115,6 @@ dbg() { (( VERBOSE == 1 )) && printf ' %s\n' "$*"; }
# Report a fatal error on stderr (all arguments joined into one message)
# and terminate the script with exit status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}
# ---------------- INI parsing (minimal, safe for values with spaces) ----------------
# Reads first matching key in a section:
# [section]
# key=value
ini_get() {
local ini_file="$1"
local section="$2"
@@ -84,14 +127,11 @@ ini_get() {
BEGIN{ insec=0 }
{
line=$0
# strip BOM if present
sub(/^\xef\xbb\xbf/, "", line)
sub(/^\xef\xbb\xbf/, "", line) # strip BOM if present
# ignore comments (full-line)
if (line ~ /^[ \t]*#/) next
if (line ~ /^[ \t]*;/) next
# section header
if (match(line, /^[ \t]*\[[^]]+\][ \t]*$/)) {
insec = (trim(line) == sec)
next
@@ -99,7 +139,6 @@ ini_get() {
if (!insec) next
# key=value, keep everything after first '=' (may contain spaces)
if (match(line, /^[ \t]*[^=]+=/)) {
split(line, a, "=")
k = trim(a[1])
@@ -109,7 +148,6 @@ ini_get() {
v = substr(line, pos+1)
v = trim(v)
# remove optional surrounding quotes
if (v ~ /^".*"$/) { sub(/^"/, "", v); sub(/"$/, "", v) }
print v
exit
@@ -157,7 +195,6 @@ detect_mkvmerge() {
}
# ---------------- sample exclusion ----------------
# Returns 0 if path should be excluded as sample; otherwise 1.
is_sample_mkv() {
local f="$1"
local base bn lc
@@ -165,17 +202,14 @@ is_sample_mkv() {
bn="${base%.*}"
lc="${bn,,}"
# 1) exactly "sample"
if [[ "$lc" == "sample" ]]; then
return 0
fi
# 2) token "sample" in basename (separators: dot, underscore, hyphen, space)
if [[ "$lc" =~ (^|[._\ \-])sample([._\ \-]|$) ]]; then
return 0
fi
# 3) any directory component equals "sample" (case-insensitive)
local d seg
d="$(dirname "$f")"
while [[ "$d" != "/" && -n "$d" ]]; do
@@ -189,7 +223,7 @@ is_sample_mkv() {
return 1
}
# ---------------- subtitle folder finder (case-insensitive candidates) ----------------
# ---------------- subtitle folder finder ----------------
find_subs_dir() {
local mkv="$1"
local dir cand hit
@@ -206,55 +240,13 @@ find_subs_dir() {
return 1
}
# ---------------- language "database" for display names (GERMAN) ----------------
# Maps ISO 639-1/2 codes to German display names
# Lookup table from language code to German display name; each language is
# listed with every code variant it is known by in this table.
declare -A LANG_DB=(
# German
[de]="Deutsch" [deu]="Deutsch" [ger]="Deutsch"
# English
[en]="Englisch" [eng]="Englisch"
# French
[fr]="Französisch" [fra]="Französisch" [fre]="Französisch"
# Spanish
[es]="Spanisch" [spa]="Spanisch"
# Italian
[it]="Italienisch" [ita]="Italienisch"
# Dutch
[nl]="Niederländisch" [nld]="Niederländisch" [dut]="Niederländisch"
# Polish
[pl]="Polnisch" [pol]="Polnisch"
# Russian
[ru]="Russisch" [rus]="Russisch"
# Portuguese
[pt]="Portugiesisch" [por]="Portugiesisch"
# Turkish
[tr]="Türkisch" [tur]="Türkisch"
# Swedish
[sv]="Schwedisch" [swe]="Schwedisch"
# Norwegian
[no]="Norwegisch" [nor]="Norwegisch"
# Danish
[da]="Dänisch" [dan]="Dänisch"
# Finnish
[fi]="Finnisch" [fin]="Finnisch"
# Czech
[cs]="Tschechisch" [ces]="Tschechisch" [cze]="Tschechisch"
# Hungarian
[hu]="Ungarisch" [hun]="Ungarisch"
# Romanian
[ro]="Rumänisch" [ron]="Rumänisch" [rum]="Rumänisch"
# Bulgarian
[bg]="Bulgarisch" [bul]="Bulgarisch"
# Greek
[el]="Griechisch" [ell]="Griechisch" [gre]="Griechisch"
# Hebrew
[he]="Hebräisch" [heb]="Hebräisch"
)
# ---------------- language helpers ----------------
# Translate a language code into its display name using LANG_DB.
# Arguments: $1 - language code (matched case-insensitively)
# Outputs:   the display name on stdout, without trailing newline
# Returns:   0 if the code is known, 1 (and no output) otherwise
lang_name_from_code() {
  local key="${1,,}"
  local label="${LANG_DB[$key]:-}"

  if [[ -z "$label" ]]; then
    return 1
  fi
  printf '%s' "$label"
}
# Extract language code from VobSub .idx (first "id:" line), lowercased.
# Example line: "id: en, index: 0"
idx_lang_from_file() {
local idx="$1"
local code=""
@@ -279,20 +271,20 @@ idx_lang_from_file() {
[[ -n "$code" ]] && printf '%s' "$code"
}
# Try to infer a language "code" from filename tokens (best effort).
# Returns: code or empty
# Infer a language code from filename tokens (best effort).
# Returns: code or empty (exit 1)
lang_code_from_filename() {
local stem="$1"
local lc="${stem,,}"
# English tokens
# English
if [[ "$lc" =~ (^|[._\ \-])eng([._\ \-]|$) ]] || \
[[ "$lc" =~ (^|[._\ \-])en([._\ \-]|$) ]] || \
[[ "$lc" == *english* ]]; then
printf 'en'; return 0
fi
# German tokens
# German
if [[ "$lc" =~ (^|[._\ \-])deu([._\ \-]|$) ]] || \
[[ "$lc" =~ (^|[._\ \-])ger([._\ \-]|$) ]] || \
[[ "$lc" =~ (^|[._\ \-])de([._\ \-]|$) ]] || \
@@ -300,7 +292,7 @@ lang_code_from_filename() {
printf 'de'; return 0
fi
# French tokens
# French
if [[ "$lc" =~ (^|[._\ \-])fra([._\ \-]|$) ]] || \
[[ "$lc" =~ (^|[._\ \-])fre([._\ \-]|$) ]] || \
[[ "$lc" =~ (^|[._\ \-])fr([._\ \-]|$) ]] || \
@@ -308,38 +300,85 @@ lang_code_from_filename() {
printf 'fr'; return 0
fi
# Spanish tokens
# Spanish
if [[ "$lc" =~ (^|[._\ \-])spa([._\ \-]|$) ]] || \
[[ "$lc" =~ (^|[._\ \-])es([._\ \-]|$) ]] || \
[[ "$lc" == *spanish* ]]; then
printf 'es'; return 0
fi
# Italian tokens
# Italian
if [[ "$lc" =~ (^|[._\ \-])ita([._\ \-]|$) ]] || \
[[ "$lc" =~ (^|[._\ \-])it([._\ \-]|$) ]] || \
[[ "$lc" == *italian* ]]; then
printf 'it'; return 0
fi
# Add more here if you want...
# Dutch
if [[ "$lc" =~ (^|[._\ \-])nld([._\ \-]|$) ]] || \
[[ "$lc" =~ (^|[._\ \-])dut([._\ \-]|$) ]] || \
[[ "$lc" =~ (^|[._\ \-])nl([._\ \-]|$) ]] || \
[[ "$lc" == *dutch* ]]; then
printf 'nl'; return 0
fi
# Polish
if [[ "$lc" =~ (^|[._\ \-])pol([._\ \-]|$) ]] || \
[[ "$lc" =~ (^|[._\ \-])pl([._\ \-]|$) ]] || \
[[ "$lc" == *polish* ]]; then
printf 'pl'; return 0
fi
# Russian
if [[ "$lc" =~ (^|[._\ \-])rus([._\ \-]|$) ]] || \
[[ "$lc" =~ (^|[._\ \-])ru([._\ \-]|$) ]] || \
[[ "$lc" == *russian* ]]; then
printf 'ru'; return 0
fi
# Portuguese
if [[ "$lc" =~ (^|[._\ \-])por([._\ \-]|$) ]] || \
[[ "$lc" =~ (^|[._\ \-])pt([._\ \-]|$) ]] || \
[[ "$lc" == *portuguese* ]]; then
printf 'pt'; return 0
fi
# Turkish
if [[ "$lc" =~ (^|[._\ \-])tur([._\ \-]|$) ]] || \
[[ "$lc" =~ (^|[._\ \-])tr([._\ \-]|$) ]] || \
[[ "$lc" == *turkish* ]]; then
printf 'tr'; return 0
fi
# Hebrew
if [[ "$lc" =~ (^|[._\ \-])heb([._\ \-]|$) ]] || \
[[ "$lc" =~ (^|[._\ \-])he([._\ \-]|$) ]] || \
[[ "$lc" == *hebrew* ]]; then
printf 'he'; return 0
fi
return 1
}
# Convert a language code to a nice name using LANG_DB.
# Returns: name or empty
lang_name_from_code() {
local code="${1,,}"
[[ -n "${LANG_DB[$code]:-}" ]] && printf '%s' "${LANG_DB[$code]}"
# Determine the language code used to sort one .idx file.
# Lookup chain:
#   1) tokens in the file name (basename without ".idx"),
#   2) the code reported by idx_lang_from_file for the file,
#   3) the literal "und" when neither source yields a code.
# Arguments: $1 - path to the .idx file
# Outputs:   the language code on stdout, without trailing newline
infer_lang_code_for_idx() {
  local idx="$1"
  local name_stem guess

  name_stem="$(basename "$idx" .idx)"

  # A failing helper only means "no hint here", hence the `|| true`.
  guess="$(lang_code_from_filename "$name_stem" || true)"
  if [[ -z "$guess" ]]; then
    guess="$(idx_lang_from_file "$idx" || true)"
  fi

  printf '%s' "${guess:-und}"
}
# Track naming (German display names), prefers filename tokens, falls back to idx content.
# Echoes: track_name|forcedFlag
# Strategy:
# 1) Prefer filename tokens (more reliable for scene releases)
# 2) If filename gives no hint -> read idx "id: xx" and map to display name
# 3) If still unknown -> empty name (=> don't set --track-name)
infer_meta_for_idx() {
local idx="$1"
local stem lc forced name code_file code_idx
@@ -351,14 +390,11 @@ infer_meta_for_idx() {
[[ "$lc" == *forced* ]] && forced="yes"
name=""
# 1) from filename
code_file="$(lang_code_from_filename "$stem" || true)"
if [[ -n "${code_file:-}" ]]; then
name="$(lang_name_from_code "$code_file" || true)"
fi
# 2) fallback: from idx file content
if [[ -z "$name" ]]; then
code_idx="$(idx_lang_from_file "$idx" || true)"
if [[ -n "${code_idx:-}" ]]; then
@@ -366,11 +402,11 @@ infer_meta_for_idx() {
fi
fi
# Optional: warn on mismatch if both exist (filename wins)
# warn on mismatch if both exist (filename wins)
if [[ -n "${code_file:-}" ]]; then
code_idx="$(idx_lang_from_file "$idx" || true)"
if [[ -n "${code_idx:-}" && "$code_idx" != "$code_file" ]]; then
dbg "WARN: language mismatch for $(basename "$idx"): filename=$code_file, idx=$code_idx (using filename)"
if [[ -n "${code_idx:-}" && "${code_idx,,}" != "${code_file,,}" ]]; then
dbg "WARN: Sprach-Mismatch für $(basename "$idx"): Dateiname=${code_file}, IDX=${code_idx} (Dateiname gewinnt)"
fi
fi
@@ -378,8 +414,50 @@ infer_meta_for_idx() {
echo "${name}|${forced}"
}
# ---------------- subtitle order helpers ----------------
# Populate the global associative array SUB_ORDER_RANK from an order spec.
# Globals:   SUB_ORDER_RANK (written; must already be declared with -A)
# Arguments: $1 - comma-separated order entries, e.g. "de:forced,de,en"
# Behavior:  all whitespace inside an entry is removed, empty entries are
#            skipped, and each remaining entry is mapped to its 0-based
#            position among the kept entries. Previous content is discarded.
build_sub_order_rank() {
  local raw="$1"
  local -a items=() # local on purpose: must not leak into the global scope
  local item
  local i=0

  SUB_ORDER_RANK=() # reset

  IFS=',' read -r -a items <<<"$raw"
  for item in "${items[@]}"; do
    item="${item//[[:space:]]/}"
    [[ -z "$item" ]] && continue
    SUB_ORDER_RANK["$item"]=$i
    # Not ((i++)): that evaluates to 0 on the first pass, returns status 1
    # and would abort the whole script when errexit (set -e) is active.
    i=$((i + 1))
  done
}
# Compute the zero-padded (5 digit) sort rank for one subtitle.
# Globals:   SUB_ORDER_RANK (read), SUB_UNKNOWN_MODE (read, default "end"),
#            SUB_UNKNOWN_FORCED_FIRST (read, default "1")
# Arguments: $1 - language code, $2 - "yes" if the subtitle is forced
# Outputs:   the rank on stdout, without trailing newline
# Ranking:   a configured "<lang>:forced" / "<lang>:normal" entry wins over
#            a configured bare "<lang>" entry; anything else is "unknown"
#            and gets 90000, or 90001 for a non-forced subtitle when mode
#            is "end" and forced-first is "1".
subtitle_sort_rank() {
  local lang="$1" forced="$2"
  local variant key

  if [[ "$forced" == "yes" ]]; then
    variant="forced"
  else
    variant="normal"
  fi

  # Most specific key first, then the bare language code.
  for key in "$lang:$variant" "$lang"; do
    if [[ -n "${SUB_ORDER_RANK[$key]+x}" ]]; then
      printf '%05d' "${SUB_ORDER_RANK[$key]}"
      return 0
    fi
  done

  # Unknown language: behind every configured entry.
  local rank=90000
  if [[ "${SUB_UNKNOWN_MODE:-end}" == "end" ]]; then
    if [[ "${SUB_UNKNOWN_FORCED_FIRST:-1}" == "1" && "$forced" != "yes" ]]; then
      rank=$((rank + 1))
    fi
  fi
  printf '%05d' "$rank"
}
# ---------------- mux one mkv ----------------
mux_one_mkv() {
@@ -407,7 +485,31 @@ mux_one_mkv() {
return 0
fi
dbg "IDX files: ${#idxs[@]}"
# ---- sort idxs according to settings-defined order ----
local -a decorated=()
local idx forcedFlag langcode rank
for idx in "${idxs[@]}"; do
forcedFlag="no"
[[ "${idx,,}" == *forced* ]] && forcedFlag="yes"
langcode="$(infer_lang_code_for_idx "$idx")"
rank="$(subtitle_sort_rank "$langcode" "$forcedFlag")"
# Decorate line for sorting:
# rank<TAB>lang<TAB>forced<TAB>path
decorated+=( "${rank}"$'\t'"${langcode}"$'\t'"${forcedFlag}"$'\t'"${idx}" )
done
IFS=$'\n' decorated=($(printf '%s\n' "${decorated[@]}" | LC_ALL=C sort))
unset IFS
idxs=()
local row
for row in "${decorated[@]}"; do
idxs+=( "$(printf '%s' "$row" | cut -f4-)" )
done
dbg "IDX files (sorted): ${#idxs[@]}"
for f in "${idxs[@]}"; do dbg " - $(basename "$f")"; done
local out_tmp="${mkv%.*}.with-subs.tmp.mkv"
@@ -417,7 +519,6 @@ mux_one_mkv() {
cmd=("${MKVMERGE_CMD[@]}" -o "$out_tmp" "$mkv")
local added=0
local idx=""
for idx in "${idxs[@]}"; do
local sub="${idx%.*}.sub"
if [[ ! -f "$sub" ]]; then
@@ -425,24 +526,24 @@ mux_one_mkv() {
continue
fi
local meta name forcedFlag
local meta name forcedFlag2
meta="$(infer_meta_for_idx "$idx")"
IFS='|' read -r name forcedFlag <<<"$meta"
IFS='|' read -r name forcedFlag2 <<<"$meta"
dbg "Add VobSub: $(basename "$idx") -> name='$name', forced=$forcedFlag"
dbg "Add VobSub: $(basename "$idx") -> name='${name:-<none>}', forced=$forcedFlag2"
# IMPORTANT: do NOT override language; keep what is in the .idx
# Do NOT override language; keep what is in the .idx
if [[ -n "$name" ]]; then
cmd+=( --track-name 0:"$name" )
fi
if [[ "$forcedFlag" == "yes" ]]; then
if [[ "$forcedFlag2" == "yes" ]]; then
cmd+=( --forced-track 0:yes --default-track 0:no )
else
cmd+=( --forced-track 0:no --default-track 0:no )
fi
cmd+=( "$idx" )
cmd+=( "$idx" )
((++added))
done
@@ -493,12 +594,9 @@ process_root() {
done < <(find "$root" -type f -iname "*.mkv" -print0)
log "Found MKVs: ${#mkvs[@]} under $root"
if [[ ${#mkvs[@]} -eq 0 ]]; then
return 0
fi
[[ ${#mkvs[@]} -eq 0 ]] && return 0
local processed=0
local mkv=""
local processed=0 mkv=""
for mkv in "${mkvs[@]}"; do
mux_one_mkv "$mkv"
((++processed))
@@ -509,7 +607,7 @@ process_root() {
# ---------------- main ----------------
main() {
ROOTS=()
local -a ROOTS=()
while [[ $# -gt 0 ]]; do
case "$1" in
-n|--dry-run) DRY_RUN=1; shift ;;
@@ -527,7 +625,7 @@ main() {
base_dir="$(cd "$script_dir/.." && pwd)"
ini_file="$base_dir/settings.ini"
# If no CLI roots: read input_folder from settings.ini
# settings.ini only needed if no CLI roots
if [[ ${#ROOTS[@]} -eq 0 ]]; then
if [[ ! -f "$ini_file" ]]; then
echo
@@ -539,7 +637,6 @@ main() {
local input_folder
input_folder="$(ini_get "$ini_file" "pathes" "input_folder" || true)"
if [[ -z "${input_folder:-}" ]]; then
echo
echo -e "${WHITE_ON_RED} input_folder fehlt in settings.ini ([pathes]) ${NC}"
@@ -547,10 +644,22 @@ main() {
read -n 1 -s -r -p "Press any key to exit"
exit 1
fi
ROOTS=("$input_folder")
fi
# Read subtitle order ONCE (only if ini exists; if script is run with CLI roots and no ini, use defaults)
if [[ -f "$ini_file" ]]; then
SUB_ORDER_RAW="$(ini_get "$ini_file" "subtitle_order" "order" || true)"
SUB_UNKNOWN_MODE="$(ini_get "$ini_file" "subtitle_order" "unknown" || true)"
SUB_UNKNOWN_FORCED_FIRST="$(ini_get "$ini_file" "subtitle_order" "unknown_forced_first" || true)"
fi
SUB_ORDER_RAW="${SUB_ORDER_RAW:-$SUB_ORDER_RAW_DEFAULT}"
SUB_UNKNOWN_MODE="${SUB_UNKNOWN_MODE:-$SUB_UNKNOWN_MODE_DEFAULT}"
SUB_UNKNOWN_FORCED_FIRST="${SUB_UNKNOWN_FORCED_FIRST:-$SUB_UNKNOWN_FORCED_FIRST_DEFAULT}"
build_sub_order_rank "$SUB_ORDER_RAW"
detect_mkvmerge
echo
@@ -560,6 +669,7 @@ main() {
echo " "
echo -e "${WHITE_ON_GRAY} Input Folder ${NC} ${ROOTS[*]}"
echo -e "${WHITE_ON_GRAY} Subs Folders ${NC} ${SUBDIR_CANDIDATES[*]}"
echo -e "${WHITE_ON_GRAY} Reihenfolge ${NC} ${SUB_ORDER_RAW}"
echo -e "${WHITE_ON_GRAY} MKVMerge ${NC} ${MKVMERGE_CMD[*]}"
echo -e "${WHITE_ON_GRAY} Dry Run ${NC} ${DRY_RUN}"
echo -e "${WHITE_ON_GRAY} Keep Backup ${NC} ${KEEP_BAK}"