From aff572c9ac80748b86fe22996dc0103a55ee829f Mon Sep 17 00:00:00 2001 From: Florian Pelz Date: Tue, 16 Sep 2025 03:05:06 +0200 Subject: [PATCH] po: Add procedures to handle '\"', '\t' and '\\' escape sequences. They were handled with PEG before. Now `make download-po` has the same result as before PO files were minified. * guix/build/po.scm (interpret-newline-escape): Replace with ... (interpret-escape): ... this more general procedure. (replace-escaped-backslashes): New procedure, basically restored from the past implementation of 'interpret-newline-escape' but for '\\' escapes. (interpret-escape-sequences): New procedure to call them all. (parse-tree->assoc): Use it. Change-Id: I03226281019fa39ef7bca524278dbc434df95f2e --- guix/build/po.scm | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/guix/build/po.scm b/guix/build/po.scm index de11d85e54df2b052469a1eb01fad7183ef7b8cc..76cc6a8cf267d1e6b18dff37c330a7a3c57c5ee1 100644 --- a/guix/build/po.scm +++ b/guix/build/po.scm @@ -48,6 +48,17 @@ (and (ignore "\"") (* str-chr) (ignore "\"") (? (and (ignore (* whitespace)) content)))) +(define (replace-escaped-backslashes str) + "Replace '\\\\' sequences in STR with a single '\\'." + (let loop ((str str) + (result '())) + (match (string-contains str "\\\\") + (#f (string-concatenate-reverse (cons str result))) + (index + (let ((prefix (string-take str index))) + (loop (string-drop str (+ 2 index)) + (append (list "\\" prefix) result))))))) + (define (final-character-escapes? str last-index) "Check if STR ends in an incomplete escape sequence, that is ends in an uneven number of backslashes. LAST-INDEX is the index of its last character." @@ -55,19 +66,29 @@ number of backslashes. LAST-INDEX is the index of its last character." (eqv? (string-ref str last-index) #\\) (not (final-character-escapes? str (- last-index 1))))) -(define (interpret-newline-escape str) - "Replace unescaped '\\n' sequences in STR with a newline character." +(define (interpret-escape sequence replacement str) + "Replace backslash escape sequence SEQUENCE in STR with REPLACEMENT (a string) +when SEQUENCE is not escaped itself. For example, SEQUENCE '\\n' with a +newline string as REPLACEMENT." (let loop ((str str) (result '())) - (match (string-contains str "\\n") + (match (string-contains str sequence) (#f (string-concatenate-reverse (cons str result))) (index (let ((prefix (string-take str index))) (loop (string-drop str (+ 2 index)) - ;; Only add a newline when the backslash is not escaped itself. + ;; Only add REPLACEMENT when the backslash is not escaped itself. (if (final-character-escapes? str (- index 1)) (cons (string-take str (+ 2 index)) result) - (append (list "\n" prefix) result)))))))) + (append (list replacement prefix) result)))))))) + +(define (interpret-escape-sequences str) + "Unescape all escape sequences in STR." + (replace-escaped-backslashes + (interpret-escape "\\n" "\n" + (interpret-escape "\\\"" "\"" + (interpret-escape "\\t" "\t" + str))))) (define (parse-tree->assoc parse-tree) "Converts a po PARSE-TREE to an association list, where the key is the msgid @@ -103,18 +124,14 @@ and the value is the msgstr. The result only contains non fuzzy strings." (('entry _ ('msgid msgid) 'msgstr) (parse-tree->assoc parse-tree)) (('entry ('msgid msgid) ('msgstr msgstr)) - (acons (interpret-newline-escape msgid) - (interpret-newline-escape msgstr) - (parse-tree->assoc parse-tree))) - (('entry ('msgid msgid) ('msgstr msgstr)) - (acons (interpret-newline-escape msgid) - (interpret-newline-escape msgstr) + (acons (interpret-escape-sequences msgid) + (interpret-escape-sequences msgstr) (parse-tree->assoc parse-tree))) (('entry comments ('msgid msgid) ('msgstr msgstr)) (if (member 'fuzzy (comments->flags comments)) (parse-tree->assoc parse-tree) - (acons (interpret-newline-escape msgid) - (interpret-newline-escape msgstr) + (acons (interpret-escape-sequences msgid) + (interpret-escape-sequences msgstr) (parse-tree->assoc parse-tree)))))))) (define (read-po-file port)