~ruther/guix-local

aff572c9ac80748b86fe22996dc0103a55ee829f — Florian Pelz 6 months ago e16cfd6
po: Add procedures to handle '\"', '\t' and '\\' escape sequences.

They were handled with PEG before.

Now `make download-po` has the same result as before PO files were
minified.

* guix/build/po.scm (interpret-newline-escape): Replace with ...
(interpret-escape): ... this more general procedure.
(replace-escaped-backslashes): New procedure, basically restored from
the past implementation of 'interpret-newline-escape' but
for '\\' escapes.
(interpret-escape-sequences): New procedure to call them all.
(parse-tree->assoc): Use it.

Change-Id: I03226281019fa39ef7bca524278dbc434df95f2e
1 files changed, 30 insertions(+), 13 deletions(-)

M guix/build/po.scm
M guix/build/po.scm => guix/build/po.scm +30 -13
@@ 48,6 48,17 @@
  (and (ignore "\"") (* str-chr) (ignore "\"")
       (? (and (ignore (* whitespace)) content))))

(define (replace-escaped-backslashes str)
  "Return STR with every pair of consecutive backslashes collapsed into a
single backslash.  Pairs are matched from left to right and do not overlap, so
a lone backslash left over after the last pair is kept unchanged."
  (let scan ((start 0)
             (pieces '()))
    ;; PIECES holds the already processed chunks, most recent first.
    (let ((hit (string-contains str "\\\\" start)))
      (if hit
          ;; Keep the text before the pair, emit one backslash for the pair,
          ;; and resume searching right after it.
          (scan (+ hit 2)
                (cons* "\\" (substring str start hit) pieces))
          ;; No pair left: the remainder of STR is copied as is.
          (string-concatenate-reverse
           (cons (substring str start) pieces))))))

(define (final-character-escapes? str last-index)
  "Check if STR ends in an incomplete escape sequence, that is ends in an uneven
number of backslashes.  LAST-INDEX is the index of its last character."


@@ 55,19 66,29 @@ number of backslashes.  LAST-INDEX is the index of its last character."
       (eqv? (string-ref str last-index) #\\)
       (not (final-character-escapes? str (- last-index 1)))))

(define (interpret-escape sequence replacement str)
  "Replace backslash escape sequence SEQUENCE in STR with REPLACEMENT (a string)
when SEQUENCE is not escaped itself.  For example, SEQUENCE '\\n' with a
newline string as REPLACEMENT.  SEQUENCE is expected to be a non-empty string
starting with a backslash; its length is taken from SEQUENCE itself rather
than assumed to be two characters.  Escaped occurrences are copied unchanged."
  (let ((len (string-length sequence)))
    (let loop ((str str)
               (result '()))
      ;; RESULT accumulates the processed chunks in reverse order.
      (match (string-contains str sequence)
        (#f (string-concatenate-reverse (cons str result)))
        (index
         ;; Continue right after this occurrence of SEQUENCE.
         (loop (string-drop str (+ len index))
               ;; Only add REPLACEMENT when the backslash is not escaped itself.
               (if (final-character-escapes? str (- index 1))
                   ;; Escaped: keep everything up to and including SEQUENCE.
                   (cons (string-take str (+ len index)) result)
                   (cons* replacement (string-take str index) result))))))))

(define (interpret-newline-escape str)
  "Replace unescaped '\\n' sequences in STR with a newline character.  Kept as
a thin wrapper around 'interpret-escape' for callers using the old name."
  (interpret-escape "\\n" "\n" str))

(define (interpret-escape-sequences str)
  "Return STR with its escape sequences interpreted: unescaped '\\t', '\\\"'
and '\\n' become a tab, a double quote and a newline respectively, and
finally each '\\\\' pair becomes a single backslash."
  (let* ((tabs-done     (interpret-escape "\\t" "\t" str))
         (quotes-done   (interpret-escape "\\\"" "\"" tabs-done))
         (newlines-done (interpret-escape "\\n" "\n" quotes-done)))
    ;; Backslash pairs are collapsed last; the passes above run while the
    ;; pairs are still present in the string.
    (replace-escaped-backslashes newlines-done)))

(define (parse-tree->assoc parse-tree)
  "Converts a po PARSE-TREE to an association list, where the key is the msgid


@@ 103,18 124,14 @@ and the value is the msgstr.  The result only contains non fuzzy strings."
       (('entry _ ('msgid msgid) 'msgstr)
        (parse-tree->assoc parse-tree))
       (('entry ('msgid msgid) ('msgstr msgstr))
        (acons (interpret-newline-escape msgid)
               (interpret-newline-escape msgstr)
               (parse-tree->assoc parse-tree)))
       (('entry ('msgid msgid) ('msgstr msgstr))
        (acons (interpret-newline-escape msgid)
               (interpret-newline-escape msgstr)
        (acons (interpret-escape-sequences msgid)
               (interpret-escape-sequences msgstr)
               (parse-tree->assoc parse-tree)))
       (('entry comments ('msgid msgid) ('msgstr msgstr))
        (if (member 'fuzzy (comments->flags comments))
            (parse-tree->assoc parse-tree)
            (acons (interpret-newline-escape msgid)
                   (interpret-newline-escape msgstr)
            (acons (interpret-escape-sequences msgid)
                   (interpret-escape-sequences msgstr)
                   (parse-tree->assoc parse-tree))))))))

(define (read-po-file port)