~ruther/guix-local

ce72c780746776a86f59747f5eff8731cb4ff39b — Ludovic Courtès 11 years ago 472e4c4
store: Attempt to decode build logs as UTF-8.

* guix/serialization.scm (read-maybe-utf8-string): New procedure.
* guix/store.scm (process-stderr): Use it for the build log and errors.
* tests/store.scm ("current-build-output-port, UTF-8",
  "current-build-output-port, UTF-8 + garbage"): New tests.
3 files changed, 60 insertions(+), 4 deletions(-)

M guix/serialization.scm
M guix/store.scm
M tests/store.scm
M guix/serialization.scm => guix/serialization.scm +17 -1
@@ 29,7 29,8 @@
  #:export (write-int read-int
            write-long-long read-long-long
            write-padding
            write-string read-string read-latin1-string
            write-string
            read-string read-latin1-string read-maybe-utf8-string
            write-string-list read-string-list
            write-string-pairs
            write-store-path read-store-path


@@ 130,6 131,21 @@
    ;; upgraded to Guile >= 2.0.9.
    (list->string (map integer->char (bytevector->u8-list bv)))))

(define (read-maybe-utf8-string p)
  "Read a serialized string from port P.  Attempt to decode it as UTF-8 and
substitute invalid byte sequences with question marks.  This is a
\"permissive\" UTF-8 decoder."
  ;; XXX: We rely on the port's decoding mechanism to do permissive decoding
  ;; and substitute invalid byte sequences with question marks, but this is
  ;; not very efficient.  Eventually Guile may provide a lightweight
  ;; permissive UTF-8 decoder.
  (let* ((bv   (read-byte-string p))
         (port (with-fluids ((%default-port-encoding "UTF-8")
                             (%default-port-conversion-strategy
                              'substitute))
                 (open-bytevector-input-port bv))))
    (get-string-all port)))

(define (write-string-list l p)
  (write-int (length l) p)
  (for-each (cut write-string <> p) l))

M guix/store.scm => guix/store.scm +6 -3
@@ 418,15 418,18 @@ encoding conversion errors."
             (write-padding len p)
             #f))
          ((= k %stderr-next)
           ;; Log a string.
           (let ((s (read-latin1-string p)))
           ;; Log a string.  Build logs are usually UTF-8-encoded, but they
           ;; may also contain arbitrary byte sequences that should not cause
           ;; this to fail.  Thus, use the permissive
           ;; 'read-maybe-utf8-string'.
           (let ((s (read-maybe-utf8-string p)))
             (display s (current-build-output-port))
             (when (string-any %newlines s)
               (flush-output-port (current-build-output-port)))
             #f))
          ((= k %stderr-error)
           ;; Report an error.
           (let ((error  (read-latin1-string p))
           (let ((error  (read-maybe-utf8-string p))
                 ;; Currently the daemon fails to send a status code for early
                 ;; errors like DB schema version mismatches, so check for EOF.
                 (status (if (and (>= (nix-server-minor-version server) 8)

M tests/store.scm => tests/store.scm +37 -0
@@ 25,6 25,7 @@
  #:use-module (guix packages)
  #:use-module (guix derivations)
  #:use-module (guix serialization)
  #:use-module (guix gexp)
  #:use-module (gnu packages)
  #:use-module (gnu packages bootstrap)
  #:use-module (ice-9 match)


@@ 268,6 269,42 @@
                       (list a b c d w x y)))
           (lset= string=? s1 s3)))))

(test-assert "current-build-output-port, UTF-8"
  ;; Are UTF-8 strings in the build log properly interpreted?
  (string-contains
   (with-fluids ((%default-port-encoding "UTF-8")) ;for the string port
     (call-with-output-string
      (lambda (port)
        (parameterize ((current-build-output-port port))
          (let* ((s "Here’s a Greek letter: λ.")
                 (d (build-expression->derivation
                     %store "foo" `(display ,s)
                     #:guile-for-build
                     (package-derivation s %bootstrap-guile (%current-system)))))
            (guard (c ((nix-protocol-error? c) #t))
              (build-derivations %store (list d))))))))
   "Here’s a Greek letter: λ."))

(test-assert "current-build-output-port, UTF-8 + garbage"
  ;; What about a mixture of UTF-8 + garbage?
  (string-contains
   (with-fluids ((%default-port-encoding "UTF-8")) ;for the string port
     (call-with-output-string
      (lambda (port)
        (parameterize ((current-build-output-port port))
          (let ((d (build-expression->derivation
                    %store "foo"
                    `(begin
                       (use-modules (rnrs io ports))
                       (display "garbage: ")
                       (put-bytevector (current-output-port) #vu8(128))
                       (display "lambda: λ\n"))
                     #:guile-for-build
                     (package-derivation %store %bootstrap-guile))))
            (guard (c ((nix-protocol-error? c) #t))
              (build-derivations %store (list d))))))))
   "garbage: ?lambda: λ"))

(test-assert "log-file, derivation"
  (let* ((b (add-text-to-store %store "build" "echo $foo > $out" '()))
         (s (add-to-store %store "bash" #t "sha256"