~ruther/guix-local

6bfe66b44ac92d51fd1e2b2e8efcacf70fe1aa6e — Hilton Chain 1 year, 2 days ago f6adf70
gnu: python-tokenizers: Remove #:cargo-inputs.

* gnu/packages/machine-learning.scm (python-tokenizers)
[inputs]: Use ‘cargo-inputs’.
Add oniguruma.
[native-inputs]: Add pkg-config.
[arguments]: Remove #:cargo-inputs, #:cargo-development-inputs.
<#:install-source?>: Disable.
<#:modules>: Remove no longer needed ones.
<#:phases>: Remove 'loosen-requirements.
Simplify 'inject-tokenizers and move it to...
[source]: ...here as a snippet.
* gnu/packages/rust-crates.scm (lookup-cargo-inputs)[python-tokenizers]: New
entry.

Change-Id: I806770a776e02deba1e4a1dddccd608897121605
2 files changed, 203 insertions(+), 49 deletions(-)

M gnu/packages/machine-learning.scm
M gnu/packages/rust-crates.scm
M gnu/packages/machine-learning.scm => gnu/packages/machine-learning.scm +11 -49
@@ 140,6 140,7 @@
  #:use-module (gnu packages sqlite)
  #:use-module (gnu packages statistics)
  #:use-module (gnu packages swig)
  #:use-module (gnu packages textutils)
  #:use-module (gnu packages time)
  #:use-module (gnu packages tls)
  #:use-module (gnu packages valgrind)


@@ 6112,49 6113,22 @@ tokenizers, with a focus on performances and versatility.")
                        (unless (member file '("." ".."))
                          (rename-file (string-append "bindings/python/" file) file)))
                      (scandir "bindings/python"))
            (delete-file-recursively ".cargo")))))
            (delete-file-recursively ".cargo")
            (substitute* "Cargo.toml"
              (("^path = .*")
               (format #f "version = ~s~%" #$version)))))))
    (build-system cargo-build-system)
    (arguments
     (list
      #:install-source? #f
      #:cargo-test-flags ''("--no-default-features")
      #:imported-modules `(,@%cargo-build-system-modules
                           ,@%pyproject-build-system-modules)
      #:modules '((guix build cargo-build-system)
                  ((guix build pyproject-build-system) #:prefix py:)
                  (guix build utils)
                  (ice-9 regex)
                  (ice-9 textual-ports))
                  (guix build utils))
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack-rust-crates 'inject-tokenizers
            (lambda _
              (substitute* "Cargo.toml"
                (("\\[dependencies\\]")
                 (format #f "
[dev-dependencies]
tempfile = ~s
pyo3 = { version = ~s, features = [\"auto-initialize\"] }

[dependencies]
tokenizers = ~s"
                         #$(package-version rust-tempfile-3)
                         #$(package-version rust-pyo3-0.21)
                         #$(package-version rust-tokenizers))))
              (let ((file-path "Cargo.toml"))
                (call-with-input-file file-path
                  (lambda (port)
                    (let* ((content (get-string-all port))
                           (top-match (string-match
                                       "\\[dependencies.tokenizers" content)))
                      (call-with-output-file file-path
                        (lambda (out)
                          (format out "~a" (match:prefix top-match))))))))))
          (add-after 'patch-cargo-checksums 'loosen-requirements
            (lambda _
              (substitute* "Cargo.toml"
                (("version = \"6.4\"")
                 (format #f "version = ~s"
                         #$(package-version rust-onig-6))))))
          (add-after 'check 'python-check
            (lambda _
              (copy-file "target/release/libtokenizers.so"


@@ 6177,23 6151,11 @@ tokenizers = ~s"
                (copy-file "PKG-INFO" (string-append info "/METADATA"))
                (copy-recursively
                 "py_src/tokenizers"
                 (string-append lib "tokenizers"))))))
      #:cargo-inputs
      `(("rust-rayon" ,rust-rayon-1)
        ("rust-serde" ,rust-serde-1)
        ("rust-serde-json" ,rust-serde-json-1)
        ("rust-libc" ,rust-libc-0.2)
        ("rust-env-logger" ,rust-env-logger-0.11)
        ("rust-pyo3" ,rust-pyo3-0.21)
        ("rust-numpy" ,rust-numpy-0.21)
        ("rust-ndarray" ,rust-ndarray-0.15)
        ("rust-onig" ,rust-onig-6)
        ("rust-itertools" ,rust-itertools-0.12)
        ("rust-tokenizers" ,rust-tokenizers))
      #:cargo-development-inputs
      `(("rust-tempfile" ,rust-tempfile-3))))
                 (string-append lib "tokenizers"))))))))
    (native-inputs
     (list python-minimal python-pytest))
     (list pkg-config python-minimal python-pytest))
    (inputs
     (cons oniguruma (cargo-inputs 'python-tokenizers)))
    (home-page "https://huggingface.co/docs/tokenizers")
    (synopsis "Implementation of various popular tokenizers")
    (description

M gnu/packages/rust-crates.scm => gnu/packages/rust-crates.scm +192 -0
@@ 2630,6 2630,10 @@
  (crate-source "error-stack" "0.5.0"
                "1lf5zy1fjjqdwjkc445sw80hpmxi63ymcxgjh3q6642x2hck6hgy"))

;; Source definition for the "esaxx-rs" crate at version 0.1.10.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-esaxx-rs-0.1.10
  (crate-source "esaxx-rs" "0.1.10"
                "1rm6vm5yr7s3n5ly7k9x9j6ra5p2l2ld151gnaya8x03qcwf05yq"))

;; Source definition for the "etcetera" crate at version 0.10.0.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-etcetera-0.10.0
  (crate-source "etcetera" "0.10.0"
                "1rka6bskn93pdhx32xaagr147q95z5bnz7ym5xr85jw00wyv3ir6"))


@@ 5020,6 5024,10 @@
  (crate-source "jiff" "0.2.8"
                "18140w5m8bblbxaj3wpa55kj0808g6lr6a6wsi65xx8hj748gbg5"))

;; Source definition for the "jiff" crate at version 0.2.9; this is the jiff
;; version listed in the `python-tokenizers' cargo inputs.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-jiff-0.2.9
  (crate-source "jiff" "0.2.9"
                "1l11404xxp75qcgfk9jf6bag53fvhl5za8dhw4agxrib2kvk1v2r"))

;; Source definition for the "jiff-static" crate at version 0.2.5.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-jiff-static-0.2.5
  (crate-source "jiff-static" "0.2.5"
                "0k1v30mhbgh4zj2r9d7lfqlh5b20b5573cx0a4gip7rlkldf7pac"))


@@ 5032,6 5040,10 @@
  (crate-source "jiff-static" "0.2.8"
                "019n229ygcx5fviby0s2j07mqwm6lhyc80izfkqfcbvy9avdaxnh"))

;; Source definition for the "jiff-static" crate at version 0.2.9; listed
;; next to rust-jiff-0.2.9 in the `python-tokenizers' cargo inputs.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-jiff-static-0.2.9
  (crate-source "jiff-static" "0.2.9"
                "1ghxrhwgf9i01gi62hwsfaqy3pcsdliw7c3nn0vrvmbzf96q6ssj"))

(define rust-jiff-tzdb-0.1.4
  (crate-source "jiff-tzdb" "0.1.4"
                "09350bna4vxdn2fv7gd08ay41llkflmfyvpx5d6l088axc2kfa61"


@@ 5692,6 5704,14 @@
  (crate-source "mach2" "0.4.2"
                "02gpyq89rcrqdbz4hgp5bpjas21dllxfc70jgw8vj0iaxg6mbf8r"))

;; Source definition for the "macro_rules_attribute" crate at version 0.2.0.
;; The crate name passed to `crate-source' uses underscores, while the
;; variable name uses hyphens; keep the string as is.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-macro-rules-attribute-0.2.0
  (crate-source "macro_rules_attribute" "0.2.0"
                "04waa4qm28adwnxsxhx9135ki68mwkikr6m5pi5xhcy0gcgjg0la"))

;; Source definition for the "macro_rules_attribute-proc_macro" crate at
;; version 0.2.0.  The crate name mixes underscores and a hyphen, while the
;; variable name uses hyphens throughout; keep the string as is.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-macro-rules-attribute-proc-macro-0.2.0
  (crate-source "macro_rules_attribute-proc_macro" "0.2.0"
                "0s45j4zm0a5d041g3vcbanvr76p331dfjb7gw9qdmh0w8mnqbpdq"))

;; Source definition for the "maildir" crate at version 0.6.4.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-maildir-0.6.4
  (crate-source "maildir" "0.6.4"
                "0dqlkvhcrigs4y5vg0jf8ccgwns8jj85cjp6vsgj3f1sfkk6m6l7"))


@@ 5924,6 5944,14 @@
  (crate-source "mockall_derive" "0.13.1"
                "1608qajqrz23xbvv81alc6wm4l24as1bsqg4shdh3sggq8231ji5"))

;; Source definition for the "monostate" crate at version 0.1.14.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-monostate-0.1.14
  (crate-source "monostate" "0.1.14"
                "1vpv8d9j8i7wachlcrpbwsy1rvzimpncgv8gwpil4mn7s3lipzma"))

;; Source definition for the "monostate-impl" crate at version 0.1.14; listed
;; next to rust-monostate-0.1.14 in the `python-tokenizers' cargo inputs.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-monostate-impl-0.1.14
  (crate-source "monostate-impl" "0.1.14"
                "1db3jrnbriivny6cahvhcc9af7w38q846mg1r4r4y82y5l4s80n4"))

;; Source definition for the "muldiv" crate at version 0.2.1.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-muldiv-0.2.1
  (crate-source "muldiv" "0.2.1"
                "014jlry2l2ph56mp8knw65637hh49q7fmrraim2bx9vz0a638684"))


@@ 6470,6 6498,10 @@
  (crate-source "number_prefix" "0.4.0"
                "1wvh13wvlajqxkb1filsfzbrnq0vrmrw298v2j3sy82z1rm282w3"))

;; Source definition for the "numpy" crate at version 0.21.0.  This version
;; coexists with rust-numpy-0.22.1; 0.21.0 is the one listed in the
;; `python-tokenizers' cargo inputs.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-numpy-0.21.0
  (crate-source "numpy" "0.21.0"
                "1x1p5x7lwfc5nsccwj98sln5vx3g3n8sbgm5fmfmy5rpr8rhf5zc"))

;; Source definition for the "numpy" crate at version 0.22.1.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-numpy-0.22.1
  (crate-source "numpy" "0.22.1"
                "0kpii5mvz4ag29qw4zrqzfmi3m2kmbg882kcxn2ls6m91ny2kfgd"))


@@ 7852,6 7884,10 @@
  (crate-source "rayon" "1.10.0"
                "1ylgnzwgllajalr4v00y4kj22klq2jbwllm70aha232iah0sc65l"))

;; Source definition for the "rayon-cond" crate at version 0.3.0.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-rayon-cond-0.3.0
  (crate-source "rayon-cond" "0.3.0"
                "1ybxppq84p3q60h9rng9j3dm79f6970hn4wljyf31lpgan5m77q5"))

;; Source definition for the "rayon-core" crate at version 1.12.1.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-rayon-core-1.12.1
  (crate-source "rayon-core" "1.12.1"
                "1qpwim68ai5h0j7axa8ai8z0payaawv3id0lrgkqmapx7lx8fr8l"))


@@ 9122,6 9158,10 @@
                "17fj8k5fmx4w9mp27l970clrh5qa7r5sjdvbsln987xhb34dc7nr"
                #:snippet '(delete-file-recursively "tests")))

;; Source definition for the "spm_precompiled" crate at version 0.1.4.  The
;; crate name passed to `crate-source' uses an underscore, while the variable
;; name uses hyphens; keep the string as is.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-spm-precompiled-0.1.4
  (crate-source "spm_precompiled" "0.1.4"
                "09pkdk2abr8xf4pb9kq3rk80dgziq6vzfk7aywv3diik82f6jlaq"))

;; Source definition for the "spmc" crate at version 0.3.0.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-spmc-0.3.0
  (crate-source "spmc" "0.3.0"
                "1rgcqgj6b3d0cshi7277akr2xk0cx11rkmviaahy7a3pla6l5a02"))


@@ 9823,6 9863,10 @@
    (file-name (git-file-name "rust-tl" "0.7.8.6e25b2e"))
    (sha256 (base32 "0r6wwvw1apsfzdhzvw2vinjb3nwbyly2ycx09yfqc0wrwiav6khp"))))

;; Source definition for the "tokenizers" crate at version 0.19.1; listed in
;; the `python-tokenizers' cargo inputs.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-tokenizers-0.19.1
  (crate-source "tokenizers" "0.19.1"
                "1zg6ffpllygijb5bh227m9p4lrhf0pjkysky68kddwrsvp8zl075"))

;; Source definition for the "tokio" crate at version 1.44.1.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-tokio-1.44.1
  (crate-source "tokio" "1.44.1"
                "06n90q5hh1yd844s6nf4j3fwbrkm2bnq533kp3a488l4bdhxm0pk"))


@@ 10242,6 10286,10 @@
  (crate-source "unicode-bom" "2.0.3"
                "05s2sqyjanqrbds3fxam35f92npp5ci2wz9zg7v690r0448mvv3y"))

;; Source definition for the "unicode_categories" crate at version 0.1.1.  The
;; crate name passed to `crate-source' uses an underscore, while the variable
;; name uses hyphens; keep the string as is.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-unicode-categories-0.1.1
  (crate-source "unicode_categories" "0.1.1"
                "0kp1d7fryxxm7hqywbk88yb9d1avsam9sg76xh36k5qx2arj9v1r"))

;; Source definition for the "unicode-ellipsis" crate at version 0.2.0.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-unicode-ellipsis-0.2.0
  (crate-source "unicode-ellipsis" "0.2.0"
                "1zsdzmy6x1p8s35rgfmc7nx1qcs6j4bcfbfyiimrdngyqfwbajlj"))


@@ 10267,6 10315,10 @@
  (crate-source "unicode-normalization" "0.1.24"
                "0mnrk809z3ix1wspcqy97ld5wxdb31f3xz6nsvg5qcv289ycjcsh"))

;; Source definition for the "unicode-normalization-alignments" crate at
;; version 0.1.12.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-unicode-normalization-alignments-0.1.12
  (crate-source "unicode-normalization-alignments" "0.1.12"
                "1pk2f3arh3qvdsmrsiri0gr5y5vqpk2gv1yjin0njvh4zbj17xj3"))

;; Source definition for the "unicode-segmentation" crate at version 1.10.1.
;; NOTE(review): the third argument is presumably the expected source hash;
;; confirm against the definition of `crate-source'.
(define rust-unicode-segmentation-1.10.1
  (crate-source "unicode-segmentation" "1.10.1"
                "0dky2hm5k51xy11hc3nk85p533rvghd462b6i0c532b7hl4j9mhx"))


@@ 23436,6 23488,146 @@
                                          rust-windows-x86-64-gnu-0.52.6
                                          rust-windows-x86-64-gnullvm-0.52.6
                                          rust-windows-x86-64-msvc-0.52.6))
                     ;; Crate sources for the `python-tokenizers' package in
                     ;; gnu/packages/machine-learning.scm, which retrieves
                     ;; this list with (cargo-inputs 'python-tokenizers).
                     ;; Every element is a version-pinned crate variable.
                     ;; NOTE(review): the list looks machine-generated (e.g.
                     ;; from a Cargo lock file) -- confirm before editing by
                     ;; hand.
                     (python-tokenizers =>
                                        (list rust-aho-corasick-1.1.3
                                         rust-anstream-0.6.18
                                         rust-anstyle-1.0.10
                                         rust-anstyle-parse-0.2.6
                                         rust-anstyle-query-1.1.2
                                         rust-anstyle-wincon-3.0.7
                                         rust-autocfg-1.4.0
                                         rust-base64-0.13.1
                                         rust-bitflags-1.3.2
                                         rust-bitflags-2.9.0
                                         rust-bumpalo-3.17.0
                                         rust-cc-1.2.19
                                         rust-cfg-if-1.0.0
                                         rust-colorchoice-1.0.3
                                         rust-console-0.15.11
                                         rust-crossbeam-deque-0.8.6
                                         rust-crossbeam-epoch-0.9.18
                                         rust-crossbeam-utils-0.8.21
                                         rust-darling-0.20.11
                                         rust-darling-core-0.20.11
                                         rust-darling-macro-0.20.11
                                         rust-derive-builder-0.20.2
                                         rust-derive-builder-core-0.20.2
                                         rust-derive-builder-macro-0.20.2
                                         rust-either-1.15.0
                                         rust-encode-unicode-1.0.0
                                         rust-env-filter-0.1.3
                                         rust-env-logger-0.11.8
                                         rust-errno-0.3.11
                                         rust-esaxx-rs-0.1.10
                                         rust-fastrand-2.3.0
                                         rust-fnv-1.0.7
                                         rust-getrandom-0.2.15
                                         rust-getrandom-0.3.2
                                         rust-heck-0.4.1
                                         rust-ident-case-1.0.1
                                         rust-indicatif-0.17.11
                                         rust-indoc-2.0.6
                                         rust-is-terminal-polyfill-1.70.1
                                         rust-itertools-0.11.0
                                         rust-itertools-0.12.1
                                         rust-itoa-1.0.15
                                         rust-jiff-0.2.9
                                         rust-jiff-static-0.2.9
                                         rust-js-sys-0.3.77
                                         rust-lazy-static-1.5.0
                                         rust-libc-0.2.172
                                         rust-linux-raw-sys-0.9.4
                                         rust-lock-api-0.4.12
                                         rust-log-0.4.27
                                         rust-macro-rules-attribute-0.2.0
                                         rust-macro-rules-attribute-proc-macro-0.2.0
                                         rust-matrixmultiply-0.3.9
                                         rust-memchr-2.7.4
                                         rust-memoffset-0.9.1
                                         rust-minimal-lexical-0.2.1
                                         rust-monostate-0.1.14
                                         rust-monostate-impl-0.1.14
                                         rust-ndarray-0.15.6
                                         rust-nom-7.1.3
                                         rust-num-complex-0.4.6
                                         rust-num-integer-0.1.46
                                         rust-num-traits-0.2.19
                                         rust-number-prefix-0.4.0
                                         rust-numpy-0.21.0
                                         rust-once-cell-1.21.3
                                         rust-onig-6.4.0
                                         rust-onig-sys-69.8.1
                                         rust-parking-lot-0.12.3
                                         rust-parking-lot-core-0.9.10
                                         rust-paste-1.0.15
                                         rust-pkg-config-0.3.32
                                         rust-portable-atomic-1.11.0
                                         rust-portable-atomic-util-0.2.4
                                         rust-ppv-lite86-0.2.21
                                         rust-proc-macro2-1.0.95
                                         rust-pyo3-0.21.2
                                         rust-pyo3-build-config-0.21.2
                                         rust-pyo3-ffi-0.21.2
                                         rust-pyo3-macros-0.21.2
                                         rust-pyo3-macros-backend-0.21.2
                                         rust-quote-1.0.40
                                         rust-r-efi-5.2.0
                                         rust-rand-0.8.5
                                         rust-rand-chacha-0.3.1
                                         rust-rand-core-0.6.4
                                         rust-rawpointer-0.2.1
                                         rust-rayon-1.10.0
                                         rust-rayon-cond-0.3.0
                                         rust-rayon-core-1.12.1
                                         rust-redox-syscall-0.5.11
                                         rust-regex-1.11.1
                                         rust-regex-automata-0.4.9
                                         rust-regex-syntax-0.8.5
                                         rust-rustc-hash-1.1.0
                                         rust-rustix-1.0.5
                                         rust-ryu-1.0.20
                                         rust-scopeguard-1.2.0
                                         rust-serde-1.0.219
                                         rust-serde-derive-1.0.219
                                         rust-serde-json-1.0.140
                                         rust-shlex-1.3.0
                                         rust-smallvec-1.15.0
                                         rust-spm-precompiled-0.1.4
                                         rust-strsim-0.11.1
                                         rust-syn-2.0.100
                                         rust-target-lexicon-0.12.16
                                         rust-tempfile-3.19.1
                                         rust-thiserror-1.0.69
                                         rust-thiserror-impl-1.0.69
                                         rust-tokenizers-0.19.1
                                         rust-unicode-ident-1.0.18
                                         rust-unicode-normalization-alignments-0.1.12
                                         rust-unicode-segmentation-1.12.0
                                         rust-unicode-width-0.2.0
                                         ;; NOTE(review): placed after unicode-width, out of
                                         ;; order by variable name; presumably sorted by the
                                         ;; raw crate name "unicode_categories" -- confirm
                                         ;; before reordering.
                                         rust-unicode-categories-0.1.1
                                         rust-unindent-0.2.4
                                         rust-utf8parse-0.2.2
                                         rust-wasi-0.11.0+wasi-snapshot-preview1
                                         rust-wasi-0.14.2+wasi-0.2.4
                                         rust-wasm-bindgen-0.2.100
                                         rust-wasm-bindgen-backend-0.2.100
                                         rust-wasm-bindgen-macro-0.2.100
                                         rust-wasm-bindgen-macro-support-0.2.100
                                         rust-wasm-bindgen-shared-0.2.100
                                         rust-web-time-1.1.0
                                         rust-windows-sys-0.59.0
                                         rust-windows-targets-0.52.6
                                         rust-windows-aarch64-gnullvm-0.52.6
                                         rust-windows-aarch64-msvc-0.52.6
                                         rust-windows-i686-gnu-0.52.6
                                         rust-windows-i686-gnullvm-0.52.6
                                         rust-windows-i686-msvc-0.52.6
                                         rust-windows-x86-64-gnu-0.52.6
                                         rust-windows-x86-64-gnullvm-0.52.6
                                         rust-windows-x86-64-msvc-0.52.6
                                         rust-wit-bindgen-rt-0.39.0
                                         rust-zerocopy-0.8.24
                                         rust-zerocopy-derive-0.8.24))
                     (rav1e =>
                            (list rust-addr2line-0.24.2
                                  rust-adler2-2.0.0