diff options
| author | Ricardo Wurmus <[email protected]> | 2024-03-26 13:33:04 +0100 |
|---|---|---|
| committer | Ricardo Wurmus <[email protected]> | 2024-03-26 22:24:58 +0100 |
| commit | f4442e409cf05d0c7cc4d6a251626d22efaffe8c (patch) | |
| tree | a8cc891940a2d8e63bd539fc4220c082f56f7775 | |
| parent | 9db762cc380cdd4b1fdfab5d0e45a7d38f5041e7 (diff) | |
| download | gwl-master.tar.gz | |
* gwl/cache.scm (workflow->data-hashes, make-process->cache-prefix):
Use a hash table for FREE-INPUTS-MAP.
* gwl/workflows.scm (compute-workflow): Use hash table instead of
LSET-INTERSECTION.
(inputs->map): Return hash table.
(prepare-inputs): Expect a hash table for INPUTS-MAP.
* tests/cache.scm, tests/workflows.scm: Update to deal with hash
tables.
| -rw-r--r-- | gwl/cache.scm | 36 | ||||
| -rw-r--r-- | gwl/workflows.scm | 69 | ||||
| -rw-r--r-- | tests/cache.scm | 66 | ||||
| -rw-r--r-- | tests/workflows.scm | 85 |
4 files changed, 142 insertions, 114 deletions
diff --git a/gwl/cache.scm b/gwl/cache.scm index 5d3b89c..fcfbfd4 100644 --- a/gwl/cache.scm +++ b/gwl/cache.scm @@ -1,4 +1,4 @@ -;;; Copyright © 2019, 2020, 2021, 2022 Ricardo Wurmus <[email protected]> +;;; Copyright © 2019-2024 Ricardo Wurmus <[email protected]> ;;; ;;; This program is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by @@ -33,6 +33,7 @@ #:select (get-bytevector-all)) #:use-module ((gcrypt hash) #:select (sha256 hash-algorithm file-hash)) + #:use-module (ice-9 hash-table) #:use-module (ice-9 match) #:use-module (srfi srfi-1) #:use-module (srfi srfi-26) @@ -102,20 +103,18 @@ large files can be acceptable as it only has to be done once." (file-hash (hash-algorithm sha256) file-name))) (define (workflow->data-hashes workflow processes free-inputs-map scripts-table) - "Return an alist associating each of the WORKFLOW's ordered list of -PROCESSES with the hash of all the process scripts used to generate -their outputs. FREE-INPUTS-MAP is an alist of input names to file + "Return a hash table associating each of the WORKFLOW's ordered list +of PROCESSES with the hash of all the process scripts used to generate +their outputs. FREE-INPUTS-MAP is a hash table of input names to file names that must be considered when computing the hash." (define graph (workflow-restrictions workflow)) (define (process-free-inputs process) - (filter-map (lambda (input) - (and=> (assoc-ref free-inputs-map input) first)) + (filter-map (cut hash-ref free-inputs-map <>) (process-inputs process))) (define input-hashes - (map (match-lambda - ((name file-name) - (cons file-name (hash-input-file* file-name)))) - free-inputs-map)) + (hash-map->list (lambda (name file-name) + (cons file-name (hash-input-file* file-name))) + free-inputs-map)) ;; Compute hashes for chains of scripts. (define (kons process acc) (cons (cons process @@ -127,13 +126,14 @@ names that must be considered when computing the hash." (append-map (cut assoc-ref acc <>) (or (assoc-ref graph process) '())))) acc)) - (map (match-lambda - ((process . hashes) - (cons process - (bytevector->base32-string - (sha256 - (u8-list->bytevector hashes)))))) - (fold kons input-hashes processes))) + (alist->hash-table + (map (match-lambda + ((process . hashes) + (cons process + (bytevector->base32-string + (sha256 + (u8-list->bytevector hashes)))))) + (fold kons input-hashes processes)))) (define (make-process->cache-prefix workflow free-inputs-map ordered-processes scripts-table) @@ -148,7 +148,7 @@ prefix for its outputs." free-inputs-map scripts-table))) (lambda (process) - (and=> (assoc-ref hashes process) + (and=> (hash-ref hashes process) (cut string-append (%cache-root) "/" <> "/"))))) (define (directory? file) diff --git a/gwl/workflows.scm b/gwl/workflows.scm index 1a276c8..88d6be5 100644 --- a/gwl/workflows.scm +++ b/gwl/workflows.scm @@ -42,6 +42,7 @@ #:use-module ((guix status) #:select (with-status-verbosity)) + #:use-module (ice-9 hash-table) #:use-module (ice-9 match) #:use-module (ice-9 format) #:use-module (ice-9 threads) @@ -244,10 +245,16 @@ Use \"processes\" to specify process dependencies.~%")) (define (scripts-by-process) (let ((h (make-hash-table))) (for-each (lambda (process) - (let ((input-files (lset-intersection - string=? - (map second inputs-map-with-extra-files) - (process-inputs process)))) + (let* ((target-files (let ((res (set))) + (hash-for-each (lambda (key value) + (set-insert value res)) + inputs-map-with-extra-files) + res)) + (input-files (fold (lambda (input acc) + (if (set-contains? target-files input) + (cons input acc) acc)) + '() + (process-inputs process)))) (log-event 'debug (G_ "Computing script for process `~a'~%") (process-name process)) @@ -476,26 +483,26 @@ can be used in a fold over a WORKFLOW's processes." (define (inputs->map inputs) "Given a list of strings INPUTS of the format \"a=b\" or just \"a\", -return a normalized mapping as a list of two element lists containing -\"a\" and \"b\" or just \"a\" and \"a\"." - (map (lambda (value) - ;; A mapping is optional, so normalize it. - (if (string-contains value "=") - (string-split value #\=) - (list value value))) - inputs)) +return a normalized mapping as a hash table mapping +\"a\" to \"b\" or \"a\" to \"a\", respectively." + (alist->hash-table + (map (lambda (value) + ;; A mapping is optional, so normalize it. + (if (string-contains value "=") + (apply cons (string-split value #\=)) + (cons value value))) + inputs))) (define (prepare-inputs workflow inputs-map) - "Ensure that all files in the INPUTS-MAP alist exist and are linked -to the expected locations. Pick unspecified inputs from the -environment. Return either the INPUTS-MAP alist with any additionally -used input file names added, or raise a condition containing the list -of missing files." - (define-values (input-names input-files) - (match inputs-map - (() (values '() '())) - (_ (apply values - (apply zip inputs-map))))) + "Ensure that all files in the INPUTS-MAP hash table exist and are +linked to the expected locations. Pick unspecified inputs from the +environment. Return either the INPUTS-MAP hash table with any +additionally used input file names added, or raise a condition +containing the list of missing files." + (define input-names + (hash-map->list (compose car cons) inputs-map)) + (define input-files + (hash-map->list (compose cdr cons) inputs-map)) (define unspecified-inputs (lset-difference equal? (workflow-free-inputs workflow) @@ -509,11 +516,12 @@ of missing files." (() ;; Link all mapped input files to their target locations ;; TODO: ensure that target directories exist. - (for-each (match-lambda - ((target source) - (unless (file-exists? target) - (link source target)))) - inputs-map) + (hash-for-each-handle (match-lambda + ((target . source) + (unless (file-exists? target) + (log-event 'debug "linking~%") + (link source target)))) + inputs-map) inputs-map) (missing (raise (condition @@ -522,9 +530,10 @@ of missing files." ;; Try to find the files in the environment. (let ((found really-missing (partition file-exists? missing))) (if (null? really-missing) - (append inputs-map - (map (lambda (file) (list file file)) - found)) + (begin + (for-each (lambda (file) (hash-set! inputs-map file file)) + found) + inputs-map) (raise (condition (&missing-inputs (files really-missing))))))))) diff --git a/tests/cache.scm b/tests/cache.scm index 28e0f8d..3bb53ca 100644 --- a/tests/cache.scm +++ b/tests/cache.scm @@ -1,4 +1,4 @@ -;;; Copyright © 2020, 2021, 2022 Ricardo Wurmus <[email protected]> +;;; Copyright © 2020-2024 Ricardo Wurmus <[email protected]> ;;; ;;; This program is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by @@ -26,6 +26,7 @@ #:select (u8-list->bytevector)) #:use-module (srfi srfi-1) #:use-module (srfi srfi-64) + #:use-module (ice-9 hash-table) #:use-module (ice-9 match)) (test-begin "cache") @@ -85,27 +86,28 @@ (define workflow->data-hashes (@@ (gwl cache) workflow->data-hashes)) -(test-assert "workflow->data-hashes returns a list" - (list? (workflow->data-hashes wf - ordered-processes - '() - scripts-table))) +(test-assert "workflow->data-hashes returns a hash table" + (hash-table? (workflow->data-hashes wf + ordered-processes + (alist->hash-table '()) + scripts-table))) -(test-assert "workflow->data-hashes returns an alist where all processes are keys" +(test-assert "workflow->data-hashes returns a hash table where all processes are keys" (let ((hashes (workflow->data-hashes wf ordered-processes - '() + (alist->hash-table '()) scripts-table))) (every (lambda (process) - (assoc-ref hashes process)) + (hash-ref hashes process)) ordered-processes))) -(test-assert "workflow->data-hashes returns an alist where all values are strings" - (let ((hashes (workflow->data-hashes wf - ordered-processes - '() - scripts-table))) - (every string? (map cdr hashes)))) +(test-assert "workflow->data-hashes returns an hash table where all values are strings" + (let* ((hashes (workflow->data-hashes wf + ordered-processes + (alist->hash-table '()) + scripts-table)) + (vals (hash-map->list (compose cdr cons) hashes))) + (every string? vals))) (define process->hash (@@ (gwl cache) process->hash)) @@ -120,34 +122,34 @@ (test-equal "workflow->data-hashes hashes just the script for an independent process" (hashes->hash-string (list (process->hash p1 scripts-table))) - (assoc-ref (workflow->data-hashes wf - ordered-processes - '() - scripts-table) - p1)) + (hash-ref (workflow->data-hashes wf + ordered-processes + (alist->hash-table '()) + scripts-table) + p1)) (test-equal "workflow->data-hashes hashes the script and its inputs" (hashes->hash-string (list (process->hash p4 scripts-table) (hash-input-file* input-file))) - (assoc-ref (workflow->data-hashes wf - ordered-processes - (list - (list input-file input-file)) - scripts-table) - p4)) + (hash-ref (workflow->data-hashes wf + ordered-processes + (alist->hash-table + `((,input-file . ,input-file))) + scripts-table) + p4)) (test-equal "workflow->data-hashes hashes all dependencies of a process" (hashes->hash-string (list (process->hash p3 scripts-table) (process->hash p4 scripts-table) (hash-input-file* input-file))) - (assoc-ref (workflow->data-hashes wf - ordered-processes - (list - (list input-file input-file)) - scripts-table) - p3)) + (hash-ref (workflow->data-hashes wf + ordered-processes + (alist->hash-table + `((,input-file . ,input-file))) + scripts-table) + p3)) (test-assert "cache! creates directories as needed" (begin diff --git a/tests/workflows.scm b/tests/workflows.scm index f0ffba1..210d2f1 100644 --- a/tests/workflows.scm +++ b/tests/workflows.scm @@ -1,4 +1,4 @@ -;;; Copyright © 2019, 2020, 2021, 2022 Ricardo Wurmus <[email protected]> +;;; Copyright © 2019-2024 Ricardo Wurmus <[email protected]> ;;; ;;; This program is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by @@ -25,7 +25,8 @@ #:use-module (srfi srfi-26) #:use-module (srfi srfi-34) #:use-module (srfi srfi-35) - #:use-module (srfi srfi-64)) + #:use-module (srfi srfi-64) + #:use-module (ice-9 hash-table)) (test-begin "workflows") @@ -150,22 +151,25 @@ make-workflow (eat-salad -> p2)))))) (workflow? wf))) -(define inputs->map (@@ (gwl workflows) inputs->map)) +(define inputs->map + (compose (cut sort <> (lambda (a b) (string<? (car a) (car b)))) + (cut hash-map->list list <>) + (@@ (gwl workflows) inputs->map))) (test-equal "inputs->map returns the empty list for an empty list of inputs" '() (inputs->map '())) -(test-equal "inputs->map returns a normalized list for plain input file names" - '(("this" "this") - ("that" "that")) +(test-equal "inputs->map returns a normalized hash table for plain input file names" + '(("that" "that") + ("this" "this")) (inputs->map '("this" "that"))) -(test-equal "inputs->map returns a normalized list for mapped input file names" - '(("this" "THIS") - ("that" "THAT")) +(test-equal "inputs->map returns a normalized hash table for mapped input file names" + '(("that" "THAT") + ("this" "THIS")) (inputs->map '("this=THIS" "that=THAT"))) -(test-equal "inputs->map returns a normalized list for all file names" - '(("this" "THIS") +(test-equal "inputs->map returns a normalized hash table for all file names" + '(("anything" "anything") ("that" "that") - ("anything" "anything")) + ("this" "THIS")) (inputs->map '("this=THIS" "that" "anything"))) (define (wf3 prefix) @@ -211,10 +215,10 @@ make-workflow (close port) name)) (define (inputs-map-full prefix) - (list - (list (string-append prefix "first") input-file) - (list (string-append prefix "second") input-file) - (list (string-append prefix "fourth") input-file))) + (alist->hash-table + `((,(string-append prefix "first") . ,input-file) + (,(string-append prefix "second") . ,input-file) + (,(string-append prefix "fourth") . ,input-file)))) (define prepare-inputs (@@ (gwl workflows) prepare-inputs)) @@ -227,45 +231,58 @@ make-workflow (let ((prefix (make-prefix))) (test-error "prepare-inputs throws a missing-inputs condition" (@@ (gwl workflows) &missing-inputs) - (prepare-inputs (wf3 prefix) '())) + (prepare-inputs (wf3 prefix) (alist->hash-table '()))) (test-equal "prepare-inputs throws a condition listing the missing input files" (map (cut string-append prefix <>) (list "first" "second" "fourth")) (guard (condition (((@@ (gwl workflows) missing-inputs-condition?) condition) ((@@ (gwl workflows) missing-inputs-files) condition))) - (prepare-inputs (wf3 prefix) '())))) + (prepare-inputs (wf3 prefix) (alist->hash-table '()))))) -(let ((prefix (make-prefix))) +(let ((prefix (make-prefix)) + (sorted (lambda (items) + (let ((alist (hash-map->list list items))) + (sort alist + (lambda (a b) (string<? (car a) (car b)))))))) ;; Create all undeclared files (for-each (lambda (name) (with-output-to-file (string-append prefix name) (lambda () (display name)))) (list "first" "second" "fourth")) (test-equal "prepare-inputs adds found files to an empty inputs-map" - (map (lambda (name) - (let ((file-name (string-append prefix name))) - (list file-name file-name))) - (list "first" "second" "fourth")) - (prepare-inputs (wf3 prefix) '()))) + (sorted + (alist->hash-table + (map (lambda (name) + (let ((file-name (string-append prefix name))) + (cons file-name file-name))) + (list "first" "second" "fourth")))) + (sorted + (prepare-inputs (wf3 prefix) (alist->hash-table '()))))) (let* ((prefix (make-prefix)) - (incomplete-inputs-map - (list - (list (string-append prefix "first") input-file)))) + (sorted (lambda (items) + (let ((alist (hash-map->list list items))) + (sort alist + (lambda (a b) (string<? (car a) (car b))))))) + (incomplete-inputs-alist + `((,(string-append prefix "first") . ,input-file)))) ;; Create undeclared files (for-each (lambda (name) (with-output-to-file (string-append prefix name) (lambda () (display name)))) (list "second" "fourth")) (test-equal "prepare-inputs adds found files to the incomplete inputs-map" - (append incomplete-inputs-map - (map (lambda (name) - (let ((file-name (string-append prefix name))) - (list file-name file-name))) - (list "second" "fourth"))) - (prepare-inputs (wf3 prefix) - incomplete-inputs-map))) + (sorted + (alist->hash-table + (append incomplete-inputs-alist + (map (lambda (name) + (let ((file-name (string-append prefix name))) + (cons file-name file-name))) + (list "second" "fourth"))))) + (sorted + (prepare-inputs (wf3 prefix) + (alist->hash-table incomplete-inputs-alist))))) (test-equal "make-workflow rejects invalid field names 1/3" '("workflow" (garbage)) |
