summaryrefslogtreecommitdiff
diff options
authorRicardo Wurmus <[email protected]>2024-03-26 13:33:04 +0100
committerRicardo Wurmus <[email protected]>2024-03-26 22:24:58 +0100
commitf4442e409cf05d0c7cc4d6a251626d22efaffe8c (patch)
treea8cc891940a2d8e63bd539fc4220c082f56f7775
parent9db762cc380cdd4b1fdfab5d0e45a7d38f5041e7 (diff)
downloadgwl-master.tar.gz
Use more hash tables instead of alists.HEADmaster
* gwl/cache.scm (workflow->data-hashes, make-process->cache-prefix): Use a hash table for FREE-INPUTS-MAP. * gwl/workflows.scm (compute-workflow): Use hash table instead of LSET-INTERSECTION. (inputs->map): Return hash table. (prepare-inputs): Expect a hash table for INPUTS-MAP. * tests/cache.scm, tests/workflows.scm: Update to deal with hash tables.
-rw-r--r--gwl/cache.scm36
-rw-r--r--gwl/workflows.scm69
-rw-r--r--tests/cache.scm66
-rw-r--r--tests/workflows.scm85
4 files changed, 142 insertions, 114 deletions
diff --git a/gwl/cache.scm b/gwl/cache.scm
index 5d3b89c..fcfbfd4 100644
--- a/gwl/cache.scm
+++ b/gwl/cache.scm
@@ -1,4 +1,4 @@
-;;; Copyright © 2019, 2020, 2021, 2022 Ricardo Wurmus <[email protected]>
+;;; Copyright © 2019-2024 Ricardo Wurmus <[email protected]>
;;;
;;; This program is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
@@ -33,6 +33,7 @@
#:select (get-bytevector-all))
#:use-module ((gcrypt hash)
#:select (sha256 hash-algorithm file-hash))
+ #:use-module (ice-9 hash-table)
#:use-module (ice-9 match)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-26)
@@ -102,20 +103,18 @@ large files can be acceptable as it only has to be done once."
(file-hash (hash-algorithm sha256) file-name)))
(define (workflow->data-hashes workflow processes free-inputs-map scripts-table)
- "Return an alist associating each of the WORKFLOW's ordered list of
-PROCESSES with the hash of all the process scripts used to generate
-their outputs. FREE-INPUTS-MAP is an alist of input names to file
+ "Return a hash table associating each of the WORKFLOW's ordered list
+of PROCESSES with the hash of all the process scripts used to generate
+their outputs. FREE-INPUTS-MAP is a hash table of input names to file
names that must be considered when computing the hash."
(define graph (workflow-restrictions workflow))
(define (process-free-inputs process)
- (filter-map (lambda (input)
- (and=> (assoc-ref free-inputs-map input) first))
+ (filter-map (cut hash-ref free-inputs-map <>)
(process-inputs process)))
(define input-hashes
- (map (match-lambda
- ((name file-name)
- (cons file-name (hash-input-file* file-name))))
- free-inputs-map))
+ (hash-map->list (lambda (name file-name)
+ (cons file-name (hash-input-file* file-name)))
+ free-inputs-map))
;; Compute hashes for chains of scripts.
(define (kons process acc)
(cons (cons process
@@ -127,13 +126,14 @@ names that must be considered when computing the hash."
(append-map (cut assoc-ref acc <>)
(or (assoc-ref graph process) '()))))
acc))
- (map (match-lambda
- ((process . hashes)
- (cons process
- (bytevector->base32-string
- (sha256
- (u8-list->bytevector hashes))))))
- (fold kons input-hashes processes)))
+ (alist->hash-table
+ (map (match-lambda
+ ((process . hashes)
+ (cons process
+ (bytevector->base32-string
+ (sha256
+ (u8-list->bytevector hashes))))))
+ (fold kons input-hashes processes))))
(define (make-process->cache-prefix workflow free-inputs-map ordered-processes
scripts-table)
@@ -148,7 +148,7 @@ prefix for its outputs."
free-inputs-map
scripts-table)))
(lambda (process)
- (and=> (assoc-ref hashes process)
+ (and=> (hash-ref hashes process)
(cut string-append (%cache-root) "/" <> "/")))))
(define (directory? file)
diff --git a/gwl/workflows.scm b/gwl/workflows.scm
index 1a276c8..88d6be5 100644
--- a/gwl/workflows.scm
+++ b/gwl/workflows.scm
@@ -42,6 +42,7 @@
#:use-module ((guix status)
#:select (with-status-verbosity))
+ #:use-module (ice-9 hash-table)
#:use-module (ice-9 match)
#:use-module (ice-9 format)
#:use-module (ice-9 threads)
@@ -244,10 +245,16 @@ Use \"processes\" to specify process dependencies.~%"))
(define (scripts-by-process)
(let ((h (make-hash-table)))
(for-each (lambda (process)
- (let ((input-files (lset-intersection
- string=?
- (map second inputs-map-with-extra-files)
- (process-inputs process))))
+ (let* ((target-files (let ((res (set)))
+ (hash-for-each (lambda (key value)
+ (set-insert value res))
+ inputs-map-with-extra-files)
+ res))
+ (input-files (fold (lambda (input acc)
+ (if (set-contains? target-files input)
+ (cons input acc) acc))
+ '()
+ (process-inputs process))))
(log-event 'debug
(G_ "Computing script for process `~a'~%")
(process-name process))
@@ -476,26 +483,26 @@ can be used in a fold over a WORKFLOW's processes."
(define (inputs->map inputs)
"Given a list of strings INPUTS of the format \"a=b\" or just \"a\",
-return a normalized mapping as a list of two element lists containing
-\"a\" and \"b\" or just \"a\" and \"a\"."
- (map (lambda (value)
- ;; A mapping is optional, so normalize it.
- (if (string-contains value "=")
- (string-split value #\=)
- (list value value)))
- inputs))
+return a normalized mapping as a hash table mapping
+\"a\" to \"b\" or \"a\" to \"a\", respectively."
+ (alist->hash-table
+ (map (lambda (value)
+ ;; A mapping is optional, so normalize it.
+ (if (string-contains value "=")
+ (apply cons (string-split value #\=))
+ (cons value value)))
+ inputs)))
(define (prepare-inputs workflow inputs-map)
- "Ensure that all files in the INPUTS-MAP alist exist and are linked
-to the expected locations. Pick unspecified inputs from the
-environment. Return either the INPUTS-MAP alist with any additionally
-used input file names added, or raise a condition containing the list
-of missing files."
- (define-values (input-names input-files)
- (match inputs-map
- (() (values '() '()))
- (_ (apply values
- (apply zip inputs-map)))))
+ "Ensure that all files in the INPUTS-MAP hash table exist and are
+linked to the expected locations. Pick unspecified inputs from the
+environment. Return either the INPUTS-MAP hash table with any
+additionally used input file names added, or raise a condition
+containing the list of missing files."
+ (define input-names
+ (hash-map->list (compose car cons) inputs-map))
+ (define input-files
+ (hash-map->list (compose cdr cons) inputs-map))
(define unspecified-inputs
(lset-difference equal?
(workflow-free-inputs workflow)
@@ -509,11 +516,12 @@ of missing files."
(()
;; Link all mapped input files to their target locations
;; TODO: ensure that target directories exist.
- (for-each (match-lambda
- ((target source)
- (unless (file-exists? target)
- (link source target))))
- inputs-map)
+ (hash-for-each-handle (match-lambda
+ ((target . source)
+ (unless (file-exists? target)
+ (log-event 'debug "linking~%")
+ (link source target))))
+ inputs-map)
inputs-map)
(missing
(raise (condition
@@ -522,9 +530,10 @@ of missing files."
;; Try to find the files in the environment.
(let ((found really-missing (partition file-exists? missing)))
(if (null? really-missing)
- (append inputs-map
- (map (lambda (file) (list file file))
- found))
+ (begin
+ (for-each (lambda (file) (hash-set! inputs-map file file))
+ found)
+ inputs-map)
(raise (condition
(&missing-inputs (files really-missing)))))))))
diff --git a/tests/cache.scm b/tests/cache.scm
index 28e0f8d..3bb53ca 100644
--- a/tests/cache.scm
+++ b/tests/cache.scm
@@ -1,4 +1,4 @@
-;;; Copyright © 2020, 2021, 2022 Ricardo Wurmus <[email protected]>
+;;; Copyright © 2020-2024 Ricardo Wurmus <[email protected]>
;;;
;;; This program is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
@@ -26,6 +26,7 @@
#:select (u8-list->bytevector))
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-64)
+ #:use-module (ice-9 hash-table)
#:use-module (ice-9 match))
(test-begin "cache")
@@ -85,27 +86,28 @@
(define workflow->data-hashes
(@@ (gwl cache) workflow->data-hashes))
-(test-assert "workflow->data-hashes returns a list"
- (list? (workflow->data-hashes wf
- ordered-processes
- '()
- scripts-table)))
+(test-assert "workflow->data-hashes returns a hash table"
+ (hash-table? (workflow->data-hashes wf
+ ordered-processes
+ (alist->hash-table '())
+ scripts-table)))
-(test-assert "workflow->data-hashes returns an alist where all processes are keys"
+(test-assert "workflow->data-hashes returns a hash table where all processes are keys"
(let ((hashes (workflow->data-hashes wf
ordered-processes
- '()
+ (alist->hash-table '())
scripts-table)))
(every (lambda (process)
- (assoc-ref hashes process))
+ (hash-ref hashes process))
ordered-processes)))
-(test-assert "workflow->data-hashes returns an alist where all values are strings"
- (let ((hashes (workflow->data-hashes wf
- ordered-processes
- '()
- scripts-table)))
- (every string? (map cdr hashes))))
+(test-assert "workflow->data-hashes returns an hash table where all values are strings"
+ (let* ((hashes (workflow->data-hashes wf
+ ordered-processes
+ (alist->hash-table '())
+ scripts-table))
+ (vals (hash-map->list (compose cdr cons) hashes)))
+ (every string? vals)))
(define process->hash (@@ (gwl cache) process->hash))
@@ -120,34 +122,34 @@
(test-equal "workflow->data-hashes hashes just the script for an independent process"
(hashes->hash-string
(list (process->hash p1 scripts-table)))
- (assoc-ref (workflow->data-hashes wf
- ordered-processes
- '()
- scripts-table)
- p1))
+ (hash-ref (workflow->data-hashes wf
+ ordered-processes
+ (alist->hash-table '())
+ scripts-table)
+ p1))
(test-equal "workflow->data-hashes hashes the script and its inputs"
(hashes->hash-string
(list (process->hash p4 scripts-table)
(hash-input-file* input-file)))
- (assoc-ref (workflow->data-hashes wf
- ordered-processes
- (list
- (list input-file input-file))
- scripts-table)
- p4))
+ (hash-ref (workflow->data-hashes wf
+ ordered-processes
+ (alist->hash-table
+ `((,input-file . ,input-file)))
+ scripts-table)
+ p4))
(test-equal "workflow->data-hashes hashes all dependencies of a process"
(hashes->hash-string
(list (process->hash p3 scripts-table)
(process->hash p4 scripts-table)
(hash-input-file* input-file)))
- (assoc-ref (workflow->data-hashes wf
- ordered-processes
- (list
- (list input-file input-file))
- scripts-table)
- p3))
+ (hash-ref (workflow->data-hashes wf
+ ordered-processes
+ (alist->hash-table
+ `((,input-file . ,input-file)))
+ scripts-table)
+ p3))
(test-assert "cache! creates directories as needed"
(begin
diff --git a/tests/workflows.scm b/tests/workflows.scm
index f0ffba1..210d2f1 100644
--- a/tests/workflows.scm
+++ b/tests/workflows.scm
@@ -1,4 +1,4 @@
-;;; Copyright © 2019, 2020, 2021, 2022 Ricardo Wurmus <[email protected]>
+;;; Copyright © 2019-2024 Ricardo Wurmus <[email protected]>
;;;
;;; This program is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
@@ -25,7 +25,8 @@
#:use-module (srfi srfi-26)
#:use-module (srfi srfi-34)
#:use-module (srfi srfi-35)
- #:use-module (srfi srfi-64))
+ #:use-module (srfi srfi-64)
+ #:use-module (ice-9 hash-table))
(test-begin "workflows")
@@ -150,22 +151,25 @@ make-workflow
(eat-salad -> p2))))))
(workflow? wf)))
-(define inputs->map (@@ (gwl workflows) inputs->map))
+(define inputs->map
+ (compose (cut sort <> (lambda (a b) (string<? (car a) (car b))))
+ (cut hash-map->list list <>)
+ (@@ (gwl workflows) inputs->map)))
(test-equal "inputs->map returns the empty list for an empty list of inputs"
'()
(inputs->map '()))
-(test-equal "inputs->map returns a normalized list for plain input file names"
- '(("this" "this")
- ("that" "that"))
+(test-equal "inputs->map returns a normalized hash table for plain input file names"
+ '(("that" "that")
+ ("this" "this"))
(inputs->map '("this" "that")))
-(test-equal "inputs->map returns a normalized list for mapped input file names"
- '(("this" "THIS")
- ("that" "THAT"))
+(test-equal "inputs->map returns a normalized hash table for mapped input file names"
+ '(("that" "THAT")
+ ("this" "THIS"))
(inputs->map '("this=THIS" "that=THAT")))
-(test-equal "inputs->map returns a normalized list for all file names"
- '(("this" "THIS")
+(test-equal "inputs->map returns a normalized hash table for all file names"
+ '(("anything" "anything")
("that" "that")
- ("anything" "anything"))
+ ("this" "THIS"))
(inputs->map '("this=THIS" "that" "anything")))
(define (wf3 prefix)
@@ -211,10 +215,10 @@ make-workflow
(close port)
name))
(define (inputs-map-full prefix)
- (list
- (list (string-append prefix "first") input-file)
- (list (string-append prefix "second") input-file)
- (list (string-append prefix "fourth") input-file)))
+ (alist->hash-table
+ `((,(string-append prefix "first") . ,input-file)
+ (,(string-append prefix "second") . ,input-file)
+ (,(string-append prefix "fourth") . ,input-file))))
(define prepare-inputs (@@ (gwl workflows) prepare-inputs))
@@ -227,45 +231,58 @@ make-workflow
(let ((prefix (make-prefix)))
(test-error "prepare-inputs throws a missing-inputs condition"
(@@ (gwl workflows) &missing-inputs)
- (prepare-inputs (wf3 prefix) '()))
+ (prepare-inputs (wf3 prefix) (alist->hash-table '())))
(test-equal "prepare-inputs throws a condition listing the missing input files"
(map (cut string-append prefix <>)
(list "first" "second" "fourth"))
(guard (condition
(((@@ (gwl workflows) missing-inputs-condition?) condition)
((@@ (gwl workflows) missing-inputs-files) condition)))
- (prepare-inputs (wf3 prefix) '()))))
+ (prepare-inputs (wf3 prefix) (alist->hash-table '())))))
-(let ((prefix (make-prefix)))
+(let ((prefix (make-prefix))
+ (sorted (lambda (items)
+ (let ((alist (hash-map->list list items)))
+ (sort alist
+ (lambda (a b) (string<? (car a) (car b))))))))
;; Create all undeclared files
(for-each (lambda (name)
(with-output-to-file (string-append prefix name)
(lambda () (display name))))
(list "first" "second" "fourth"))
(test-equal "prepare-inputs adds found files to an empty inputs-map"
- (map (lambda (name)
- (let ((file-name (string-append prefix name)))
- (list file-name file-name)))
- (list "first" "second" "fourth"))
- (prepare-inputs (wf3 prefix) '())))
+ (sorted
+ (alist->hash-table
+ (map (lambda (name)
+ (let ((file-name (string-append prefix name)))
+ (cons file-name file-name)))
+ (list "first" "second" "fourth"))))
+ (sorted
+ (prepare-inputs (wf3 prefix) (alist->hash-table '())))))
(let* ((prefix (make-prefix))
- (incomplete-inputs-map
- (list
- (list (string-append prefix "first") input-file))))
+ (sorted (lambda (items)
+ (let ((alist (hash-map->list list items)))
+ (sort alist
+ (lambda (a b) (string<? (car a) (car b)))))))
+ (incomplete-inputs-alist
+ `((,(string-append prefix "first") . ,input-file))))
;; Create undeclared files
(for-each (lambda (name)
(with-output-to-file (string-append prefix name)
(lambda () (display name))))
(list "second" "fourth"))
(test-equal "prepare-inputs adds found files to the incomplete inputs-map"
- (append incomplete-inputs-map
- (map (lambda (name)
- (let ((file-name (string-append prefix name)))
- (list file-name file-name)))
- (list "second" "fourth")))
- (prepare-inputs (wf3 prefix)
- incomplete-inputs-map)))
+ (sorted
+ (alist->hash-table
+ (append incomplete-inputs-alist
+ (map (lambda (name)
+ (let ((file-name (string-append prefix name)))
+ (cons file-name file-name)))
+ (list "second" "fourth")))))
+ (sorted
+ (prepare-inputs (wf3 prefix)
+ (alist->hash-table incomplete-inputs-alist)))))
(test-equal "make-workflow rejects invalid field names 1/3"
'("workflow" (garbage))