From 577772566b3f4e9c07d2d033305e9abe5f221dd5 Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Mon, 21 Jul 2025 20:01:38 +0200 Subject: [PATCH] Memoize undeclared systems The memoize function is not a pretty thing in terms of implementation, but it's exactly what we need to solve this UX problem and performance problem. Without this, every distinct `withSystem` call will cause a re-evaluation of the `perSystem` module, which is inefficient. Now, it's a one-time 13KB and length(system) attribute lookups: negligible compared to any instantiations and such. Nix doesn't offer memoization for functions yet, so this is the best we can do. --- dev/tests/eval-tests.nix | 22 ++++++++++++ lib.nix | 4 +++ lib/memoize/bytes.dat | 2 ++ lib/memoize/measure-bytes-per-char.nix | 23 ++++++++++++ lib/memoize/memoize.nix | 50 ++++++++++++++++++++++++++ lib/memoize/test.nix | 10 ++++++ modules/perSystem.nix | 11 ++++-- 7 files changed, 120 insertions(+), 2 deletions(-) create mode 100644 lib/memoize/bytes.dat create mode 100644 lib/memoize/measure-bytes-per-char.nix create mode 100644 lib/memoize/memoize.nix create mode 100644 lib/memoize/test.nix diff --git a/dev/tests/eval-tests.nix b/dev/tests/eval-tests.nix index 355fca4..077edb1 100644 --- a/dev/tests/eval-tests.nix +++ b/dev/tests/eval-tests.nix @@ -169,6 +169,28 @@ rec { partitionedAttrs.devShells = "dev"; }); + /** + This one is for manual testing. Should look like: + + ``` + nix-repl> checks.x86_64-linux.eval-tests.internals.printSystem.withSystem "foo" ({ config, ... }: null) + trace: Evaluating perSystem for foo + null + + nix-repl> checks.x86_64-linux.eval-tests.internals.printSystem.withSystem "foo" ({ config, ... }: null) + null + + ``` + */ + printSystem = mkFlake + { inputs.self = { }; } + ({ withSystem, ... }: { + systems = [ ]; + perSystem = { config, system, ... 
}: + builtins.trace "Evaluating perSystem for ${system}" { }; + flake.withSystem = withSystem; + }); + dogfoodProvider = mkFlake { inputs.self = { }; } ({ flake-parts-lib, ... }: { diff --git a/lib.nix b/lib.nix index 5031a46..b76ce7a 100644 --- a/lib.nix +++ b/lib.nix @@ -222,6 +222,10 @@ let modulePath: staticArgs: lib.setDefaultModuleLocation modulePath (import modulePath staticArgs); + inherit (import ./lib/memoize/memoize.nix { + inherit lib; + }) memoizeStr; + /** `importAndPublish name module` returns a module that both imports the `module`, and exposes it as flake attribute `modules.flake.${name}`. diff --git a/lib/memoize/bytes.dat b/lib/memoize/bytes.dat new file mode 100644 index 0000000..017b909 --- /dev/null +++ b/lib/memoize/bytes.dat @@ -0,0 +1,2 @@ + +  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ \ No newline at end of file diff --git a/lib/memoize/measure-bytes-per-char.nix b/lib/memoize/measure-bytes-per-char.nix new file mode 100644 index 0000000..454a165 --- /dev/null +++ b/lib/memoize/measure-bytes-per-char.nix @@ -0,0 +1,23 @@ +# Run with: +# NIX_SHOW_STATS=1 nix eval --expr 'import ./measure-bytes-per-char.nix { control = false; size = 10; }' --impure +# NIX_SHOW_STATS=1 nix eval --expr 'import ./measure-bytes-per-char.nix { control = true; size = 10; }' --impure + +{ control ? false, size ? 
10 }: + +let + lib = import <nixpkgs/lib>; + inherit (import ./memoize.nix { inherit lib; }) memoizeStr; + + # Create a string of the specified size + key = lib.concatStrings (lib.genList (i: "a") size); + + # Memoized identity function + memoId = memoizeStr (x: x); + + # Prime the trie with a minimal query to force its construction + prime = memoId ""; + +in +if control +then builtins.seq prime key # Return key after priming +else builtins.seq prime (memoId key) # Pass through memoization after priming diff --git a/lib/memoize/memoize.nix b/lib/memoize/memoize.nix new file mode 100644 index 0000000..b633c98 --- /dev/null +++ b/lib/memoize/memoize.nix @@ -0,0 +1,50 @@ +{ lib, ... }: +let + keys = + let + nonNullBytesStr = + builtins.readFile ./bytes.dat; + nonNullItems = + lib.stringToCharacters nonNullBytesStr; + + keysList = [ "" ] ++ nonNullItems; + + byteNames = lib.genAttrs keysList (k: null); + in + byteNames; + + /** + Produce an infinite trie for memoizing a function with a string input. + + Memory use is a large constant factor times the number of unique prefixes of the strings passed to the memoizeStr / queryTrie functions. + */ + makeTrie = prefix: f: + lib.mapAttrs + (k: v: if k == "" then f prefix else makeTrie (prefix + k) f) + keys; + + queryTrie = + trie: needle: + let + needleList = lib.stringToCharacters needle; + destination = lib.foldl' + (subtrie: c: subtrie.${c}) + trie + needleList; + in + destination.""; + +in +{ + /** + Turn a function that accepts a string input into one that memoizes the results. + Make sure to partially apply it and use it over and over in e.g. the same let binding. + Otherwise, you're wasting kilobytes of memory allocations *for each letter in each call*. + That's 12+ KB per input byte on Nix 2.31, and more on older versions. + Yes, this function is surprisingly EXPENSIVE, but cheaper than e.g. reinvoking Nixpkgs. + Its memory cost is comparable to that of loading a small Nix file. 
+ */ + memoizeStr = f: + let trie = makeTrie "" f; + in queryTrie trie; +} diff --git a/lib/memoize/test.nix b/lib/memoize/test.nix new file mode 100644 index 0000000..b2848e4 --- /dev/null +++ b/lib/memoize/test.nix @@ -0,0 +1,10 @@ +# Ad hoc manual test dependent on observing side effects +let + lib = import ~/src/nixpkgs-master/lib; + inherit (import ./memoize.nix { inherit lib; }) memoizeStr; + # Don't use this in the wild, it's too expensive! + printOnce = memoizeStr (x: builtins.trace "computing f ${lib.strings.escapeNixString x}" x); +in +{ + inherit printOnce memoizeStr lib; +} diff --git a/modules/perSystem.nix b/modules/perSystem.nix index 25e6b33..d4890ef 100644 --- a/modules/perSystem.nix +++ b/modules/perSystem.nix @@ -59,6 +59,14 @@ let } ''; + /** + We primarily use `systems` to help memoize the per system context, but that + doesn't extend to arbitrary `system`s. + For that, we use the slightly less efficient, but perfectly acceptable + `memoizeStr` function. + */ + otherMemoizedSystems = flake-parts-lib.memoizeStr config.perSystem; + in { options = { @@ -139,8 +147,7 @@ in config = { allSystems = genAttrs config.systems config.perSystem; - # TODO: Sub-optimal error message. Get Nix to support a memoization primop, or get Nix Flakes to support systems properly or get Nix Flakes to add a name to flakes. - _module.args.getSystem = system: config.allSystems.${system} or (builtins.trace "using non-memoized system ${system}" config.perSystem system); + _module.args.getSystem = system: config.allSystems.${system} or (otherMemoizedSystems system); # The warning is there for a reason. Only use this in situations where the # performance cost has already been incurred, such as in `flakeModules.easyOverlay`,