Memoize undeclared systems

The memoize function is not a pretty thing in terms of implementation,
but it's exactly what we need to solve this UX problem and performance
problem.

Without this, every distinct `withSystem` call for an undeclared system
causes a re-evaluation of the `perSystem` module, which is inefficient.
Now the cost is a one-time 13KB allocation plus length(system) attribute
lookups, which is negligible compared to any instantiations and such.

Nix doesn't offer memoization for functions yet, so this is the best we
can do.
This commit is contained in:
Robert Hensing 2025-07-21 20:01:38 +02:00
parent 7782624440
commit 577772566b
7 changed files with 120 additions and 2 deletions

View file

@ -169,6 +169,28 @@ rec {
partitionedAttrs.devShells = "dev";
});
/**
Manual test for the memoization of systems that are not listed in `systems`.
`systems` is empty here, so "foo" is not a declared system, and `perSystem`
emits a trace message whenever it is evaluated for a system.

Expected `nix repl` session: the trace line shows up on the first call only;
repeating the call with the same system prints no trace.
```
nix-repl> checks.x86_64-linux.eval-tests.internals.printSystem.withSystem "foo" ({ config, ... }: null)
trace: Evaluating perSystem for foo
null
nix-repl> checks.x86_64-linux.eval-tests.internals.printSystem.withSystem "foo" ({ config, ... }: null)
null
```
*/
printSystem = mkFlake
{ inputs.self = { }; }
({ withSystem, ... }: {
# No declared systems: every `withSystem` call targets an undeclared system.
systems = [ ];
perSystem = { config, system, ... }:
# The trace fires each time this module function is evaluated for a system.
builtins.trace "Evaluating perSystem for ${system}" { };
# Re-export `withSystem` as a flake output so it can be called from the repl.
flake.withSystem = withSystem;
});
dogfoodProvider = mkFlake
{ inputs.self = { }; }
({ flake-parts-lib, ... }: {

View file

@ -222,6 +222,10 @@ let
modulePath: staticArgs:
lib.setDefaultModuleLocation modulePath (import modulePath staticArgs);
# Bring `memoizeStr` into scope from ./lib/memoize/memoize.nix, passing `lib` through.
memoizeStr = (import ./lib/memoize/memoize.nix { inherit lib; }).memoizeStr;
/**
`importAndPublish name module` returns a module that both imports the `module`, and exposes it as flake attribute `modules.flake.${name}`.

2
lib/memoize/bytes.dat Normal file
View file

@ -0,0 +1,2 @@

 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~€亗儎厗噲墛媽崕彁憭摂晼棙櫄洔潪煚、¥ウЖ┆<D096><E29486><EFBFBD>辈炒刀犯购患骄坷谅媚牌侨墒颂臀闲岩釉罩棕仝圮蒉哙徕沅彐玷殛腱眍镳耱篝貊鼬<E8B28A><E9BCAC><EFBFBD><EFBFBD>

View file

@ -0,0 +1,23 @@
# Helper for comparing evaluator statistics with and without a memoizeStr lookup.
# Run both variants and compare the NIX_SHOW_STATS output:
# NIX_SHOW_STATS=1 nix eval --expr 'import ./measure-bytes-per-char.nix { control = false; size = 10; }' --impure
# NIX_SHOW_STATS=1 nix eval --expr 'import ./measure-bytes-per-char.nix { control = true; size = 10; }' --impure
#
# Arguments:
#   control - when true, return the key without looking it up (baseline run)
#   size    - length of the key string
{ control ? false, size ? 10 }:
let
  lib = import <nixpkgs/lib>;
  inherit (import ./memoize.nix { inherit lib; }) memoizeStr;

  # A string made of `size` copies of "a"
  input = lib.concatStrings (lib.genList (_: "a") size);

  # Identity function routed through the memoization trie
  memoizedIdentity = memoizeStr (s: s);

  # Look up the empty string first, so that the trie is constructed in both
  # variants before the part that differs
  primed = memoizedIdentity "";

  # Baseline returns the key as is; otherwise pass it through the memoized function
  result = if control then input else memoizedIdentity input;
in
builtins.seq primed result

50
lib/memoize/memoize.nix Normal file
View file

@ -0,0 +1,50 @@
{ lib, ... }:
let
# Attribute set with one (null-valued) attribute per possible trie edge:
# every single-byte string read from ./bytes.dat (expected to hold each
# non-NUL byte value), plus "" which marks the slot holding a node's result.
keys =
  lib.genAttrs
    ([ "" ] ++ lib.stringToCharacters (builtins.readFile ./bytes.dat))
    (_: null);
/**
Build a lazy, conceptually infinite trie that memoizes `f`, a function taking a string.
Every node has one attribute per entry of `keys`: the `""` attribute holds `f prefix`,
where `prefix` is the string spelled out by the path to that node, and each other
attribute holds the child node for `prefix` extended by that byte.
Memory use grows, by a large constant factor, with the number of distinct prefixes of
the strings that are looked up through memoizeStr / queryTrie.
*/
makeTrie = prefix: f:
  lib.mapAttrs
    (byte: _: if byte != "" then makeTrie (prefix + byte) f else f prefix)
    keys;
# Descend `trie` along the characters of `needle` (as split by
# lib.stringToCharacters), then return the value stored under the "" attribute
# of the node that was reached.
queryTrie = trie: needle:
  (lib.foldl'
    (node: byte: node.${byte})
    trie
    (lib.stringToCharacters needle)
  )."";
in
{
/**
Wrap `f`, a function of one string argument, so that its results are memoized.

The cache lives in the value returned by `memoizeStr f`: bind that value once
(e.g. in a `let`) and reuse it for every call. Calling `memoizeStr f` anew for
each lookup throws the cache away and allocates kilobytes *for each letter in
each call* - 12+ KB per input byte on Nix 2.31, and more on older versions.

This function is surprisingly EXPENSIVE, yet cheaper than e.g. reinvoking Nixpkgs;
its memory cost is comparable to that of loading a small Nix file.
*/
memoizeStr = f: queryTrie (makeTrie "" f);
}

10
lib/memoize/test.nix Normal file
View file

@ -0,0 +1,10 @@
# Ad hoc manual test dependent on observing side effects.
# Evaluate it interactively (e.g. in `nix repl`, with `nixpkgs` on the Nix
# lookup path) and call `printOnce` twice with the same string: the
# "computing f ..." trace is expected to appear for the first call only.
let
  # Resolved through the Nix lookup path instead of a checkout in one
  # developer's home directory, matching measure-bytes-per-char.nix.
  lib = import <nixpkgs/lib>;
  inherit (import ./memoize.nix { inherit lib; }) memoizeStr;
  # Don't use this in the wild, it's too expensive!
  printOnce = memoizeStr (x: builtins.trace "computing f ${lib.strings.escapeNixString x}" x);
in
{
  inherit printOnce memoizeStr lib;
}

View file

@ -59,6 +59,14 @@ let
}
'';
/**
We primarily use `systems` to help memoize the per system context, but that
doesn't extend to arbitrary `system`s.
For that, we use the slightly less efficient, but perfectly acceptable
`memoizeStr` function.

The result is a function from a system string to `config.perSystem` applied to
that system. It is bound once in this `let` so that all lookups go through the
same memoized function, as the `memoizeStr` documentation asks for.
*/
otherMemoizedSystems = flake-parts-lib.memoizeStr config.perSystem;
in
{
options = {
@ -139,8 +147,7 @@ in
config = {
allSystems = genAttrs config.systems config.perSystem;
# TODO: Sub-optimal error message. Get Nix to support a memoization primop, or get Nix Flakes to support systems properly or get Nix Flakes to add a name to flakes.
_module.args.getSystem = system: config.allSystems.${system} or (builtins.trace "using non-memoized system ${system}" config.perSystem system);
_module.args.getSystem = system: config.allSystems.${system} or (otherMemoizedSystems system);
# The warning is there for a reason. Only use this in situations where the
# performance cost has already been incurred, such as in `flakeModules.easyOverlay`,