From bcc385e39918b21765f554a555ef4fbf807fea44 Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Sun, 12 May 2019 13:48:32 +0200 Subject: [PATCH] Add IFD-free gitignoreSource implementation --- .gitignore | 3 + ci.nix | 1 + default.nix | 8 ++ find-files.nix | 272 +++++++++++++++++++++++++++++++++++++++++++ parse-git-config.nix | 63 ++++++++++ rules.nix | 17 +++ tests/default.nix | 24 ++++ tests/runner.nix | 126 ++++++++++++++++++++ 8 files changed, 514 insertions(+) create mode 100644 .gitignore create mode 100644 ci.nix create mode 100644 default.nix create mode 100644 find-files.nix create mode 100644 parse-git-config.nix create mode 100644 tests/default.nix create mode 100644 tests/runner.nix diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7dc3520 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +result +result-* + diff --git a/ci.nix b/ci.nix new file mode 100644 index 0000000..0bd4ef0 --- /dev/null +++ b/ci.nix @@ -0,0 +1 @@ +import ./tests/default.nix \ No newline at end of file diff --git a/default.nix b/default.nix new file mode 100644 index 0000000..548b7f3 --- /dev/null +++ b/default.nix @@ -0,0 +1,8 @@ +{ lib ? import <nixpkgs/lib> }: +let + find-files = import ./find-files.nix { inherit lib; }; +in +{ + inherit (find-files) gitignoreFilter; + gitignoreSource = p: lib.cleanSourceWith { filter = find-files.gitignoreFilter p; src = p; }; +} diff --git a/find-files.nix b/find-files.nix new file mode 100644 index 0000000..6c464fa --- /dev/null +++ b/find-files.nix @@ -0,0 +1,272 @@ +{ lib ? 
import <nixpkgs/lib> }: +let + parse-ini = import ./parse-git-config.nix { inherit lib; }; + parse-gitignore = import ./rules.nix { inherit lib; }; +in +rec { + inherit (builtins) dirOf baseNameOf abort split hasAttr readFile readDir; + inherit (lib.lists) filter length head tail concatMap take; + inherit (lib.attrsets) filterAttrs mapAttrs; + inherit (lib.strings) hasPrefix removePrefix splitString; + inherit (lib) strings flip; + inherit lib; + inherit parse-ini; + + # TODO: 'filesystem.nix' + # - readLines function with CRLF support + # TODO: check assumption that a relative core.excludesFile is relative to HOME + # TODO: write test for trailing slash (matches dir only) + # TODO: rename Pattern' + + gitignoreFilter = basePath: + let + patternsBelowP = findPatternsTree basePath; + basePathStr = toString basePath; + in + path: type: let + localPath = removePrefix basePathStr (toString path); + localPathElements = splitString "/" localPath; + getPatterns = patternTree: pathElems: + if length pathElems == 0 + then patternTree + else let hd = head pathElems; in + if hd == "" || hd == "." + then getPatterns patternTree (tail pathElems) + else if hasAttr hd patternTree + then getPatterns patternTree."${hd}" (tail pathElems) + else patternTree # Files are not in the tree, so we return the + # most patterns we could find here. + ; + in parse-gitignore.runFilterPattern' (getPatterns patternsBelowP localPathElements)."/patterns" path type; + + ##### + # Constructing a tree of patterns per non-ignored subdirectory, recursively + # + + /* Given a dir, return a tree of patterns mirroring the directory structure, + where the patterns on the nodes towards the leaves become more specific. + + It's a tree where the nodes are attribute sets and the keys are directory basenames. + The patterns are mixed into the attrsets using the special key "/patterns". 
+ Leaves (directories without non-ignored subdirectories) only hold the "/patterns" key. + */ + findPatternsTree = dir: + let + listOfStartingPatterns = map ({contextDir, file, ...}: + parse-gitignore.gitignoreFilter' (readFile file) contextDir + ) (findAncestryGitignores dir); + startingPatterns = builtins.foldl' + parse-gitignore.mergePattern' + (defaultPatterns dir) # not the unit of merge but a set of defaults + listOfStartingPatterns; + in + findDescendantPatternsTree startingPatterns dir; + + # We do an eager-looking descent ourselves, in order to memoize the patterns. + # In fact it is lazy, so some directories' patterns will not need to be + # evaluated if not requested. This works out nicely when the user adds a + # filter *before* the gitignore filter. + # + # This function assumes that the gitignore files that are specified *in* + # dir, in the *ancestry* of dir or globally are already included in + # currentPatterns. + findDescendantPatternsTree = currentPatterns: dir: + let nodes = readDir dir; + dirs = filterAttrs (name: type: + type == nodeTypes.directory && + (parse-gitignore.runFilterPattern' currentPatterns (dir + "/${name}") type) + ) nodes; + in mapAttrs (name: _t: + let subdir = dir + "/${name}"; + ignore = subdir + "/.gitignore"; + newPatterns = map (file: + parse-gitignore.mergePattern' + currentPatterns # Performance: this is where you could potentially filter out patterns irrelevant to subdir + (parse-gitignore.gitignoreFilter' (readFile file) subdir) + ) (guardFile ignore); + subdirPatterns = headOr currentPatterns newPatterns; + in + findDescendantPatternsTree subdirPatterns subdir + ) dirs // { "/patterns" = currentPatterns; }; + defaultPatterns = root: parse-gitignore.gitignoreFilter' ".git" root; # no trailing slash: .git may be a regular file (worktree reference) rather than a directory + + + ##### + # Finding the gitignore files in the current directory, towards the root and + # in the user config. 
+ # + findAncestryGitignores = path: + let + up = inspectDirAndUp path; + inherit (up) localIgnores gitDir worktreeRoot; + globalIgnores = map (file: { contextDir = worktreeRoot; inherit file; }) maybeGlobalIgnoresFile; + + # TODO: can local config override global core.excludesFile? + # localConfigItems = parse-ini.parseIniFile (gitDir + "/config"); + in + globalIgnores ++ localIgnores; + + + + ##### + # Functions for getting "context" from directory ancestry, repo + # + + /* path -> { localIgnores : list {contextDir, file} + , gitDir : path (null if no .git entry was found; NOTE(review): a symlinked .git yields its node type string) + , worktreeRoot : path (the directory at which the upward search stopped) } + + Precondition: dir exists and is a directory + + */ + inspectDirAndUp = dirPath: let + go = p: acc: + let + dirInfo = inspectDir p; + isHighest = dirInfo.isWorkTreeRoot || p == /. || p == "/"; + dirs = [dirInfo] ++ acc; + + getIgnores = di: if di.hasGitignore + then [{ contextDir = di.dirPath; file = di.dirPath + "/.gitignore"; }] + else []; + + in + if isHighest + then + { + localIgnores = concatMap getIgnores dirs; + worktreeRoot = p; + inherit (dirInfo) gitDir; + } + else + go (dirOf p) dirs + ; + in go dirPath []; + + # TODO: only readDir lazily for the .git type. Rest can be done efficiently with pathExists + inspectDir = dirPath: + let + d = readDir dirPath; + dotGitType = d.".git" or null; + isWorkTreeRoot = dotGitType != null; + gitDir = if dotGitType == nodeTypes.directory then dirPath + "/.git" + else if dotGitType == nodeTypes.regular then readDotGitFile (dirPath + "/.git") + else dotGitType; + hasGitignore = (d.".gitignore" or null) == nodeTypes.regular; + in { inherit isWorkTreeRoot hasGitignore gitDir dirPath; }; + + /* .git file path -> GIT_DIR + + Used for establishing $GIT_DIR when the worktree is an external worktree, + when .git is a file. 
+ */ + readDotGitFile = filepath: + let contents = readFile filepath; + lines = lib.strings.splitString "\n" contents; + gitdirLines = map (strings.removePrefix "gitdir: ") (filter (lib.strings.hasPrefix "gitdir: ") lines); + errNoGitDirLine = abort ("Could not find a gitdir line in " + toString filepath); + in (p: if hasPrefix "/" p then /. + p else dirOf filepath + "/${p}") (headOr errNoGitDirLine gitdirLines) # a relative gitdir is relative to the directory holding the .git file + ; + + /* default -> list -> head or default + */ + headOr = default: l: + if length l == 0 then default else head l; + + + + ##### + # Finding git config + # + + maybeXdgGitConfigFile = + for + (guardNonEmptyString (builtins.getEnv "XDG_CONFIG_HOME")) + (xdgConfigHome: + guardFile (/. + xdgConfigHome + "/git/config") + ); + maybeGlobalConfig = take 1 (guardFile ~/.gitconfig + ++ maybeXdgGitConfigFile + ++ guardFile ~/.config/git/config); + + globalConfigItems = for maybeGlobalConfig (globalConfigFile: + parse-ini.parseIniFile globalConfigFile + ); + globalConfiguredExcludesFile = take 1 ( + for + globalConfigItems + ({section, key, value}: + for + (guard (strings.toLower section == "core" && strings.toLower key == "excludesfile")) + (_: + resolveFile (~/.) value + ) + ) + ); + xdgExcludesFile = for + (guardNonEmptyString (builtins.getEnv "XDG_CONFIG_HOME")) + (xdgConfigHome: + guardFile (/. + xdgConfigHome + "/git/ignore") + ); + maybeGlobalIgnoresFile = take 1 + ( globalConfiguredExcludesFile + ++ xdgExcludesFile + ++ guardFile ~/.config/git/ignore); + + /* Given baseDir, which generalizes the idea of working directory, + resolve a file path relative to that directory. + + It will return at most 1 path; 0 if no such file could be found. + Absolute paths and home-relative (~) paths ignore the baseDir, unless + the home-relative file does not exist; then the path is also tried below baseDir. + */ + resolveFile = baseDir: path: take 1 + ( if hasPrefix "/" path then guardFile (/. + path) else + (if hasPrefix "~" path then guardFile (~/. 
+ removePrefix "~" path) else []) + ++ guardFile (baseDir + "/${path}") + ) + ; + + + ##### + # List as a search and backtracking tool + # + + nullableToList = x: if x == null then [] else [x]; + for = l: f: concatMap f l; + guard = b: if b then [{}] else []; + guardFile = p: if nodeTypes.isFile (safeGetNodeType p) then [p] else []; + guardNonEmptyString = s: if s == "" then [] else [s]; + guardNonNull = a: if a != null then a else []; + + + + ##### + # Working with readDir output + # + + nodeTypes.directory = "directory"; + nodeTypes.regular = "regular"; + nodeTypes.symlink = "symlink"; + + # TODO: Assumes that it's a file when it's a symlink + nodeTypes.isFile = p: p == nodeTypes.regular || p == nodeTypes.symlink; + + + + ##### + # Generic file system functions + # + + /* path -> nullable nodeType + * Without throwing (unrecoverable) errors + */ + safeGetNodeType = path: + if toString path == "/" then nodeTypes.directory + else if builtins.pathExists path + then let parentDir = readDir (dirOf path); + in parentDir."${baseNameOf path}" or null + else null; + + +} diff --git a/parse-git-config.nix b/parse-git-config.nix new file mode 100644 index 0000000..b98e6d1 --- /dev/null +++ b/parse-git-config.nix @@ -0,0 +1,63 @@ +# Basic git INI-like file format parser +# +# Probably not feature complete anytime soon... +# +# Notable omissions: +# - multiline values (if supported??) +# - proper subsections +# - includes +# - conditional includes +# - keys with embedded whitespace +# +# Low hanging fruit: +# - group by section if you need to query the file often +# +# Unknowns: +# - whitespace before section header? +# - what if no section is specified before first item? +# +{ lib ? import <nixpkgs/lib>, ... 
}: +let + inherit (lib.strings) splitString hasPrefix removePrefix removeSuffix hasInfix replaceStrings; + inherit (lib.lists) foldl' head tail; + + parseIniText = text: + let + rawLines = splitString "\n" text; + folded = foldl' step zero rawLines; + zero = { section = ""; + items = []; + }; + step = r@{ section, items }: rawLine: let line = lstrip rawLine; in # git skips leading whitespace, also before section headers + if hasPrefix "[" line + then r // { + section = removePrefix "[" (removeSuffix "]" line); + } + else if hasInfix "=" line && !(hasPrefix "#" line || hasPrefix ";" line) then # '#' and ';' start comment lines + let + s = splitString "=" line; + s0 = head s; + key = replaceStrings [" " "\t"] ["" ""] s0; + v = removePrefix "${s0}=" line; + value = lstrip v; + in + r // { + items = items ++ [{ inherit section key value; }]; + } + else + r + ; + in + folded.items + ; + lstrip = s: if hasPrefix " " s then lstrip (removePrefix " " s) + else if hasPrefix "\t" s then lstrip (removePrefix "\t" s) + else s; + parseIniFile = p: + builtins.addErrorContext ("while parsing INI file " + toString p) ( + parseIniText (builtins.readFile p) + ) + ; +in { + inherit parseIniText parseIniFile; +} diff --git a/rules.nix b/rules.nix index c232512..75f5703 100644 --- a/rules.nix +++ b/rules.nix @@ -27,6 +27,22 @@ rec { last (last ([[true true]] ++ (filter head matched))) ); + # TODO: we only care about the last match, so it seems we can do a reverse + # scan per file and represent the outcome as true, false, and null for + # nothing said => default to true after all rules are processed. 
+ runFilterPattern' = r: path: type: last (last ([[true true]] ++ r (toString path) type)); + filterPattern' = patterns: root: + (name: _type: + let + relPath = lib.removePrefix ((toString root) + "/") name; + matches = pair: (match (head pair) relPath) != null; + matched = map (pair: [(matches pair) (last pair)]) patterns; + in + filter head matched + ); + mergePattern' = pa: pb: (name: type: pa name type ++ pb name type); + unitPattern' = name: type: []; + + # string -> [[regex bool]] gitignoreToPatterns = gitignore: assert throwIfOldNix; @@ -90,4 +106,5 @@ rec { (split "\n" gitignore)); gitignoreFilter = ign: root: filterPattern (gitignoreToPatterns ign) root; + gitignoreFilter' = ign: root: filterPattern' (gitignoreToPatterns ign) root; } diff --git a/tests/default.nix b/tests/default.nix new file mode 100644 index 0000000..d8a9ebd --- /dev/null +++ b/tests/default.nix @@ -0,0 +1,24 @@ +{ pkgs ? import <nixpkgs> {} }: + +let + testdata = import ./testdata.nix { inherit pkgs; }; + runner = import ./runner.nix { inherit pkgs; }; +in +{ + plain = runner.makeTest { name = "plain"; rootDir = testdata.sourceUnfiltered + "/test-tree"; }; + nested = runner.makeTest { name = "nested"; rootDir = testdata.sourceUnfilteredRecursive + "/test-tree"; }; + + plain-with-testdata-dir = runner.makeTest { name = "plain"; rootDir = testdata.sourceUnfiltered; }; + nested-with-testdata-dir = runner.makeTest { name = "nested"; rootDir = testdata.sourceUnfilteredRecursive; }; + + plain-with-testdata-subdir = runner.makeTest { name = "plain"; rootDir = testdata.sourceUnfiltered; subpath = "test-tree"; }; + nested-with-testdata-subdir = runner.makeTest { name = "nested"; rootDir = testdata.sourceUnfilteredRecursive; subpath = "test-tree"; }; + + subdir-1 = runner.makeTest { name = "subdir-1"; rootDir = testdata.sourceUnfiltered + "/test-tree"; subpath = "1-simpl"; }; + subdir-1x = runner.makeTest { name = "subdir-1x"; rootDir = testdata.sourceUnfiltered + "/test-tree"; subpath = "1-xxxxx"; }; + 
subdir-2 = runner.makeTest { name = "subdir-2"; rootDir = testdata.sourceUnfiltered + "/test-tree"; subpath = "2-negation"; }; + subdir-3 = runner.makeTest { name = "subdir-3"; rootDir = testdata.sourceUnfiltered + "/test-tree"; subpath = "3-wildcards"; }; + subdir-4 = runner.makeTest { name = "subdir-4"; rootDir = testdata.sourceUnfiltered + "/test-tree"; subpath = "4-escapes"; }; + subdir-9 = runner.makeTest { name = "subdir-9"; rootDir = testdata.sourceUnfiltered + "/test-tree"; subpath = "9-expected"; }; + +} \ No newline at end of file diff --git a/tests/runner.nix b/tests/runner.nix new file mode 100644 index 0000000..cca3d89 --- /dev/null +++ b/tests/runner.nix @@ -0,0 +1,126 @@ +{ pkgs ? import <nixpkgs> {} }: + +let + inherit (pkgs) lib; + inherit (import ../. { inherit lib; }) gitignoreFilter gitignoreSource; + inherit (lib) concatMap flip; + inherit (lib.attrsets) mapAttrsToList nameValuePair; + inherit (lib.strings) concatStringsSep; + for = l: f: concatMap f l; + guard = b: if b then [{}] else []; + + addPath = p: subp: if subp == "" then p else p + "/${subp}"; + + /* + Make a test case. + + name: Name of the test case. + + rootDir: Source for the native git implementation. + This is the root of the git repo; as required + by the native git implementation. + + rootDir + "/${subpath}": Source for the Nix implementation, which ought to + discover rootDir by itself. + + */ + makeTest = {name ? "source", rootDir, subpath ? ""}: + pkgs.runCommand "test-${name}" { + inherit name; + viaGit = listingViaGit { inherit name rootDir subpath; }; + viaNix = listingViaNixGitignore { inherit name rootDir subpath; }; + } '' + if diff $viaNix $viaGit; then + touch $out + else + echo + echo "Found a difference between nix-gitignore and native git." + echo "Above diff can be read as a 'fix' to the nix-gitignore output." + echo "< fix by excluding this in nix-gitignore" + echo "> fix by including this in nix-gitignore" + exit 1; + fi + ''; + + listingViaGit = {name ? 
"source", rootDir, subpath}: + pkgs.stdenv.mkDerivation { + name = "${name}-listing-via-git"; + src = rootDir; + buildInputs = [pkgs.git]; + buildPhase = '' + if ! test -d .git; then + rm .git || true + git init mkrepo + mv mkrepo/.git . || true + rm -rf mkrepo + fi + git add . + git config user.email a@b.c + git config user.name abc + git commit -m 'Add everything' + git archive HEAD -- ${subpath} | tar -t --quoting-style=literal | sed -e 's_/$__' -e 's@^${subpath}/*@@' | (grep -v '^$' || true) | sort >$out + ''; + preInstall = ""; + installPhase = ":"; + }; + + listingViaNixGitignore = {name ? "source", rootDir, subpath}: + pkgs.stdenv.mkDerivation { + name = "${name}-listing-via-nix"; + src = rootDir; + buildInputs = [ + pkgs.git pkgs.nix pkgs.jq + # optional + pkgs.git-crypt + ]; + NIX_PATH="nixpkgs=${pkgs.path}"; + inherit subpath; + buildPhase = '' + export NIX_LOG_DIR=$TMPDIR + export NIX_STATE_DIR=$TMPDIR + test -n "$subpath" && cd $subpath + nix-instantiate --eval --expr --json \ + --readonly-mode --option sandbox false \ + '(import ${gitignoreSource ../.}/tests/runner.nix {}).toStringNixGitignore ./.' \ + | jq -r . \ + | sort \ + >$out + ''; + preInstall = ""; + installPhase = ":"; + }; + + /* Like readDir but returning a list of { name, type } attribute sets + */ + listDir = dir: mapAttrsToList (name: type: { inherit name type; }) (builtins.readDir dir); + + + /* Like builtins.filterSource, but produces a list of paths relative to dir (as strings) instead of a source + */ + traverseDirectory = predicate: dir: + let + recurse = subpath: + for (listDir (dir + "/${subpath}")) ({name, type}: + let + subpath' = "${subpath}${if subpath == "" then "" else "/"}${name}"; + in + for (guard (predicate (dir + "/${subpath'}") type)) ({}: + [subpath'] ++ + for (guard (type == "directory")) (_: + recurse subpath' + ) + ) + ); + in + recurse "" + ; + + traverseNixGitignore = dir: traverseDirectory (gitignoreFilter dir) dir; + + /* Exposed for use *inside* the nix sandbox, called by listingViaNixGitignore. 
+ */ + toStringNixGitignore = dir: concatStringsSep "\n" (traverseNixGitignore dir); +in +{ + inherit makeTest toStringNixGitignore listingViaNixGitignore; +}