From ccd8052beb84a889565ffd08a58cf643e2e439f3 Mon Sep 17 00:00:00 2001 From: Bozhidar Batsov Date: Sun, 26 Apr 2026 12:29:52 +0100 Subject: Skip git submodule scan when there is no .gitmodules `projectile-get-immediate-sub-projects' was unconditionally shelling out to `git submodule --quiet foreach ...' on every indexing call for git projects, even when the project had no submodules at all. For monorepos that re-index the project root often this is pure overhead. Use `locate-dominating-file' to look for `.gitmodules' along the parent chain (PATH may be inside a git repo without being its toplevel) and skip the shell-out when none is found. Also tighten `projectile-discover-projects-in-directory' to filter `.' / `..' via `directory-files-no-dot-files-regexp' instead of a post-filter `member' check, matching the indexing walker's style. --- projectile.el | 50 +++++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/projectile.el b/projectile.el index 0f8c480..c4c0154 100644 --- a/projectile.el +++ b/projectile.el @@ -1330,9 +1330,10 @@ discover projects there." (format "Projectile is discovering projects in %s..." (propertize directory 'face 'font-lock-keyword-face))))) (progress-reporter-update progress-reporter) - (dolist (dir (ignore-errors (directory-files directory t))) - (when (and (file-directory-p dir) - (not (member (file-name-nondirectory dir) '(".." ".")))) + (dolist (dir (ignore-errors + (directory-files directory t + directory-files-no-dot-files-regexp))) + (when (file-directory-p dir) (projectile-discover-projects-in-directory dir (1- depth)))) (progress-reporter-done progress-reporter)) (when (projectile-project-p directory) @@ -1856,24 +1857,31 @@ searching, and should end with an appropriate path delimiter, such as If the vcs get-sub-projects query returns results outside of path, they are excluded from the results of this function." 
- (let* ((vcs (projectile-project-vcs path)) - ;; search for sub-projects under current project `project' - (submodules (mapcar - (lambda (s) - (file-name-as-directory (expand-file-name s path))) - (projectile-files-via-ext-command path (projectile-get-sub-projects-command vcs)))) - (project-child-folder-regex - (concat "\\`" - (regexp-quote path)))) - - ;; If project root is inside of an VCS folder, but not actually an - ;; VCS root itself, submodules external to the project will be - ;; included in the VCS get sub-projects result. Let's remove them. - (seq-filter - (lambda (submodule) - (string-match-p project-child-folder-regex - submodule)) - submodules))) + (let ((vcs (projectile-project-vcs path))) + ;; For Git projects without a `.gitmodules' file there is nothing + ;; for `git submodule foreach' to find, so we can skip the + ;; shell-out altogether. PATH may be inside a Git repo without + ;; being its toplevel (e.g. a subproject of an outer repo) so look + ;; for `.gitmodules' along the parent chain rather than just at + ;; PATH itself. This is hot for monorepos that index the project + ;; root often. + (unless (and (eq vcs 'git) + (not (locate-dominating-file path ".gitmodules"))) + (let* ((submodules (mapcar + (lambda (s) + (file-name-as-directory (expand-file-name s path))) + (projectile-files-via-ext-command + path (projectile-get-sub-projects-command vcs)))) + (project-child-folder-regex + (concat "\\`" (regexp-quote path)))) + ;; If project root is inside of an VCS folder, but not + ;; actually an VCS root itself, submodules external to the + ;; project will be included in the VCS get sub-projects + ;; result. Let's remove them. + (seq-filter + (lambda (submodule) + (string-match-p project-child-folder-regex submodule)) + submodules))))) (defun projectile-get-sub-projects-files (project-root vcs) "Get files from sub-projects for PROJECT-ROOT recursively. 
-- cgit v1.0 From 366049b8f4a14fc1842ae264e6544bdc279308b5 Mon Sep 17 00:00:00 2001 From: Bozhidar Batsov Date: Sun, 26 Apr 2026 12:29:59 +0100 Subject: Backfill tests for indexing command dispatch `projectile-get-ext-command' had no direct test coverage, so the non-git VCS branches (hg/svn/bzr/darcs/fossil/pijul/sapling/jj) were relying on indirect coverage that didn't actually exercise the dispatch. Add a `describe' block that pins each branch. Also extend the `projectile-dir-files-alien' tests with two cases that were previously uncovered: the fd path for git (verifying we don't also call `projectile-git-deleted-files') and the no-VCS fallback (verifying the generic command is used). --- test/projectile-test.el | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/test/projectile-test.el b/test/projectile-test.el index 02d65ba..52c3b98 100644 --- a/test/projectile-test.el +++ b/test/projectile-test.el @@ -928,7 +928,25 @@ Just delegates OPERATION and ARGS for all operations except for`shell-command`'. (let ((projectile-git-use-fd nil) (projectile-fd-executable nil)) (projectile-dir-files-alien "/my/root/" 'git)) - (expect 'projectile-project-vcs :not :to-have-been-called))) + (expect 'projectile-project-vcs :not :to-have-been-called)) + (it "uses the fd-based command when fd is configured for git" + (spy-on 'projectile-files-via-ext-command :and-return-value '("a")) + (spy-on 'projectile-get-sub-projects-files :and-return-value nil) + (spy-on 'projectile-git-deleted-files :and-return-value nil) + (let ((projectile-git-use-fd t) + (projectile-fd-executable "fd")) + (projectile-dir-files-alien "/my/root/" 'git) + ;; When fd is on we don't ask git for deleted files. 
+ (expect 'projectile-git-deleted-files :not :to-have-been-called) + (let ((cmd (cadr (spy-calls-args-for 'projectile-files-via-ext-command 0)))) + (expect cmd :to-equal + (concat "fd " projectile-git-fd-args))))) + (it "falls back to the generic command for projects without a VCS" + (spy-on 'projectile-files-via-ext-command :and-return-value '("a.txt")) + (let ((files (projectile-dir-files-alien "/my/root/" 'none))) + (expect files :to-equal '("a.txt")) + (expect (cadr (spy-calls-args-for 'projectile-files-via-ext-command 0)) + :to-equal projectile-generic-command)))) (describe "hybrid indexing" (it "applies projectile-globally-ignored-file-suffixes on top of the alien result" @@ -1200,6 +1218,29 @@ Just delegates OPERATION and ARGS for all operations except for`shell-command`'. (it "returns nil when vcs is not supported" (expect (projectile-get-sub-projects-command 'none) :to-be nil))) +(describe "projectile-get-ext-command" + (it "returns the git command for git" + (let ((projectile-git-use-fd nil) + (projectile-fd-executable nil)) + (expect (projectile-get-ext-command 'git) :to-equal projectile-git-command))) + (it "uses fd for git when fd is configured" + (let ((projectile-git-use-fd t) + (projectile-fd-executable "fd") + (projectile-git-fd-args "-H -0")) + (expect (projectile-get-ext-command 'git) :to-equal "fd -H -0"))) + (it "returns the matching command for each non-git VCS" + (expect (projectile-get-ext-command 'hg) :to-equal projectile-hg-command) + (expect (projectile-get-ext-command 'svn) :to-equal projectile-svn-command) + (expect (projectile-get-ext-command 'bzr) :to-equal projectile-bzr-command) + (expect (projectile-get-ext-command 'darcs) :to-equal projectile-darcs-command) + (expect (projectile-get-ext-command 'fossil) :to-equal projectile-fossil-command) + (expect (projectile-get-ext-command 'pijul) :to-equal projectile-pijul-command) + (expect (projectile-get-ext-command 'sapling) :to-equal projectile-sapling-command) + (expect 
(projectile-get-ext-command 'jj) :to-equal projectile-jj-command)) + (it "falls back to the generic command for unknown / no VCS" + (expect (projectile-get-ext-command 'none) :to-equal projectile-generic-command) + (expect (projectile-get-ext-command nil) :to-equal projectile-generic-command))) + (describe "projectile-files-via-ext-command" (it "returns nil when command is nil or empty or fails" (expect (projectile-files-via-ext-command "/" "") :not :to-be-truthy) -- cgit v1.0 From a039e44622b644deb90893f7b27c0940c486bbb7 Mon Sep 17 00:00:00 2001 From: Bozhidar Batsov Date: Sun, 26 Apr 2026 12:30:10 +0100 Subject: Document remaining indexing semantics Three doc gaps surfaced during the indexing review: * The `*' prefix on `projectile-globally-ignored-directories' entries is not a glob - it is what promotes a basename match from "anchored at the project root" to "anywhere in the tree", and it only takes effect under hybrid indexing. * The default `projectile-generic-command' picks `fd' when it's on PATH and falls back to a `find ... | tr ...' pipeline. The fallback does not exclude common build directories, which is a trap for non-VCS projects under alien on hosts without `fd'. * `projectile-git-use-fd' controls how Projectile handles deleted-but-unstaged files: with `fd' they disappear immediately, with `git ls-files' Projectile post-filters via `git ls-files -zd'. --- CHANGELOG.md | 3 +++ doc/modules/ROOT/pages/configuration.adoc | 22 ++++++++++++++++++---- doc/modules/ROOT/pages/projects.adoc | 19 +++++++++++++++++++ 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 196958e..bed2f14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ ### Changes +* `projectile-get-immediate-sub-projects` skips the `git submodule foreach` shell-out for git projects with no `.gitmodules` file anywhere up the parent chain. Hot path for monorepos that index the project root often. 
+* `projectile-discover-projects-in-directory` now uses `directory-files-no-dot-files-regexp` to skip `.` and `..` at the C level instead of doing the post-filter in Elisp - matches the indexing walker. +* Document the anchored vs `*`-prefixed semantics of `projectile-globally-ignored-directories`, the `find` fallback's lack of common directory exclusions when `fd` isn't available, and how `fd`/`git ls-files` handle deleted-but-unstaged files differently. * Speed up native indexing on large trees: `projectile-index-directory` now hashes the ignored-files / ignored-directories / globally-ignored-directory-names lists once per indexing call (the per-file `member' scans were O(N*M)), expands dirconfig glob patterns once per directory level instead of once per (file, pattern) pair, and accumulates results into a shared cell so we no longer pay for an `apply append' at each recursion level. * `projectile-remove-ignored` (hybrid post-processing) now hashes the ignored-files basenames and pre-splits ignored-dirs into prefix-match and any-segment groups, so the per-file inner loops drop from O(M) `seq-some` walks to O(1) hash lookups (or O(segments) for `*`-prefixed entries). * Hybrid indexing now batches the external command into a single invocation when the project's `.projectile` declares multiple `+` keep entries, instead of shelling out once per kept subdirectory. The kept paths are passed to the indexing tool (e.g. `git ls-files`, `fd`, `find`) as positional pathspecs and submodule files outside those subdirectories are filtered out. Resolves the long-standing TODO in `projectile-project-files`. 
diff --git a/doc/modules/ROOT/pages/configuration.adoc b/doc/modules/ROOT/pages/configuration.adoc index 1492633..2eada23 100644 --- a/doc/modules/ROOT/pages/configuration.adoc +++ b/doc/modules/ROOT/pages/configuration.adoc @@ -116,18 +116,32 @@ WARNING: If you ever decide to tweak those keep in mind that the command should the list of files **relative** to the project root and the resulting file list should be 0-delimited (as opposed to newline delimited). -For non-VCS projects Projectile will invoke whatever is in `projectile-generic-command`. By default that's: +For non-VCS projects Projectile will invoke whatever is in `projectile-generic-command`. The default chooses `fd` when it's installed and falls back to `find`: +[source,elisp] ---- -find . -type f -print0 +;; Effective default value of projectile-generic-command, picked at load time: +;; when fd is on PATH: +"fd . -0 --type f --color=never --strip-cwd-prefix" +;; otherwise: +"find . -type f | cut -c3- | tr '\\n' '\\0'" ---- TIP: It's a great idea to install https://github.com/sharkdp/fd[fd] which is much faster than `find`. - If `fd` is found, projectile will use it as a replacement for `find`. + If `fd` is found, projectile will use it as a replacement for `find` for non-VCS projects. + +WARNING: The `find` fallback does *not* exclude common build/cache directories + (`.git`, `node_modules`, `target`, `build`, …); a non-VCS project under `alien` + indexing on a host without `fd` will list everything. Either install `fd`, + switch to `hybrid` indexing so `projectile-globally-ignored-directories` + applies, or override `projectile-generic-command` with a tighter recipe. By default, `fd` is also used inside Git repositories (instead of `git ls-files`), because `git ls-files` has the limitation that it lists deleted files until the -deletions are staged. You can control this with `projectile-git-use-fd`: +deletions are staged. 
With `fd`, deleted files disappear from the listing +immediately; with `git ls-files`, Projectile post-filters the listing against +`git ls-files -zd` to drop files whose deletion has not been staged yet. You can control this +with `projectile-git-use-fd`: [source,elisp] ---- diff --git a/doc/modules/ROOT/pages/projects.adoc b/doc/modules/ROOT/pages/projects.adoc index d08bae8..d14092b 100644 --- a/doc/modules/ROOT/pages/projects.adoc +++ b/doc/modules/ROOT/pages/projects.adoc @@ -976,6 +976,25 @@ globally ignoring files and directories. These take effect with `native` and (setq projectile-global-ignore-file-patterns '("\\.min\\.js$" "\\.map$")) ---- +==== Anchored vs anywhere directory ignores + +`projectile-globally-ignored-directories` distinguishes between *anchored* +entries (matched as a path prefix relative to the project root) and *anywhere* +entries (matched at any depth in the tree). The leading `*` is **not** a glob +character — it is the marker that promotes an entry from anchored to anywhere. + +[source,elisp] +---- +(setq projectile-globally-ignored-directories + '("tmp" ; only ignores ./tmp at the project root + "*node_modules" ; ignores any directory named node_modules at any depth + )) +---- + +The `*` prefix only matters for the `hybrid` post-processor; in `native` +indexing every entry is matched by directory basename at every traversal step +(so `tmp` and `*tmp` behave the same). `alien` ignores both forms entirely. + You can also _unignore_ specific files or directories that would otherwise be excluded. This is useful when your VCS ignores files that you still want Projectile to show: -- cgit v1.0