From c10a86b74749b23fe1231915cad08163eb3038e1 Mon Sep 17 00:00:00 2001 From: Bozhidar Batsov Date: Sun, 26 Apr 2026 11:34:35 +0100 Subject: Batch hybrid indexing across `+' keep dirconfig entries When a project's `.projectile' declares multiple `+' keep entries, hybrid indexing used to walk each subdirectory individually, shelling out to the external indexing command once per entry. The TODO in `projectile-project-files' had been there for a while. Push the kept paths into the external command as positional pathspecs and run `projectile-adjust-files' once over the combined result. `git ls-files', `fd', `find', `hg locate', etc. all accept additional path arguments at the end of the command line. For Git submodules, `projectile-get-sub-projects-files' is queried once and the result is filtered to only those falling under one of the kept subdirectories. `projectile-files-via-ext-command' grows an optional `pathspecs' arg (shell-quoted before being appended); `projectile-dir-files-alien' grows a matching optional `subdirs' arg that threads through. --- CHANGELOG.md | 2 ++ projectile.el | 94 ++++++++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 73 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eca17ad..5db8568 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### Changes +* Hybrid indexing now batches the external command into a single invocation when the project's `.projectile` declares multiple `+` keep entries, instead of shelling out once per kept subdirectory. The kept paths are passed to the indexing tool (e.g. `git ls-files`, `fd`, `find`) as positional pathspecs and submodule files outside those subdirectories are filtered out. Resolves the long-standing TODO in `projectile-project-files`. +* `projectile-files-via-ext-command` now accepts an optional `pathspecs` argument; entries are shell-quoted before being appended to the command. `projectile-dir-files-alien` similarly accepts an optional `subdirs` argument that threads through. * Document the `hybrid` indexing method in the manual and add a feature matrix showing which Projectile knobs (dirconfig, global ignores/unignores, sort order, default caching) apply under `native`/`hybrid`/`alien`. * `projectile-dir-files-alien` now accepts an optional `vcs` argument so the dispatcher can thread through the already-resolved VCS instead of recomputing it. Existing single-argument callers are unaffected. * `projectile-index-directory` (native indexing) now relies on `directory-files-no-dot-files-regexp` to filter out `.` and `..` at the C level instead of walking past them in Elisp. diff --git a/projectile.el b/projectile.el index 638642e..b03170d 100644 --- a/projectile.el +++ b/projectile.el @@ -1662,17 +1662,24 @@ IGNORED-DIRECTORIES may optionally be provided." ;; This corresponds to `projectile-indexing-method' being set to hybrid or alien. ;; The only difference between the two methods is that alien doesn't do ;; any post-processing of the files obtained via the external command. -(defun projectile-dir-files-alien (directory &optional vcs) +(defun projectile-dir-files-alien (directory &optional vcs subdirs) "Get the files for DIRECTORY using external tools. VCS, when supplied, must be the project's VCS as returned by `projectile-project-vcs'. It is computed from DIRECTORY when omitted; callers that already resolved the VCS can pass it in to -avoid the redundant work." +avoid the redundant work. + +SUBDIRS, when non-nil, is a list of subdirectory paths (relative +to DIRECTORY) restricting the listing. The external command +receives them as positional arguments and submodule files are +filtered to those falling under one of the subdirectories. This +is how dirconfig `+' keep entries are honoured by hybrid indexing +without shelling out per kept directory." (let ((vcs (or vcs (projectile-project-vcs directory)))) (cond ((eq vcs 'git) - (let* ((files (nconc (projectile-files-via-ext-command directory (projectile-get-ext-command vcs)) - (projectile-get-sub-projects-files directory vcs))) + (let* ((files (nconc (projectile-files-via-ext-command directory (projectile-get-ext-command vcs) subdirs) + (projectile--restricted-sub-projects-files directory vcs subdirs))) ;; When using git ls-files (not fd), deleted but unstaged ;; files are still reported. Remove them. (deleted (unless (and projectile-git-use-fd projectile-fd-executable) @@ -1682,7 +1689,22 @@ avoid the redundant work." (dolist (f deleted) (puthash f t deleted-set)) (seq-remove (lambda (f) (gethash f deleted-set)) files)) files))) - (t (projectile-files-via-ext-command directory (projectile-get-ext-command vcs)))))) + (t (projectile-files-via-ext-command directory (projectile-get-ext-command vcs) subdirs))))) + +(defun projectile--restricted-sub-projects-files (project-root vcs subdirs) + "Return git submodule files under PROJECT-ROOT, optionally restricted to SUBDIRS. +SUBDIRS is a list of paths relative to PROJECT-ROOT; when non-nil +only files whose project-relative path starts with one of those +subdirectories are returned. When nil, behaves exactly like +`projectile-get-sub-projects-files'." + (let ((files (projectile-get-sub-projects-files project-root vcs))) + (if subdirs + (let ((normalized (mapcar #'file-name-as-directory subdirs))) + (seq-filter + (lambda (f) + (seq-some (lambda (sd) (string-prefix-p sd f)) normalized)) + files)) + files))) (defun projectile-git-deleted-files (directory) "Get a list of deleted but unstaged files in DIRECTORY." @@ -1793,17 +1815,31 @@ VCS is the VCS of the project." (when cmd (projectile-files-via-ext-command project (concat cmd " " dir))))) -(defun projectile-files-via-ext-command (root command) +(defun projectile-files-via-ext-command (root command &optional pathspecs) "Get a list of relative file names in the project ROOT by executing COMMAND. +PATHSPECS, when non-nil, is a list of subdirectories (relative to +ROOT) appended to COMMAND as positional arguments. Each entry is +shell-quoted before being appended. All of the indexing commands +shipped with Projectile (`git ls-files', `fd', `find', `hg locate' +etc.) accept additional path arguments at the end of the command +line; users with heavily customised commands that don't should +either not rely on `+' keep entries in `.projectile' or arrange +their command to accept positional paths. + If `command' is nil or an empty string, return nil. This allows commands to be disabled. Only text sent to standard output is taken into account." (when (and (stringp command) (not (string-empty-p command))) - (let ((default-directory root)) + (let ((default-directory root) + (full-command (if pathspecs + (concat command " " + (mapconcat #'shell-quote-argument + pathspecs " ")) + command))) (with-temp-buffer - (shell-command command t "*projectile-files-errors*") + (shell-command full-command t "*projectile-files-errors*") (let ((shell-output (buffer-substring (point-min) (point-max)))) (mapcar (lambda (f) (string-remove-prefix "./" f)) @@ -2520,21 +2556,33 @@ is `alien', which bypasses dirconfig filtering. Switch to `hybrid' or \ (progn (projectile--maybe-warn-dirconfig-ignored project-root) (projectile-dir-files-alien project-root)) - ;; If a project is defined as a list of subfolders - ;; then we'll have the files returned for each subfolder, - ;; so they are relative to the project root. - ;; - ;; TODO: That's pretty slow and we need to improve it. - ;; One options would be to pass explicitly the subdirs - ;; to commands like `git ls-files` which would return - ;; files paths relative to the project root. - (mapcan - (lambda (dir) - (mapcar (lambda (f) - (file-relative-name (concat dir f) - project-root)) - (projectile-dir-files dir))) - (projectile-get-project-directories project-root)))) + (let ((dirs (projectile-get-project-directories project-root))) + (cond + ((and (eq projectile-indexing-method 'hybrid) (cdr dirs)) + ;; Hybrid + dirconfig `+' keep entries: batch the + ;; external command into a single invocation with + ;; the kept subdirectories as pathspecs, then run + ;; projectile-adjust-files once over the combined + ;; result. Avoids one shell-out per kept directory. + (let* ((vcs (projectile-project-vcs project-root)) + (subdirs (mapcar + (lambda (d) (file-relative-name d project-root)) + dirs))) + (projectile-adjust-files + project-root vcs + (projectile-dir-files-alien project-root vcs subdirs)))) + (t + ;; Native, or hybrid without keep entries: walk each + ;; project directory. For native this is the only + ;; implementation; for hybrid+single-dir it's + ;; equivalent to the batched call above. + (mapcan + (lambda (dir) + (mapcar (lambda (f) + (file-relative-name (concat dir f) + project-root)) + (projectile-dir-files dir))) + dirs)))))) ;; Save the cached list. (when projectile-enable-caching -- cgit v1.0 From 9ef4ffcf5f8187ca4ef501fd30e964e036449776 Mon Sep 17 00:00:00 2001 From: Bozhidar Batsov Date: Sun, 26 Apr 2026 11:34:43 +0100 Subject: Add tests for hybrid `+' keep batching `+' keep entries had no end-to-end test coverage. Add tests that verify: * Multiple `+' keep entries trigger a single batched call to the external command, with the kept paths passed as the new pathspecs argument. * The single-keep-dir case keeps going through `projectile-dir-files' for each subdirectory (no behavioural change there). * `projectile--restricted-sub-projects-files' returns all submodule files when no subdirs are supplied, drops files outside the supplied subdirs when they are, and normalises subdirs without a trailing slash. Also add a focused test for the new `pathspecs' argument of `projectile-files-via-ext-command'. --- test/projectile-test.el | 72 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/test/projectile-test.el b/test/projectile-test.el index a546a8f..3ef1eaa 100644 --- a/test/projectile-test.el +++ b/test/projectile-test.el @@ -992,6 +992,69 @@ Just delegates OPERATION and ARGS for all operations except for`shell-command`'. (expect 'projectile-dir-files-alien :to-have-been-called-with "/my/root/" 'git))) +(describe "hybrid indexing with `+' keep entries" + (it "batches dirconfig keep dirs into a single external command" + (spy-on 'projectile-project-vcs :and-return-value 'git) + (spy-on 'projectile-files-via-ext-command :and-return-value + '("src/a.el" "test/b.el")) + (spy-on 'projectile-get-sub-projects-files :and-return-value nil) + (spy-on 'projectile-git-deleted-files :and-return-value nil) + (spy-on 'projectile-parse-dirconfig-file :and-return-value + (make-projectile-dirconfig :keep '("src/" "test/"))) + (spy-on 'projectile-project-root :and-return-value "/my/root/") + (let ((projectile-indexing-method 'hybrid) + (projectile-enable-caching nil) + (projectile-files-cache-expire nil) + (projectile-git-use-fd nil) + (projectile-fd-executable nil) + (projectile-globally-ignored-files nil) + (projectile-globally-ignored-directories nil) + (projectile-globally-ignored-file-suffixes nil) + (projectile-globally-unignored-files nil) + (projectile-globally-unignored-directories nil)) + (projectile-project-files "/my/root/") + ;; The external command is invoked exactly once - not once per + ;; kept subdirectory - and receives the kept paths as pathspecs. + (expect 'projectile-files-via-ext-command :to-have-been-called-times 1) + (expect (spy-calls-args-for 'projectile-files-via-ext-command 0) + :to-equal (list "/my/root/" projectile-git-command '("src/" "test/"))))) + (it "leaves the single-keep-dir case on the per-directory path" + (spy-on 'projectile-project-vcs :and-return-value 'git) + (spy-on 'projectile-dir-files-alien :and-return-value '("a.el")) + (spy-on 'projectile-adjust-files :and-call-fake (lambda (_p _v files) files)) + (spy-on 'projectile-parse-dirconfig-file :and-return-value + (make-projectile-dirconfig :keep '("src/"))) + (spy-on 'projectile-project-root :and-return-value "/my/root/") + (spy-on 'file-directory-p :and-call-fake + (lambda (filename) + (member filename '("/my/root/" "/my/root/src/")))) + (let ((projectile-indexing-method 'hybrid) + (projectile-enable-caching nil) + (projectile-files-cache-expire nil)) + (projectile-project-files "/my/root/")) + ;; With one keep entry there are no extra shell calls to save, so + ;; we keep going through projectile-dir-files (which threads vcs + ;; through to projectile-dir-files-alien). + (expect 'projectile-dir-files-alien + :to-have-been-called-with "/my/root/src/" 'git))) + +(describe "projectile--restricted-sub-projects-files" + (it "returns all submodule files when no subdirs are supplied" + (spy-on 'projectile-get-sub-projects-files :and-return-value + '("vendor/foo/x.txt" "src/sub/y.txt")) + (expect (projectile--restricted-sub-projects-files "/r/" 'git nil) + :to-equal '("vendor/foo/x.txt" "src/sub/y.txt"))) + (it "drops submodule files outside the supplied subdirs" + (spy-on 'projectile-get-sub-projects-files :and-return-value + '("vendor/foo/x.txt" "src/sub/y.txt")) + (expect (projectile--restricted-sub-projects-files "/r/" 'git '("src/")) + :to-equal '("src/sub/y.txt"))) + (it "normalises subdirs without a trailing slash" + (spy-on 'projectile-get-sub-projects-files :and-return-value + '("src/sub/y.txt")) + (expect (projectile--restricted-sub-projects-files "/r/" 'git '("src")) + :to-equal '("src/sub/y.txt")))) + (describe "projectile-project-dirs" (it "includes intermediate directories that contain only subdirectories" (spy-on 'projectile-project-files @@ -1043,7 +1106,14 @@ Just delegates OPERATION and ARGS for all operations except for`shell-command`'. (it "strips ./ prefix from results" (expect (projectile-files-via-ext-command "" "printf './foo\\0./bar/baz\\0quux'") - :to-equal '("foo" "bar/baz" "quux")))) + :to-equal '("foo" "bar/baz" "quux"))) + + (it "appends shell-quoted pathspecs to the command when supplied" + (expect (projectile-files-via-ext-command + "" "printf 'args: %s\\0' --" + '("src dir" "test")) + :to-equal + '("args: --" "args: src dir" "args: test")))) (describe "projectile-ignored-project-p" (it "matches abbreviated paths against truename-resolved ignored list" -- cgit v1.0