aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG.md2
-rw-r--r--projectile.el207
-rw-r--r--test/projectile-test.el108
3 files changed, 258 insertions, 59 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5db8568..196958e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,8 @@
### Changes
+* Speed up native indexing on large trees: `projectile-index-directory` now hashes the ignored-files / ignored-directories / globally-ignored-directory-names lists once per indexing call (the per-file `member` scans were O(N*M)), expands dirconfig glob patterns once per directory level instead of once per (file, pattern) pair, and accumulates results into a shared cell so we no longer pay for an `apply append` at each recursion level.
+* `projectile-remove-ignored` (hybrid post-processing) now hashes the ignored-files basenames and pre-splits ignored-dirs into prefix-match and any-segment groups, so the per-file inner loops drop from O(M) `seq-some` walks to O(1) hash lookups (or O(segments) for `*`-prefixed entries).
* Hybrid indexing now batches the external command into a single invocation when the project's `.projectile` declares multiple `+` keep entries, instead of shelling out once per kept subdirectory. The kept paths are passed to the indexing tool (e.g. `git ls-files`, `fd`, `find`) as positional pathspecs and submodule files outside those subdirectories are filtered out. Resolves the long-standing TODO in `projectile-project-files`.
* `projectile-files-via-ext-command` now accepts an optional `pathspecs` argument; entries are shell-quoted before being appended to the command. `projectile-dir-files-alien` similarly accepts an optional `subdirs` argument that threads through.
* Document the `hybrid` indexing method in the manual and add a feature matrix showing which Projectile knobs (dirconfig, global ignores/unignores, sort order, default caching) apply under `native`/`hybrid`/`alien`.
diff --git a/projectile.el b/projectile.el
index b03170d..0f8c480 100644
--- a/projectile.el
+++ b/projectile.el
@@ -1622,40 +1622,126 @@ Files are returned as relative paths to DIRECTORY."
(projectile-index-directory directory (projectile-filtering-patterns)
progress-reporter))))
+(defun projectile--list->set (list)
+ "Return a hash table whose keys are the elements of LIST.
+Every value is t.  Keys are compared with `equal'."
+ (let ((set (make-hash-table :test 'equal :size (max 1 (length list)))))
+ (dolist (elt list set)
+ (puthash elt t set))))
+
+(defun projectile--make-walk-rules (ignored-files ignored-directories globally-ignored-directories)
+ "Build a plist of pre-computed rule sets used by `projectile-index-directory'.
+IGNORED-FILES and IGNORED-DIRECTORIES are the absolute paths to
+ignore; GLOBALLY-IGNORED-DIRECTORIES is the list of directory
+basenames to ignore anywhere in the tree."
+ (list :ignored-files-set (projectile--list->set ignored-files)
+ :ignored-dirs-set (projectile--list->set ignored-directories)
+ :globally-ignored-dir-names-set
+ (projectile--list->set globally-ignored-directories)))
+
+(defun projectile--ignored-file-fast-p (file rules)
+ "Like `projectile-ignored-file-p' but consulting pre-built RULES.
+Used on the hot indexing path to avoid O(N*M) `member' scans."
+ (or (gethash file (plist-get rules :ignored-files-set))
+ (seq-some (lambda (re) (string-match-p re file))
+ projectile-global-ignore-file-patterns)
+ (seq-some (lambda (suf) (string-suffix-p suf file t))
+ projectile-globally-ignored-file-suffixes)))
+
+(defun projectile--ignored-directory-fast-p (directory local-name rules)
+ "Like `projectile-ignored-directory-p' but consulting pre-built RULES.
+LOCAL-NAME is the basename of DIRECTORY."
+ (or (gethash directory (plist-get rules :ignored-dirs-set))
+ (seq-some (lambda (re) (string-match-p re directory))
+ projectile-global-ignore-file-patterns)
+ (gethash local-name (plist-get rules :globally-ignored-dir-names-set))))
+
+(defun projectile--expand-glob-set (patterns)
+ "Expand glob PATTERNS in the current `default-directory'.
+Return a hash-set of absolute file paths matched by any pattern.
+Patterns that don't contain wildcard metacharacters expand to the
+file itself if it exists, so a single hash lookup later catches
+both glob and literal matches without revisiting the filesystem."
+ (let ((set (make-hash-table :test 'equal)))
+ (dolist (p patterns set)
+ (dolist (f (file-expand-wildcards p t))
+ (puthash f t set)))))
+
+(defun projectile--matches-pattern-set-p (file pats glob-set)
+ "Return non-nil when FILE matches any of PATS or is in GLOB-SET.
+PATS is the original pattern list (for the literal-suffix fallback
+that `projectile-check-pattern-p' uses); GLOB-SET is a hash-set
+returned by `projectile--expand-glob-set' for the same patterns,
+or nil when PATS is empty."
+ (or (and glob-set (gethash file glob-set))
+ (seq-some (lambda (p)
+ (string-suffix-p (directory-file-name p)
+ (directory-file-name file)))
+ pats)))
+
(defun projectile-index-directory (directory patterns progress-reporter &optional ignored-files ignored-directories globally-ignored-directories)
"Index DIRECTORY taking into account PATTERNS.
-The function calls itself recursively until all sub-directories
-have been indexed. The PROGRESS-REPORTER is updated while the
-function is executing. The list of IGNORED-FILES and
-IGNORED-DIRECTORIES may optionally be provided."
- ;; we compute the ignored files and directories only once and then we reuse the
- ;; pre-computed values in the subsequent recursive invocations of the function
- (let ((ignored-files (or ignored-files (projectile-ignored-files)))
- (ignored-directories (or ignored-directories (projectile-ignored-directories)))
- (globally-ignored-directories (or globally-ignored-directories (projectile-globally-ignored-directory-names))))
- (apply #'append
- (mapcar
- (lambda (f)
- (let ((local-f (file-name-nondirectory (directory-file-name f))))
- (unless (and patterns (projectile-ignored-rel-p f directory patterns))
- (progress-reporter-update progress-reporter)
- (if (file-directory-p f)
- (unless (projectile-ignored-directory-p
- (file-name-as-directory f)
- ignored-directories
- local-f
- globally-ignored-directories)
- (projectile-index-directory f patterns progress-reporter ignored-files ignored-directories globally-ignored-directories))
- (unless (projectile-ignored-file-p f ignored-files)
- (list f))))))
- ;; Use ignore-errors to skip unreadable directories (e.g.
- ;; .Spotlight-V100 on macOS) instead of aborting the entire
- ;; indexing operation. `directory-files-no-dot-files-regexp'
- ;; filters out . and .. at the C level so we don't have to
- ;; do it again in the loop.
- (ignore-errors
- (directory-files directory t directory-files-no-dot-files-regexp))))))
+The function dispatches to an internal walker that uses pre-built
+hash sets, so the per-file membership checks stay O(1) on large
+projects. The PROGRESS-REPORTER is updated while the function is
+executing. Lists of IGNORED-FILES, IGNORED-DIRECTORIES, and
+GLOBALLY-IGNORED-DIRECTORIES may optionally be provided to share
+state across calls."
+ (let* ((ignored-files (or ignored-files (projectile-ignored-files)))
+ (ignored-directories (or ignored-directories (projectile-ignored-directories)))
+ (globally-ignored-directories (or globally-ignored-directories
+ (projectile-globally-ignored-directory-names)))
+ (rules (projectile--make-walk-rules ignored-files ignored-directories
+ globally-ignored-directories))
+ ;; A 1-element list whose car is the accumulator. Using a
+ ;; mutable cell lets the recursive walker push results onto a
+ ;; single shared list (O(N) total) instead of `apply append'-ing
+ ;; per-level results (O(N*depth)).
+ (acc-cell (list nil)))
+ (projectile--index-directory-walk directory patterns progress-reporter
+ rules acc-cell)
+ (nreverse (car acc-cell))))
+
+(defun projectile--index-directory-walk (directory patterns progress-reporter rules acc-cell)
+ "Recursive walker for `projectile-index-directory'.
+DIRECTORY, PATTERNS and PROGRESS-REPORTER are as in the public entry
+point; RULES is the plist from `projectile--make-walk-rules'.  ACC-CELL
+is a 1-element list whose car accumulates file paths in reverse order."
+ ;; Resolve the directory listing first. We rebind `default-directory'
+ ;; below for `file-expand-wildcards', so a relative DIRECTORY argument
+ ;; has to be resolved against the caller's `default-directory' before
+ ;; that rebind takes effect. Use ignore-errors to skip unreadable
+ ;; directories (e.g. .Spotlight-V100 on macOS) instead of aborting the
+ ;; entire indexing operation.
+ ;; `directory-files-no-dot-files-regexp' filters out . and .. at the
+ ;; C level so we don't have to do it again in the loop.
+ (let ((entries (ignore-errors
+ (directory-files directory t directory-files-no-dot-files-regexp))))
+ (let* ((default-directory (file-name-as-directory directory))
+ (ignore-pats (car patterns))
+ (ensure-pats (cdr patterns))
+ ;; Glob expansion is sensitive to `default-directory' so it
+ ;; has to happen at every recursion level - but only once
+ ;; per level rather than once per (file, pattern) pair as
+ ;; it used to.
+ (ignore-glob-set (and ignore-pats (projectile--expand-glob-set ignore-pats)))
+ (ensure-glob-set (and ensure-pats (projectile--expand-glob-set ensure-pats))))
+ (dolist (f entries)
+ (let ((local-f (file-name-nondirectory (directory-file-name f))))
+ (unless (and patterns
+ (projectile--matches-pattern-set-p f ignore-pats ignore-glob-set)
+ (not (projectile--matches-pattern-set-p f ensure-pats ensure-glob-set)))
+ (progress-reporter-update progress-reporter)
+ (cond
+ ((file-directory-p f)
+ (unless (projectile--ignored-directory-fast-p
+ (file-name-as-directory f) local-f rules)
+ (projectile--index-directory-walk f patterns progress-reporter
+ rules acc-cell)))
+ (t
+ (unless (projectile--ignored-file-fast-p f rules)
+ (setcar acc-cell (cons f (car acc-cell))))))))))))
;;; Alien Project Indexing
;;
@@ -1852,35 +1938,40 @@ Only text sent to standard output is taken into account."
(defun projectile-remove-ignored (files)
"Remove ignored files and folders from FILES.
-If ignored directory prefixed with '*', then ignore all
+If ignored directory prefixed with `*', then ignore all
directories/subdirectories with matching filename,
otherwise operates relative to project root."
- (let ((ignored-files (projectile-ignored-files-rel))
- (ignored-dirs (projectile-ignored-directories-rel)))
- (seq-remove
- (lambda (file)
- (or (seq-some
- (lambda (f)
- (string= f (file-name-nondirectory file)))
- ignored-files)
- (seq-some
- (lambda (dir)
- ;; if the directory is prefixed with '*' then ignore all directories matching that name
- (if (string-prefix-p "*" dir)
- ;; remove '*' and trailing slash from ignored directory name
- (let ((d (string-remove-suffix "/" (substring dir 1))))
- (seq-some
- (lambda (p)
- (string= d p))
- ;; split path by '/', remove empty strings, and check if any subdirs match name 'd'
- (delete "" (split-string (or (file-name-directory file) "") "/"))))
- (string-prefix-p dir file)))
- ignored-dirs)
- (seq-some
- (lambda (suf)
- (string-suffix-p suf file t))
- projectile-globally-ignored-file-suffixes)))
- files)))
+ (let* ((ignored-files (projectile-ignored-files-rel))
+ (ignored-dirs (projectile-ignored-directories-rel))
+ ;; Hash basenames of ignored files for O(1) lookup per project
+ ;; file (the original `seq-some'/`string=' over the list was
+ ;; O(M) per file).
+ (ignored-files-set (projectile--list->set ignored-files))
+ ;; Split ignored dirs into the two matching modes used below:
+ ;; entries prefixed with `*' are matched as a path *segment*
+ ;; (basename anywhere in the file's directory chain), the rest
+ ;; are matched as a literal path *prefix*. Hash the segment
+ ;; entries so the per-file segment loop becomes O(segments).
+ (any-segment-dir-names nil)
+ (prefix-dirs nil))
+ (dolist (dir ignored-dirs)
+ (if (string-prefix-p "*" dir)
+ (push (string-remove-suffix "/" (substring dir 1))
+ any-segment-dir-names)
+ (push dir prefix-dirs)))
+ (let ((any-segment-dir-set (projectile--list->set any-segment-dir-names))
+ (suffixes projectile-globally-ignored-file-suffixes))
+ (seq-remove
+ (lambda (file)
+ (or (gethash (file-name-nondirectory file) ignored-files-set)
+ (and any-segment-dir-names
+ (seq-some
+ (lambda (segment) (gethash segment any-segment-dir-set))
+ (delete "" (split-string
+ (or (file-name-directory file) "") "/"))))
+ (seq-some (lambda (dir) (string-prefix-p dir file)) prefix-dirs)
+ (seq-some (lambda (suf) (string-suffix-p suf file t)) suffixes)))
+ files))))
(defun projectile-keep-ignored-files (project vcs files)
"Filter FILES to retain only those that are ignored."
diff --git a/test/projectile-test.el b/test/projectile-test.el
index 3ef1eaa..02d65ba 100644
--- a/test/projectile-test.el
+++ b/test/projectile-test.el
@@ -1086,7 +1086,113 @@ Just delegates OPERATION and ARGS for all operations except for`shell-command`'.
(let ((files (projectile-index-directory project-dir nil progress-reporter)))
(expect (cl-some (lambda (f) (string-match-p "readable-file" f)) files) :to-be-truthy)
(expect (cl-some (lambda (f) (string-match-p "unreadable-dir" f)) files) :not :to-be-truthy))
- (set-file-modes unreadable-dir #o755))))))))
+ (set-file-modes unreadable-dir #o755)))))))
+ (it "honors dirconfig glob ignore patterns at every level"
+ (projectile-test-with-sandbox
+ (projectile-test-with-files
+ ("project/"
+ "project/.projectile"
+ "project/keep.el"
+ "project/skip.elc"
+ "project/src/"
+ "project/src/keep.el"
+ "project/src/skip.elc")
+ (let* ((project-dir (file-name-as-directory (expand-file-name "project")))
+ (progress-reporter (make-progress-reporter "Indexing...")))
+ (with-temp-file (expand-file-name ".projectile" project-dir)
+ (insert "-*.elc\n"))
+ (spy-on 'projectile-project-root :and-return-value project-dir)
+ (let ((files (projectile-index-directory project-dir
+ (projectile-filtering-patterns)
+ progress-reporter)))
+ (expect (cl-some (lambda (f) (string-match-p "/keep.el\\'" f)) files)
+ :to-be-truthy)
+ (expect (cl-some (lambda (f) (string-match-p "/src/keep.el\\'" f)) files)
+ :to-be-truthy)
+ (expect (cl-some (lambda (f) (string-match-p "skip\\.elc\\'" f)) files)
+ :not :to-be-truthy))))))
+ (it "honors dirconfig ensure entries that override an ignore pattern"
+ (projectile-test-with-sandbox
+ (projectile-test-with-files
+ ("project/"
+ "project/.projectile"
+ "project/keep.elc"
+ "project/skip.elc")
+ (let* ((project-dir (file-name-as-directory (expand-file-name "project")))
+ (progress-reporter (make-progress-reporter "Indexing...")))
+ (with-temp-file (expand-file-name ".projectile" project-dir)
+ (insert "-*.elc\n!keep.elc\n"))
+ (spy-on 'projectile-project-root :and-return-value project-dir)
+ (let ((files (projectile-index-directory project-dir
+ (projectile-filtering-patterns)
+ progress-reporter)))
+ (expect (cl-some (lambda (f) (string-match-p "/keep\\.elc\\'" f)) files)
+ :to-be-truthy)
+ (expect (cl-some (lambda (f) (string-match-p "skip\\.elc\\'" f)) files)
+ :not :to-be-truthy)))))))
+
+(describe "projectile--list->set"
+ (it "puts all elements as keys with value t and tests with equal"
+ (let ((set (projectile--list->set '("a" "b/" "c"))))
+ (expect (gethash "a" set) :to-be t)
+ (expect (gethash "b/" set) :to-be t)
+ (expect (gethash "missing" set) :to-be nil)))
+ (it "handles the empty list without error"
+ (let ((set (projectile--list->set nil)))
+ (expect (hash-table-count set) :to-equal 0))))
+
+(describe "projectile--ignored-file-fast-p"
+ (it "returns t for files in the pre-computed ignored-files-set"
+ (let ((rules (projectile--make-walk-rules
+ '("/r/TAGS") nil nil)))
+ (expect (projectile--ignored-file-fast-p "/r/TAGS" rules) :to-be-truthy)
+ (expect (projectile--ignored-file-fast-p "/r/keep.el" rules)
+ :not :to-be-truthy)))
+ (it "honors projectile-globally-ignored-file-suffixes"
+ (let ((rules (projectile--make-walk-rules nil nil nil))
+ (projectile-globally-ignored-file-suffixes '(".elc")))
+ (expect (projectile--ignored-file-fast-p "/r/foo.elc" rules) :to-be-truthy)
+ (expect (projectile--ignored-file-fast-p "/r/foo.el" rules)
+ :not :to-be-truthy)))
+ (it "honors projectile-global-ignore-file-patterns"
+ (let ((rules (projectile--make-walk-rules nil nil nil))
+ (projectile-global-ignore-file-patterns '("\\.min\\.js\\'")))
+ (expect (projectile--ignored-file-fast-p "/r/foo.min.js" rules) :to-be-truthy)
+ (expect (projectile--ignored-file-fast-p "/r/foo.js" rules)
+ :not :to-be-truthy))))
+
+(describe "projectile--ignored-directory-fast-p"
+ (it "matches absolute ignored-dirs entries"
+ (let ((rules (projectile--make-walk-rules
+ nil '("/r/build/") nil)))
+ (expect (projectile--ignored-directory-fast-p "/r/build/" "build" rules)
+ :to-be-truthy)))
+ (it "matches globally ignored directory basenames"
+ (let ((rules (projectile--make-walk-rules nil nil '(".git"))))
+ (expect (projectile--ignored-directory-fast-p "/r/.git/" ".git" rules)
+ :to-be-truthy)
+ (expect (projectile--ignored-directory-fast-p "/r/src/" "src" rules)
+ :not :to-be-truthy))))
+
+(describe "projectile-remove-ignored"
+ (it "drops files whose basename matches an ignored entry"
+ (spy-on 'projectile-ignored-files-rel :and-return-value '("TAGS"))
+ (spy-on 'projectile-ignored-directories-rel :and-return-value nil)
+ (expect (projectile-remove-ignored '("a/TAGS" "src/foo.el" "TAGS"))
+ :to-equal '("src/foo.el")))
+ (it "treats `*'-prefixed entries as any-segment matches"
+ (spy-on 'projectile-ignored-files-rel :and-return-value nil)
+ (spy-on 'projectile-ignored-directories-rel :and-return-value '("*node_modules/"))
+ (expect (projectile-remove-ignored
+ '("src/foo.js"
+ "node_modules/lib/index.js"
+ "vendor/node_modules/lib/index.js"))
+ :to-equal '("src/foo.js")))
+ (it "treats plain entries as path-prefix matches"
+ (spy-on 'projectile-ignored-files-rel :and-return-value nil)
+ (spy-on 'projectile-ignored-directories-rel :and-return-value '("build/"))
+ (expect (projectile-remove-ignored '("build/foo.o" "src/foo.c" "buildbot/x"))
+ :to-equal '("src/foo.c" "buildbot/x"))))
(describe "projectile-get-sub-projects-command"
(it "gets sub projects command for git"