summaryrefslogtreecommitdiff
path: root/lisp/pdf-sync.el
diff options
context:
space:
mode:
authorAndreas Politz <politza@hochschule-trier.de>2015-02-13 00:07:32 +0100
committerAndreas Politz <politza@hochschule-trier.de>2015-02-13 00:07:32 +0100
commit2b2010b31d53661dd9631c2967330160e59baeff (patch)
tree5fe9a21a08ec8a1197e3df19711c4a0bc604c5c5 /lisp/pdf-sync.el
parentf4c00bbfcbabbf93b3944fa615b9cc0b8d1bc5ca (diff)
Implemented a heuristic backward search.
* lisp/pdf-sync.el: (pdf-sync-backward-hook): Added hook as option for word-wise searching. (pdf-sync-minor-mode-map): Add seemingly standard binding C-mouse1 for backward searches. (pdf-sync-backward-use-heuristic): New variable. If this is non-nil the backward search uses a heuristic which will in most cases locate the character position. (pdf-sync-backward-text-translations): New variable describing a mapping between utf8 character and latex macros. (pdf-sync-backward-text-flush-regexp): Regexp for deleting unhelpfull character from the PDF's text when backward-searching. (pdf-sync-backward-source-flush-regexp): Regexp for deleting unhelpfull LaTeX fragments when backward-searching. (pdf-sync-backward-search): Use the heuristic. (pdf-sync-backward-correlate): Now returns a function (instead of line, column), which when called will move to the correct position. (pdf-sync-backward--find-position pdf-sync-backward--get-source-context, pdf-sync-backward--source-strip-comments, pdf-sync-backward--get-text-context, pdf-sync-backward--tokenize): New functions for the heuristic backward search. (pdf-sync-backward-debug-minor-mode, pdf-sync-backward-debug-wrapper, pdf-sync-backward-debug-explain): Debug functions.
Diffstat (limited to 'lisp/pdf-sync.el')
-rw-r--r--lisp/pdf-sync.el506
1 files changed, 494 insertions, 12 deletions
diff --git a/lisp/pdf-sync.el b/lisp/pdf-sync.el
index 3556aee..a51c314 100644
--- a/lisp/pdf-sync.el
+++ b/lisp/pdf-sync.el
@@ -19,6 +19,17 @@
;;; Commentary:
;;
+;; The backward search uses a heuristic, which is pretty simple, but
+;; effective: It extracts the text around the click-position in the
+;; PDF, normalizes it's whitespace, deletes certain notorious
+;; character and translates certain other character into their latex
+;; equivalents. This transformed text is split into a series of
+;; token. A similar operation is performed on the source code around
+;; the position synctex points at. These two sequences of token are
+;; aligned with a standard sequence alignment algorithm, resulting in
+;; an alist of matched and unmatched tokens. This is then used to
+;; find the corresponding word from the PDF file in the LaTeX buffer.
+
(require 'pdf-view)
(require 'pdf-info)
@@ -43,7 +54,8 @@ command `pdf-sync-minor-mode' is activated and this map is defined."
The hook is run in the TeX buffer."
:group 'pdf-sync
- :type 'hook)
+ :type 'hook
+ :options '(pdf-sync-backward-beginning-of-word))
(defcustom pdf-sync-forward-hook nil
"Hook ran after displaying the PDF buffer.
@@ -74,7 +86,8 @@ locate it."
(defvar pdf-sync-minor-mode-map
(let ((kmap (make-sparse-keymap)))
- (define-key kmap [double-mouse-1] 'pdf-sync-mouse-goto-tex)
+ (define-key kmap [double-mouse-1] 'pdf-sync-backward-search-mouse)
+ (define-key kmap [C-mouse-1] 'pdf-sync-backward-search-mouse)
kmap))
(defcustom pdf-sync-backward-redirect-functions nil
@@ -135,6 +148,144 @@ with AUCTeX."
;; * Backward search (PDF -> TeX)
;; * ================================================================== *
+(defcustom pdf-sync-backward-use-heuristic t
+ "Whether to apply a heuristic when backward searching.
+
+If nil, just go where Synctex tells us. Otherwise try to find
+the exact location of the clicked-upon text in the PDF."
+ :group 'pdf-sync
+ :type 'boolean)
+
+(defcustom pdf-sync-backward-text-translations
+ '((88 "X" "sum")
+ (94 "textasciicircum")
+ (126 "textasciitilde")
+ (169 "copyright" "textcopyright")
+ (172 "neg" "textlnot")
+ (174 "textregistered" "textregistered")
+ (176 "textdegree")
+ (177 "pm" "textpm")
+ (181 "upmu" "mu")
+ (182 "mathparagraph" "textparagraph" "P" "textparagraph")
+ (215 "times")
+ (240 "eth" "dh")
+ (915 "Upgamma" "Gamma")
+ (920 "Uptheta" "Theta")
+ (923 "Uplambda" "Lambda")
+ (926 "Upxi" "Xi")
+ (928 "Uppi" "Pi")
+ (931 "Upsigma" "Sigma")
+ (933 "Upupsilon" "Upsilon")
+ (934 "Upphi" "Phi")
+ (936 "Uppsi" "Psi")
+ (945 "upalpha" "alpha")
+ (946 "upbeta" "beta")
+ (947 "upgamma" "gamma")
+ (948 "updelta" "delta")
+ (949 "upvarepsilon" "varepsilon")
+ (950 "upzeta" "zeta")
+ (951 "upeta" "eta")
+ (952 "uptheta" "theta")
+ (953 "upiota" "iota")
+ (954 "upkappa" "varkappa" "kappa")
+ (955 "uplambda" "lambda")
+ (957 "upnu" "nu")
+ (958 "upxi" "xi")
+ (960 "uppi" "pi")
+ (961 "upvarrho" "uprho" "rho")
+ (962 "varsigma")
+ (963 "upvarsigma" "upsigma" "sigma")
+ (964 "uptau" "tau")
+ (965 "upupsilon" "upsilon")
+ (966 "upphi" "phi")
+ (967 "upchi" "chi")
+ (968 "uppsi" "psi")
+ (969 "upomega" "omega")
+ (977 "upvartheta" "vartheta")
+ (981 "upvarphi" "varphi")
+ (8224 "dagger")
+ (8225 "ddagger")
+ (8226 "bullet")
+ (8486 "Upomega" "Omega")
+ (8501 "aleph")
+ (8592 "mapsfrom" "leftarrow")
+ (8593 "uparrow")
+ (8594 "to" "mapsto" "rightarrow")
+ (8595 "downarrow")
+ (8596 "leftrightarrow")
+ (8656 "shortleftarrow" "Leftarrow")
+ (8657 "Uparrow")
+ (8658 "Mapsto" "rightrightarrows" "Rightarrow")
+ (8659 "Downarrow")
+ (8660 "Leftrightarrow")
+ (8704 "forall")
+ (8706 "partial")
+ (8707 "exists")
+ (8709 "varnothing" "emptyset")
+ (8710 "Updelta" "Delta")
+ (8711 "nabla")
+ (8712 "in")
+ (8722 "-")
+ (8725 "setminus")
+ (8727 "*")
+ (8734 "infty")
+ (8743 "wedge")
+ (8744 "vee")
+ (8745 "cap")
+ (8746 "cup")
+ (8756 "therefore")
+ (8757 "because")
+ (8764 "thicksim" "sim")
+ (8776 "thickapprox" "approx")
+ (8801 "equiv")
+ (8804 "leq")
+ (8805 "geq")
+ (8810 "lll")
+ (8811 "ggg")
+ (8814 "nless")
+ (8815 "ngtr")
+ (8822 "lessgtr")
+ (8823 "gtrless")
+ (8826 "prec")
+ (8832 "nprec")
+ (8834 "subset")
+ (8835 "supset")
+ (8838 "subseteq")
+ (8839 "supseteq")
+ (8853 "oplus")
+ (8855 "otimes")
+ (8869 "bot" "perp")
+ (9702 "circ")
+ (9792 "female" "venus")
+ (9793 "earth")
+ (9794 "male" "mars")
+ (9824 "spadesuit")
+ (9827 "clubsuit")
+ (9829 "heartsuit")
+ (9830 "diamondsuit"))
+ "Alist mapping PDF character to a list of LaTeX macro names.
+
+Adding a character here with it's LaTeX equivalent names allows
+the heuristic backward search to find it's location in the source
+file. These strings should not match
+`pdf-sync-backward-source-flush-regexp'.
+
+Has no effect if `pdf-sync-backward-use-heuristic' is nil."
+ :group 'pdf-sync
+ :type '(alist :key-type character
+ :value-type (repeat string)))
+
+(defconst pdf-sync-backward-text-flush-regexp
+ "[][.ยท{}|\\]\\|\\C.\\|-\n+"
+ "Regexp of ignored text when backward searching.")
+
+(defconst pdf-sync-backward-source-flush-regexp
+ "\\(?:\\\\\\(?:begin\\|end\\|\\(?:eq\\)?ref\\|label\\|cite\\){[^}]*}\\)\\|[][\\&{}$_]"
+ "Regexp of ignored source when backward searching.")
+
+(defconst pdf-sync-backward-context-limit 64
+ "Number of character to include in the backward search.")
+
(defun pdf-sync-backward-search-mouse (ev)
"Go to the source corresponding to position at event EV."
(interactive "@e")
@@ -146,23 +297,26 @@ with AUCTeX."
(pdf-sync-backward-search (car xy) (cdr xy))))
(defun pdf-sync-backward-search (x y)
- "Go to the source corresponding to image coordinates X, Y."
- (cl-destructuring-bind (source line column)
+ "Go to the source corresponding to image coordinates X, Y.
+
+Try to find the exact position, if
+`pdf-sync-backward-use-heuristic' is non-nil."
+ (cl-destructuring-bind (source finder)
(pdf-sync-backward-correlate x y)
(pop-to-buffer (or (find-buffer-visiting source)
(find-file-noselect source))
pdf-sync-backward-display-action)
(push-mark)
- (pdf-util-goto-position line column)
+ (funcall finder)
(run-hooks 'pdf-sync-backward-hook)))
(defun pdf-sync-backward-correlate (x y)
"Find the source corresponding to image coordinates X, Y.
-Returns a list \(SOURCE FN\), where SOURCE is the name of the TeX
-file and FN a function of no arguments which, when called in the
-buffer of the file, will try to move point to the correct
-position."
+Returns a list \(SOURCE FINDER\), where SOURCE is the name of the
+TeX file and FINDER a function of zero arguments which, when
+called in the buffer of the aforementioned file, will try to move
+point to the correct position."
(pdf-util-assert-pdf-window)
(let ((size (pdf-view-image-size))
@@ -173,10 +327,338 @@ position."
(pdf-info-synctex-backward-search page x y)
(let ((data (list (expand-file-name source)
line column)))
- (or (apply 'run-hook-with-args-until-success
- 'pdf-sync-backward-redirect-functions data)
- data)))))
+ (cl-destructuring-bind (source line column)
+ (or (save-selected-window
+ (apply 'run-hook-with-args-until-success
+ 'pdf-sync-backward-redirect-functions data))
+ data)
+ (list source
+ (if (not pdf-sync-backward-use-heuristic)
+ (lambda nil
+ (pdf-util-goto-position line column))
+ (let ((context (pdf-sync-backward--get-text-context page x y)))
+ (lambda nil
+ (pdf-sync-backward--find-position line column context))))))))))
+
+(defun pdf-sync-backward--find-position (line column context)
+ (pdf-util-goto-position line column)
+ (cl-destructuring-bind (windex chindex words)
+ context
+ (let* ((swords (pdf-sync-backward--get-source-context
+ nil (* 6 pdf-sync-backward-context-limit)))
+ (similarity-fn (lambda (text source)
+ (if (if (consp text)
+ (member source text)
+ (equal text source))
+ 1024 -1024)))
+ (alignment
+ (pdf-util-seq-alignment
+ words swords similarity-fn 'infix)))
+ (setq alignment (cl-remove-if-not 'car (cdr alignment)))
+ (cl-assert (< windex (length alignment)))
+
+ (let ((word (cdr (nth windex alignment))))
+ (unless word
+ (setq chindex 0
+ word (cdr (nth (1+ windex) alignment))))
+ (unless word
+ (setq word (cdr (nth (1- windex) alignment))
+ chindex (length word)))
+ (when word
+ (cl-assert (get-text-property 0 'position word) t)
+ (goto-char (get-text-property 0 'position word))
+ (forward-char chindex))))))
+
+(defun pdf-sync-backward--get-source-context (&optional position limit)
+ (save-excursion
+ (when position (goto-char position))
+ (goto-char (line-beginning-position))
+ (let* ((region
+ (cond
+ ((eq limit 'line)
+ (cons (line-beginning-position)
+ (line-end-position)))
+
+ ;; Synctex usually jumps to the end macro, in case it
+ ;; does not understand the environment.
+ ((and (fboundp 'LaTeX-find-matching-begin)
+ (looking-at " *\\\\\\(end\\){"))
+ (cons (or (ignore-errors
+ (save-excursion
+ (LaTeX-find-matching-begin)
+ (forward-line 1)
+ (point)))
+ (point))
+ (point)))
+ ((and (fboundp 'LaTeX-find-matching-end)
+ (looking-at " *\\\\\\(begin\\){"))
+ (goto-char (line-end-position))
+ (cons (point)
+ (or (ignore-errors
+ (save-excursion
+ (LaTeX-find-matching-end)
+ (forward-line 0)
+ (point)))
+ (point))))
+ (t (cons (point) (point)))))
+ (begin (car region))
+ (end (cdr region)))
+ (when (numberp limit)
+ (let ((delta (- limit (- end begin))))
+ (when (> delta 0)
+ (setq begin (max (point-min)
+ (- begin (/ delta 2)))
+ end (min (point-max)
+ (+ end (/ delta 2)))))))
+ (let ((string (buffer-substring-no-properties begin end)))
+ (dotimes (i (length string))
+ (put-text-property i (1+ i) 'position (+ begin i) string))
+ (nth 2 (pdf-sync-backward--tokenize
+ (pdf-sync-backward--source-strip-comments string)
+ nil
+ pdf-sync-backward-source-flush-regexp))))))
+
+(defun pdf-sync-backward--source-strip-comments (string)
+ "Strip all standard LaTeX comments from string."
+ (with-temp-buffer
+ (save-excursion (insert string))
+ (while (re-search-forward
+ "^\\(?:[^\\\n]\\|\\(?:\\\\\\\\\\)\\)*\\(%.*\\)" nil t)
+ (delete-region (match-beginning 1) (match-end 1)))
+ (buffer-string)))
+
+(defun pdf-sync-backward--get-text-context (page x y)
+ (cl-destructuring-bind (&optional char edges)
+ (car (pdf-info-charlayout page (cons x y)))
+ (when edges
+ (setq x (nth 0 edges)
+ y (nth 1 edges)))
+ (let* ((prefix (pdf-info-gettext page (list 0 0 x y)))
+ (suffix (pdf-info-gettext page (list x y 1 1)))
+ (need-suffix-space-p (memq char '(?\s ?\n)))
+ ;; Figure out whether we missed a space by matching the
+ ;; prefix's suffix with the line's prefix. Due to the text
+ ;; extraction in poppler, spaces are only inserted
+ ;; inbetween words. This test may fail, if prefix and line
+ ;; do not overlap, which may happen in various cases, but
+ ;; we don't care.
+ (need-prefix-space-p
+ (and (not need-suffix-space-p)
+ (memq
+ (ignore-errors
+ (aref (pdf-info-gettext page (list x y x y) 'line)
+ (- (length prefix)
+ (or (cl-position ?\n prefix :from-end t)
+ -1)
+ 1)))
+ '(?\s ?\n)))))
+ (setq prefix
+ (concat
+ (substring
+ prefix (max 0 (min (1- (length prefix))
+ (- (length prefix)
+ pdf-sync-backward-context-limit))))
+ (if need-prefix-space-p " "))
+ suffix
+ (concat
+ (if need-suffix-space-p " ")
+ (substring
+ suffix 0 (max 0 (min (1- (length suffix))
+ pdf-sync-backward-context-limit)))))
+ (pdf-sync-backward--tokenize
+ prefix suffix
+ pdf-sync-backward-text-flush-regexp
+ pdf-sync-backward-text-translations))))
+
+(defun pdf-sync-backward--tokenize (prefix &optional suffix flush-re translation)
+ (with-temp-buffer
+ (when prefix (insert prefix))
+ (let* ((center (copy-marker (point)))
+ (case-fold-search nil))
+ (when suffix (insert suffix))
+ (goto-char 1)
+ ;; Delete ignored text.
+ (when flush-re
+ (save-excursion
+ (while (re-search-forward flush-re nil t)
+ (replace-match " " t t))))
+ ;; Normalize whitespace.
+ (save-excursion
+ (while (re-search-forward "[ \t\f\n]+" nil t)
+ (replace-match " " t t)))
+ ;; Split words and non-words
+ (save-excursion
+ (while (re-search-forward "[^ ]\\b\\|[^ [:alnum:]]" nil t)
+ (insert-before-markers " ")))
+ ;; Replace character
+ (let ((translate
+ (lambda (string)
+ (or (and (= (length string) 1)
+ (cdr (assq (aref string 0)
+ translation)))
+ string)))
+ words
+ (windex -1)
+ (chindex 0))
+ (skip-chars-forward " ")
+ (while (and (not (eobp))
+ (<= (point) center))
+ (cl-incf windex)
+ (skip-chars-forward "^ ")
+ (skip-chars-forward " "))
+ (goto-char center)
+ (setq chindex (- (skip-chars-backward "^ ")))
+ (setq words (split-string (buffer-string)))
+ (when translation
+ (setq words (mapcar translate words)))
+ (list windex chindex words)))))
+
+(defun pdf-sync-backward-beginning-of-word ()
+ "Maybe move to the beginning of the word.
+
+Don't move if already at the beginning, or if not at a word
+character.
+
+This function is ment to be put on `pdf-sync-backward-hook', when
+word-level searching is desired."
+ (interactive)
+ (unless (or (looking-at "\\b\\w")
+ (not (looking-back "\\w" (1- (point)))))
+ (backward-word)))
+
+;; * ------------------------------------------------------------------ *
+;; * Debugging backward search
+;; * ------------------------------------------------------------------ *
+
+(defvar pdf-sync-backward-debug-trace nil)
+
+(defun pdf-sync-backward-debug-wrapper (fn-symbol fn &rest args)
+ (cond
+ ((eq fn-symbol 'pdf-sync-backward-search)
+ (setq pdf-sync-backward-debug-trace nil)
+ (apply fn args))
+ (t
+ (let ((retval (apply fn args)))
+ (push `(,args . ,retval)
+ pdf-sync-backward-debug-trace)
+ retval))))
+
+(define-minor-mode pdf-sync-backward-debug-minor-mode
+ "Aid in debugging the backward search."
+ nil nil nil
+ (if (and (fboundp 'advice-add)
+ (fboundp 'advice-remove))
+ (let ((functions
+ '(pdf-sync-backward-search
+ pdf-sync-backward--tokenize
+ pdf-util-seq-alignment)))
+ (cond
+ (pdf-sync-backward-debug-minor-mode
+ (dolist (fn functions)
+ (advice-add fn :around (apply-partially 'pdf-sync-backward-debug-wrapper
+ fn)
+ `((name . ,(format "%s-debug" fn))))))
+ (t
+ (dolist (fn functions)
+ (advice-remove fn (format "%s-debug" fn))))))
+ (error "Need Emacs version >= 24.4")))
+
+(defun pdf-sync-backward-debug-explain ()
+ "Explain the last backward search.
+
+Needs to have `pdf-sync-backward-debug-minor-mode' enabled."
+ (interactive)
+ (unless pdf-sync-backward-debug-trace
+ (error "No last search or `pdf-sync-backward-debug-minor-mode' not enabled."))
+
+ (with-current-buffer (get-buffer-create "*pdf-sync-backward trace*")
+ (cl-destructuring-bind (text source alignment &rest ignored)
+ (reverse pdf-sync-backward-debug-trace)
+ (let* ((fill-column 68)
+ (sep (format "\n%s\n" (make-string fill-column ?-)))
+ (highlight '(:background "chartreuse" :foreground "black"))
+ (or-sep "|")
+ (inhibit-read-only t)
+ (windex (nth 0 (cdr text)))
+ (chindex (nth 1 (cdr text))))
+ (erase-buffer)
+ (font-lock-mode -1)
+ (view-mode 1)
+ (insert (propertize "Text Raw:" 'face 'font-lock-keyword-face))
+ (insert sep)
+ (insert (nth 0 (car text)))
+ (insert (propertize "<|>" 'face highlight))
+ (insert (nth 1 (car text)))
+ (insert sep)
+ (insert (propertize "Text Token:" 'face 'font-lock-keyword-face))
+ (insert sep)
+ (fill-region (point)
+ (progn
+ (insert
+ (mapconcat (lambda (elt)
+ (if (consp elt)
+ (mapconcat 'identity elt or-sep)
+ elt))
+ (nth 2 (cdr text)) " "))
+ (point)))
+ (insert sep)
+
+ (insert (propertize "Source Raw:" 'face 'font-lock-keyword-face))
+ (insert sep)
+ (insert (nth 0 (car source)))
+ (insert sep)
+ (insert (propertize "Source Token:" 'face 'font-lock-keyword-face))
+ (insert sep)
+ (fill-region (point)
+ (progn (insert (mapconcat 'identity (nth 2 (cdr source)) " "))
+ (point)))
+ (insert sep)
+
+ (insert (propertize "Alignment:" 'face 'font-lock-keyword-face))
+ (insert (format " (windex=%d, chindex=%d" windex chindex))
+ (insert sep)
+ (save-excursion (newline 2))
+ (let ((column 0)
+ (index 0))
+ (dolist (a (cdr (cdr alignment)))
+ (let* ((source (cdr a))
+ (text (if (consp (car a))
+ (mapconcat 'identity (car a) or-sep)
+ (car a)))
+ (extend (max (length text)
+ (length source))))
+ (when (and (not (bolp))
+ (> (+ column extend)
+ fill-column))
+ (forward-line 2)
+ (newline 3)
+ (forward-line -2)
+ (setq column 0))
+ (when text
+ (insert (propertize text 'face
+ (if (= index windex)
+ highlight
+ (if source 'match
+ 'lazy-highlight)))))
+ (move-to-column (+ column extend) t)
+ (insert " ")
+ (save-excursion
+ (forward-line)
+ (move-to-column column t)
+ (when source
+ (insert (propertize source 'face (if text
+ 'match
+ 'lazy-highlight))))
+ (move-to-column (+ column extend) t)
+ (insert " "))
+ (cl-incf column (+ 1 extend))
+ (when text (cl-incf index)))))
+ (goto-char (point-max))
+ (insert sep)
+ (goto-char 1)
+ (pop-to-buffer (current-buffer))))))
+
;; * ================================================================== *
;; * Forward search (TeX -> PDF)