1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
;;; gen-confusables.el --- generate uni-confusables.el from confusables.txt -*- lexical-binding: t; -*-
;; Copyright (C) 2011, 2012, 2014, 2022 Free Software Foundation, Inc.
;; Author: Teodor Zlatanov <tzz@lifelogs.com>
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;;; Code:
(require 'cl-lib)
(defvar gen-confusables-char-table-single)
(defvar gen-confusables-char-table-multiple)
(defun gen-confusables-read (file)
(interactive "fConfusables filename: \n")
(cl-flet ((reader (h) (string-to-number h 16)))
(let ((stable (make-char-table 'confusables-single-script))
(mtable (make-char-table 'confusables-multiple-script))
(count 0)
(confusable-line-regexp (concat "^\\([[:xdigit:]]+\\)" ; \x+
" ;\t"
;; \x+ separated by spaces
"\\([[:space:][:xdigit:]]+\\)"
" ;\t"
"\\([SM]\\)[LA]"))) ; SL, SA, ML, MA
(setq gen-confusables-char-table-single stable)
(setq gen-confusables-char-table-multiple mtable)
(with-temp-buffer
(insert-file-contents file)
(goto-char (point-min))
(while (re-search-forward confusable-line-regexp nil t)
(cl-incf count)
(when (and (called-interactively-p 'interactive)
(zerop (mod count 100)))
(message "processed %d lines" count))
(let* ((from (match-string 1))
(to (match-string 2))
(class (match-string 3))
(table (if (string-equal "S" class) stable mtable)))
(set-char-table-range
table
(reader from)
(concat (mapcar #'reader (split-string to))))))))))
(defun gen-confusables-write (file)
(interactive "FDumped filename: \n")
(let ((coding-system-for-write 'utf-8-emacs))
(with-temp-file file
(insert ";;; uni-confusables.el --- Unicode confusables table -*- lexical-binding: t -*-
;; Copyright (C) 1991-2009, 2010 Unicode, Inc.
;; This file was generated from a local copy of the Unicode confusables
;; list (covered by the Unicode copyright terms in copyright.html)
;; which was retrieved from
;; http://www.unicode.org/Public/security/revision-04/confusables.txt
;; and mechanically from the source file
;; gen-confusables.el. The copyright and permissions are the same
;; as for gen-confusables.el.
;; Version: 0.3
;; Maintainer: Teodor Zlatanov <tzz@lifelogs.com>
;;; Code:\n\n")
(dolist (type '(single multiple))
(let* ((tablesym (intern (format "uni-confusables-char-table-%s" type)))
(oursym (intern (format "gen-confusables-char-table-%s" type)))
(ourtable (symbol-value oursym))
(tablename (symbol-name tablesym))
(prop (format "confusables-%s-script" type))
props)
(insert (format "(defvar %s (make-char-table '%s))\n\n"
tablename prop))
(map-char-table
(lambda (k v) (setq props (cons k (cons v props))))
ourtable)
(insert (format "(let ((k nil) (v nil) (ranges '(\n"))
(let ((i 0))
(dolist (p props)
(insert " ")
(prin1 p (current-buffer))
(when (zerop (mod (setq i (1+ i)) 10))
(insert "\n"))))
(insert ")))\n")
(insert (format "
(while ranges
(setq k (pop ranges)
v (pop ranges))
(set-char-table-range %s k v)))\n\n" tablename))
(insert (format "(ert-deftest uni-confusables-test-%s ()\n" type))
(dolist (offset '(100 200 800 3000 3500))
(insert (format "
(should (string-equal
(char-table-range %s %d)
%S))\n"
tablename
(nth (* 2 offset) props)
(nth (1+ (* 2 offset)) props))))
(insert ")\n\n")))
;; Use \s escapes in the string, so that this text isn't mis-recognized
;; as applying to this file, but only to the generated file.
(insert "
;;\sLocal\sVariables:
;;\scoding: utf-8
;;\sno-byte-compile: t
;;\sEnd:
(provide 'uni-confusables)
;;; uni-confusables.el ends here\n"))))
(provide 'gen-confusables)
;;; gen-confusables.el ends here
|