diff options
| -rw-r--r-- | NEWS.org | 7 | ||||
| -rw-r--r-- | README.org | 9 | ||||
| -rw-r--r-- | ellama-transient.el | 28 | ||||
| -rw-r--r-- | ellama.el | 44 | ||||
| -rw-r--r-- | ellama.info | 83 | ||||
| -rw-r--r-- | tests/test-ellama-transient.el | 23 | ||||
| -rw-r--r-- | tests/test-ellama.el | 42 |
7 files changed, 180 insertions, 56 deletions
@@ -1,3 +1,10 @@ +* Version 1.17.0 +- Add maximum output token support. Ellama now exposes ~ellama-max-tokens~ and + passes per-request ~:max-tokens~ values through ~llm-make-chat-prompt~, so + providers can cap generated responses using their standard token limit + support. The model transient menu can set or reset the max-token override, + and documentation and tests cover the new behavior. + * Version 1.16.3 - Sort saved session candidates newest first in ~ellama-load-session~. Ellama now orders session files by modification time and exposes standard completion @@ -174,9 +174,10 @@ More sophisticated configuration example: - ~ellama-provider-select~: Select ellama provider. - ~ellama-select-model~: Change the current provider model interactively. The model transient supports Ollama and OpenAI-compatible providers, including URL - editing for compatible APIs. Use "Reset model fields" to clear model, - temperature, and context-length overrides and let the provider use its - defaults; reset values are shown as ~default~ in the transient. + editing for compatible APIs. It can also set the maximum number of output + tokens. Use "Reset model fields" to clear model, temperature, context-length, + and max-token overrides and let the provider use its defaults; reset values + are shown as ~default~ in the transient. - ~ellama-code-complete~: Complete selected code or code in the current buffer according to a provided change using Ellama. - ~ellama-code-add~: Generate and insert new code based on description. This @@ -316,6 +317,8 @@ language is english. There are many supported providers: ~ollama~, ~open ai~, ~vertex~, ~GPT4All~. For more information see [[https://elpa.gnu.org/packages/llm.html][llm documentation]]. +- ~ellama-max-tokens~: Maximum number of tokens to generate. If not set, use + the provider default. - ~ellama-providers~: association list of model llm providers with name as key. - ~ellama-spinner-enabled~: Enable spinner during text generation. - ~ellama-spinner-type~: Spinner type for ellama. Default type is diff --git a/ellama-transient.el b/ellama-transient.el index 2815939..1e76468 100644 --- a/ellama-transient.el +++ b/ellama-transient.el @@ -51,6 +51,7 @@ (defvar ellama-transient-url nil) (defvar ellama-transient-provider nil) (defvar ellama--current-session-uid) +(defvar ellama-max-tokens) (declare-function ellama-ask-image "ellama" (image prompt &optional create-session &rest args)) @@ -100,6 +101,22 @@ Otherwise, prompt the user to enter a system message." (interactive) (setq ellama-transient-context-length (read-number "Enter context length: "))) +(transient-define-suffix ellama-transient-set-max-tokens () + "Set maximum output tokens." + (interactive) + (let* ((input (read-string + "Enter max tokens (empty for default): " + (when ellama-max-tokens + (number-to-string ellama-max-tokens)))) + (value (unless (string-empty-p input) + (unless (string-match-p "\\`[[:digit:]]+\\'" input) + (error "Max tokens must be a positive integer")) + (let ((number (string-to-number input))) + (unless (> number 0) + (error "Max tokens must be a positive integer")) + number)))) + (setq ellama-max-tokens value))) + (transient-define-suffix ellama-transient-set-host () "Set host address." (interactive) @@ -120,7 +137,8 @@ Otherwise, prompt the user to enter a system message." (interactive) (setq ellama-transient-model-name nil ellama-transient-temperature nil - ellama-transient-context-length nil)) + ellama-transient-context-length nil + ellama-max-tokens nil)) (defvar ellama-provider-list '(ellama-provider ellama-coding-provider @@ -183,6 +201,9 @@ Otherwise, prompt the user to enter a system message." ellama-transient-provider)) :transient t :description ellama-transient-context-length-description) + ("x" "Set Max Tokens" ellama-transient-set-max-tokens + :transient t + :description ellama-transient-max-tokens-description) ("r" "Reset model fields" ellama-transient-reset-model-fields :transient t) ("S" "Set provider" ellama-transient-set-provider @@ -243,6 +264,11 @@ FORMAT is used for non-default VALUE." (ellama-transient--field-description "Context Length" ellama-transient-context-length "%d")) +(defun ellama-transient-max-tokens-description () + "Return transient max tokens description." + (ellama-transient--field-description + "Max Tokens" ellama-max-tokens "%d")) + (defun ellama-transient--ollama-provider-p (provider) "Return non-nil when PROVIDER is an Ollama provider." (declare-function llm-ollama-p "ext:llm-ollama") @@ -6,7 +6,7 @@ ;; URL: http://github.com/s-kostyaev/ellama ;; Keywords: help local tools ;; Package-Requires: ((emacs "28.1") (llm "0.24.0") (plz "0.8") (transient "0.7") (compat "29.1") (yaml "1.2.3")) -;; Version: 1.16.3 +;; Version: 1.17.0 ;; SPDX-License-Identifier: GPL-3.0-or-later ;; Created: 8th Oct 2023 @@ -71,6 +71,12 @@ "Backend LLM provider." :type '(sexp :validate llm-standard-provider-p)) +(defcustom ellama-max-tokens nil + "Maximum number of tokens to generate. +When nil, use the provider default." + :type '(choice (const :tag "Use provider default" nil) + (integer :tag "Maximum tokens"))) + (defcustom ellama-session-remove-reasoning t "Remove internal reasoning from the session after ellama provide an answer. This can improve long-term communication with reasoning models." @@ -2712,6 +2718,8 @@ file by default. :images FILES -- attach image FILES to the prompt when provider supports it. +:max-tokens INTEGER -- maximum number of tokens to generate. + :on-error ON-ERROR -- ON-ERROR a function that's called with an error message on failure (with BUFFER current). @@ -2733,8 +2741,11 @@ failure (with BUFFER current). (or (plist-get args :provider) ellama-provider (ellama-get-first-ollama-chat-model)))) + (max-tokens (or (plist-get args :max-tokens) + ellama-max-tokens)) (reasoning-buffer (get-buffer-create - (concat (make-temp-name "*ellama-reasoning-") "*"))) + (concat (make-temp-name "*ellama-reasoning-") + "*"))) (point (or (plist-get args :point) (with-current-buffer buffer (point)))) (replace-beg (plist-get args :replace-beg)) @@ -2770,16 +2781,25 @@ failure (with BUFFER current). (llm-chat-prompt-append-response (ellama-session-prompt session) prompt-content) - (setf (llm-chat-prompt-tools (ellama-session-prompt session)) - tools) - ;; System message is part of prompt context and should not be - ;; appended on each interaction. + (setf + (llm-chat-prompt-tools + (ellama-session-prompt session)) + tools + (llm-chat-prompt-max-tokens + (ellama-session-prompt session)) + max-tokens) + ;; System message is part of prompt context and + ;; should not be appended on each interaction. (ellama-session-prompt session)) (setf (ellama-session-prompt session) - (llm-make-chat-prompt prompt-content :context system - :tools tools))) - (llm-make-chat-prompt prompt-content :context system - :tools tools)))) + (llm-make-chat-prompt + prompt-content :context system + :tools tools + :max-tokens max-tokens))) + (llm-make-chat-prompt + prompt-content :context system + :tools tools + :max-tokens max-tokens)))) (when (not (eq (null replace-beg) (null replace-end))) (error "Specify both :replace-beg and :replace-end")) (with-current-buffer reasoning-buffer @@ -3182,6 +3202,8 @@ ARGS contains keys for fine control. :images FILES -- attach image FILES to the prompt when provider supports it. +:max-tokens INTEGER -- maximum number of tokens to generate. + :ephemeral BOOL -- create an ephemeral session if set. :on-done ON-DONE -- ON-DONE a function that's called with @@ -3194,6 +3216,7 @@ the full response text when the request completes (with BUFFER current)." (variants (mapcar #'car providers)) (system (plist-get args :system)) (donecb (plist-get args :on-done)) + (max-tokens (plist-get args :max-tokens)) (images (ellama--normalize-image-files (or (plist-get args :images) (plist-get args :image)))) @@ -3294,6 +3317,7 @@ the full response text when the request completes (with BUFFER current)." :session session :system system :images images + :max-tokens max-tokens :on-done (if donecb (list 'ellama-chat-done donecb) 'ellama-chat-done) diff --git a/ellama.info b/ellama.info index f174071..0e4007e 100644 --- a/ellama.info +++ b/ellama.info @@ -289,8 +289,9 @@ File: ellama.info, Node: Commands, Next: Keymap, Prev: Installation, Up: Top • ‘ellama-select-model’: Change the current provider model interactively. The model transient supports Ollama and OpenAI-compatible providers, including URL editing for compatible - APIs. Use "Reset model fields" to clear model, temperature, and - context-length overrides and let the provider use its defaults; + APIs. It can also set the maximum number of output tokens. Use + "Reset model fields" to clear model, temperature, context-length, + and max-token overrides and let the provider use its defaults; reset values are shown as ‘default’ in the transient. • ‘ellama-code-complete’: Complete selected code or code in the current buffer according to a provided change using Ellama. @@ -449,6 +450,8 @@ language is english. There are many supported providers: ‘ollama’, ‘open ai’, ‘vertex’, ‘GPT4All’. For more information see llm documentation (https://elpa.gnu.org/packages/llm.html). + • ‘ellama-max-tokens’: Maximum number of tokens to generate. If not + set, use the provider default. • ‘ellama-providers’: association list of model llm providers with name as key. • ‘ellama-spinner-enabled’: Enable spinner during text generation. @@ -2343,44 +2346,44 @@ Tag Table: Node: Top1379 Node: Installation3893 Node: Commands8907 -Node: Keymap17466 -Node: Configuration20353 -Node: Session Compaction31766 -Node: Image Input33390 -Node: Task Tool Subagents35535 -Node: DLP for Tool Input/Output37638 -Node: SRT Filesystem Policy for Tools52540 -Node: Context Management58209 -Node: Transient Menus for Context Management59277 -Node: Managing the Context60956 -Node: Considerations61731 -Node: Minor modes62324 -Node: ellama-context-header-line-mode64312 -Node: ellama-context-header-line-global-mode65137 -Node: ellama-context-mode-line-mode65857 -Node: ellama-context-mode-line-global-mode66705 -Node: Ellama Session Header Line Mode67409 -Node: Enabling and Disabling67978 -Node: Customization68425 -Node: Ellama Session Mode Line Mode68713 -Node: Enabling and Disabling (1)69298 -Node: Customization (1)69745 -Node: Using Blueprints70039 -Node: Key Components of Ellama Blueprints70679 -Node: Creating and Managing Blueprints71286 -Node: Blueprints files72264 -Node: Variable Management72685 -Node: Keymap and Mode73138 -Node: Transient Menus74074 -Node: Running Blueprints programmatically74620 -Node: MCP Integration75207 -Node: Agent Skills76442 -Node: Directory Structure76805 -Node: Creating a Skill77832 -Node: How it works78207 -Node: Acknowledgments78598 -Node: Contributions79309 -Node: GNU Free Documentation License79695 +Node: Keymap17536 +Node: Configuration20423 +Node: Session Compaction31950 +Node: Image Input33574 +Node: Task Tool Subagents35719 +Node: DLP for Tool Input/Output37822 +Node: SRT Filesystem Policy for Tools52724 +Node: Context Management58393 +Node: Transient Menus for Context Management59461 +Node: Managing the Context61140 +Node: Considerations61915 +Node: Minor modes62508 +Node: ellama-context-header-line-mode64496 +Node: ellama-context-header-line-global-mode65321 +Node: ellama-context-mode-line-mode66041 +Node: ellama-context-mode-line-global-mode66889 +Node: Ellama Session Header Line Mode67593 +Node: Enabling and Disabling68162 +Node: Customization68609 +Node: Ellama Session Mode Line Mode68897 +Node: Enabling and Disabling (1)69482 +Node: Customization (1)69929 +Node: Using Blueprints70223 +Node: Key Components of Ellama Blueprints70863 +Node: Creating and Managing Blueprints71470 +Node: Blueprints files72448 +Node: Variable Management72869 +Node: Keymap and Mode73322 +Node: Transient Menus74258 +Node: Running Blueprints programmatically74804 +Node: MCP Integration75391 +Node: Agent Skills76626 +Node: Directory Structure76989 +Node: Creating a Skill78016 +Node: How it works78391 +Node: Acknowledgments78782 +Node: Contributions79493 +Node: GNU Free Documentation License79879 End Tag Table diff --git a/tests/test-ellama-transient.el b/tests/test-ellama-transient.el index 96eb88a..81950a6 100644 --- a/tests/test-ellama-transient.el +++ b/tests/test-ellama-transient.el @@ -99,17 +99,36 @@ (ert-deftest test-ellama-transient-reset-model-fields-and-descriptions () (let ((ellama-transient-model-name "model") (ellama-transient-temperature 0.4) - (ellama-transient-context-length 8192)) + (ellama-transient-context-length 8192) + (ellama-max-tokens 256)) (ellama-transient-reset-model-fields) (should-not ellama-transient-model-name) (should-not ellama-transient-temperature) (should-not ellama-transient-context-length) + (should-not ellama-max-tokens) (should (equal (ellama-transient-model-description) "Model (default)")) (should (equal (ellama-transient-temperature-description) "Temperature (default)")) (should (equal (ellama-transient-context-length-description) - "Context Length (default)")))) + "Context Length (default)")) + (should (equal (ellama-transient-max-tokens-description) + "Max Tokens (default)")))) + +(ert-deftest test-ellama-transient-set-max-tokens () + (let ((ellama-max-tokens nil)) + (cl-letf (((symbol-function 'read-string) + (lambda (&rest _args) + "42"))) + (ellama-transient-set-max-tokens) + (should (= ellama-max-tokens 42)) + (should (equal (ellama-transient-max-tokens-description) + "Max Tokens (42)"))) + (cl-letf (((symbol-function 'read-string) + (lambda (&rest _args) + ""))) + (ellama-transient-set-max-tokens) + (should-not ellama-max-tokens)))) (ert-deftest test-ellama-transient-set-model-keeps-reset-temperature () (let ((ellama-transient-provider :provider) diff --git a/tests/test-ellama.el b/tests/test-ellama.el index e6f9c72..583ec90 100644 --- a/tests/test-ellama.el +++ b/tests/test-ellama.el @@ -831,6 +831,48 @@ detailed comparison to help you decide: (when buf (kill-buffer buf)))))) +(ert-deftest test-ellama-stream-uses-max-tokens () + (let* ((provider (make-llm-fake)) + (ellama-provider provider) + (ellama-max-tokens 7) + (ellama-response-process-method 'streaming) + (ellama-spinner-enabled nil) + (ellama-fill-paragraphs nil) + captured-prompt + done-text) + (cl-letf (((symbol-function 'llm-chat-streaming) + (lambda (_provider prompt _partial-callback response-callback + _error-callback &optional _multi-output) + (setq captured-prompt prompt) + (funcall response-callback '(:text "ok")) + nil))) + (with-temp-buffer + (ellama-stream "test prompt" + :provider provider + :on-done (lambda (text) (setq done-text text))))) + (should (equal done-text "ok")) + (should (= (llm-chat-prompt-max-tokens captured-prompt) 7)))) + +(ert-deftest test-ellama-stream-max-tokens-argument-overrides-default () + (let* ((provider (make-llm-fake)) + (ellama-provider provider) + (ellama-max-tokens 7) + (ellama-response-process-method 'streaming) + (ellama-spinner-enabled nil) + (ellama-fill-paragraphs nil) + captured-prompt) + (cl-letf (((symbol-function 'llm-chat-streaming) + (lambda (_provider prompt _partial-callback response-callback + _error-callback &optional _multi-output) + (setq captured-prompt prompt) + (funcall response-callback '(:text "ok")) + nil))) + (with-temp-buffer + (ellama-stream "test prompt" + :provider provider + :max-tokens 2))) + (should (= (llm-chat-prompt-max-tokens captured-prompt) 2)))) + (ert-deftest test-ellama-stream-defaults-to-current-buffer-with-active-session () (let* ((ellama-provider (make-llm-fake |
