summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--NEWS.org7
-rw-r--r--README.org9
-rw-r--r--ellama-transient.el28
-rw-r--r--ellama.el44
-rw-r--r--ellama.info83
-rw-r--r--tests/test-ellama-transient.el23
-rw-r--r--tests/test-ellama.el42
7 files changed, 180 insertions, 56 deletions
diff --git a/NEWS.org b/NEWS.org
index 625737f..21cf32b 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,3 +1,10 @@
+* Version 1.17.0
+- Add maximum output token support. Ellama now exposes ~ellama-max-tokens~ and
+ passes per-request ~:max-tokens~ values through ~llm-make-chat-prompt~, so
+ providers can cap generated responses using their standard token limit
+ support. The model transient menu can set or reset the max-token override,
+ and documentation and tests cover the new behavior.
+
* Version 1.16.3
- Sort saved session candidates newest first in ~ellama-load-session~. Ellama
now orders session files by modification time and exposes standard completion
diff --git a/README.org b/README.org
index fe12856..8f01f30 100644
--- a/README.org
+++ b/README.org
@@ -174,9 +174,10 @@ More sophisticated configuration example:
- ~ellama-provider-select~: Select ellama provider.
- ~ellama-select-model~: Change the current provider model interactively. The
model transient supports Ollama and OpenAI-compatible providers, including URL
- editing for compatible APIs. Use "Reset model fields" to clear model,
- temperature, and context-length overrides and let the provider use its
- defaults; reset values are shown as ~default~ in the transient.
+ editing for compatible APIs. It can also set the maximum number of output
+ tokens. Use "Reset model fields" to clear model, temperature, context-length,
+ and max-token overrides and let the provider use its defaults; reset values
+ are shown as ~default~ in the transient.
- ~ellama-code-complete~: Complete selected code or code in the current buffer
according to a provided change using Ellama.
- ~ellama-code-add~: Generate and insert new code based on description. This
@@ -316,6 +317,8 @@ language is english.
There are many supported providers: ~ollama~, ~open ai~, ~vertex~,
~GPT4All~. For more information see
[[https://elpa.gnu.org/packages/llm.html][llm documentation]].
+- ~ellama-max-tokens~: Maximum number of tokens to generate. If not set, use
+ the provider default.
- ~ellama-providers~: association list of model llm providers with name as key.
- ~ellama-spinner-enabled~: Enable spinner during text generation.
- ~ellama-spinner-type~: Spinner type for ellama. Default type is
diff --git a/ellama-transient.el b/ellama-transient.el
index 2815939..1e76468 100644
--- a/ellama-transient.el
+++ b/ellama-transient.el
@@ -51,6 +51,7 @@
(defvar ellama-transient-url nil)
(defvar ellama-transient-provider nil)
(defvar ellama--current-session-uid)
+(defvar ellama-max-tokens)
(declare-function ellama-ask-image "ellama"
(image prompt &optional create-session &rest args))
@@ -100,6 +101,22 @@ Otherwise, prompt the user to enter a system message."
(interactive)
(setq ellama-transient-context-length (read-number "Enter context length: ")))
+(transient-define-suffix ellama-transient-set-max-tokens ()
+  "Set `ellama-max-tokens' from the minibuffer.
+Empty input resets it to nil, which means the provider default.
+Signal a `user-error' unless the input is a positive integer."
+  (interactive)
+  (let ((input (read-string
+                "Enter max tokens (empty for default): "
+                (when ellama-max-tokens
+                  (number-to-string ellama-max-tokens)))))
+    ;; One anchored regexp rejects signs, decimals, blanks and all-zero input.
+    (setq ellama-max-tokens
+          (cond ((string-empty-p input) nil)
+                ((string-match-p "\\`0*[1-9][0-9]*\\'" input)
+                 (string-to-number input))
+                (t (user-error "Max tokens must be a positive integer"))))))
+
(transient-define-suffix ellama-transient-set-host ()
"Set host address."
(interactive)
@@ -120,7 +137,8 @@ Otherwise, prompt the user to enter a system message."
(interactive)
(setq ellama-transient-model-name nil
ellama-transient-temperature nil
- ellama-transient-context-length nil))
+ ellama-transient-context-length nil
+ ellama-max-tokens nil))
(defvar ellama-provider-list '(ellama-provider
ellama-coding-provider
@@ -183,6 +201,9 @@ Otherwise, prompt the user to enter a system message."
ellama-transient-provider))
:transient t
:description ellama-transient-context-length-description)
+ ("x" "Set Max Tokens" ellama-transient-set-max-tokens
+ :transient t
+ :description ellama-transient-max-tokens-description)
("r" "Reset model fields" ellama-transient-reset-model-fields
:transient t)
("S" "Set provider" ellama-transient-set-provider
@@ -243,6 +264,11 @@ FORMAT is used for non-default VALUE."
(ellama-transient--field-description
"Context Length" ellama-transient-context-length "%d"))
+(defun ellama-transient-max-tokens-description ()
+ "Return the transient menu label for the max-token setting.
+The label is built by `ellama-transient--field-description' from the
+current value of `ellama-max-tokens', formatted with \"%d\"."
+ (ellama-transient--field-description
+ "Max Tokens" ellama-max-tokens "%d"))
+
(defun ellama-transient--ollama-provider-p (provider)
"Return non-nil when PROVIDER is an Ollama provider."
(declare-function llm-ollama-p "ext:llm-ollama")
diff --git a/ellama.el b/ellama.el
index f858d46..f33ed33 100644
--- a/ellama.el
+++ b/ellama.el
@@ -6,7 +6,7 @@
;; URL: http://github.com/s-kostyaev/ellama
;; Keywords: help local tools
;; Package-Requires: ((emacs "28.1") (llm "0.24.0") (plz "0.8") (transient "0.7") (compat "29.1") (yaml "1.2.3"))
-;; Version: 1.16.3
+;; Version: 1.17.0
;; SPDX-License-Identifier: GPL-3.0-or-later
;; Created: 8th Oct 2023
@@ -71,6 +71,12 @@
"Backend LLM provider."
:type '(sexp :validate llm-standard-provider-p))
+(defcustom ellama-max-tokens nil
+ "Maximum number of tokens to generate.
+When nil, use the provider default.
+A non-nil :max-tokens argument passed to `ellama-stream' takes
+precedence over this value for that request."
+ :type '(choice (const :tag "Use provider default" nil)
+ (integer :tag "Maximum tokens")))
+
(defcustom ellama-session-remove-reasoning t
"Remove internal reasoning from the session after ellama provide an answer.
This can improve long-term communication with reasoning models."
@@ -2712,6 +2718,8 @@ file by default.
:images FILES -- attach image FILES to the prompt when provider supports it.
+:max-tokens INTEGER -- maximum number of tokens to generate.
+
:on-error ON-ERROR -- ON-ERROR a function that's called with an error message on
failure (with BUFFER current).
@@ -2733,8 +2741,11 @@ failure (with BUFFER current).
(or (plist-get args :provider)
ellama-provider
(ellama-get-first-ollama-chat-model))))
+ (max-tokens (or (plist-get args :max-tokens)
+ ellama-max-tokens))
(reasoning-buffer (get-buffer-create
- (concat (make-temp-name "*ellama-reasoning-") "*")))
+ (concat (make-temp-name "*ellama-reasoning-")
+ "*")))
(point (or (plist-get args :point)
(with-current-buffer buffer (point))))
(replace-beg (plist-get args :replace-beg))
@@ -2770,16 +2781,25 @@ failure (with BUFFER current).
(llm-chat-prompt-append-response
(ellama-session-prompt session)
prompt-content)
- (setf (llm-chat-prompt-tools (ellama-session-prompt session))
- tools)
- ;; System message is part of prompt context and should not be
- ;; appended on each interaction.
+ (setf
+ (llm-chat-prompt-tools
+ (ellama-session-prompt session))
+ tools
+ (llm-chat-prompt-max-tokens
+ (ellama-session-prompt session))
+ max-tokens)
+ ;; System message is part of prompt context and
+ ;; should not be appended on each interaction.
(ellama-session-prompt session))
(setf (ellama-session-prompt session)
- (llm-make-chat-prompt prompt-content :context system
- :tools tools)))
- (llm-make-chat-prompt prompt-content :context system
- :tools tools))))
+ (llm-make-chat-prompt
+ prompt-content :context system
+ :tools tools
+ :max-tokens max-tokens)))
+ (llm-make-chat-prompt
+ prompt-content :context system
+ :tools tools
+ :max-tokens max-tokens))))
(when (not (eq (null replace-beg) (null replace-end)))
(error "Specify both :replace-beg and :replace-end"))
(with-current-buffer reasoning-buffer
@@ -3182,6 +3202,8 @@ ARGS contains keys for fine control.
:images FILES -- attach image FILES to the prompt when provider supports it.
+:max-tokens INTEGER -- maximum number of tokens to generate.
+
:ephemeral BOOL -- create an ephemeral session if set.
:on-done ON-DONE -- ON-DONE a function that's called with
@@ -3194,6 +3216,7 @@ the full response text when the request completes (with BUFFER current)."
(variants (mapcar #'car providers))
(system (plist-get args :system))
(donecb (plist-get args :on-done))
+ (max-tokens (plist-get args :max-tokens))
(images (ellama--normalize-image-files
(or (plist-get args :images)
(plist-get args :image))))
@@ -3294,6 +3317,7 @@ the full response text when the request completes (with BUFFER current)."
:session session
:system system
:images images
+ :max-tokens max-tokens
:on-done (if donecb
(list 'ellama-chat-done donecb)
'ellama-chat-done)
diff --git a/ellama.info b/ellama.info
index f174071..0e4007e 100644
--- a/ellama.info
+++ b/ellama.info
@@ -289,8 +289,9 @@ File: ellama.info, Node: Commands, Next: Keymap, Prev: Installation, Up: Top
• ‘ellama-select-model’: Change the current provider model
interactively. The model transient supports Ollama and
OpenAI-compatible providers, including URL editing for compatible
- APIs. Use "Reset model fields" to clear model, temperature, and
- context-length overrides and let the provider use its defaults;
+ APIs. It can also set the maximum number of output tokens. Use
+ "Reset model fields" to clear model, temperature, context-length,
+ and max-token overrides and let the provider use its defaults;
reset values are shown as ‘default’ in the transient.
• ‘ellama-code-complete’: Complete selected code or code in the
current buffer according to a provided change using Ellama.
@@ -449,6 +450,8 @@ language is english.
There are many supported providers: ‘ollama’, ‘open ai’, ‘vertex’,
‘GPT4All’. For more information see llm documentation
(https://elpa.gnu.org/packages/llm.html).
+ • ‘ellama-max-tokens’: Maximum number of tokens to generate. If not
+ set, use the provider default.
• ‘ellama-providers’: association list of model llm providers with
name as key.
• ‘ellama-spinner-enabled’: Enable spinner during text generation.
@@ -2343,44 +2346,44 @@ Tag Table:
Node: Top1379
Node: Installation3893
Node: Commands8907
-Node: Keymap17466
-Node: Configuration20353
-Node: Session Compaction31766
-Node: Image Input33390
-Node: Task Tool Subagents35535
-Node: DLP for Tool Input/Output37638
-Node: SRT Filesystem Policy for Tools52540
-Node: Context Management58209
-Node: Transient Menus for Context Management59277
-Node: Managing the Context60956
-Node: Considerations61731
-Node: Minor modes62324
-Node: ellama-context-header-line-mode64312
-Node: ellama-context-header-line-global-mode65137
-Node: ellama-context-mode-line-mode65857
-Node: ellama-context-mode-line-global-mode66705
-Node: Ellama Session Header Line Mode67409
-Node: Enabling and Disabling67978
-Node: Customization68425
-Node: Ellama Session Mode Line Mode68713
-Node: Enabling and Disabling (1)69298
-Node: Customization (1)69745
-Node: Using Blueprints70039
-Node: Key Components of Ellama Blueprints70679
-Node: Creating and Managing Blueprints71286
-Node: Blueprints files72264
-Node: Variable Management72685
-Node: Keymap and Mode73138
-Node: Transient Menus74074
-Node: Running Blueprints programmatically74620
-Node: MCP Integration75207
-Node: Agent Skills76442
-Node: Directory Structure76805
-Node: Creating a Skill77832
-Node: How it works78207
-Node: Acknowledgments78598
-Node: Contributions79309
-Node: GNU Free Documentation License79695
+Node: Keymap17536
+Node: Configuration20423
+Node: Session Compaction31950
+Node: Image Input33574
+Node: Task Tool Subagents35719
+Node: DLP for Tool Input/Output37822
+Node: SRT Filesystem Policy for Tools52724
+Node: Context Management58393
+Node: Transient Menus for Context Management59461
+Node: Managing the Context61140
+Node: Considerations61915
+Node: Minor modes62508
+Node: ellama-context-header-line-mode64496
+Node: ellama-context-header-line-global-mode65321
+Node: ellama-context-mode-line-mode66041
+Node: ellama-context-mode-line-global-mode66889
+Node: Ellama Session Header Line Mode67593
+Node: Enabling and Disabling68162
+Node: Customization68609
+Node: Ellama Session Mode Line Mode68897
+Node: Enabling and Disabling (1)69482
+Node: Customization (1)69929
+Node: Using Blueprints70223
+Node: Key Components of Ellama Blueprints70863
+Node: Creating and Managing Blueprints71470
+Node: Blueprints files72448
+Node: Variable Management72869
+Node: Keymap and Mode73322
+Node: Transient Menus74258
+Node: Running Blueprints programmatically74804
+Node: MCP Integration75391
+Node: Agent Skills76626
+Node: Directory Structure76989
+Node: Creating a Skill78016
+Node: How it works78391
+Node: Acknowledgments78782
+Node: Contributions79493
+Node: GNU Free Documentation License79879

End Tag Table
diff --git a/tests/test-ellama-transient.el b/tests/test-ellama-transient.el
index 96eb88a..81950a6 100644
--- a/tests/test-ellama-transient.el
+++ b/tests/test-ellama-transient.el
@@ -99,17 +99,36 @@
(ert-deftest test-ellama-transient-reset-model-fields-and-descriptions ()
(let ((ellama-transient-model-name "model")
(ellama-transient-temperature 0.4)
- (ellama-transient-context-length 8192))
+ (ellama-transient-context-length 8192)
+ (ellama-max-tokens 256))
(ellama-transient-reset-model-fields)
(should-not ellama-transient-model-name)
(should-not ellama-transient-temperature)
(should-not ellama-transient-context-length)
+ (should-not ellama-max-tokens)
(should (equal (ellama-transient-model-description)
"Model (default)"))
(should (equal (ellama-transient-temperature-description)
"Temperature (default)"))
(should (equal (ellama-transient-context-length-description)
- "Context Length (default)"))))
+ "Context Length (default)"))
+ (should (equal (ellama-transient-max-tokens-description)
+ "Max Tokens (default)"))))
+
+(ert-deftest test-ellama-transient-set-max-tokens ()
+  "Check that the max-token suffix stores, validates and clears its input."
+  (let ((ellama-max-tokens nil))
+    ;; A plain positive integer is stored and shown in the description.
+    (cl-letf (((symbol-function 'read-string)
+               (lambda (&rest _args)
+                 "42")))
+      (ellama-transient-set-max-tokens)
+      (should (= ellama-max-tokens 42))
+      (should (equal (ellama-transient-max-tokens-description)
+                     "Max Tokens (42)")))
+    ;; Malformed or non-positive input must signal an error and leave the
+    ;; previously stored value untouched.
+    (dolist (bad '("0" "-1" "1.5" "abc" " 7"))
+      (cl-letf (((symbol-function 'read-string)
+                 (lambda (&rest _args)
+                   bad)))
+        (should-error (ellama-transient-set-max-tokens))
+        (should (= ellama-max-tokens 42))))
+    ;; Empty input clears the override.
+    (cl-letf (((symbol-function 'read-string)
+               (lambda (&rest _args)
+                 "")))
+      (ellama-transient-set-max-tokens)
+      (should-not ellama-max-tokens))))
(ert-deftest test-ellama-transient-set-model-keeps-reset-temperature ()
(let ((ellama-transient-provider :provider)
diff --git a/tests/test-ellama.el b/tests/test-ellama.el
index e6f9c72..583ec90 100644
--- a/tests/test-ellama.el
+++ b/tests/test-ellama.el
@@ -831,6 +831,48 @@ detailed comparison to help you decide:
(when buf
(kill-buffer buf))))))
+;; With no :max-tokens argument, the prompt handed to the provider is
+;; expected to carry the value of `ellama-max-tokens'.
+(ert-deftest test-ellama-stream-uses-max-tokens ()
+ (let* ((provider (make-llm-fake))
+ (ellama-provider provider)
+ (ellama-max-tokens 7)
+ (ellama-response-process-method 'streaming)
+ (ellama-spinner-enabled nil)
+ (ellama-fill-paragraphs nil)
+ captured-prompt
+ done-text)
+ ;; Stub `llm-chat-streaming': record the prompt it receives and call the
+ ;; response callback right away with a fixed "ok" answer.
+ (cl-letf (((symbol-function 'llm-chat-streaming)
+ (lambda (_provider prompt _partial-callback response-callback
+ _error-callback &optional _multi-output)
+ (setq captured-prompt prompt)
+ (funcall response-callback '(:text "ok"))
+ nil)))
+ (with-temp-buffer
+ (ellama-stream "test prompt"
+ :provider provider
+ :on-done (lambda (text) (setq done-text text)))))
+ ;; The on-done callback saw the stubbed answer, so the request completed.
+ (should (equal done-text "ok"))
+ (should (= (llm-chat-prompt-max-tokens captured-prompt) 7))))
+
+;; An explicit :max-tokens argument is expected to win over the value of
+;; `ellama-max-tokens' (2 is passed while the variable is bound to 7).
+(ert-deftest test-ellama-stream-max-tokens-argument-overrides-default ()
+ (let* ((provider (make-llm-fake))
+ (ellama-provider provider)
+ (ellama-max-tokens 7)
+ (ellama-response-process-method 'streaming)
+ (ellama-spinner-enabled nil)
+ (ellama-fill-paragraphs nil)
+ captured-prompt)
+ ;; Stub `llm-chat-streaming': record the prompt it receives and call the
+ ;; response callback right away with a fixed "ok" answer.
+ (cl-letf (((symbol-function 'llm-chat-streaming)
+ (lambda (_provider prompt _partial-callback response-callback
+ _error-callback &optional _multi-output)
+ (setq captured-prompt prompt)
+ (funcall response-callback '(:text "ok"))
+ nil)))
+ (with-temp-buffer
+ (ellama-stream "test prompt"
+ :provider provider
+ :max-tokens 2)))
+ (should (= (llm-chat-prompt-max-tokens captured-prompt) 2))))
+
(ert-deftest test-ellama-stream-defaults-to-current-buffer-with-active-session ()
(let* ((ellama-provider
(make-llm-fake