From fd654cafdb8e31dd91d19b5c17e3141483e7f9ec Mon Sep 17 00:00:00 2001 From: n4ze3m Date: Sat, 9 Nov 2024 16:56:47 +0530 Subject: [PATCH] feat: Add max tokens setting for model generations Adds a new setting to control the maximum number of tokens generated by the model. This provides more control over the length of responses and can be useful for limiting the amount of text generated in certain situations. --- src/assets/locale/da/common.json | 4 ++++ src/assets/locale/de/common.json | 4 ++++ src/assets/locale/en/common.json | 4 ++++ src/assets/locale/es/common.json | 4 ++++ src/assets/locale/fa/common.json | 4 ++++ src/assets/locale/fr/common.json | 4 ++++ src/assets/locale/it/common.json | 4 ++++ src/assets/locale/ja-JP/common.json | 4 ++++ src/assets/locale/ko/common.json | 5 +++- src/assets/locale/ml/common.json | 4 ++++ src/assets/locale/no/common.json | 4 ++++ src/assets/locale/pt-BR/common.json | 4 ++++ src/assets/locale/ru/common.json | 4 ++++ src/assets/locale/sv/common.json | 4 ++++ src/assets/locale/zh/common.json | 4 ++++ .../Settings/CurrentChatModelSettings.tsx | 13 +++++++++- .../Option/Settings/model-settings.tsx | 9 ++++++- src/hooks/useMessage.tsx | 24 ++++++++++++++----- src/hooks/useMessageOption.tsx | 19 +++++++++++---- src/models/index.ts | 10 +++++--- 20 files changed, 119 insertions(+), 17 deletions(-) diff --git a/src/assets/locale/da/common.json b/src/assets/locale/da/common.json index 881dab7..de82642 100644 --- a/src/assets/locale/da/common.json +++ b/src/assets/locale/da/common.json @@ -70,6 +70,10 @@ "label": "Længden af Kontekst", "placeholder": "Instast Længden af Kontekst værdien (standard: 2048)" }, + "numPredict": { + "label": "Maks Tokens (num_predict)", + "placeholder": "Indtast Maks Tokens værdi (fx. 2048, 4096)" + }, "seed": { "label": "Seed", "placeholder": "Indtast Seed værdi (fx. 1234)", diff --git a/src/assets/locale/de/common.json b/src/assets/locale/de/common.json index 9a69b22..65b72ab 100644 --- a/src/assets/locale/de/common.json +++ b/src/assets/locale/de/common.json @@ -70,6 +70,10 @@ "label": "Anzahl der Kontexte", "placeholder": "Geben Sie die Anzahl der Kontexte ein (Standard: 2048)" }, + "numPredict": { + "label": "Max Tokens (num_predict)", + "placeholder": "Geben Sie den Max-Tokens-Wert ein (z.B. 2048, 4096)" + }, "seed": { "label": "Seed", "placeholder": "Geben Sie den Seed-Wert ein (z.B. 1234)", diff --git a/src/assets/locale/en/common.json b/src/assets/locale/en/common.json index a7a3ecc..d033144 100644 --- a/src/assets/locale/en/common.json +++ b/src/assets/locale/en/common.json @@ -70,6 +70,10 @@ "label": "Number of Contexts", "placeholder": "Enter Number of Contexts value (default: 2048)" }, + "numPredict": { + "label": "Max Tokens (num_predict)", + "placeholder": "Enter Max Tokens value (e.g. 2048, 4096)" + }, "seed": { "label": "Seed", "placeholder": "Enter Seed value (e.g. 1234)", diff --git a/src/assets/locale/es/common.json b/src/assets/locale/es/common.json index cf5ccf8..210e043 100644 --- a/src/assets/locale/es/common.json +++ b/src/assets/locale/es/common.json @@ -70,6 +70,10 @@ "label": "Cantidad de contextos", "placeholder": "Ingresar el valor de tamaño de la ventana de contexto (por defecto: 2048)" }, + "numPredict": { + "label": "Máximo de Tokens (num_predict)", + "placeholder": "Ingrese el valor máximo de Tokens (ej: 2048, 4096)" + }, "seed": { "label": "Semilla", "placeholder": "Ingresar el valor de la semilla (ej: 1234)", diff --git a/src/assets/locale/fa/common.json b/src/assets/locale/fa/common.json index 6abb70a..72301c7 100644 --- a/src/assets/locale/fa/common.json +++ b/src/assets/locale/fa/common.json @@ -70,6 +70,10 @@ "label": "Number of Contexts", "placeholder": "مقدار Number of Contexts را وارد کنید (پیش فرض: 2048)" }, + "numPredict": { + "label": "حداکثر توکن‌ها (num_predict)", + "placeholder": "مقدار حداکثر توکن‌ها را وارد کنید (مثلا 2048، 4096)" + }, "seed": { "label": "Seed", "placeholder": "مقدار Seed را وارد کنید (e.g. 1234)", diff --git a/src/assets/locale/fr/common.json b/src/assets/locale/fr/common.json index 0775303..d11ef8c 100644 --- a/src/assets/locale/fr/common.json +++ b/src/assets/locale/fr/common.json @@ -70,6 +70,10 @@ "label": "Nombre de contextes", "placeholder": "Entrez la valeur du nombre de contextes (par défaut: 2048)" }, + "numPredict": { + "label": "Tokens maximum (num_predict)", + "placeholder": "Entrez la valeur des tokens maximum (par exemple 2048, 4096)" + }, "seed": { "label": "Graine", "placeholder": "Entrez la valeur des semences (par exemple 1234)", diff --git a/src/assets/locale/it/common.json b/src/assets/locale/it/common.json index 102b6c8..6b08509 100644 --- a/src/assets/locale/it/common.json +++ b/src/assets/locale/it/common.json @@ -70,6 +70,10 @@ "label": "Dimensione del Contesto", "placeholder": "Inserisci la Dimensione del Contesto (default: 2048)" }, + "numPredict": { + "label": "Token Massimi (num_predict)", + "placeholder": "Inserisci il valore dei Token Massimi (es. 2048, 4096)" + }, "seed": { "label": "Seed", "placeholder": "Inserisci il Valore Seed (e.g. 1234)", diff --git a/src/assets/locale/ja-JP/common.json b/src/assets/locale/ja-JP/common.json index 6f38578..92ba8e0 100644 --- a/src/assets/locale/ja-JP/common.json +++ b/src/assets/locale/ja-JP/common.json @@ -70,6 +70,10 @@ "label": "コンテキストの数", "placeholder": "コンテキスト数を入力してください(デフォルト:2048)" }, + "numPredict": { + "label": "最大トークン数 (num_predict)", + "placeholder": "最大トークン数を入力してください(例:2048、4096)" + }, "seed": { "label": "シード", "placeholder": "シード値を入力してください(例:1234)", diff --git a/src/assets/locale/ko/common.json b/src/assets/locale/ko/common.json index e59e785..ed9e0b9 100644 --- a/src/assets/locale/ko/common.json +++ b/src/assets/locale/ko/common.json @@ -70,7 +70,10 @@ "label": "컨텍스트 수", "placeholder": "컨텍스트 수를 입력하세요 (기본값: 2048)" }, - "seed": { + "numPredict": { + "label": "최대 토큰 수 (num_predict)", + "placeholder": "최대 토큰 수를 입력하세요 (예: 2048, 4096)" + }, "seed": { "label": "시드", "placeholder": "시드 값을 입력하세요 (예: 1234)", "help": "모델 출력의 재현성" diff --git a/src/assets/locale/ml/common.json b/src/assets/locale/ml/common.json index ed6988a..cd6868d 100644 --- a/src/assets/locale/ml/common.json +++ b/src/assets/locale/ml/common.json @@ -69,6 +69,10 @@ "label": "സന്ദർഭങ്ങളുടെ എണ്ണം", "placeholder": "സന്ദർഭങ്ങളുടെ സംഖ്യ നൽകുക (സ്ഥിരം: 2048)" }, + "numPredict": { + "label": "പരമാവധി ടോക്കണുകൾ (num_predict)", + "placeholder": "പരമാവധി ടോക്കൺ മൂല്യം നൽകുക (ഉദാ: 2048, 4096)" + }, "seed": { "label": "സീഡ്", "placeholder": "സീഡ് വില്യമ നൽകുക (ഉദാ: 1234)", diff --git a/src/assets/locale/no/common.json b/src/assets/locale/no/common.json index b95ffce..1720d1f 100644 --- a/src/assets/locale/no/common.json +++ b/src/assets/locale/no/common.json @@ -70,6 +70,10 @@ "label": "Kontekstlengde", "placeholder": "Skriv inn kontekstlengdeverdi (standard: 2048)" }, + "numPredict": { + "label": "Maks Tokens (num_predict)", + "placeholder": "Skriv inn Maks Tokens-verdi (f.eks. 2048, 4096)" + }, "seed": { "label": "Seed", "placeholder": "Skriv inn seedverdi (f.eks. 1234)", diff --git a/src/assets/locale/pt-BR/common.json b/src/assets/locale/pt-BR/common.json index a2f1a41..660514d 100644 --- a/src/assets/locale/pt-BR/common.json +++ b/src/assets/locale/pt-BR/common.json @@ -70,6 +70,10 @@ "label": "Número de Contextos", "placeholder": "Digite o valor do Número de Contextos (padrão: 2048)" }, + "numPredict": { + "label": "Máximo de Tokens (num_predict)", + "placeholder": "Digite o valor do Máximo de Tokens (ex: 2048, 4096)" + }, "seed": { "label": "Semente", "placeholder": "Digite o valor da Semente (ex: 1234)", diff --git a/src/assets/locale/ru/common.json b/src/assets/locale/ru/common.json index 04b54de..31291a0 100644 --- a/src/assets/locale/ru/common.json +++ b/src/assets/locale/ru/common.json @@ -70,6 +70,10 @@ "label": "Количество контекстов", "placeholder": "Введите значение количества контекстов (по умолчанию: 2048)" }, + "numPredict": { + "label": "Максимальное количество токенов (num_predict)", + "placeholder": "Введите значение максимального количества токенов (например, 2048, 4096)" + }, "seed": { "label": "Сид", "placeholder": "Введите значение сида (например, 1234)", diff --git a/src/assets/locale/sv/common.json b/src/assets/locale/sv/common.json index b0df981..4698dc7 100644 --- a/src/assets/locale/sv/common.json +++ b/src/assets/locale/sv/common.json @@ -70,6 +70,10 @@ "label": "Antal kontexter", "placeholder": "Ange antal kontextvärden (standard: 2048)" }, + "numPredict": { + "label": "Max antal tokens (num_predict)", + "placeholder": "Ange Max antal tokens värde (t.ex. 2048, 4096)" + }, "seed": { "label": "Frö", "placeholder": "Ange frövärde (t.ex. 1234)", diff --git a/src/assets/locale/zh/common.json b/src/assets/locale/zh/common.json index 7fb9f9d..080776a 100644 --- a/src/assets/locale/zh/common.json +++ b/src/assets/locale/zh/common.json @@ -70,6 +70,10 @@ "label": "上下文数量", "placeholder": "输入上下文数量(默认:2048)" }, + "numPredict": { + "label": "最大令牌数 (num_predict)", + "placeholder": "输入最大令牌数(例如:2048、4096)" + }, "seed": { "label": "随机种子", "placeholder": "输入随机种子值(例如:1234)", diff --git a/src/components/Common/Settings/CurrentChatModelSettings.tsx b/src/components/Common/Settings/CurrentChatModelSettings.tsx index 3e5c74f..cb37286 100644 --- a/src/components/Common/Settings/CurrentChatModelSettings.tsx +++ b/src/components/Common/Settings/CurrentChatModelSettings.tsx @@ -39,12 +39,14 @@ export const CurrentChatModelSettings = ({ numCtx: cUserSettings.numCtx ?? data.numCtx, seed: cUserSettings.seed, numGpu: cUserSettings.numGpu ?? data.numGpu, + numPredict: cUserSettings.numPredict ?? data.numPredict, systemPrompt: cUserSettings.systemPrompt ?? "" }) return data }, enabled: open, - refetchOnMount: true + refetchOnMount: false, + refetchOnWindowFocus: false }) const renderBody = () => { @@ -115,6 +117,15 @@ export const CurrentChatModelSettings = ({ /> + + + + { size="large" /> - + + + { currentChatModelSettings?.numCtx ?? userDefaultModelSettings?.numCtx, seed: currentChatModelSettings?.seed, numGpu: - currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu + currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu, + numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict, + }) let newMessage: Message[] = [] @@ -261,7 +263,9 @@ export const useMessage = () => { userDefaultModelSettings?.numCtx, seed: currentChatModelSettings?.seed, numGpu: - currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu + currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu, + numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict, + }) const response = await questionOllama.invoke(promptForQuestion) query = response.content.toString() @@ -475,7 +479,9 @@ export const useMessage = () => { currentChatModelSettings?.numCtx ?? userDefaultModelSettings?.numCtx, seed: currentChatModelSettings?.seed, numGpu: - currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu + currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu, + numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict, + }) let newMessage: Message[] = [] @@ -702,7 +708,9 @@ export const useMessage = () => { currentChatModelSettings?.numCtx ?? userDefaultModelSettings?.numCtx, seed: currentChatModelSettings?.seed, numGpu: - currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu + currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu, + numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict, + }) let newMessage: Message[] = [] @@ -777,7 +785,9 @@ export const useMessage = () => { userDefaultModelSettings?.numCtx, seed: currentChatModelSettings?.seed, numGpu: - currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu + currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu, + numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict, + }) const response = await questionOllama.invoke(promptForQuestion) query = response.content.toString() @@ -964,7 +974,9 @@ export const useMessage = () => { currentChatModelSettings?.numCtx ?? userDefaultModelSettings?.numCtx, seed: currentChatModelSettings?.seed, numGpu: - currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu + currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu, + numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict, + }) let newMessage: Message[] = [] diff --git a/src/hooks/useMessageOption.tsx b/src/hooks/useMessageOption.tsx index 1cccb62..959cb44 100644 --- a/src/hooks/useMessageOption.tsx +++ b/src/hooks/useMessageOption.tsx @@ -122,7 +122,9 @@ export const useMessageOption = () => { currentChatModelSettings?.numCtx ?? userDefaultModelSettings?.numCtx, seed: currentChatModelSettings?.seed, numGpu: - currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu + currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu, + numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict, + }) let newMessage: Message[] = [] @@ -197,7 +199,9 @@ export const useMessageOption = () => { userDefaultModelSettings?.numCtx, seed: currentChatModelSettings?.seed, numGpu: - currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu + currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu, + numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict, + }) const response = await questionOllama.invoke(promptForQuestion) query = response.content.toString() @@ -381,7 +385,8 @@ export const useMessageOption = () => { currentChatModelSettings?.numCtx ?? userDefaultModelSettings?.numCtx, seed: currentChatModelSettings?.seed, numGpu: - currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu + currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu, + numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict, }) let newMessage: Message[] = [] @@ -616,7 +621,9 @@ export const useMessageOption = () => { currentChatModelSettings?.numCtx ?? userDefaultModelSettings?.numCtx, seed: currentChatModelSettings?.seed, numGpu: - currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu + currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu, + numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict, + }) let newMessage: Message[] = [] @@ -707,7 +714,9 @@ export const useMessageOption = () => { userDefaultModelSettings?.numCtx, seed: currentChatModelSettings?.seed, numGpu: - currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu + currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu, + numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict, + }) const response = await questionOllama.invoke(promptForQuestion) query = response.content.toString() diff --git a/src/models/index.ts b/src/models/index.ts index 02f2ce8..4798f78 100644 --- a/src/models/index.ts +++ b/src/models/index.ts @@ -13,7 +13,8 @@ export const pageAssistModel = async ({ topP, numCtx, seed, - numGpu + numGpu, + numPredict, }: { model: string baseUrl: string @@ -24,12 +25,13 @@ export const pageAssistModel = async ({ numCtx?: number seed?: number numGpu?: number + numPredict?: number }) => { if (model === "chrome::gemini-nano::page-assist") { return new ChatChromeAI({ temperature, - topK + topK, }) } @@ -46,6 +48,7 @@ export const pageAssistModel = async ({ openAIApiKey: providerInfo.apiKey || "temp", temperature, topP, + maxTokens: numPredict, configuration: { apiKey: providerInfo.apiKey || "temp", baseURL: providerInfo.baseUrl || "", @@ -64,7 +67,8 @@ export const pageAssistModel = async ({ numCtx, seed, model, - numGpu + numGpu, + numPredict })