feat: Add max tokens setting for model generations

Adds a new setting to control the maximum number of tokens the model generates. This gives finer control over response length and offers a way to cap output when shorter responses are needed.
n4ze3m 2024-11-09 16:56:47 +05:30
parent 7c805cfe22
commit fd654cafdb
20 changed files with 119 additions and 17 deletions
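
For context, the new setting maps to Ollama's `num_predict` generation option (and to `max_tokens` on OpenAI-compatible endpoints, as the model wiring at the end of this diff shows). A minimal sketch of the effect against a local Ollama server; the model name and prompt are placeholders:

```ts
// Sketch only: assumes a local Ollama server on its default port.
// num_predict caps the number of tokens generated for one response
// (-1 asks Ollama for unlimited generation).
const res = await fetch("http://localhost:11434/api/generate", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    model: "llama3", // placeholder model name
    prompt: "Summarize the plot of Hamlet.",
    stream: false,
    options: { num_predict: 256 } // stop after at most 256 tokens
  })
})
const { response } = await res.json()
console.log(response)
```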

View File

@@ -70,6 +70,10 @@
"label": "Længden af Kontekst",
"placeholder": "Instast Længden af Kontekst værdien (standard: 2048)"
},
"numPredict": {
"label": "Maks Tokens (num_predict)",
"placeholder": "Indtast Maks Tokens værdi (fx. 2048, 4096)"
},
"seed": {
"label": "Seed",
"placeholder": "Indtast Seed værdi (fx. 1234)",

View File

@@ -70,6 +70,10 @@
"label": "Anzahl der Kontexte",
"placeholder": "Geben Sie die Anzahl der Kontexte ein (Standard: 2048)"
},
"numPredict": {
"label": "Max Tokens (num_predict)",
"placeholder": "Geben Sie den Max-Tokens-Wert ein (z.B. 2048, 4096)"
},
"seed": {
"label": "Seed",
"placeholder": "Geben Sie den Seed-Wert ein (z.B. 1234)",

View File

@@ -70,6 +70,10 @@
"label": "Number of Contexts",
"placeholder": "Enter Number of Contexts value (default: 2048)"
},
"numPredict": {
"label": "Max Tokens (num_predict)",
"placeholder": "Enter Max Tokens value (e.g. 2048, 4096)"
},
"seed": {
"label": "Seed",
"placeholder": "Enter Seed value (e.g. 1234)",

View File

@@ -70,6 +70,10 @@
"label": "Cantidad de contextos",
"placeholder": "Ingresar el valor de tamaño de la ventana de contexto (por defecto: 2048)"
},
"numPredict": {
"label": "Máximo de Tokens (num_predict)",
"placeholder": "Ingrese el valor máximo de Tokens (ej: 2048, 4096)"
},
"seed": {
"label": "Semilla",
"placeholder": "Ingresar el valor de la semilla (ej: 1234)",

View File

@@ -70,6 +70,10 @@
"label": "Number of Contexts",
"placeholder": "مقدار Number of Contexts را وارد کنید (پیش فرض: 2048)"
},
"numPredict": {
"label": "حداکثر توکن‌ها (num_predict)",
"placeholder": "مقدار حداکثر توکن‌ها را وارد کنید (مثلا 2048، 4096)"
},
"seed": {
"label": "Seed",
"placeholder": "مقدار Seed را وارد کنید (e.g. 1234)",

View File

@@ -70,6 +70,10 @@
"label": "Nombre de contextes",
"placeholder": "Entrez la valeur du nombre de contextes (par défaut: 2048)"
},
"numPredict": {
"label": "Tokens maximum (num_predict)",
"placeholder": "Entrez la valeur des tokens maximum (par exemple 2048, 4096)"
},
"seed": {
"label": "Graine",
"placeholder": "Entrez la valeur des semences (par exemple 1234)",

View File

@@ -70,6 +70,10 @@
"label": "Dimensione del Contesto",
"placeholder": "Inserisci la Dimensione del Contesto (default: 2048)"
},
"numPredict": {
"label": "Token Massimi (num_predict)",
"placeholder": "Inserisci il valore dei Token Massimi (es. 2048, 4096)"
},
"seed": {
"label": "Seed",
"placeholder": "Inserisci il Valore Seed (e.g. 1234)",

View File

@@ -70,6 +70,10 @@
"label": "コンテキストの数",
"placeholder": "コンテキスト数を入力してくださいデフォルト2048"
},
"numPredict": {
"label": "最大トークン数 (num_predict)",
"placeholder": "最大トークン数を入力してください2048、4096"
},
"seed": {
"label": "シード",
"placeholder": "シード値を入力してください1234",

View File

@@ -70,7 +70,10 @@
"label": "컨텍스트 수",
"placeholder": "컨텍스트 수를 입력하세요 (기본값: 2048)"
},
"seed": {
"numPredict": {
"label": "최대 토큰 수 (num_predict)",
"placeholder": "최대 토큰 수를 입력하세요 (예: 2048, 4096)"
}, "seed": {
"label": "시드",
"placeholder": "시드 값을 입력하세요 (예: 1234)",
"help": "모델 출력의 재현성"

View File

@@ -69,6 +69,10 @@
"label": "സന്ദർഭങ്ങളുടെ എണ്ണം",
"placeholder": "സന്ദർഭങ്ങളുടെ സംഖ്യ നൽകുക (സ്ഥിരം: 2048)"
},
"numPredict": {
"label": "പരമാവധി ടോക്കണുകൾ (num_predict)",
"placeholder": "പരമാവധി ടോക്കൺ മൂല്യം നൽകുക (ഉദാ: 2048, 4096)"
},
"seed": {
"label": "സീഡ്",
"placeholder": "സീഡ് വില്യമ നൽകുക (ഉദാ: 1234)",

View File

@@ -70,6 +70,10 @@
"label": "Kontekstlengde",
"placeholder": "Skriv inn kontekstlengdeverdi (standard: 2048)"
},
"numPredict": {
"label": "Maks Tokens (num_predict)",
"placeholder": "Skriv inn Maks Tokens-verdi (f.eks. 2048, 4096)"
},
"seed": {
"label": "Seed",
"placeholder": "Skriv inn seedverdi (f.eks. 1234)",

View File

@@ -70,6 +70,10 @@
"label": "Número de Contextos",
"placeholder": "Digite o valor do Número de Contextos (padrão: 2048)"
},
"numPredict": {
"label": "Máximo de Tokens (num_predict)",
"placeholder": "Digite o valor do Máximo de Tokens (ex: 2048, 4096)"
},
"seed": {
"label": "Semente",
"placeholder": "Digite o valor da Semente (ex: 1234)",

View File

@@ -70,6 +70,10 @@
"label": "Количество контекстов",
"placeholder": "Введите значение количества контекстов (по умолчанию: 2048)"
},
"numPredict": {
"label": "Максимальное количество токенов (num_predict)",
"placeholder": "Введите значение максимального количества токенов (например, 2048, 4096)"
},
"seed": {
"label": "Сид",
"placeholder": "Введите значение сида (например, 1234)",

View File

@@ -70,6 +70,10 @@
"label": "Antal kontexter",
"placeholder": "Ange antal kontextvärden (standard: 2048)"
},
"numPredict": {
"label": "Max antal tokens (num_predict)",
"placeholder": "Ange Max antal tokens värde (t.ex. 2048, 4096)"
},
"seed": {
"label": "Frö",
"placeholder": "Ange frövärde (t.ex. 1234)",

View File

@@ -70,6 +70,10 @@
"label": "上下文数量",
"placeholder": "输入上下文数量默认2048"
},
"numPredict": {
"label": "最大令牌数 (num_predict)",
"placeholder": "输入最大令牌数例如2048、4096"
},
"seed": {
"label": "随机种子",
"placeholder": "输入随机种子值例如1234",

View File

@@ -39,12 +39,14 @@ export const CurrentChatModelSettings = ({
numCtx: cUserSettings.numCtx ?? data.numCtx,
seed: cUserSettings.seed,
numGpu: cUserSettings.numGpu ?? data.numGpu,
numPredict: cUserSettings.numPredict ?? data.numPredict,
systemPrompt: cUserSettings.systemPrompt ?? ""
})
return data
},
enabled: open,
refetchOnMount: false,
refetchOnWindowFocus: false
})
const renderBody = () => {
@@ -115,6 +117,15 @@ export const CurrentChatModelSettings = ({
/>
</Form.Item>
<Form.Item
name="numPredict"
label={t("modelSettings.form.numPredict.label")}>
<InputNumber
style={{ width: "100%" }}
placeholder={t("modelSettings.form.numPredict.placeholder")}
/>
</Form.Item>
<Collapse
ghost
className="border-none bg-transparent"
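
Aside from the new `numPredict` field, the first hunk above also flips the query's refetch behavior. A hedged sketch of the intent; the hook and fetcher names here are illustrative, not the extension's actual API:

```tsx
import { useQuery } from "@tanstack/react-query"

// Illustrative stand-in for the extension's settings lookup.
declare function fetchModelSettings(): Promise<{ numPredict?: number }>

const SettingsBody = ({ open }: { open: boolean }) => {
  const { data } = useQuery({
    queryKey: ["modelSettings"],
    queryFn: fetchModelSettings,
    // Fetch only while the dialog is open, and never refetch behind
    // the user's back: a refetch on remount or window focus would
    // reset form fields (numPredict included) the user just edited.
    enabled: open,
    refetchOnMount: false,
    refetchOnWindowFocus: false
  })
  return <pre>{JSON.stringify(data)}</pre>
}
```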

View File

@@ -77,7 +77,14 @@ export const ModelSettings = () => {
size="large"
/>
</Form.Item>
<Form.Item
name="numPredict"
label={t("modelSettings.form.numPredict.label")}>
<InputNumber
style={{ width: "100%" }}
placeholder={t("modelSettings.form.numPredict.placeholder")}
/>
</Form.Item>
<Collapse
ghost
className="border-none bg-transparent"
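
The same field is added to the default settings form. A sketch of how the value leaves the form on save, with an illustrative persistence function (the diff does not show the extension's real save path); leaving the input empty submits `undefined`, which keeps the limit unset:

```tsx
import { Form, InputNumber } from "antd"

// Illustrative save handler; the real storage layer is not in this diff.
declare function saveSettings(values: { numPredict?: number }): void

export const NumPredictField = () => {
  const [form] = Form.useForm()
  return (
    <Form form={form} onFinish={saveSettings}>
      <Form.Item name="numPredict" label="Max Tokens (num_predict)">
        <InputNumber style={{ width: "100%" }} placeholder="e.g. 2048, 4096" />
      </Form.Item>
    </Form>
  )
}
```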

View File

@@ -133,7 +133,9 @@ export const useMessage = () => {
currentChatModelSettings?.numCtx ?? userDefaultModelSettings?.numCtx,
seed: currentChatModelSettings?.seed,
numGpu:
currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu,
numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict,
})
let newMessage: Message[] = []
@@ -261,7 +263,9 @@ export const useMessage = () => {
userDefaultModelSettings?.numCtx,
seed: currentChatModelSettings?.seed,
numGpu:
currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu,
numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict,
})
const response = await questionOllama.invoke(promptForQuestion)
query = response.content.toString()
@@ -475,7 +479,9 @@ export const useMessage = () => {
currentChatModelSettings?.numCtx ?? userDefaultModelSettings?.numCtx,
seed: currentChatModelSettings?.seed,
numGpu:
currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu,
numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict,
})
let newMessage: Message[] = []
@@ -702,7 +708,9 @@ export const useMessage = () => {
currentChatModelSettings?.numCtx ?? userDefaultModelSettings?.numCtx,
seed: currentChatModelSettings?.seed,
numGpu:
currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu,
numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict,
})
let newMessage: Message[] = []
@@ -777,7 +785,9 @@ export const useMessage = () => {
userDefaultModelSettings?.numCtx,
seed: currentChatModelSettings?.seed,
numGpu:
currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu,
numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict,
})
const response = await questionOllama.invoke(promptForQuestion)
query = response.content.toString()
@@ -964,7 +974,9 @@ export const useMessage = () => {
currentChatModelSettings?.numCtx ?? userDefaultModelSettings?.numCtx,
seed: currentChatModelSettings?.seed,
numGpu:
currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu,
numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict,
})
let newMessage: Message[] = []
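
Every call site in this hook repeats the same precedence rule, so it is worth spelling out once: a value set for the current chat wins over the user's saved default, and if neither is set the field stays `undefined`, deferring to the model's own default. A sketch with illustrative names; note that `seed`, as wired in this commit, has no user-default fallback:

```ts
// Illustrative types; field names mirror the options passed above.
type GenerationSettings = {
  numCtx?: number
  seed?: number
  numGpu?: number
  numPredict?: number
}

// Per-chat settings take precedence over user defaults; undefined
// fields are simply omitted, deferring to the model's own defaults.
const resolveSettings = (
  chat: GenerationSettings,
  userDefaults: GenerationSettings
): GenerationSettings => ({
  numCtx: chat.numCtx ?? userDefaults.numCtx,
  seed: chat.seed, // intentionally per-chat only in this commit
  numGpu: chat.numGpu ?? userDefaults.numGpu,
  numPredict: chat.numPredict ?? userDefaults.numPredict
})
```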

View File

@@ -122,7 +122,9 @@ export const useMessageOption = () => {
currentChatModelSettings?.numCtx ?? userDefaultModelSettings?.numCtx,
seed: currentChatModelSettings?.seed,
numGpu:
currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu,
numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict,
})
let newMessage: Message[] = []
@@ -197,7 +199,9 @@ export const useMessageOption = () => {
userDefaultModelSettings?.numCtx,
seed: currentChatModelSettings?.seed,
numGpu:
currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu,
numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict,
})
const response = await questionOllama.invoke(promptForQuestion)
query = response.content.toString()
@@ -381,7 +385,8 @@ export const useMessageOption = () => {
currentChatModelSettings?.numCtx ?? userDefaultModelSettings?.numCtx,
seed: currentChatModelSettings?.seed,
numGpu:
currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu,
numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict,
})
let newMessage: Message[] = []
@@ -616,7 +621,9 @@ export const useMessageOption = () => {
currentChatModelSettings?.numCtx ?? userDefaultModelSettings?.numCtx,
seed: currentChatModelSettings?.seed,
numGpu:
currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu,
numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict,
})
let newMessage: Message[] = []
@@ -707,7 +714,9 @@ export const useMessageOption = () => {
userDefaultModelSettings?.numCtx,
seed: currentChatModelSettings?.seed,
numGpu:
currentChatModelSettings?.numGpu ?? userDefaultModelSettings?.numGpu,
numPredict: currentChatModelSettings?.numPredict ?? userDefaultModelSettings?.numPredict,
})
const response = await questionOllama.invoke(promptForQuestion)
query = response.content.toString()

View File

@@ -13,7 +13,8 @@ export const pageAssistModel = async ({
topP,
numCtx,
seed,
numGpu,
numPredict,
}: {
model: string
baseUrl: string
@@ -24,12 +25,13 @@ export const pageAssistModel = async ({
numCtx?: number
seed?: number
numGpu?: number
numPredict?: number
}) => {
if (model === "chrome::gemini-nano::page-assist") {
return new ChatChromeAI({
temperature,
topK,
})
}
@@ -46,6 +48,7 @@ export const pageAssistModel = async ({
openAIApiKey: providerInfo.apiKey || "temp",
temperature,
topP,
maxTokens: numPredict,
configuration: {
apiKey: providerInfo.apiKey || "temp",
baseURL: providerInfo.baseUrl || "",
@@ -64,7 +67,8 @@ export const pageAssistModel = async ({
numCtx,
seed,
model,
numGpu,
numPredict
})
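
The mapping in this last file is the crux of the change: the same user-facing limit travels under two names depending on the backend. LangChain's `ChatOpenAI` exposes it as `maxTokens` (sent as `max_tokens` to OpenAI-compatible endpoints), while its Ollama chat model exposes it as `numPredict`. A usage sketch with placeholder model names and URLs:

```ts
import { ChatOpenAI } from "@langchain/openai"
import { ChatOllama } from "@langchain/community/chat_models/ollama"

const numPredict = 4096 // the value entered in the new settings field

// OpenAI-compatible endpoints receive it as max_tokens.
const openAiModel = new ChatOpenAI({
  model: "gpt-4o-mini", // placeholder
  maxTokens: numPredict
})

// Ollama receives the same limit as num_predict.
const ollamaModel = new ChatOllama({
  baseUrl: "http://localhost:11434", // placeholder
  model: "llama3", // placeholder
  numPredict
})
```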