feat: Add max tokens setting for model generations
Adds a new setting to control the maximum number of tokens generated by the model. This provides more control over the length of responses and can be useful for limiting the amount of text generated in certain situations.
This commit is contained in:
@@ -13,7 +13,8 @@ export const pageAssistModel = async ({
   topP,
   numCtx,
   seed,
-  numGpu
+  numGpu,
+  numPredict,
 }: {
   model: string
   baseUrl: string
@@ -24,12 +25,13 @@ export const pageAssistModel = async ({
   numCtx?: number
   seed?: number
   numGpu?: number
+  numPredict?: number
 }) => {

   if (model === "chrome::gemini-nano::page-assist") {
     return new ChatChromeAI({
       temperature,
-      topK
+      topK,
     })
   }

@@ -46,6 +48,7 @@ export const pageAssistModel = async ({
       openAIApiKey: providerInfo.apiKey || "temp",
       temperature,
       topP,
+      maxTokens: numPredict,
       configuration: {
         apiKey: providerInfo.apiKey || "temp",
         baseURL: providerInfo.baseUrl || "",
@@ -64,7 +67,8 @@ export const pageAssistModel = async ({
     numCtx,
     seed,
     model,
-    numGpu
+    numGpu,
+    numPredict
   })
Reference in New Issue
Block a user