feat: Add max tokens setting for model generations
Adds a new setting to control the maximum number of tokens generated by the model. This provides more control over the length of responses and can be useful for limiting the amount of text generated in certain situations.
This commit is contained in:
@@ -13,7 +13,8 @@ export const pageAssistModel = async ({
   topP,
   numCtx,
   seed,
-  numGpu
+  numGpu,
+  numPredict,
 }: {
   model: string
   baseUrl: string
@@ -24,12 +25,13 @@ export const pageAssistModel = async ({
   numCtx?: number
   seed?: number
   numGpu?: number
+  numPredict?: number
 }) => {

   if (model === "chrome::gemini-nano::page-assist") {
     return new ChatChromeAI({
       temperature,
-      topK
+      topK,
     })
   }

@@ -46,6 +48,7 @@ export const pageAssistModel = async ({
       openAIApiKey: providerInfo.apiKey || "temp",
       temperature,
       topP,
+      maxTokens: numPredict,
       configuration: {
         apiKey: providerInfo.apiKey || "temp",
         baseURL: providerInfo.baseUrl || "",
@@ -64,7 +67,8 @@ export const pageAssistModel = async ({
     numCtx,
     seed,
     model,
-    numGpu
+    numGpu,
+    numPredict
   })
Reference in New Issue
Block a user