First upload version 0.0.1

This commit is contained in:
Neyra
2026-02-05 15:27:49 +08:00
commit 8e9b7201ed
4182 changed files with 593136 additions and 0 deletions

View File

@@ -0,0 +1,433 @@
import { EventRelay } from "lifecycle-utils";
import { ChatWrapper } from "../../ChatWrapper.js";
import { ChatHistoryItem, ChatModelFunctionCall, ChatSessionModelFunctions, Token } from "../../types.js";
import { LlamaContextSequence } from "../LlamaContext/LlamaContext.js";
import { LlamaGrammar } from "../LlamaGrammar.js";
import { LLamaChatContextShiftOptions, LlamaChatResponseChunk, LlamaChatResponseFunctionCallParamsChunk } from "../LlamaChat/LlamaChat.js";
import { EvaluationPriority } from "../LlamaContext/types.js";
import { TokenBias } from "../TokenBias.js";
import { LlamaText } from "../../utils/LlamaText.js";
import { LLamaChatPromptCompletionEngineOptions, LlamaChatSessionPromptCompletionEngine } from "./utils/LlamaChatSessionPromptCompletionEngine.js";
/** Options for constructing a {@link LlamaChatSession `LlamaChatSession`} */
export type LlamaChatSessionOptions = {
    /** The context sequence the session evaluates the chat on */
    contextSequence: LlamaContextSequence;
    /** `"auto"` is used by default */
    chatWrapper?: "auto" | ChatWrapper;
    /** A system prompt used when generating the initial chat history */
    systemPrompt?: string;
    /**
     * Add the system prompt even on models that don't support a system prompt.
     *
     * Each chat wrapper has its own workaround for adding a system prompt to a model that doesn't support it,
     * but forcing the system prompt on unsupported models may not always work as expected.
     *
     * Use with caution.
     */
    forceAddSystemPrompt?: boolean;
    /**
     * Automatically dispose the sequence when the session is disposed.
     *
     * Defaults to `false`.
     */
    autoDisposeSequence?: boolean;
    /** Options controlling how the context is shifted when it runs out of space */
    contextShift?: LlamaChatSessionContextShiftOptions;
};
/** Context shift options for a chat session — a narrowed view over `LLamaChatContextShiftOptions` */
export type LlamaChatSessionContextShiftOptions = {
    /**
     * The number of tokens to delete from the context window to make space for new ones.
     * Defaults to 10% of the context size.
     */
    size?: LLamaChatContextShiftOptions["size"];
    /**
     * The strategy to use when deleting tokens from the context window.
     *
     * Defaults to `"eraseFirstResponseAndKeepFirstSystem"`.
     */
    strategy?: LLamaChatContextShiftOptions["strategy"];
};
/**
 * Options for `prompt` and `promptWithMeta` on a chat session.
 *
 * The intersected union at the bottom makes `grammar` and the function-calling
 * options mutually exclusive (each branch forbids the other's options via `never`).
 */
export type LLamaChatPromptOptions<Functions extends ChatSessionModelFunctions | undefined = ChatSessionModelFunctions | undefined> = {
    /**
     * Called as the model generates the main response with the generated text chunk.
     *
     * Useful for streaming the generated response as it's being generated.
     *
     * Includes only the main response without any text segments (like thoughts).
     * For streaming the response with segments, use {@link onResponseChunk `onResponseChunk`}.
     */
    onTextChunk?: (text: string) => void;
    /**
     * Called as the model generates the main response with the generated tokens.
     *
     * Preferably, you'd want to use {@link onTextChunk `onTextChunk`} instead of this.
     *
     * Includes only the main response without any segments (like thoughts).
     * For streaming the response with segments, use {@link onResponseChunk `onResponseChunk`}.
     */
    onToken?: (tokens: Token[]) => void;
    /**
     * Called as the model generates a response with the generated text and tokens,
     * including segment information (when the generated output is part of a segment).
     *
     * Useful for streaming the generated response as it's being generated, including the main response and all segments.
     *
     * Only use this function when you need the segmented texts, like thought segments (chain of thought text).
     */
    onResponseChunk?: (chunk: LlamaChatResponseChunk) => void;
    /**
     * An AbortSignal to later abort the generation.
     *
     * When the signal is aborted, the generation will stop and throw `signal.reason` as the error.
     *
     * > To stop an ongoing generation without throwing an error, also set `stopOnAbortSignal` to `true`.
     */
    signal?: AbortSignal;
    /**
     * When a response already started being generated and then the signal is aborted,
     * the generation will stop and the response will be returned as is instead of throwing an error.
     *
     * Defaults to `false`.
     */
    stopOnAbortSignal?: boolean;
    /** Maximum number of tokens to generate */
    maxTokens?: number;
    /**
     * Temperature is a hyperparameter that controls the randomness of the generated text.
     * It affects the probability distribution of the model's output tokens.
     *
     * A higher temperature (e.g., 1.5) makes the output more random and creative,
     * while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
     *
     * The suggested temperature is 0.8, which provides a balance between randomness and determinism.
     *
     * At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
     *
     * Set to `0` to disable.
     * Disabled by default (set to `0`).
     */
    temperature?: number;
    /**
     * From the next token candidates, discard the percentage of tokens with the lowest probability.
     * For example, if set to `0.05`, 5% of the lowest probability tokens will be discarded.
     * This is useful for generating more high-quality results when using a high temperature.
     * Set to a value between `0` and `1` to enable.
     *
     * Only relevant when `temperature` is set to a value greater than `0`.
     * Disabled by default.
     */
    minP?: number;
    /**
     * Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
     * An integer number between `1` and the size of the vocabulary.
     * Set to `0` to disable (which uses the full vocabulary).
     *
     * Only relevant when `temperature` is set to a value greater than 0.
     */
    topK?: number;
    /**
     * Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
     * and samples the next token only from this set.
     * A float number between `0` and `1`.
     * Set to `1` to disable.
     *
     * Only relevant when `temperature` is set to a value greater than `0`.
     */
    topP?: number;
    /**
     * Used to control the randomness of the generated text.
     *
     * Change the seed to get different results.
     *
     * Only relevant when using `temperature`.
     */
    seed?: number;
    /**
     * Trim whitespace from the end of the generated text
     * Disabled by default.
     */
    trimWhitespaceSuffix?: boolean;
    /**
     * Force a given text prefix to be the start of the model response, to make the model follow a certain direction.
     *
     * May cause some models to not use the given functions in some scenarios where they would have been used otherwise,
     * so avoid using it together with function calling if you notice unexpected behavior.
     */
    responsePrefix?: string;
    /**
     * See the parameter `evaluationPriority` on the `LlamaContextSequence.evaluate()` function for more information.
     */
    evaluationPriority?: EvaluationPriority;
    /**
     * Penalties applied to the repetition of recently generated tokens.
     * Set to `false` to disable.
     */
    repeatPenalty?: false | LlamaChatSessionRepeatPenalty;
    /**
     * Adjust the probability of tokens being generated.
     * Can be used to bias the model to generate tokens that you want it to lean towards,
     * or to avoid generating tokens that you want it to avoid.
     */
    tokenBias?: TokenBias | (() => TokenBias);
    /**
     * Custom stop triggers to stop the generation of the response when any of the provided triggers are found.
     */
    customStopTriggers?: (LlamaText | string | (string | Token)[])[];
    /**
     * Called as the model generates function calls with the generated parameters chunk for each function call.
     *
     * Useful for streaming the generated function call parameters as they're being generated.
     * Only useful in specific use cases,
     * such as showing the generated textual file content as it's being generated (note that doing this requires parsing incomplete JSON).
     *
     * The constructed text from all the params chunks of a given function call can be parsed as a JSON object,
     * according to the function parameters schema.
     *
     * Each function call has its own `callIndex` you can use to distinguish between them.
     *
     * Only relevant when using function calling (via passing the `functions` option).
     *
     * NOTE(review): this property is also declared in the union below;
     * the `grammar` branch forbids it there via `never`.
     */
    onFunctionCallParamsChunk?: (chunk: LlamaChatResponseFunctionCallParamsChunk) => void;
    /**
     * Set the maximum number of tokens that the model is allowed to spend on various segmented responses.
     */
    budgets?: {
        /**
         * Budget for thought tokens.
         *
         * Defaults to `Infinity`.
         */
        thoughtTokens?: number;
        /**
         * Budget for comment tokens.
         *
         * Defaults to `Infinity`.
         */
        commentTokens?: number;
    };
} & ({
    // When a grammar is provided, the function-calling options are forbidden
    grammar?: LlamaGrammar;
    functions?: never;
    documentFunctionParams?: never;
    maxParallelFunctionCalls?: never;
    onFunctionCallParamsChunk?: never;
} | {
    // When functions are provided, a grammar is forbidden
    grammar?: never;
    functions?: Functions | ChatSessionModelFunctions;
    documentFunctionParams?: boolean;
    maxParallelFunctionCalls?: number;
    onFunctionCallParamsChunk?: (chunk: LlamaChatResponseFunctionCallParamsChunk) => void;
});
/** Options for `completePrompt` and `completePromptWithMeta` on a chat session */
export type LLamaChatCompletePromptOptions = {
    /**
     * Generate a completion for the given user prompt up to the given number of tokens.
     *
     * Defaults to `256` or half the context size, whichever is smaller.
     */
    maxTokens?: LLamaChatPromptOptions["maxTokens"];
    /**
     * When a completion already started being generated and then the given `signal` is aborted,
     * the generation will stop and the completion will be returned as-is instead of throwing an error.
     *
     * Defaults to `false`.
     */
    stopOnAbortSignal?: LLamaChatPromptOptions["stopOnAbortSignal"];
    /**
     * Called as the model generates a completion with the generated text chunk.
     *
     * Useful for streaming the generated completion as it's being generated.
     */
    onTextChunk?: LLamaChatPromptOptions["onTextChunk"];
    /**
     * Called as the model generates a completion with the generated tokens.
     *
     * Preferably, you'd want to use `onTextChunk` instead of this.
     */
    onToken?: LLamaChatPromptOptions["onToken"];
    // The following options behave the same as their counterparts in `LLamaChatPromptOptions`:
    signal?: LLamaChatPromptOptions["signal"];
    temperature?: LLamaChatPromptOptions["temperature"];
    minP?: LLamaChatPromptOptions["minP"];
    topK?: LLamaChatPromptOptions["topK"];
    topP?: LLamaChatPromptOptions["topP"];
    seed?: LLamaChatPromptOptions["seed"];
    trimWhitespaceSuffix?: LLamaChatPromptOptions["trimWhitespaceSuffix"];
    evaluationPriority?: LLamaChatPromptOptions["evaluationPriority"];
    repeatPenalty?: LLamaChatPromptOptions["repeatPenalty"];
    tokenBias?: LLamaChatPromptOptions["tokenBias"];
    customStopTriggers?: LLamaChatPromptOptions["customStopTriggers"];
    /** A grammar to constrain the generated completion with */
    grammar?: LlamaGrammar;
    /**
     * Functions are not used by the model here,
     * but are used for keeping the instructions given to the model about the functions in the current context state,
     * to avoid context shifts.
     *
     * It's best to provide the same functions that were used for the previous prompt here.
     */
    functions?: ChatSessionModelFunctions;
    /**
     * Functions are not used by the model here,
     * but are used for keeping the instructions given to the model about the functions in the current context state,
     * to avoid context shifts.
     *
     * It's best to provide the same value that was used for the previous prompt here.
     */
    documentFunctionParams?: boolean;
    /**
     * Whether to complete the prompt as a model response.
     *
     * - **`"auto"`**: Automatically determine whether to complete as a model response based on the model used.
     * This is a good option to workaround some models that don't support user prompt completions.
     * - **`true`**: Always complete as a model response
     * - **`false`**: Never complete as a model response
     *
     * Defaults to `"auto"`.
     */
    completeAsModel?: "auto" | boolean | {
        /**
         * Whether to complete the prompt as a model response.
         *
         * - **`"auto"`**: Automatically determine whether to complete as a model response based on the model used.
         * This is a good option to workaround some models that don't support user prompt completions.
         * - **`true`**: Always complete as a model response
         * - **`false`**: Never complete as a model response
         *
         * Defaults to `"auto"`.
         */
        enabled?: "auto" | boolean;
        /**
         * The messages to append to the chat history to generate a completion as a model response.
         *
         * If the last message is a model message, the prompt will be pushed to it for the completion,
         * otherwise a new model message will be added with the prompt.
         *
         * It must contain a user message or a system message before the model message.
         *
         * Defaults to:
         * ```ts
         * [
         *     {
         *         type: "system",
         *         text: "For your next response predict what the user may send next. " +
         *             "No yapping, no whitespace. Match the user's language and tone."
         *     },
         *     {type: "user", text: ""},
         *     {type: "model", response: [""]}
         * ]
         * ```
         */
        appendedMessages?: ChatHistoryItem[];
    };
};
/** Options for `preloadPrompt` — a subset of the completion options (no generation happens during a preload) */
export type LLamaChatPreloadPromptOptions = {
    signal?: LLamaChatCompletePromptOptions["signal"];
    evaluationPriority?: LLamaChatCompletePromptOptions["evaluationPriority"];
    functions?: LLamaChatCompletePromptOptions["functions"];
    documentFunctionParams?: LLamaChatCompletePromptOptions["documentFunctionParams"];
};
/** Repetition penalty configuration for a chat session */
export type LlamaChatSessionRepeatPenalty = {
    /**
     * Number of recent tokens generated by the model to apply penalties to repetition of.
     * Defaults to `64`.
     */
    lastTokens?: number;
    /**
     * Transform the list of recent tokens before the penalties are applied to them.
     * NOTE(review): behavior inferred from the name and signature — confirm against the `LlamaChat` implementation.
     */
    punishTokensFilter?: (tokens: Token[]) => Token[];
    /**
     * Penalize new line tokens.
     * Enabled by default.
     */
    penalizeNewLine?: boolean;
    /**
     * The relative amount to lower the probability of the tokens in `punishTokens` by
     * Defaults to `1.1`.
     * Set to `1` to disable.
     */
    penalty?: number;
    /**
     * For n time a token is in the `punishTokens` array, lower its probability by `n * frequencyPenalty`
     * Disabled by default (`0`).
     * Set to a value between `0` and `1` to enable.
     */
    frequencyPenalty?: number;
    /**
     * Lower the probability of all the tokens in the `punishTokens` array by `presencePenalty`
     * Disabled by default (`0`).
     * Set to a value between `0` and `1` to enable.
     */
    presencePenalty?: number;
};
/**
 * @see [Using `LlamaChatSession`](https://node-llama-cpp.withcat.ai/guide/chat-session) tutorial
 */
export declare class LlamaChatSession {
    /** Dispatched when the session is disposed */
    readonly onDispose: EventRelay<void>;
    constructor(options: LlamaChatSessionOptions);
    /**
     * Dispose the session.
     * `disposeSequence` controls whether the underlying context sequence is also disposed;
     * it defaults to the `autoDisposeSequence` constructor option.
     */
    dispose({ disposeSequence }?: {
        disposeSequence?: boolean;
    }): void;
    /** @hidden */
    [Symbol.dispose](): void;
    /** Whether the session has been disposed */
    get disposed(): boolean;
    /** The chat wrapper used by the session */
    get chatWrapper(): ChatWrapper;
    /** The context sequence the session evaluates on */
    get sequence(): LlamaContextSequence;
    /** The context of the session's sequence */
    get context(): import("../LlamaContext/LlamaContext.js").LlamaContext;
    /** The model of the session's sequence */
    get model(): import("../LlamaModel/LlamaModel.js").LlamaModel;
    /** Prompt the model and get back only the generated response text */
    prompt<const Functions extends ChatSessionModelFunctions | undefined = undefined>(prompt: string, options?: LLamaChatPromptOptions<Functions>): Promise<string>;
    /**
     * Prompt the model and get the full generation result, including the stop reason and related metadata.
     * @param prompt
     * @param [options]
     */
    promptWithMeta<const Functions extends ChatSessionModelFunctions | undefined = undefined>(prompt: string, { functions, documentFunctionParams, maxParallelFunctionCalls, onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix, responsePrefix, repeatPenalty, tokenBias, customStopTriggers, evaluationPriority }?: LLamaChatPromptOptions<Functions>): Promise<{
        response: (string | ChatModelFunctionCall | import("../../types.js").ChatModelSegment)[];
        responseText: string;
        stopReason: "customStopTrigger";
        customStopTrigger: (string | Token)[];
        remainingGenerationAfterStop: string | Token[] | undefined;
    } | {
        response: (string | ChatModelFunctionCall | import("../../types.js").ChatModelSegment)[];
        responseText: string;
        stopReason: "abort" | "maxTokens" | "eogToken" | "stopGenerationTrigger" | "functionCalls";
        remainingGenerationAfterStop: string | Token[] | undefined;
        customStopTrigger?: undefined;
    }>;
    /**
     * Preload a user prompt into the current context sequence state to make later inference of the model response begin sooner
     * and feel faster.
     *
     * > **Note:** Preloading a long user prompt can incur context shifts, so consider limiting the length of prompts you preload
     * @param prompt - the prompt to preload
     * @param [options]
     */
    preloadPrompt(prompt: string, options?: LLamaChatPreloadPromptOptions): Promise<void>;
    /**
     * Preload a user prompt into the current context sequence state and generate a completion for it.
     *
     * > **Note:** Preloading a long user prompt and completing a user prompt with a high number of `maxTokens` can incur context shifts,
     * > so consider limiting the length of prompts you preload.
     * >
     * > Also, it's recommended to limit the number of tokens generated to a reasonable amount by configuring `maxTokens`.
     * @param prompt - the prompt to preload
     * @param [options]
     */
    completePrompt(prompt: string, options?: LLamaChatCompletePromptOptions): Promise<string>;
    /**
     * Create a smart completion engine that caches the prompt completions
     * and reuses them when the user prompt matches the beginning of the cached prompt or completion.
     *
     * All completions are made and cache is used only for the current chat session state.
     * You can create a single completion engine for an entire chat session.
     */
    createPromptCompletionEngine(options?: LLamaChatPromptCompletionEngineOptions): LlamaChatSessionPromptCompletionEngine;
    /**
     * See `completePrompt` for more information.
     * @param prompt
     * @param [options]
     */
    completePromptWithMeta(prompt: string, { maxTokens, stopOnAbortSignal, functions, documentFunctionParams, onTextChunk, onToken, signal, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix, repeatPenalty, tokenBias, customStopTriggers, evaluationPriority, completeAsModel }?: LLamaChatCompletePromptOptions): Promise<{
        completion: string;
        stopReason: "customStopTrigger";
        customStopTrigger: (string | Token)[];
        remainingGenerationAfterStop: string | Token[] | undefined;
    } | {
        completion: string;
        stopReason: "abort" | "maxTokens" | "eogToken" | "stopGenerationTrigger" | "functionCalls";
        remainingGenerationAfterStop: string | Token[] | undefined;
        customStopTrigger?: undefined;
    }>;
    /** Get the current chat history of the session */
    getChatHistory(): ChatHistoryItem[];
    /**
     * Get the chat history as it appeared in the context window of the last evaluation.
     * NOTE(review): presumably `null` before any evaluation — inferred from the return type; confirm.
     */
    getLastEvaluationContextWindow(): ChatHistoryItem[] | null;
    /** Replace the session's chat history */
    setChatHistory(chatHistory: ChatHistoryItem[]): void;
    /** Clear the chat history and reset it to the initial state. */
    resetChatHistory(): void;
}

View File

@@ -0,0 +1,622 @@
import { DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
import { appendUserMessageToChatHistory } from "../../utils/appendUserMessageToChatHistory.js";
import { LlamaChat } from "../LlamaChat/LlamaChat.js";
import { wrapAbortSignal } from "../../utils/wrapAbortSignal.js";
import { safeEventCallback } from "../../utils/safeEventCallback.js";
import { GgufArchitectureType } from "../../gguf/types/GgufMetadataTypes.js";
import { LlamaChatSessionPromptCompletionEngine } from "./utils/LlamaChatSessionPromptCompletionEngine.js";
// Default value for the `completeAsModel` option of `completePromptWithMeta`:
// enabled by auto-detection, with these messages appended to the chat history so
// the completion is generated as a model response.
const defaultCompleteAsModel = {
    enabled: "auto",
    appendedMessages: [
        {
            type: "system",
            text: "For your next response predict what the user may send next. No yapping, no whitespace. Match the user's language and tone."
        },
        { type: "user", text: "" },
        { type: "model", response: [""] }
    ]
};
/**
* @see [Using `LlamaChatSession`](https://node-llama-cpp.withcat.ai/guide/chat-session) tutorial
*/
export class LlamaChatSession {
/** @internal */ _disposeAggregator = new DisposeAggregator();
/** @internal */ _autoDisposeSequence;
/** @internal */ _contextShift;
/** @internal */ _forceAddSystemPrompt;
/** @internal */ _systemPrompt;
/** @internal */ _chatLock = {};
/** @internal */ _chatHistory;
/** @internal */ _lastEvaluation;
/** @internal */ _canUseContextWindowForCompletion = true;
/** @internal */ _chat;
/** @internal */ _chatHistoryStateRef = {};
/** @internal */ _preloadAndCompleteAbortControllers = new Set();
onDispose = new EventRelay();
constructor(options) {
const { contextSequence, chatWrapper = "auto", systemPrompt, forceAddSystemPrompt = false, autoDisposeSequence = false, contextShift } = options;
if (contextSequence == null)
throw new Error("contextSequence cannot be null");
if (contextSequence.disposed)
throw new DisposedError();
this._contextShift = contextShift;
this._forceAddSystemPrompt = forceAddSystemPrompt;
this._systemPrompt = systemPrompt;
this._chat = new LlamaChat({
autoDisposeSequence,
chatWrapper,
contextSequence
});
const chatWrapperSupportsSystemMessages = this._chat.chatWrapper.settings.supportsSystemMessages;
if (chatWrapperSupportsSystemMessages == null || chatWrapperSupportsSystemMessages || this._forceAddSystemPrompt)
this._chatHistory = this._chat.chatWrapper.generateInitialChatHistory({ systemPrompt: this._systemPrompt });
else
this._chatHistory = [];
this._autoDisposeSequence = autoDisposeSequence;
this._disposeAggregator.add(this._chat.onDispose.createListener(() => {
this.dispose();
}));
this._disposeAggregator.add(this.onDispose.dispatchEvent);
}
dispose({ disposeSequence = this._autoDisposeSequence } = {}) {
if (this._chat == null)
return;
this._chat.dispose({ disposeSequence });
this._chat = null;
this._disposeAggregator.dispose();
}
    /** @hidden */
    [Symbol.dispose]() {
        return this.dispose();
    }
    /** Whether this session (or its underlying chat) has been disposed */
    get disposed() {
        return this._chat == null || this._chat.disposed;
    }
    /** The chat wrapper in use; throws `DisposedError` after disposal */
    get chatWrapper() {
        if (this._chat == null)
            throw new DisposedError();
        return this._chat.chatWrapper;
    }
    /** The context sequence the session evaluates on; throws `DisposedError` after disposal */
    get sequence() {
        if (this._chat == null)
            throw new DisposedError();
        return this._chat.sequence;
    }
    /** The context of the session's sequence; throws `DisposedError` after disposal */
    get context() {
        return this.sequence.context;
    }
    /** The model of the session's sequence; throws `DisposedError` after disposal */
    get model() {
        return this.sequence.model;
    }
async prompt(prompt, options = {}) {
const { functions, documentFunctionParams, maxParallelFunctionCalls, onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = false, responsePrefix, repeatPenalty, tokenBias, customStopTriggers } = options;
const { responseText } = await this.promptWithMeta(prompt, {
// this is a workaround to allow passing both `functions` and `grammar`
functions: functions,
grammar: grammar,
documentFunctionParams: documentFunctionParams,
maxParallelFunctionCalls: maxParallelFunctionCalls,
onFunctionCallParamsChunk: onFunctionCallParamsChunk,
onTextChunk, onToken, onResponseChunk, budgets, signal, stopOnAbortSignal, maxTokens,
temperature, minP, topK, topP, seed,
trimWhitespaceSuffix, responsePrefix, repeatPenalty, tokenBias, customStopTriggers
});
return responseText;
}
    /**
     * Prompt the model and get the full generation result: the structured response,
     * the plain response text, the stop reason, and related metadata.
     * @param prompt
     * @param [options]
     */
    async promptWithMeta(prompt, { functions, documentFunctionParams, maxParallelFunctionCalls, onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = false, responsePrefix, repeatPenalty, tokenBias, customStopTriggers, evaluationPriority } = {}) {
        this._ensureNotDisposed();
        // The grammar must belong to the same Llama instance as this sequence's model
        if (grammar != null && grammar._llama !== this.model._llama)
            throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
        this._stopAllPreloadAndPromptCompletions();
        return await withLock([this._chatLock, "evaluation"], signal, async () => {
            this._ensureNotDisposed();
            // Stop again inside the lock, in case a preload/completion started while waiting for it
            this._stopAllPreloadAndPromptCompletions();
            if (this._chat == null)
                throw new DisposedError();
            const supportsParallelFunctionCalling = this._chat.chatWrapper.settings.functions.parallelism != null;
            const [abortController, disposeAbortController] = wrapAbortSignal(signal);
            // Reuse the last evaluation's context window when allowed, to avoid re-evaluating history
            let lastEvaluation = this._canUseContextWindowForCompletion
                ? this._lastEvaluation
                : undefined;
            let newChatHistory = appendUserMessageToChatHistory(this._chatHistory, prompt);
            let newContextWindowChatHistory = lastEvaluation?.contextWindow == null
                ? undefined
                : appendUserMessageToChatHistory(lastEvaluation?.contextWindow, prompt);
            let previousFunctionCalls = 0;
            // An empty response prefix is treated the same as no prefix at all
            const resolvedResponsePrefix = (responsePrefix != null && responsePrefix !== "")
                ? responsePrefix
                : undefined;
            newChatHistory.push({
                type: "model",
                response: resolvedResponsePrefix != null
                    ? [resolvedResponsePrefix]
                    : []
            });
            if (newContextWindowChatHistory != null)
                newContextWindowChatHistory.push({
                    type: "model",
                    response: resolvedResponsePrefix != null
                        ? [resolvedResponsePrefix]
                        : []
                });
            // Emit the forced response prefix through the streaming callbacks, as if the model generated it
            if (resolvedResponsePrefix != null) {
                safeEventCallback(onToken)?.(this.model.tokenize(resolvedResponsePrefix));
                safeEventCallback(onTextChunk)?.(resolvedResponsePrefix);
                safeEventCallback(onResponseChunk)?.({
                    type: undefined,
                    segmentType: undefined,
                    text: resolvedResponsePrefix,
                    tokens: this.model.tokenize(resolvedResponsePrefix)
                });
            }
            try {
                // Generation loop: repeats whenever the model called functions,
                // so the function results can be fed back for another generation round
                while (true) {
                    const functionCallsAndResults = [];
                    let canThrowFunctionCallingErrors = false;
                    let abortedOnFunctionCallError = false;
                    const initialOutputTokens = this._chat.sequence.tokenMeter.usedOutputTokens;
                    const { lastEvaluation: currentLastEvaluation, metadata } = await this._chat.generateResponse(newChatHistory, {
                        functions,
                        documentFunctionParams,
                        maxParallelFunctionCalls,
                        grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
                        onTextChunk: safeEventCallback(onTextChunk),
                        onToken: safeEventCallback(onToken),
                        onResponseChunk: safeEventCallback(onResponseChunk),
                        onFunctionCallParamsChunk: onFunctionCallParamsChunk == null
                            ? undefined
                            : safeEventCallback((chunk) => onFunctionCallParamsChunk?.({
                                // offset the call index by calls made in previous loop iterations
                                callIndex: previousFunctionCalls + chunk.callIndex,
                                functionName: chunk.functionName,
                                paramsChunk: chunk.paramsChunk,
                                done: chunk.done
                            })),
                        budgets: {
                            includeCurrentResponse: true,
                            thoughtTokens: budgets?.thoughtTokens,
                            commentTokens: budgets?.commentTokens
                        },
                        signal: abortController.signal,
                        stopOnAbortSignal,
                        repeatPenalty,
                        minP,
                        topK,
                        topP,
                        seed,
                        tokenBias,
                        customStopTriggers,
                        maxTokens,
                        temperature,
                        trimWhitespaceSuffix,
                        contextShift: {
                            ...this._contextShift,
                            lastEvaluationMetadata: lastEvaluation?.contextShiftMetadata
                        },
                        evaluationPriority,
                        lastEvaluationContextWindow: {
                            history: newContextWindowChatHistory,
                            minimumOverlapPercentageToPreventContextShift: 0.5
                        },
                        onFunctionCall: async (functionCall) => {
                            // Start running the function handler right away; results are awaited later
                            functionCallsAndResults.push((async () => {
                                try {
                                    const functionDefinition = functions?.[functionCall.functionName];
                                    if (functionDefinition == null)
                                        throw new Error(`The model tried to call function "${functionCall.functionName}" which is not defined`);
                                    const functionCallResult = await functionDefinition.handler(functionCall.params);
                                    return {
                                        functionCall,
                                        functionDefinition,
                                        functionCallResult
                                    };
                                }
                                catch (err) {
                                    // A handler error aborts the whole generation with that error
                                    if (!abortController.signal.aborted) {
                                        abortedOnFunctionCallError = true;
                                        abortController.abort(err);
                                    }
                                    if (canThrowFunctionCallingErrors)
                                        throw err;
                                    return null;
                                }
                            })());
                        }
                    });
                    this._ensureNotDisposed();
                    if (abortController.signal.aborted && (abortedOnFunctionCallError || !stopOnAbortSignal))
                        throw abortController.signal.reason;
                    // Shrink the remaining token budget by what this generation round used
                    if (maxTokens != null)
                        maxTokens = Math.max(0, maxTokens - (this._chat.sequence.tokenMeter.usedOutputTokens - initialOutputTokens));
                    lastEvaluation = currentLastEvaluation;
                    newChatHistory = lastEvaluation.cleanHistory;
                    if (functionCallsAndResults.length > 0) {
                        canThrowFunctionCallingErrors = true;
                        const functionCallResultsPromise = Promise.all(functionCallsAndResults);
                        const raceEventAbortController = new AbortController();
                        // Wait for the function results, but bail out early if the generation is aborted
                        await Promise.race([
                            functionCallResultsPromise,
                            new Promise((accept, reject) => {
                                abortController.signal.addEventListener("abort", () => {
                                    if (abortedOnFunctionCallError || !stopOnAbortSignal)
                                        reject(abortController.signal.reason);
                                    else
                                        accept();
                                }, { signal: raceEventAbortController.signal });
                                // Handle the case where the signal was already aborted before the listener was added
                                if (abortController.signal.aborted) {
                                    if (abortedOnFunctionCallError || !stopOnAbortSignal)
                                        reject(abortController.signal.reason);
                                    else
                                        accept();
                                }
                            })
                        ]);
                        raceEventAbortController.abort();
                        this._ensureNotDisposed();
                        if (!abortController.signal.aborted) {
                            const functionCallResults = (await functionCallResultsPromise)
                                .filter((result) => result != null);
                            this._ensureNotDisposed();
                            if (abortController.signal.aborted && (abortedOnFunctionCallError || !stopOnAbortSignal))
                                throw abortController.signal.reason;
                            newContextWindowChatHistory = lastEvaluation.contextWindow;
                            // On wrappers without parallel function calling, calls never start a new chunk
                            let startNewChunk = supportsParallelFunctionCalling;
                            for (const { functionCall, functionDefinition, functionCallResult } of functionCallResults) {
                                newChatHistory = addFunctionCallToChatHistory({
                                    chatHistory: newChatHistory,
                                    functionName: functionCall.functionName,
                                    functionDescription: functionDefinition.description,
                                    callParams: functionCall.params,
                                    callResult: functionCallResult,
                                    rawCall: functionCall.raw,
                                    startsNewChunk: startNewChunk
                                });
                                newContextWindowChatHistory = addFunctionCallToChatHistory({
                                    chatHistory: newContextWindowChatHistory,
                                    functionName: functionCall.functionName,
                                    functionDescription: functionDefinition.description,
                                    callParams: functionCall.params,
                                    callResult: functionCallResult,
                                    rawCall: functionCall.raw,
                                    startsNewChunk: startNewChunk
                                });
                                startNewChunk = false;
                                previousFunctionCalls++;
                            }
                            lastEvaluation.cleanHistory = newChatHistory;
                            lastEvaluation.contextWindow = newContextWindowChatHistory;
                            if (abortController.signal.aborted && !abortedOnFunctionCallError && stopOnAbortSignal) {
                                metadata.stopReason = "abort";
                                metadata.remainingGenerationAfterStop = undefined;
                            }
                            else
                                continue;
                        }
                    }
                    // No pending function calls — commit the session state and build the result
                    this._lastEvaluation = lastEvaluation;
                    this._canUseContextWindowForCompletion = true;
                    this._chatHistory = newChatHistory;
                    this._chatHistoryStateRef = {};
                    const lastModelResponseItem = getLastModelResponseItem(newChatHistory);
                    // responseText is the main response only: string items, without segment objects
                    const responseText = lastModelResponseItem.response
                        .filter((item) => typeof item === "string")
                        .join("");
                    if (metadata.stopReason === "customStopTrigger")
                        return {
                            response: lastModelResponseItem.response,
                            responseText,
                            stopReason: metadata.stopReason,
                            customStopTrigger: metadata.customStopTrigger,
                            remainingGenerationAfterStop: metadata.remainingGenerationAfterStop
                        };
                    return {
                        response: lastModelResponseItem.response,
                        responseText,
                        stopReason: metadata.stopReason,
                        remainingGenerationAfterStop: metadata.remainingGenerationAfterStop
                    };
                }
            }
            finally {
                disposeAbortController();
            }
        });
    }
/**
* Preload a user prompt into the current context sequence state to make later inference of the model response begin sooner
* and feel faster.
*
* > **Note:** Preloading a long user prompt can incur context shifts, so consider limiting the length of prompts you preload
* @param prompt - the prompt to preload
* @param [options]
*/
async preloadPrompt(prompt, options = {}) {
await this.completePromptWithMeta(prompt, {
...options,
completeAsModel: false,
maxTokens: 0
});
}
/**
* Preload a user prompt into the current context sequence state and generate a completion for it.
*
* > **Note:** Preloading a long user prompt and completing a user prompt with a high number of `maxTokens` can incur context shifts,
* > so consider limiting the length of prompts you preload.
* >
* > Also, it's recommended to limit the number of tokens generated to a reasonable amount by configuring `maxTokens`.
* @param prompt - the prompt to preload
* @param [options]
*/
async completePrompt(prompt, options = {}) {
const { completion } = await this.completePromptWithMeta(prompt, options);
return completion;
}
    /**
     * Create a smart completion engine that caches the prompt completions
     * and reuses them when the user prompt matches the beginning of the cached prompt or completion.
     *
     * All completions are made and cache is used only for the current chat session state.
     * You can create a single completion engine for an entire chat session.
     */
    createPromptCompletionEngine(options) {
        // Construction is delegated to the engine's internal factory, bound to this session
        return LlamaChatSessionPromptCompletionEngine._create(this, options);
    }
/**
* See `completePrompt` for more information.
* @param prompt
* @param [options]
*/
async completePromptWithMeta(prompt, { maxTokens, stopOnAbortSignal = false, functions, documentFunctionParams, onTextChunk, onToken, signal, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = false, repeatPenalty, tokenBias, customStopTriggers, evaluationPriority, completeAsModel } = {}) {
this._ensureNotDisposed();
if (grammar != null) {
if (grammar._llama == null)
throw new Error("The grammar passed to this function is not a LlamaGrammar instance.");
else if (grammar._llama !== this.model._llama)
throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
}
const [abortController, disposeAbortController] = wrapAbortSignal(signal);
this._preloadAndCompleteAbortControllers.add(abortController);
const completeAsModelEnabled = typeof completeAsModel == "boolean"
? completeAsModel
: completeAsModel === "auto"
? "auto"
: completeAsModel?.enabled ?? defaultCompleteAsModel.enabled;
const modelArchitecture = this.model.fileInfo.metadata?.general?.architecture;
const shouldCompleteAsModel = completeAsModelEnabled === "auto"
? modelArchitecture === GgufArchitectureType.gptOss
: completeAsModelEnabled;
try {
return await withLock([this._chatLock, "evaluation"], abortController.signal, async () => {
this._ensureNotDisposed();
if (this._chat == null)
throw new DisposedError();
if (shouldCompleteAsModel) {
const messagesToAppendOption = (typeof completeAsModel == "boolean" || completeAsModel === "auto")
? defaultCompleteAsModel.appendedMessages
: completeAsModel?.appendedMessages ?? defaultCompleteAsModel.appendedMessages;
const messagesToAppend = messagesToAppendOption.length === 0
? defaultCompleteAsModel.appendedMessages
: messagesToAppendOption;
const addMessageToChatHistory = (chatHistory) => {
const newHistory = chatHistory.slice();
if (messagesToAppend.at(0)?.type === "model")
newHistory.push({ type: "user", text: "" });
for (let i = 0; i < messagesToAppend.length; i++) {
const item = messagesToAppend[i];
const isLastItem = i === messagesToAppend.length - 1;
if (item == null)
continue;
if (isLastItem && item.type === "model") {
const newResponse = item.response.slice();
if (typeof newResponse.at(-1) === "string")
newResponse.push(newResponse.pop() + prompt);
else
newResponse.push(prompt);
newHistory.push({
type: "model",
response: newResponse
});
}
else
newHistory.push(item);
}
if (messagesToAppend.at(-1)?.type !== "model")
newHistory.push({ type: "model", response: [prompt] });
return {
history: newHistory,
addedCount: newHistory.length - chatHistory.length
};
};
const { history: messagesWithPrompt, addedCount } = addMessageToChatHistory(this._chatHistory);
const { response, lastEvaluation, metadata } = await this._chat.generateResponse(messagesWithPrompt, {
abortOnNonText: true,
functions,
documentFunctionParams,
grammar: grammar, // this is allowed only because `abortOnNonText` is enabled
onTextChunk,
onToken,
signal: abortController.signal,
stopOnAbortSignal: true,
repeatPenalty,
minP,
topK,
topP,
seed,
tokenBias,
customStopTriggers,
maxTokens: maxTokens == null
? undefined
: Math.min(1, maxTokens), // regular prompting ignores `maxTokens: 0`
temperature,
trimWhitespaceSuffix,
contextShift: {
...this._contextShift,
lastEvaluationMetadata: this._lastEvaluation?.contextShiftMetadata
},
evaluationPriority,
lastEvaluationContextWindow: {
history: this._lastEvaluation?.contextWindow == null
? undefined
: addMessageToChatHistory(this._lastEvaluation?.contextWindow).history,
minimumOverlapPercentageToPreventContextShift: 0.8
}
});
this._ensureNotDisposed();
this._lastEvaluation = {
cleanHistory: this._chatHistory,
contextWindow: lastEvaluation.contextWindow.slice(0, -addedCount),
contextShiftMetadata: lastEvaluation.contextShiftMetadata
};
this._canUseContextWindowForCompletion = this._chatHistory.at(-1)?.type === "user";
if (!stopOnAbortSignal && metadata.stopReason === "abort" && abortController.signal?.aborted)
throw abortController.signal.reason;
if (metadata.stopReason === "customStopTrigger")
return {
completion: response,
stopReason: metadata.stopReason,
customStopTrigger: metadata.customStopTrigger,
remainingGenerationAfterStop: metadata.remainingGenerationAfterStop
};
return {
completion: response,
stopReason: metadata.stopReason,
remainingGenerationAfterStop: metadata.remainingGenerationAfterStop
};
}
else {
const { completion, lastEvaluation, metadata } = await this._chat.loadChatAndCompleteUserMessage(asWithLastUserMessageRemoved(this._chatHistory), {
initialUserPrompt: prompt,
functions,
documentFunctionParams,
grammar,
onTextChunk,
onToken,
signal: abortController.signal,
stopOnAbortSignal: true,
repeatPenalty,
minP,
topK,
topP,
seed,
tokenBias,
customStopTriggers,
maxTokens,
temperature,
trimWhitespaceSuffix,
contextShift: {
...this._contextShift,
lastEvaluationMetadata: this._lastEvaluation?.contextShiftMetadata
},
evaluationPriority,
lastEvaluationContextWindow: {
history: asWithLastUserMessageRemoved(this._lastEvaluation?.contextWindow),
minimumOverlapPercentageToPreventContextShift: 0.8
}
});
this._ensureNotDisposed();
this._lastEvaluation = {
cleanHistory: this._chatHistory,
contextWindow: asWithLastUserMessageRemoved(lastEvaluation.contextWindow),
contextShiftMetadata: lastEvaluation.contextShiftMetadata
};
this._canUseContextWindowForCompletion = this._chatHistory.at(-1)?.type === "user";
if (!stopOnAbortSignal && metadata.stopReason === "abort" && abortController.signal?.aborted)
throw abortController.signal.reason;
if (metadata.stopReason === "customStopTrigger")
return {
completion: completion,
stopReason: metadata.stopReason,
customStopTrigger: metadata.customStopTrigger,
remainingGenerationAfterStop: metadata.remainingGenerationAfterStop
};
return {
completion: completion,
stopReason: metadata.stopReason,
remainingGenerationAfterStop: metadata.remainingGenerationAfterStop
};
}
});
}
finally {
this._preloadAndCompleteAbortControllers.delete(abortController);
disposeAbortController();
}
}
getChatHistory() {
return structuredClone(this._chatHistory);
}
getLastEvaluationContextWindow() {
if (this._lastEvaluation == null)
return null;
return structuredClone(this._lastEvaluation?.contextWindow);
}
    /**
     * Replace the session's chat history with a deep copy of the given history.
     *
     * Also resets completion-related state: a fresh history state ref is created
     * (prompt-completion caches are keyed on it, so they become stale), and the
     * last evaluation snapshot is discarded.
     */
    setChatHistory(chatHistory) {
        this._chatHistory = structuredClone(chatHistory);
        this._chatHistoryStateRef = {};
        this._lastEvaluation = undefined;
        this._canUseContextWindowForCompletion = false;
    }
/** Clear the chat history and reset it to the initial state. */
resetChatHistory() {
if (this._chat == null || this.disposed)
throw new DisposedError();
const chatWrapperSupportsSystemMessages = this._chat.chatWrapper.settings.supportsSystemMessages;
if (chatWrapperSupportsSystemMessages == null || chatWrapperSupportsSystemMessages || this._forceAddSystemPrompt)
this.setChatHistory(this._chat.chatWrapper.generateInitialChatHistory({ systemPrompt: this._systemPrompt }));
else
this.setChatHistory([]);
}
/** @internal */
_stopAllPreloadAndPromptCompletions() {
for (const abortController of this._preloadAndCompleteAbortControllers)
abortController.abort();
this._preloadAndCompleteAbortControllers.clear();
}
/** @internal */
_ensureNotDisposed() {
if (this.disposed)
throw new DisposedError();
}
}
function addFunctionCallToChatHistory({ chatHistory, functionName, functionDescription, callParams, callResult, rawCall, startsNewChunk }) {
const newChatHistory = chatHistory.slice();
if (newChatHistory.length === 0 || newChatHistory[newChatHistory.length - 1].type !== "model")
newChatHistory.push({
type: "model",
response: []
});
const lastModelResponseItem = newChatHistory[newChatHistory.length - 1];
const newLastModelResponseItem = { ...lastModelResponseItem };
newChatHistory[newChatHistory.length - 1] = newLastModelResponseItem;
const modelResponse = newLastModelResponseItem.response.slice();
newLastModelResponseItem.response = modelResponse;
const functionCall = {
type: "functionCall",
name: functionName,
description: functionDescription,
params: callParams,
result: callResult,
rawCall
};
if (startsNewChunk)
functionCall.startsNewChunk = true;
modelResponse.push(functionCall);
return newChatHistory;
}
function getLastModelResponseItem(chatHistory) {
if (chatHistory.length === 0 || chatHistory[chatHistory.length - 1].type !== "model")
throw new Error("Expected chat history to end with a model response");
return chatHistory[chatHistory.length - 1];
}
function asWithLastUserMessageRemoved(chatHistory) {
if (chatHistory == null)
return chatHistory;
const newChatHistory = chatHistory.slice();
while (newChatHistory.at(-1)?.type === "user")
newChatHistory.pop();
return newChatHistory;
}
//# sourceMappingURL=LlamaChatSession.js.map

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,43 @@
import type { LlamaContextSequence } from "../../LlamaContext/LlamaContext.js";
import type { LLamaChatCompletePromptOptions } from "../LlamaChatSession.js";
export type LLamaChatPromptCompletionEngineOptions = {
    /**
     * Max tokens to allow for preloading a prompt and generating a completion for it.
     *
     * Defaults to `256` or half of the context size, whichever is smaller
     * (further capped by half of the model's SWA size, when it has one).
     */
    maxPreloadTokens?: number;
    /**
     * Called whenever a background completion makes progress,
     * with the prompt and the completion generated for it so far.
     */
    onGeneration?(prompt: string, completion: string): void;
    /**
     * Max number of completions to cache.
     *
     * Defaults to `100`.
     */
    maxCachedCompletions?: number;
    // The options below are forwarded as-is to `completePrompt`
    // for every background completion the engine performs:
    temperature?: LLamaChatCompletePromptOptions["temperature"];
    minP?: LLamaChatCompletePromptOptions["minP"];
    topK?: LLamaChatCompletePromptOptions["topK"];
    topP?: LLamaChatCompletePromptOptions["topP"];
    seed?: LLamaChatCompletePromptOptions["seed"];
    trimWhitespaceSuffix?: LLamaChatCompletePromptOptions["trimWhitespaceSuffix"];
    evaluationPriority?: LLamaChatCompletePromptOptions["evaluationPriority"];
    repeatPenalty?: LLamaChatCompletePromptOptions["repeatPenalty"];
    tokenBias?: LLamaChatCompletePromptOptions["tokenBias"];
    customStopTriggers?: LLamaChatCompletePromptOptions["customStopTriggers"];
    grammar?: LLamaChatCompletePromptOptions["grammar"];
    functions?: LLamaChatCompletePromptOptions["functions"];
    documentFunctionParams?: LLamaChatCompletePromptOptions["documentFunctionParams"];
    completeAsModel?: LLamaChatCompletePromptOptions["completeAsModel"];
};
/**
 * Default `maxPreloadTokens`: the smaller of `256` and half of the sequence's context size,
 * also capped by half of the model's SWA size when it has one.
 */
export declare const defaultMaxPreloadTokens: (sequence: LlamaContextSequence) => number;
/**
 * Caches prompt completions and generates them in the background
 * on the session's context sequence.
 *
 * Create an instance via `LlamaChatSession.createPromptCompletionEngine(...)`.
 */
export declare class LlamaChatSessionPromptCompletionEngine {
    private constructor();
    /** Stop any in-flight background completion; also called automatically when the session is disposed */
    dispose(): void;
    /**
     * Get completion for the prompt from the cache,
     * and begin preloading this prompt into the context sequence and completing it.
     *
     * On completion progress, `onGeneration` (configured for this engine instance) will be called.
     */
    complete(prompt: string): string;
}

View File

@@ -0,0 +1,191 @@
import { DisposeAggregator, DisposedError } from "lifecycle-utils";
import { getConsoleLogPrefix } from "../../../utils/getConsoleLogPrefix.js";
import { LruCache } from "../../../utils/LruCache.js";
import { safeEventCallback } from "../../../utils/safeEventCallback.js";
// Default token budget for preloading a prompt and completing it:
// capped at 256, at half the context size, and - when the model has a
// sliding-window attention (SWA) size - at half of that as well.
export const defaultMaxPreloadTokens = (sequence) => {
    const defaultValue = 256;
    const halfContextSize = Math.ceil(sequence.contextSize / 2);
    const swaSize = sequence.model.fileInsights.swaSize;
    if (swaSize != null)
        return Math.min(Math.ceil(swaSize / 2), defaultValue, halfContextSize);
    return Math.min(defaultValue, halfContextSize);
};
// Default `maxCachedCompletions`: LRU capacity of each per-chat-state completion cache
const defaultMaxCachedCompletions = 100;
/**
 * Caches prompt completions per chat-history state and generates them
 * in the background via the session's `completePrompt`.
 */
export class LlamaChatSessionPromptCompletionEngine {
    // The chat session used for preloading prompts and generating completions
    /** @internal */ _chatSession;
    // Token budget for a preloaded prompt plus its generated completion (min 1)
    /** @internal */ _maxPreloadTokens;
    // LRU capacity of each per-chat-state completion cache (min 1)
    /** @internal */ _maxCachedCompletions;
    // Safe-wrapped `onGeneration` callback
    /** @internal */ _onGeneration;
    // Remaining options, forwarded to `completePrompt` on every background completion
    /** @internal */ _completionOptions;
    // Completion caches keyed by the session's chat-history state ref,
    // so a cache becomes unreachable (and collectible) when the chat history changes
    /** @internal */ _completionCaches = new WeakMap();
    /** @internal */ _disposeAggregator = new DisposeAggregator();
    // Aborts the currently running background completion; replaced on every restart
    /** @internal */ _currentCompletionAbortController = new AbortController();
    // The prompt most recently passed to `complete()`
    /** @internal */ _lastPrompt;
    /** @internal */ _disposed = false;
    constructor(chatSession, { maxPreloadTokens = defaultMaxPreloadTokens(chatSession.sequence), onGeneration, maxCachedCompletions = defaultMaxCachedCompletions, ...options }) {
        this._chatSession = chatSession;
        this._maxPreloadTokens = Math.max(1, maxPreloadTokens);
        this._maxCachedCompletions = Math.max(1, maxCachedCompletions);
        this._onGeneration = safeEventCallback(onGeneration);
        this._completionOptions = options;
        this.dispose = this.dispose.bind(this);
        // Dispose this engine automatically when the chat session is disposed
        this._disposeAggregator.add(this._chatSession.onDispose.createListener(this.dispose));
        this._disposeAggregator.add(() => {
            this._disposed = true;
            this._currentCompletionAbortController.abort();
        });
    }
    dispose() {
        if (this._disposed)
            return;
        this._disposeAggregator.dispose();
    }
    /**
     * Get completion for the prompt from the cache,
     * and begin preloading this prompt into the context sequence and completing it.
     *
     * On completion progress, `onGeneration` (configured for this engine instance) will be called.
     */
    complete(prompt) {
        if (this._disposed)
            throw new DisposedError();
        const completionCache = this._getCurrentCompletionCache();
        const completion = completionCache.getCompletion(prompt);
        // Restart the background completion when the new prompt diverges from
        // the last prompt + its cached completion (the in-flight work no longer applies)
        if (this._lastPrompt == null || !(this._lastPrompt + (completion ?? "")).startsWith(prompt)) {
            this._lastPrompt = prompt;
            this._restartCompletion(completionCache);
        }
        this._lastPrompt = prompt;
        return completion ?? "";
    }
    /** @internal */
    _getCurrentCompletionCache() {
        // One cache per chat-history state, created lazily on first access
        const completionCache = this._completionCaches.get(this._chatSession._chatHistoryStateRef);
        if (completionCache != null)
            return completionCache;
        const newCompletionCache = new CompletionCache(this._maxCachedCompletions);
        this._completionCaches.set(this._chatSession._chatHistoryStateRef, newCompletionCache);
        return newCompletionCache;
    }
    /** @internal */
    _restartCompletion(completionCache) {
        if (this._disposed)
            return;
        // Cancel the previous background completion before starting a new one
        this._currentCompletionAbortController.abort();
        this._currentCompletionAbortController = new AbortController();
        const prompt = this._lastPrompt;
        if (prompt == null)
            return;
        // Continue from whatever completion is already cached for this prompt
        const existingCompletion = completionCache.getCompletion(prompt);
        const promptToComplete = prompt + (existingCompletion ?? "");
        const currentPromptTokens = this._chatSession.model.tokenize(promptToComplete, false, "trimLeadingSpace").length;
        // Remaining token budget after accounting for the preloaded prompt itself
        const leftTokens = Math.max(0, this._maxPreloadTokens - currentPromptTokens);
        if (leftTokens === 0)
            return;
        const currentAbortController = this._currentCompletionAbortController;
        const currentAbortSignal = this._currentCompletionAbortController.signal;
        let currentCompletion = "";
        // Fire-and-forget: results are delivered through the cache and `onGeneration`
        void this._chatSession.completePrompt(promptToComplete, {
            ...this._completionOptions,
            stopOnAbortSignal: false,
            maxTokens: leftTokens,
            signal: currentAbortSignal,
            onTextChunk: (chunk) => {
                currentCompletion += chunk;
                const completion = (existingCompletion ?? "") + currentCompletion;
                completionCache.putCompletion(prompt, completion);
                // The chat state changed mid-generation; this cache is stale, so stop
                if (this._getCurrentCompletionCache() !== completionCache) {
                    currentAbortController.abort();
                    return;
                }
                // Only report progress for the prompt the user last asked about
                if (this._lastPrompt === prompt)
                    this._onGeneration?.(prompt, completion);
            }
        })
            .then(() => {
            // The prompt changed while generating; start completing the new one
            if (this._lastPrompt !== prompt && this._getCurrentCompletionCache() === completionCache)
                return this._restartCompletion(completionCache);
        })
            .catch((err) => {
            // Expected aborts are silenced; anything else is logged (nothing awaits this promise)
            if ((currentAbortSignal.aborted && err === currentAbortSignal.reason) || err instanceof DOMException)
                return;
            console.error(getConsoleLogPrefix(false, false), err);
        });
    }
    /** @internal */
    static _create(chatSession, options = {}) {
        return new LlamaChatSessionPromptCompletionEngine(chatSession, options);
    }
}
/**
 * Prefix-trie cache of input strings to completion strings, bounded by an LRU
 * over the cached inputs. Trie nodes are tuples of
 * `[childrenByChar: Map, completion?: string]`.
 */
class CompletionCache {
    // LRU of cached input strings; eviction prunes the trie via `_deleteInput`
    /** @internal */ _cache;
    // Trie root node
    /** @internal */ _rootNode = [new Map()];
    constructor(maxInputs) {
        this._cache = new LruCache(maxInputs, {
            onDelete: (key) => {
                this._deleteInput(key);
            }
        });
    }
    get maxInputs() {
        return this._cache.maxSize;
    }
    /**
     * Look up a completion for the given input.
     *
     * Matches either an exact cached input, or a cached input that is a prefix of
     * `input` whose stored completion covers the remainder of `input` - in which
     * case only the not-yet-typed tail of that completion is returned.
     *
     * A hit also touches the matching LRU entry. Returns `null` on a miss.
     */
    getCompletion(input) {
        let node = this._rootNode;
        for (let i = 0; i < input.length; i++) {
            if (node == null)
                return null;
            const [next, completion] = node;
            const char = input[i];
            if (!next.has(char)) {
                // No deeper trie path; check whether this node's completion
                // already covers the rest of the input
                if (completion != null && completion.startsWith(input.slice(i))) {
                    this._cache.get(input.slice(0, i)); // touch the LRU entry
                    return completion.slice(input.length - i);
                }
            }
            node = next.get(char);
        }
        if (node == null)
            return null;
        const [, possibleCompletion] = node;
        if (possibleCompletion != null) {
            this._cache.get(input); // touch the LRU entry
            return possibleCompletion;
        }
        return null;
    }
    /**
     * Store a completion for the given input, creating trie nodes as needed.
     *
     * When the node already holds a completion that starts with the new one,
     * the longer existing completion is kept and returned instead.
     */
    putCompletion(input, completion) {
        this._cache.set(input, null);
        let node = this._rootNode;
        for (let i = 0; i < input.length; i++) {
            const [next] = node;
            const char = input[i];
            if (!next.has(char))
                next.set(char, [new Map()]);
            node = next.get(char);
        }
        const currentCompletion = node[1];
        if (currentCompletion != null && currentCompletion.startsWith(completion))
            return currentCompletion;
        node[1] = completion;
        return completion;
    }
    /** @internal */
    _deleteInput(input) {
        // Walk down to the input's node, remembering the deepest branching point;
        // deleting that branch's edge removes the input from the trie.
        // NOTE(review): entries sharing the deleted edge below the branching point
        // are dropped along with it - presumably acceptable for a cache; confirm.
        let lastNodeWithMultipleChildren = this._rootNode;
        let lastNodeWithMultipleChildrenDeleteChar = input[0];
        let node = this._rootNode;
        for (let i = 0; i < input.length; i++) {
            const [next] = node;
            const char = input[i];
            if (next.size > 1) {
                lastNodeWithMultipleChildren = node;
                lastNodeWithMultipleChildrenDeleteChar = char;
            }
            if (!next.has(char))
                return;
            node = next.get(char);
        }
        if (lastNodeWithMultipleChildrenDeleteChar !== "")
            lastNodeWithMultipleChildren[0].delete(lastNodeWithMultipleChildrenDeleteChar);
    }
}
//# sourceMappingURL=LlamaChatSessionPromptCompletionEngine.js.map

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,15 @@
import { GbnfJsonDefList, GbnfJsonSchema, GbnfJsonSchemaToType } from "../../../utils/gbnfJson/types.js";
import { ChatSessionModelFunction } from "../../../types.js";
/**
* Define a function that can be used by the model in a chat session, and return it.
*
* This is a helper function to facilitate defining functions with full TypeScript type information.
*
* The handler function can return a Promise, and the return value will be awaited before being returned to the model.
* @param functionDefinition
*/
export declare function defineChatSessionFunction<const Params extends GbnfJsonSchema<Defs>, const Defs extends GbnfJsonDefList<Defs>>({ description, params, handler }: {
    /** A description of what the function does */
    description?: string;
    /** A GBNF JSON schema describing the parameters the model can call the function with */
    params?: Readonly<Params> & GbnfJsonSchema<Defs>;
    /** Invoked with the model-provided params; a returned Promise is awaited before the result is returned to the model */
    handler: (params: GbnfJsonSchemaToType<NoInfer<Params>>) => Promise<any> | any;
}): ChatSessionModelFunction<NoInfer<Params>>;

View File

@@ -0,0 +1,16 @@
/**
* Define a function that can be used by the model in a chat session, and return it.
*
* This is a helper function to facilitate defining functions with full TypeScript type information.
*
* The handler function can return a Promise, and the return value will be awaited before being returned to the model.
* @param functionDefinition
*/
export function defineChatSessionFunction({ description, params, handler }) {
return {
description,
params,
handler
};
}
//# sourceMappingURL=defineChatSessionFunction.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"defineChatSessionFunction.js","sourceRoot":"","sources":["../../../../src/evaluator/LlamaChatSession/utils/defineChatSessionFunction.ts"],"names":[],"mappings":"AAGA;;;;;;;GAOG;AACH,MAAM,UAAU,yBAAyB,CAGvC,EACE,WAAW,EACX,MAAM,EACN,OAAO,EAKV;IACG,OAAO;QACH,WAAW;QACX,MAAM;QACN,OAAO;KACV,CAAC;AACN,CAAC"}