First upload version 0.0.1
433 node_modules/node-llama-cpp/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts generated vendored Normal file
@@ -0,0 +1,433 @@
import { EventRelay } from "lifecycle-utils";
import { ChatWrapper } from "../../ChatWrapper.js";
import { ChatHistoryItem, ChatModelFunctionCall, ChatSessionModelFunctions, Token } from "../../types.js";
import { LlamaContextSequence } from "../LlamaContext/LlamaContext.js";
import { LlamaGrammar } from "../LlamaGrammar.js";
import { LLamaChatContextShiftOptions, LlamaChatResponseChunk, LlamaChatResponseFunctionCallParamsChunk } from "../LlamaChat/LlamaChat.js";
import { EvaluationPriority } from "../LlamaContext/types.js";
import { TokenBias } from "../TokenBias.js";
import { LlamaText } from "../../utils/LlamaText.js";
import { LLamaChatPromptCompletionEngineOptions, LlamaChatSessionPromptCompletionEngine } from "./utils/LlamaChatSessionPromptCompletionEngine.js";
export type LlamaChatSessionOptions = {
    contextSequence: LlamaContextSequence;
    /** `"auto"` is used by default */
    chatWrapper?: "auto" | ChatWrapper;
    systemPrompt?: string;
    /**
     * Add the system prompt even on models that don't support a system prompt.
     *
     * Each chat wrapper has its own workaround for adding a system prompt to a model that doesn't support it,
     * but forcing the system prompt on unsupported models may not always work as expected.
     *
     * Use with caution.
     */
    forceAddSystemPrompt?: boolean;
    /**
     * Automatically dispose the sequence when the session is disposed.
     *
     * Defaults to `false`.
     */
    autoDisposeSequence?: boolean;
    contextShift?: LlamaChatSessionContextShiftOptions;
};
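/*
 * A minimal sketch of constructing a session from these options; the model path is a
 * hypothetical placeholder:
 * ```ts
 * import {getLlama, LlamaChatSession} from "node-llama-cpp";
 *
 * const llama = await getLlama();
 * const model = await llama.loadModel({modelPath: "./model.gguf"}); // hypothetical path
 * const context = await model.createContext();
 * const session = new LlamaChatSession({
 *     contextSequence: context.getSequence(),
 *     systemPrompt: "You are a helpful assistant."
 * });
 *
 * console.log(await session.prompt("Hi there"));
 * ```
 */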
export type LlamaChatSessionContextShiftOptions = {
    /**
     * The number of tokens to delete from the context window to make space for new ones.
     * Defaults to 10% of the context size.
     */
    size?: LLamaChatContextShiftOptions["size"];
    /**
     * The strategy to use when deleting tokens from the context window.
     *
     * Defaults to `"eraseFirstResponseAndKeepFirstSystem"`.
     */
    strategy?: LLamaChatContextShiftOptions["strategy"];
};
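/*
 * For example, a sketch of a session that frees 512 tokens on each context shift,
 * spelling out the default strategy explicitly:
 * ```ts
 * const session = new LlamaChatSession({
 *     contextSequence: context.getSequence(),
 *     contextShift: {
 *         size: 512,
 *         strategy: "eraseFirstResponseAndKeepFirstSystem"
 *     }
 * });
 * ```
 */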
export type LLamaChatPromptOptions<Functions extends ChatSessionModelFunctions | undefined = ChatSessionModelFunctions | undefined> = {
    /**
     * Called as the model generates the main response with the generated text chunk.
     *
     * Useful for streaming the generated response as it's being generated.
     *
     * Includes only the main response without any text segments (like thoughts).
     * For streaming the response with segments, use {@link onResponseChunk `onResponseChunk`}.
     */
    onTextChunk?: (text: string) => void;
    /**
     * Called as the model generates the main response with the generated tokens.
     *
     * Preferably, you'd want to use {@link onTextChunk `onTextChunk`} instead of this.
     *
     * Includes only the main response without any segments (like thoughts).
     * For streaming the response with segments, use {@link onResponseChunk `onResponseChunk`}.
     */
    onToken?: (tokens: Token[]) => void;
    /**
     * Called as the model generates a response with the generated text and tokens,
     * including segment information (when the generated output is part of a segment).
     *
     * Useful for streaming the generated response as it's being generated, including the main response and all segments.
     *
     * Only use this function when you need the segmented texts, like thought segments (chain of thought text).
     */
    onResponseChunk?: (chunk: LlamaChatResponseChunk) => void;
    /**
     * An AbortSignal to later abort the generation.
     *
     * When the signal is aborted, the generation will stop and throw `signal.reason` as the error.
     *
     * > To stop an ongoing generation without throwing an error, also set `stopOnAbortSignal` to `true`.
     */
    signal?: AbortSignal;
    /**
     * When a response has already started being generated and then the signal is aborted,
     * the generation will stop and the response will be returned as-is instead of throwing an error.
     *
     * Defaults to `false`.
     */
    stopOnAbortSignal?: boolean;
    /** Maximum number of tokens to generate */
    maxTokens?: number;
    /**
     * Temperature is a hyperparameter that controls the randomness of the generated text.
     * It affects the probability distribution of the model's output tokens.
     *
     * A higher temperature (e.g., 1.5) makes the output more random and creative,
     * while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
     *
     * The suggested temperature is 0.8, which provides a balance between randomness and determinism.
     *
     * At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
     *
     * Set to `0` to disable.
     * Disabled by default (set to `0`).
     */
    temperature?: number;
    /**
     * From the next token candidates, discard the percentage of tokens with the lowest probability.
     * For example, if set to `0.05`, 5% of the lowest-probability tokens will be discarded.
     * This is useful for generating higher-quality results when using a high temperature.
     * Set to a value between `0` and `1` to enable.
     *
     * Only relevant when `temperature` is set to a value greater than `0`.
     * Disabled by default.
     */
    minP?: number;
    /**
     * Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
     * An integer number between `1` and the size of the vocabulary.
     * Set to `0` to disable (which uses the full vocabulary).
     *
     * Only relevant when `temperature` is set to a value greater than `0`.
     */
    topK?: number;
    /**
     * Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
     * and samples the next token only from this set.
     * A float number between `0` and `1`.
     * Set to `1` to disable.
     *
     * Only relevant when `temperature` is set to a value greater than `0`.
     */
    topP?: number;
    /**
     * Used to control the randomness of the generated text.
     *
     * Change the seed to get different results.
     *
     * Only relevant when using `temperature`.
     */
    seed?: number;
    /**
     * Trim whitespace from the end of the generated text.
     * Disabled by default.
     */
    trimWhitespaceSuffix?: boolean;
    /**
     * Force a given text prefix to be the start of the model response, to make the model follow a certain direction.
     *
     * May cause some models to not use the given functions in some scenarios where they would have been used otherwise,
     * so avoid using it together with function calling if you notice unexpected behavior.
     */
    responsePrefix?: string;
    /**
     * See the parameter `evaluationPriority` on the `LlamaContextSequence.evaluate()` function for more information.
     */
    evaluationPriority?: EvaluationPriority;
    repeatPenalty?: false | LlamaChatSessionRepeatPenalty;
    /**
     * Adjust the probability of tokens being generated.
     * Can be used to bias the model to generate tokens that you want it to lean towards,
     * or to avoid generating tokens that you want it to avoid.
     */
    tokenBias?: TokenBias | (() => TokenBias);
    /**
     * Custom stop triggers to stop the generation of the response when any of the provided triggers are found.
     */
    customStopTriggers?: (LlamaText | string | (string | Token)[])[];
    /**
     * Called as the model generates function calls with the generated parameters chunk for each function call.
     *
     * Useful for streaming the generated function call parameters as they're being generated.
     * Only useful in specific use cases,
     * such as showing the generated textual file content as it's being generated (note that doing this requires parsing incomplete JSON).
     *
     * The constructed text from all the params chunks of a given function call can be parsed as a JSON object,
     * according to the function parameters schema.
     *
     * Each function call has its own `callIndex` you can use to distinguish between them.
     *
     * Only relevant when using function calling (via passing the `functions` option).
     */
    onFunctionCallParamsChunk?: (chunk: LlamaChatResponseFunctionCallParamsChunk) => void;
    /**
     * Set the maximum number of tokens that the model is allowed to spend on various segmented responses.
     */
    budgets?: {
        /**
         * Budget for thought tokens.
         *
         * Defaults to `Infinity`.
         */
        thoughtTokens?: number;
        /**
         * Budget for comment tokens.
         *
         * Defaults to `Infinity`.
         */
        commentTokens?: number;
    };
} & ({
    grammar?: LlamaGrammar;
    functions?: never;
    documentFunctionParams?: never;
    maxParallelFunctionCalls?: never;
    onFunctionCallParamsChunk?: never;
} | {
    grammar?: never;
    functions?: Functions | ChatSessionModelFunctions;
    documentFunctionParams?: boolean;
    maxParallelFunctionCalls?: number;
    onFunctionCallParamsChunk?: (chunk: LlamaChatResponseFunctionCallParamsChunk) => void;
});
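/*
 * The union above makes `grammar` and `functions` mutually exclusive. A sketch of the
 * function-calling branch, using `defineChatSessionFunction` (declared in
 * ./utils/defineChatSessionFunction.d.ts below); the weather data is a hypothetical stub:
 * ```ts
 * import {defineChatSessionFunction} from "node-llama-cpp";
 *
 * const functions = {
 *     getWeather: defineChatSessionFunction({
 *         description: "Get the current weather in a city",
 *         params: {
 *             type: "object",
 *             properties: {
 *                 city: {type: "string"}
 *             }
 *         },
 *         handler({city}) {
 *             return {city, temperatureCelsius: 21}; // hypothetical stub
 *         }
 *     })
 * };
 *
 * const answer = await session.prompt("What's the weather in Paris?", {
 *     functions,
 *     maxParallelFunctionCalls: 2,
 *     onTextChunk: (text) => process.stdout.write(text) // stream the main response
 * });
 * ```
 */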
export type LLamaChatCompletePromptOptions = {
    /**
     * Generate a completion for the given user prompt up to the given number of tokens.
     *
     * Defaults to `256` or half the context size, whichever is smaller.
     */
    maxTokens?: LLamaChatPromptOptions["maxTokens"];
    /**
     * When a completion has already started being generated and then the given `signal` is aborted,
     * the generation will stop and the completion will be returned as-is instead of throwing an error.
     *
     * Defaults to `false`.
     */
    stopOnAbortSignal?: LLamaChatPromptOptions["stopOnAbortSignal"];
    /**
     * Called as the model generates a completion with the generated text chunk.
     *
     * Useful for streaming the generated completion as it's being generated.
     */
    onTextChunk?: LLamaChatPromptOptions["onTextChunk"];
    /**
     * Called as the model generates a completion with the generated tokens.
     *
     * Preferably, you'd want to use `onTextChunk` instead of this.
     */
    onToken?: LLamaChatPromptOptions["onToken"];
    signal?: LLamaChatPromptOptions["signal"];
    temperature?: LLamaChatPromptOptions["temperature"];
    minP?: LLamaChatPromptOptions["minP"];
    topK?: LLamaChatPromptOptions["topK"];
    topP?: LLamaChatPromptOptions["topP"];
    seed?: LLamaChatPromptOptions["seed"];
    trimWhitespaceSuffix?: LLamaChatPromptOptions["trimWhitespaceSuffix"];
    evaluationPriority?: LLamaChatPromptOptions["evaluationPriority"];
    repeatPenalty?: LLamaChatPromptOptions["repeatPenalty"];
    tokenBias?: LLamaChatPromptOptions["tokenBias"];
    customStopTriggers?: LLamaChatPromptOptions["customStopTriggers"];
    grammar?: LlamaGrammar;
    /**
     * Functions are not used by the model here,
     * but are used for keeping the instructions given to the model about the functions in the current context state,
     * to avoid context shifts.
     *
     * It's best to provide the same functions that were used for the previous prompt here.
     */
    functions?: ChatSessionModelFunctions;
    /**
     * Functions are not used by the model here,
     * but are used for keeping the instructions given to the model about the functions in the current context state,
     * to avoid context shifts.
     *
     * It's best to provide the same value that was used for the previous prompt here.
     */
    documentFunctionParams?: boolean;
    /**
     * Whether to complete the prompt as a model response.
     *
     * - **`"auto"`**: Automatically determine whether to complete as a model response based on the model used.
     * This is a good option to work around some models that don't support user prompt completions.
     * - **`true`**: Always complete as a model response
     * - **`false`**: Never complete as a model response
     *
     * Defaults to `"auto"`.
     */
    completeAsModel?: "auto" | boolean | {
        /**
         * Whether to complete the prompt as a model response.
         *
         * - **`"auto"`**: Automatically determine whether to complete as a model response based on the model used.
         * This is a good option to work around some models that don't support user prompt completions.
         * - **`true`**: Always complete as a model response
         * - **`false`**: Never complete as a model response
         *
         * Defaults to `"auto"`.
         */
        enabled?: "auto" | boolean;
        /**
         * The messages to append to the chat history to generate a completion as a model response.
         *
         * If the last message is a model message, the prompt will be pushed to it for the completion,
         * otherwise a new model message will be added with the prompt.
         *
         * It must contain a user message or a system message before the model message.
         *
         * Defaults to:
         * ```ts
         * [
         *     {
         *         type: "system",
         *         text: "For your next response predict what the user may send next. " +
         *             "No yapping, no whitespace. Match the user's language and tone."
         *     },
         *     {type: "user", text: ""},
         *     {type: "model", response: [""]}
         * ]
         * ```
         */
        appendedMessages?: ChatHistoryItem[];
    };
};
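/*
 * These options feed `completePrompt()` on `LlamaChatSession` (declared below).
 * A sketch of an input-box style completion:
 * ```ts
 * const completion = await session.completePrompt("How do I ", {
 *     maxTokens: 64,
 *     temperature: 0.8,
 *     onTextChunk(chunk) {
 *         process.stdout.write(chunk); // stream the completion as it's generated
 *     }
 * });
 * ```
 */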
export type LLamaChatPreloadPromptOptions = {
    signal?: LLamaChatCompletePromptOptions["signal"];
    evaluationPriority?: LLamaChatCompletePromptOptions["evaluationPriority"];
    functions?: LLamaChatCompletePromptOptions["functions"];
    documentFunctionParams?: LLamaChatCompletePromptOptions["documentFunctionParams"];
};
export type LlamaChatSessionRepeatPenalty = {
    /**
     * Number of recent tokens generated by the model to apply repetition penalties to.
     * Defaults to `64`.
     */
    lastTokens?: number;
    punishTokensFilter?: (tokens: Token[]) => Token[];
    /**
     * Penalize new line tokens.
     * Enabled by default.
     */
    penalizeNewLine?: boolean;
    /**
     * The relative amount to lower the probability of the tokens in `punishTokens` by.
     * Defaults to `1.1`.
     * Set to `1` to disable.
     */
    penalty?: number;
    /**
     * If a token appears `n` times in the `punishTokens` array, lower its probability by `n * frequencyPenalty`.
     * Disabled by default (`0`).
     * Set to a value between `0` and `1` to enable.
     */
    frequencyPenalty?: number;
    /**
     * Lower the probability of all the tokens in the `punishTokens` array by `presencePenalty`.
     * Disabled by default (`0`).
     * Set to a value between `0` and `1` to enable.
     */
    presencePenalty?: number;
};
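/*
 * For instance, a sketch that softens repetition while leaving new lines unpenalized:
 * ```ts
 * const answer = await session.prompt("List synonyms for 'big'", {
 *     repeatPenalty: {
 *         lastTokens: 64,
 *         penalty: 1.1,
 *         penalizeNewLine: false,
 *         frequencyPenalty: 0.02,
 *         presencePenalty: 0.02
 *     }
 * });
 * ```
 */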
/**
 * @see [Using `LlamaChatSession`](https://node-llama-cpp.withcat.ai/guide/chat-session) tutorial
 */
export declare class LlamaChatSession {
    readonly onDispose: EventRelay<void>;
    constructor(options: LlamaChatSessionOptions);
    dispose({ disposeSequence }?: {
        disposeSequence?: boolean;
    }): void;
    /** @hidden */
    [Symbol.dispose](): void;
    get disposed(): boolean;
    get chatWrapper(): ChatWrapper;
    get sequence(): LlamaContextSequence;
    get context(): import("../LlamaContext/LlamaContext.js").LlamaContext;
    get model(): import("../LlamaModel/LlamaModel.js").LlamaModel;
    prompt<const Functions extends ChatSessionModelFunctions | undefined = undefined>(prompt: string, options?: LLamaChatPromptOptions<Functions>): Promise<string>;
    /**
     * @param prompt
     * @param [options]
     */
    promptWithMeta<const Functions extends ChatSessionModelFunctions | undefined = undefined>(prompt: string, { functions, documentFunctionParams, maxParallelFunctionCalls, onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix, responsePrefix, repeatPenalty, tokenBias, customStopTriggers, evaluationPriority }?: LLamaChatPromptOptions<Functions>): Promise<{
        response: (string | ChatModelFunctionCall | import("../../types.js").ChatModelSegment)[];
        responseText: string;
        stopReason: "customStopTrigger";
        customStopTrigger: (string | Token)[];
        remainingGenerationAfterStop: string | Token[] | undefined;
    } | {
        response: (string | ChatModelFunctionCall | import("../../types.js").ChatModelSegment)[];
        responseText: string;
        stopReason: "abort" | "maxTokens" | "eogToken" | "stopGenerationTrigger" | "functionCalls";
        remainingGenerationAfterStop: string | Token[] | undefined;
        customStopTrigger?: undefined;
    }>;
    /**
     * Preload a user prompt into the current context sequence state to make later inference of the model response begin sooner
     * and feel faster.
     *
     * > **Note:** Preloading a long user prompt can incur context shifts, so consider limiting the length of prompts you preload.
     * @param prompt - the prompt to preload
     * @param [options]
     */
    preloadPrompt(prompt: string, options?: LLamaChatPreloadPromptOptions): Promise<void>;
    /**
     * Preload a user prompt into the current context sequence state and generate a completion for it.
     *
     * > **Note:** Preloading a long user prompt and completing a user prompt with a high number of `maxTokens` can incur context shifts,
     * > so consider limiting the length of prompts you preload.
     * >
     * > Also, it's recommended to limit the number of tokens generated to a reasonable amount by configuring `maxTokens`.
     * @param prompt - the prompt to preload
     * @param [options]
     */
    completePrompt(prompt: string, options?: LLamaChatCompletePromptOptions): Promise<string>;
    /**
     * Create a smart completion engine that caches the prompt completions
     * and reuses them when the user prompt matches the beginning of the cached prompt or completion.
     *
     * All completions are made, and the cache is used, only for the current chat session state.
     * You can create a single completion engine for an entire chat session.
     */
    createPromptCompletionEngine(options?: LLamaChatPromptCompletionEngineOptions): LlamaChatSessionPromptCompletionEngine;
    /**
     * See `completePrompt` for more information.
     * @param prompt
     * @param [options]
     */
    completePromptWithMeta(prompt: string, { maxTokens, stopOnAbortSignal, functions, documentFunctionParams, onTextChunk, onToken, signal, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix, repeatPenalty, tokenBias, customStopTriggers, evaluationPriority, completeAsModel }?: LLamaChatCompletePromptOptions): Promise<{
        completion: string;
        stopReason: "customStopTrigger";
        customStopTrigger: (string | Token)[];
        remainingGenerationAfterStop: string | Token[] | undefined;
    } | {
        completion: string;
        stopReason: "abort" | "maxTokens" | "eogToken" | "stopGenerationTrigger" | "functionCalls";
        remainingGenerationAfterStop: string | Token[] | undefined;
        customStopTrigger?: undefined;
    }>;
    getChatHistory(): ChatHistoryItem[];
    getLastEvaluationContextWindow(): ChatHistoryItem[] | null;
    setChatHistory(chatHistory: ChatHistoryItem[]): void;
    /** Clear the chat history and reset it to the initial state. */
    resetChatHistory(): void;
}
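/*
 * A sketch of combining `preloadPrompt()` with `promptWithMeta()` to inspect why generation
 * stopped; the article text is a hypothetical placeholder:
 * ```ts
 * const userPrompt = "Summarize this article: ..."; // hypothetical prompt
 * await session.preloadPrompt(userPrompt); // warm up the context while the user reads
 *
 * const {responseText, stopReason} = await session.promptWithMeta(userPrompt, {
 *     maxTokens: 200
 * });
 * if (stopReason === "maxTokens")
 *     console.log("The response was truncated at the token limit");
 * console.log(responseText);
 * ```
 */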
622 node_modules/node-llama-cpp/dist/evaluator/LlamaChatSession/LlamaChatSession.js generated vendored Normal file
@@ -0,0 +1,622 @@
import { DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
import { appendUserMessageToChatHistory } from "../../utils/appendUserMessageToChatHistory.js";
import { LlamaChat } from "../LlamaChat/LlamaChat.js";
import { wrapAbortSignal } from "../../utils/wrapAbortSignal.js";
import { safeEventCallback } from "../../utils/safeEventCallback.js";
import { GgufArchitectureType } from "../../gguf/types/GgufMetadataTypes.js";
import { LlamaChatSessionPromptCompletionEngine } from "./utils/LlamaChatSessionPromptCompletionEngine.js";
const defaultCompleteAsModel = {
    enabled: "auto",
    appendedMessages: [
        {
            type: "system",
            text: "For your next response predict what the user may send next. No yapping, no whitespace. Match the user's language and tone."
        },
        { type: "user", text: "" },
        { type: "model", response: [""] }
    ]
};
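/*
 * These defaults can be overridden per call via the `completeAsModel` option; a sketch with
 * a hypothetical variation of the system text:
 * ```ts
 * const completion = await session.completePrompt("I think we should ", {
 *     completeAsModel: {
 *         enabled: "auto",
 *         appendedMessages: [
 *             {type: "system", text: "Predict the user's next message. Reply with the prediction only."},
 *             {type: "user", text: ""},
 *             {type: "model", response: [""]}
 *         ]
 *     }
 * });
 * ```
 */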
/**
 * @see [Using `LlamaChatSession`](https://node-llama-cpp.withcat.ai/guide/chat-session) tutorial
 */
export class LlamaChatSession {
    /** @internal */ _disposeAggregator = new DisposeAggregator();
    /** @internal */ _autoDisposeSequence;
    /** @internal */ _contextShift;
    /** @internal */ _forceAddSystemPrompt;
    /** @internal */ _systemPrompt;
    /** @internal */ _chatLock = {};
    /** @internal */ _chatHistory;
    /** @internal */ _lastEvaluation;
    /** @internal */ _canUseContextWindowForCompletion = true;
    /** @internal */ _chat;
    /** @internal */ _chatHistoryStateRef = {};
    /** @internal */ _preloadAndCompleteAbortControllers = new Set();
    onDispose = new EventRelay();
    constructor(options) {
        const { contextSequence, chatWrapper = "auto", systemPrompt, forceAddSystemPrompt = false, autoDisposeSequence = false, contextShift } = options;
        if (contextSequence == null)
            throw new Error("contextSequence cannot be null");
        if (contextSequence.disposed)
            throw new DisposedError();
        this._contextShift = contextShift;
        this._forceAddSystemPrompt = forceAddSystemPrompt;
        this._systemPrompt = systemPrompt;
        this._chat = new LlamaChat({
            autoDisposeSequence,
            chatWrapper,
            contextSequence
        });
        const chatWrapperSupportsSystemMessages = this._chat.chatWrapper.settings.supportsSystemMessages;
        if (chatWrapperSupportsSystemMessages == null || chatWrapperSupportsSystemMessages || this._forceAddSystemPrompt)
            this._chatHistory = this._chat.chatWrapper.generateInitialChatHistory({ systemPrompt: this._systemPrompt });
        else
            this._chatHistory = [];
        this._autoDisposeSequence = autoDisposeSequence;
        this._disposeAggregator.add(this._chat.onDispose.createListener(() => {
            this.dispose();
        }));
        this._disposeAggregator.add(this.onDispose.dispatchEvent);
    }
    dispose({ disposeSequence = this._autoDisposeSequence } = {}) {
        if (this._chat == null)
            return;
        this._chat.dispose({ disposeSequence });
        this._chat = null;
        this._disposeAggregator.dispose();
    }
    /** @hidden */
    [Symbol.dispose]() {
        return this.dispose();
    }
    get disposed() {
        return this._chat == null || this._chat.disposed;
    }
    get chatWrapper() {
        if (this._chat == null)
            throw new DisposedError();
        return this._chat.chatWrapper;
    }
    get sequence() {
        if (this._chat == null)
            throw new DisposedError();
        return this._chat.sequence;
    }
    get context() {
        return this.sequence.context;
    }
    get model() {
        return this.sequence.model;
    }
    async prompt(prompt, options = {}) {
        const { functions, documentFunctionParams, maxParallelFunctionCalls, onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = false, responsePrefix, repeatPenalty, tokenBias, customStopTriggers } = options;
        const { responseText } = await this.promptWithMeta(prompt, {
            // this is a workaround to allow passing both `functions` and `grammar`
            functions: functions,
            grammar: grammar,
            documentFunctionParams: documentFunctionParams,
            maxParallelFunctionCalls: maxParallelFunctionCalls,
            onFunctionCallParamsChunk: onFunctionCallParamsChunk,
            onTextChunk, onToken, onResponseChunk, budgets, signal, stopOnAbortSignal, maxTokens,
            temperature, minP, topK, topP, seed,
            trimWhitespaceSuffix, responsePrefix, repeatPenalty, tokenBias, customStopTriggers
        });
        return responseText;
    }
    /**
     * @param prompt
     * @param [options]
     */
    async promptWithMeta(prompt, { functions, documentFunctionParams, maxParallelFunctionCalls, onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = false, responsePrefix, repeatPenalty, tokenBias, customStopTriggers, evaluationPriority } = {}) {
        this._ensureNotDisposed();
        if (grammar != null && grammar._llama !== this.model._llama)
            throw new Error("The LlamaGrammar passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
        this._stopAllPreloadAndPromptCompletions();
        return await withLock([this._chatLock, "evaluation"], signal, async () => {
            this._ensureNotDisposed();
            this._stopAllPreloadAndPromptCompletions();
            if (this._chat == null)
                throw new DisposedError();
            const supportsParallelFunctionCalling = this._chat.chatWrapper.settings.functions.parallelism != null;
            const [abortController, disposeAbortController] = wrapAbortSignal(signal);
            let lastEvaluation = this._canUseContextWindowForCompletion
                ? this._lastEvaluation
                : undefined;
            let newChatHistory = appendUserMessageToChatHistory(this._chatHistory, prompt);
            let newContextWindowChatHistory = lastEvaluation?.contextWindow == null
                ? undefined
                : appendUserMessageToChatHistory(lastEvaluation?.contextWindow, prompt);
            let previousFunctionCalls = 0;
            const resolvedResponsePrefix = (responsePrefix != null && responsePrefix !== "")
                ? responsePrefix
                : undefined;
            newChatHistory.push({
                type: "model",
                response: resolvedResponsePrefix != null
                    ? [resolvedResponsePrefix]
                    : []
            });
            if (newContextWindowChatHistory != null)
                newContextWindowChatHistory.push({
                    type: "model",
                    response: resolvedResponsePrefix != null
                        ? [resolvedResponsePrefix]
                        : []
                });
            if (resolvedResponsePrefix != null) {
                safeEventCallback(onToken)?.(this.model.tokenize(resolvedResponsePrefix));
                safeEventCallback(onTextChunk)?.(resolvedResponsePrefix);
                safeEventCallback(onResponseChunk)?.({
                    type: undefined,
                    segmentType: undefined,
                    text: resolvedResponsePrefix,
                    tokens: this.model.tokenize(resolvedResponsePrefix)
                });
            }
            try {
                while (true) {
                    const functionCallsAndResults = [];
                    let canThrowFunctionCallingErrors = false;
                    let abortedOnFunctionCallError = false;
                    const initialOutputTokens = this._chat.sequence.tokenMeter.usedOutputTokens;
                    const { lastEvaluation: currentLastEvaluation, metadata } = await this._chat.generateResponse(newChatHistory, {
                        functions,
                        documentFunctionParams,
                        maxParallelFunctionCalls,
                        grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
                        onTextChunk: safeEventCallback(onTextChunk),
                        onToken: safeEventCallback(onToken),
                        onResponseChunk: safeEventCallback(onResponseChunk),
                        onFunctionCallParamsChunk: onFunctionCallParamsChunk == null
                            ? undefined
                            : safeEventCallback((chunk) => onFunctionCallParamsChunk?.({
                                callIndex: previousFunctionCalls + chunk.callIndex,
                                functionName: chunk.functionName,
                                paramsChunk: chunk.paramsChunk,
                                done: chunk.done
                            })),
                        budgets: {
                            includeCurrentResponse: true,
                            thoughtTokens: budgets?.thoughtTokens,
                            commentTokens: budgets?.commentTokens
                        },
                        signal: abortController.signal,
                        stopOnAbortSignal,
                        repeatPenalty,
                        minP,
                        topK,
                        topP,
                        seed,
                        tokenBias,
                        customStopTriggers,
                        maxTokens,
                        temperature,
                        trimWhitespaceSuffix,
                        contextShift: {
                            ...this._contextShift,
                            lastEvaluationMetadata: lastEvaluation?.contextShiftMetadata
                        },
                        evaluationPriority,
                        lastEvaluationContextWindow: {
                            history: newContextWindowChatHistory,
                            minimumOverlapPercentageToPreventContextShift: 0.5
                        },
                        onFunctionCall: async (functionCall) => {
                            functionCallsAndResults.push((async () => {
                                try {
                                    const functionDefinition = functions?.[functionCall.functionName];
                                    if (functionDefinition == null)
                                        throw new Error(`The model tried to call function "${functionCall.functionName}" which is not defined`);
                                    const functionCallResult = await functionDefinition.handler(functionCall.params);
                                    return {
                                        functionCall,
                                        functionDefinition,
                                        functionCallResult
                                    };
                                }
                                catch (err) {
                                    if (!abortController.signal.aborted) {
                                        abortedOnFunctionCallError = true;
                                        abortController.abort(err);
                                    }
                                    if (canThrowFunctionCallingErrors)
                                        throw err;
                                    return null;
                                }
                            })());
                        }
                    });
                    this._ensureNotDisposed();
                    if (abortController.signal.aborted && (abortedOnFunctionCallError || !stopOnAbortSignal))
                        throw abortController.signal.reason;
                    if (maxTokens != null)
                        maxTokens = Math.max(0, maxTokens - (this._chat.sequence.tokenMeter.usedOutputTokens - initialOutputTokens));
                    lastEvaluation = currentLastEvaluation;
                    newChatHistory = lastEvaluation.cleanHistory;
                    if (functionCallsAndResults.length > 0) {
                        canThrowFunctionCallingErrors = true;
                        const functionCallResultsPromise = Promise.all(functionCallsAndResults);
                        const raceEventAbortController = new AbortController();
                        await Promise.race([
                            functionCallResultsPromise,
                            new Promise((accept, reject) => {
                                abortController.signal.addEventListener("abort", () => {
                                    if (abortedOnFunctionCallError || !stopOnAbortSignal)
                                        reject(abortController.signal.reason);
                                    else
                                        accept();
                                }, { signal: raceEventAbortController.signal });
                                if (abortController.signal.aborted) {
                                    if (abortedOnFunctionCallError || !stopOnAbortSignal)
                                        reject(abortController.signal.reason);
                                    else
                                        accept();
                                }
                            })
                        ]);
                        raceEventAbortController.abort();
                        this._ensureNotDisposed();
                        if (!abortController.signal.aborted) {
                            const functionCallResults = (await functionCallResultsPromise)
                                .filter((result) => result != null);
                            this._ensureNotDisposed();
                            if (abortController.signal.aborted && (abortedOnFunctionCallError || !stopOnAbortSignal))
                                throw abortController.signal.reason;
                            newContextWindowChatHistory = lastEvaluation.contextWindow;
                            let startNewChunk = supportsParallelFunctionCalling;
                            for (const { functionCall, functionDefinition, functionCallResult } of functionCallResults) {
                                newChatHistory = addFunctionCallToChatHistory({
                                    chatHistory: newChatHistory,
                                    functionName: functionCall.functionName,
                                    functionDescription: functionDefinition.description,
                                    callParams: functionCall.params,
                                    callResult: functionCallResult,
                                    rawCall: functionCall.raw,
                                    startsNewChunk: startNewChunk
                                });
                                newContextWindowChatHistory = addFunctionCallToChatHistory({
                                    chatHistory: newContextWindowChatHistory,
                                    functionName: functionCall.functionName,
                                    functionDescription: functionDefinition.description,
                                    callParams: functionCall.params,
                                    callResult: functionCallResult,
                                    rawCall: functionCall.raw,
                                    startsNewChunk: startNewChunk
                                });
                                startNewChunk = false;
                                previousFunctionCalls++;
                            }
                            lastEvaluation.cleanHistory = newChatHistory;
                            lastEvaluation.contextWindow = newContextWindowChatHistory;
                            if (abortController.signal.aborted && !abortedOnFunctionCallError && stopOnAbortSignal) {
                                metadata.stopReason = "abort";
                                metadata.remainingGenerationAfterStop = undefined;
                            }
                            else
                                continue;
                        }
                    }
                    this._lastEvaluation = lastEvaluation;
                    this._canUseContextWindowForCompletion = true;
                    this._chatHistory = newChatHistory;
                    this._chatHistoryStateRef = {};
                    const lastModelResponseItem = getLastModelResponseItem(newChatHistory);
                    const responseText = lastModelResponseItem.response
                        .filter((item) => typeof item === "string")
                        .join("");
                    if (metadata.stopReason === "customStopTrigger")
                        return {
                            response: lastModelResponseItem.response,
                            responseText,
                            stopReason: metadata.stopReason,
                            customStopTrigger: metadata.customStopTrigger,
                            remainingGenerationAfterStop: metadata.remainingGenerationAfterStop
                        };
                    return {
                        response: lastModelResponseItem.response,
                        responseText,
                        stopReason: metadata.stopReason,
                        remainingGenerationAfterStop: metadata.remainingGenerationAfterStop
                    };
                }
            }
            finally {
                disposeAbortController();
            }
        });
    }
    /**
     * Preload a user prompt into the current context sequence state to make later inference of the model response begin sooner
     * and feel faster.
     *
     * > **Note:** Preloading a long user prompt can incur context shifts, so consider limiting the length of prompts you preload.
     * @param prompt - the prompt to preload
     * @param [options]
     */
    async preloadPrompt(prompt, options = {}) {
        await this.completePromptWithMeta(prompt, {
            ...options,
            completeAsModel: false,
            maxTokens: 0
        });
    }
    /**
     * Preload a user prompt into the current context sequence state and generate a completion for it.
     *
     * > **Note:** Preloading a long user prompt and completing a user prompt with a high number of `maxTokens` can incur context shifts,
     * > so consider limiting the length of prompts you preload.
     * >
     * > Also, it's recommended to limit the number of tokens generated to a reasonable amount by configuring `maxTokens`.
     * @param prompt - the prompt to preload
     * @param [options]
     */
    async completePrompt(prompt, options = {}) {
        const { completion } = await this.completePromptWithMeta(prompt, options);
        return completion;
    }
    /**
     * Create a smart completion engine that caches the prompt completions
     * and reuses them when the user prompt matches the beginning of the cached prompt or completion.
     *
     * All completions are made, and the cache is used, only for the current chat session state.
     * You can create a single completion engine for an entire chat session.
     */
    createPromptCompletionEngine(options) {
        return LlamaChatSessionPromptCompletionEngine._create(this, options);
    }
    /**
     * See `completePrompt` for more information.
     * @param prompt
     * @param [options]
     */
    async completePromptWithMeta(prompt, { maxTokens, stopOnAbortSignal = false, functions, documentFunctionParams, onTextChunk, onToken, signal, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = false, repeatPenalty, tokenBias, customStopTriggers, evaluationPriority, completeAsModel } = {}) {
        this._ensureNotDisposed();
        if (grammar != null) {
            if (grammar._llama == null)
                throw new Error("The grammar passed to this function is not a LlamaGrammar instance.");
            else if (grammar._llama !== this.model._llama)
                throw new Error("The LlamaGrammar passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
        }
        const [abortController, disposeAbortController] = wrapAbortSignal(signal);
        this._preloadAndCompleteAbortControllers.add(abortController);
        const completeAsModelEnabled = typeof completeAsModel == "boolean"
            ? completeAsModel
            : completeAsModel === "auto"
                ? "auto"
                : completeAsModel?.enabled ?? defaultCompleteAsModel.enabled;
        const modelArchitecture = this.model.fileInfo.metadata?.general?.architecture;
        const shouldCompleteAsModel = completeAsModelEnabled === "auto"
            ? modelArchitecture === GgufArchitectureType.gptOss
            : completeAsModelEnabled;
        try {
            return await withLock([this._chatLock, "evaluation"], abortController.signal, async () => {
                this._ensureNotDisposed();
                if (this._chat == null)
                    throw new DisposedError();
                if (shouldCompleteAsModel) {
                    const messagesToAppendOption = (typeof completeAsModel == "boolean" || completeAsModel === "auto")
                        ? defaultCompleteAsModel.appendedMessages
                        : completeAsModel?.appendedMessages ?? defaultCompleteAsModel.appendedMessages;
                    const messagesToAppend = messagesToAppendOption.length === 0
                        ? defaultCompleteAsModel.appendedMessages
                        : messagesToAppendOption;
                    const addMessageToChatHistory = (chatHistory) => {
                        const newHistory = chatHistory.slice();
                        if (messagesToAppend.at(0)?.type === "model")
                            newHistory.push({ type: "user", text: "" });
                        for (let i = 0; i < messagesToAppend.length; i++) {
                            const item = messagesToAppend[i];
                            const isLastItem = i === messagesToAppend.length - 1;
                            if (item == null)
                                continue;
                            if (isLastItem && item.type === "model") {
                                const newResponse = item.response.slice();
                                if (typeof newResponse.at(-1) === "string")
                                    newResponse.push(newResponse.pop() + prompt);
                                else
                                    newResponse.push(prompt);
                                newHistory.push({
                                    type: "model",
                                    response: newResponse
                                });
                            }
                            else
                                newHistory.push(item);
                        }
                        if (messagesToAppend.at(-1)?.type !== "model")
                            newHistory.push({ type: "model", response: [prompt] });
                        return {
                            history: newHistory,
                            addedCount: newHistory.length - chatHistory.length
                        };
                    };
                    const { history: messagesWithPrompt, addedCount } = addMessageToChatHistory(this._chatHistory);
                    const { response, lastEvaluation, metadata } = await this._chat.generateResponse(messagesWithPrompt, {
                        abortOnNonText: true,
                        functions,
                        documentFunctionParams,
                        grammar: grammar, // this is allowed only because `abortOnNonText` is enabled
                        onTextChunk,
                        onToken,
                        signal: abortController.signal,
                        stopOnAbortSignal: true,
                        repeatPenalty,
                        minP,
                        topK,
                        topP,
                        seed,
                        tokenBias,
                        customStopTriggers,
                        maxTokens: maxTokens == null
                            ? undefined
                            : Math.max(1, maxTokens), // regular prompting ignores `maxTokens: 0`
                        temperature,
                        trimWhitespaceSuffix,
                        contextShift: {
                            ...this._contextShift,
                            lastEvaluationMetadata: this._lastEvaluation?.contextShiftMetadata
                        },
                        evaluationPriority,
                        lastEvaluationContextWindow: {
                            history: this._lastEvaluation?.contextWindow == null
                                ? undefined
                                : addMessageToChatHistory(this._lastEvaluation?.contextWindow).history,
                            minimumOverlapPercentageToPreventContextShift: 0.8
                        }
                    });
                    this._ensureNotDisposed();
                    this._lastEvaluation = {
                        cleanHistory: this._chatHistory,
                        contextWindow: lastEvaluation.contextWindow.slice(0, -addedCount),
                        contextShiftMetadata: lastEvaluation.contextShiftMetadata
                    };
                    this._canUseContextWindowForCompletion = this._chatHistory.at(-1)?.type === "user";
                    if (!stopOnAbortSignal && metadata.stopReason === "abort" && abortController.signal?.aborted)
                        throw abortController.signal.reason;
                    if (metadata.stopReason === "customStopTrigger")
                        return {
                            completion: response,
                            stopReason: metadata.stopReason,
                            customStopTrigger: metadata.customStopTrigger,
                            remainingGenerationAfterStop: metadata.remainingGenerationAfterStop
                        };
                    return {
                        completion: response,
                        stopReason: metadata.stopReason,
                        remainingGenerationAfterStop: metadata.remainingGenerationAfterStop
                    };
                }
                else {
                    const { completion, lastEvaluation, metadata } = await this._chat.loadChatAndCompleteUserMessage(asWithLastUserMessageRemoved(this._chatHistory), {
                        initialUserPrompt: prompt,
                        functions,
                        documentFunctionParams,
                        grammar,
                        onTextChunk,
                        onToken,
                        signal: abortController.signal,
                        stopOnAbortSignal: true,
                        repeatPenalty,
                        minP,
                        topK,
                        topP,
                        seed,
                        tokenBias,
                        customStopTriggers,
                        maxTokens,
                        temperature,
                        trimWhitespaceSuffix,
                        contextShift: {
                            ...this._contextShift,
                            lastEvaluationMetadata: this._lastEvaluation?.contextShiftMetadata
                        },
                        evaluationPriority,
                        lastEvaluationContextWindow: {
                            history: asWithLastUserMessageRemoved(this._lastEvaluation?.contextWindow),
                            minimumOverlapPercentageToPreventContextShift: 0.8
                        }
                    });
                    this._ensureNotDisposed();
                    this._lastEvaluation = {
                        cleanHistory: this._chatHistory,
                        contextWindow: asWithLastUserMessageRemoved(lastEvaluation.contextWindow),
                        contextShiftMetadata: lastEvaluation.contextShiftMetadata
                    };
                    this._canUseContextWindowForCompletion = this._chatHistory.at(-1)?.type === "user";
                    if (!stopOnAbortSignal && metadata.stopReason === "abort" && abortController.signal?.aborted)
                        throw abortController.signal.reason;
                    if (metadata.stopReason === "customStopTrigger")
                        return {
                            completion: completion,
                            stopReason: metadata.stopReason,
                            customStopTrigger: metadata.customStopTrigger,
                            remainingGenerationAfterStop: metadata.remainingGenerationAfterStop
                        };
                    return {
                        completion: completion,
                        stopReason: metadata.stopReason,
                        remainingGenerationAfterStop: metadata.remainingGenerationAfterStop
                    };
                }
            });
        }
        finally {
            this._preloadAndCompleteAbortControllers.delete(abortController);
            disposeAbortController();
        }
    }
    getChatHistory() {
        return structuredClone(this._chatHistory);
    }
    getLastEvaluationContextWindow() {
        if (this._lastEvaluation == null)
            return null;
        return structuredClone(this._lastEvaluation?.contextWindow);
    }
    setChatHistory(chatHistory) {
        this._chatHistory = structuredClone(chatHistory);
        this._chatHistoryStateRef = {};
        this._lastEvaluation = undefined;
        this._canUseContextWindowForCompletion = false;
    }
    /** Clear the chat history and reset it to the initial state. */
    resetChatHistory() {
        if (this._chat == null || this.disposed)
            throw new DisposedError();
        const chatWrapperSupportsSystemMessages = this._chat.chatWrapper.settings.supportsSystemMessages;
        if (chatWrapperSupportsSystemMessages == null || chatWrapperSupportsSystemMessages || this._forceAddSystemPrompt)
            this.setChatHistory(this._chat.chatWrapper.generateInitialChatHistory({ systemPrompt: this._systemPrompt }));
        else
            this.setChatHistory([]);
    }
    /** @internal */
    _stopAllPreloadAndPromptCompletions() {
        for (const abortController of this._preloadAndCompleteAbortControllers)
            abortController.abort();
        this._preloadAndCompleteAbortControllers.clear();
    }
    /** @internal */
    _ensureNotDisposed() {
        if (this.disposed)
            throw new DisposedError();
    }
}
function addFunctionCallToChatHistory({ chatHistory, functionName, functionDescription, callParams, callResult, rawCall, startsNewChunk }) {
    const newChatHistory = chatHistory.slice();
    if (newChatHistory.length === 0 || newChatHistory[newChatHistory.length - 1].type !== "model")
        newChatHistory.push({
            type: "model",
            response: []
        });
    const lastModelResponseItem = newChatHistory[newChatHistory.length - 1];
    const newLastModelResponseItem = { ...lastModelResponseItem };
    newChatHistory[newChatHistory.length - 1] = newLastModelResponseItem;
    const modelResponse = newLastModelResponseItem.response.slice();
    newLastModelResponseItem.response = modelResponse;
    const functionCall = {
        type: "functionCall",
        name: functionName,
        description: functionDescription,
        params: callParams,
        result: callResult,
        rawCall
    };
    if (startsNewChunk)
        functionCall.startsNewChunk = true;
    modelResponse.push(functionCall);
    return newChatHistory;
}
function getLastModelResponseItem(chatHistory) {
    if (chatHistory.length === 0 || chatHistory[chatHistory.length - 1].type !== "model")
        throw new Error("Expected chat history to end with a model response");
    return chatHistory[chatHistory.length - 1];
}
function asWithLastUserMessageRemoved(chatHistory) {
    if (chatHistory == null)
        return chatHistory;
    const newChatHistory = chatHistory.slice();
    while (newChatHistory.at(-1)?.type === "user")
        newChatHistory.pop();
    return newChatHistory;
}
//# sourceMappingURL=LlamaChatSession.js.map
1 node_modules/node-llama-cpp/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map generated vendored Normal file
File diff suppressed because one or more lines are too long
43 node_modules/node-llama-cpp/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts generated vendored Normal file
@@ -0,0 +1,43 @@
import type { LlamaContextSequence } from "../../LlamaContext/LlamaContext.js";
import type { LLamaChatCompletePromptOptions } from "../LlamaChatSession.js";
export type LLamaChatPromptCompletionEngineOptions = {
    /**
     * Max tokens to allow for preloading a prompt and generating a completion for it.
     *
     * Defaults to `256` or half of the context size, whichever is smaller.
     */
    maxPreloadTokens?: number;
    onGeneration?(prompt: string, completion: string): void;
    /**
     * Max number of completions to cache.
     *
     * Defaults to `100`.
     */
    maxCachedCompletions?: number;
    temperature?: LLamaChatCompletePromptOptions["temperature"];
    minP?: LLamaChatCompletePromptOptions["minP"];
    topK?: LLamaChatCompletePromptOptions["topK"];
    topP?: LLamaChatCompletePromptOptions["topP"];
    seed?: LLamaChatCompletePromptOptions["seed"];
    trimWhitespaceSuffix?: LLamaChatCompletePromptOptions["trimWhitespaceSuffix"];
    evaluationPriority?: LLamaChatCompletePromptOptions["evaluationPriority"];
    repeatPenalty?: LLamaChatCompletePromptOptions["repeatPenalty"];
    tokenBias?: LLamaChatCompletePromptOptions["tokenBias"];
    customStopTriggers?: LLamaChatCompletePromptOptions["customStopTriggers"];
    grammar?: LLamaChatCompletePromptOptions["grammar"];
    functions?: LLamaChatCompletePromptOptions["functions"];
    documentFunctionParams?: LLamaChatCompletePromptOptions["documentFunctionParams"];
    completeAsModel?: LLamaChatCompletePromptOptions["completeAsModel"];
};
export declare const defaultMaxPreloadTokens: (sequence: LlamaContextSequence) => number;
export declare class LlamaChatSessionPromptCompletionEngine {
    private constructor();
    dispose(): void;
    /**
     * Get completion for the prompt from the cache,
     * and begin preloading this prompt into the context sequence and completing it.
     *
     * On completion progress, `onGeneration` (configured for this engine instance) will be called.
     */
    complete(prompt: string): string;
}
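/*
 * A sketch of wiring the engine to a text input; `showSuggestion` is a hypothetical UI helper:
 * ```ts
 * const engine = session.createPromptCompletionEngine({
 *     maxCachedCompletions: 100,
 *     onGeneration(prompt, completion) {
 *         showSuggestion(prompt, completion); // hypothetical UI helper
 *     }
 * });
 *
 * // Synchronously returns whatever is already cached (possibly ""),
 * // and starts preloading and completing this prompt in the background.
 * const cached = engine.complete("How do I ");
 * ```
 */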
191 node_modules/node-llama-cpp/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js generated vendored Normal file
@@ -0,0 +1,191 @@
import { DisposeAggregator, DisposedError } from "lifecycle-utils";
import { getConsoleLogPrefix } from "../../../utils/getConsoleLogPrefix.js";
import { LruCache } from "../../../utils/LruCache.js";
import { safeEventCallback } from "../../../utils/safeEventCallback.js";
export const defaultMaxPreloadTokens = (sequence) => {
    const defaultValue = 256;
    return sequence.model.fileInsights.swaSize != null
        ? Math.min(Math.ceil(sequence.model.fileInsights.swaSize / 2), defaultValue, Math.ceil(sequence.contextSize / 2))
        : Math.min(defaultValue, Math.ceil(sequence.contextSize / 2));
};
const defaultMaxCachedCompletions = 100;
export class LlamaChatSessionPromptCompletionEngine {
    /** @internal */ _chatSession;
    /** @internal */ _maxPreloadTokens;
    /** @internal */ _maxCachedCompletions;
    /** @internal */ _onGeneration;
    /** @internal */ _completionOptions;
    /** @internal */ _completionCaches = new WeakMap();
    /** @internal */ _disposeAggregator = new DisposeAggregator();
    /** @internal */ _currentCompletionAbortController = new AbortController();
    /** @internal */ _lastPrompt;
    /** @internal */ _disposed = false;
    constructor(chatSession, { maxPreloadTokens = defaultMaxPreloadTokens(chatSession.sequence), onGeneration, maxCachedCompletions = defaultMaxCachedCompletions, ...options }) {
        this._chatSession = chatSession;
        this._maxPreloadTokens = Math.max(1, maxPreloadTokens);
        this._maxCachedCompletions = Math.max(1, maxCachedCompletions);
        this._onGeneration = safeEventCallback(onGeneration);
        this._completionOptions = options;
        this.dispose = this.dispose.bind(this);
        this._disposeAggregator.add(this._chatSession.onDispose.createListener(this.dispose));
        this._disposeAggregator.add(() => {
            this._disposed = true;
            this._currentCompletionAbortController.abort();
        });
    }
    dispose() {
        if (this._disposed)
            return;
        this._disposeAggregator.dispose();
    }
    /**
     * Get completion for the prompt from the cache,
     * and begin preloading this prompt into the context sequence and completing it.
     *
     * On completion progress, `onGeneration` (configured for this engine instance) will be called.
     */
    complete(prompt) {
        if (this._disposed)
            throw new DisposedError();
        const completionCache = this._getCurrentCompletionCache();
        const completion = completionCache.getCompletion(prompt);
        if (this._lastPrompt == null || !(this._lastPrompt + (completion ?? "")).startsWith(prompt)) {
            this._lastPrompt = prompt;
            this._restartCompletion(completionCache);
        }
        this._lastPrompt = prompt;
        return completion ?? "";
    }
    /** @internal */
    _getCurrentCompletionCache() {
        const completionCache = this._completionCaches.get(this._chatSession._chatHistoryStateRef);
        if (completionCache != null)
            return completionCache;
        const newCompletionCache = new CompletionCache(this._maxCachedCompletions);
        this._completionCaches.set(this._chatSession._chatHistoryStateRef, newCompletionCache);
        return newCompletionCache;
    }
    /** @internal */
    _restartCompletion(completionCache) {
        if (this._disposed)
            return;
        this._currentCompletionAbortController.abort();
        this._currentCompletionAbortController = new AbortController();
        const prompt = this._lastPrompt;
        if (prompt == null)
            return;
        const existingCompletion = completionCache.getCompletion(prompt);
        const promptToComplete = prompt + (existingCompletion ?? "");
        const currentPromptTokens = this._chatSession.model.tokenize(promptToComplete, false, "trimLeadingSpace").length;
        const leftTokens = Math.max(0, this._maxPreloadTokens - currentPromptTokens);
        if (leftTokens === 0)
            return;
        const currentAbortController = this._currentCompletionAbortController;
        const currentAbortSignal = this._currentCompletionAbortController.signal;
        let currentCompletion = "";
        void this._chatSession.completePrompt(promptToComplete, {
            ...this._completionOptions,
            stopOnAbortSignal: false,
            maxTokens: leftTokens,
            signal: currentAbortSignal,
            onTextChunk: (chunk) => {
                currentCompletion += chunk;
                const completion = (existingCompletion ?? "") + currentCompletion;
                completionCache.putCompletion(prompt, completion);
                if (this._getCurrentCompletionCache() !== completionCache) {
                    currentAbortController.abort();
                    return;
                }
                if (this._lastPrompt === prompt)
                    this._onGeneration?.(prompt, completion);
            }
        })
            .then(() => {
                if (this._lastPrompt !== prompt && this._getCurrentCompletionCache() === completionCache)
                    return this._restartCompletion(completionCache);
            })
            .catch((err) => {
                if ((currentAbortSignal.aborted && err === currentAbortSignal.reason) || err instanceof DOMException)
                    return;
                console.error(getConsoleLogPrefix(false, false), err);
            });
    }
    /** @internal */
    static _create(chatSession, options = {}) {
        return new LlamaChatSessionPromptCompletionEngine(chatSession, options);
    }
}
class CompletionCache {
    /** @internal */ _cache;
    /** @internal */ _rootNode = [new Map()];
    constructor(maxInputs) {
        this._cache = new LruCache(maxInputs, {
            onDelete: (key) => {
                this._deleteInput(key);
            }
        });
    }
    get maxInputs() {
        return this._cache.maxSize;
    }
    getCompletion(input) {
        let node = this._rootNode;
        for (let i = 0; i < input.length; i++) {
            if (node == null)
                return null;
            const [next, completion] = node;
            const char = input[i];
            if (!next.has(char)) {
                if (completion != null && completion.startsWith(input.slice(i))) {
                    this._cache.get(input.slice(0, i));
                    return completion.slice(input.length - i);
                }
            }
            node = next.get(char);
        }
        if (node == null)
            return null;
        const [, possibleCompletion] = node;
        if (possibleCompletion != null) {
            this._cache.get(input);
            return possibleCompletion;
        }
        return null;
    }
    putCompletion(input, completion) {
        this._cache.set(input, null);
        let node = this._rootNode;
        for (let i = 0; i < input.length; i++) {
            const [next] = node;
            const char = input[i];
            if (!next.has(char))
                next.set(char, [new Map()]);
            node = next.get(char);
        }
        const currentCompletion = node[1];
        if (currentCompletion != null && currentCompletion.startsWith(completion))
            return currentCompletion;
        node[1] = completion;
        return completion;
    }
    /** @internal */
    _deleteInput(input) {
        let lastNodeWithMultipleChildren = this._rootNode;
        let lastNodeWithMultipleChildrenDeleteChar = input[0];
        let node = this._rootNode;
        for (let i = 0; i < input.length; i++) {
            const [next] = node;
            const char = input[i];
            if (next.size > 1) {
                lastNodeWithMultipleChildren = node;
                lastNodeWithMultipleChildrenDeleteChar = char;
            }
            if (!next.has(char))
                return;
            node = next.get(char);
        }
        if (lastNodeWithMultipleChildrenDeleteChar !== "")
            lastNodeWithMultipleChildren[0].delete(lastNodeWithMultipleChildrenDeleteChar);
    }
}
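/*
 * A worked sketch of the trie semantics above (CompletionCache is module-private; shown for
 * illustration only, with hypothetical inputs):
 * ```ts
 * const cache = new CompletionCache(100);
 * cache.putCompletion("How do I ", "install node-llama-cpp?");
 *
 * cache.getCompletion("How do I ");        // => "install node-llama-cpp?"
 * cache.getCompletion("How do I install"); // => " node-llama-cpp?" (walks into the stored completion)
 * cache.getCompletion("Unrelated");        // => null
 * ```
 */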
//# sourceMappingURL=LlamaChatSessionPromptCompletionEngine.js.map
1 node_modules/node-llama-cpp/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map generated vendored Normal file
File diff suppressed because one or more lines are too long
15 node_modules/node-llama-cpp/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts generated vendored Normal file
@@ -0,0 +1,15 @@
import { GbnfJsonDefList, GbnfJsonSchema, GbnfJsonSchemaToType } from "../../../utils/gbnfJson/types.js";
import { ChatSessionModelFunction } from "../../../types.js";
/**
 * Define a function that can be used by the model in a chat session, and return it.
 *
 * This is a helper function to facilitate defining functions with full TypeScript type information.
 *
 * The handler function can return a Promise, and the return value will be awaited before being returned to the model.
 * @param functionDefinition
 */
export declare function defineChatSessionFunction<const Params extends GbnfJsonSchema<Defs>, const Defs extends GbnfJsonDefList<Defs>>({ description, params, handler }: {
    description?: string;
    params?: Readonly<Params> & GbnfJsonSchema<Defs>;
    handler: (params: GbnfJsonSchemaToType<NoInfer<Params>>) => Promise<any> | any;
}): ChatSessionModelFunction<NoInfer<Params>>;
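/*
 * A sketch of a typed function with an async handler; the endpoint is hypothetical:
 * ```ts
 * import {defineChatSessionFunction} from "node-llama-cpp";
 *
 * const getStockPrice = defineChatSessionFunction({
 *     description: "Get the latest price of a stock symbol",
 *     params: {
 *         type: "object",
 *         properties: {
 *             symbol: {type: "string"}
 *         }
 *     },
 *     async handler({symbol}) {
 *         const res = await fetch(`https://example.com/api/price/${symbol}`); // hypothetical endpoint
 *         return await res.json(); // awaited before being passed back to the model
 *     }
 * });
 * ```
 */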
16 node_modules/node-llama-cpp/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js generated vendored Normal file
@@ -0,0 +1,16 @@
/**
 * Define a function that can be used by the model in a chat session, and return it.
 *
 * This is a helper function to facilitate defining functions with full TypeScript type information.
 *
 * The handler function can return a Promise, and the return value will be awaited before being returned to the model.
 * @param functionDefinition
 */
export function defineChatSessionFunction({ description, params, handler }) {
    return {
        description,
        params,
        handler
    };
}
//# sourceMappingURL=defineChatSessionFunction.js.map
1 node_modules/node-llama-cpp/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map generated vendored Normal file
@@ -0,0 +1 @@
{"version":3,"file":"defineChatSessionFunction.js","sourceRoot":"","sources":["../../../../src/evaluator/LlamaChatSession/utils/defineChatSessionFunction.ts"],"names":[],"mappings":"AAGA;;;;;;;GAOG;AACH,MAAM,UAAU,yBAAyB,CAGvC,EACE,WAAW,EACX,MAAM,EACN,OAAO,EAKV;IACG,OAAO;QACH,WAAW;QACX,MAAM;QACN,OAAO;KACV,CAAC;AACN,CAAC"}