First upload of version 0.0.1

Neyra
2026-02-05 15:27:49 +08:00
commit 8e9b7201ed
4182 changed files with 593136 additions and 0 deletions


@@ -0,0 +1,459 @@
import { EventRelay } from "lifecycle-utils";
import { ChatWrapper } from "../../ChatWrapper.js";
import { LlamaContextSequence } from "../LlamaContext/LlamaContext.js";
import { ChatHistoryItem, ChatModelFunctions, ChatModelSegmentType, LLamaContextualRepeatPenalty, Token, Tokenizer } from "../../types.js";
import { GbnfJsonSchemaToType } from "../../utils/gbnfJson/types.js";
import { LlamaGrammar } from "../LlamaGrammar.js";
import { LlamaText, LlamaTextJSON } from "../../utils/LlamaText.js";
import { EvaluationPriority } from "../LlamaContext/types.js";
import { TokenBias } from "../TokenBias.js";
import { LlamaModel } from "../LlamaModel/LlamaModel.js";
export type LlamaChatOptions = {
contextSequence: LlamaContextSequence;
/** `"auto"` is used by default */
chatWrapper?: "auto" | ChatWrapper;
/**
* Automatically dispose the sequence when the session is disposed
*
* Defaults to `false`.
*/
autoDisposeSequence?: boolean;
};
export type LlamaChatResponseChunk = LlamaChatResponseTextChunk | LlamaChatResponseSegmentChunk;
export type LlamaChatResponseTextChunk = {
/** When `type` is `undefined`, the chunk is part of the main response and is not a segment */
type: undefined;
/**
* `segmentType` has no purpose when `type` is `undefined` (meaning that this chunk is part of the main response and is not a segment).
*/
segmentType: undefined;
/**
* The generated text chunk.
*
* Detokenized from the `tokens` property,
* but with the context of the previous generation (for better spacing of the text with some models).
*
* Prefer using this property over `tokens` when streaming the generated response as text.
*/
text: string;
/** The generated tokens */
tokens: Token[];
};
export type LlamaChatResponseSegmentChunk = {
type: "segment";
/** Segment type */
segmentType: ChatModelSegmentType;
/**
* The generated text chunk.
*
* Detokenized from the `tokens` property,
* but with the context of the previous generation (for better spacing of the text with some models).
*
* Prefer using this property over `tokens` when streaming the generated response as text.
*/
text: string;
/** The generated tokens */
tokens: Token[];
/**
* When the current chunk is the start of a segment, this field will be set.
*
* It's possible that a chunk with no tokens and empty text will be emitted just to set this field
* to signify that the segment has started.
*/
segmentStartTime?: Date;
/**
* When the current chunk is the last one of a segment (meaning the current segment has ended), this field will be set.
*
* It's possible that a chunk with no tokens and empty text will be emitted just to set this field
* to signify that the segment has ended.
*/
segmentEndTime?: Date;
};
export type LlamaChatResponseFunctionCallParamsChunk = {
/**
* Each different function call has a different `callIndex`.
*
* When the previous function call has finished being generated, the `callIndex` of the next one will increment.
*
* Use this value to distinguish between different function calls.
*/
callIndex: number;
/**
* The name of the function being called
*/
functionName: string;
/**
* A chunk of the generated text used for the function call parameters.
*
* Collect all the chunks together to construct the full function call parameters.
*
* After the function call is finished, the entire constructed params text can be parsed as a JSON object,
* according to the function parameters schema.
*/
paramsChunk: string;
/**
* When this is `true`, the current chunk is the last chunk in the generation of the current function call parameters.
*/
done: boolean;
};
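// Illustrative sketch (not part of these declarations): one hedged way to consume the chunk
// types above. `onResponseChunk` chunks are told apart via `type`/`segmentType`, and
// `onFunctionCallParamsChunk` chunks are accumulated per `callIndex` until `done` is set.
// Only the types declared in this file are assumed.
const paramsBuffers = new Map<number, string>();
const handleResponseChunk = (chunk: LlamaChatResponseChunk) => {
    if (chunk.type === "segment")
        console.log(`[${chunk.segmentType}]`, chunk.text); // segment text, e.g. thoughts
    else
        process.stdout.write(chunk.text); // main response text
};
const handleFunctionCallParamsChunk = (chunk: LlamaChatResponseFunctionCallParamsChunk) => {
    const collected = (paramsBuffers.get(chunk.callIndex) ?? "") + chunk.paramsChunk;
    paramsBuffers.set(chunk.callIndex, collected);
    if (chunk.done)
        console.log(chunk.functionName, JSON.parse(collected)); // the full params JSON object
};
// Pass these as the `onResponseChunk` and `onFunctionCallParamsChunk` options of `generateResponse()`.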
export type LLamaChatGenerateResponseOptions<Functions extends ChatModelFunctions | undefined = undefined> = {
/**
* Called as the model generates the main response with the generated text chunk.
*
* Useful for streaming the generated response as it's being generated.
*
* Includes only the main response without any text segments (like thoughts).
* For streaming the response with segments, use {@link onResponseChunk `onResponseChunk`}.
*/
onTextChunk?: (text: string) => void;
/**
* Called as the model generates the main response with the generated tokens.
*
* Preferably, you'd want to use {@link onTextChunk `onTextChunk`} instead of this.
*
* Includes only the main response without any segments (like thoughts).
* For streaming the response with segments, use {@link onResponseChunk `onResponseChunk`}.
*/
onToken?: (tokens: Token[]) => void;
/**
* Called as the model generates a response with the generated text and tokens,
* including segment information (when the generated output is part of a segment).
*
* Useful for streaming the generated response as it's being generated, including the main response and all segments.
*
* Only use this function when you need the segmented texts, like thought segments (chain of thought text).
*/
onResponseChunk?: (chunk: LlamaChatResponseChunk) => void;
/**
* An AbortSignal to later abort the generation.
*
* When the signal is aborted, the generation will stop and throw `signal.reason` as the error.
*
* > To stop an ongoing generation without throwing an error, also set `stopOnAbortSignal` to `true`.
*/
signal?: AbortSignal;
/**
* When a response already started being generated and then the signal is aborted,
* the generation will stop and the response will be returned as is instead of throwing an error.
*
* Defaults to `false`.
*/
stopOnAbortSignal?: boolean;
/** Maximum number of tokens to generate */
maxTokens?: number;
/**
* Temperature is a hyperparameter that controls the randomness of the generated text.
* It affects the probability distribution of the model's output tokens.
*
* A higher temperature (e.g., 1.5) makes the output more random and creative,
* while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
*
* The suggested temperature is 0.8, which provides a balance between randomness and determinism.
*
* At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
*
* Set to `0` to disable.
* Disabled by default (set to `0`).
*/
temperature?: number;
/**
* From the next token candidates, discard the percentage of tokens with the lowest probability.
* For example, if set to `0.05`, 5% of the lowest probability tokens will be discarded.
* This is useful for generating more high-quality results when using a high temperature.
* Set to a value between `0` and `1` to enable.
*
* Only relevant when `temperature` is set to a value greater than `0`.
* Disabled by default.
*/
minP?: number;
/**
* Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
* An integer number between `1` and the size of the vocabulary.
* Set to `0` to disable (which uses the full vocabulary).
*
* Only relevant when `temperature` is set to a value greater than 0.
*/
topK?: number;
/**
* Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
* and samples the next token only from this set.
* A float number between `0` and `1`.
* Set to `1` to disable.
*
* Only relevant when `temperature` is set to a value greater than `0`.
*/
topP?: number;
/**
* Used to control the randomness of the generated text.
*
* Change the seed to get different results.
*
* Only relevant when using `temperature`.
*/
seed?: number;
/**
* Trim whitespace from the end of the generated text
*
* Defaults to `false`.
*/
trimWhitespaceSuffix?: boolean;
repeatPenalty?: false | LLamaContextualRepeatPenalty;
/**
* Adjust the probability of tokens being generated.
* Can be used to bias the model to generate tokens that you want it to lean towards,
* or to avoid generating tokens that you want it to avoid.
*/
tokenBias?: TokenBias | (() => TokenBias);
/**
* See the parameter `evaluationPriority` on the `LlamaContextSequence.evaluate()` function for more information.
*/
evaluationPriority?: EvaluationPriority;
contextShift?: LLamaChatContextShiftOptions;
/**
* Custom stop triggers to stop the generation of the response when any of the provided triggers are found.
*/
customStopTriggers?: readonly (LlamaText | string | readonly (string | Token)[])[];
/**
* The evaluation context window returned from the last evaluation.
* This is an optimization to utilize existing context sequence state better when possible.
*/
lastEvaluationContextWindow?: {
/** The history of the last evaluation. */
history?: ChatHistoryItem[];
/**
* Minimum overlap percentage with existing context sequence state to use the last evaluation context window.
* If the last evaluation context window is not used, a new context will be generated based on the full history,
* which will decrease the likelihood of another context shift happening so soon.
*
* A number between `0` (exclusive) and `1` (inclusive).
*/
minimumOverlapPercentageToPreventContextShift?: number;
};
/**
* Called as the model generates function calls with the generated parameters chunk for each function call.
*
* Useful for streaming the generated function call parameters as they're being generated.
* Only useful in specific use cases,
* such as showing the generated textual file content as it's being generated (note that doing this requires parsing incomplete JSON).
*
* The constructed text from all the params chunks of a given function call can be parsed as a JSON object,
* according to the function parameters schema.
*
* Each function call has its own `callIndex` you can use to distinguish between them.
*
* Only relevant when using function calling (via passing the `functions` option).
*/
onFunctionCallParamsChunk?: (chunk: LlamaChatResponseFunctionCallParamsChunk) => void;
/**
* Set the maximum number of tokens the model is allowed to spend on various segmented responses.
*/
budgets?: {
/**
* Whether to include, in the budget, the tokens already consumed by the current model response being completed.
*
* Defaults to `true`.
*/
includeCurrentResponse?: boolean;
/**
* Budget for thought tokens.
*
* Defaults to `Infinity`.
*/
thoughtTokens?: number;
/**
* Budget for comment tokens.
*
* Defaults to `Infinity`.
*/
commentTokens?: number;
};
/**
* Stop the generation when the model tries to generate a non-textual segment or call a function.
*
* Useful for generating completions in the form of a model response.
*
* Defaults to `false`.
*/
abortOnNonText?: boolean;
} & ({
grammar?: LlamaGrammar;
functions?: never;
documentFunctionParams?: never;
maxParallelFunctionCalls?: never;
onFunctionCall?: never;
onFunctionCallParamsChunk?: never;
} | {
grammar?: never;
functions?: Functions | ChatModelFunctions;
documentFunctionParams?: boolean;
maxParallelFunctionCalls?: number;
onFunctionCall?: (functionCall: LlamaChatResponseFunctionCall<Functions extends ChatModelFunctions ? Functions : ChatModelFunctions>) => void;
onFunctionCallParamsChunk?: (chunk: LlamaChatResponseFunctionCallParamsChunk) => void;
});
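// Illustrative sketch (not part of these declarations): a hedged `generateResponse()` call
// using the options above. The chat history item shapes, the trailing empty `model` item for
// the response to be generated into, and the exact `functions` schema shape are assumptions
// based on the surrounding declarations, not guarantees of this API.
async function askWithFunctions(chat: LlamaChat) {
    const history: ChatHistoryItem[] = [
        {type: "system", text: "You are a helpful assistant."},
        {type: "user", text: "What's the weather in Paris?"},
        {type: "model", response: []} // assumed placeholder the response is generated into
    ];
    const {response, functionCalls, metadata} = await chat.generateResponse(history, {
        temperature: 0.8,
        topP: 0.9,
        maxTokens: 256,
        onTextChunk: (text) => process.stdout.write(text), // stream the main response
        functions: {
            getWeather: {
                description: "Get the current weather for a city",
                params: {type: "object", properties: {city: {type: "string"}}}
            }
        }
    });
    console.log({response, functionCalls, stopReason: metadata.stopReason});
}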
export type LLamaChatLoadAndCompleteUserMessageOptions<Functions extends ChatModelFunctions | undefined = undefined> = {
/**
* Complete the given user prompt without adding it or the completion to the returned context window.
*/
initialUserPrompt?: string;
/**
* When a completion already started being generated and then the signal is aborted,
* the generation will stop and the completion will be returned as is instead of throwing an error.
*
* Defaults to `false`.
*/
stopOnAbortSignal?: boolean;
/**
* Called as the model generates a completion with the generated text chunk.
*
* Useful for streaming the generated completion as it's being generated.
*/
onTextChunk?: LLamaChatGenerateResponseOptions<Functions>["onTextChunk"];
/**
* Called as the model generates a completion with the generated tokens.
*
* Preferably, you'd want to use `onTextChunk` instead of this.
*/
onToken?: LLamaChatGenerateResponseOptions<Functions>["onToken"];
signal?: LLamaChatGenerateResponseOptions<Functions>["signal"];
maxTokens?: LLamaChatGenerateResponseOptions<Functions>["maxTokens"];
temperature?: LLamaChatGenerateResponseOptions<Functions>["temperature"];
minP?: LLamaChatGenerateResponseOptions<Functions>["minP"];
topK?: LLamaChatGenerateResponseOptions<Functions>["topK"];
topP?: LLamaChatGenerateResponseOptions<Functions>["topP"];
seed?: LLamaChatGenerateResponseOptions<Functions>["seed"];
trimWhitespaceSuffix?: LLamaChatGenerateResponseOptions<Functions>["trimWhitespaceSuffix"];
repeatPenalty?: LLamaChatGenerateResponseOptions<Functions>["repeatPenalty"];
tokenBias?: LLamaChatGenerateResponseOptions<Functions>["tokenBias"];
evaluationPriority?: LLamaChatGenerateResponseOptions<Functions>["evaluationPriority"];
contextShift?: LLamaChatGenerateResponseOptions<Functions>["contextShift"];
customStopTriggers?: LLamaChatGenerateResponseOptions<Functions>["customStopTriggers"];
lastEvaluationContextWindow?: LLamaChatGenerateResponseOptions<Functions>["lastEvaluationContextWindow"];
grammar?: LlamaGrammar;
/**
* Functions are not used by the model here,
* but are used for keeping the instructions given to the model about the functions in the current context state,
* to avoid context shifts.
*
* It's best to provide the same functions that were used for the previous prompt here.
*/
functions?: Functions | ChatModelFunctions;
/**
* Functions are not used by the model here,
* but are used for keeping the instructions given to the model about the functions in the current context state,
* to avoid context shifts.
*
* It's best to provide the same value that was used for the previous prompt here.
*/
documentFunctionParams?: boolean;
};
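// Illustrative sketch (not part of these declarations): a hedged use of
// `loadChatAndCompleteUserMessage()` to suggest a continuation for a partially typed user
// message. The history item shapes are assumptions based on the declarations above.
async function suggestUserCompletion(chat: LlamaChat, partialPrompt: string) {
    const history: ChatHistoryItem[] = [
        {type: "system", text: "You are a helpful assistant."}
    ];
    const {completion, metadata} = await chat.loadChatAndCompleteUserMessage(history, {
        initialUserPrompt: partialPrompt, // e.g. "How do I "
        maxTokens: 24,
        temperature: 0.8,
        onTextChunk: (text) => process.stdout.write(text) // stream the suggested continuation
    });
    return {completion, stopReason: metadata.stopReason};
}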
export type LLamaChatContextShiftOptions = {
/**
* The number of tokens to delete from the context window to make space for new ones.
* Defaults to 10% of the context size.
*/
size?: number | ((sequence: LlamaContextSequence) => number | Promise<number>);
/**
* The strategy to use when deleting tokens from the context window.
*
* Defaults to `"eraseFirstResponseAndKeepFirstSystem"`.
*/
strategy?: "eraseFirstResponseAndKeepFirstSystem" | ((options: {
/** Full chat history */
chatHistory: readonly ChatHistoryItem[];
/** Maximum number of tokens that the new chat history should fit under when tokenized */
maxTokensCount: number;
/** Tokenizer used to tokenize the chat history */
tokenizer: Tokenizer;
/** Chat wrapper used to generate the context state */
chatWrapper: ChatWrapper;
/**
* The metadata returned from the last context shift strategy call.
* Will be `null` on the first call.
*/
lastShiftMetadata?: object | null;
}) => {
chatHistory: ChatHistoryItem[];
metadata?: object | null;
} | Promise<{
chatHistory: ChatHistoryItem[];
metadata?: object | null;
}>);
/**
* The `contextShiftMetadata` returned from the last evaluation.
* This is an optimization to utilize the existing context state better when possible.
*/
lastEvaluationMetadata?: object | undefined | null;
};
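// Illustrative sketch (not part of these declarations): a hedged custom `strategy` matching
// the callback signature above. It drops the oldest non-system items until the tokenized
// history fits under `maxTokensCount`. Measuring the size via
// `chatWrapper.generateContextState()` and `LlamaText.tokenize()` is an assumption here,
// not something defined in this file.
const dropOldestStrategy: LLamaChatContextShiftOptions["strategy"] = ({
    chatHistory, maxTokensCount, tokenizer, chatWrapper, lastShiftMetadata
}) => {
    const res = chatHistory.slice();
    let removedItems = (lastShiftMetadata as {removedItems?: number} | null)?.removedItems ?? 0;
    const tokensCount = () => chatWrapper.generateContextState({chatHistory: res})
        .contextText.tokenize(tokenizer).length;
    while (res.length > 2 && tokensCount() > maxTokensCount) {
        const removeIndex = res[0]?.type === "system" ? 1 : 0; // keep the first system message
        res.splice(removeIndex, 1);
        removedItems++;
    }
    return {chatHistory: res, metadata: {removedItems}};
};
// Usage: pass `{strategy: dropOldestStrategy}` as the `contextShift` option of `generateResponse()`.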
export declare class LlamaChat {
readonly onDispose: EventRelay<void>;
constructor({ contextSequence, chatWrapper, autoDisposeSequence }: LlamaChatOptions);
dispose({ disposeSequence }?: {
disposeSequence?: boolean;
}): void;
/** @hidden */
[Symbol.dispose](): void;
get disposed(): boolean;
get chatWrapper(): ChatWrapper;
get sequence(): LlamaContextSequence;
get context(): import("../LlamaContext/LlamaContext.js").LlamaContext;
get model(): LlamaModel;
generateResponse<const Functions extends ChatModelFunctions | undefined = undefined>(history: ChatHistoryItem[], options?: LLamaChatGenerateResponseOptions<Functions>): Promise<LlamaChatResponse<Functions>>;
loadChatAndCompleteUserMessage<const Functions extends ChatModelFunctions | undefined = undefined>(history: ChatHistoryItem[], options?: LLamaChatLoadAndCompleteUserMessageOptions<Functions>): Promise<LlamaChatLoadAndCompleteUserResponse>;
}
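// Illustrative sketch (not part of these declarations): constructing and disposing a
// `LlamaChat`. Obtaining a `LlamaContextSequence` (from a loaded model's context) is assumed
// to happen elsewhere; only the API declared above is used.
async function withChat(contextSequence: LlamaContextSequence) {
    const chat = new LlamaChat({
        contextSequence,
        autoDisposeSequence: true // dispose the sequence together with the chat
    });
    try {
        // ... `chat.generateResponse()` / `chat.loadChatAndCompleteUserMessage()` calls go here ...
        console.log("disposed:", chat.disposed); // false while in use
    } finally {
        chat.dispose(); // also disposes the sequence because of `autoDisposeSequence: true`
    }
}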
export type LlamaChatResponse<Functions extends ChatModelFunctions | undefined = undefined> = {
/**
* The response text only, _without_ any text segments (like thoughts).
*/
response: string;
/**
* The full response, including all text and text segments (like thoughts).
*/
fullResponse: Array<string | LlamaChatResponseSegment>;
functionCalls?: Functions extends ChatModelFunctions ? LlamaChatResponseFunctionCall<Functions>[] : never;
lastEvaluation: {
cleanHistory: ChatHistoryItem[];
contextWindow: ChatHistoryItem[];
contextShiftMetadata: any;
};
metadata: {
remainingGenerationAfterStop?: string | Token[];
stopReason: "eogToken" | "stopGenerationTrigger" | "functionCalls" | "maxTokens" | "abort";
} | {
remainingGenerationAfterStop?: string | Token[];
stopReason: "customStopTrigger";
customStopTrigger: (string | Token)[];
};
};
export type LlamaChatResponseFunctionCall<Functions extends ChatModelFunctions, FunctionCallName extends keyof Functions & string = string & keyof Functions, Params = Functions[FunctionCallName]["params"] extends undefined | null | void ? undefined : GbnfJsonSchemaToType<Functions[FunctionCallName]["params"]>> = {
functionName: FunctionCallName;
params: Params;
raw: LlamaTextJSON;
};
export type LlamaChatResponseSegment = {
type: "segment";
segmentType: ChatModelSegmentType;
text: string;
ended: boolean;
raw: LlamaTextJSON;
startTime?: string;
endTime?: string;
};
export type LlamaChatLoadAndCompleteUserResponse = {
completion: string;
lastEvaluation: {
/**
* The completion and initial user prompt are not added to this context window result,
* but are loaded into the current context sequence state as tokens
*/
contextWindow: ChatHistoryItem[];
contextShiftMetadata: any;
};
metadata: {
remainingGenerationAfterStop?: string | Token[];
stopReason: "eogToken" | "stopGenerationTrigger" | "maxTokens" | "abort";
} | {
remainingGenerationAfterStop?: string | Token[];
stopReason: "customStopTrigger";
customStopTrigger: (string | Token)[];
};
};
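A hedged multi-turn sketch of the `lastEvaluation` round-tripping described above: the `cleanHistory`, `contextWindow`, and `contextShiftMetadata` from one `generateResponse()` call are fed into the next one via `lastEvaluationContextWindow` and `contextShift`. The way the next user turn is appended to the history is an assumption about the `ChatHistoryItem` shapes, not something this file guarantees.

async function chatLoop(chat: LlamaChat, userPrompts: string[]) {
    let history: ChatHistoryItem[] = [{type: "system", text: "You are a helpful assistant."}];
    let lastContextWindow: ChatHistoryItem[] | undefined = undefined;
    let contextShiftMetadata: any = undefined;
    for (const prompt of userPrompts) {
        history.push({type: "user", text: prompt}, {type: "model", response: []});
        const {response, lastEvaluation} = await chat.generateResponse(history, {
            lastEvaluationContextWindow: {history: lastContextWindow},
            contextShift: {lastEvaluationMetadata: contextShiftMetadata}
        });
        console.log(response);
        // reuse the last evaluation state to better utilize the existing context sequence state
        history = lastEvaluation.cleanHistory;
        lastContextWindow = lastEvaluation.contextWindow;
        contextShiftMetadata = lastEvaluation.contextShiftMetadata;
    }
}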

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long


@@ -0,0 +1,11 @@
import { LlamaGrammar } from "../../LlamaGrammar.js";
import { ChatModelFunctions } from "../../../types.js";
import { ChatWrapper } from "../../../ChatWrapper.js";
import { Llama } from "../../../bindings/Llama.js";
export declare class FunctionCallNameGrammar<const Functions extends ChatModelFunctions> extends LlamaGrammar {
private readonly _functions;
private readonly _chatWrapper;
constructor(llama: Llama, functions: Functions, chatWrapper: ChatWrapper);
parseFunctionName(generatedFunctionName: string): keyof Functions & string;
private _validateFunctions;
}


@@ -0,0 +1,55 @@
import { LlamaGrammar } from "../../LlamaGrammar.js";
import { LlamaText } from "../../../utils/LlamaText.js";
import { GbnfGrammarGenerator } from "../../../utils/gbnfJson/GbnfGrammarGenerator.js";
import { GbnfGrammar } from "../../../utils/gbnfJson/terminals/GbnfGrammar.js";
import { GbnfOr } from "../../../utils/gbnfJson/terminals/GbnfOr.js";
import { GbnfVerbatimText } from "../../../utils/gbnfJson/terminals/GbnfVerbatimText.js";
import { LlamaFunctionCallValidationError } from "./LlamaFunctionCallValidationError.js";
export class FunctionCallNameGrammar extends LlamaGrammar {
_functions;
_chatWrapper;
constructor(llama, functions, chatWrapper) {
const grammar = getGbnfGrammarForFunctionName(functions, chatWrapper);
super(llama, {
grammar,
stopGenerationTriggers: [LlamaText("\n")],
trimWhitespaceSuffix: true
});
this._functions = functions;
this._chatWrapper = chatWrapper;
this._validateFunctions();
}
parseFunctionName(generatedFunctionName) {
if (this._chatWrapper.settings.functions.call.optionalPrefixSpace && generatedFunctionName[0] === " ")
generatedFunctionName = generatedFunctionName.slice(1);
const newlineIndex = generatedFunctionName.indexOf("\n");
const functionName = generatedFunctionName.slice(0, newlineIndex < 0
? generatedFunctionName.length
: newlineIndex);
if (!Object.hasOwn(this._functions, functionName))
throw new LlamaFunctionCallValidationError(`Function name "${functionName}" is not in the supplied functions object`, this._functions, this._chatWrapper, generatedFunctionName);
return functionName;
}
_validateFunctions() {
for (const functionsName of Object.keys(this._functions)) {
if (functionsName.includes(" ") || functionsName.includes("\n") || functionsName.includes("\t"))
throw new Error(`Function name "${functionsName}" contains spaces, new lines or tabs`);
else if (functionsName === "")
throw new Error("Function name cannot be an empty string");
}
}
}
function getGbnfGrammarForFunctionName(functions, chatWrapper) {
const grammarGenerator = new GbnfGrammarGenerator();
const functionNameGrammars = [];
for (const functionName of Object.keys(functions))
functionNameGrammars.push(new GbnfVerbatimText(functionName));
const callGrammar = new GbnfOr(functionNameGrammars);
const rootTerminal = new GbnfGrammar([
...(chatWrapper.settings.functions.call.optionalPrefixSpace ? ["[ ]?"] : []),
callGrammar.resolve(grammarGenerator)
]);
const rootGrammar = rootTerminal.getGrammar();
return grammarGenerator.generateGbnfFile(rootGrammar + " [\\n]");
}
//# sourceMappingURL=FunctionCallNameGrammar.js.map
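A hedged usage sketch of the grammar above: generation constrained by it can only produce one of the supplied function names, and `parseFunctionName()` validates the result. Obtaining the `Llama` instance and `ChatWrapper` is assumed to happen elsewhere, and the `generatedText` literal is purely illustrative.

declare const llama: Llama;
declare const chatWrapper: ChatWrapper;
const functions = {
    getWeather: {description: "Get the current weather for a city"},
    getTime: {description: "Get the current time"}
} as const;
const nameGrammar = new FunctionCallNameGrammar(llama, functions, chatWrapper);
// `generatedText` would normally come from an evaluation constrained by `nameGrammar`
const generatedText = "getWeather\n";
const functionName = nameGrammar.parseFunctionName(generatedText); // "getWeather" | "getTime"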


@@ -0,0 +1 @@
{"version":3,"file":"FunctionCallNameGrammar.js","sourceRoot":"","sources":["../../../../src/evaluator/LlamaChat/utils/FunctionCallNameGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,EAAC,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAC,SAAS,EAAC,MAAM,6BAA6B,CAAC;AAEtD,OAAO,EAAC,oBAAoB,EAAC,MAAM,iDAAiD,CAAC;AAErF,OAAO,EAAC,WAAW,EAAC,MAAM,kDAAkD,CAAC;AAE7E,OAAO,EAAC,MAAM,EAAC,MAAM,6CAA6C,CAAC;AACnE,OAAO,EAAC,gBAAgB,EAAC,MAAM,uDAAuD,CAAC;AAEvF,OAAO,EAAC,gCAAgC,EAAC,MAAM,uCAAuC,CAAC;AAGvF,MAAM,OAAO,uBAAoE,SAAQ,YAAY;IAChF,UAAU,CAAY;IACtB,YAAY,CAAc;IAE3C,YAAmB,KAAY,EAAE,SAAoB,EAAE,WAAwB;QAC3E,MAAM,OAAO,GAAG,6BAA6B,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC;QAEtE,KAAK,CAAC,KAAK,EAAE;YACT,OAAO;YACP,sBAAsB,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YACzC,oBAAoB,EAAE,IAAI;SAC7B,CAAC,CAAC;QAEH,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,YAAY,GAAG,WAAW,CAAC;QAEhC,IAAI,CAAC,kBAAkB,EAAE,CAAC;IAC9B,CAAC;IAEM,iBAAiB,CAAC,qBAA6B;QAClD,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,mBAAmB,IAAI,qBAAqB,CAAC,CAAC,CAAC,KAAK,GAAG;YACjG,qBAAqB,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAE3D,MAAM,YAAY,GAAG,qBAAqB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAEzD,MAAM,YAAY,GAAG,qBAAqB,CAAC,KAAK,CAC5C,CAAC,EACD,YAAY,GAAG,CAAC;YACZ,CAAC,CAAC,qBAAqB,CAAC,MAAM;YAC9B,CAAC,CAAC,YAAY,CACO,CAAC;QAE9B,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,YAAY,CAAC;YAC7C,MAAM,IAAI,gCAAgC,CACtC,kBAAkB,YAAY,2CAA2C,EACzE,IAAI,CAAC,UAAU,EACf,IAAI,CAAC,YAAY,EACjB,qBAAqB,CACxB,CAAC;QAEN,OAAO,YAAY,CAAC;IACxB,CAAC;IAEO,kBAAkB;QACtB,KAAK,MAAM,aAAa,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YACvD,IAAI,aAAa,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC;gBAC3F,MAAM,IAAI,KAAK,CAAC,kBAAkB,aAAa,sCAAsC,CAAC,CAAC;iBACtF,IAAI,aAAa,KAAK,EAAE;gBACzB,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QACnE,CAAC;IACL,CAAC;CACJ;AAED,SAAS,6BAA6B,CAClC,SAAoB,EAAE,WAAwB;IAE9C,MAAM,gBAAgB,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAEpD,MAAM,oBAAoB,GAAmB,EAAE,CAAC;IAEhD,KAAK,MAAM,YAAY,IAAI,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC;QAC7C,oBAAoB,CAAC,IAAI,CAAC,IAAI,gBAAgB,CAAC,YAAY,CAAC,CAAC,CAAC;IAElE,MAAM,WAAW,GAAG,IAAI,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAErD,MAAM,YAAY,GAAG,IAAI,WAAW,CAAC;QACjC,GAAG,CAAC,WAAW,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC5E,WAAW,CAAC,OAAO,CAAC,gBAAgB,CAAC;KACxC,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,YAAY,CAAC,UAAU,EAAE,CAAC;IAE9C,OAAO,gBAAgB,CAAC,gBAAgB,CAAC,WAAW,GAAG,QAAQ,CAAC,CAAC;AACrE,CAAC"}


@@ -0,0 +1,16 @@
import { LlamaGrammar } from "../../LlamaGrammar.js";
import { ChatModelFunctions } from "../../../types.js";
import { ChatWrapper } from "../../../ChatWrapper.js";
import { Llama } from "../../../bindings/Llama.js";
import { GbnfJsonSchema } from "../../../utils/gbnfJson/types.js";
export declare class FunctionCallParamsGrammar<const Functions extends ChatModelFunctions> extends LlamaGrammar {
private readonly _functions;
private readonly _chatWrapper;
private readonly _functionName;
private readonly _paramsSchema;
constructor(llama: Llama, functions: Functions, chatWrapper: ChatWrapper, functionName: string, paramsSchema: GbnfJsonSchema);
parseParams(callText: string): {
params: any;
raw: string;
};
}


@@ -0,0 +1,45 @@
import { LlamaGrammar } from "../../LlamaGrammar.js";
import { LlamaText } from "../../../utils/LlamaText.js";
import { validateObjectAgainstGbnfSchema } from "../../../utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js";
import { GbnfGrammarGenerator } from "../../../utils/gbnfJson/GbnfGrammarGenerator.js";
import { getGbnfJsonTerminalForGbnfJsonSchema } from "../../../utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js";
import { LlamaFunctionCallValidationError } from "./LlamaFunctionCallValidationError.js";
export class FunctionCallParamsGrammar extends LlamaGrammar {
_functions;
_chatWrapper;
_functionName;
_paramsSchema;
constructor(llama, functions, chatWrapper, functionName, paramsSchema) {
const grammar = getGbnfGrammarForFunctionParams(paramsSchema);
super(llama, {
grammar,
stopGenerationTriggers: [LlamaText("\n".repeat(4))],
trimWhitespaceSuffix: true
});
this._functions = functions;
this._chatWrapper = chatWrapper;
this._functionName = functionName;
this._paramsSchema = paramsSchema;
}
parseParams(callText) {
const endIndex = callText.lastIndexOf("\n".repeat(4));
if (endIndex < 0)
throw new LlamaFunctionCallValidationError(`Expected function call params for function "${this._functionName}" to end with stop generation trigger`, this._functions, this._chatWrapper, callText);
const paramsString = callText.slice(0, endIndex);
if (paramsString.trim().length === 0)
throw new LlamaFunctionCallValidationError(`Expected function call params for function "${this._functionName}" to not be empty`, this._functions, this._chatWrapper, callText);
const params = JSON.parse(paramsString);
validateObjectAgainstGbnfSchema(params, this._paramsSchema);
return {
params: params, // prevent infinite TS type instantiation
raw: paramsString
};
}
}
function getGbnfGrammarForFunctionParams(paramsSchema) {
const grammarGenerator = new GbnfGrammarGenerator();
const rootTerminal = getGbnfJsonTerminalForGbnfJsonSchema(paramsSchema, grammarGenerator);
const rootGrammar = rootTerminal.resolve(grammarGenerator, true);
return grammarGenerator.generateGbnfFile(rootGrammar + ` "${"\\n".repeat(4)}"`);
}
//# sourceMappingURL=FunctionCallParamsGrammar.js.map
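A hedged usage sketch of the params grammar above: `parseParams()` expects the generated text to end with the four-newline stop trigger and validates the parsed object against the schema. The `GbnfJsonSchema` literal and the `callText` value are illustrative assumptions; obtaining `llama` and `chatWrapper` is assumed to happen elsewhere.

declare const llama: Llama;
declare const chatWrapper: ChatWrapper;
const weatherParamsSchema = {
    type: "object",
    properties: {city: {type: "string"}}
} as const;
const paramsGrammar = new FunctionCallParamsGrammar(
    llama,
    {getWeather: {params: weatherParamsSchema}},
    chatWrapper,
    "getWeather",
    weatherParamsSchema
);
// `callText` would normally come from an evaluation constrained by `paramsGrammar`
const callText = '{"city": "Paris"}' + "\n".repeat(4);
const {params, raw} = paramsGrammar.parseParams(callText); // params: {city: "Paris"}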


@@ -0,0 +1 @@
{"version":3,"file":"FunctionCallParamsGrammar.js","sourceRoot":"","sources":["../../../../src/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,EAAC,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAC,SAAS,EAAC,MAAM,6BAA6B,CAAC;AACtD,OAAO,EAAC,+BAA+B,EAAC,MAAM,kEAAkE,CAAC;AAEjH,OAAO,EAAC,oBAAoB,EAAC,MAAM,iDAAiD,CAAC;AACrF,OAAO,EAAC,oCAAoC,EAAC,MAAM,uEAAuE,CAAC;AAI3H,OAAO,EAAC,gCAAgC,EAAC,MAAM,uCAAuC,CAAC;AAGvF,MAAM,OAAO,yBAAsE,SAAQ,YAAY;IAClF,UAAU,CAAY;IACtB,YAAY,CAAc;IAC1B,aAAa,CAAS;IACtB,aAAa,CAAiB;IAE/C,YAAmB,KAAY,EAAE,SAAoB,EAAE,WAAwB,EAAE,YAAoB,EAAE,YAA4B;QAC/H,MAAM,OAAO,GAAG,+BAA+B,CAAC,YAAY,CAAC,CAAC;QAE9D,KAAK,CAAC,KAAK,EAAE;YACT,OAAO;YACP,sBAAsB,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YACnD,oBAAoB,EAAE,IAAI;SAC7B,CAAC,CAAC;QAEH,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,YAAY,GAAG,WAAW,CAAC;QAChC,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;QAClC,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;IACtC,CAAC;IAEM,WAAW,CAAC,QAAgB;QAC/B,MAAM,QAAQ,GAAG,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAEtD,IAAI,QAAQ,GAAG,CAAC;YACZ,MAAM,IAAI,gCAAgC,CACtC,+CAA+C,IAAI,CAAC,aAAa,uCAAuC,EACxG,IAAI,CAAC,UAAU,EACf,IAAI,CAAC,YAAY,EACjB,QAAQ,CACX,CAAC;QAEN,MAAM,YAAY,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;QAEjD,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAChC,MAAM,IAAI,gCAAgC,CACtC,+CAA+C,IAAI,CAAC,aAAa,mBAAmB,EACpF,IAAI,CAAC,UAAU,EACf,IAAI,CAAC,YAAY,EACjB,QAAQ,CACX,CAAC;QAEN,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;QAExC,+BAA+B,CAAC,MAAM,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC;QAE5D,OAAO;YACH,MAAM,EAAE,MAAa,EAAE,yCAAyC;YAChE,GAAG,EAAE,YAAY;SACpB,CAAC;IACN,CAAC;CACJ;AAED,SAAS,+BAA+B,CAAC,YAA4B;IACjE,MAAM,gBAAgB,GAAG,IAAI,oBAAoB,EAAE,CAAC;IACpD,MAAM,YAAY,GAAG,oCAAoC,CAAC,YAAY,EAAE,gBAAgB,CAAC,CAAC;IAC1F,MAAM,WAAW,GAAG,YAAY,CAAC,OAAO,CAAC,gBAAgB,EAAE,IAAI,CAAC,CAAC;IAEjE,OAAO,gBAAgB,CAAC,gBAAgB,CAAC,WAAW,GAAG,KAAK,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;AACpF,CAAC"}


@@ -0,0 +1,8 @@
import { ChatModelFunctions } from "../../../types.js";
import { ChatWrapper } from "../../../ChatWrapper.js";
export declare class LlamaFunctionCallValidationError<const Functions extends ChatModelFunctions> extends Error {
readonly functions: Functions;
readonly chatWrapper: ChatWrapper;
readonly callText: string;
constructor(message: string, functions: Functions, chatWrapper: ChatWrapper, callText: string);
}


@@ -0,0 +1,12 @@
export class LlamaFunctionCallValidationError extends Error {
functions;
chatWrapper;
callText;
constructor(message, functions, chatWrapper, callText) {
super(message);
this.functions = functions;
this.chatWrapper = chatWrapper;
this.callText = callText;
}
}
//# sourceMappingURL=LlamaFunctionCallValidationError.js.map
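A hedged sketch of how a caller might catch this error to recover the raw text the model generated for the failed function call; the helper below is purely illustrative and uses only the APIs declared above.

function tryParseFunctionName<const Functions extends ChatModelFunctions>(
    grammar: FunctionCallNameGrammar<Functions>, generatedText: string
) {
    try {
        return grammar.parseFunctionName(generatedText);
    } catch (err) {
        if (err instanceof LlamaFunctionCallValidationError) {
            console.warn("Invalid function call:", err.callText);
            return null;
        }
        throw err; // not a validation error; rethrow
    }
}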


@@ -0,0 +1 @@
{"version":3,"file":"LlamaFunctionCallValidationError.js","sourceRoot":"","sources":["../../../../src/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.ts"],"names":[],"mappings":"AAIA,MAAM,OAAO,gCAA6E,SAAQ,KAAK;IACnF,SAAS,CAAY;IACrB,WAAW,CAAc;IACzB,QAAQ,CAAS;IAEjC,YAAmB,OAAe,EAAE,SAAoB,EAAE,WAAwB,EAAE,QAAgB;QAChG,KAAK,CAAC,OAAO,CAAC,CAAC;QAEf,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC7B,CAAC;CACJ"}


@@ -0,0 +1,16 @@
import { ChatHistoryItem, Tokenizer } from "../../../../types.js";
import { ChatWrapper } from "../../../../ChatWrapper.js";
export declare function eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy({ chatHistory, maxTokensCount, tokenizer, chatWrapper, lastShiftMetadata }: {
chatHistory: ChatHistoryItem[];
maxTokensCount: number;
tokenizer: Tokenizer;
chatWrapper: ChatWrapper;
lastShiftMetadata?: object | null;
}): Promise<{
chatHistory: ChatHistoryItem[];
metadata: CalculationMetadata;
}>;
type CalculationMetadata = {
removedCharactersNumber: number;
};
export {};


@@ -0,0 +1,254 @@
import { isChatModelResponseFunctionCall, isChatModelResponseSegment } from "../../../../types.js";
import { findCharacterRemovalCountToFitChatHistoryInContext } from "../../../../utils/findCharacterRemovalCountToFitChatHistoryInContext.js";
import { truncateLlamaTextAndRoundToWords, truncateTextAndRoundToWords } from "../../../../utils/truncateTextAndRoundToWords.js";
import { LlamaText } from "../../../../utils/LlamaText.js";
export async function eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy({ chatHistory, maxTokensCount, tokenizer, chatWrapper, lastShiftMetadata }) {
let initialCharactersRemovalCount = 0;
if (isCalculationMetadata(lastShiftMetadata))
initialCharactersRemovalCount = lastShiftMetadata.removedCharactersNumber;
const { removedCharactersCount, compressedChatHistory } = await findCharacterRemovalCountToFitChatHistoryInContext({
chatHistory,
tokensCountToFit: maxTokensCount,
initialCharactersRemovalCount,
tokenizer,
chatWrapper,
failedCompressionErrorMessage: "Failed to compress chat history for context shift due to a too long prompt or system message that cannot be compressed without affecting the generation quality. " +
"Consider increasing the context size or shortening the long prompt or system message.",
compressChatHistory({ chatHistory, charactersToRemove, estimatedCharactersPerToken }) {
const res = chatHistory.map((item) => structuredClone(item));
let charactersLeftToRemove = charactersToRemove;
function compressFunctionCalls() {
for (let i = res.length - 1; i >= 0 && charactersLeftToRemove > 0; i--) {
const historyItem = res[i];
if (historyItem.type !== "model")
continue;
for (let t = historyItem.response.length - 1; t >= 0 && charactersLeftToRemove > 0; t--) {
const item = historyItem.response[t];
if (typeof item === "string" || item.type !== "functionCall")
continue;
if (item.rawCall == null)
continue;
const originalRawCallTokensLength = LlamaText.fromJSON(item.rawCall).tokenize(tokenizer, "trimLeadingSpace").length;
const newRawCallText = chatWrapper.generateFunctionCall(item.name, item.params);
const newRawCallTextTokensLength = newRawCallText.tokenize(tokenizer, "trimLeadingSpace").length;
if (newRawCallTextTokensLength < originalRawCallTokensLength) {
item.rawCall = newRawCallText.toJSON();
charactersLeftToRemove -= ((originalRawCallTokensLength - newRawCallTextTokensLength) * estimatedCharactersPerToken);
}
}
}
}
function removeHistoryThatLedToModelResponseAtIndex(index) {
let removedItems = 0;
for (let i = index - 1; i >= 0; i--) {
const historyItem = res[i];
if (historyItem == null)
continue;
if (historyItem.type === "model")
break; // stop removing history items if we reach another model response
if (i === 0 && historyItem.type === "system")
break; // keep the first system message
if (historyItem.type === "user" || historyItem.type === "system") {
const newText = truncateLlamaTextAndRoundToWords(LlamaText.fromJSON(historyItem.text), charactersLeftToRemove, undefined, false);
const newTextString = newText.toString();
const historyItemString = LlamaText.fromJSON(historyItem.text).toString();
if (newText.values.length === 0) {
res.splice(i, 1);
i++;
removedItems++;
charactersLeftToRemove -= historyItemString.length;
}
else if (newTextString.length < historyItemString.length) {
charactersLeftToRemove -= historyItemString.length - newTextString.length;
if (historyItem.type === "user")
historyItem.text = newText.toString();
else
historyItem.text = newText.toJSON();
}
}
else {
void historyItem;
}
}
return removedItems;
}
function compressHistoryThatLedToModelResponseAtIndex(index, keepTokensCount = 0) {
let removedItems = 0;
let promptStartIndex = undefined;
for (let i = index - 1; i >= 0; i--) {
const historyItem = res[i];
if (historyItem == null)
continue;
if (historyItem.type === "model") {
promptStartIndex = i + 1;
break;
}
if (i === 0 && historyItem.type === "system") {
promptStartIndex = i + 1;
break; // keep the first system message
}
}
if (promptStartIndex == null || promptStartIndex >= index)
return 0;
for (let i = promptStartIndex; i < index && charactersLeftToRemove > 0; i++) {
const historyItem = res[i];
if (historyItem == null || historyItem.type !== "user")
continue;
let removeChars = Math.min(charactersLeftToRemove, historyItem.text.length);
if (keepTokensCount > 0) {
removeChars -= Math.floor(keepTokensCount * estimatedCharactersPerToken);
if (removeChars < 0)
removeChars = 0;
keepTokensCount -= Math.min(keepTokensCount, Math.max(0, historyItem.text.length - removeChars) / estimatedCharactersPerToken);
}
const newText = truncateTextAndRoundToWords(historyItem.text, removeChars, undefined, false);
if (newText.length === 0) {
res.splice(i, 1);
i--;
index--;
removedItems++;
charactersLeftToRemove -= historyItem.text.length;
}
else {
charactersLeftToRemove -= historyItem.text.length - newText.length;
historyItem.text = newText;
}
}
return removedItems;
}
function removeEmptySegmentsFromModelResponse(modelResponse) {
const stack = [];
for (let t = 0; t < modelResponse.length && charactersLeftToRemove > 0; t++) {
const item = modelResponse[t];
const isLastItem = t === modelResponse.length - 1;
if (!isChatModelResponseSegment(item))
continue;
const type = item.segmentType;
const topStack = stack.at(-1);
if (topStack?.type === type) {
if (item.ended && item.text === "" && topStack.canRemove) {
modelResponse.splice(t, 1);
t--;
modelResponse.splice(topStack.startIndex, 1);
t--;
stack.pop();
}
else if (!item.ended && item.text === "" && !isLastItem) {
modelResponse.splice(t, 1);
t--;
}
else if (!item.ended && item.text !== "")
topStack.canRemove = false;
else if (item.ended)
stack.pop();
}
else if (!item.ended)
stack.push({
type,
startIndex: t,
canRemove: item.text === ""
});
}
}
function compressFirstModelResponse() {
for (let i = 0; i < res.length && charactersLeftToRemove > 0; i++) {
const historyItem = res[i];
const isLastHistoryItem = i === res.length - 1;
if (historyItem.type !== "model")
continue;
for (let t = 0; t < historyItem.response.length && charactersLeftToRemove > 0; t++) {
const item = historyItem.response[t];
const isLastText = t === historyItem.response.length - 1;
if (isLastHistoryItem && isLastText)
continue;
if (typeof item === "string") {
const newText = truncateTextAndRoundToWords(item, charactersLeftToRemove, undefined, true);
if (newText === "") {
historyItem.response.splice(t, 1);
t--;
charactersLeftToRemove -= item.length;
}
else if (newText.length < item.length) {
historyItem.response[t] = newText;
charactersLeftToRemove -= item.length - newText.length;
}
}
else if (isChatModelResponseFunctionCall(item)) {
historyItem.response.splice(t, 1);
t--;
const functionCallAndResultTokenUsage = chatWrapper.generateFunctionCallsAndResults([item], true)
.tokenize(tokenizer, "trimLeadingSpace").length;
charactersLeftToRemove -= functionCallAndResultTokenUsage * estimatedCharactersPerToken;
}
else if (isChatModelResponseSegment(item)) {
if (item.text !== "") {
const newText = truncateTextAndRoundToWords(item.text, charactersLeftToRemove, undefined, true);
if (newText === "" && item.ended) {
const emptySegmentTokenUsage = chatWrapper.generateModelResponseText([{ ...item, text: "" }], true)
.tokenize(tokenizer, "trimLeadingSpace").length;
historyItem.response.splice(t, 1);
t--;
charactersLeftToRemove -= item.text.length + emptySegmentTokenUsage * estimatedCharactersPerToken;
}
else {
charactersLeftToRemove -= item.text.length - newText.length;
item.text = newText;
}
}
}
else
void item;
}
removeEmptySegmentsFromModelResponse(historyItem.response);
if (historyItem.response.length === 0) {
// if the model response is removed from the history,
// the things that led to it are not important anymore
i -= removeHistoryThatLedToModelResponseAtIndex(i);
res.splice(i, 1);
i--;
}
}
}
function compressLastModelResponse(minCharactersToKeep = 60) {
const lastHistoryItem = res[res.length - 1];
if (lastHistoryItem == null || lastHistoryItem.type !== "model")
return;
const lastResponseItem = lastHistoryItem.response[lastHistoryItem.response.length - 1];
if (lastResponseItem == null || typeof lastResponseItem !== "string")
return;
compressHistoryThatLedToModelResponseAtIndex(res.length - 1, maxTokensCount / 4);
if (charactersLeftToRemove <= 0)
return;
const nextTextLength = Math.max(Math.min(lastResponseItem.length, minCharactersToKeep), lastResponseItem.length - charactersLeftToRemove);
const charactersToRemoveFromText = lastResponseItem.length - nextTextLength;
const newText = truncateTextAndRoundToWords(lastResponseItem, charactersToRemoveFromText, undefined, true);
if (newText.length < lastResponseItem.length) {
lastHistoryItem.response[lastHistoryItem.response.length - 1] = newText;
charactersLeftToRemove -= lastResponseItem.length - newText.length;
}
if (charactersLeftToRemove <= 0)
return;
compressHistoryThatLedToModelResponseAtIndex(res.length - 1);
}
compressFunctionCalls();
if (charactersLeftToRemove <= 0)
return res;
compressFirstModelResponse();
if (charactersLeftToRemove <= 0)
return res;
compressLastModelResponse();
return res;
}
});
const newMetadata = {
removedCharactersNumber: removedCharactersCount
};
return {
chatHistory: compressedChatHistory,
metadata: newMetadata
};
}
function isCalculationMetadata(metadata) {
return metadata != null && typeof metadata === "object" && typeof metadata.removedCharactersNumber === "number";
}
//# sourceMappingURL=eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map
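This function appears to be the implementation behind the default `"eraseFirstResponseAndKeepFirstSystem"` strategy value. As a hedged sketch, it can also be passed explicitly (or wrapped) as a custom `contextShift.strategy`; the import path, the omitted `LlamaChat`/`ChatHistoryItem` imports, and the `sequence.context.contextSize` access are assumptions here, not guarantees of this file.

import {
    eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy
} from "./eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";

async function generateWithExplicitShiftStrategy(chat: LlamaChat, history: ChatHistoryItem[]) {
    return await chat.generateResponse(history, {
        contextShift: {
            size: (sequence) => Math.floor(sequence.context.contextSize / 10), // ~10% of the context size
            strategy: ({chatHistory, ...rest}) =>
                eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy({
                    chatHistory: chatHistory.slice(), // copy to satisfy the mutable ChatHistoryItem[] parameter type
                    ...rest
                })
        }
    });
}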