First upload version 0.0.1
459 node_modules/node-llama-cpp/dist/evaluator/LlamaChat/LlamaChat.d.ts (generated, vendored, new file)
@@ -0,0 +1,459 @@
import { EventRelay } from "lifecycle-utils";
import { ChatWrapper } from "../../ChatWrapper.js";
import { LlamaContextSequence } from "../LlamaContext/LlamaContext.js";
import { ChatHistoryItem, ChatModelFunctions, ChatModelSegmentType, LLamaContextualRepeatPenalty, Token, Tokenizer } from "../../types.js";
import { GbnfJsonSchemaToType } from "../../utils/gbnfJson/types.js";
import { LlamaGrammar } from "../LlamaGrammar.js";
import { LlamaText, LlamaTextJSON } from "../../utils/LlamaText.js";
import { EvaluationPriority } from "../LlamaContext/types.js";
import { TokenBias } from "../TokenBias.js";
import { LlamaModel } from "../LlamaModel/LlamaModel.js";
export type LlamaChatOptions = {
    contextSequence: LlamaContextSequence;
    /** `"auto"` is used by default */
    chatWrapper?: "auto" | ChatWrapper;
    /**
     * Automatically dispose the sequence when the session is disposed
     *
     * Defaults to `false`.
     */
    autoDisposeSequence?: boolean;
};
export type LlamaChatResponseChunk = LlamaChatResponseTextChunk | LlamaChatResponseSegmentChunk;
export type LlamaChatResponseTextChunk = {
    /** When `type` is `undefined`, the chunk is part of the main response and is not a segment */
    type: undefined;
    /**
     * `segmentType` has no purpose when `type` is `undefined` (meaning that this chunk is part of the main response and is not a segment).
     */
    segmentType: undefined;
    /**
     * The generated text chunk.
     *
     * Detokenized from the `tokens` property,
     * but with the context of the previous generation (for better spacing of the text with some models).
     *
     * Prefer using this property over `tokens` when streaming the generated response as text.
     */
    text: string;
    /** The generated tokens */
    tokens: Token[];
};
export type LlamaChatResponseSegmentChunk = {
    type: "segment";
    /** Segment type */
    segmentType: ChatModelSegmentType;
    /**
     * The generated text chunk.
     *
     * Detokenized from the `tokens` property,
     * but with the context of the previous generation (for better spacing of the text with some models).
     *
     * Prefer using this property over `tokens` when streaming the generated response as text.
     */
    text: string;
    /** The generated tokens */
    tokens: Token[];
    /**
     * When the current chunk is the start of a segment, this field will be set.
     *
     * It's possible that a chunk with no tokens and empty text will be emitted just to set this field
     * to signify that the segment has started.
     */
    segmentStartTime?: Date;
    /**
     * When the current chunk is the last one of a segment (meaning the current segment has ended), this field will be set.
     *
     * It's possible that a chunk with no tokens and empty text will be emitted just to set this field
     * to signify that the segment has ended.
     */
    segmentEndTime?: Date;
};
export type LlamaChatResponseFunctionCallParamsChunk = {
    /**
     * Each different function call has a different `callIndex`.
     *
     * When the previous function call has finished being generated, the `callIndex` of the next one will increment.
     *
     * Use this value to distinguish between different function calls.
     */
    callIndex: number;
    /**
     * The name of the function being called
     */
    functionName: string;
    /**
     * A chunk of the generated text used for the function call parameters.
     *
     * Collect all the chunks together to construct the full function call parameters.
     *
     * After the function call is finished, the entire constructed params text can be parsed as a JSON object,
     * according to the function parameters schema.
     */
    paramsChunk: string;
    /**
     * When this is `true`, the current chunk is the last chunk in the generation of the current function call parameters.
     */
    done: boolean;
};
export type LLamaChatGenerateResponseOptions<Functions extends ChatModelFunctions | undefined = undefined> = {
    /**
     * Called as the model generates the main response with the generated text chunk.
     *
     * Useful for streaming the generated response as it's being generated.
     *
     * Includes only the main response without any text segments (like thoughts).
     * For streaming the response with segments, use {@link onResponseChunk `onResponseChunk`}.
     */
    onTextChunk?: (text: string) => void;
    /**
     * Called as the model generates the main response with the generated tokens.
     *
     * Preferably, you'd want to use {@link onTextChunk `onTextChunk`} instead of this.
     *
     * Includes only the main response without any segments (like thoughts).
     * For streaming the response with segments, use {@link onResponseChunk `onResponseChunk`}.
     */
    onToken?: (tokens: Token[]) => void;
    /**
     * Called as the model generates a response with the generated text and tokens,
     * including segment information (when the generated output is part of a segment).
     *
     * Useful for streaming the generated response as it's being generated, including the main response and all segments.
     *
     * Only use this function when you need the segmented texts, like thought segments (chain of thought text).
     */
    onResponseChunk?: (chunk: LlamaChatResponseChunk) => void;
    /**
     * An AbortSignal to later abort the generation.
     *
     * When the signal is aborted, the generation will stop and throw `signal.reason` as the error.
     *
     * > To stop an ongoing generation without throwing an error, also set `stopOnAbortSignal` to `true`.
     */
    signal?: AbortSignal;
    /**
     * When a response already started being generated and then the signal is aborted,
     * the generation will stop and the response will be returned as is instead of throwing an error.
     *
     * Defaults to `false`.
     */
    stopOnAbortSignal?: boolean;
    /** Maximum number of tokens to generate */
    maxTokens?: number;
    /**
     * Temperature is a hyperparameter that controls the randomness of the generated text.
     * It affects the probability distribution of the model's output tokens.
     *
     * A higher temperature (e.g., 1.5) makes the output more random and creative,
     * while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
     *
     * The suggested temperature is 0.8, which provides a balance between randomness and determinism.
     *
     * At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
     *
     * Set to `0` to disable.
     * Disabled by default (set to `0`).
     */
    temperature?: number;
    /**
     * From the next token candidates, discard the percentage of tokens with the lowest probability.
     * For example, if set to `0.05`, 5% of the lowest probability tokens will be discarded.
     * This is useful for generating more high-quality results when using a high temperature.
     * Set to a value between `0` and `1` to enable.
     *
     * Only relevant when `temperature` is set to a value greater than `0`.
     * Disabled by default.
     */
    minP?: number;
    /**
     * Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
     * An integer number between `1` and the size of the vocabulary.
     * Set to `0` to disable (which uses the full vocabulary).
     *
     * Only relevant when `temperature` is set to a value greater than 0.
     */
    topK?: number;
    /**
     * Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
     * and samples the next token only from this set.
     * A float number between `0` and `1`.
     * Set to `1` to disable.
     *
     * Only relevant when `temperature` is set to a value greater than `0`.
     */
    topP?: number;
    /**
     * Used to control the randomness of the generated text.
     *
     * Change the seed to get different results.
     *
     * Only relevant when using `temperature`.
     */
    seed?: number;
    /**
     * Trim whitespace from the end of the generated text
     *
     * Defaults to `false`.
     */
    trimWhitespaceSuffix?: boolean;
    repeatPenalty?: false | LLamaContextualRepeatPenalty;
    /**
     * Adjust the probability of tokens being generated.
     * Can be used to bias the model to generate tokens that you want it to lean towards,
     * or to avoid generating tokens that you want it to avoid.
     */
    tokenBias?: TokenBias | (() => TokenBias);
    /**
     * See the parameter `evaluationPriority` on the `LlamaContextSequence.evaluate()` function for more information.
     */
    evaluationPriority?: EvaluationPriority;
    contextShift?: LLamaChatContextShiftOptions;
    /**
     * Custom stop triggers to stop the generation of the response when any of the provided triggers are found.
     */
    customStopTriggers?: readonly (LlamaText | string | readonly (string | Token)[])[];
    /**
     * The evaluation context window returned from the last evaluation.
     * This is an optimization to utilize existing context sequence state better when possible.
     */
    lastEvaluationContextWindow?: {
        /** The history of the last evaluation. */
        history?: ChatHistoryItem[];
        /**
         * Minimum overlap percentage with existing context sequence state to use the last evaluation context window.
         * If the last evaluation context window is not used, a new context will be generated based on the full history,
         * which will decrease the likelihood of another context shift happening so soon.
         *
         * A number between `0` (exclusive) and `1` (inclusive).
         */
        minimumOverlapPercentageToPreventContextShift?: number;
    };
    /**
     * Called as the model generates function calls with the generated parameters chunk for each function call.
     *
     * Useful for streaming the generated function call parameters as they're being generated.
     * Only useful in specific use cases,
     * such as showing the generated textual file content as it's being generated (note that doing this requires parsing incomplete JSON).
     *
     * The constructed text from all the params chunks of a given function call can be parsed as a JSON object,
     * according to the function parameters schema.
     *
     * Each function call has its own `callIndex` you can use to distinguish between them.
     *
     * Only relevant when using function calling (via passing the `functions` option).
     */
    onFunctionCallParamsChunk?: (chunk: LlamaChatResponseFunctionCallParamsChunk) => void;
    /**
     * Set the maximum number of tokens the model is allowed to spend on various segmented responses.
     */
    budgets?: {
        /**
         * Whether to include the tokens already consumed by the current model response being completed in the budget.
         *
         * Defaults to `true`.
         */
        includeCurrentResponse?: boolean;
        /**
         * Budget for thought tokens.
         *
         * Defaults to `Infinity`.
         */
        thoughtTokens?: number;
        /**
         * Budget for comment tokens.
         *
         * Defaults to `Infinity`.
         */
        commentTokens?: number;
    };
    /**
     * Stop the generation when the model tries to generate a non-textual segment or call a function.
     *
     * Useful for generating completions in a form of a model response.
     *
     * Defaults to `false`.
     */
    abortOnNonText?: boolean;
} & ({
    grammar?: LlamaGrammar;
    functions?: never;
    documentFunctionParams?: never;
    maxParallelFunctionCalls?: never;
    onFunctionCall?: never;
    onFunctionCallParamsChunk?: never;
} | {
    grammar?: never;
    functions?: Functions | ChatModelFunctions;
    documentFunctionParams?: boolean;
    maxParallelFunctionCalls?: number;
    onFunctionCall?: (functionCall: LlamaChatResponseFunctionCall<Functions extends ChatModelFunctions ? Functions : ChatModelFunctions>) => void;
    onFunctionCallParamsChunk?: (chunk: LlamaChatResponseFunctionCallParamsChunk) => void;
});
export type LLamaChatLoadAndCompleteUserMessageOptions<Functions extends ChatModelFunctions | undefined = undefined> = {
    /**
     * Complete the given user prompt without adding it or the completion to the returned context window.
     */
    initialUserPrompt?: string;
    /**
     * When a completion already started being generated and then the signal is aborted,
     * the generation will stop and the completion will be returned as is instead of throwing an error.
     *
     * Defaults to `false`.
     */
    stopOnAbortSignal?: boolean;
    /**
     * Called as the model generates a completion with the generated text chunk.
     *
     * Useful for streaming the generated completion as it's being generated.
     */
    onTextChunk?: LLamaChatGenerateResponseOptions<Functions>["onTextChunk"];
    /**
     * Called as the model generates a completion with the generated tokens.
     *
     * Preferably, you'd want to use `onTextChunk` instead of this.
     */
    onToken?: LLamaChatGenerateResponseOptions<Functions>["onToken"];
    signal?: LLamaChatGenerateResponseOptions<Functions>["signal"];
    maxTokens?: LLamaChatGenerateResponseOptions<Functions>["maxTokens"];
    temperature?: LLamaChatGenerateResponseOptions<Functions>["temperature"];
    minP?: LLamaChatGenerateResponseOptions<Functions>["minP"];
    topK?: LLamaChatGenerateResponseOptions<Functions>["topK"];
    topP?: LLamaChatGenerateResponseOptions<Functions>["topP"];
    seed?: LLamaChatGenerateResponseOptions<Functions>["seed"];
    trimWhitespaceSuffix?: LLamaChatGenerateResponseOptions<Functions>["trimWhitespaceSuffix"];
    repeatPenalty?: LLamaChatGenerateResponseOptions<Functions>["repeatPenalty"];
    tokenBias?: LLamaChatGenerateResponseOptions<Functions>["tokenBias"];
    evaluationPriority?: LLamaChatGenerateResponseOptions<Functions>["evaluationPriority"];
    contextShift?: LLamaChatGenerateResponseOptions<Functions>["contextShift"];
    customStopTriggers?: LLamaChatGenerateResponseOptions<Functions>["customStopTriggers"];
    lastEvaluationContextWindow?: LLamaChatGenerateResponseOptions<Functions>["lastEvaluationContextWindow"];
    grammar?: LlamaGrammar;
    /**
     * Functions are not used by the model here,
     * but are used for keeping the instructions given to the model about the functions in the current context state,
     * to avoid context shifts.
     *
     * It's best to provide the same functions that were used for the previous prompt here.
     */
    functions?: Functions | ChatModelFunctions;
    /**
     * Functions are not used by the model here,
     * but are used for keeping the instructions given to the model about the functions in the current context state,
     * to avoid context shifts.
     *
     * It's best to provide the same value that was used for the previous prompt here.
     */
    documentFunctionParams?: boolean;
};
export type LLamaChatContextShiftOptions = {
    /**
     * The number of tokens to delete from the context window to make space for new ones.
     * Defaults to 10% of the context size.
     */
    size?: number | ((sequence: LlamaContextSequence) => number | Promise<number>);
    /**
     * The strategy to use when deleting tokens from the context window.
     *
     * Defaults to `"eraseFirstResponseAndKeepFirstSystem"`.
     */
    strategy?: "eraseFirstResponseAndKeepFirstSystem" | ((options: {
        /** Full chat history */
        chatHistory: readonly ChatHistoryItem[];
        /** Maximum number of tokens that the new chat history should fit under when tokenized */
        maxTokensCount: number;
        /** Tokenizer used to tokenize the chat history */
        tokenizer: Tokenizer;
        /** Chat wrapper used to generate the context state */
        chatWrapper: ChatWrapper;
        /**
         * The metadata returned from the last context shift strategy call.
         * Will be `null` on the first call.
         */
        lastShiftMetadata?: object | null;
    }) => {
        chatHistory: ChatHistoryItem[];
        metadata?: object | null;
    } | Promise<{
        chatHistory: ChatHistoryItem[];
        metadata?: object | null;
    }>);
    /**
     * The `contextShiftMetadata` returned from the last evaluation.
     * This is an optimization to utilize the existing context state better when possible.
     */
    lastEvaluationMetadata?: object | undefined | null;
};
export declare class LlamaChat {
    readonly onDispose: EventRelay<void>;
    constructor({ contextSequence, chatWrapper, autoDisposeSequence }: LlamaChatOptions);
    dispose({ disposeSequence }?: {
        disposeSequence?: boolean;
    }): void;
    /** @hidden */
    [Symbol.dispose](): void;
    get disposed(): boolean;
    get chatWrapper(): ChatWrapper;
    get sequence(): LlamaContextSequence;
    get context(): import("../LlamaContext/LlamaContext.js").LlamaContext;
    get model(): LlamaModel;
    generateResponse<const Functions extends ChatModelFunctions | undefined = undefined>(history: ChatHistoryItem[], options?: LLamaChatGenerateResponseOptions<Functions>): Promise<LlamaChatResponse<Functions>>;
    loadChatAndCompleteUserMessage<const Functions extends ChatModelFunctions | undefined = undefined>(history: ChatHistoryItem[], options?: LLamaChatLoadAndCompleteUserMessageOptions<Functions>): Promise<LlamaChatLoadAndCompleteUserResponse>;
}
export type LlamaChatResponse<Functions extends ChatModelFunctions | undefined = undefined> = {
    /**
     * The response text only, _without_ any text segments (like thoughts).
     */
    response: string;
    /**
     * The full response, including all text and text segments (like thoughts).
     */
    fullResponse: Array<string | LlamaChatResponseSegment>;
    functionCalls?: Functions extends ChatModelFunctions ? LlamaChatResponseFunctionCall<Functions>[] : never;
    lastEvaluation: {
        cleanHistory: ChatHistoryItem[];
        contextWindow: ChatHistoryItem[];
        contextShiftMetadata: any;
    };
    metadata: {
        remainingGenerationAfterStop?: string | Token[];
        stopReason: "eogToken" | "stopGenerationTrigger" | "functionCalls" | "maxTokens" | "abort";
    } | {
        remainingGenerationAfterStop?: string | Token[];
        stopReason: "customStopTrigger";
        customStopTrigger: (string | Token)[];
    };
};
export type LlamaChatResponseFunctionCall<Functions extends ChatModelFunctions, FunctionCallName extends keyof Functions & string = string & keyof Functions, Params = Functions[FunctionCallName]["params"] extends undefined | null | void ? undefined : GbnfJsonSchemaToType<Functions[FunctionCallName]["params"]>> = {
    functionName: FunctionCallName;
    params: Params;
    raw: LlamaTextJSON;
};
export type LlamaChatResponseSegment = {
    type: "segment";
    segmentType: ChatModelSegmentType;
    text: string;
    ended: boolean;
    raw: LlamaTextJSON;
    startTime?: string;
    endTime?: string;
};
export type LlamaChatLoadAndCompleteUserResponse = {
    completion: string;
    lastEvaluation: {
        /**
         * The completion and initial user prompt are not added to this context window result,
         * but are loaded to the current context sequence state as tokens
         */
        contextWindow: ChatHistoryItem[];
        contextShiftMetadata: any;
    };
    metadata: {
        remainingGenerationAfterStop?: string | Token[];
        stopReason: "eogToken" | "stopGenerationTrigger" | "maxTokens" | "abort";
    } | {
        remainingGenerationAfterStop?: string | Token[];
        stopReason: "customStopTrigger";
        customStopTrigger: (string | Token)[];
    };
};
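For orientation, here is a minimal usage sketch of the LlamaChat API declared above. It is only an illustration under stated assumptions: getLlama, loadModel, createContext and the "model.gguf" path are not part of this file, and the trailing empty model item follows the pattern commonly used when driving LlamaChat directly; adjust as needed.

import { getLlama, LlamaChat } from "node-llama-cpp";

// Assumed model path; point this at a real GGUF file.
const llama = await getLlama();
const model = await llama.loadModel({ modelPath: "model.gguf" });
const context = await model.createContext();

const chat = new LlamaChat({ contextSequence: context.getSequence() });

const { response, metadata } = await chat.generateResponse([
    { type: "system", text: "You are a helpful assistant." },
    { type: "user", text: "Hi there" },
    { type: "model", response: [] } // the item the generated response is written into
], {
    temperature: 0.8,
    maxTokens: 256,
    onTextChunk(text) {
        process.stdout.write(text); // stream the main response as it is generated
    }
});

console.log({ response, stopReason: metadata.stopReason });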
2584 node_modules/node-llama-cpp/dist/evaluator/LlamaChat/LlamaChat.js (generated, vendored, new file)
File diff suppressed because it is too large
1 node_modules/node-llama-cpp/dist/evaluator/LlamaChat/LlamaChat.js.map (generated, vendored, new file)
File diff suppressed because one or more lines are too long
11 node_modules/node-llama-cpp/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts (generated, vendored, new file)
@@ -0,0 +1,11 @@
import { LlamaGrammar } from "../../LlamaGrammar.js";
import { ChatModelFunctions } from "../../../types.js";
import { ChatWrapper } from "../../../ChatWrapper.js";
import { Llama } from "../../../bindings/Llama.js";
export declare class FunctionCallNameGrammar<const Functions extends ChatModelFunctions> extends LlamaGrammar {
    private readonly _functions;
    private readonly _chatWrapper;
    constructor(llama: Llama, functions: Functions, chatWrapper: ChatWrapper);
    parseFunctionName(generatedFunctionName: string): keyof Functions & string;
    private _validateFunctions;
}
55 node_modules/node-llama-cpp/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js (generated, vendored, new file)
@@ -0,0 +1,55 @@
import { LlamaGrammar } from "../../LlamaGrammar.js";
import { LlamaText } from "../../../utils/LlamaText.js";
import { GbnfGrammarGenerator } from "../../../utils/gbnfJson/GbnfGrammarGenerator.js";
import { GbnfGrammar } from "../../../utils/gbnfJson/terminals/GbnfGrammar.js";
import { GbnfOr } from "../../../utils/gbnfJson/terminals/GbnfOr.js";
import { GbnfVerbatimText } from "../../../utils/gbnfJson/terminals/GbnfVerbatimText.js";
import { LlamaFunctionCallValidationError } from "./LlamaFunctionCallValidationError.js";
export class FunctionCallNameGrammar extends LlamaGrammar {
    _functions;
    _chatWrapper;
    constructor(llama, functions, chatWrapper) {
        const grammar = getGbnfGrammarForFunctionName(functions, chatWrapper);
        super(llama, {
            grammar,
            stopGenerationTriggers: [LlamaText("\n")],
            trimWhitespaceSuffix: true
        });
        this._functions = functions;
        this._chatWrapper = chatWrapper;
        this._validateFunctions();
    }
    parseFunctionName(generatedFunctionName) {
        if (this._chatWrapper.settings.functions.call.optionalPrefixSpace && generatedFunctionName[0] === " ")
            generatedFunctionName = generatedFunctionName.slice(1);
        const newlineIndex = generatedFunctionName.indexOf("\n");
        const functionName = generatedFunctionName.slice(0, newlineIndex < 0
            ? generatedFunctionName.length
            : newlineIndex);
        if (!Object.hasOwn(this._functions, functionName))
            throw new LlamaFunctionCallValidationError(`Function name "${functionName}" is not in the supplied functions object`, this._functions, this._chatWrapper, generatedFunctionName);
        return functionName;
    }
    _validateFunctions() {
        for (const functionsName of Object.keys(this._functions)) {
            if (functionsName.includes(" ") || functionsName.includes("\n") || functionsName.includes("\t"))
                throw new Error(`Function name "${functionsName}" contains spaces, new lines or tabs`);
            else if (functionsName === "")
                throw new Error("Function name cannot be an empty string");
        }
    }
}
function getGbnfGrammarForFunctionName(functions, chatWrapper) {
    const grammarGenerator = new GbnfGrammarGenerator();
    const functionNameGrammars = [];
    for (const functionName of Object.keys(functions))
        functionNameGrammars.push(new GbnfVerbatimText(functionName));
    const callGrammar = new GbnfOr(functionNameGrammars);
    const rootTerminal = new GbnfGrammar([
        ...(chatWrapper.settings.functions.call.optionalPrefixSpace ? ["[ ]?"] : []),
        callGrammar.resolve(grammarGenerator)
    ]);
    const rootGrammar = rootTerminal.getGrammar();
    return grammarGenerator.generateGbnfFile(rootGrammar + " [\\n]");
}
//# sourceMappingURL=FunctionCallNameGrammar.js.map
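The grammar above constrains the model to emit one of the known function names followed by a newline, and parseFunctionName reads that first line back. For reference, the name-extraction step amounts to the following self-contained sketch; the functions object and the prefix-space flag here are made up for illustration and are not taken from the package.

// Illustrative restatement of the parseFunctionName logic, not part of node-llama-cpp.
const functions = { getWeather: {}, getTime: {} }; // hypothetical functions object
const optionalPrefixSpace = true;                  // mirrors chatWrapper.settings.functions.call.optionalPrefixSpace

function extractFunctionName(generated: string): string {
    let text = generated;
    if (optionalPrefixSpace && text[0] === " ")
        text = text.slice(1); // drop the optional leading space the grammar allows

    const newlineIndex = text.indexOf("\n");
    const name = text.slice(0, newlineIndex < 0 ? text.length : newlineIndex);

    if (!Object.hasOwn(functions, name))
        throw new Error(`Unknown function name "${name}"`);

    return name;
}

console.log(extractFunctionName(" getWeather\n")); // "getWeather"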
1 node_modules/node-llama-cpp/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map (generated, vendored, new file)
@@ -0,0 +1 @@
{"version":3,"file":"FunctionCallNameGrammar.js","sourceRoot":"","sources":["../../../../src/evaluator/LlamaChat/utils/FunctionCallNameGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,EAAC,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAC,SAAS,EAAC,MAAM,6BAA6B,CAAC;AAEtD,OAAO,EAAC,oBAAoB,EAAC,MAAM,iDAAiD,CAAC;AAErF,OAAO,EAAC,WAAW,EAAC,MAAM,kDAAkD,CAAC;AAE7E,OAAO,EAAC,MAAM,EAAC,MAAM,6CAA6C,CAAC;AACnE,OAAO,EAAC,gBAAgB,EAAC,MAAM,uDAAuD,CAAC;AAEvF,OAAO,EAAC,gCAAgC,EAAC,MAAM,uCAAuC,CAAC;AAGvF,MAAM,OAAO,uBAAoE,SAAQ,YAAY;IAChF,UAAU,CAAY;IACtB,YAAY,CAAc;IAE3C,YAAmB,KAAY,EAAE,SAAoB,EAAE,WAAwB;QAC3E,MAAM,OAAO,GAAG,6BAA6B,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC;QAEtE,KAAK,CAAC,KAAK,EAAE;YACT,OAAO;YACP,sBAAsB,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YACzC,oBAAoB,EAAE,IAAI;SAC7B,CAAC,CAAC;QAEH,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,YAAY,GAAG,WAAW,CAAC;QAEhC,IAAI,CAAC,kBAAkB,EAAE,CAAC;IAC9B,CAAC;IAEM,iBAAiB,CAAC,qBAA6B;QAClD,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,mBAAmB,IAAI,qBAAqB,CAAC,CAAC,CAAC,KAAK,GAAG;YACjG,qBAAqB,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAE3D,MAAM,YAAY,GAAG,qBAAqB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAEzD,MAAM,YAAY,GAAG,qBAAqB,CAAC,KAAK,CAC5C,CAAC,EACD,YAAY,GAAG,CAAC;YACZ,CAAC,CAAC,qBAAqB,CAAC,MAAM;YAC9B,CAAC,CAAC,YAAY,CACO,CAAC;QAE9B,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,YAAY,CAAC;YAC7C,MAAM,IAAI,gCAAgC,CACtC,kBAAkB,YAAY,2CAA2C,EACzE,IAAI,CAAC,UAAU,EACf,IAAI,CAAC,YAAY,EACjB,qBAAqB,CACxB,CAAC;QAEN,OAAO,YAAY,CAAC;IACxB,CAAC;IAEO,kBAAkB;QACtB,KAAK,MAAM,aAAa,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YACvD,IAAI,aAAa,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC;gBAC3F,MAAM,IAAI,KAAK,CAAC,kBAAkB,aAAa,sCAAsC,CAAC,CAAC;iBACtF,IAAI,aAAa,KAAK,EAAE;gBACzB,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QACnE,CAAC;IACL,CAAC;CACJ;AAED,SAAS,6BAA6B,CAClC,SAAoB,EAAE,WAAwB;IAE9C,MAAM,gBAAgB,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAEpD,MAAM,oBAAoB,GAAmB,EAAE,CAAC;IAEhD,KAAK,MAAM,YAAY,IAAI,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC;QAC7C,oBAAoB,CAAC,IAAI,CAAC,IAAI,gBAAgB,CAAC,YAAY,CAAC,CAAC,CAAC;IAElE,MAAM,WAAW,GAAG,IAAI,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAErD,MAAM,YAAY,GAAG,IAAI,WAAW,CAAC;QACjC,GAAG,CAAC,WAAW,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC5E,WAAW,CAAC,OAAO,CAAC,gBAAgB,CAAC;KACxC,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,YAAY,CAAC,UAAU,EAAE,CAAC;IAE9C,OAAO,gBAAgB,CAAC,gBAAgB,CAAC,WAAW,GAAG,QAAQ,CAAC,CAAC;AACrE,CAAC"}
16 node_modules/node-llama-cpp/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts (generated, vendored, new file)
@@ -0,0 +1,16 @@
import { LlamaGrammar } from "../../LlamaGrammar.js";
import { ChatModelFunctions } from "../../../types.js";
import { ChatWrapper } from "../../../ChatWrapper.js";
import { Llama } from "../../../bindings/Llama.js";
import { GbnfJsonSchema } from "../../../utils/gbnfJson/types.js";
export declare class FunctionCallParamsGrammar<const Functions extends ChatModelFunctions> extends LlamaGrammar {
    private readonly _functions;
    private readonly _chatWrapper;
    private readonly _functionName;
    private readonly _paramsSchema;
    constructor(llama: Llama, functions: Functions, chatWrapper: ChatWrapper, functionName: string, paramsSchema: GbnfJsonSchema);
    parseParams(callText: string): {
        params: any;
        raw: string;
    };
}
45 node_modules/node-llama-cpp/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js (generated, vendored, new file)
@@ -0,0 +1,45 @@
import { LlamaGrammar } from "../../LlamaGrammar.js";
import { LlamaText } from "../../../utils/LlamaText.js";
import { validateObjectAgainstGbnfSchema } from "../../../utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js";
import { GbnfGrammarGenerator } from "../../../utils/gbnfJson/GbnfGrammarGenerator.js";
import { getGbnfJsonTerminalForGbnfJsonSchema } from "../../../utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js";
import { LlamaFunctionCallValidationError } from "./LlamaFunctionCallValidationError.js";
export class FunctionCallParamsGrammar extends LlamaGrammar {
    _functions;
    _chatWrapper;
    _functionName;
    _paramsSchema;
    constructor(llama, functions, chatWrapper, functionName, paramsSchema) {
        const grammar = getGbnfGrammarForFunctionParams(paramsSchema);
        super(llama, {
            grammar,
            stopGenerationTriggers: [LlamaText("\n".repeat(4))],
            trimWhitespaceSuffix: true
        });
        this._functions = functions;
        this._chatWrapper = chatWrapper;
        this._functionName = functionName;
        this._paramsSchema = paramsSchema;
    }
    parseParams(callText) {
        const endIndex = callText.lastIndexOf("\n".repeat(4));
        if (endIndex < 0)
            throw new LlamaFunctionCallValidationError(`Expected function call params for function "${this._functionName}" to end with stop generation trigger`, this._functions, this._chatWrapper, callText);
        const paramsString = callText.slice(0, endIndex);
        if (paramsString.trim().length === 0)
            throw new LlamaFunctionCallValidationError(`Expected function call params for function "${this._functionName}" to not be empty`, this._functions, this._chatWrapper, callText);
        const params = JSON.parse(paramsString);
        validateObjectAgainstGbnfSchema(params, this._paramsSchema);
        return {
            params: params, // prevent infinite TS type instantiation
            raw: paramsString
        };
    }
}
function getGbnfGrammarForFunctionParams(paramsSchema) {
    const grammarGenerator = new GbnfGrammarGenerator();
    const rootTerminal = getGbnfJsonTerminalForGbnfJsonSchema(paramsSchema, grammarGenerator);
    const rootGrammar = rootTerminal.resolve(grammarGenerator, true);
    return grammarGenerator.generateGbnfFile(rootGrammar + ` "${"\\n".repeat(4)}"`);
}
//# sourceMappingURL=FunctionCallParamsGrammar.js.map
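The parsing contract above (params JSON followed by a four-newline stop trigger) can be illustrated with a standalone sketch. The schema check is reduced to a plain JSON.parse here; it is not the package's validateObjectAgainstGbnfSchema.

// Illustrative only: mirrors how parseParams splits the generated text at the stop trigger.
const stopTrigger = "\n".repeat(4);

function parseGeneratedParams(callText: string): { params: unknown, raw: string } {
    const endIndex = callText.lastIndexOf(stopTrigger);
    if (endIndex < 0)
        throw new Error("Expected the generated params to end with the stop generation trigger");

    const raw = callText.slice(0, endIndex);
    if (raw.trim().length === 0)
        throw new Error("Expected the generated params to not be empty");

    return { params: JSON.parse(raw), raw }; // the real class also validates against the GBNF JSON schema
}

console.log(parseGeneratedParams(`{"city": "Paris"}${stopTrigger}`)); // { params: { city: "Paris" }, raw: '{"city": "Paris"}' }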
1 node_modules/node-llama-cpp/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map (generated, vendored, new file)
@@ -0,0 +1 @@
{"version":3,"file":"FunctionCallParamsGrammar.js","sourceRoot":"","sources":["../../../../src/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,EAAC,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAC,SAAS,EAAC,MAAM,6BAA6B,CAAC;AACtD,OAAO,EAAC,+BAA+B,EAAC,MAAM,kEAAkE,CAAC;AAEjH,OAAO,EAAC,oBAAoB,EAAC,MAAM,iDAAiD,CAAC;AACrF,OAAO,EAAC,oCAAoC,EAAC,MAAM,uEAAuE,CAAC;AAI3H,OAAO,EAAC,gCAAgC,EAAC,MAAM,uCAAuC,CAAC;AAGvF,MAAM,OAAO,yBAAsE,SAAQ,YAAY;IAClF,UAAU,CAAY;IACtB,YAAY,CAAc;IAC1B,aAAa,CAAS;IACtB,aAAa,CAAiB;IAE/C,YAAmB,KAAY,EAAE,SAAoB,EAAE,WAAwB,EAAE,YAAoB,EAAE,YAA4B;QAC/H,MAAM,OAAO,GAAG,+BAA+B,CAAC,YAAY,CAAC,CAAC;QAE9D,KAAK,CAAC,KAAK,EAAE;YACT,OAAO;YACP,sBAAsB,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YACnD,oBAAoB,EAAE,IAAI;SAC7B,CAAC,CAAC;QAEH,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,YAAY,GAAG,WAAW,CAAC;QAChC,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;QAClC,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;IACtC,CAAC;IAEM,WAAW,CAAC,QAAgB;QAC/B,MAAM,QAAQ,GAAG,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAEtD,IAAI,QAAQ,GAAG,CAAC;YACZ,MAAM,IAAI,gCAAgC,CACtC,+CAA+C,IAAI,CAAC,aAAa,uCAAuC,EACxG,IAAI,CAAC,UAAU,EACf,IAAI,CAAC,YAAY,EACjB,QAAQ,CACX,CAAC;QAEN,MAAM,YAAY,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;QAEjD,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAChC,MAAM,IAAI,gCAAgC,CACtC,+CAA+C,IAAI,CAAC,aAAa,mBAAmB,EACpF,IAAI,CAAC,UAAU,EACf,IAAI,CAAC,YAAY,EACjB,QAAQ,CACX,CAAC;QAEN,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;QAExC,+BAA+B,CAAC,MAAM,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC;QAE5D,OAAO;YACH,MAAM,EAAE,MAAa,EAAE,yCAAyC;YAChE,GAAG,EAAE,YAAY;SACpB,CAAC;IACN,CAAC;CACJ;AAED,SAAS,+BAA+B,CAAC,YAA4B;IACjE,MAAM,gBAAgB,GAAG,IAAI,oBAAoB,EAAE,CAAC;IACpD,MAAM,YAAY,GAAG,oCAAoC,CAAC,YAAY,EAAE,gBAAgB,CAAC,CAAC;IAC1F,MAAM,WAAW,GAAG,YAAY,CAAC,OAAO,CAAC,gBAAgB,EAAE,IAAI,CAAC,CAAC;IAEjE,OAAO,gBAAgB,CAAC,gBAAgB,CAAC,WAAW,GAAG,KAAK,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;AACpF,CAAC"}
8 node_modules/node-llama-cpp/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts (generated, vendored, new file)
@@ -0,0 +1,8 @@
import { ChatModelFunctions } from "../../../types.js";
import { ChatWrapper } from "../../../ChatWrapper.js";
export declare class LlamaFunctionCallValidationError<const Functions extends ChatModelFunctions> extends Error {
    readonly functions: Functions;
    readonly chatWrapper: ChatWrapper;
    readonly callText: string;
    constructor(message: string, functions: Functions, chatWrapper: ChatWrapper, callText: string);
}
12 node_modules/node-llama-cpp/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js (generated, vendored, new file)
@@ -0,0 +1,12 @@
export class LlamaFunctionCallValidationError extends Error {
    functions;
    chatWrapper;
    callText;
    constructor(message, functions, chatWrapper, callText) {
        super(message);
        this.functions = functions;
        this.chatWrapper = chatWrapper;
        this.callText = callText;
    }
}
//# sourceMappingURL=LlamaFunctionCallValidationError.js.map
1 node_modules/node-llama-cpp/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map (generated, vendored, new file)
@@ -0,0 +1 @@
{"version":3,"file":"LlamaFunctionCallValidationError.js","sourceRoot":"","sources":["../../../../src/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.ts"],"names":[],"mappings":"AAIA,MAAM,OAAO,gCAA6E,SAAQ,KAAK;IACnF,SAAS,CAAY;IACrB,WAAW,CAAc;IACzB,QAAQ,CAAS;IAEjC,YAAmB,OAAe,EAAE,SAAoB,EAAE,WAAwB,EAAE,QAAgB;QAChG,KAAK,CAAC,OAAO,CAAC,CAAC;QAEf,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC7B,CAAC;CACJ"}
@@ -0,0 +1,16 @@
import { ChatHistoryItem, Tokenizer } from "../../../../types.js";
import { ChatWrapper } from "../../../../ChatWrapper.js";
export declare function eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy({ chatHistory, maxTokensCount, tokenizer, chatWrapper, lastShiftMetadata }: {
    chatHistory: ChatHistoryItem[];
    maxTokensCount: number;
    tokenizer: Tokenizer;
    chatWrapper: ChatWrapper;
    lastShiftMetadata?: object | null;
}): Promise<{
    chatHistory: ChatHistoryItem[];
    metadata: CalculationMetadata;
}>;
type CalculationMetadata = {
    removedCharactersNumber: number;
};
export {};
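This signature matches the custom strategy callback documented in LLamaChatContextShiftOptions earlier in this diff. As a hedged sketch, a hand-rolled strategy with the same shape could look like the following; it naively drops the oldest non-system items under a rough character-per-token assumption, purely for illustration, and assumes ChatHistoryItem is importable from the package root.

import type { ChatHistoryItem } from "node-llama-cpp";

// Naive illustrative strategy: drop the oldest non-system items until roughly under budget.
// Real implementations (like the one below) measure tokens with the provided tokenizer and chat wrapper.
async function dropOldestStrategy({ chatHistory, maxTokensCount }: {
    chatHistory: readonly ChatHistoryItem[],
    maxTokensCount: number
}): Promise<{ chatHistory: ChatHistoryItem[], metadata: null }> {
    const res = chatHistory.map((item) => structuredClone(item) as ChatHistoryItem);

    // Extremely rough budget: assume ~4 characters per token (an assumption, not from the package).
    const roughTokens = () => res.reduce((sum, item) => sum + JSON.stringify(item).length, 0) / 4;

    while (res.length > 2 && roughTokens() > maxTokensCount) {
        const removeIndex = res[0]?.type === "system" ? 1 : 0;
        res.splice(removeIndex, 1);
    }

    return { chatHistory: res, metadata: null };
}

Such a function could then be passed as contextShift: { strategy: dropOldestStrategy } in the generateResponse options typed above.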
@@ -0,0 +1,254 @@
import { isChatModelResponseFunctionCall, isChatModelResponseSegment } from "../../../../types.js";
import { findCharacterRemovalCountToFitChatHistoryInContext } from "../../../../utils/findCharacterRemovalCountToFitChatHistoryInContext.js";
import { truncateLlamaTextAndRoundToWords, truncateTextAndRoundToWords } from "../../../../utils/truncateTextAndRoundToWords.js";
import { LlamaText } from "../../../../utils/LlamaText.js";
export async function eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy({ chatHistory, maxTokensCount, tokenizer, chatWrapper, lastShiftMetadata }) {
    let initialCharactersRemovalCount = 0;
    if (isCalculationMetadata(lastShiftMetadata))
        initialCharactersRemovalCount = lastShiftMetadata.removedCharactersNumber;
    const { removedCharactersCount, compressedChatHistory } = await findCharacterRemovalCountToFitChatHistoryInContext({
        chatHistory,
        tokensCountToFit: maxTokensCount,
        initialCharactersRemovalCount,
        tokenizer,
        chatWrapper,
        failedCompressionErrorMessage: "Failed to compress chat history for context shift due to a too long prompt or system message that cannot be compressed without affecting the generation quality. " +
            "Consider increasing the context size or shortening the long prompt or system message.",
        compressChatHistory({ chatHistory, charactersToRemove, estimatedCharactersPerToken }) {
            const res = chatHistory.map((item) => structuredClone(item));
            let charactersLeftToRemove = charactersToRemove;
            function compressFunctionCalls() {
                for (let i = res.length - 1; i >= 0 && charactersLeftToRemove > 0; i--) {
                    const historyItem = res[i];
                    if (historyItem.type !== "model")
                        continue;
                    for (let t = historyItem.response.length - 1; t >= 0 && charactersLeftToRemove > 0; t--) {
                        const item = historyItem.response[t];
                        if (typeof item === "string" || item.type !== "functionCall")
                            continue;
                        if (item.rawCall == null)
                            continue;
                        const originalRawCallTokensLength = LlamaText.fromJSON(item.rawCall).tokenize(tokenizer, "trimLeadingSpace").length;
                        const newRawCallText = chatWrapper.generateFunctionCall(item.name, item.params);
                        const newRawCallTextTokensLength = newRawCallText.tokenize(tokenizer, "trimLeadingSpace").length;
                        if (newRawCallTextTokensLength < originalRawCallTokensLength) {
                            item.rawCall = newRawCallText.toJSON();
                            charactersLeftToRemove -= ((originalRawCallTokensLength - newRawCallTextTokensLength) * estimatedCharactersPerToken);
                        }
                    }
                }
            }
            function removeHistoryThatLedToModelResponseAtIndex(index) {
                let removedItems = 0;
                for (let i = index - 1; i >= 0; i--) {
                    const historyItem = res[i];
                    if (historyItem == null)
                        continue;
                    if (historyItem.type === "model")
                        break; // stop removing history items if we reach another model response
                    if (i === 0 && historyItem.type === "system")
                        break; // keep the first system message
                    if (historyItem.type === "user" || historyItem.type === "system") {
                        const newText = truncateLlamaTextAndRoundToWords(LlamaText.fromJSON(historyItem.text), charactersLeftToRemove, undefined, false);
                        const newTextString = newText.toString();
                        const historyItemString = LlamaText.fromJSON(historyItem.text).toString();
                        if (newText.values.length === 0) {
                            res.splice(i, 1);
                            i++;
                            removedItems++;
                            charactersLeftToRemove -= historyItemString.length;
                        }
                        else if (newTextString.length < historyItemString.length) {
                            charactersLeftToRemove -= historyItemString.length - newTextString.length;
                            if (historyItem.type === "user")
                                historyItem.text = newText.toString();
                            else
                                historyItem.text = newText.toJSON();
                        }
                    }
                    else {
                        void historyItem;
                    }
                }
                return removedItems;
            }
            function compressHistoryThatLedToModelResponseAtIndex(index, keepTokensCount = 0) {
                let removedItems = 0;
                let promptStartIndex = undefined;
                for (let i = index - 1; i >= 0; i--) {
                    const historyItem = res[i];
                    if (historyItem == null)
                        continue;
                    if (historyItem.type === "model") {
                        promptStartIndex = i + 1;
                        break;
                    }
                    if (i === 0 && historyItem.type === "system") {
                        promptStartIndex = i + 1;
                        break; // keep the first system message
                    }
                }
                if (promptStartIndex == null || promptStartIndex >= index)
                    return 0;
                for (let i = promptStartIndex; i < index && charactersLeftToRemove > 0; i++) {
                    const historyItem = res[i];
                    if (historyItem == null || historyItem.type !== "user")
                        continue;
                    let removeChars = Math.min(charactersLeftToRemove, historyItem.text.length);
                    if (keepTokensCount > 0) {
                        removeChars -= Math.floor(keepTokensCount * estimatedCharactersPerToken);
                        if (removeChars < 0)
                            removeChars = 0;
                        keepTokensCount -= Math.min(keepTokensCount, Math.max(0, historyItem.text.length - removeChars) / estimatedCharactersPerToken);
                    }
                    const newText = truncateTextAndRoundToWords(historyItem.text, removeChars, undefined, false);
                    if (newText.length === 0) {
                        res.splice(i, 1);
                        i--;
                        index--;
                        removedItems++;
                        charactersLeftToRemove -= historyItem.text.length;
                    }
                    else {
                        charactersLeftToRemove -= historyItem.text.length - newText.length;
                        historyItem.text = newText;
                    }
                }
                return removedItems;
            }
            function removeEmptySegmentsFromModelResponse(modelResponse) {
                const stack = [];
                for (let t = 0; t < modelResponse.length && charactersLeftToRemove > 0; t++) {
                    const item = modelResponse[t];
                    const isLastItem = t === modelResponse.length - 1;
                    if (!isChatModelResponseSegment(item))
                        continue;
                    const type = item.segmentType;
                    const topStack = stack.at(-1);
                    if (topStack?.type === type) {
                        if (item.ended && item.text === "" && topStack.canRemove) {
                            modelResponse.splice(t, 1);
                            t--;
                            modelResponse.splice(topStack.startIndex, 1);
                            t--;
                            stack.pop();
                        }
                        else if (!item.ended && item.text === "" && !isLastItem) {
                            modelResponse.splice(t, 1);
                            t--;
                        }
                        else if (!item.ended && item.text !== "")
                            topStack.canRemove = false;
                        else if (item.ended)
                            stack.pop();
                    }
                    else if (!item.ended)
                        stack.push({
                            type,
                            startIndex: t,
                            canRemove: item.text === ""
                        });
                }
            }
            function compressFirstModelResponse() {
                for (let i = 0; i < res.length && charactersLeftToRemove > 0; i++) {
                    const historyItem = res[i];
                    const isLastHistoryItem = i === res.length - 1;
                    if (historyItem.type !== "model")
                        continue;
                    for (let t = 0; t < historyItem.response.length && charactersLeftToRemove > 0; t++) {
                        const item = historyItem.response[t];
                        const isLastText = t === historyItem.response.length - 1;
                        if (isLastHistoryItem && isLastText)
                            continue;
                        if (typeof item === "string") {
                            const newText = truncateTextAndRoundToWords(item, charactersLeftToRemove, undefined, true);
                            if (newText === "") {
                                historyItem.response.splice(t, 1);
                                t--;
                                charactersLeftToRemove -= item.length;
                            }
                            else if (newText.length < item.length) {
                                historyItem.response[t] = newText;
                                charactersLeftToRemove -= item.length - newText.length;
                            }
                        }
                        else if (isChatModelResponseFunctionCall(item)) {
                            historyItem.response.splice(t, 1);
                            t--;
                            const functionCallAndResultTokenUsage = chatWrapper.generateFunctionCallsAndResults([item], true)
                                .tokenize(tokenizer, "trimLeadingSpace").length;
                            charactersLeftToRemove -= functionCallAndResultTokenUsage * estimatedCharactersPerToken;
                        }
                        else if (isChatModelResponseSegment(item)) {
                            if (item.text !== "") {
                                const newText = truncateTextAndRoundToWords(item.text, charactersLeftToRemove, undefined, true);
                                if (newText === "" && item.ended) {
                                    const emptySegmentTokenUsage = chatWrapper.generateModelResponseText([{ ...item, text: "" }], true)
                                        .tokenize(tokenizer, "trimLeadingSpace").length;
                                    historyItem.response.splice(t, 1);
                                    t--;
                                    charactersLeftToRemove -= item.text.length + emptySegmentTokenUsage * estimatedCharactersPerToken;
                                }
                                else {
                                    charactersLeftToRemove -= item.text.length - newText.length;
                                    item.text = newText;
                                }
                            }
                        }
                        else
                            void item;
                    }
                    removeEmptySegmentsFromModelResponse(historyItem.response);
                    if (historyItem.response.length === 0) {
                        // if the model response is removed from the history,
                        // the things that led to it are not important anymore
                        i -= removeHistoryThatLedToModelResponseAtIndex(i);
                        res.splice(i, 1);
                        i--;
                    }
                }
            }
            function compressLastModelResponse(minCharactersToKeep = 60) {
                const lastHistoryItem = res[res.length - 1];
                if (lastHistoryItem == null || lastHistoryItem.type !== "model")
                    return;
                const lastResponseItem = lastHistoryItem.response[lastHistoryItem.response.length - 1];
                if (lastResponseItem == null || typeof lastResponseItem !== "string")
                    return;
                compressHistoryThatLedToModelResponseAtIndex(res.length - 1, maxTokensCount / 4);
                if (charactersLeftToRemove <= 0)
                    return;
                const nextTextLength = Math.max(Math.min(lastResponseItem.length, minCharactersToKeep), lastResponseItem.length - charactersLeftToRemove);
                const charactersToRemoveFromText = lastResponseItem.length - nextTextLength;
                const newText = truncateTextAndRoundToWords(lastResponseItem, charactersToRemoveFromText, undefined, true);
                if (newText.length < lastResponseItem.length) {
                    lastHistoryItem.response[lastHistoryItem.response.length - 1] = newText;
                    charactersLeftToRemove -= lastResponseItem.length - newText.length;
                }
                if (charactersLeftToRemove <= 0)
                    return;
                compressHistoryThatLedToModelResponseAtIndex(res.length - 1);
            }
            compressFunctionCalls();
            if (charactersLeftToRemove <= 0)
                return res;
            compressFirstModelResponse();
            if (charactersLeftToRemove <= 0)
                return res;
            compressLastModelResponse();
            return res;
        }
    });
    const newMetadata = {
        removedCharactersNumber: removedCharactersCount
    };
    return {
        chatHistory: compressedChatHistory,
        metadata: newMetadata
    };
}
function isCalculationMetadata(metadata) {
    return metadata != null && typeof metadata === "object" && typeof metadata.removedCharactersNumber === "number";
}
//# sourceMappingURL=eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map
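The truncation helpers used above (truncateTextAndRoundToWords, truncateLlamaTextAndRoundToWords) round character removals to word boundaries. A standalone, hedged approximation of that idea, not the package's implementation:

// Illustrative approximation: remove roughly `charactersToRemove` characters from the start of `text`,
// then extend the cut to the next word boundary so the remaining text starts on a whole word.
function truncateRoundedToWords(text: string, charactersToRemove: number): string {
    if (charactersToRemove <= 0)
        return text;
    if (charactersToRemove >= text.length)
        return "";

    let cut = charactersToRemove;
    while (cut < text.length && text[cut] !== " ")
        cut++; // move the cut forward to a space so we don't keep half a word

    return text.slice(cut).trimStart();
}

console.log(truncateRoundedToWords("erase the oldest parts of the history", 7)); // "oldest parts of the history"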
File diff suppressed because one or more lines are too long