First upload of version 0.0.1

Neyra
2026-02-05 15:27:49 +08:00
commit 8e9b7201ed
4182 changed files with 593136 additions and 0 deletions


@@ -0,0 +1,459 @@
import { EventRelay } from "lifecycle-utils";
import { ChatWrapper } from "../../ChatWrapper.js";
import { LlamaContextSequence } from "../LlamaContext/LlamaContext.js";
import { ChatHistoryItem, ChatModelFunctions, ChatModelSegmentType, LLamaContextualRepeatPenalty, Token, Tokenizer } from "../../types.js";
import { GbnfJsonSchemaToType } from "../../utils/gbnfJson/types.js";
import { LlamaGrammar } from "../LlamaGrammar.js";
import { LlamaText, LlamaTextJSON } from "../../utils/LlamaText.js";
import { EvaluationPriority } from "../LlamaContext/types.js";
import { TokenBias } from "../TokenBias.js";
import { LlamaModel } from "../LlamaModel/LlamaModel.js";
export type LlamaChatOptions = {
contextSequence: LlamaContextSequence;
/** `"auto"` is used by default */
chatWrapper?: "auto" | ChatWrapper;
/**
* Automatically dispose the sequence when the session is disposed
*
* Defaults to `false`.
*/
autoDisposeSequence?: boolean;
};
export type LlamaChatResponseChunk = LlamaChatResponseTextChunk | LlamaChatResponseSegmentChunk;
export type LlamaChatResponseTextChunk = {
/** When `type` is `undefined`, the chunk is part of the main response and is not a segment */
type: undefined;
/**
* `segmentType` has no purpose when `type` is `undefined` (meaning that this chunk is part of the main response and is not a segment).
*/
segmentType: undefined;
/**
* The generated text chunk.
*
* Detokenized from the `tokens` property,
* but with the context of the previous generation (for better spacing of the text with some models).
*
* Prefer using this property over `tokens` when streaming the generated response as text.
*/
text: string;
/** The generated tokens */
tokens: Token[];
};
export type LlamaChatResponseSegmentChunk = {
type: "segment";
/** Segment type */
segmentType: ChatModelSegmentType;
/**
* The generated text chunk.
*
* Detokenized from the `tokens` property,
* but with the context of the previous generation (for better spacing of the text with some models).
*
* Prefer using this property over `tokens` when streaming the generated response as text.
*/
text: string;
/** The generated tokens */
tokens: Token[];
/**
* When the current chunk is the start of a segment, this field will be set.
*
* It's possible that a chunk with no tokens and empty text will be emitted just to set this field
* to signify that the segment has started.
*/
segmentStartTime?: Date;
/**
* When the current chunk is the last one of a segment (meaning the current segment has ended), this field will be set.
*
* It's possible that a chunk with no tokens and empty text will be emitted just to set this field
* to signify that the segment has ended.
*/
segmentEndTime?: Date;
};
export type LlamaChatResponseFunctionCallParamsChunk = {
/**
* Each different function call has a different `callIndex`.
*
* When the previous function call has finished being generated, the `callIndex` of the next one will increment.
*
* Use this value to distinguish between different function calls.
*/
callIndex: number;
/**
* The name of the function being called
*/
functionName: string;
/**
* A chunk of the generated text used for the function call parameters.
*
* Collect all the chunks together to construct the full function call parameters.
*
* After the function call is finished, the entire constructed params text can be parsed as a JSON object,
* according to the function parameters schema.
*/
paramsChunk: string;
/**
* When this is `true`, the current chunk is the last chunk in the generation of the current function call parameters.
*/
done: boolean;
};
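// Illustrative sketch (not part of these declarations): one hedged way to consume the chunk
// types above. `onResponseChunk` chunks are told apart via `type`/`segmentType`, and
// `onFunctionCallParamsChunk` chunks are accumulated per `callIndex` until `done` is set.
// Only the types declared in this file are assumed.
const paramsBuffers = new Map<number, string>();
const handleResponseChunk = (chunk: LlamaChatResponseChunk) => {
    if (chunk.type === "segment")
        console.log(`[${chunk.segmentType}]`, chunk.text); // segment text, e.g. thoughts
    else
        process.stdout.write(chunk.text); // main response text
};
const handleFunctionCallParamsChunk = (chunk: LlamaChatResponseFunctionCallParamsChunk) => {
    const collected = (paramsBuffers.get(chunk.callIndex) ?? "") + chunk.paramsChunk;
    paramsBuffers.set(chunk.callIndex, collected);
    if (chunk.done)
        console.log(chunk.functionName, JSON.parse(collected)); // the full params JSON object
};
// Pass these as the `onResponseChunk` and `onFunctionCallParamsChunk` options of `generateResponse()`.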
export type LLamaChatGenerateResponseOptions<Functions extends ChatModelFunctions | undefined = undefined> = {
/**
* Called as the model generates the main response with the generated text chunk.
*
* Useful for streaming the generated response as it's being generated.
*
* Includes only the main response without any text segments (like thoughts).
* For streaming the response with segments, use {@link onResponseChunk `onResponseChunk`}.
*/
onTextChunk?: (text: string) => void;
/**
* Called as the model generates the main response with the generated tokens.
*
* Preferably, you'd want to use {@link onTextChunk `onTextChunk`} instead of this.
*
* Includes only the main response without any segments (like thoughts).
* For streaming the response with segments, use {@link onResponseChunk `onResponseChunk`}.
*/
onToken?: (tokens: Token[]) => void;
/**
* Called as the model generates a response with the generated text and tokens,
* including segment information (when the generated output is part of a segment).
*
* Useful for streaming the generated response as it's being generated, including the main response and all segments.
*
* Only use this function when you need the segmented texts, like thought segments (chain of thought text).
*/
onResponseChunk?: (chunk: LlamaChatResponseChunk) => void;
/**
* An AbortSignal to later abort the generation.
*
* When the signal is aborted, the generation will stop and throw `signal.reason` as the error.
*
* > To stop an ongoing generation without throwing an error, also set `stopOnAbortSignal` to `true`.
*/
signal?: AbortSignal;
/**
* When a response already started being generated and then the signal is aborted,
* the generation will stop and the response will be returned as is instead of throwing an error.
*
* Defaults to `false`.
*/
stopOnAbortSignal?: boolean;
/** Maximum number of tokens to generate */
maxTokens?: number;
/**
* Temperature is a hyperparameter that controls the randomness of the generated text.
* It affects the probability distribution of the model's output tokens.
*
* A higher temperature (e.g., 1.5) makes the output more random and creative,
* while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
*
* The suggested temperature is 0.8, which provides a balance between randomness and determinism.
*
* At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
*
* Set to `0` to disable.
* Disabled by default (set to `0`).
*/
temperature?: number;
/**
* From the next token candidates, discard the percentage of tokens with the lowest probability.
* For example, if set to `0.05`, 5% of the lowest probability tokens will be discarded.
* This is useful for generating more high-quality results when using a high temperature.
* Set to a value between `0` and `1` to enable.
*
* Only relevant when `temperature` is set to a value greater than `0`.
* Disabled by default.
*/
minP?: number;
/**
* Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
* An integer number between `1` and the size of the vocabulary.
* Set to `0` to disable (which uses the full vocabulary).
*
* Only relevant when `temperature` is set to a value greater than 0.
*/
topK?: number;
/**
* Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
* and samples the next token only from this set.
* A float number between `0` and `1`.
* Set to `1` to disable.
*
* Only relevant when `temperature` is set to a value greater than `0`.
*/
topP?: number;
/**
* Used to control the randomness of the generated text.
*
* Change the seed to get different results.
*
* Only relevant when using `temperature`.
*/
seed?: number;
/**
* Trim whitespace from the end of the generated text
*
* Defaults to `false`.
*/
trimWhitespaceSuffix?: boolean;
repeatPenalty?: false | LLamaContextualRepeatPenalty;
/**
* Adjust the probability of tokens being generated.
* Can be used to bias the model to generate tokens that you want it to lean towards,
* or to avoid generating tokens that you want it to avoid.
*/
tokenBias?: TokenBias | (() => TokenBias);
/**
* See the parameter `evaluationPriority` on the `LlamaContextSequence.evaluate()` function for more information.
*/
evaluationPriority?: EvaluationPriority;
contextShift?: LLamaChatContextShiftOptions;
/**
* Custom stop triggers to stop the generation of the response when any of the provided triggers are found.
*/
customStopTriggers?: readonly (LlamaText | string | readonly (string | Token)[])[];
/**
* The evaluation context window returned from the last evaluation.
* This is an optimization to utilize existing context sequence state better when possible.
*/
lastEvaluationContextWindow?: {
/** The history of the last evaluation. */
history?: ChatHistoryItem[];
/**
* Minimum overlap percentage with existing context sequence state to use the last evaluation context window.
* If the last evaluation context window is not used, a new context will be generated based on the full history,
* which will decrease the likelihood of another context shift happening so soon.
*
* A number between `0` (exclusive) and `1` (inclusive).
*/
minimumOverlapPercentageToPreventContextShift?: number;
};
/**
* Called as the model generates function calls with the generated parameters chunk for each function call.
*
* Useful for streaming the generated function call parameters as they're being generated.
* Only useful in specific use cases,
* such as showing the generated textual file content as it's being generated (note that doing this requires parsing incomplete JSON).
*
* The constructed text from all the params chunks of a given function call can be parsed as a JSON object,
* according to the function parameters schema.
*
* Each function call has its own `callIndex` you can use to distinguish between them.
*
* Only relevant when using function calling (via passing the `functions` option).
*/
onFunctionCallParamsChunk?: (chunk: LlamaChatResponseFunctionCallParamsChunk) => void;
/**
* Set the maximum number of tokens the model is allowed to spend on various segmented responses.
*/
budgets?: {
/**
* Whether to include, in the budget, the tokens already consumed by the current model response being completed.
*
* Defaults to `true`.
*/
includeCurrentResponse?: boolean;
/**
* Budget for thought tokens.
*
* Defaults to `Infinity`.
*/
thoughtTokens?: number;
/**
* Budget for comment tokens.
*
* Defaults to `Infinity`.
*/
commentTokens?: number;
};
/**
* Stop the generation when the model tries to generate a non-textual segment or call a function.
*
* Useful for generating completions in the form of a model response.
*
* Defaults to `false`.
*/
abortOnNonText?: boolean;
} & ({
grammar?: LlamaGrammar;
functions?: never;
documentFunctionParams?: never;
maxParallelFunctionCalls?: never;
onFunctionCall?: never;
onFunctionCallParamsChunk?: never;
} | {
grammar?: never;
functions?: Functions | ChatModelFunctions;
documentFunctionParams?: boolean;
maxParallelFunctionCalls?: number;
onFunctionCall?: (functionCall: LlamaChatResponseFunctionCall<Functions extends ChatModelFunctions ? Functions : ChatModelFunctions>) => void;
onFunctionCallParamsChunk?: (chunk: LlamaChatResponseFunctionCallParamsChunk) => void;
});
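// Illustrative sketch (not part of these declarations): a hedged `generateResponse()` call
// using the options above. The chat history item shapes, the trailing empty `model` item for
// the response to be generated into, and the exact `functions` schema shape are assumptions
// based on the surrounding declarations, not guarantees of this API.
async function askWithFunctions(chat: LlamaChat) {
    const history: ChatHistoryItem[] = [
        {type: "system", text: "You are a helpful assistant."},
        {type: "user", text: "What's the weather in Paris?"},
        {type: "model", response: []} // assumed placeholder the response is generated into
    ];
    const {response, functionCalls, metadata} = await chat.generateResponse(history, {
        temperature: 0.8,
        topP: 0.9,
        maxTokens: 256,
        onTextChunk: (text) => process.stdout.write(text), // stream the main response
        functions: {
            getWeather: {
                description: "Get the current weather for a city",
                params: {type: "object", properties: {city: {type: "string"}}}
            }
        }
    });
    console.log({response, functionCalls, stopReason: metadata.stopReason});
}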
export type LLamaChatLoadAndCompleteUserMessageOptions<Functions extends ChatModelFunctions | undefined = undefined> = {
/**
* Complete the given user prompt without adding it or the completion to the returned context window.
*/
initialUserPrompt?: string;
/**
* When a completion already started being generated and then the signal is aborted,
* the generation will stop and the completion will be returned as is instead of throwing an error.
*
* Defaults to `false`.
*/
stopOnAbortSignal?: boolean;
/**
* Called as the model generates a completion with the generated text chunk.
*
* Useful for streaming the generated completion as it's being generated.
*/
onTextChunk?: LLamaChatGenerateResponseOptions<Functions>["onTextChunk"];
/**
* Called as the model generates a completion with the generated tokens.
*
* Preferably, you'd want to use `onTextChunk` instead of this.
*/
onToken?: LLamaChatGenerateResponseOptions<Functions>["onToken"];
signal?: LLamaChatGenerateResponseOptions<Functions>["signal"];
maxTokens?: LLamaChatGenerateResponseOptions<Functions>["maxTokens"];
temperature?: LLamaChatGenerateResponseOptions<Functions>["temperature"];
minP?: LLamaChatGenerateResponseOptions<Functions>["minP"];
topK?: LLamaChatGenerateResponseOptions<Functions>["topK"];
topP?: LLamaChatGenerateResponseOptions<Functions>["topP"];
seed?: LLamaChatGenerateResponseOptions<Functions>["seed"];
trimWhitespaceSuffix?: LLamaChatGenerateResponseOptions<Functions>["trimWhitespaceSuffix"];
repeatPenalty?: LLamaChatGenerateResponseOptions<Functions>["repeatPenalty"];
tokenBias?: LLamaChatGenerateResponseOptions<Functions>["tokenBias"];
evaluationPriority?: LLamaChatGenerateResponseOptions<Functions>["evaluationPriority"];
contextShift?: LLamaChatGenerateResponseOptions<Functions>["contextShift"];
customStopTriggers?: LLamaChatGenerateResponseOptions<Functions>["customStopTriggers"];
lastEvaluationContextWindow?: LLamaChatGenerateResponseOptions<Functions>["lastEvaluationContextWindow"];
grammar?: LlamaGrammar;
/**
* Functions are not used by the model here,
* but are used for keeping the instructions given to the model about the functions in the current context state,
* to avoid context shifts.
*
* It's best to provide the same functions that were used for the previous prompt here.
*/
functions?: Functions | ChatModelFunctions;
/**
* Functions are not used by the model here,
* but are used for keeping the instructions given to the model about the functions in the current context state,
* to avoid context shifts.
*
* It's best to provide the same value that was used for the previous prompt here.
*/
documentFunctionParams?: boolean;
};
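// Illustrative sketch (not part of these declarations): a hedged use of
// `loadChatAndCompleteUserMessage()` to suggest a continuation for a partially typed user
// message. The history item shapes are assumptions based on the declarations above.
async function suggestUserCompletion(chat: LlamaChat, partialPrompt: string) {
    const history: ChatHistoryItem[] = [
        {type: "system", text: "You are a helpful assistant."}
    ];
    const {completion, metadata} = await chat.loadChatAndCompleteUserMessage(history, {
        initialUserPrompt: partialPrompt, // e.g. "How do I "
        maxTokens: 24,
        temperature: 0.8,
        onTextChunk: (text) => process.stdout.write(text) // stream the suggested continuation
    });
    return {completion, stopReason: metadata.stopReason};
}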
export type LLamaChatContextShiftOptions = {
/**
* The number of tokens to delete from the context window to make space for new ones.
* Defaults to 10% of the context size.
*/
size?: number | ((sequence: LlamaContextSequence) => number | Promise<number>);
/**
* The strategy to use when deleting tokens from the context window.
*
* Defaults to `"eraseFirstResponseAndKeepFirstSystem"`.
*/
strategy?: "eraseFirstResponseAndKeepFirstSystem" | ((options: {
/** Full chat history */
chatHistory: readonly ChatHistoryItem[];
/** Maximum number of tokens that the new chat history should fit under when tokenized */
maxTokensCount: number;
/** Tokenizer used to tokenize the chat history */
tokenizer: Tokenizer;
/** Chat wrapper used to generate the context state */
chatWrapper: ChatWrapper;
/**
* The metadata returned from the last context shift strategy call.
* Will be `null` on the first call.
*/
lastShiftMetadata?: object | null;
}) => {
chatHistory: ChatHistoryItem[];
metadata?: object | null;
} | Promise<{
chatHistory: ChatHistoryItem[];
metadata?: object | null;
}>);
/**
* The `contextShiftMetadata` returned from the last evaluation.
* This is an optimization to utilize the existing context state better when possible.
*/
lastEvaluationMetadata?: object | undefined | null;
};
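// Illustrative sketch (not part of these declarations): a hedged custom `strategy` matching
// the callback signature above. It drops the oldest non-system items until the tokenized
// history fits under `maxTokensCount`. Measuring the size via
// `chatWrapper.generateContextState()` and `LlamaText.tokenize()` is an assumption here,
// not something defined in this file.
const dropOldestStrategy: LLamaChatContextShiftOptions["strategy"] = ({
    chatHistory, maxTokensCount, tokenizer, chatWrapper, lastShiftMetadata
}) => {
    const res = chatHistory.slice();
    let removedItems = (lastShiftMetadata as {removedItems?: number} | null)?.removedItems ?? 0;
    const tokensCount = () => chatWrapper.generateContextState({chatHistory: res})
        .contextText.tokenize(tokenizer).length;
    while (res.length > 2 && tokensCount() > maxTokensCount) {
        const removeIndex = res[0]?.type === "system" ? 1 : 0; // keep the first system message
        res.splice(removeIndex, 1);
        removedItems++;
    }
    return {chatHistory: res, metadata: {removedItems}};
};
// Usage: pass `{strategy: dropOldestStrategy}` as the `contextShift` option of `generateResponse()`.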
export declare class LlamaChat {
readonly onDispose: EventRelay<void>;
constructor({ contextSequence, chatWrapper, autoDisposeSequence }: LlamaChatOptions);
dispose({ disposeSequence }?: {
disposeSequence?: boolean;
}): void;
/** @hidden */
[Symbol.dispose](): void;
get disposed(): boolean;
get chatWrapper(): ChatWrapper;
get sequence(): LlamaContextSequence;
get context(): import("../LlamaContext/LlamaContext.js").LlamaContext;
get model(): LlamaModel;
generateResponse<const Functions extends ChatModelFunctions | undefined = undefined>(history: ChatHistoryItem[], options?: LLamaChatGenerateResponseOptions<Functions>): Promise<LlamaChatResponse<Functions>>;
loadChatAndCompleteUserMessage<const Functions extends ChatModelFunctions | undefined = undefined>(history: ChatHistoryItem[], options?: LLamaChatLoadAndCompleteUserMessageOptions<Functions>): Promise<LlamaChatLoadAndCompleteUserResponse>;
}
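// Illustrative sketch (not part of these declarations): constructing and disposing a
// `LlamaChat`. Obtaining a `LlamaContextSequence` (from a loaded model's context) is assumed
// to happen elsewhere; only the API declared above is used.
async function withChat(contextSequence: LlamaContextSequence) {
    const chat = new LlamaChat({
        contextSequence,
        autoDisposeSequence: true // dispose the sequence together with the chat
    });
    try {
        // ... `chat.generateResponse()` / `chat.loadChatAndCompleteUserMessage()` calls go here ...
        console.log("disposed:", chat.disposed); // false while in use
    } finally {
        chat.dispose(); // also disposes the sequence because of `autoDisposeSequence: true`
    }
}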
export type LlamaChatResponse<Functions extends ChatModelFunctions | undefined = undefined> = {
/**
* The response text only, _without_ any text segments (like thoughts).
*/
response: string;
/**
* The full response, including all text and text segments (like thoughts).
*/
fullResponse: Array<string | LlamaChatResponseSegment>;
functionCalls?: Functions extends ChatModelFunctions ? LlamaChatResponseFunctionCall<Functions>[] : never;
lastEvaluation: {
cleanHistory: ChatHistoryItem[];
contextWindow: ChatHistoryItem[];
contextShiftMetadata: any;
};
metadata: {
remainingGenerationAfterStop?: string | Token[];
stopReason: "eogToken" | "stopGenerationTrigger" | "functionCalls" | "maxTokens" | "abort";
} | {
remainingGenerationAfterStop?: string | Token[];
stopReason: "customStopTrigger";
customStopTrigger: (string | Token)[];
};
};
export type LlamaChatResponseFunctionCall<Functions extends ChatModelFunctions, FunctionCallName extends keyof Functions & string = string & keyof Functions, Params = Functions[FunctionCallName]["params"] extends undefined | null | void ? undefined : GbnfJsonSchemaToType<Functions[FunctionCallName]["params"]>> = {
functionName: FunctionCallName;
params: Params;
raw: LlamaTextJSON;
};
export type LlamaChatResponseSegment = {
type: "segment";
segmentType: ChatModelSegmentType;
text: string;
ended: boolean;
raw: LlamaTextJSON;
startTime?: string;
endTime?: string;
};
export type LlamaChatLoadAndCompleteUserResponse = {
completion: string;
lastEvaluation: {
/**
* The completion and initial user prompt are not added to this context window result,
* but are loaded into the current context sequence state as tokens
*/
contextWindow: ChatHistoryItem[];
contextShiftMetadata: any;
};
metadata: {
remainingGenerationAfterStop?: string | Token[];
stopReason: "eogToken" | "stopGenerationTrigger" | "maxTokens" | "abort";
} | {
remainingGenerationAfterStop?: string | Token[];
stopReason: "customStopTrigger";
customStopTrigger: (string | Token)[];
};
};
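A hedged multi-turn sketch of the `lastEvaluation` round-tripping described above: the `cleanHistory`, `contextWindow`, and `contextShiftMetadata` from one `generateResponse()` call are fed into the next one via `lastEvaluationContextWindow` and `contextShift`. The way the next user turn is appended to the history is an assumption about the `ChatHistoryItem` shapes, not something this file guarantees.

async function chatLoop(chat: LlamaChat, userPrompts: string[]) {
    let history: ChatHistoryItem[] = [{type: "system", text: "You are a helpful assistant."}];
    let lastContextWindow: ChatHistoryItem[] | undefined = undefined;
    let contextShiftMetadata: any = undefined;
    for (const prompt of userPrompts) {
        history.push({type: "user", text: prompt}, {type: "model", response: []});
        const {response, lastEvaluation} = await chat.generateResponse(history, {
            lastEvaluationContextWindow: {history: lastContextWindow},
            contextShift: {lastEvaluationMetadata: contextShiftMetadata}
        });
        console.log(response);
        // reuse the last evaluation state to better utilize the existing context sequence state
        history = lastEvaluation.cleanHistory;
        lastContextWindow = lastEvaluation.contextWindow;
        contextShiftMetadata = lastEvaluation.contextShiftMetadata;
    }
}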

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long


@@ -0,0 +1,11 @@
import { LlamaGrammar } from "../../LlamaGrammar.js";
import { ChatModelFunctions } from "../../../types.js";
import { ChatWrapper } from "../../../ChatWrapper.js";
import { Llama } from "../../../bindings/Llama.js";
export declare class FunctionCallNameGrammar<const Functions extends ChatModelFunctions> extends LlamaGrammar {
private readonly _functions;
private readonly _chatWrapper;
constructor(llama: Llama, functions: Functions, chatWrapper: ChatWrapper);
parseFunctionName(generatedFunctionName: string): keyof Functions & string;
private _validateFunctions;
}


@@ -0,0 +1,55 @@
import { LlamaGrammar } from "../../LlamaGrammar.js";
import { LlamaText } from "../../../utils/LlamaText.js";
import { GbnfGrammarGenerator } from "../../../utils/gbnfJson/GbnfGrammarGenerator.js";
import { GbnfGrammar } from "../../../utils/gbnfJson/terminals/GbnfGrammar.js";
import { GbnfOr } from "../../../utils/gbnfJson/terminals/GbnfOr.js";
import { GbnfVerbatimText } from "../../../utils/gbnfJson/terminals/GbnfVerbatimText.js";
import { LlamaFunctionCallValidationError } from "./LlamaFunctionCallValidationError.js";
export class FunctionCallNameGrammar extends LlamaGrammar {
_functions;
_chatWrapper;
constructor(llama, functions, chatWrapper) {
const grammar = getGbnfGrammarForFunctionName(functions, chatWrapper);
super(llama, {
grammar,
stopGenerationTriggers: [LlamaText("\n")],
trimWhitespaceSuffix: true
});
this._functions = functions;
this._chatWrapper = chatWrapper;
this._validateFunctions();
}
parseFunctionName(generatedFunctionName) {
if (this._chatWrapper.settings.functions.call.optionalPrefixSpace && generatedFunctionName[0] === " ")
generatedFunctionName = generatedFunctionName.slice(1);
const newlineIndex = generatedFunctionName.indexOf("\n");
const functionName = generatedFunctionName.slice(0, newlineIndex < 0
? generatedFunctionName.length
: newlineIndex);
if (!Object.hasOwn(this._functions, functionName))
throw new LlamaFunctionCallValidationError(`Function name "${functionName}" is not in the supplied functions object`, this._functions, this._chatWrapper, generatedFunctionName);
return functionName;
}
_validateFunctions() {
for (const functionsName of Object.keys(this._functions)) {
if (functionsName.includes(" ") || functionsName.includes("\n") || functionsName.includes("\t"))
throw new Error(`Function name "${functionsName}" contains spaces, new lines or tabs`);
else if (functionsName === "")
throw new Error("Function name cannot be an empty string");
}
}
}
function getGbnfGrammarForFunctionName(functions, chatWrapper) {
const grammarGenerator = new GbnfGrammarGenerator();
const functionNameGrammars = [];
for (const functionName of Object.keys(functions))
functionNameGrammars.push(new GbnfVerbatimText(functionName));
const callGrammar = new GbnfOr(functionNameGrammars);
const rootTerminal = new GbnfGrammar([
...(chatWrapper.settings.functions.call.optionalPrefixSpace ? ["[ ]?"] : []),
callGrammar.resolve(grammarGenerator)
]);
const rootGrammar = rootTerminal.getGrammar();
return grammarGenerator.generateGbnfFile(rootGrammar + " [\\n]");
}
//# sourceMappingURL=FunctionCallNameGrammar.js.map
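A hedged usage sketch of the grammar above: generation constrained by it can only produce one of the supplied function names, and `parseFunctionName()` validates the result. Obtaining the `Llama` instance and `ChatWrapper` is assumed to happen elsewhere, and the `generatedText` literal is purely illustrative.

declare const llama: Llama;
declare const chatWrapper: ChatWrapper;
const functions = {
    getWeather: {description: "Get the current weather for a city"},
    getTime: {description: "Get the current time"}
} as const;
const nameGrammar = new FunctionCallNameGrammar(llama, functions, chatWrapper);
// `generatedText` would normally come from an evaluation constrained by `nameGrammar`
const generatedText = "getWeather\n";
const functionName = nameGrammar.parseFunctionName(generatedText); // "getWeather" | "getTime"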


@@ -0,0 +1 @@
{"version":3,"file":"FunctionCallNameGrammar.js","sourceRoot":"","sources":["../../../../src/evaluator/LlamaChat/utils/FunctionCallNameGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,EAAC,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAC,SAAS,EAAC,MAAM,6BAA6B,CAAC;AAEtD,OAAO,EAAC,oBAAoB,EAAC,MAAM,iDAAiD,CAAC;AAErF,OAAO,EAAC,WAAW,EAAC,MAAM,kDAAkD,CAAC;AAE7E,OAAO,EAAC,MAAM,EAAC,MAAM,6CAA6C,CAAC;AACnE,OAAO,EAAC,gBAAgB,EAAC,MAAM,uDAAuD,CAAC;AAEvF,OAAO,EAAC,gCAAgC,EAAC,MAAM,uCAAuC,CAAC;AAGvF,MAAM,OAAO,uBAAoE,SAAQ,YAAY;IAChF,UAAU,CAAY;IACtB,YAAY,CAAc;IAE3C,YAAmB,KAAY,EAAE,SAAoB,EAAE,WAAwB;QAC3E,MAAM,OAAO,GAAG,6BAA6B,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC;QAEtE,KAAK,CAAC,KAAK,EAAE;YACT,OAAO;YACP,sBAAsB,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YACzC,oBAAoB,EAAE,IAAI;SAC7B,CAAC,CAAC;QAEH,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,YAAY,GAAG,WAAW,CAAC;QAEhC,IAAI,CAAC,kBAAkB,EAAE,CAAC;IAC9B,CAAC;IAEM,iBAAiB,CAAC,qBAA6B;QAClD,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,mBAAmB,IAAI,qBAAqB,CAAC,CAAC,CAAC,KAAK,GAAG;YACjG,qBAAqB,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAE3D,MAAM,YAAY,GAAG,qBAAqB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAEzD,MAAM,YAAY,GAAG,qBAAqB,CAAC,KAAK,CAC5C,CAAC,EACD,YAAY,GAAG,CAAC;YACZ,CAAC,CAAC,qBAAqB,CAAC,MAAM;YAC9B,CAAC,CAAC,YAAY,CACO,CAAC;QAE9B,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,YAAY,CAAC;YAC7C,MAAM,IAAI,gCAAgC,CACtC,kBAAkB,YAAY,2CAA2C,EACzE,IAAI,CAAC,UAAU,EACf,IAAI,CAAC,YAAY,EACjB,qBAAqB,CACxB,CAAC;QAEN,OAAO,YAAY,CAAC;IACxB,CAAC;IAEO,kBAAkB;QACtB,KAAK,MAAM,aAAa,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YACvD,IAAI,aAAa,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC;gBAC3F,MAAM,IAAI,KAAK,CAAC,kBAAkB,aAAa,sCAAsC,CAAC,CAAC;iBACtF,IAAI,aAAa,KAAK,EAAE;gBACzB,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QACnE,CAAC;IACL,CAAC;CACJ;AAED,SAAS,6BAA6B,CAClC,SAAoB,EAAE,WAAwB;IAE9C,MAAM,gBAAgB,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAEpD,MAAM,oBAAoB,GAAmB,EAAE,CAAC;IAEhD,KAAK,MAAM,YAAY,IAAI,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC;QAC7C,oBAAoB,CAAC,IAAI,CAAC,IAAI,gBAAgB,CAAC,YAAY,CAAC,CAAC,CAAC;IAElE,MAAM,WAAW,GAAG,IAAI,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAErD,MAAM,YAAY,GAAG,IAAI,WAAW,CAAC;QACjC,GAAG,CAAC,WAAW,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC5E,WAAW,CAAC,OAAO,CAAC,gBAAgB,CAAC;KACxC,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,YAAY,CAAC,UAAU,EAAE,CAAC;IAE9C,OAAO,gBAAgB,CAAC,gBAAgB,CAAC,WAAW,GAAG,QAAQ,CAAC,CAAC;AACrE,CAAC"}


@@ -0,0 +1,16 @@
import { LlamaGrammar } from "../../LlamaGrammar.js";
import { ChatModelFunctions } from "../../../types.js";
import { ChatWrapper } from "../../../ChatWrapper.js";
import { Llama } from "../../../bindings/Llama.js";
import { GbnfJsonSchema } from "../../../utils/gbnfJson/types.js";
export declare class FunctionCallParamsGrammar<const Functions extends ChatModelFunctions> extends LlamaGrammar {
private readonly _functions;
private readonly _chatWrapper;
private readonly _functionName;
private readonly _paramsSchema;
constructor(llama: Llama, functions: Functions, chatWrapper: ChatWrapper, functionName: string, paramsSchema: GbnfJsonSchema);
parseParams(callText: string): {
params: any;
raw: string;
};
}


@@ -0,0 +1,45 @@
import { LlamaGrammar } from "../../LlamaGrammar.js";
import { LlamaText } from "../../../utils/LlamaText.js";
import { validateObjectAgainstGbnfSchema } from "../../../utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js";
import { GbnfGrammarGenerator } from "../../../utils/gbnfJson/GbnfGrammarGenerator.js";
import { getGbnfJsonTerminalForGbnfJsonSchema } from "../../../utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js";
import { LlamaFunctionCallValidationError } from "./LlamaFunctionCallValidationError.js";
export class FunctionCallParamsGrammar extends LlamaGrammar {
_functions;
_chatWrapper;
_functionName;
_paramsSchema;
constructor(llama, functions, chatWrapper, functionName, paramsSchema) {
const grammar = getGbnfGrammarForFunctionParams(paramsSchema);
super(llama, {
grammar,
stopGenerationTriggers: [LlamaText("\n".repeat(4))],
trimWhitespaceSuffix: true
});
this._functions = functions;
this._chatWrapper = chatWrapper;
this._functionName = functionName;
this._paramsSchema = paramsSchema;
}
parseParams(callText) {
const endIndex = callText.lastIndexOf("\n".repeat(4));
if (endIndex < 0)
throw new LlamaFunctionCallValidationError(`Expected function call params for function "${this._functionName}" to end with stop generation trigger`, this._functions, this._chatWrapper, callText);
const paramsString = callText.slice(0, endIndex);
if (paramsString.trim().length === 0)
throw new LlamaFunctionCallValidationError(`Expected function call params for function "${this._functionName}" to not be empty`, this._functions, this._chatWrapper, callText);
const params = JSON.parse(paramsString);
validateObjectAgainstGbnfSchema(params, this._paramsSchema);
return {
params: params, // prevent infinite TS type instantiation
raw: paramsString
};
}
}
function getGbnfGrammarForFunctionParams(paramsSchema) {
const grammarGenerator = new GbnfGrammarGenerator();
const rootTerminal = getGbnfJsonTerminalForGbnfJsonSchema(paramsSchema, grammarGenerator);
const rootGrammar = rootTerminal.resolve(grammarGenerator, true);
return grammarGenerator.generateGbnfFile(rootGrammar + ` "${"\\n".repeat(4)}"`);
}
//# sourceMappingURL=FunctionCallParamsGrammar.js.map
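A hedged usage sketch of the params grammar above: `parseParams()` expects the generated text to end with the four-newline stop trigger and validates the parsed object against the schema. The `GbnfJsonSchema` literal and the `callText` value are illustrative assumptions; obtaining `llama` and `chatWrapper` is assumed to happen elsewhere.

declare const llama: Llama;
declare const chatWrapper: ChatWrapper;
const weatherParamsSchema = {
    type: "object",
    properties: {city: {type: "string"}}
} as const;
const paramsGrammar = new FunctionCallParamsGrammar(
    llama,
    {getWeather: {params: weatherParamsSchema}},
    chatWrapper,
    "getWeather",
    weatherParamsSchema
);
// `callText` would normally come from an evaluation constrained by `paramsGrammar`
const callText = '{"city": "Paris"}' + "\n".repeat(4);
const {params, raw} = paramsGrammar.parseParams(callText); // params: {city: "Paris"}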


@@ -0,0 +1 @@
{"version":3,"file":"FunctionCallParamsGrammar.js","sourceRoot":"","sources":["../../../../src/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,EAAC,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAC,SAAS,EAAC,MAAM,6BAA6B,CAAC;AACtD,OAAO,EAAC,+BAA+B,EAAC,MAAM,kEAAkE,CAAC;AAEjH,OAAO,EAAC,oBAAoB,EAAC,MAAM,iDAAiD,CAAC;AACrF,OAAO,EAAC,oCAAoC,EAAC,MAAM,uEAAuE,CAAC;AAI3H,OAAO,EAAC,gCAAgC,EAAC,MAAM,uCAAuC,CAAC;AAGvF,MAAM,OAAO,yBAAsE,SAAQ,YAAY;IAClF,UAAU,CAAY;IACtB,YAAY,CAAc;IAC1B,aAAa,CAAS;IACtB,aAAa,CAAiB;IAE/C,YAAmB,KAAY,EAAE,SAAoB,EAAE,WAAwB,EAAE,YAAoB,EAAE,YAA4B;QAC/H,MAAM,OAAO,GAAG,+BAA+B,CAAC,YAAY,CAAC,CAAC;QAE9D,KAAK,CAAC,KAAK,EAAE;YACT,OAAO;YACP,sBAAsB,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YACnD,oBAAoB,EAAE,IAAI;SAC7B,CAAC,CAAC;QAEH,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,YAAY,GAAG,WAAW,CAAC;QAChC,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;QAClC,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;IACtC,CAAC;IAEM,WAAW,CAAC,QAAgB;QAC/B,MAAM,QAAQ,GAAG,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAEtD,IAAI,QAAQ,GAAG,CAAC;YACZ,MAAM,IAAI,gCAAgC,CACtC,+CAA+C,IAAI,CAAC,aAAa,uCAAuC,EACxG,IAAI,CAAC,UAAU,EACf,IAAI,CAAC,YAAY,EACjB,QAAQ,CACX,CAAC;QAEN,MAAM,YAAY,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;QAEjD,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAChC,MAAM,IAAI,gCAAgC,CACtC,+CAA+C,IAAI,CAAC,aAAa,mBAAmB,EACpF,IAAI,CAAC,UAAU,EACf,IAAI,CAAC,YAAY,EACjB,QAAQ,CACX,CAAC;QAEN,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;QAExC,+BAA+B,CAAC,MAAM,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC;QAE5D,OAAO;YACH,MAAM,EAAE,MAAa,EAAE,yCAAyC;YAChE,GAAG,EAAE,YAAY;SACpB,CAAC;IACN,CAAC;CACJ;AAED,SAAS,+BAA+B,CAAC,YAA4B;IACjE,MAAM,gBAAgB,GAAG,IAAI,oBAAoB,EAAE,CAAC;IACpD,MAAM,YAAY,GAAG,oCAAoC,CAAC,YAAY,EAAE,gBAAgB,CAAC,CAAC;IAC1F,MAAM,WAAW,GAAG,YAAY,CAAC,OAAO,CAAC,gBAAgB,EAAE,IAAI,CAAC,CAAC;IAEjE,OAAO,gBAAgB,CAAC,gBAAgB,CAAC,WAAW,GAAG,KAAK,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;AACpF,CAAC"}


@@ -0,0 +1,8 @@
import { ChatModelFunctions } from "../../../types.js";
import { ChatWrapper } from "../../../ChatWrapper.js";
export declare class LlamaFunctionCallValidationError<const Functions extends ChatModelFunctions> extends Error {
readonly functions: Functions;
readonly chatWrapper: ChatWrapper;
readonly callText: string;
constructor(message: string, functions: Functions, chatWrapper: ChatWrapper, callText: string);
}


@@ -0,0 +1,12 @@
export class LlamaFunctionCallValidationError extends Error {
functions;
chatWrapper;
callText;
constructor(message, functions, chatWrapper, callText) {
super(message);
this.functions = functions;
this.chatWrapper = chatWrapper;
this.callText = callText;
}
}
//# sourceMappingURL=LlamaFunctionCallValidationError.js.map
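A hedged sketch of how a caller might catch this error to recover the raw text the model generated for the failed function call; the helper below is purely illustrative and uses only the APIs declared above.

function tryParseFunctionName<const Functions extends ChatModelFunctions>(
    grammar: FunctionCallNameGrammar<Functions>, generatedText: string
) {
    try {
        return grammar.parseFunctionName(generatedText);
    } catch (err) {
        if (err instanceof LlamaFunctionCallValidationError) {
            console.warn("Invalid function call:", err.callText);
            return null;
        }
        throw err; // not a validation error; rethrow
    }
}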


@@ -0,0 +1 @@
{"version":3,"file":"LlamaFunctionCallValidationError.js","sourceRoot":"","sources":["../../../../src/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.ts"],"names":[],"mappings":"AAIA,MAAM,OAAO,gCAA6E,SAAQ,KAAK;IACnF,SAAS,CAAY;IACrB,WAAW,CAAc;IACzB,QAAQ,CAAS;IAEjC,YAAmB,OAAe,EAAE,SAAoB,EAAE,WAAwB,EAAE,QAAgB;QAChG,KAAK,CAAC,OAAO,CAAC,CAAC;QAEf,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC7B,CAAC;CACJ"}


@@ -0,0 +1,16 @@
import { ChatHistoryItem, Tokenizer } from "../../../../types.js";
import { ChatWrapper } from "../../../../ChatWrapper.js";
export declare function eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy({ chatHistory, maxTokensCount, tokenizer, chatWrapper, lastShiftMetadata }: {
chatHistory: ChatHistoryItem[];
maxTokensCount: number;
tokenizer: Tokenizer;
chatWrapper: ChatWrapper;
lastShiftMetadata?: object | null;
}): Promise<{
chatHistory: ChatHistoryItem[];
metadata: CalculationMetadata;
}>;
type CalculationMetadata = {
removedCharactersNumber: number;
};
export {};


@@ -0,0 +1,254 @@
import { isChatModelResponseFunctionCall, isChatModelResponseSegment } from "../../../../types.js";
import { findCharacterRemovalCountToFitChatHistoryInContext } from "../../../../utils/findCharacterRemovalCountToFitChatHistoryInContext.js";
import { truncateLlamaTextAndRoundToWords, truncateTextAndRoundToWords } from "../../../../utils/truncateTextAndRoundToWords.js";
import { LlamaText } from "../../../../utils/LlamaText.js";
export async function eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy({ chatHistory, maxTokensCount, tokenizer, chatWrapper, lastShiftMetadata }) {
let initialCharactersRemovalCount = 0;
if (isCalculationMetadata(lastShiftMetadata))
initialCharactersRemovalCount = lastShiftMetadata.removedCharactersNumber;
const { removedCharactersCount, compressedChatHistory } = await findCharacterRemovalCountToFitChatHistoryInContext({
chatHistory,
tokensCountToFit: maxTokensCount,
initialCharactersRemovalCount,
tokenizer,
chatWrapper,
failedCompressionErrorMessage: "Failed to compress chat history for context shift due to a too long prompt or system message that cannot be compressed without affecting the generation quality. " +
"Consider increasing the context size or shortening the long prompt or system message.",
compressChatHistory({ chatHistory, charactersToRemove, estimatedCharactersPerToken }) {
const res = chatHistory.map((item) => structuredClone(item));
let charactersLeftToRemove = charactersToRemove;
function compressFunctionCalls() {
for (let i = res.length - 1; i >= 0 && charactersLeftToRemove > 0; i--) {
const historyItem = res[i];
if (historyItem.type !== "model")
continue;
for (let t = historyItem.response.length - 1; t >= 0 && charactersLeftToRemove > 0; t--) {
const item = historyItem.response[t];
if (typeof item === "string" || item.type !== "functionCall")
continue;
if (item.rawCall == null)
continue;
const originalRawCallTokensLength = LlamaText.fromJSON(item.rawCall).tokenize(tokenizer, "trimLeadingSpace").length;
const newRawCallText = chatWrapper.generateFunctionCall(item.name, item.params);
const newRawCallTextTokensLength = newRawCallText.tokenize(tokenizer, "trimLeadingSpace").length;
if (newRawCallTextTokensLength < originalRawCallTokensLength) {
item.rawCall = newRawCallText.toJSON();
charactersLeftToRemove -= ((originalRawCallTokensLength - newRawCallTextTokensLength) * estimatedCharactersPerToken);
}
}
}
}
function removeHistoryThatLedToModelResponseAtIndex(index) {
let removedItems = 0;
for (let i = index - 1; i >= 0; i--) {
const historyItem = res[i];
if (historyItem == null)
continue;
if (historyItem.type === "model")
break; // stop removing history items if we reach another model response
if (i === 0 && historyItem.type === "system")
break; // keep the first system message
if (historyItem.type === "user" || historyItem.type === "system") {
const newText = truncateLlamaTextAndRoundToWords(LlamaText.fromJSON(historyItem.text), charactersLeftToRemove, undefined, false);
const newTextString = newText.toString();
const historyItemString = LlamaText.fromJSON(historyItem.text).toString();
if (newText.values.length === 0) {
res.splice(i, 1);
i++;
removedItems++;
charactersLeftToRemove -= historyItemString.length;
}
else if (newTextString.length < historyItemString.length) {
charactersLeftToRemove -= historyItemString.length - newTextString.length;
if (historyItem.type === "user")
historyItem.text = newText.toString();
else
historyItem.text = newText.toJSON();
}
}
else {
void historyItem;
}
}
return removedItems;
}
function compressHistoryThatLedToModelResponseAtIndex(index, keepTokensCount = 0) {
let removedItems = 0;
let promptStartIndex = undefined;
for (let i = index - 1; i >= 0; i--) {
const historyItem = res[i];
if (historyItem == null)
continue;
if (historyItem.type === "model") {
promptStartIndex = i + 1;
break;
}
if (i === 0 && historyItem.type === "system") {
promptStartIndex = i + 1;
break; // keep the first system message
}
}
if (promptStartIndex == null || promptStartIndex >= index)
return 0;
for (let i = promptStartIndex; i < index && charactersLeftToRemove > 0; i++) {
const historyItem = res[i];
if (historyItem == null || historyItem.type !== "user")
continue;
let removeChars = Math.min(charactersLeftToRemove, historyItem.text.length);
if (keepTokensCount > 0) {
removeChars -= Math.floor(keepTokensCount * estimatedCharactersPerToken);
if (removeChars < 0)
removeChars = 0;
keepTokensCount -= Math.min(keepTokensCount, Math.max(0, historyItem.text.length - removeChars) / estimatedCharactersPerToken);
}
const newText = truncateTextAndRoundToWords(historyItem.text, removeChars, undefined, false);
if (newText.length === 0) {
res.splice(i, 1);
i--;
index--;
removedItems++;
charactersLeftToRemove -= historyItem.text.length;
}
else {
charactersLeftToRemove -= historyItem.text.length - newText.length;
historyItem.text = newText;
}
}
return removedItems;
}
function removeEmptySegmentsFromModelResponse(modelResponse) {
const stack = [];
for (let t = 0; t < modelResponse.length && charactersLeftToRemove > 0; t++) {
const item = modelResponse[t];
const isLastItem = t === modelResponse.length - 1;
if (!isChatModelResponseSegment(item))
continue;
const type = item.segmentType;
const topStack = stack.at(-1);
if (topStack?.type === type) {
if (item.ended && item.text === "" && topStack.canRemove) {
modelResponse.splice(t, 1);
t--;
modelResponse.splice(topStack.startIndex, 1);
t--;
stack.pop();
}
else if (!item.ended && item.text === "" && !isLastItem) {
modelResponse.splice(t, 1);
t--;
}
else if (!item.ended && item.text !== "")
topStack.canRemove = false;
else if (item.ended)
stack.pop();
}
else if (!item.ended)
stack.push({
type,
startIndex: t,
canRemove: item.text === ""
});
}
}
function compressFirstModelResponse() {
for (let i = 0; i < res.length && charactersLeftToRemove > 0; i++) {
const historyItem = res[i];
const isLastHistoryItem = i === res.length - 1;
if (historyItem.type !== "model")
continue;
for (let t = 0; t < historyItem.response.length && charactersLeftToRemove > 0; t++) {
const item = historyItem.response[t];
const isLastText = t === historyItem.response.length - 1;
if (isLastHistoryItem && isLastText)
continue;
if (typeof item === "string") {
const newText = truncateTextAndRoundToWords(item, charactersLeftToRemove, undefined, true);
if (newText === "") {
historyItem.response.splice(t, 1);
t--;
charactersLeftToRemove -= item.length;
}
else if (newText.length < item.length) {
historyItem.response[t] = newText;
charactersLeftToRemove -= item.length - newText.length;
}
}
else if (isChatModelResponseFunctionCall(item)) {
historyItem.response.splice(t, 1);
t--;
const functionCallAndResultTokenUsage = chatWrapper.generateFunctionCallsAndResults([item], true)
.tokenize(tokenizer, "trimLeadingSpace").length;
charactersLeftToRemove -= functionCallAndResultTokenUsage * estimatedCharactersPerToken;
}
else if (isChatModelResponseSegment(item)) {
if (item.text !== "") {
const newText = truncateTextAndRoundToWords(item.text, charactersLeftToRemove, undefined, true);
if (newText === "" && item.ended) {
const emptySegmentTokenUsage = chatWrapper.generateModelResponseText([{ ...item, text: "" }], true)
.tokenize(tokenizer, "trimLeadingSpace").length;
historyItem.response.splice(t, 1);
t--;
charactersLeftToRemove -= item.text.length + emptySegmentTokenUsage * estimatedCharactersPerToken;
}
else {
charactersLeftToRemove -= item.text.length - newText.length;
item.text = newText;
}
}
}
else
void item;
}
removeEmptySegmentsFromModelResponse(historyItem.response);
if (historyItem.response.length === 0) {
// if the model response is removed from the history,
// the things that led to it are not important anymore
i -= removeHistoryThatLedToModelResponseAtIndex(i);
res.splice(i, 1);
i--;
}
}
}
function compressLastModelResponse(minCharactersToKeep = 60) {
const lastHistoryItem = res[res.length - 1];
if (lastHistoryItem == null || lastHistoryItem.type !== "model")
return;
const lastResponseItem = lastHistoryItem.response[lastHistoryItem.response.length - 1];
if (lastResponseItem == null || typeof lastResponseItem !== "string")
return;
compressHistoryThatLedToModelResponseAtIndex(res.length - 1, maxTokensCount / 4);
if (charactersLeftToRemove <= 0)
return;
const nextTextLength = Math.max(Math.min(lastResponseItem.length, minCharactersToKeep), lastResponseItem.length - charactersLeftToRemove);
const charactersToRemoveFromText = lastResponseItem.length - nextTextLength;
const newText = truncateTextAndRoundToWords(lastResponseItem, charactersToRemoveFromText, undefined, true);
if (newText.length < lastResponseItem.length) {
lastHistoryItem.response[lastHistoryItem.response.length - 1] = newText;
charactersLeftToRemove -= lastResponseItem.length - newText.length;
}
if (charactersLeftToRemove <= 0)
return;
compressHistoryThatLedToModelResponseAtIndex(res.length - 1);
}
compressFunctionCalls();
if (charactersLeftToRemove <= 0)
return res;
compressFirstModelResponse();
if (charactersLeftToRemove <= 0)
return res;
compressLastModelResponse();
return res;
}
});
const newMetadata = {
removedCharactersNumber: removedCharactersCount
};
return {
chatHistory: compressedChatHistory,
metadata: newMetadata
};
}
function isCalculationMetadata(metadata) {
return metadata != null && typeof metadata === "object" && typeof metadata.removedCharactersNumber === "number";
}
//# sourceMappingURL=eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map
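This function appears to be the implementation behind the default `"eraseFirstResponseAndKeepFirstSystem"` strategy value. As a hedged sketch, it can also be passed explicitly (or wrapped) as a custom `contextShift.strategy`; the import path, the omitted `LlamaChat`/`ChatHistoryItem` imports, and the `sequence.context.contextSize` access are assumptions here, not guarantees of this file.

import {
    eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy
} from "./eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";

async function generateWithExplicitShiftStrategy(chat: LlamaChat, history: ChatHistoryItem[]) {
    return await chat.generateResponse(history, {
        contextShift: {
            size: (sequence) => Math.floor(sequence.context.contextSize / 10), // ~10% of the context size
            strategy: ({chatHistory, ...rest}) =>
                eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy({
                    chatHistory: chatHistory.slice(), // copy to satisfy the mutable ChatHistoryItem[] parameter type
                    ...rest
                })
        }
    });
}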