import { CommandModule } from "yargs";
import { LlamaGrammar } from "../../evaluator/LlamaGrammar.js";
import { BuildGpu, LlamaNuma } from "../../bindings/types.js";
import { SpecializedChatWrapperTypeName } from "../../chatWrappers/utils/resolveChatWrapper.js";

/** Command-line options accepted by the `chat` command. */
type ChatCommand = {
    modelPath?: string;
    header?: string[];
    gpu?: BuildGpu | "auto";
    systemInfo: boolean;
    systemPrompt?: string;
    systemPromptFile?: string;
    prompt?: string;
    promptFile?: string;
    wrapper: SpecializedChatWrapperTypeName | "auto";
    noJinja?: boolean;
    contextSize?: number;
    batchSize?: number;
    flashAttention?: boolean;
    swaFullCache?: boolean;
    noTrimWhitespace: boolean;
    grammar: "text" | Parameters<typeof LlamaGrammar.getFor>[1];
    jsonSchemaGrammarFile?: string;
    threads?: number;
    temperature: number;
    minP: number;
    topK: number;
    topP: number;
    seed?: number;
    gpuLayers?: number;
    repeatPenalty: number;
    lastTokensRepeatPenalty: number;
    penalizeRepeatingNewLine: boolean;
    repeatFrequencyPenalty?: number;
    repeatPresencePenalty?: number;
    maxTokens: number;
    reasoningBudget?: number;
    noHistory: boolean;
    environmentFunctions: boolean;
    tokenPredictionDraftModel?: string;
    tokenPredictionModelContextSize?: number;
    debug: boolean;
    numa?: LlamaNuma;
    meter: boolean;
    timing: boolean;
    noMmap: boolean;
    noDirectIo: boolean;
    printTimings: boolean;
};
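
// Under yargs' convention the field names above double as the command's flag
// names, so (assuming this file comes from node-llama-cpp's CLI, as the import
// paths suggest) an invocation might look like the sketch below; the model
// path is purely illustrative:
//
//   npx node-llama-cpp chat --modelPath ./models/example.gguf \
//       --temperature 0.8 --topK 40 --maxTokens 512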

/** yargs command-module definition for the `chat` command. */
export declare const ChatCommand: CommandModule<object, ChatCommand>;

export {};
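
// A minimal usage sketch (an assumption, not part of this declaration file):
// registering the command on a yargs instance. `yargs`, `hideBin`, `.command()`,
// and `.parseAsync()` are standard yargs APIs; the ChatCommand import path is
// hypothetical.
//
//   import yargs from "yargs";
//   import { hideBin } from "yargs/helpers";
//   import { ChatCommand } from "./ChatCommand.js";
//
//   await yargs(hideBin(process.argv))
//       .command(ChatCommand)
//       .parseAsync();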