First upload version 0.0.1

Neyra
2026-02-05 15:27:49 +08:00
commit 8e9b7201ed
4182 changed files with 593136 additions and 0 deletions

2
node_modules/node-llama-cpp/dist/cli/cli.d.ts generated vendored Normal file

@@ -0,0 +1,2 @@
#!/usr/bin/env node
export {};

45
node_modules/node-llama-cpp/dist/cli/cli.js generated vendored Normal file

@@ -0,0 +1,45 @@
#!/usr/bin/env node
import { fileURLToPath } from "url";
import path from "path";
import yargs from "yargs";
import { hideBin } from "yargs/helpers";
import fs from "fs-extra";
import { cliBinName, documentationPageUrls } from "../config.js";
import { setIsRunningFromCLI } from "../state.js";
import { withCliCommandDescriptionDocsUrl } from "./utils/withCliCommandDescriptionDocsUrl.js";
import { PullCommand } from "./commands/PullCommand.js";
import { ChatCommand } from "./commands/ChatCommand.js";
import { InitCommand } from "./commands/InitCommand.js";
import { SourceCommand } from "./commands/source/SourceCommand.js";
import { CompleteCommand } from "./commands/CompleteCommand.js";
import { InfillCommand } from "./commands/InfillCommand.js";
import { InspectCommand } from "./commands/inspect/InspectCommand.js";
import { OnPostInstallCommand } from "./commands/OnPostInstallCommand.js";
import { DebugCommand } from "./commands/DebugCommand.js";
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const packageJson = fs.readJSONSync(path.join(__dirname, "..", "..", "package.json"));
setIsRunningFromCLI(true);
const yarg = yargs(hideBin(process.argv));
yarg
.scriptName(cliBinName)
.usage(withCliCommandDescriptionDocsUrl("Usage: $0 <command> [options]", documentationPageUrls.CLI.index))
.command(PullCommand)
.command(ChatCommand)
.command(InitCommand)
.command(SourceCommand)
.command(CompleteCommand)
.command(InfillCommand)
.command(InspectCommand)
.command(OnPostInstallCommand)
.command(DebugCommand)
.recommendCommands()
.demandCommand(1)
.strict()
.strictCommands()
.alias("v", "version")
.help("h")
.alias("h", "help")
.version(packageJson.version)
.wrap(Math.min(130, yarg.terminalWidth()))
.parse();
//# sourceMappingURL=cli.js.map
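
The entry point above is a standard yargs setup: every subcommand is registered with .command(), strict parsing rejects unknown commands, and the version string is read from the vendored package.json at runtime. A minimal sketch of the same wiring in TypeScript, using a hypothetical `greet` command and `my-cli` script name (both illustrative, not part of node-llama-cpp):

import yargs, { CommandModule } from "yargs";
import { hideBin } from "yargs/helpers";

type GreetArgs = {
    name: string;
};

// A self-contained command module, shaped like the PullCommand/ChatCommand modules above
const GreetCommand: CommandModule<object, GreetArgs> = {
    command: "greet [name]",
    describe: "Print a greeting",
    builder: (y) => y.positional("name", { type: "string", default: "world" }),
    handler({ name }) {
        console.log(`Hello, ${name}!`);
    }
};

const cli = yargs(hideBin(process.argv));
cli
    .scriptName("my-cli") // node-llama-cpp passes cliBinName from its config here
    .usage("Usage: $0 <command> [options]")
    .command(GreetCommand)
    .recommendCommands()
    .demandCommand(1)
    .strict()
    .help("h")
    .alias("h", "help")
    .wrap(Math.min(130, cli.terminalWidth()))
    .parse();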

1
node_modules/node-llama-cpp/dist/cli/cli.js.map generated vendored Normal file

@@ -0,0 +1 @@
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../../src/cli/cli.ts"],"names":[],"mappings":";AAEA,OAAO,EAAC,aAAa,EAAC,MAAM,KAAK,CAAC;AAClC,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAC,OAAO,EAAC,MAAM,eAAe,CAAC;AACtC,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,UAAU,EAAE,qBAAqB,EAAC,MAAM,cAAc,CAAC;AAC/D,OAAO,EAAC,mBAAmB,EAAC,MAAM,aAAa,CAAC;AAChD,OAAO,EAAC,gCAAgC,EAAC,MAAM,6CAA6C,CAAC;AAC7F,OAAO,EAAC,WAAW,EAAC,MAAM,2BAA2B,CAAC;AACtD,OAAO,EAAC,WAAW,EAAC,MAAM,2BAA2B,CAAC;AACtD,OAAO,EAAC,WAAW,EAAC,MAAM,2BAA2B,CAAC;AACtD,OAAO,EAAC,aAAa,EAAC,MAAM,oCAAoC,CAAC;AACjE,OAAO,EAAC,eAAe,EAAC,MAAM,+BAA+B,CAAC;AAC9D,OAAO,EAAC,aAAa,EAAC,MAAM,6BAA6B,CAAC;AAC1D,OAAO,EAAC,cAAc,EAAC,MAAM,sCAAsC,CAAC;AACpE,OAAO,EAAC,oBAAoB,EAAC,MAAM,oCAAoC,CAAC;AACxE,OAAO,EAAC,YAAY,EAAC,MAAM,4BAA4B,CAAC;AAExD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE/D,MAAM,WAAW,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,cAAc,CAAC,CAAC,CAAC;AAEtF,mBAAmB,CAAC,IAAI,CAAC,CAAC;AAE1B,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;AAE1C,IAAI;KACC,UAAU,CAAC,UAAU,CAAC;KACtB,KAAK,CAAC,gCAAgC,CAAC,+BAA+B,EAAE,qBAAqB,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;KACzG,OAAO,CAAC,WAAW,CAAC;KACpB,OAAO,CAAC,WAAW,CAAC;KACpB,OAAO,CAAC,WAAW,CAAC;KACpB,OAAO,CAAC,aAAa,CAAC;KACtB,OAAO,CAAC,eAAe,CAAC;KACxB,OAAO,CAAC,aAAa,CAAC;KACtB,OAAO,CAAC,cAAc,CAAC;KACvB,OAAO,CAAC,oBAAoB,CAAC;KAC7B,OAAO,CAAC,YAAY,CAAC;KACrB,iBAAiB,EAAE;KACnB,aAAa,CAAC,CAAC,CAAC;KAChB,MAAM,EAAE;KACR,cAAc,EAAE;KAChB,KAAK,CAAC,GAAG,EAAE,SAAS,CAAC;KACrB,IAAI,CAAC,GAAG,CAAC;KACT,KAAK,CAAC,GAAG,EAAE,MAAM,CAAC;KAClB,OAAO,CAAC,WAAW,CAAC,OAAO,CAAC;KAC5B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC;KACzC,KAAK,EAAE,CAAC"}

50
node_modules/node-llama-cpp/dist/cli/commands/ChatCommand.d.ts generated vendored Normal file

@@ -0,0 +1,50 @@
import { CommandModule } from "yargs";
import { LlamaGrammar } from "../../evaluator/LlamaGrammar.js";
import { BuildGpu, LlamaNuma } from "../../bindings/types.js";
import { SpecializedChatWrapperTypeName } from "../../chatWrappers/utils/resolveChatWrapper.js";
type ChatCommand = {
modelPath?: string;
header?: string[];
gpu?: BuildGpu | "auto";
systemInfo: boolean;
systemPrompt?: string;
systemPromptFile?: string;
prompt?: string;
promptFile?: string;
wrapper: SpecializedChatWrapperTypeName | "auto";
noJinja?: boolean;
contextSize?: number;
batchSize?: number;
flashAttention?: boolean;
swaFullCache?: boolean;
noTrimWhitespace: boolean;
grammar: "text" | Parameters<typeof LlamaGrammar.getFor>[1];
jsonSchemaGrammarFile?: string;
threads?: number;
temperature: number;
minP: number;
topK: number;
topP: number;
seed?: number;
gpuLayers?: number;
repeatPenalty: number;
lastTokensRepeatPenalty: number;
penalizeRepeatingNewLine: boolean;
repeatFrequencyPenalty?: number;
repeatPresencePenalty?: number;
maxTokens: number;
reasoningBudget?: number;
noHistory: boolean;
environmentFunctions: boolean;
tokenPredictionDraftModel?: string;
tokenPredictionModelContextSize?: number;
debug: boolean;
numa?: LlamaNuma;
meter: boolean;
timing: boolean;
noMmap: boolean;
noDirectIo: boolean;
printTimings: boolean;
};
export declare const ChatCommand: CommandModule<object, ChatCommand>;
export {};
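
This declaration mirrors the builder in ChatCommand.js below: options the builder gives a default (temperature, topK, meter, ...) appear as required fields, while options without one (modelPath, seed, threads, ...) stay optional. A reduced sketch of that correspondence, using a hypothetical two-option command (names are illustrative only):

import { CommandModule } from "yargs";

// Options with a yargs default become required fields; the rest stay optional.
type ExampleArgs = {
    modelPath?: string;  // no default in the builder
    temperature: number; // builder declares `default: 0`
};

export const ExampleCommand: CommandModule<object, ExampleArgs> = {
    command: "example [modelPath]",
    describe: "Illustrative only",
    builder: (y) => y
        .option("modelPath", { type: "string" })
        .option("temperature", { type: "number", default: 0 }),
    handler({ modelPath, temperature }) {
        console.log(modelPath ?? "(no model)", temperature);
    }
};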

765
node_modules/node-llama-cpp/dist/cli/commands/ChatCommand.js generated vendored Normal file

@@ -0,0 +1,765 @@
import * as readline from "readline";
import process from "process";
import path from "path";
import chalk from "chalk";
import fs from "fs-extra";
import prettyMilliseconds from "pretty-ms";
import { chatCommandHistoryFilePath, defaultChatSystemPrompt, documentationPageUrls } from "../../config.js";
import { getIsInDocumentationMode } from "../../state.js";
import { ReplHistory } from "../../utils/ReplHistory.js";
import { defineChatSessionFunction } from "../../evaluator/LlamaChatSession/utils/defineChatSessionFunction.js";
import { getLlama } from "../../bindings/getLlama.js";
import { LlamaGrammar } from "../../evaluator/LlamaGrammar.js";
import { LlamaChatSession } from "../../evaluator/LlamaChatSession/LlamaChatSession.js";
import { LlamaLogLevel, LlamaLogLevelGreaterThan, llamaNumaOptions, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption, parseNumaOption } from "../../bindings/types.js";
import withOra from "../../utils/withOra.js";
import { TokenMeter } from "../../evaluator/TokenMeter.js";
import { printInfoLine } from "../utils/printInfoLine.js";
import { resolveChatWrapper, specializedChatWrapperTypeNames } from "../../chatWrappers/utils/resolveChatWrapper.js";
import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
import { printCommonInfoLines } from "../utils/printCommonInfoLines.js";
import { resolveCommandGgufPath } from "../utils/resolveCommandGgufPath.js";
import { withProgressLog } from "../../utils/withProgressLog.js";
import { resolveHeaderFlag } from "../utils/resolveHeaderFlag.js";
import { withCliCommandDescriptionDocsUrl } from "../utils/withCliCommandDescriptionDocsUrl.js";
import { ConsoleInteraction, ConsoleInteractionKey } from "../utils/ConsoleInteraction.js";
import { DraftSequenceTokenPredictor } from "../../evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js";
export const ChatCommand = {
command: "chat [modelPath]",
describe: withCliCommandDescriptionDocsUrl("Chat with a model", documentationPageUrls.CLI.Chat),
builder(yargs) {
const isInDocumentationMode = getIsInDocumentationMode();
return yargs
.option("modelPath", {
alias: ["m", "model", "path", "url", "uri"],
type: "string",
description: "Model file to use for the chat. Can be a path to a local file or a URI of a model file to download. Leave empty to choose from a list of recommended models"
})
.option("header", {
alias: ["H"],
type: "string",
array: true,
description: "Headers to use when downloading a model from a URL, in the format `key: value`. You can pass this option multiple times to add multiple headers."
})
.option("gpu", {
type: "string",
// yargs types don't support passing `false` as a choice, although it is supported by yargs
choices: nodeLlamaCppGpuOptions,
coerce: (value) => {
if (value == null || value == "")
return undefined;
return parseNodeLlamaCppGpuOption(value);
},
defaultDescription: "Uses the latest local build, and fallbacks to \"auto\"",
description: "Compute layer implementation type to use for llama.cpp. If omitted, uses the latest local build, and fallbacks to \"auto\""
})
.option("systemInfo", {
alias: "i",
type: "boolean",
default: false,
description: "Print llama.cpp system info"
})
.option("systemPrompt", {
alias: "s",
type: "string",
description: "System prompt to use against the model" +
(isInDocumentationMode ? "" : (". [the default value is determined by the chat wrapper, but is usually: " + defaultChatSystemPrompt.split("\n").join(" ") + "]"))
})
.option("systemPromptFile", {
type: "string",
description: "Path to a file to load text from and use as as the model system prompt"
})
.option("prompt", {
type: "string",
description: "First prompt to automatically send to the model when starting the chat"
})
.option("promptFile", {
type: "string",
description: "Path to a file to load text from and use as a first prompt to automatically send to the model when starting the chat"
})
.option("wrapper", {
alias: "w",
type: "string",
default: "auto",
choices: ["auto", ...specializedChatWrapperTypeNames],
description: "Chat wrapper to use. Use `auto` to automatically select a wrapper based on the model's metadata and tokenizer"
})
.option("noJinja", {
type: "boolean",
default: false,
description: "Don't use a Jinja wrapper, even if it's the best option for the model"
})
.option("contextSize", {
alias: "c",
type: "number",
description: "Context size to use for the model context",
default: -1,
defaultDescription: "Automatically determined based on the available VRAM"
})
.option("batchSize", {
alias: "b",
type: "number",
description: "Batch size to use for the model context"
})
.option("flashAttention", {
alias: "fa",
type: "boolean",
default: false,
description: "Enable flash attention"
})
.option("swaFullCache", {
alias: "noSwa",
type: "boolean",
default: false,
description: "Disable SWA (Sliding Window Attention) on supported models"
})
.option("noTrimWhitespace", {
type: "boolean",
alias: ["noTrim"],
default: false,
description: "Don't trim whitespaces from the model response"
})
.option("grammar", {
alias: "g",
type: "string",
default: "text",
choices: ["text", "json", "list", "arithmetic", "japanese", "chess"],
description: "Restrict the model response to a specific grammar, like JSON for example"
})
.option("jsonSchemaGrammarFile", {
alias: ["jsgf"],
type: "string",
description: "File path to a JSON schema file, to restrict the model response to only generate output that conforms to the JSON schema"
})
.option("threads", {
type: "number",
defaultDescription: "Number of cores that are useful for math on the current machine",
description: "Number of threads to use for the evaluation of tokens"
})
.option("temperature", {
alias: "t",
type: "number",
default: 0,
description: "Temperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The suggested temperature is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run. Set to `0` to disable."
})
.option("minP", {
alias: "mp",
type: "number",
default: 0,
description: "From the next token candidates, discard the percentage of tokens with the lowest probability. For example, if set to `0.05`, 5% of the lowest probability tokens will be discarded. This is useful for generating more high-quality results when using a high temperature. Set to a value between `0` and `1` to enable. Only relevant when `temperature` is set to a value greater than `0`."
})
.option("topK", {
alias: "k",
type: "number",
default: 40,
description: "Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation. An integer number between `1` and the size of the vocabulary. Set to `0` to disable (which uses the full vocabulary). Only relevant when `temperature` is set to a value greater than 0."
})
.option("topP", {
alias: "p",
type: "number",
default: 0.95,
description: "Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P, and samples the next token only from this set. A float number between `0` and `1`. Set to `1` to disable. Only relevant when `temperature` is set to a value greater than `0`."
})
.option("seed", {
type: "number",
description: "Used to control the randomness of the generated text. Only relevant when using `temperature`.",
defaultDescription: "The current epoch time"
})
.option("gpuLayers", {
alias: "gl",
type: "number",
description: "number of layers to store in VRAM",
default: -1,
defaultDescription: "Automatically determined based on the available VRAM"
})
.option("repeatPenalty", {
alias: "rp",
type: "number",
default: 1.1,
description: "Prevent the model from repeating the same token too much. Set to `1` to disable."
})
.option("lastTokensRepeatPenalty", {
alias: "rpn",
type: "number",
default: 64,
description: "Number of recent tokens generated by the model to apply penalties to repetition of"
})
.option("penalizeRepeatingNewLine", {
alias: "rpnl",
type: "boolean",
default: true,
description: "Penalize new line tokens. set `--no-penalizeRepeatingNewLine` or `--no-rpnl` to disable"
})
.option("repeatFrequencyPenalty", {
alias: "rfp",
type: "number",
description: "For n time a token is in the `punishTokens` array, lower its probability by `n * repeatFrequencyPenalty`. Set to a value between `0` and `1` to enable."
})
.option("repeatPresencePenalty", {
alias: "rpp",
type: "number",
description: "Lower the probability of all the tokens in the `punishTokens` array by `repeatPresencePenalty`. Set to a value between `0` and `1` to enable."
})
.option("maxTokens", {
alias: "mt",
type: "number",
default: 0,
description: "Maximum number of tokens to generate in responses. Set to `0` to disable. Set to `-1` to set to the context size"
})
.option("reasoningBudget", {
alias: ["tb", "thinkingBudget", "thoughtsBudget"],
type: "number",
default: -1,
defaultDescription: "Unlimited",
description: "Maximum number of tokens the model can use for thoughts. Set to `0` to disable reasoning"
})
.option("noHistory", {
alias: "nh",
type: "boolean",
default: false,
description: "Don't load or save chat history"
})
.option("environmentFunctions", {
alias: "ef",
type: "boolean",
default: false,
description: "Provide access to environment functions like `getDate` and `getTime`"
})
.option("tokenPredictionDraftModel", {
alias: ["dm", "draftModel"],
type: "string",
description: "Model file to use for draft sequence token prediction (speculative decoding). Can be a path to a local file or a URI of a model file to download"
})
.option("tokenPredictionModelContextSize", {
alias: ["dc", "draftContextSize", "draftContext"],
type: "number",
description: "Max context size to use for the draft sequence token prediction model context",
default: 4096
})
.option("debug", {
alias: "d",
type: "boolean",
default: false,
description: "Print llama.cpp info and debug logs"
})
.option("numa", {
type: "string",
// yargs types don't support passing `false` as a choice, although it is supported by yargs
choices: llamaNumaOptions,
coerce: (value) => {
if (value == null || value == "")
return false;
return parseNumaOption(value);
},
defaultDescription: "false",
description: "NUMA allocation policy. See the `numa` option on the `getLlama` method for more information"
})
.option("meter", {
type: "boolean",
default: false,
description: "Print how many tokens were used as input and output for each response"
})
.option("timing", {
type: "boolean",
default: false,
description: "Print how how long it took to generate each response"
})
.option("noMmap", {
type: "boolean",
default: false,
description: "Disable mmap (memory-mapped file) usage"
})
.option("noDirectIo", {
type: "boolean",
default: false,
description: "Disable Direct I/O usage when available"
})
.option("printTimings", {
alias: "pt",
type: "boolean",
default: false,
description: "Print llama.cpp's internal timings after each response"
});
},
async handler({ modelPath, header, gpu, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize, batchSize, flashAttention, swaFullCache, noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, seed, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, reasoningBudget, noHistory, environmentFunctions, tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, numa, meter, timing, noMmap, noDirectIo, printTimings }) {
try {
await RunChat({
modelPath, header, gpu, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize,
batchSize, flashAttention, swaFullCache, noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads,
temperature, minP, topK, topP, seed,
gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty,
maxTokens, reasoningBudget, noHistory, environmentFunctions, tokenPredictionDraftModel, tokenPredictionModelContextSize,
debug, numa, meter, timing, noMmap, noDirectIo, printTimings
});
}
catch (err) {
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
console.error(err);
process.exit(1);
}
}
};
async function RunChat({ modelPath: modelArg, header: headerArg, gpu, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize, batchSize, flashAttention, swaFullCache, noTrimWhitespace, grammar: grammarArg, jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, reasoningBudget, noHistory, environmentFunctions, tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, numa, meter, timing, noMmap, noDirectIo, printTimings }) {
if (contextSize === -1)
contextSize = undefined;
if (gpuLayers === -1)
gpuLayers = undefined;
if (reasoningBudget === -1)
reasoningBudget = undefined;
const headers = resolveHeaderFlag(headerArg);
const trimWhitespace = !noTrimWhitespace;
if (debug)
console.info(`${chalk.yellow("Log level:")} debug`);
const llamaLogLevel = debug
? LlamaLogLevel.debug
: LlamaLogLevel.warn;
const llama = gpu == null
? await getLlama("lastBuild", {
logLevel: llamaLogLevel,
numa
})
: await getLlama({
gpu,
logLevel: llamaLogLevel,
numa
});
const logBatchSize = batchSize != null;
const useMmap = !noMmap && llama.supportsMmap;
const useDirectIo = !noDirectIo;
const resolvedModelPath = await resolveCommandGgufPath(modelArg, llama, headers, {
flashAttention,
swaFullCache,
useMmap
});
const resolvedDraftModelPath = (tokenPredictionDraftModel != null && tokenPredictionDraftModel !== "")
? await resolveCommandGgufPath(tokenPredictionDraftModel, llama, headers, {
flashAttention,
swaFullCache,
useMmap,
consoleTitle: "Draft model file"
})
: undefined;
if (systemInfo)
console.log(llama.systemInfo);
if (systemPromptFile != null && systemPromptFile !== "") {
if (systemPrompt != null && systemPrompt !== "" && systemPrompt !== defaultChatSystemPrompt)
console.warn(chalk.yellow("Both `systemPrompt` and `systemPromptFile` were specified. `systemPromptFile` will be used."));
systemPrompt = await fs.readFile(path.resolve(process.cwd(), systemPromptFile), "utf8");
}
if (promptFile != null && promptFile !== "") {
if (prompt != null && prompt !== "")
console.warn(chalk.yellow("Both `prompt` and `promptFile` were specified. `promptFile` will be used."));
prompt = await fs.readFile(path.resolve(process.cwd(), promptFile), "utf8");
}
if (batchSize != null && contextSize != null && batchSize > contextSize) {
console.warn(chalk.yellow("Batch size is greater than the context size. Batch size will be set to the context size."));
batchSize = contextSize;
}
let initialPrompt = prompt ?? null;
const model = await withProgressLog({
loadingText: chalk.blue.bold("Loading model"),
successText: chalk.blue("Model loaded"),
failText: chalk.blue("Failed to load model"),
liveUpdates: !debug,
noProgress: debug,
liveCtrlCSendsAbortSignal: true
}, async (progressUpdater) => {
try {
return await llama.loadModel({
modelPath: resolvedModelPath,
gpuLayers: gpuLayers != null
? gpuLayers
: contextSize != null
? { fitContext: { contextSize } }
: undefined,
defaultContextFlashAttention: flashAttention,
defaultContextSwaFullCache: swaFullCache,
useMmap,
useDirectIo,
ignoreMemorySafetyChecks: gpuLayers != null,
onLoadProgress(loadProgress) {
progressUpdater.setProgress(loadProgress);
},
loadSignal: progressUpdater.abortSignal
});
}
catch (err) {
if (err === progressUpdater.abortSignal?.reason)
process.exit(0);
throw err;
}
finally {
if (llama.logLevel === LlamaLogLevel.debug) {
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
console.info();
}
}
});
const draftModel = resolvedDraftModelPath == null
? undefined
: await withProgressLog({
loadingText: chalk.blue.bold("Loading draft model"),
successText: chalk.blue("Draft model loaded"),
failText: chalk.blue("Failed to load draft model"),
liveUpdates: !debug,
noProgress: debug,
liveCtrlCSendsAbortSignal: true
}, async (progressUpdater) => {
try {
return await llama.loadModel({
modelPath: resolvedDraftModelPath,
defaultContextFlashAttention: flashAttention,
defaultContextSwaFullCache: swaFullCache,
useMmap,
useDirectIo,
onLoadProgress(loadProgress) {
progressUpdater.setProgress(loadProgress);
},
loadSignal: progressUpdater.abortSignal
});
}
catch (err) {
if (err === progressUpdater.abortSignal?.reason)
process.exit(0);
throw err;
}
finally {
if (llama.logLevel === LlamaLogLevel.debug) {
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
console.info();
}
}
});
const draftContext = draftModel == null
? undefined
: await withOra({
loading: chalk.blue("Creating draft context"),
success: chalk.blue("Draft context created"),
fail: chalk.blue("Failed to create draft context"),
useStatusLogs: debug
}, async () => {
try {
return await draftModel.createContext({
contextSize: { max: tokenPredictionModelContextSize }
});
}
finally {
if (llama.logLevel === LlamaLogLevel.debug) {
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
console.info();
}
}
});
const context = await withOra({
loading: chalk.blue("Creating context"),
success: chalk.blue("Context created"),
fail: chalk.blue("Failed to create context"),
useStatusLogs: debug
}, async () => {
try {
return await model.createContext({
contextSize: contextSize != null ? contextSize : undefined,
batchSize: batchSize != null ? batchSize : undefined,
threads: threads === null ? undefined : threads,
ignoreMemorySafetyChecks: gpuLayers != null || contextSize != null,
performanceTracking: printTimings
});
}
finally {
if (llama.logLevel === LlamaLogLevel.debug) {
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
console.info();
}
}
});
const grammar = jsonSchemaGrammarFilePath != null
? await llama.createGrammarForJsonSchema(await fs.readJson(path.resolve(process.cwd(), jsonSchemaGrammarFilePath)))
: grammarArg !== "text"
? await LlamaGrammar.getFor(llama, grammarArg)
: undefined;
const chatWrapper = resolveChatWrapper({
type: wrapper,
bosString: model.tokens.bosString,
filename: model.filename,
fileInfo: model.fileInfo,
tokenizer: model.tokenizer,
noJinja
}) ?? new GeneralChatWrapper();
const draftContextSequence = draftContext?.getSequence();
const contextSequence = draftContextSequence != null
? context.getSequence({
tokenPredictor: new DraftSequenceTokenPredictor(draftContextSequence)
})
: context.getSequence();
const session = new LlamaChatSession({
contextSequence,
systemPrompt,
chatWrapper: chatWrapper
});
let lastDraftTokenMeterState = draftContextSequence?.tokenMeter.getState();
let lastTokenMeterState = contextSequence.tokenMeter.getState();
let lastTokenPredictionsStats = contextSequence.tokenPredictions;
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
if (grammarArg != "text" && jsonSchemaGrammarFilePath != null)
console.warn(chalk.yellow("Both `grammar` and `jsonSchemaGrammarFile` were specified. `jsonSchemaGrammarFile` will be used."));
if (environmentFunctions && grammar != null) {
console.warn(chalk.yellow("Environment functions are disabled since a grammar is already specified"));
environmentFunctions = false;
}
const padTitle = await printCommonInfoLines({
context,
draftContext,
useMmap,
useDirectIo,
printBos: true,
printEos: true,
logBatchSize,
tokenMeterEnabled: meter
});
printInfoLine({
title: "Chat",
padTitle: padTitle,
info: [{
title: "Wrapper",
value: chatWrapper.wrapperName
}, {
title: "Repeat penalty",
value: `${repeatPenalty} (apply to last ${lastTokensRepeatPenalty} tokens)`
}, {
show: repeatFrequencyPenalty != null,
title: "Repeat frequency penalty",
value: String(repeatFrequencyPenalty)
}, {
show: repeatPresencePenalty != null,
title: "Repeat presence penalty",
value: String(repeatPresencePenalty)
}, {
show: !penalizeRepeatingNewLine,
title: "Penalize repeating new line",
value: "disabled"
}, {
show: jsonSchemaGrammarFilePath != null,
title: "JSON schema grammar file",
value: () => path.relative(process.cwd(), path.resolve(process.cwd(), jsonSchemaGrammarFilePath ?? ""))
}, {
show: jsonSchemaGrammarFilePath == null && grammarArg !== "text",
title: "Grammar",
value: grammarArg
}, {
show: environmentFunctions,
title: "Environment functions",
value: "enabled"
}, {
show: timing,
title: "Response timing",
value: "enabled"
}]
});
// this is for ora to not interfere with readline
await new Promise((resolve) => setTimeout(resolve, 1));
const replHistory = await ReplHistory.load(chatCommandHistoryFilePath, !noHistory);
async function getPrompt() {
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout,
history: replHistory.history.slice()
});
const res = await new Promise((accept) => rl.question(chalk.yellow("> "), accept));
rl.close();
return res;
}
if (prompt != null && prompt !== "" && !printTimings && (meter || timing)) {
// warm up the context sequence before the first evaluation, to make the timings of the actual evaluations more accurate
const contextFirstToken = session.chatWrapper.generateContextState({
chatHistory: [
...session.getChatHistory(),
{ type: "user", text: "" }
]
}).contextText.tokenize(model.tokenizer)[0];
if (contextFirstToken != null)
await contextSequence.evaluateWithoutGeneratingNewTokens([contextFirstToken]);
}
else if (!printTimings && !meter)
void session.preloadPrompt("")
.catch(() => void 0); // don't throw an error if preloading fails because a real prompt is sent early
while (true) {
let hadTrimmedWhitespaceTextInThisIterationAndSegment = false;
let nextPrintLeftovers = "";
const input = initialPrompt != null
? initialPrompt
: await getPrompt();
if (initialPrompt != null) {
console.log(chalk.green("> ") + initialPrompt);
initialPrompt = null;
}
else
await replHistory.add(input);
if (input === ".exit")
break;
process.stdout.write(chalk.yellow("AI: "));
const [startColor, endColor] = chalk.blue("MIDDLE").split("MIDDLE");
const [segmentStartColor, segmentEndColor] = chalk.gray("MIDDLE").split("MIDDLE");
const abortController = new AbortController();
const consoleInteraction = new ConsoleInteraction();
consoleInteraction.onKey(ConsoleInteractionKey.ctrlC, async () => {
abortController.abort();
consoleInteraction.stop();
});
const timeBeforePrompt = Date.now();
let currentSegmentType;
try {
process.stdout.write(startColor);
consoleInteraction.start();
await session.prompt(input, {
grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
temperature,
minP,
topK,
topP,
seed: seed ?? undefined,
signal: abortController.signal,
stopOnAbortSignal: true,
budgets: {
thoughtTokens: reasoningBudget
},
repeatPenalty: {
penalty: repeatPenalty,
frequencyPenalty: repeatFrequencyPenalty != null ? repeatFrequencyPenalty : undefined,
presencePenalty: repeatPresencePenalty != null ? repeatPresencePenalty : undefined,
penalizeNewLine: penalizeRepeatingNewLine,
lastTokens: lastTokensRepeatPenalty
},
maxTokens: maxTokens === -1
? context.contextSize
: maxTokens <= 0
? undefined
: maxTokens,
onResponseChunk({ text: chunk, type: chunkType, segmentType }) {
if (segmentType != currentSegmentType) {
const printNewline = hadTrimmedWhitespaceTextInThisIterationAndSegment
? "\n"
: "";
hadTrimmedWhitespaceTextInThisIterationAndSegment = false;
if (chunkType !== "segment" || segmentType == null) {
process.stdout.write(segmentEndColor);
process.stdout.write(chalk.reset.whiteBright.bold(printNewline + "[response] "));
process.stdout.write(startColor);
}
else if (currentSegmentType == null) {
process.stdout.write(endColor);
process.stdout.write(chalk.reset.whiteBright.bold(printNewline + `[segment: ${segmentType}] `));
process.stdout.write(segmentStartColor);
}
else {
process.stdout.write(segmentEndColor);
process.stdout.write(chalk.reset.whiteBright.bold(printNewline + `[segment: ${segmentType}] `));
process.stdout.write(segmentStartColor);
}
currentSegmentType = segmentType;
}
let text = nextPrintLeftovers + chunk;
nextPrintLeftovers = "";
if (trimWhitespace) {
if (!hadTrimmedWhitespaceTextInThisIterationAndSegment) {
text = text.trimStart();
if (text.length > 0)
hadTrimmedWhitespaceTextInThisIterationAndSegment = true;
}
const textWithTrimmedEnd = text.trimEnd();
if (textWithTrimmedEnd.length < text.length) {
nextPrintLeftovers = text.slice(textWithTrimmedEnd.length);
text = textWithTrimmedEnd;
}
}
process.stdout.write(text);
},
functions: (grammar == null && environmentFunctions)
? defaultEnvironmentFunctions
: undefined,
trimWhitespaceSuffix: trimWhitespace
});
}
catch (err) {
if (!(abortController.signal.aborted && err === abortController.signal.reason))
throw err;
}
finally {
consoleInteraction.stop();
const currentEndColor = currentSegmentType != null
? segmentEndColor
: endColor;
if (abortController.signal.aborted)
process.stdout.write(currentEndColor + chalk.yellow("[generation aborted by user]"));
else
process.stdout.write(currentEndColor);
console.log();
}
const timeAfterPrompt = Date.now();
if (printTimings) {
if (LlamaLogLevelGreaterThan(llama.logLevel, LlamaLogLevel.info))
llama.logLevel = LlamaLogLevel.info;
await context.printTimings();
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
llama.logLevel = llamaLogLevel;
}
if (timing)
console.info(chalk.dim("Response duration: ") +
prettyMilliseconds(timeAfterPrompt - timeBeforePrompt, {
keepDecimalsOnWholeSeconds: true,
secondsDecimalDigits: 2,
separateMilliseconds: true,
compact: false
}));
if (meter) {
const newTokenMeterState = contextSequence.tokenMeter.getState();
const tokenMeterDiff = TokenMeter.diff(newTokenMeterState, lastTokenMeterState);
lastTokenMeterState = newTokenMeterState;
const showDraftTokenMeterDiff = lastDraftTokenMeterState != null && draftContextSequence != null;
const tokenPredictionsStats = contextSequence.tokenPredictions;
const validatedTokenPredictions = tokenPredictionsStats.validated - lastTokenPredictionsStats.validated;
const refutedTokenPredictions = tokenPredictionsStats.refuted - lastTokenPredictionsStats.refuted;
const usedTokenPredictions = tokenPredictionsStats.used - lastTokenPredictionsStats.used;
const unusedTokenPredictions = tokenPredictionsStats.unused - lastTokenPredictionsStats.unused;
lastTokenPredictionsStats = tokenPredictionsStats;
console.info([
showDraftTokenMeterDiff && (chalk.yellow("Main".padEnd("Drafter".length))),
chalk.dim("Input tokens:") + " " + String(tokenMeterDiff.usedInputTokens).padEnd(5, " "),
chalk.dim("Output tokens:") + " " + String(tokenMeterDiff.usedOutputTokens).padEnd(5, " "),
showDraftTokenMeterDiff && (chalk.dim("Validated predictions:") + " " + String(validatedTokenPredictions).padEnd(5, " ")),
showDraftTokenMeterDiff && (chalk.dim("Refuted predictions:") + " " + String(refutedTokenPredictions).padEnd(5, " ")),
showDraftTokenMeterDiff && (chalk.dim("Used predictions:") + " " + String(usedTokenPredictions).padEnd(5, " ")),
showDraftTokenMeterDiff && (chalk.dim("Unused predictions:") + " " + String(unusedTokenPredictions).padEnd(5, " "))
].filter(Boolean).join(" "));
if (lastDraftTokenMeterState != null && draftContextSequence != null) {
const newDraftTokenMeterState = draftContextSequence.tokenMeter.getState();
const draftTokenMeterDiff = TokenMeter.diff(newDraftTokenMeterState, lastDraftTokenMeterState);
lastDraftTokenMeterState = newDraftTokenMeterState;
console.info([
chalk.yellow("Drafter"),
chalk.dim("Input tokens:") + " " + String(draftTokenMeterDiff.usedInputTokens).padEnd(5, " "),
chalk.dim("Output tokens:") + " " + String(draftTokenMeterDiff.usedOutputTokens).padEnd(5, " ")
].join(" "));
}
}
}
}
const defaultEnvironmentFunctions = {
getDate: defineChatSessionFunction({
description: "Retrieve the current date",
handler() {
const date = new Date();
return [
date.getFullYear(),
String(date.getMonth() + 1).padStart(2, "0"),
String(date.getDate()).padStart(2, "0")
].join("-");
}
}),
getTime: defineChatSessionFunction({
description: "Retrieve the current time",
handler() {
return new Date().toLocaleTimeString("en-US");
}
})
};
//# sourceMappingURL=ChatCommand.js.map
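
Stripped of the REPL, colors, and meters, RunChat above boils down to the chain getLlama → loadModel → createContext → getSequence → LlamaChatSession → session.prompt. A minimal sketch of that same chain through the package's public exports (the model path is a placeholder; only calls that appear in this file are used):

import { getLlama, LlamaChatSession } from "node-llama-cpp";

const llama = await getLlama();                 // resolve a llama.cpp binding
const model = await llama.loadModel({
    modelPath: "./model.gguf"                   // placeholder path
});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

const answer = await session.prompt("Hi there, how are you?");
console.log(answer);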

node_modules/node-llama-cpp/dist/cli/commands/ChatCommand.js.map generated vendored Normal file
File diff suppressed because one or more lines are too long

38
node_modules/node-llama-cpp/dist/cli/commands/CompleteCommand.d.ts generated vendored Normal file

@@ -0,0 +1,38 @@
import { CommandModule } from "yargs";
import { BuildGpu, LlamaNuma } from "../../bindings/types.js";
type CompleteCommand = {
modelPath?: string;
header?: string[];
gpu?: BuildGpu | "auto";
systemInfo: boolean;
text?: string;
textFile?: string;
contextSize?: number;
batchSize?: number;
flashAttention?: boolean;
swaFullCache?: boolean;
threads?: number;
temperature: number;
minP: number;
topK: number;
topP: number;
seed?: number;
gpuLayers?: number;
repeatPenalty: number;
lastTokensRepeatPenalty: number;
penalizeRepeatingNewLine: boolean;
repeatFrequencyPenalty?: number;
repeatPresencePenalty?: number;
maxTokens: number;
tokenPredictionDraftModel?: string;
tokenPredictionModelContextSize?: number;
debug: boolean;
numa?: LlamaNuma;
meter: boolean;
timing: boolean;
noMmap: boolean;
noDirectIo: boolean;
printTimings: boolean;
};
export declare const CompleteCommand: CommandModule<object, CompleteCommand>;
export {};

567
node_modules/node-llama-cpp/dist/cli/commands/CompleteCommand.js generated vendored Normal file

@@ -0,0 +1,567 @@
import * as readline from "readline";
import process from "process";
import path from "path";
import chalk from "chalk";
import fs from "fs-extra";
import prettyMilliseconds from "pretty-ms";
import { getLlama } from "../../bindings/getLlama.js";
import { LlamaLogLevel, LlamaLogLevelGreaterThan, llamaNumaOptions, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption, parseNumaOption } from "../../bindings/types.js";
import { LlamaCompletion } from "../../evaluator/LlamaCompletion.js";
import withOra from "../../utils/withOra.js";
import { TokenMeter } from "../../evaluator/TokenMeter.js";
import { printInfoLine } from "../utils/printInfoLine.js";
import { printCommonInfoLines } from "../utils/printCommonInfoLines.js";
import { resolveCommandGgufPath } from "../utils/resolveCommandGgufPath.js";
import { withProgressLog } from "../../utils/withProgressLog.js";
import { resolveHeaderFlag } from "../utils/resolveHeaderFlag.js";
import { withCliCommandDescriptionDocsUrl } from "../utils/withCliCommandDescriptionDocsUrl.js";
import { documentationPageUrls } from "../../config.js";
import { ConsoleInteraction, ConsoleInteractionKey } from "../utils/ConsoleInteraction.js";
import { DraftSequenceTokenPredictor } from "../../evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js";
export const CompleteCommand = {
command: "complete [modelPath]",
describe: withCliCommandDescriptionDocsUrl("Generate a completion for a given text", documentationPageUrls.CLI.Complete),
builder(yargs) {
return yargs
.option("modelPath", {
alias: ["m", "model", "path", "url", "uri"],
type: "string",
description: "Model file to use for the completion. Can be a path to a local file or a URI of a model file to download. Leave empty to choose from a list of recommended models"
})
.option("header", {
alias: ["H"],
type: "string",
array: true,
description: "Headers to use when downloading a model from a URL, in the format `key: value`. You can pass this option multiple times to add multiple headers."
})
.option("gpu", {
type: "string",
// yargs types don't support passing `false` as a choice, although it is supported by yargs
choices: nodeLlamaCppGpuOptions,
coerce: (value) => {
if (value == null || value == "")
return undefined;
return parseNodeLlamaCppGpuOption(value);
},
defaultDescription: "Uses the latest local build, and fallbacks to \"auto\"",
description: "Compute layer implementation type to use for llama.cpp. If omitted, uses the latest local build, and fallbacks to \"auto\""
})
.option("systemInfo", {
alias: "i",
type: "boolean",
default: false,
description: "Print llama.cpp system info"
})
.option("text", {
type: "string",
description: "First text to automatically start generating completion for"
})
.option("textFile", {
type: "string",
description: "Path to a file to load text from and use as the first text to automatically start generating completion for"
})
.option("contextSize", {
alias: "c",
type: "number",
description: "Context size to use for the model context",
default: -1,
defaultDescription: "Automatically determined based on the available VRAM"
})
.option("batchSize", {
alias: "b",
type: "number",
description: "Batch size to use for the model context"
})
.option("flashAttention", {
alias: "fa",
type: "boolean",
default: false,
description: "Enable flash attention"
})
.option("swaFullCache", {
alias: "noSwa",
type: "boolean",
default: false,
description: "Disable SWA (Sliding Window Attention) on supported models"
})
.option("threads", {
type: "number",
defaultDescription: "Number of cores that are useful for math on the current machine",
description: "Number of threads to use for the evaluation of tokens"
})
.option("temperature", {
alias: "t",
type: "number",
default: 0,
description: "Temperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The suggested temperature is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run. Set to `0` to disable."
})
.option("minP", {
alias: "mp",
type: "number",
default: 0,
description: "From the next token candidates, discard the percentage of tokens with the lowest probability. For example, if set to `0.05`, 5% of the lowest probability tokens will be discarded. This is useful for generating more high-quality results when using a high temperature. Set to a value between `0` and `1` to enable. Only relevant when `temperature` is set to a value greater than `0`."
})
.option("topK", {
alias: "k",
type: "number",
default: 40,
description: "Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation. An integer number between `1` and the size of the vocabulary. Set to `0` to disable (which uses the full vocabulary). Only relevant when `temperature` is set to a value greater than 0."
})
.option("topP", {
alias: "p",
type: "number",
default: 0.95,
description: "Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P, and samples the next token only from this set. A float number between `0` and `1`. Set to `1` to disable. Only relevant when `temperature` is set to a value greater than `0`."
})
.option("seed", {
type: "number",
description: "Used to control the randomness of the generated text. Only relevant when using `temperature`.",
defaultDescription: "The current epoch time"
})
.option("gpuLayers", {
alias: "gl",
type: "number",
description: "number of layers to store in VRAM",
default: -1,
defaultDescription: "Automatically determined based on the available VRAM"
})
.option("repeatPenalty", {
alias: "rp",
type: "number",
default: 1.1,
description: "Prevent the model from repeating the same token too much. Set to `1` to disable."
})
.option("lastTokensRepeatPenalty", {
alias: "rpn",
type: "number",
default: 64,
description: "Number of recent tokens generated by the model to apply penalties to repetition of"
})
.option("penalizeRepeatingNewLine", {
alias: "rpnl",
type: "boolean",
default: true,
description: "Penalize new line tokens. set `--no-penalizeRepeatingNewLine` or `--no-rpnl` to disable"
})
.option("repeatFrequencyPenalty", {
alias: "rfp",
type: "number",
description: "For n time a token is in the `punishTokens` array, lower its probability by `n * repeatFrequencyPenalty`. Set to a value between `0` and `1` to enable."
})
.option("repeatPresencePenalty", {
alias: "rpp",
type: "number",
description: "Lower the probability of all the tokens in the `punishTokens` array by `repeatPresencePenalty`. Set to a value between `0` and `1` to enable."
})
.option("maxTokens", {
alias: "mt",
type: "number",
default: 0,
description: "Maximum number of tokens to generate in responses. Set to `0` to disable. Set to `-1` to set to the context size"
})
.option("tokenPredictionDraftModel", {
alias: ["dm", "draftModel"],
type: "string",
description: "Model file to use for draft sequence token prediction (speculative decoding). Can be a path to a local file or a URI of a model file to download"
})
.option("tokenPredictionModelContextSize", {
alias: ["dc", "draftContextSize", "draftContext"],
type: "number",
description: "Max context size to use for the draft sequence token prediction model context",
default: 4096
})
.option("debug", {
alias: "d",
type: "boolean",
default: false,
description: "Print llama.cpp info and debug logs"
})
.option("numa", {
type: "string",
// yargs types don't support passing `false` as a choice, although it is supported by yargs
choices: llamaNumaOptions,
coerce: (value) => {
if (value == null || value == "")
return false;
return parseNumaOption(value);
},
defaultDescription: "false",
description: "NUMA allocation policy. See the `numa` option on the `getLlama` method for more information"
})
.option("meter", {
type: "boolean",
default: false,
description: "Log how many tokens were used as input and output for each response"
})
.option("timing", {
type: "boolean",
default: false,
description: "Print how how long it took to generate each response"
})
.option("noMmap", {
type: "boolean",
default: false,
description: "Disable mmap (memory-mapped file) usage"
})
.option("noDirectIo", {
type: "boolean",
default: false,
description: "Disable Direct I/O usage when available"
})
.option("printTimings", {
alias: "pt",
type: "boolean",
default: false,
description: "Print llama.cpp's internal timings after each response"
});
},
async handler({ modelPath, header, gpu, systemInfo, text, textFile, contextSize, batchSize, flashAttention, swaFullCache, threads, temperature, minP, topK, topP, seed, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, numa, meter, timing, noMmap, noDirectIo, printTimings }) {
try {
await RunCompletion({
modelPath, header, gpu, systemInfo, text, textFile, contextSize, batchSize, flashAttention, swaFullCache,
threads, temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty,
repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens,
tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, numa, meter, timing, noMmap, noDirectIo, printTimings
});
}
catch (err) {
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
console.error(err);
process.exit(1);
}
}
};
async function RunCompletion({ modelPath: modelArg, header: headerArg, gpu, systemInfo, text, textFile, contextSize, batchSize, flashAttention, swaFullCache, threads, temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, tokenPredictionDraftModel, tokenPredictionModelContextSize, maxTokens, debug, numa, meter, timing, noMmap, noDirectIo, printTimings }) {
if (contextSize === -1)
contextSize = undefined;
if (gpuLayers === -1)
gpuLayers = undefined;
const headers = resolveHeaderFlag(headerArg);
if (debug)
console.info(`${chalk.yellow("Log level:")} debug`);
const llamaLogLevel = debug
? LlamaLogLevel.debug
: LlamaLogLevel.warn;
const llama = gpu == null
? await getLlama("lastBuild", {
logLevel: llamaLogLevel,
numa
})
: await getLlama({
gpu,
logLevel: llamaLogLevel,
numa
});
const logBatchSize = batchSize != null;
const useMmap = !noMmap && llama.supportsMmap;
const useDirectIo = !noDirectIo;
const resolvedModelPath = await resolveCommandGgufPath(modelArg, llama, headers, {
flashAttention,
swaFullCache,
useMmap
});
const resolvedDraftModelPath = (tokenPredictionDraftModel != null && tokenPredictionDraftModel !== "")
? await resolveCommandGgufPath(tokenPredictionDraftModel, llama, headers, {
flashAttention,
swaFullCache,
useMmap,
consoleTitle: "Draft model file"
})
: undefined;
if (systemInfo)
console.log(llama.systemInfo);
if (textFile != null && textFile !== "") {
if (text != null && text !== "")
console.warn(chalk.yellow("Both `text` and `textFile` were specified. `textFile` will be used."));
text = await fs.readFile(path.resolve(process.cwd(), textFile), "utf8");
}
if (batchSize != null && contextSize != null && batchSize > contextSize) {
console.warn(chalk.yellow("Batch size is greater than the context size. Batch size will be set to the context size."));
batchSize = contextSize;
}
let initialText = text ?? null;
const model = await withProgressLog({
loadingText: chalk.blue.bold("Loading model"),
successText: chalk.blue("Model loaded"),
failText: chalk.blue("Failed to load model"),
liveUpdates: !debug,
noProgress: debug,
liveCtrlCSendsAbortSignal: true
}, async (progressUpdater) => {
try {
return await llama.loadModel({
modelPath: resolvedModelPath,
gpuLayers: gpuLayers != null
? gpuLayers
: contextSize != null
? { fitContext: { contextSize } }
: undefined,
defaultContextFlashAttention: flashAttention,
defaultContextSwaFullCache: swaFullCache,
useMmap,
useDirectIo,
ignoreMemorySafetyChecks: gpuLayers != null,
onLoadProgress(loadProgress) {
progressUpdater.setProgress(loadProgress);
},
loadSignal: progressUpdater.abortSignal
});
}
catch (err) {
if (err === progressUpdater.abortSignal?.reason)
process.exit(0);
throw err;
}
finally {
if (llama.logLevel === LlamaLogLevel.debug) {
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
console.info();
}
}
});
const draftModel = resolvedDraftModelPath == null
? undefined
: await withProgressLog({
loadingText: chalk.blue.bold("Loading draft model"),
successText: chalk.blue("Draft model loaded"),
failText: chalk.blue("Failed to load draft model"),
liveUpdates: !debug,
noProgress: debug,
liveCtrlCSendsAbortSignal: true
}, async (progressUpdater) => {
try {
return await llama.loadModel({
modelPath: resolvedDraftModelPath,
defaultContextFlashAttention: flashAttention,
defaultContextSwaFullCache: swaFullCache,
useMmap,
useDirectIo,
onLoadProgress(loadProgress) {
progressUpdater.setProgress(loadProgress);
},
loadSignal: progressUpdater.abortSignal
});
}
catch (err) {
if (err === progressUpdater.abortSignal?.reason)
process.exit(0);
throw err;
}
finally {
if (llama.logLevel === LlamaLogLevel.debug) {
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
console.info();
}
}
});
const draftContext = draftModel == null
? undefined
: await withOra({
loading: chalk.blue("Creating draft context"),
success: chalk.blue("Draft context created"),
fail: chalk.blue("Failed to create draft context"),
useStatusLogs: debug
}, async () => {
try {
return await draftModel.createContext({
contextSize: { max: tokenPredictionModelContextSize }
});
}
finally {
if (llama.logLevel === LlamaLogLevel.debug) {
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
console.info();
}
}
});
const context = await withOra({
loading: chalk.blue("Creating context"),
success: chalk.blue("Context created"),
fail: chalk.blue("Failed to create context"),
useStatusLogs: debug
}, async () => {
try {
return await model.createContext({
contextSize: contextSize != null ? contextSize : undefined,
batchSize: batchSize != null ? batchSize : undefined,
threads: threads === null ? undefined : threads,
ignoreMemorySafetyChecks: gpuLayers != null || contextSize != null,
performanceTracking: printTimings
});
}
finally {
if (llama.logLevel === LlamaLogLevel.debug) {
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
console.info();
}
}
});
const draftContextSequence = draftContext?.getSequence();
const contextSequence = draftContextSequence != null
? context.getSequence({
tokenPredictor: new DraftSequenceTokenPredictor(draftContextSequence)
})
: context.getSequence();
const completion = new LlamaCompletion({
contextSequence
});
let lastDraftTokenMeterState = draftContextSequence?.tokenMeter.getState();
let lastTokenMeterState = contextSequence.tokenMeter.getState();
let lastTokenPredictionsStats = contextSequence.tokenPredictions;
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
const padTitle = await printCommonInfoLines({
context,
draftContext,
useMmap,
useDirectIo,
minTitleLength: "Complete".length + 1,
logBatchSize,
tokenMeterEnabled: meter
});
printInfoLine({
title: "Complete",
padTitle: padTitle,
info: [{
title: "Repeat penalty",
value: `${repeatPenalty} (apply to last ${lastTokensRepeatPenalty} tokens)`
}, {
show: repeatFrequencyPenalty != null,
title: "Repeat frequency penalty",
value: String(repeatFrequencyPenalty)
}, {
show: repeatPresencePenalty != null,
title: "Repeat presence penalty",
value: String(repeatPresencePenalty)
}, {
show: !penalizeRepeatingNewLine,
title: "Penalize repeating new line",
value: "disabled"
}, {
show: timing,
title: "Response timing",
value: "enabled"
}]
});
// this is for ora to not interfere with readline
await new Promise((resolve) => setTimeout(resolve, 1));
const replHistory = [];
async function getPrompt() {
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout,
history: replHistory.slice()
});
const res = await new Promise((accept) => rl.question(chalk.yellow("> "), accept));
rl.close();
return res;
}
while (true) {
const input = initialText != null
? initialText
: await getPrompt();
if (initialText != null) {
console.log(chalk.green("> ") + initialText);
initialText = null;
}
else
replHistory.push(input);
if (input === ".exit")
break;
process.stdout.write(chalk.yellow("Completion: "));
const [startColor, endColor] = chalk.blue("MIDDLE").split("MIDDLE");
const abortController = new AbortController();
const consoleInteraction = new ConsoleInteraction();
consoleInteraction.onKey(ConsoleInteractionKey.ctrlC, async () => {
abortController.abort();
consoleInteraction.stop();
});
const timeBeforePrompt = Date.now();
try {
process.stdout.write(startColor);
consoleInteraction.start();
await completion.generateCompletion(input, {
temperature,
minP,
topK,
topP,
seed: seed ?? undefined,
signal: abortController.signal,
repeatPenalty: {
penalty: repeatPenalty,
frequencyPenalty: repeatFrequencyPenalty != null ? repeatFrequencyPenalty : undefined,
presencePenalty: repeatPresencePenalty != null ? repeatPresencePenalty : undefined,
penalizeNewLine: penalizeRepeatingNewLine,
lastTokens: lastTokensRepeatPenalty
},
maxTokens: maxTokens === -1
? context.contextSize
: maxTokens <= 0
? undefined
: maxTokens,
onTextChunk(chunk) {
process.stdout.write(chunk);
}
});
}
catch (err) {
if (!(abortController.signal.aborted && err === abortController.signal.reason))
throw err;
}
finally {
consoleInteraction.stop();
if (abortController.signal.aborted)
process.stdout.write(endColor + chalk.yellow("[generation aborted by user]"));
else
process.stdout.write(endColor);
console.log();
}
const timeAfterPrompt = Date.now();
if (printTimings) {
if (LlamaLogLevelGreaterThan(llama.logLevel, LlamaLogLevel.info))
llama.logLevel = LlamaLogLevel.info;
await context.printTimings();
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
llama.logLevel = llamaLogLevel;
}
if (timing)
console.info(chalk.dim("Response duration: ") +
prettyMilliseconds(timeAfterPrompt - timeBeforePrompt, {
keepDecimalsOnWholeSeconds: true,
secondsDecimalDigits: 2,
separateMilliseconds: true,
compact: false
}));
if (meter) {
const newTokenMeterState = contextSequence.tokenMeter.getState();
const tokenMeterDiff = TokenMeter.diff(newTokenMeterState, lastTokenMeterState);
lastTokenMeterState = newTokenMeterState;
const showDraftTokenMeterDiff = lastDraftTokenMeterState != null && draftContextSequence != null;
const tokenPredictionsStats = contextSequence.tokenPredictions;
const validatedTokenPredictions = tokenPredictionsStats.validated - lastTokenPredictionsStats.validated;
const refutedTokenPredictions = tokenPredictionsStats.refuted - lastTokenPredictionsStats.refuted;
const usedTokenPredictions = tokenPredictionsStats.used - lastTokenPredictionsStats.used;
const unusedTokenPredictions = tokenPredictionsStats.unused - lastTokenPredictionsStats.unused;
lastTokenPredictionsStats = tokenPredictionsStats;
console.info([
showDraftTokenMeterDiff && (chalk.yellow("Main".padEnd("Drafter".length))),
chalk.dim("Input tokens:") + " " + String(tokenMeterDiff.usedInputTokens).padEnd(5, " "),
chalk.dim("Output tokens:") + " " + String(tokenMeterDiff.usedOutputTokens).padEnd(5, " "),
showDraftTokenMeterDiff && (chalk.dim("Validated predictions:") + " " + String(validatedTokenPredictions).padEnd(5, " ")),
showDraftTokenMeterDiff && (chalk.dim("Refuted predictions:") + " " + String(refutedTokenPredictions).padEnd(5, " ")),
showDraftTokenMeterDiff && (chalk.dim("Used predictions:") + " " + String(usedTokenPredictions).padEnd(5, " ")),
showDraftTokenMeterDiff && (chalk.dim("Unused predictions:") + " " + String(unusedTokenPredictions).padEnd(5, " "))
].filter(Boolean).join(" "));
if (lastDraftTokenMeterState != null && draftContextSequence != null) {
const newDraftTokenMeterState = draftContextSequence.tokenMeter.getState();
const draftTokenMeterDiff = TokenMeter.diff(newDraftTokenMeterState, lastDraftTokenMeterState);
lastDraftTokenMeterState = newDraftTokenMeterState;
console.info([
chalk.yellow("Drafter"),
chalk.dim("Input tokens:") + " " + String(draftTokenMeterDiff.usedInputTokens).padEnd(5, " "),
chalk.dim("Output tokens:") + " " + String(draftTokenMeterDiff.usedOutputTokens).padEnd(5, " ")
].join(" "));
}
}
}
}
//# sourceMappingURL=CompleteCommand.js.map
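
RunCompletion mirrors RunChat but drives a LlamaCompletion instead of a chat session. Its core call chain, reduced to a sketch through the package's public exports (the model path is a placeholder; only calls that appear in this file are used):

import { getLlama, LlamaCompletion } from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({ modelPath: "./model.gguf" }); // placeholder path
const context = await model.createContext();
const completion = new LlamaCompletion({
    contextSequence: context.getSequence()
});

// Stream the generated text to stdout, the same way onTextChunk is used above
const text = await completion.generateCompletion("The quick brown fox", {
    maxTokens: 64,
    onTextChunk(chunk) {
        process.stdout.write(chunk);
    }
});
console.log();
console.log(`Generated ${text.length} characters`);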

node_modules/node-llama-cpp/dist/cli/commands/CompleteCommand.js.map generated vendored Normal file
File diff suppressed because one or more lines are too long

7
node_modules/node-llama-cpp/dist/cli/commands/DebugCommand.d.ts generated vendored Normal file

@@ -0,0 +1,7 @@
import { CommandModule } from "yargs";
declare const debugFunctions: readonly ["vram", "cmakeOptions"];
type DebugCommand = {
function: (typeof debugFunctions)[number];
};
export declare const DebugCommand: CommandModule<object, DebugCommand>;
export {};
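
The `function: (typeof debugFunctions)[number]` field relies on the const-tuple-to-union pattern: the readonly tuple declared above lets the allowed values be written once and the string union derived from it. A standalone sketch of the pattern:

// Derive a string-literal union from a readonly tuple
const debugFunctions = ["vram", "cmakeOptions"] as const;
type DebugFunction = (typeof debugFunctions)[number]; // "vram" | "cmakeOptions"

const ok: DebugFunction = "vram";
// const bad: DebugFunction = "other"; // would not compile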

55
node_modules/node-llama-cpp/dist/cli/commands/DebugCommand.js generated vendored Normal file

@@ -0,0 +1,55 @@
import os from "os";
import chalk from "chalk";
import { getLlama } from "../../bindings/getLlama.js";
import { prettyPrintObject } from "../../utils/prettyPrintObject.js";
import { logUsedGpuTypeOption } from "../utils/logUsedGpuTypeOption.js";
import { toBytes } from "../utils/toBytes.js";
const debugFunctions = ["vram", "cmakeOptions"];
export const DebugCommand = {
command: "debug [function]",
describe: false,
builder(yargs) {
return yargs
.option("function", {
type: "string",
choices: debugFunctions,
demandOption: true,
description: "debug function to run"
});
},
async handler({ function: func }) {
if (func === "vram")
await DebugVramFunction();
else if (func === "cmakeOptions")
await DebugCmakeOptionsFunction();
else
void func;
}
};
async function DebugVramFunction() {
const llama = await getLlama("lastBuild");
const vramStatus = await llama.getVramState();
const totalMemory = os.totalmem();
const freeMemory = os.freemem();
const usedMemory = totalMemory - freeMemory;
const getPercentageString = (amount, total) => {
if (total === 0)
return "0";
return String(Math.floor((amount / total) * 100 * 100) / 100);
};
logUsedGpuTypeOption(llama.gpu);
console.info();
console.info(`${chalk.yellow("Used VRAM:")} ${getPercentageString(vramStatus.used, vramStatus.total)}% ${chalk.gray("(" + toBytes(vramStatus.used) + "/" + toBytes(vramStatus.total) + ")")}`);
console.info(`${chalk.yellow("Free VRAM:")} ${getPercentageString(vramStatus.free, vramStatus.total)}% ${chalk.gray("(" + toBytes(vramStatus.free) + "/" + toBytes(vramStatus.total) + ")")}`);
console.info();
console.info(`${chalk.yellow("Used RAM:")} ${getPercentageString(usedMemory, totalMemory)}% ${chalk.gray("(" + toBytes(usedMemory) + "/" + toBytes(totalMemory) + ")")}`);
console.info(`${chalk.yellow("Free RAM:")} ${getPercentageString(freeMemory, totalMemory)}% ${chalk.gray("(" + toBytes(freeMemory) + "/" + toBytes(totalMemory) + ")")}`);
}
async function DebugCmakeOptionsFunction() {
const llama = await getLlama("lastBuild");
logUsedGpuTypeOption(llama.gpu);
console.info();
console.info(`${chalk.yellow("CMake options:")} ${prettyPrintObject(llama.cmakeOptions)}`);
console.info(`${chalk.yellow("Release:")} ${prettyPrintObject(llama.llamaCppRelease)}`);
}
//# sourceMappingURL=DebugCommand.js.map
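The same VRAM/RAM report can be produced programmatically; a minimal sketch, assuming `getLlama` and `Llama#getVramState()` are re-exported from "node-llama-cpp" (the compiled file above reaches them through internal relative paths), run as an ES module:

import os from "os";
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();          // builds or reuses a local llama.cpp binding
const vram = await llama.getVramState(); // {total, used, free} in bytes, as used above

// Percentage with up to two decimal places, guarding against a zero total like the helper above.
const percent = (amount: number, total: number) =>
    total === 0 ? "0" : String(Math.floor((amount / total) * 100 * 100) / 100);

console.info(`Used VRAM: ${percent(vram.used, vram.total)}%`);
console.info(`Used RAM: ${percent(os.totalmem() - os.freemem(), os.totalmem())}%`);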

View File

@@ -0,0 +1 @@
{"version":3,"file":"DebugCommand.js","sourceRoot":"","sources":["../../../src/cli/commands/DebugCommand.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAC,QAAQ,EAAC,MAAM,4BAA4B,CAAC;AACpD,OAAO,EAAC,iBAAiB,EAAC,MAAM,kCAAkC,CAAC;AACnE,OAAO,EAAC,oBAAoB,EAAC,MAAM,kCAAkC,CAAC;AACtE,OAAO,EAAC,OAAO,EAAC,MAAM,qBAAqB,CAAC;AAE5C,MAAM,cAAc,GAAG,CAAC,MAAM,EAAE,cAAc,CAAU,CAAC;AAKzD,MAAM,CAAC,MAAM,YAAY,GAAwC;IAC7D,OAAO,EAAE,kBAAkB;IAC3B,QAAQ,EAAE,KAAK;IACf,OAAO,CAAC,KAAK;QACT,OAAO,KAAK;aACP,MAAM,CAAC,UAAU,EAAE;YAChB,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,cAAc;YACvB,YAAY,EAAE,IAAI;YAClB,WAAW,EAAE,uBAAuB;SACvC,CAAC,CAAC;IACX,CAAC;IACD,KAAK,CAAC,OAAO,CAAC,EAAC,QAAQ,EAAE,IAAI,EAAe;QACxC,IAAI,IAAI,KAAK,MAAM;YACf,MAAM,iBAAiB,EAAE,CAAC;aACzB,IAAI,IAAI,KAAK,cAAc;YAC5B,MAAM,yBAAyB,EAAE,CAAC;;YAElC,KAAM,IAAqB,CAAC;IACpC,CAAC;CACJ,CAAC;AAEF,KAAK,UAAU,iBAAiB;IAC5B,MAAM,KAAK,GAAG,MAAM,QAAQ,CAAC,WAAW,CAAC,CAAC;IAE1C,MAAM,UAAU,GAAG,MAAM,KAAK,CAAC,YAAY,EAAE,CAAC;IAC9C,MAAM,WAAW,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC;IAClC,MAAM,UAAU,GAAG,EAAE,CAAC,OAAO,EAAE,CAAC;IAChC,MAAM,UAAU,GAAG,WAAW,GAAG,UAAU,CAAC;IAE5C,MAAM,mBAAmB,GAAG,CAAC,MAAc,EAAE,KAAa,EAAE,EAAE;QAC1D,IAAI,KAAK,KAAK,CAAC;YACX,OAAO,GAAG,CAAC;QAEf,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,KAAK,CAAC,GAAG,GAAG,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC;IAClE,CAAC,CAAC;IAEF,oBAAoB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAChC,OAAO,CAAC,IAAI,EAAE,CAAC;IAEf,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,YAAY,CAAC,IAAI,mBAAmB,CAAC,UAAU,CAAC,IAAI,EAAE,UAAU,CAAC,KAAK,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,GAAG,GAAG,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC;IAC/L,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,YAAY,CAAC,IAAI,mBAAmB,CAAC,UAAU,CAAC,IAAI,EAAE,UAAU,CAAC,KAAK,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,GAAG,GAAG,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC;IAC/L,OAAO,CAAC,IAAI,EAAE,CAAC;IACf,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,mBAAmB,CAAC,UAAU,EAAE,WAAW,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,UAAU,CAAC,GAAG,GAAG,GAAG,OAAO,CAAC,WAAW,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC;IAC1K,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,mBAAmB,CAAC,UAAU,EAAE,WAAW,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,UAAU,CAAC,GAAG,GAAG,GAAG,OAAO,CAAC,WAAW,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC;AAC9K,CAAC;AAED,KAAK,UAAU,yBAAyB;IACpC,MAAM,KAAK,GAAG,MAAM,QAAQ,CAAC,WAAW,CAAC,CAAC;IAE1C,oBAAoB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAChC,OAAO,CAAC,IAAI,EAAE,CAAC;IAEf,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,gBAAgB,CAAC,IAAI,iBAAiB,CAAC,KAAK,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC;IAC3F,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,IAAI,iBAAiB,CAAC,KAAK,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC;AAC5F,CAAC"}

View File

@@ -0,0 +1,40 @@
import { CommandModule } from "yargs";
import { BuildGpu, LlamaNuma } from "../../bindings/types.js";
type InfillCommand = {
modelPath?: string;
header?: string[];
gpu?: BuildGpu | "auto";
systemInfo: boolean;
prefix?: string;
prefixFile?: string;
suffix?: string;
suffixFile?: string;
contextSize?: number;
batchSize?: number;
flashAttention?: boolean;
swaFullCache?: boolean;
threads?: number;
temperature: number;
minP: number;
topK: number;
topP: number;
seed?: number;
gpuLayers?: number;
repeatPenalty: number;
lastTokensRepeatPenalty: number;
penalizeRepeatingNewLine: boolean;
repeatFrequencyPenalty?: number;
repeatPresencePenalty?: number;
maxTokens: number;
tokenPredictionDraftModel?: string;
tokenPredictionModelContextSize?: number;
debug: boolean;
numa?: LlamaNuma;
meter: boolean;
timing: boolean;
noMmap: boolean;
noDirectIo: boolean;
printTimings: boolean;
};
export declare const InfillCommand: CommandModule<object, InfillCommand>;
export {};

View File

@@ -0,0 +1,602 @@
import * as readline from "readline";
import process from "process";
import path from "path";
import chalk from "chalk";
import fs from "fs-extra";
import prettyMilliseconds from "pretty-ms";
import { getLlama } from "../../bindings/getLlama.js";
import { LlamaLogLevel, LlamaLogLevelGreaterThan, llamaNumaOptions, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption, parseNumaOption } from "../../bindings/types.js";
import { LlamaCompletion } from "../../evaluator/LlamaCompletion.js";
import withOra from "../../utils/withOra.js";
import { TokenMeter } from "../../evaluator/TokenMeter.js";
import { printInfoLine } from "../utils/printInfoLine.js";
import { printCommonInfoLines } from "../utils/printCommonInfoLines.js";
import { resolveCommandGgufPath } from "../utils/resolveCommandGgufPath.js";
import { withProgressLog } from "../../utils/withProgressLog.js";
import { resolveHeaderFlag } from "../utils/resolveHeaderFlag.js";
import { withCliCommandDescriptionDocsUrl } from "../utils/withCliCommandDescriptionDocsUrl.js";
import { documentationPageUrls } from "../../config.js";
import { ConsoleInteraction, ConsoleInteractionKey } from "../utils/ConsoleInteraction.js";
import { DraftSequenceTokenPredictor } from "../../evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js";
export const InfillCommand = {
command: "infill [modelPath]",
describe: withCliCommandDescriptionDocsUrl("Generate an infill completion for a given suffix and prefix texts", documentationPageUrls.CLI.Infill),
builder(yargs) {
return yargs
.option("modelPath", {
alias: ["m", "model", "path", "url", "uri"],
type: "string",
description: "Model file to use for the infill. Can be a path to a local file or a URI of a model file to download. Leave empty to choose from a list of recommended models"
})
.option("header", {
alias: ["H"],
type: "string",
array: true,
description: "Headers to use when downloading a model from a URL, in the format `key: value`. You can pass this option multiple times to add multiple headers."
})
.option("gpu", {
type: "string",
// yargs types don't support passing `false` as a choice, although it is supported by yargs
choices: nodeLlamaCppGpuOptions,
coerce: (value) => {
if (value == null || value == "")
return undefined;
return parseNodeLlamaCppGpuOption(value);
},
defaultDescription: "Uses the latest local build, and fallbacks to \"auto\"",
description: "Compute layer implementation type to use for llama.cpp. If omitted, uses the latest local build, and fallbacks to \"auto\""
})
.option("systemInfo", {
alias: "i",
type: "boolean",
default: false,
description: "Print llama.cpp system info"
})
.option("prefix", {
type: "string",
description: "First prefix text to automatically load"
})
.option("prefixFile", {
type: "string",
description: "Path to a file to load prefix text from automatically"
})
.option("suffix", {
type: "string",
description: "First suffix text to automatically load. Requires `prefix` or `prefixFile` to be set"
})
.option("suffixFile", {
type: "string",
description: "Path to a file to load suffix text from automatically. Requires `prefix` or `prefixFile` to be set"
})
.option("contextSize", {
alias: "c",
type: "number",
description: "Context size to use for the model context",
default: -1,
defaultDescription: "Automatically determined based on the available VRAM"
})
.option("batchSize", {
alias: "b",
type: "number",
description: "Batch size to use for the model context"
})
.option("flashAttention", {
alias: "fa",
type: "boolean",
default: false,
description: "Enable flash attention"
})
.option("swaFullCache", {
alias: "noSwa",
type: "boolean",
default: false,
description: "Disable SWA (Sliding Window Attention) on supported models"
})
.option("threads", {
type: "number",
defaultDescription: "Number of cores that are useful for math on the current machine",
description: "Number of threads to use for the evaluation of tokens"
})
.option("temperature", {
alias: "t",
type: "number",
default: 0,
description: "Temperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The suggested temperature is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run. Set to `0` to disable."
})
.option("minP", {
alias: "mp",
type: "number",
default: 0,
description: "From the next token candidates, discard the percentage of tokens with the lowest probability. For example, if set to `0.05`, 5% of the lowest probability tokens will be discarded. This is useful for generating more high-quality results when using a high temperature. Set to a value between `0` and `1` to enable. Only relevant when `temperature` is set to a value greater than `0`."
})
.option("topK", {
alias: "k",
type: "number",
default: 40,
description: "Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation. An integer number between `1` and the size of the vocabulary. Set to `0` to disable (which uses the full vocabulary). Only relevant when `temperature` is set to a value greater than 0."
})
.option("topP", {
alias: "p",
type: "number",
default: 0.95,
description: "Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P, and samples the next token only from this set. A float number between `0` and `1`. Set to `1` to disable. Only relevant when `temperature` is set to a value greater than `0`."
})
.option("seed", {
type: "number",
description: "Used to control the randomness of the generated text. Only relevant when using `temperature`.",
defaultDescription: "The current epoch time"
})
.option("gpuLayers", {
alias: "gl",
type: "number",
description: "number of layers to store in VRAM",
default: -1,
defaultDescription: "Automatically determined based on the available VRAM"
})
.option("repeatPenalty", {
alias: "rp",
type: "number",
default: 1.1,
description: "Prevent the model from repeating the same token too much. Set to `1` to disable."
})
.option("lastTokensRepeatPenalty", {
alias: "rpn",
type: "number",
default: 64,
description: "Number of recent tokens generated by the model to apply penalties to repetition of"
})
.option("penalizeRepeatingNewLine", {
alias: "rpnl",
type: "boolean",
default: true,
description: "Penalize new line tokens. set `--no-penalizeRepeatingNewLine` or `--no-rpnl` to disable"
})
.option("repeatFrequencyPenalty", {
alias: "rfp",
type: "number",
description: "For n time a token is in the `punishTokens` array, lower its probability by `n * repeatFrequencyPenalty`. Set to a value between `0` and `1` to enable."
})
.option("repeatPresencePenalty", {
alias: "rpp",
type: "number",
description: "Lower the probability of all the tokens in the `punishTokens` array by `repeatPresencePenalty`. Set to a value between `0` and `1` to enable."
})
.option("maxTokens", {
alias: "mt",
type: "number",
default: 0,
description: "Maximum number of tokens to generate in responses. Set to `0` to disable. Set to `-1` to set to the context size"
})
.option("tokenPredictionDraftModel", {
alias: ["dm", "draftModel"],
type: "string",
description: "Model file to use for draft sequence token prediction (speculative decoding). Can be a path to a local file or a URI of a model file to download"
})
.option("tokenPredictionModelContextSize", {
alias: ["dc", "draftContextSize", "draftContext"],
type: "number",
description: "Max context size to use for the draft sequence token prediction model context",
default: 4096
})
.option("debug", {
alias: "d",
type: "boolean",
default: false,
description: "Print llama.cpp info and debug logs"
})
.option("numa", {
type: "string",
// yargs types don't support passing `false` as a choice, although it is supported by yargs
choices: llamaNumaOptions,
coerce: (value) => {
if (value == null || value == "")
return false;
return parseNumaOption(value);
},
defaultDescription: "false",
description: "NUMA allocation policy. See the `numa` option on the `getLlama` method for more information"
})
.option("meter", {
type: "boolean",
default: false,
description: "Log how many tokens were used as input and output for each response"
})
.option("timing", {
type: "boolean",
default: false,
description: "Print how how long it took to generate each response"
})
.option("noMmap", {
type: "boolean",
default: false,
description: "Disable mmap (memory-mapped file) usage"
})
.option("noDirectIo", {
type: "boolean",
default: false,
description: "Disable Direct I/O usage when available"
})
.option("printTimings", {
alias: "pt",
type: "boolean",
default: false,
description: "Print llama.cpp's internal timings after each response"
});
},
async handler({ modelPath, header, gpu, systemInfo, prefix, prefixFile, suffix, suffixFile, contextSize, batchSize, flashAttention, swaFullCache, threads, temperature, minP, topK, topP, seed, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, numa, meter, timing, noMmap, noDirectIo, printTimings }) {
try {
await RunInfill({
modelPath, header, gpu, systemInfo, prefix, prefixFile, suffix, suffixFile, contextSize, batchSize, flashAttention,
swaFullCache, threads, temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty,
repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens,
tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, numa, meter, timing, noMmap, noDirectIo, printTimings
});
}
catch (err) {
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
console.error(err);
process.exit(1);
}
}
};
async function RunInfill({ modelPath: modelArg, header: headerArg, gpu, systemInfo, prefix, prefixFile, suffix, suffixFile, contextSize, batchSize, flashAttention, swaFullCache, threads, temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, tokenPredictionDraftModel, tokenPredictionModelContextSize, maxTokens, debug, numa, meter, timing, noMmap, noDirectIo, printTimings }) {
if (contextSize === -1)
contextSize = undefined;
if (gpuLayers === -1)
gpuLayers = undefined;
const headers = resolveHeaderFlag(headerArg);
if (debug)
console.info(`${chalk.yellow("Log level:")} debug`);
const llamaLogLevel = debug
? LlamaLogLevel.debug
: LlamaLogLevel.warn;
const llama = gpu == null
? await getLlama("lastBuild", {
logLevel: llamaLogLevel,
numa
})
: await getLlama({
gpu,
logLevel: llamaLogLevel,
numa
});
const logBatchSize = batchSize != null;
const useMmap = !noMmap && llama.supportsMmap;
const useDirectIo = !noDirectIo;
const resolvedModelPath = await resolveCommandGgufPath(modelArg, llama, headers, {
flashAttention,
swaFullCache,
useMmap
});
const resolvedDraftModelPath = (tokenPredictionDraftModel != null && tokenPredictionDraftModel !== "")
? await resolveCommandGgufPath(tokenPredictionDraftModel, llama, headers, {
flashAttention,
swaFullCache,
useMmap,
consoleTitle: "Draft model file"
})
: undefined;
if (systemInfo)
console.log(llama.systemInfo);
if (prefixFile != null && prefixFile !== "") {
if (prefix != null && prefix !== "")
console.warn(chalk.yellow("Both `prefix` and `prefixFile` were specified. `prefixFile` will be used."));
prefix = await fs.readFile(path.resolve(process.cwd(), prefixFile), "utf8");
}
if (suffixFile != null && suffixFile !== "") {
if (suffix != null && suffix !== "")
console.warn(chalk.yellow("Both `suffix` and `suffixFile` were specified. `suffixFile` will be used."));
suffix = await fs.readFile(path.resolve(process.cwd(), suffixFile), "utf8");
}
if (suffix != null && prefix == null) {
console.warn(chalk.yellow("Suffix was specified but no prefix was specified. Suffix will be ignored."));
suffix = undefined;
}
if (batchSize != null && contextSize != null && batchSize > contextSize) {
console.warn(chalk.yellow("Batch size is greater than the context size. Batch size will be set to the context size."));
batchSize = contextSize;
}
let initialPrefix = prefix ?? null;
let initialSuffix = suffix ?? null;
const model = await withProgressLog({
loadingText: chalk.blue.bold("Loading model"),
successText: chalk.blue("Model loaded"),
failText: chalk.blue("Failed to load model"),
liveUpdates: !debug,
noProgress: debug,
liveCtrlCSendsAbortSignal: true
}, async (progressUpdater) => {
try {
return await llama.loadModel({
modelPath: resolvedModelPath,
gpuLayers: gpuLayers != null
? gpuLayers
: contextSize != null
? { fitContext: { contextSize } }
: undefined,
defaultContextFlashAttention: flashAttention,
defaultContextSwaFullCache: swaFullCache,
useMmap,
useDirectIo,
ignoreMemorySafetyChecks: gpuLayers != null,
onLoadProgress(loadProgress) {
progressUpdater.setProgress(loadProgress);
},
loadSignal: progressUpdater.abortSignal
});
}
catch (err) {
if (err === progressUpdater.abortSignal?.reason)
process.exit(0);
throw err;
}
finally {
if (llama.logLevel === LlamaLogLevel.debug) {
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
console.info();
}
}
});
const draftModel = resolvedDraftModelPath == null
? undefined
: await withProgressLog({
loadingText: chalk.blue.bold("Loading draft model"),
successText: chalk.blue("Draft model loaded"),
failText: chalk.blue("Failed to load draft model"),
liveUpdates: !debug,
noProgress: debug,
liveCtrlCSendsAbortSignal: true
}, async (progressUpdater) => {
try {
return await llama.loadModel({
modelPath: resolvedDraftModelPath,
defaultContextFlashAttention: flashAttention,
defaultContextSwaFullCache: swaFullCache,
useMmap,
useDirectIo,
onLoadProgress(loadProgress) {
progressUpdater.setProgress(loadProgress);
},
loadSignal: progressUpdater.abortSignal
});
}
catch (err) {
if (err === progressUpdater.abortSignal?.reason)
process.exit(0);
throw err;
}
finally {
if (llama.logLevel === LlamaLogLevel.debug) {
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
console.info();
}
}
});
const draftContext = draftModel == null
? undefined
: await withOra({
loading: chalk.blue("Creating draft context"),
success: chalk.blue("Draft context created"),
fail: chalk.blue("Failed to create draft context"),
useStatusLogs: debug
}, async () => {
try {
return await draftModel.createContext({
contextSize: { max: tokenPredictionModelContextSize }
});
}
finally {
if (llama.logLevel === LlamaLogLevel.debug) {
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
console.info();
}
}
});
const context = await withOra({
loading: chalk.blue("Creating context"),
success: chalk.blue("Context created"),
fail: chalk.blue("Failed to create context"),
useStatusLogs: debug
}, async () => {
try {
return await model.createContext({
contextSize: contextSize != null ? contextSize : undefined,
batchSize: batchSize != null ? batchSize : undefined,
threads: threads === null ? undefined : threads,
ignoreMemorySafetyChecks: gpuLayers != null || contextSize != null,
performanceTracking: printTimings
});
}
finally {
if (llama.logLevel === LlamaLogLevel.debug) {
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
console.info();
}
}
});
const draftContextSequence = draftContext?.getSequence();
const contextSequence = draftContextSequence != null
? context.getSequence({
tokenPredictor: new DraftSequenceTokenPredictor(draftContextSequence)
})
: context.getSequence();
const completion = new LlamaCompletion({
contextSequence
});
let lastDraftTokenMeterState = draftContextSequence?.tokenMeter.getState();
let lastTokenMeterState = contextSequence.tokenMeter.getState();
let lastTokenPredictionsStats = contextSequence.tokenPredictions;
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
const padTitle = await printCommonInfoLines({
context,
draftContext,
useMmap,
useDirectIo,
logBatchSize,
tokenMeterEnabled: meter
});
printInfoLine({
title: "Infill",
padTitle: padTitle,
info: [{
title: "Repeat penalty",
value: `${repeatPenalty} (apply to last ${lastTokensRepeatPenalty} tokens)`
}, {
show: repeatFrequencyPenalty != null,
title: "Repeat frequency penalty",
value: String(repeatFrequencyPenalty)
}, {
show: repeatPresencePenalty != null,
title: "Repeat presence penalty",
value: String(repeatPresencePenalty)
}, {
show: !penalizeRepeatingNewLine,
title: "Penalize repeating new line",
value: "disabled"
}, {
show: timing,
title: "Response timing",
value: "enabled"
}]
});
// this is for ora to not interfere with readline
await new Promise((resolve) => setTimeout(resolve, 1));
if (!completion.infillSupported) {
console.log(chalk.red("Infill is not supported for this model"));
process.exit(1);
}
const replPrefixHistory = [];
const replSuffixHistory = [];
async function getInput(name) {
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout,
history: name === "Prefix"
? replPrefixHistory.slice()
: replSuffixHistory.slice()
});
const res = await new Promise((accept) => rl.question(chalk.yellow(name + "> "), accept));
rl.close();
return res;
}
while (true) {
const prefixInput = initialPrefix != null
? initialPrefix
: await getInput("Prefix");
if (initialPrefix != null) {
console.log(chalk.green("Prefix> ") + initialPrefix);
initialPrefix = null;
}
else
replPrefixHistory.push(prefixInput);
if (prefixInput === ".exit")
break;
const suffixInput = initialSuffix != null
? initialSuffix
: await getInput("Suffix");
if (initialSuffix != null) {
console.log(chalk.green("Suffix> ") + initialSuffix);
initialSuffix = null;
}
else
replSuffixHistory.push(suffixInput);
if (suffixInput === ".exit")
break;
process.stdout.write(chalk.yellow("Infill: "));
const [startColor, endColor] = chalk.blue("MIDDLE").split("MIDDLE");
const abortController = new AbortController();
const consoleInteraction = new ConsoleInteraction();
consoleInteraction.onKey(ConsoleInteractionKey.ctrlC, async () => {
abortController.abort();
consoleInteraction.stop();
});
const timeBeforePrompt = Date.now();
try {
process.stdout.write(startColor);
consoleInteraction.start();
await completion.generateInfillCompletion(prefixInput, suffixInput, {
temperature,
minP,
topK,
topP,
seed: seed ?? undefined,
signal: abortController.signal,
repeatPenalty: {
penalty: repeatPenalty,
frequencyPenalty: repeatFrequencyPenalty != null ? repeatFrequencyPenalty : undefined,
presencePenalty: repeatPresencePenalty != null ? repeatPresencePenalty : undefined,
penalizeNewLine: penalizeRepeatingNewLine,
lastTokens: lastTokensRepeatPenalty
},
maxTokens: maxTokens === -1
? context.contextSize
: maxTokens <= 0
? undefined
: maxTokens,
onTextChunk(chunk) {
process.stdout.write(chunk);
}
});
}
catch (err) {
if (!(abortController.signal.aborted && err === abortController.signal.reason))
throw err;
}
finally {
consoleInteraction.stop();
if (abortController.signal.aborted)
process.stdout.write(endColor + chalk.yellow("[generation aborted by user]"));
else
process.stdout.write(endColor);
console.log();
}
const timeAfterPrompt = Date.now();
if (printTimings) {
if (LlamaLogLevelGreaterThan(llama.logLevel, LlamaLogLevel.info))
llama.logLevel = LlamaLogLevel.info;
await context.printTimings();
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
llama.logLevel = llamaLogLevel;
}
if (timing)
console.info(chalk.dim("Response duration: ") +
prettyMilliseconds(timeAfterPrompt - timeBeforePrompt, {
keepDecimalsOnWholeSeconds: true,
secondsDecimalDigits: 2,
separateMilliseconds: true,
compact: false
}));
if (meter) {
const newTokenMeterState = contextSequence.tokenMeter.getState();
const tokenMeterDiff = TokenMeter.diff(newTokenMeterState, lastTokenMeterState);
lastTokenMeterState = newTokenMeterState;
const showDraftTokenMeterDiff = lastDraftTokenMeterState != null && draftContextSequence != null;
const tokenPredictionsStats = contextSequence.tokenPredictions;
const validatedTokenPredictions = tokenPredictionsStats.validated - lastTokenPredictionsStats.validated;
const refutedTokenPredictions = tokenPredictionsStats.refuted - lastTokenPredictionsStats.refuted;
const usedTokenPredictions = tokenPredictionsStats.used - lastTokenPredictionsStats.used;
const unusedTokenPredictions = tokenPredictionsStats.unused - lastTokenPredictionsStats.unused;
lastTokenPredictionsStats = tokenPredictionsStats;
console.info([
showDraftTokenMeterDiff && (chalk.yellow("Main".padEnd("Drafter".length))),
chalk.dim("Input tokens:") + " " + String(tokenMeterDiff.usedInputTokens).padEnd(5, " "),
chalk.dim("Output tokens:") + " " + String(tokenMeterDiff.usedOutputTokens).padEnd(5, " "),
showDraftTokenMeterDiff && (chalk.dim("Validated predictions:") + " " + String(validatedTokenPredictions).padEnd(5, " ")),
showDraftTokenMeterDiff && (chalk.dim("Refuted predictions:") + " " + String(refutedTokenPredictions).padEnd(5, " ")),
showDraftTokenMeterDiff && (chalk.dim("Used predictions:") + " " + String(usedTokenPredictions).padEnd(5, " ")),
showDraftTokenMeterDiff && (chalk.dim("Unused predictions:") + " " + String(unusedTokenPredictions).padEnd(5, " "))
].filter(Boolean).join(" "));
if (lastDraftTokenMeterState != null && draftContextSequence != null) {
const newDraftTokenMeterState = draftContextSequence.tokenMeter.getState();
const draftTokenMeterDiff = TokenMeter.diff(newDraftTokenMeterState, lastDraftTokenMeterState);
lastDraftTokenMeterState = newDraftTokenMeterState;
console.info([
chalk.yellow("Drafter"),
chalk.dim("Input tokens:") + " " + String(draftTokenMeterDiff.usedInputTokens).padEnd(5, " "),
chalk.dim("Output tokens:") + " " + String(draftTokenMeterDiff.usedOutputTokens).padEnd(5, " ")
].join(" "));
}
}
}
}
//# sourceMappingURL=InfillCommand.js.map
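Outside the CLI, the infill flow above reduces to a few calls on `LlamaCompletion`; a minimal sketch, assuming `getLlama` and `LlamaCompletion` are re-exported from "node-llama-cpp" and using a placeholder model path:

import {getLlama, LlamaCompletion} from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({modelPath: "path/to/model.gguf"}); // placeholder path
const context = await model.createContext();
const completion = new LlamaCompletion({contextSequence: context.getSequence()});

if (!completion.infillSupported)
    throw new Error("This model does not support infill (fill-in-middle) completion");

// Generate the text that belongs between a prefix and a suffix,
// mirroring one REPL iteration of the `infill` command above.
const middle = await completion.generateInfillCompletion(
    "function add(a, b) {\n    ", // prefix
    "\n}",                        // suffix
    {maxTokens: 64, temperature: 0}
);
console.log(middle);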

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,12 @@
import { CommandModule } from "yargs";
import { BuildGpu } from "../../bindings/types.js";
type InitCommand = {
name?: string;
template?: string;
model?: string;
gpu?: BuildGpu | "auto";
};
export declare const InitCommand: CommandModule<object, InitCommand>;
export declare const CreateCliCommand: CommandModule<object, InitCommand>;
export declare function InitCommandHandler({ name, template, model, gpu }: InitCommand): Promise<void>;
export {};

View File

@@ -0,0 +1,230 @@
import process from "process";
import path from "path";
import chalk from "chalk";
import logSymbols from "log-symbols";
import validateNpmPackageName from "validate-npm-package-name";
import fs from "fs-extra";
import { consolePromptQuestion } from "../utils/consolePromptQuestion.js";
import { basicChooseFromListConsoleInteraction } from "../utils/basicChooseFromListConsoleInteraction.js";
import { splitAnsiToLines } from "../utils/splitAnsiToLines.js";
import { arrowChar } from "../../consts.js";
import { interactivelyAskForModel } from "../utils/interactivelyAskForModel.js";
import { LlamaLogLevel, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption } from "../../bindings/types.js";
import { getLlama } from "../../bindings/getLlama.js";
import { ProjectTemplateParameter, scaffoldProjectTemplate } from "../utils/projectTemplates.js";
import { documentationPageUrls, packedProjectTemplatesDirectory } from "../../config.js";
import { getModuleVersion } from "../../utils/getModuleVersion.js";
import withOra from "../../utils/withOra.js";
import { projectTemplates } from "../projectTemplates.js";
import { getReadablePath } from "../utils/getReadablePath.js";
import { createModelDownloader } from "../../utils/createModelDownloader.js";
import { withCliCommandDescriptionDocsUrl } from "../utils/withCliCommandDescriptionDocsUrl.js";
import { resolveModelDestination } from "../../utils/resolveModelDestination.js";
export const InitCommand = {
command: "init [name]",
describe: withCliCommandDescriptionDocsUrl("Generate a new `node-llama-cpp` project from a template", documentationPageUrls.CLI.Init),
builder(yargs) {
return yargs
.option("name", {
type: "string",
description: "Project name"
})
.option("template", {
type: "string",
choices: projectTemplates.map((template) => template.name),
description: "Template to use. If omitted, you will be prompted to select one"
})
.option("model", {
type: "string",
description: "Model URI to use. If omitted, you will be prompted to select one interactively"
})
.option("gpu", {
type: "string",
// yargs types don't support passing `false` as a choice, although it is supported by yargs
choices: nodeLlamaCppGpuOptions,
coerce: (value) => {
if (value == null || value == "")
return undefined;
return parseNodeLlamaCppGpuOption(value);
},
defaultDescription: "Uses the latest local build, and fallbacks to \"auto\"",
description: "Compute layer implementation type to use for llama.cpp"
});
},
handler: InitCommandHandler
};
export const CreateCliCommand = {
command: "$0",
describe: withCliCommandDescriptionDocsUrl("Scaffold a new `node-llama-cpp` project from a template", documentationPageUrls.CLI.Init),
builder: InitCommand.builder,
handler: InitCommandHandler
};
export async function InitCommandHandler({ name, template, model, gpu }) {
const currentDirectory = path.resolve(process.cwd());
const projectName = (name != null && validateNpmPackageName(name ?? "").validForNewPackages)
? name
: await askForProjectName(currentDirectory);
const selectedTemplateOption = ((template != null && template !== "")
? projectTemplates.find((item) => item.name === template)
: undefined) ?? await askForTemplate();
async function resolveModelUri() {
if (model != null && model !== "") {
try {
const resolvedModelDestination = resolveModelDestination(model, true);
if (resolvedModelDestination.type === "uri")
return resolvedModelDestination.uri;
else if (resolvedModelDestination.type === "url")
return resolvedModelDestination.url;
}
catch (err) {
// do nothing
}
}
const llama = gpu == null
? await getLlama("lastBuild", {
logLevel: LlamaLogLevel.error
})
: await getLlama({
gpu,
logLevel: LlamaLogLevel.error
});
return await interactivelyAskForModel({
llama,
allowLocalModels: false,
downloadIntent: false
});
}
const modelUri = await resolveModelUri();
const targetDirectory = path.join(currentDirectory, projectName);
const readableTargetDirectoryPath = getReadablePath(targetDirectory);
await withOra({
loading: `Scaffolding a ${chalk.yellow(selectedTemplateOption.title)} project to ${chalk.yellow(readableTargetDirectoryPath)}`,
success: `Scaffolded a ${chalk.yellow(selectedTemplateOption.title)} project to ${chalk.yellow(readableTargetDirectoryPath)}`,
fail: `Failed to scaffold a ${chalk.yellow(selectedTemplateOption.title)} project to ${chalk.yellow(readableTargetDirectoryPath)}`
}, async () => {
const startTime = Date.now();
const minScaffoldTime = 1000 * 2; // ensure the IDE has enough time to refresh and show some progress
const template = await loadTemplate(selectedTemplateOption);
await fs.ensureDir(targetDirectory);
async function resolveModelInfo() {
const resolvedModelDestination = resolveModelDestination(modelUri);
if (resolvedModelDestination.type === "uri")
return {
modelUriOrUrl: resolvedModelDestination.uri,
modelUriOrFilename: resolvedModelDestination.uri,
cancelDownloader: async () => void 0
};
if (resolvedModelDestination.type === "file")
throw new Error("Unexpected file model destination");
const modelDownloader = await createModelDownloader({
modelUri: resolvedModelDestination.url,
showCliProgress: false,
deleteTempFileOnCancel: false
});
const modelEntrypointFilename = modelDownloader.entrypointFilename;
return {
modelUriOrUrl: resolvedModelDestination.url,
modelUriOrFilename: modelEntrypointFilename,
async cancelDownloader() {
try {
await modelDownloader.cancel();
}
catch (err) {
// do nothing
}
}
};
}
const { modelUriOrFilename, modelUriOrUrl, cancelDownloader } = await resolveModelInfo();
await scaffoldProjectTemplate({
template,
directoryPath: targetDirectory,
parameters: {
[ProjectTemplateParameter.ProjectName]: projectName,
[ProjectTemplateParameter.ModelUriOrUrl]: modelUriOrUrl,
[ProjectTemplateParameter.ModelUriOrFilename]: modelUriOrFilename,
[ProjectTemplateParameter.CurrentModuleVersion]: await getModuleVersion()
}
});
await cancelDownloader();
await new Promise((resolve) => setTimeout(resolve, Math.max(0, minScaffoldTime - (Date.now() - startTime))));
});
console.info(chalk.green("Done."));
console.info();
console.info("Now run these commands:");
console.info();
console.info(chalk.greenBright("cd") + " " + projectName);
console.info(chalk.greenBright("npm") + " install");
console.info(chalk.greenBright("npm") + " start");
console.info();
console.info(chalk.gray("Note: running \"npm install\" may take a little while since it also downloads the model you selected"));
process.exit(0);
}
async function askForTemplate() {
const selectedTemplateOption = await basicChooseFromListConsoleInteraction({
title: chalk.bold("Select a template:"),
footer(item) {
if (item.description == null)
return undefined;
const leftPad = 3;
const maxWidth = Math.max(1, process.stdout.columns - 2 - leftPad);
const lines = splitAnsiToLines(item.description, maxWidth);
return " \n" +
" ".repeat(leftPad) + chalk.bold.gray("Template description") + "\n" +
lines.map((line) => (" ".repeat(leftPad) + line)).join("\n");
},
items: projectTemplates,
renderItem(item, focused) {
return renderSelectableItem(item.titleFormat != null
? item.titleFormat(item.title)
: item.title, focused);
},
aboveItemsPadding: 1,
belowItemsPadding: 1,
renderSummaryOnExit(item) {
if (item == null)
return "";
return logSymbols.success + " Selected template " + chalk.blue(item.title);
},
exitOnCtrlC: true
});
if (selectedTemplateOption == null)
throw new Error("No template selected");
return selectedTemplateOption;
}
async function askForProjectName(currentDirectory) {
console.info();
const projectName = await consolePromptQuestion(chalk.bold("Enter a project name:") + chalk.dim(" (node-llama-cpp-project) "), {
defaultValue: "node-llama-cpp-project",
exitOnCtrlC: true,
async validate(input) {
const { validForNewPackages, errors } = validateNpmPackageName(input);
if (!validForNewPackages)
return (errors ?? ["The given project name cannot be used in a package.json file"]).join("\n");
if (await fs.pathExists(path.join(currentDirectory, input)))
return "A directory with the given project name already exists";
return null;
},
renderSummaryOnExit(item) {
if (item == null)
return "";
return logSymbols.success + " Entered project name " + chalk.blue(item);
}
});
if (projectName == null)
throw new Error("No project name entered");
return projectName;
}
function renderSelectableItem(text, focused) {
if (focused)
return " " + chalk.cyan(arrowChar) + " " + chalk.cyan(text);
return " * " + text;
}
async function loadTemplate(templateOption) {
const templateFilePath = path.join(packedProjectTemplatesDirectory, `${templateOption.name}.json`);
if (!(await fs.pathExists(templateFilePath)))
throw new Error(`Template file was not found for template "${templateOption.title}" ("${templateOption.name}")`);
const template = await fs.readJSON(templateFilePath);
return template;
}
//# sourceMappingURL=InitCommand.js.map

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,4 @@
import { CommandModule } from "yargs";
type OnPostInstallCommand = null;
export declare const OnPostInstallCommand: CommandModule<object, OnPostInstallCommand>;
export {};

View File

@@ -0,0 +1,35 @@
import chalk from "chalk";
import { defaultSkipDownload, documentationPageUrls } from "../../config.js";
import { getLlamaForOptions } from "../../bindings/getLlama.js";
import { setForceShowConsoleLogPrefix } from "../../state.js";
import { isRunningUnderRosetta } from "../utils/isRunningUnderRosetta.js";
import { getConsoleLogPrefix } from "../../utils/getConsoleLogPrefix.js";
export const OnPostInstallCommand = {
command: "postinstall",
describe: false,
async handler() {
if (defaultSkipDownload)
return;
setForceShowConsoleLogPrefix(true);
if (await isRunningUnderRosetta()) {
console.error(getConsoleLogPrefix(false, false), chalk.red("llama.cpp is not supported under Rosetta on Apple Silicone Macs. " +
"Ensure that you're using a native arm64 node.js installation."));
console.error(getConsoleLogPrefix(false, false), "process.platform: " + process.platform + ", process.arch: " + process.arch);
console.error(getConsoleLogPrefix(false, false), "troubleshooting: " + documentationPageUrls.troubleshooting.RosettaIllegalHardwareInstruction);
process.exit(1);
}
try {
await getLlamaForOptions({
progressLogs: true
}, {
updateLastBuildInfoOnCompile: true
});
process.exit(0);
}
catch (err) {
console.error(err);
process.exit(1);
}
}
};
//# sourceMappingURL=OnPostInstallCommand.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"OnPostInstallCommand.js","sourceRoot":"","sources":["../../../src/cli/commands/OnPostInstallCommand.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAC,mBAAmB,EAAE,qBAAqB,EAAC,MAAM,iBAAiB,CAAC;AAC3E,OAAO,EAAC,kBAAkB,EAAC,MAAM,4BAA4B,CAAC;AAC9D,OAAO,EAAC,4BAA4B,EAAC,MAAM,gBAAgB,CAAC;AAC5D,OAAO,EAAC,qBAAqB,EAAC,MAAM,mCAAmC,CAAC;AACxE,OAAO,EAAC,mBAAmB,EAAC,MAAM,oCAAoC,CAAC;AAIvE,MAAM,CAAC,MAAM,oBAAoB,GAAgD;IAC7E,OAAO,EAAE,aAAa;IACtB,QAAQ,EAAE,KAAK;IACf,KAAK,CAAC,OAAO;QACT,IAAI,mBAAmB;YACnB,OAAO;QAEX,4BAA4B,CAAC,IAAI,CAAC,CAAC;QAEnC,IAAI,MAAM,qBAAqB,EAAE,EAAE,CAAC;YAChC,OAAO,CAAC,KAAK,CACT,mBAAmB,CAAC,KAAK,EAAE,KAAK,CAAC,EACjC,KAAK,CAAC,GAAG,CACL,mEAAmE;gBACnE,+DAA+D,CAClE,CACJ,CAAC;YACF,OAAO,CAAC,KAAK,CACT,mBAAmB,CAAC,KAAK,EAAE,KAAK,CAAC,EACjC,oBAAoB,GAAG,OAAO,CAAC,QAAQ,GAAG,kBAAkB,GAAG,OAAO,CAAC,IAAI,CAC9E,CAAC;YACF,OAAO,CAAC,KAAK,CACT,mBAAmB,CAAC,KAAK,EAAE,KAAK,CAAC,EACjC,mBAAmB,GAAG,qBAAqB,CAAC,eAAe,CAAC,iCAAiC,CAChG,CAAC;YAEF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;QAED,IAAI,CAAC;YACD,MAAM,kBAAkB,CAAC;gBACrB,YAAY,EAAE,IAAI;aACrB,EAAE;gBACC,4BAA4B,EAAE,IAAI;aACrC,CAAC,CAAC;YAEH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACnB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;IACL,CAAC;CACJ,CAAC"}

View File

@@ -0,0 +1,13 @@
import { CommandModule } from "yargs";
type PullCommand = {
urls: string[];
header?: string[];
override: boolean;
noProgress: boolean;
noTempFile: boolean;
directory: string;
filename?: string;
parallel?: number;
};
export declare const PullCommand: CommandModule<object, PullCommand>;
export {};

View File

@@ -0,0 +1,158 @@
import process from "process";
import fs from "fs-extra";
import chalk from "chalk";
import { cliModelsDirectory, documentationPageUrls } from "../../config.js";
import { combineModelDownloaders, createModelDownloader } from "../../utils/createModelDownloader.js";
import { getReadablePath } from "../utils/getReadablePath.js";
import { ConsoleInteraction, ConsoleInteractionKey } from "../utils/ConsoleInteraction.js";
import { getIsInDocumentationMode } from "../../state.js";
import { resolveHeaderFlag } from "../utils/resolveHeaderFlag.js";
import { withCliCommandDescriptionDocsUrl } from "../utils/withCliCommandDescriptionDocsUrl.js";
export const PullCommand = {
command: "pull [urls..]",
aliases: ["get"],
describe: withCliCommandDescriptionDocsUrl("Download models from URLs", documentationPageUrls.CLI.Pull),
builder(yargs) {
const isInDocumentationMode = getIsInDocumentationMode();
return yargs
.option("urls", {
type: "string",
alias: ["url", "uris", "uri"],
array: true,
description: [
"A `.gguf` model URI to pull.",
!isInDocumentationMode && "Automatically handles split and binary-split models files, so only pass the URI to the first file of a model.",
!isInDocumentationMode && "If a file already exists and its size matches the expected size, it will not be downloaded again unless the `--override` flag is used.",
"Pass multiple URIs to download multiple models at once."
].filter(Boolean).join(isInDocumentationMode
? "\n"
: " "),
demandOption: true,
group: "Required:"
})
.option("header", {
alias: ["H"],
type: "string",
array: true,
description: "Headers to use when downloading a model from a URL, in the format `key: value`. You can pass this option multiple times to add multiple headers.",
group: "Optional:"
})
.option("override", {
alias: ["o"],
type: "boolean",
description: "Override existing model files",
default: false,
group: "Optional:"
})
.option("noProgress", {
type: "boolean",
description: "Do not show a progress bar while downloading",
default: false,
group: "Optional:"
})
.option("noTempFile", {
alias: ["noTemp"],
type: "boolean",
description: "Delete the temporary file when canceling the download",
default: false,
group: "Optional:"
})
.option("directory", {
alias: ["d", "dir"],
type: "string",
description: "Directory to save the model to",
default: cliModelsDirectory,
defaultDescription: isInDocumentationMode
? "`" + getReadablePath(cliModelsDirectory) + "`"
: getReadablePath(cliModelsDirectory),
group: "Optional:"
})
.option("filename", {
alias: ["n", "name"],
type: "string",
description: "Filename to save the model as. Can only be used if a single URL is passed",
group: "Optional:"
})
.option("parallel", {
alias: ["p"],
type: "number",
description: "Maximum parallel downloads",
default: 4,
group: "Optional:"
});
},
async handler({ urls, header: headerArg, override, noProgress, noTempFile, directory, filename, parallel }) {
const headers = resolveHeaderFlag(headerArg);
if (urls.length === 0)
throw new Error("At least one URI must be provided");
else if (urls.length > 1 && filename != null)
throw new Error("The `--filename` flag can only be used when a single URI is passed");
if (urls.length === 1) {
const downloader = await createModelDownloader({
modelUri: urls[0],
dirPath: directory,
headers,
showCliProgress: !noProgress,
deleteTempFileOnCancel: noTempFile,
skipExisting: !override,
fileName: filename || undefined,
parallelDownloads: parallel,
_showUriResolvingProgress: !noProgress
});
if (!override && downloader.totalFiles === 1 && await fs.pathExists(downloader.entrypointFilePath)) {
const fileStats = await fs.stat(downloader.entrypointFilePath);
if (downloader.totalSize === fileStats.size) {
console.info(`${chalk.yellow("File:")} ${getReadablePath(downloader.entrypointFilePath)}`);
console.info("Skipping download of an existing file: " + chalk.yellow(getReadablePath(downloader.entrypointFilePath)));
process.exit(0);
}
}
const consoleInteraction = new ConsoleInteraction();
consoleInteraction.onKey(ConsoleInteractionKey.ctrlC, async () => {
await downloader.cancel();
consoleInteraction.stop();
process.exit(0);
});
if (!noProgress) {
console.info(`Downloading to ${chalk.yellow(getReadablePath(directory))}${downloader.splitBinaryParts != null
? chalk.gray(` (combining ${downloader.splitBinaryParts} parts into a single file)`)
: ""}`);
consoleInteraction.start();
}
await downloader.download();
if (!noProgress)
consoleInteraction.stop();
console.info(`Downloaded to ${chalk.yellow(getReadablePath(downloader.entrypointFilePath))}`);
}
else {
const downloader = await combineModelDownloaders(urls.map((uri) => createModelDownloader({
modelUri: uri,
dirPath: directory,
headers,
showCliProgress: false,
deleteTempFileOnCancel: noTempFile,
skipExisting: !override
})), {
showCliProgress: !noProgress,
parallelDownloads: parallel
});
const consoleInteraction = new ConsoleInteraction();
consoleInteraction.onKey(ConsoleInteractionKey.ctrlC, async () => {
await downloader.cancel();
consoleInteraction.stop();
process.exit(0);
});
if (!noProgress) {
console.info(`Downloading to ${chalk.yellow(getReadablePath(directory))}`);
consoleInteraction.start();
}
await downloader.download();
if (!noProgress)
consoleInteraction.stop();
console.info(`Downloaded ${downloader.modelDownloaders.length} models to ${chalk.yellow(getReadablePath(directory))}\n${chalk.gray("*")} ` +
downloader.modelDownloaders.map((downloader) => chalk.yellow(downloader.entrypointFilename))
.join(`\n${chalk.gray("*")} `));
}
}
};
//# sourceMappingURL=PullCommand.js.map
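The `pull` command is a thin wrapper around `createModelDownloader`; a minimal programmatic sketch, assuming it is re-exported from "node-llama-cpp" and using a placeholder URL (the options mirror the ones the command passes above):

import {createModelDownloader} from "node-llama-cpp";

const downloader = await createModelDownloader({
    modelUri: "https://example.com/model.gguf", // placeholder; model URIs are accepted here as well
    dirPath: "./models",
    showCliProgress: true,
    skipExisting: true,    // equivalent to running `pull` without `--override`
    parallelDownloads: 4
});

await downloader.download();
console.info(`Downloaded to ${downloader.entrypointFilePath}`);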

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,4 @@
import { CommandModule } from "yargs";
type InspectCommand = {};
export declare const InspectCommand: CommandModule<object, InspectCommand>;
export {};

View File

@@ -0,0 +1,21 @@
import { withCliCommandDescriptionDocsUrl } from "../../utils/withCliCommandDescriptionDocsUrl.js";
import { documentationPageUrls } from "../../../config.js";
import { InspectGgufCommand } from "./commands/InspectGgufCommand.js";
import { InspectGpuCommand } from "./commands/InspectGpuCommand.js";
import { InspectMeasureCommand } from "./commands/InspectMeasureCommand.js";
import { InspectEstimateCommand } from "./commands/InspectEstimateCommand.js";
export const InspectCommand = {
command: "inspect <command>",
describe: withCliCommandDescriptionDocsUrl("Inspect the inner workings of `node-llama-cpp`", documentationPageUrls.CLI.Inspect.index),
builder(yargs) {
return yargs
.command(InspectGpuCommand)
.command(InspectGgufCommand)
.command(InspectMeasureCommand)
.command(InspectEstimateCommand);
},
async handler() {
// this function must exist, even though we do nothing here
}
};
//# sourceMappingURL=InspectCommand.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"InspectCommand.js","sourceRoot":"","sources":["../../../../src/cli/commands/inspect/InspectCommand.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,gCAAgC,EAAC,MAAM,iDAAiD,CAAC;AACjG,OAAO,EAAC,qBAAqB,EAAC,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAC,kBAAkB,EAAC,MAAM,kCAAkC,CAAC;AACpE,OAAO,EAAC,iBAAiB,EAAC,MAAM,iCAAiC,CAAC;AAClE,OAAO,EAAC,qBAAqB,EAAC,MAAM,qCAAqC,CAAC;AAC1E,OAAO,EAAC,sBAAsB,EAAC,MAAM,sCAAsC,CAAC;AAM5E,MAAM,CAAC,MAAM,cAAc,GAA0C;IACjE,OAAO,EAAE,mBAAmB;IAC5B,QAAQ,EAAE,gCAAgC,CACtC,gDAAgD,EAChD,qBAAqB,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAC1C;IACD,OAAO,CAAC,KAAK;QACT,OAAO,KAAK;aACP,OAAO,CAAC,iBAAiB,CAAC;aAC1B,OAAO,CAAC,kBAAkB,CAAC;aAC3B,OAAO,CAAC,qBAAqB,CAAC;aAC9B,OAAO,CAAC,sBAAsB,CAAC,CAAC;IACzC,CAAC;IACD,KAAK,CAAC,OAAO;QACT,2DAA2D;IAC/D,CAAC;CACJ,CAAC"}

View File

@@ -0,0 +1,14 @@
import { CommandModule } from "yargs";
import { BuildGpu } from "../../../../bindings/types.js";
type InspectEstimateCommand = {
modelPath: string;
header?: string[];
gpu?: BuildGpu | "auto";
gpuLayers?: number | "max";
contextSize?: number | "train";
embedding?: boolean;
noMmap?: boolean;
swaFullCache?: boolean;
};
export declare const InspectEstimateCommand: CommandModule<object, InspectEstimateCommand>;
export {};

View File

@@ -0,0 +1,248 @@
import process from "process";
import chalk from "chalk";
import fs from "fs-extra";
import { readGgufFileInfo } from "../../../../gguf/readGgufFileInfo.js";
import { resolveHeaderFlag } from "../../../utils/resolveHeaderFlag.js";
import { withCliCommandDescriptionDocsUrl } from "../../../utils/withCliCommandDescriptionDocsUrl.js";
import { documentationPageUrls } from "../../../../config.js";
import { printInfoLine } from "../../../utils/printInfoLine.js";
import { renderModelCompatibilityPercentageWithColors } from "../../../utils/renderModelCompatibilityPercentageWithColors.js";
import { getReadableContextSize } from "../../../../utils/getReadableContextSize.js";
import { GgufInsights } from "../../../../gguf/insights/GgufInsights.js";
import { getLlama } from "../../../../bindings/getLlama.js";
import { LlamaLogLevel, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption } from "../../../../bindings/types.js";
import { defaultTrainContextSizeForEstimationPurposes } from "../../../../gguf/insights/GgufInsightsConfigurationResolver.js";
import { getGgufFileTypeName } from "../../../../gguf/utils/getGgufFileTypeName.js";
import { getPrettyBuildGpuName } from "../../../../bindings/consts.js";
import withOra from "../../../../utils/withOra.js";
import { resolveModelArgToFilePathOrUrl } from "../../../../utils/resolveModelDestination.js";
import { printModelDestination } from "../../../utils/printModelDestination.js";
import { toBytes } from "../../../utils/toBytes.js";
import { printDidYouMeanUri } from "../../../utils/resolveCommandGgufPath.js";
import { isModelUri } from "../../../../utils/parseModelUri.js";
export const InspectEstimateCommand = {
command: "estimate [modelPath]",
describe: withCliCommandDescriptionDocsUrl("Estimate the compatibility of a model with the current hardware", documentationPageUrls.CLI.Inspect.Estimate),
builder(yargs) {
return yargs
.option("modelPath", {
alias: ["m", "model", "path", "url", "uri"],
type: "string",
demandOption: true,
description: "The path or URI of the GGUF file to use. If a URI is provided, the metadata will be read from the remote file without downloading the entire file.",
group: "Required:"
})
.option("header", {
alias: ["H"],
type: "string",
array: true,
description: "Headers to use when reading a model file from a URL, in the format `key: value`. You can pass this option multiple times to add multiple headers.",
group: "Optional:"
})
.option("gpu", {
type: "string",
// yargs types don't support passing `false` as a choice, although it is supported by yargs
choices: nodeLlamaCppGpuOptions,
coerce: (value) => {
if (value == null || value == "")
return undefined;
return parseNodeLlamaCppGpuOption(value);
},
defaultDescription: "Uses the latest local build, and fallbacks to \"auto\"",
description: "Compute layer implementation type to use for llama.cpp. If omitted, uses the latest local build, and fallbacks to \"auto\"",
group: "Optional:"
})
.option("gpuLayers", {
alias: "gl",
type: "number",
description: "number of layers to store in VRAM. Set to `max` to use all the layers the model has",
string: true,
coerce: (value) => {
if (value === "max")
return -2;
return parseInt(value);
},
default: -1,
defaultDescription: "Automatically determined based on the available VRAM",
group: "Optional:"
})
.option("contextSize", {
alias: "c",
type: "number",
description: "Context size to use for the model context. Set to `max` or `train` to use the training context size. " +
"Note that the train context size is not necessarily what you should use for inference, " +
"and a big context size will use a lot of memory",
string: true,
coerce: (value) => {
if (value === "max" || value === "train")
return -2;
return parseInt(value);
},
default: -1,
defaultDescription: "Automatically determined based on the available VRAM",
group: "Optional:"
})
.option("embedding", {
alias: "e",
type: "boolean",
description: "Whether to estimate for creating an embedding context",
default: false,
group: "Optional:"
})
.option("noMmap", {
type: "boolean",
default: false,
description: "Disable mmap (memory-mapped file) usage"
})
.option("swaFullCache", {
alias: "noSwa",
type: "boolean",
default: false,
description: "Disable SWA (Sliding Window Attention) on supported models"
});
},
async handler({ modelPath: ggufPath, header: headerArg, gpu, gpuLayers, contextSize: contextSizeArg, embedding, noMmap, swaFullCache }) {
if (gpuLayers === -1)
gpuLayers = undefined;
if (gpuLayers === -2)
gpuLayers = "max";
if (contextSizeArg === -1)
contextSizeArg = undefined;
if (contextSizeArg === -2)
contextSizeArg = "train";
const headers = resolveHeaderFlag(headerArg);
const [resolvedModelDestination, resolvedGgufPath] = isModelUri(ggufPath)
? await withOra({
loading: chalk.blue("Resolving model URI"),
success: chalk.blue("Resolved model URI"),
fail: chalk.blue("Failed to resolve model URI"),
noSuccessLiveStatus: true
}, () => resolveModelArgToFilePathOrUrl(ggufPath, headers))
: await resolveModelArgToFilePathOrUrl(ggufPath, headers);
if (resolvedModelDestination.type === "file" && !await fs.pathExists(resolvedGgufPath)) {
console.error(`${chalk.red("File does not exist:")} ${resolvedGgufPath}`);
printDidYouMeanUri(ggufPath);
process.exit(1);
}
const llama = gpu == null
? await getLlama("lastBuild", {
logLevel: LlamaLogLevel.error
})
: await getLlama({
gpu,
logLevel: LlamaLogLevel.error
});
const useMmap = !noMmap && llama.supportsMmap;
printModelDestination(resolvedModelDestination);
if (embedding)
console.info(`${chalk.yellow("Estimating for an embedding context")}`);
const ggufFileInfo = await withOra({
loading: chalk.blue("Reading model metadata"),
success: chalk.blue("Read model metadata"),
fail: chalk.blue("Failed to read model metadata"),
noSuccessLiveStatus: true
}, async () => {
return await readGgufFileInfo(resolvedGgufPath, {
fetchHeaders: resolvedModelDestination.type === "file"
? undefined
: headers
});
});
const ggufInsights = await GgufInsights.from(ggufFileInfo, llama);
const contextSize = contextSizeArg === "train"
? ggufInsights.trainContextSize ?? defaultTrainContextSizeForEstimationPurposes
: contextSizeArg;
async function resolveCompatibilityScore(flashAttention) {
return await ggufInsights.configurationResolver.resolveAndScoreConfig({
flashAttention,
targetContextSize: contextSize,
targetGpuLayers: gpuLayers,
embeddingContext: embedding,
useMmap,
swaFullCache
});
}
const [compatibilityScore, compatibilityScoreWithFlashAttention] = await Promise.all([
resolveCompatibilityScore(false),
resolveCompatibilityScore(true)
]);
const longestTitle = Math.max("GPU info".length, "Model info".length, "Resolved config".length, "With flash attention".length) + 1;
if (llama.gpu !== false) {
const [vramState, deviceNames] = await Promise.all([
llama.getVramState(),
llama.getGpuDeviceNames()
]);
printInfoLine({
title: "GPU info",
padTitle: longestTitle,
info: [{
title: "Type",
value: getPrettyBuildGpuName(llama.gpu)
}, {
title: "VRAM",
value: toBytes(vramState.total)
}, {
title: "Name",
value: toOneLine(deviceNames.join(", "))
}]
});
}
printInfoLine({
title: "Model info",
padTitle: longestTitle,
info: [{
title: "Type",
value: toOneLine([
ggufFileInfo.metadata?.general?.architecture,
ggufFileInfo.metadata?.general?.size_label,
getGgufFileTypeName(ggufFileInfo.metadata.general?.file_type)
].filter(Boolean).join(" "))
}, {
title: "Size",
value: toBytes(ggufInsights.modelSize)
}, {
show: ggufInsights.trainContextSize != null,
title: "Train context size",
value: getReadableContextSize(ggufInsights.trainContextSize ?? 0)
}]
});
console.info();
logCompatibilityScore("Resolved config", longestTitle, compatibilityScore, ggufInsights, llama, false);
logCompatibilityScore("With flash attention", longestTitle, compatibilityScoreWithFlashAttention, ggufInsights, llama, true);
}
};
function logCompatibilityScore(title, padTitle, compatibilityScore, ggufInsights, llama, flashAttention) {
printInfoLine({
title,
padTitle,
separateLines: false,
info: [{
title: "",
value: renderModelCompatibilityPercentageWithColors(compatibilityScore.compatibilityScore * 100) + " compatibility"
}, {
show: ggufInsights.trainContextSize != null,
title: "Context size",
value: getReadableContextSize(compatibilityScore.resolvedValues.contextSize)
}, {
show: llama.gpu !== false,
title: "GPU layers",
value: () => (compatibilityScore.resolvedValues.gpuLayers + "/" + ggufInsights.totalLayers + " " +
chalk.dim(`(${Math.floor((compatibilityScore.resolvedValues.gpuLayers / ggufInsights.totalLayers) * 100)}%)`))
}, {
show: llama.gpu !== false,
title: "VRAM usage",
value: () => toBytes(compatibilityScore.resolvedValues.totalVramUsage)
}, {
title: "RAM usage",
value: () => toBytes(compatibilityScore.resolvedValues.totalRamUsage)
}, {
show: flashAttention,
title: "Flash attention",
value: "enabled"
}]
});
}
function toOneLine(text) {
return text.replaceAll("\n", chalk.gray("\\n"));
}
//# sourceMappingURL=InspectEstimateCommand.js.map
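For reference, the estimation flow above boils down to three building blocks: read the GGUF metadata, wrap it in GgufInsights, and ask the configuration resolver to score a configuration for the current machine. The following is a minimal programmatic sketch of that flow, assuming readGgufFileInfo and GgufInsights are exported from the package root and using a hypothetical local model path:

import {getLlama, readGgufFileInfo, GgufInsights} from "node-llama-cpp";

const modelPath = "./models/example-model.gguf"; // hypothetical path, for illustration only

const llama = await getLlama();
const ggufFileInfo = await readGgufFileInfo(modelPath);
const ggufInsights = await GgufInsights.from(ggufFileInfo, llama);

// score how well the model fits this machine for a target context size
const score = await ggufInsights.configurationResolver.resolveAndScoreConfig({
    targetContextSize: 4096
});

console.info("Compatibility:", Math.round(score.compatibilityScore * 100) + "%");
console.info("GPU layers:", score.resolvedValues.gpuLayers + "/" + ggufInsights.totalLayers);
console.info("Context size:", score.resolvedValues.contextSize);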

node_modules/node-llama-cpp/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map generated vendored Normal file
File diff suppressed because one or more lines are too long

node_modules/node-llama-cpp/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts generated vendored Normal file
View File

@@ -0,0 +1,13 @@
import { CommandModule } from "yargs";
type InspectGgufCommand = {
modelPath: string;
header?: string[];
key?: string;
noSplice: boolean;
fullTensorInfo: boolean;
fullMetadataArrays: boolean;
plainJson: boolean;
outputToJsonFile?: string;
};
export declare const InspectGgufCommand: CommandModule<object, InspectGgufCommand>;
export {};

node_modules/node-llama-cpp/dist/cli/commands/inspect/commands/InspectGgufCommand.js generated vendored Normal file
View File

@@ -0,0 +1,225 @@
import path from "path";
import process from "process";
import chalk from "chalk";
import fs from "fs-extra";
import { Template } from "@huggingface/jinja";
import { readGgufFileInfo } from "../../../../gguf/readGgufFileInfo.js";
import { prettyPrintObject } from "../../../../utils/prettyPrintObject.js";
import { getGgufFileTypeName } from "../../../../gguf/utils/getGgufFileTypeName.js";
import { resolveHeaderFlag } from "../../../utils/resolveHeaderFlag.js";
import { withCliCommandDescriptionDocsUrl } from "../../../utils/withCliCommandDescriptionDocsUrl.js";
import { documentationPageUrls } from "../../../../config.js";
import withOra from "../../../../utils/withOra.js";
import { resolveModelArgToFilePathOrUrl } from "../../../../utils/resolveModelDestination.js";
import { printModelDestination } from "../../../utils/printModelDestination.js";
import { getGgufMetadataKeyValue } from "../../../../gguf/utils/getGgufMetadataKeyValue.js";
import { toBytes } from "../../../utils/toBytes.js";
import { printDidYouMeanUri } from "../../../utils/resolveCommandGgufPath.js";
import { isModelUri } from "../../../../utils/parseModelUri.js";
const chatTemplateKey = ".chatTemplate";
export const InspectGgufCommand = {
command: "gguf [modelPath]",
describe: withCliCommandDescriptionDocsUrl("Inspect a GGUF file", documentationPageUrls.CLI.Inspect.GGUF),
builder(yargs) {
return yargs
.option("modelPath", {
alias: ["m", "model", "path", "url", "uri"],
type: "string",
demandOption: true,
description: "The path or URI of the GGUF file to inspect. If a URI is provided, the metadata will be read from the remote file without downloading the entire file.",
group: "Required:"
})
.option("header", {
alias: ["H"],
type: "string",
array: true,
description: "Headers to use when reading a model file from a URL, in the format `key: value`. You can pass this option multiple times to add multiple headers.",
group: "Optional:"
})
.option("key", {
alias: ["k"],
type: "string",
description: "A single metadata key to print the value of. If not provided, all metadata will be printed. " +
"If the key is `" + chatTemplateKey + "` then the chat template of the model will be formatted and printed.",
group: "Optional:"
})
.option("noSplice", {
alias: "s",
type: "boolean",
default: false,
description: "When split files are detected, the metadata of the first file is read and the tensor info from all the parts is spliced together. Use this flag to disable that behavior and read only the given file",
group: "Optional:"
})
.option("fullTensorInfo", {
alias: "t",
type: "boolean",
default: false,
description: "Show the full tensor info",
group: "Optional:"
})
.option("fullMetadataArrays", {
alias: "ma",
type: "boolean",
default: false,
description: "Print the full arrays in the metadata. Use with caution: those arrays can be extremely large and can cover the entire terminal screen.",
group: "Optional:"
})
.option("plainJson", {
type: "boolean",
default: false,
description: "Print the output as plain JSON with no formatting. Useful for piping the output to other commands. The output won't truncate any values, so it may be extremely large. Use with caution.",
group: "Optional:"
})
.option("outputToJsonFile", {
type: "string",
description: "Path to a file to write the output to as JSON. The output won't truncate any values. The output won't be printed to the console",
group: "Optional:"
});
},
async handler({ modelPath: ggufPath, header: headerArg, key, noSplice, fullTensorInfo, fullMetadataArrays, plainJson, outputToJsonFile }) {
const headers = resolveHeaderFlag(headerArg);
const [resolvedModelDestination, resolvedGgufPath] = (!plainJson && isModelUri(ggufPath))
? await withOra({
loading: chalk.blue("Resolving model URI"),
success: chalk.blue("Resolved model URI"),
fail: chalk.blue("Failed to resolve model URI"),
noSuccessLiveStatus: true
}, () => resolveModelArgToFilePathOrUrl(ggufPath, headers))
: await resolveModelArgToFilePathOrUrl(ggufPath, headers);
if (resolvedModelDestination.type === "file" && !await fs.pathExists(resolvedGgufPath)) {
console.error(`${chalk.red("File does not exist:")} ${resolvedGgufPath}`);
printDidYouMeanUri(ggufPath);
process.exit(1);
}
if (!plainJson)
printModelDestination(resolvedModelDestination);
const parsedMetadata = plainJson
? await readGgufFileInfo(resolvedGgufPath, {
fetchHeaders: resolvedModelDestination.type === "file"
? undefined
: headers,
spliceSplitFiles: !noSplice
})
: await withOra({
loading: chalk.blue("Reading model metadata"),
success: chalk.blue("Read model metadata"),
fail: chalk.blue("Failed to read model metadata"),
noSuccessLiveStatus: true
}, async () => {
return await readGgufFileInfo(resolvedGgufPath, {
fetchHeaders: resolvedModelDestination.type === "file"
? undefined
: headers,
spliceSplitFiles: !noSplice
});
});
removeAdditionalTensorInfoFields(parsedMetadata.fullTensorInfo);
const fileTypeName = getGgufFileTypeName(parsedMetadata.metadata.general?.file_type);
if (plainJson || outputToJsonFile != null) {
const getOutputJson = () => {
if (key != null) {
const keyValue = key === chatTemplateKey
? tryFormattingJinja(getGgufMetadataKeyValue(parsedMetadata.metadata, "tokenizer.chat_template"))
: getGgufMetadataKeyValue(parsedMetadata.metadata, key);
if (keyValue === undefined) {
console.log(`Key not found: ${key}`);
process.exit(1);
}
return JSON.stringify(keyValue, undefined, 4);
}
return JSON.stringify({
splicedParts: parsedMetadata.splicedParts,
version: parsedMetadata.version,
fileType: fileTypeName,
tensorCount: parsedMetadata.totalTensorCount,
metadataSize: parsedMetadata.totalMetadataSize,
tensorInfoSize: parsedMetadata.totalTensorInfoSize,
metadata: parsedMetadata.metadata,
tensorInfo: parsedMetadata.fullTensorInfo
}, undefined, 4);
};
const outputJson = getOutputJson();
if (outputToJsonFile != null) {
const filePath = path.resolve(process.cwd(), outputToJsonFile);
await fs.writeFile(filePath, outputJson, "utf8");
console.info(`${chalk.yellow("JSON written to file:")} ${filePath}`);
}
else {
console.info(outputJson);
}
}
else if (key != null) {
const keyValue = key === chatTemplateKey
? tryFormattingJinja(getGgufMetadataKeyValue(parsedMetadata.metadata, "tokenizer.chat_template"))
: getGgufMetadataKeyValue(parsedMetadata.metadata, key);
if (keyValue === undefined) {
console.log(`${chalk.red("Metadata key not found:")} ${key}`);
process.exit(1);
}
const metadataPrettyPrintOptions = {
maxArrayValues: fullMetadataArrays
? undefined
: 10,
useNumberGrouping: true,
maxArrayItemsWidth: process.stdout.columns - 1
};
console.info(`${chalk.yellow("Metadata key:")} ${prettyPrintObject(key)}`);
console.info(`${chalk.yellow("Metadata:")} ${typeof keyValue === "string"
? keyValue
: prettyPrintObject(keyValue, undefined, metadataPrettyPrintOptions)}`);
}
else {
const metadataPrettyPrintOptions = {
maxArrayValues: fullMetadataArrays
? undefined
: 10,
useNumberGrouping: true,
maxArrayItemsWidth: process.stdout.columns - 1
};
const tensorInfoPrettyPrintOptions = {
maxArrayValues: fullTensorInfo
? undefined
: 4,
useNumberGrouping: true,
maxArrayItemsWidth: process.stdout.columns - 1,
multilineObjects: false
};
const numberLocaleFormattingOptions = {
style: "decimal",
useGrouping: true
};
if (parsedMetadata.splicedParts > 1)
console.info(`${chalk.yellow("Spliced parts:")} ${parsedMetadata.splicedParts}`);
console.info(`${chalk.yellow("GGUF version:")} ${parsedMetadata.version}`);
console.info(`${chalk.yellow("Tensor count:")} ${parsedMetadata.totalTensorCount.toLocaleString("en-US", numberLocaleFormattingOptions)}`);
console.info(`${chalk.yellow("Metadata size:")} ${toBytes(parsedMetadata.totalMetadataSize)}`);
console.info(`${chalk.yellow("Tensor info size:")} ${toBytes(parsedMetadata.totalTensorInfoSize)}`);
console.info(`${chalk.yellow("File type:")} ${fileTypeName ?? ""} ${chalk.white(`(${parsedMetadata.metadata.general?.file_type})`)}`);
console.info(`${chalk.yellow("Metadata:")} ${prettyPrintObject(parsedMetadata.metadata, undefined, metadataPrettyPrintOptions)}`);
console.info(`${chalk.yellow("Tensor info:")} ${prettyPrintObject(parsedMetadata.fullTensorInfo, undefined, tensorInfoPrettyPrintOptions)}`);
}
}
};
// these fields are added by the parser for ease of use and are not found in the gguf file itself
function removeAdditionalTensorInfoFields(tensorInfo) {
if (tensorInfo == null)
return;
for (const tensor of tensorInfo) {
delete tensor.fileOffset;
delete tensor.filePart;
}
}
function tryFormattingJinja(template) {
if (typeof template !== "string")
return template;
try {
const parsedTemplate = new Template(template);
return parsedTemplate.format({
indent: 4
}) ?? template;
}
catch (err) {
return template;
}
}
//# sourceMappingURL=InspectGgufCommand.js.map
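The command above is essentially a formatter around readGgufFileInfo, which can also be called directly. A minimal sketch, assuming readGgufFileInfo is exported from the package root; the model path is hypothetical, and a URL or model URI can be passed instead to read only the metadata without downloading the whole file:

import {readGgufFileInfo} from "node-llama-cpp";

const ggufFileInfo = await readGgufFileInfo("./models/example-model.gguf");

console.info("GGUF version:", ggufFileInfo.version);
console.info("Architecture:", ggufFileInfo.metadata?.general?.architecture);
console.info("File type:", ggufFileInfo.metadata?.general?.file_type);
console.info("Tensor count:", ggufFileInfo.totalTensorCount);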

node_modules/node-llama-cpp/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map generated vendored Normal file
File diff suppressed because one or more lines are too long

node_modules/node-llama-cpp/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts generated vendored Normal file
View File

@@ -0,0 +1,4 @@
import { CommandModule } from "yargs";
type InspectGpuCommand = {};
export declare const InspectGpuCommand: CommandModule<object, InspectGpuCommand>;
export {};

node_modules/node-llama-cpp/dist/cli/commands/inspect/commands/InspectGpuCommand.js generated vendored Normal file
View File

@@ -0,0 +1,249 @@
import os from "os";
import chalk from "chalk";
import { getLlamaForOptions } from "../../../../bindings/getLlama.js";
import { detectAvailableComputeLayers } from "../../../../bindings/utils/detectAvailableComputeLayers.js";
import { getPlatform } from "../../../../bindings/utils/getPlatform.js";
import { LlamaLogLevel } from "../../../../bindings/types.js";
import { getPrettyBuildGpuName } from "../../../../bindings/consts.js";
import { getModuleVersion } from "../../../../utils/getModuleVersion.js";
import { withCliCommandDescriptionDocsUrl } from "../../../utils/withCliCommandDescriptionDocsUrl.js";
import { builtinLlamaCppGitHubRepo, documentationPageUrls } from "../../../../config.js";
import { getPlatformInfo } from "../../../../bindings/utils/getPlatformInfo.js";
import { getLinuxDistroInfo } from "../../../../bindings/utils/getLinuxDistroInfo.js";
import { isRunningUnderRosetta } from "../../../utils/isRunningUnderRosetta.js";
import { toBytes } from "../../../utils/toBytes.js";
import { getBinariesGithubRelease } from "../../../../bindings/utils/binariesGithubRelease.js";
import { getClonedLlamaCppRepoReleaseInfo } from "../../../../bindings/utils/cloneLlamaCppRepo.js";
export const InspectGpuCommand = {
command: "gpu",
describe: withCliCommandDescriptionDocsUrl("Show the detected GPU types and their VRAM usage", documentationPageUrls.CLI.Inspect.GPU),
async handler() {
const platform = getPlatform();
const arch = process.arch;
const availableComputeLayers = await detectAvailableComputeLayers({ platform });
const gpusToLogVramUsageOf = [];
const gpuToLlama = new Map();
let lastLlama;
async function loadLlamaForGpu(gpu) {
if (!gpuToLlama.has(gpu)) {
const loadedLlama = await getLlamaForGpu(gpu);
gpuToLlama.set(gpu, loadedLlama);
if (loadedLlama != null)
lastLlama = loadedLlama;
}
return gpuToLlama.get(gpu);
}
if (platform === "linux") {
const linuxDistroInfo = await getLinuxDistroInfo();
if (linuxDistroInfo.prettyName !== "")
console.info(`${chalk.yellow("OS:")} ${linuxDistroInfo.prettyName} ${chalk.dim("(" + os.arch() + ")")}`);
else
console.info(`${chalk.yellow("OS:")} ${linuxDistroInfo.name || os.type()} ${linuxDistroInfo.version || os.release()} ${chalk.dim("(" + os.arch() + ")")}`);
}
else {
const platformInfo = await getPlatformInfo();
const osName = platformInfo.name === "Unknown"
? os.type()
: platformInfo.name;
console.info(`${chalk.yellow("OS:")} ${osName} ${platformInfo.version} ${chalk.dim("(" + os.arch() + ")")}`);
}
if (process.versions.node != null)
console.info(`${chalk.yellow("Node:")} ${process.versions.node} ${chalk.dim("(" + arch + ")")}`);
if (process.versions.bun != null)
console.info(`${chalk.yellow("Bun:")} ${process.versions.bun}`);
const typeScriptVersion = await getInstalledTypescriptVersion();
if (typeScriptVersion != null)
console.info(`${chalk.yellow("TypeScript:")} ${typeScriptVersion}`);
try {
const moduleVersion = await getModuleVersion();
if (moduleVersion != null) {
console.info();
console.info(`${chalk.yellow("node-llama-cpp:")} ${moduleVersion}`);
}
}
catch (err) {
// do nothing
}
try {
const prebuiltBinariesRelease = await getBinariesGithubRelease();
console.info(`${chalk.yellow("Prebuilt binaries:")} ${prebuiltBinariesRelease}`);
}
catch (err) {
// do nothing
}
try {
const clonedLlamaCppRelease = await getClonedLlamaCppRepoReleaseInfo();
if (clonedLlamaCppRelease != null)
console.info(`${chalk.yellow("Cloned source:")} ${clonedLlamaCppRelease.tag}` + (clonedLlamaCppRelease.llamaCppGithubRepo !== builtinLlamaCppGitHubRepo
? ` (${clonedLlamaCppRelease.llamaCppGithubRepo})`
: ""));
}
catch (err) {
// do nothing
}
console.info();
if (platform === "mac" && arch === "arm64") {
const llama = await loadLlamaForGpu("metal");
if (llama == null) {
console.info(`${chalk.yellow("Metal:")} ${chalk.red("Metal is detected, but using it failed")}`);
}
else {
console.info(`${chalk.yellow("Metal:")} ${chalk.green("available")}`);
gpusToLogVramUsageOf.push("metal");
}
}
else if (platform === "mac") {
if (await isRunningUnderRosetta()) {
console.error(chalk.red("llama.cpp is not supported under Rosetta on Apple Silicon Macs. " +
"Ensure that you're using a native arm64 Node.js installation."));
console.error("process.platform: " + process.platform + ", process.arch: " + process.arch);
console.error("troubleshooting: " + documentationPageUrls.troubleshooting.RosettaIllegalHardwareInstruction);
}
console.info(`${chalk.yellow("Metal:")} ${chalk.red("not supported by llama.cpp on Intel Macs")}`);
const llama = await loadLlamaForGpu(false);
if (llama == null) {
console.info(`${chalk.yellow("CPU:")} ${chalk.red("Loading a binding with only CPU support failed")}`);
}
}
if (availableComputeLayers.cuda.hasNvidiaDriver && !availableComputeLayers.cuda.hasCudaRuntime) {
console.info(`${chalk.yellow("CUDA:")} ${chalk.red("NVIDIA driver is installed, but CUDA runtime is not")}`);
console.info(chalk.yellow("To resolve errors related to CUDA, see the CUDA guide: ") + documentationPageUrls.CUDA);
}
else if (availableComputeLayers.cuda.hasCudaRuntime && !availableComputeLayers.cuda.hasNvidiaDriver) {
console.info(`${chalk.yellow("CUDA:")} ${chalk.red("CUDA runtime is installed, but NVIDIA driver is not")}`);
console.info(chalk.yellow("To resolve errors related to CUDA, see the CUDA guide: ") + documentationPageUrls.CUDA);
}
else if (availableComputeLayers.cuda.hasCudaRuntime && availableComputeLayers.cuda.hasNvidiaDriver) {
const llama = await loadLlamaForGpu("cuda");
if (llama == null) {
console.info(`${chalk.yellow("CUDA:")} ${chalk.red("CUDA is detected, but using it failed")}`);
console.info(chalk.yellow("To resolve errors related to CUDA, see the CUDA guide: ") + documentationPageUrls.CUDA);
}
else {
console.info(`${chalk.yellow("CUDA:")} ${chalk.green("available")}`);
gpusToLogVramUsageOf.push("cuda");
if (llama._hadErrorLogs)
console.info(chalk.yellow("To resolve errors related to CUDA, see the CUDA guide: ") + documentationPageUrls.CUDA);
}
}
if (availableComputeLayers.vulkan) {
const llama = await loadLlamaForGpu("vulkan");
if (llama == null) {
console.info(`${chalk.yellow("Vulkan:")} ${chalk.red("Vulkan is detected, but using it failed")}`);
console.info(chalk.yellow("To resolve errors related to Vulkan, see the Vulkan guide: ") + documentationPageUrls.Vulkan);
}
else {
console.info(`${chalk.yellow("Vulkan:")} ${chalk.green("available")}`);
gpusToLogVramUsageOf.push("vulkan");
if (llama._hadErrorLogs)
console.info(chalk.yellow("To resolve errors related to Vulkan, see the Vulkan guide: ") + documentationPageUrls.Vulkan);
}
}
if (lastLlama == null)
await loadLlamaForGpu(false);
for (const gpu of gpusToLogVramUsageOf) {
const llama = gpuToLlama.get(gpu);
if (llama == null || llama.gpu !== gpu)
continue;
console.info();
await logGpuVramUsage(llama);
}
console.info();
await logRamUsage(lastLlama?.cpuMathCores);
if (lastLlama != null) {
await logSwapUsage(lastLlama);
console.info(`${chalk.yellow("mmap:")} ${lastLlama.supportsMmap ? "supported" : "unsupported"}`);
}
}
};
async function getLlamaForGpu(gpu) {
try {
// if you're reading this line, then you're probably looking for the `dryRun` option on `getLlama`
return await getLlamaForOptions({
gpu: gpu,
build: "never",
progressLogs: false,
logLevel: LlamaLogLevel.warn,
vramPadding: 0
}, {
skipLlamaInit: true,
pipeBinaryTestErrorLogs: true
});
}
catch (err) {
return undefined;
}
}
async function logGpuVramUsage(llama) {
try {
const gpuName = getPrettyBuildGpuName(llama.gpu);
const vramState = await llama.getVramState();
const gpuDeviceNames = await llama.getGpuDeviceNames();
if (gpuDeviceNames.length > 0)
console.info(`${chalk.yellow(`${gpuName} device${gpuDeviceNames.length > 1 ? "s" : ""}:`)} ${gpuDeviceNames.join(", ")}`);
console.info(`${chalk.yellow(`${gpuName} used VRAM:`)} ${getPercentageString(vramState.used, vramState.total)}% ${chalk.gray("(" + toBytes(vramState.used) + "/" + toBytes(vramState.total) + ")")}`);
console.info(`${chalk.yellow(`${gpuName} free VRAM:`)} ${getPercentageString(vramState.free, vramState.total)}% ${chalk.gray("(" + toBytes(vramState.free) + "/" + toBytes(vramState.total) + ")")}`);
if (vramState.unifiedSize > 0)
console.info(`${chalk.yellow(`${gpuName} unified memory:`)} ${toBytes(vramState.unifiedSize)} ${chalk.gray("(" + getPercentageString(vramState.unifiedSize, vramState.total) + "%)")}`);
}
catch (err) { }
}
async function logRamUsage(cpuMathCores) {
const totalMemory = os.totalmem();
const freeMemory = os.freemem();
const usedMemory = totalMemory - freeMemory;
const cpuDeviceNames = Array.from(new Set(os.cpus()
.map((cpu) => (cpu.model?.trim?.() ?? ""))
.filter((deviceName) => deviceName.length > 0)));
if (cpuDeviceNames.length > 0)
console.info(`${chalk.yellow("CPU model" + (cpuDeviceNames.length > 1 ? "s" : "") + ":")} ${cpuDeviceNames.join(", ")}`);
if (cpuMathCores != null)
console.info(`${chalk.yellow("Math cores:")} ${cpuMathCores}`);
console.info(`${chalk.yellow("Used RAM:")} ${getPercentageString(usedMemory, totalMemory)}% ${chalk.gray("(" + toBytes(usedMemory) + "/" + toBytes(totalMemory) + ")")}`);
console.info(`${chalk.yellow("Free RAM:")} ${getPercentageString(freeMemory, totalMemory)}% ${chalk.gray("(" + toBytes(freeMemory) + "/" + toBytes(totalMemory) + ")")}`);
}
async function logSwapUsage(llama) {
const swapState = await llama.getSwapState();
console.info(`${chalk.yellow("Used swap:")} ${getPercentageString(swapState.used, swapState.allocated)}% ${chalk.gray("(" + toBytes(swapState.used) + "/" + toBytes(swapState.allocated) + ")")}`);
console.info(`${chalk.yellow("Max swap size:")} ${swapState.maxSize === Infinity ? "dynamic" : toBytes(swapState.maxSize)}`);
}
function getPercentageString(amount, total) {
if (total === 0)
return "0";
return String(Math.floor((amount / total) * 100 * 100) / 100);
}
async function getInstalledTypescriptVersion() {
try {
const ts = await import("typescript");
const version = ts?.version ?? ts?.default?.version;
if (version != null && typeof version === "string" && version.length > 0)
return version;
return null;
}
catch (err) {
return null;
}
}
// // simple script to copy console logs as ansi to clipboard. Used to update the documentation
// import {spawn} from "child_process";
// const pendingLog: string[] = [];
// const originalConsoleInfo = console.info;
// console.info = function info(...args: any[]) {
// originalConsoleInfo.call(console, ...args);
// pendingLog.push(args.join(" "));
// };
//
// function copyLogs() {
// const res = pendingLog.join("\n");
//
// pbcopy(res);
// originalConsoleInfo.call(console, "Copied logs to clipboard");
// }
// function pbcopy(text: string) {
// const pbcopyProcess = spawn("pbcopy");
// pbcopyProcess.stdin.write(text);
// pbcopyProcess.stdin.end();
// }
//
// process.on("exit", copyLogs);
//# sourceMappingURL=InspectGpuCommand.js.map
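The information the command above prints is also available programmatically: getVramState, getGpuDeviceNames, llama.gpu, and supportsMmap are the same calls used in the handler, and getLlama is the public entry point for obtaining such an instance. A minimal sketch, assuming calling getLlama with no arguments performs the usual automatic GPU detection:

import {getLlama} from "node-llama-cpp";

const llama = await getLlama(); // detects Metal/CUDA/Vulkan and falls back to CPU

console.info("GPU:", llama.gpu); // false when running CPU-only
if (llama.gpu !== false) {
    const [vramState, deviceNames] = await Promise.all([
        llama.getVramState(),
        llama.getGpuDeviceNames()
    ]);
    console.info("Devices:", deviceNames.join(", "));
    console.info("VRAM used:", vramState.used, "of", vramState.total, "bytes");
}
console.info("mmap supported:", llama.supportsMmap);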

node_modules/node-llama-cpp/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map generated vendored Normal file
File diff suppressed because one or more lines are too long

node_modules/node-llama-cpp/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts generated vendored Normal file
View File

@@ -0,0 +1,23 @@
import { CommandModule } from "yargs";
import { BuildGpu } from "../../../../bindings/types.js";
type InspectMeasureCommand = {
modelPath?: string;
header?: string[];
gpu?: BuildGpu | "auto";
minLayers: number;
maxLayers?: number;
minContextSize: number;
maxContextSize?: number;
flashAttention?: boolean;
swaFullCache?: boolean;
batchSize?: number;
measures: number;
memory: "vram" | "ram" | "all";
noMmap: boolean;
noDirectIo: boolean;
printHeaderBeforeEachLayer?: boolean;
evaluateText?: string;
repeatEvaluateText?: number;
};
export declare const InspectMeasureCommand: CommandModule<object, InspectMeasureCommand>;
export {};

node_modules/node-llama-cpp/dist/cli/commands/inspect/commands/InspectMeasureCommand.js generated vendored Normal file
View File

@@ -0,0 +1,828 @@
import path from "path";
import process from "process";
import { fileURLToPath } from "url";
import { fork } from "node:child_process";
import os from "os";
import chalk from "chalk";
import stripAnsi from "strip-ansi";
import { readGgufFileInfo } from "../../../../gguf/readGgufFileInfo.js";
import { resolveCommandGgufPath } from "../../../utils/resolveCommandGgufPath.js";
import { getLlama } from "../../../../bindings/getLlama.js";
import { LlamaLogLevel, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption } from "../../../../bindings/types.js";
import { getConsoleLogPrefix } from "../../../../utils/getConsoleLogPrefix.js";
import { ConsoleTable } from "../../../utils/ConsoleTable.js";
import { GgufInsights } from "../../../../gguf/insights/GgufInsights.js";
import { resolveHeaderFlag } from "../../../utils/resolveHeaderFlag.js";
import { getPrettyBuildGpuName } from "../../../../bindings/consts.js";
import { getReadablePath } from "../../../utils/getReadablePath.js";
import { withCliCommandDescriptionDocsUrl } from "../../../utils/withCliCommandDescriptionDocsUrl.js";
import { documentationPageUrls } from "../../../../config.js";
import { toBytes } from "../../../utils/toBytes.js";
import { padSafeContextSize } from "../../../../evaluator/LlamaContext/utils/padSafeContextSize.js";
import { getPlatform } from "../../../../bindings/utils/getPlatform.js";
export const InspectMeasureCommand = {
command: "measure [modelPath]",
describe: withCliCommandDescriptionDocsUrl("Measure VRAM consumption of a GGUF model file with all possible combinations of gpu layers and context sizes", documentationPageUrls.CLI.Inspect.Measure),
builder(yargs) {
return yargs
.option("modelPath", {
alias: ["m", "model", "path", "url", "uri"],
type: "string",
description: "Model file to use for the measurements. Can be a path to a local file or a URI of a model file to download. Leave empty to choose from a list of recommended models"
})
.option("header", {
alias: ["H"],
type: "string",
array: true,
description: "Headers to use when downloading a model from a URL, in the format `key: value`. You can pass this option multiple times to add multiple headers."
})
.option("gpu", {
type: "string",
// yargs types don't support passing `false` as a choice, although it is supported by yargs
choices: nodeLlamaCppGpuOptions,
coerce: (value) => {
if (value == null || value == "")
return undefined;
return parseNodeLlamaCppGpuOption(value);
},
defaultDescription: "Uses the latest local build, and falls back to \"auto\"",
description: "Compute layer implementation type to use for llama.cpp. If omitted, uses the latest local build, and falls back to \"auto\""
})
.option("minLayers", {
alias: "mnl",
type: "number",
default: 1,
description: "Minimum number of layers to offload to the GPU"
})
.option("maxLayers", {
alias: "mxl",
type: "number",
default: -1,
defaultDescription: "All layers",
description: "Maximum number of layers to offload to the GPU"
})
.option("minContextSize", {
alias: "mncs",
type: "number",
default: 512,
description: "Minimum context size"
})
.option("maxContextSize", {
alias: "mxcs",
type: "number",
default: -1,
defaultDescription: "Train context size",
description: "Maximum context size"
})
.option("flashAttention", {
alias: "fa",
type: "boolean",
default: false,
description: "Enable flash attention for the context"
})
.option("swaFullCache", {
alias: "noSwa",
type: "boolean",
default: false,
description: "Disable SWA (Sliding Window Attention) on supported models"
})
.option("batchSize", {
alias: "b",
type: "number",
description: "Batch size to use for the model context"
})
.option("measures", {
alias: "n",
type: "number",
default: 10,
description: "Number of context size measurements to take for each GPU layer count"
})
.option("memory", {
type: "string",
choices: ["vram", "ram", "all"],
default: "vram",
description: "Type of memory to measure"
})
.option("noMmap", {
type: "boolean",
default: false,
description: "Disable mmap (memory-mapped file) usage"
})
.option("noDirectIo", {
type: "boolean",
default: false,
description: "Disable Direct I/O usage when available"
})
.option("printHeaderBeforeEachLayer", {
alias: "ph",
type: "boolean",
default: true,
description: "Print header before each layer's measures"
})
.option("evaluateText", {
alias: ["evaluate", "et"],
type: "string",
description: "Text to evaluate with the model"
})
.option("repeatEvaluateText", {
alias: ["repeatEvaluate", "ret"],
type: "number",
default: 1,
description: "Number of times to repeat the evaluation text before sending it for evaluation, in order to make it longer"
});
},
async handler({ modelPath: ggufPath, header: headerArg, gpu, minLayers, maxLayers, minContextSize, maxContextSize, flashAttention, swaFullCache, batchSize, measures = 10, memory: measureMemoryType, noMmap, noDirectIo, printHeaderBeforeEachLayer = true, evaluateText, repeatEvaluateText }) {
if (maxLayers === -1)
maxLayers = undefined;
if (maxContextSize === -1)
maxContextSize = undefined;
if (minLayers < 1)
minLayers = 1;
const exitAfterEachMeasurement = measureMemoryType === "ram" || measureMemoryType === "all";
const headers = resolveHeaderFlag(headerArg);
// ensure a llama build is available
const llama = gpu == null
? await getLlama("lastBuild", {
logLevel: LlamaLogLevel.error
})
: await getLlama({
gpu,
logLevel: LlamaLogLevel.error
});
const platform = getPlatform();
const useMmap = !noMmap && llama.supportsMmap;
const useDirectIo = !noDirectIo;
const resolvedGgufPath = await resolveCommandGgufPath(ggufPath, llama, headers, {
flashAttention, swaFullCache, useMmap
});
console.info(`${chalk.yellow("File:")} ${getReadablePath(resolvedGgufPath)}`);
console.info(`${chalk.yellow("GPU:")} ${getPrettyBuildGpuName(llama.gpu)}${gpu == null ? chalk.gray(" (last build)") : ""}`);
console.info(chalk.yellow("mmap:") + " " + (!llama.supportsMmap
? "unsupported"
: useMmap
? "enabled"
: "disabled"));
if (platform !== "mac") // Direct I/O is not supported on macOS
console.info(chalk.yellow("Direct I/O:") + " " + (useDirectIo
? "enabled"
: "disabled"));
if (measureMemoryType === "ram" || measureMemoryType === "all")
console.warn(chalk.yellow("RAM measurements are greatly inaccurate due to OS optimizations that prevent released memory from being immediately available"));
console.info();
const ggufMetadata = await readGgufFileInfo(resolvedGgufPath, {
sourceType: "filesystem"
});
const ggufInsights = await GgufInsights.from(ggufMetadata, llama);
const totalVram = (await llama.getVramState()).total;
const totalRam = os.totalmem();
let lastGpuLayers = maxLayers ?? ggufInsights.totalLayers;
let previousContextSizeCheck = undefined;
const measureTable = getMeasureTable(measureMemoryType);
measureTable.logHeader({ drawRowSeparator: !printHeaderBeforeEachLayer });
while (lastGpuLayers >= (minLayers ?? 0)) {
let printedAlreadyWithThisProcess = false;
let hadSuccessInThisProcess = false;
const getNewProcessValue = () => {
if (printedAlreadyWithThisProcess)
return undefined;
printedAlreadyWithThisProcess = true;
return chalk.green("*");
};
const done = await measureModel({
modelPath: resolvedGgufPath,
useMmap,
useDirectIo,
gpu: gpu == null
? undefined
: llama.gpu,
maxGpuLayers: lastGpuLayers,
minGpuLayers: minLayers,
initialMaxContextSize: previousContextSizeCheck,
maxContextSize,
minContextSize,
flashAttention,
swaFullCache,
batchSize,
tests: measures,
evaluateText: evaluateText == null
? undefined
: evaluateText.repeat(repeatEvaluateText ?? 1),
exitAfterMeasurement: exitAfterEachMeasurement,
onInfo({ gpuLayers, result }) {
if (lastGpuLayers !== gpuLayers) {
lastGpuLayers = gpuLayers;
previousContextSizeCheck = undefined;
measureTable.logLine({});
if (printHeaderBeforeEachLayer)
measureTable.logHeader({ drawRowSeparator: false });
}
if (result.type === "crash") {
if (!hadSuccessInThisProcess) {
measureTable.logLine({
newProcess: getNewProcessValue(),
type: chalk.redBright("Crash"),
gpuLayers: String(lastGpuLayers),
contextSize: previousContextSizeCheck != null
? String(previousContextSizeCheck)
: chalk.red(result.result),
estimatedModelVram: previousContextSizeCheck == null
? undefined
: chalk.red(result.result)
});
lastGpuLayers--;
}
}
else if (result.type === "error") {
previousContextSizeCheck = result.contextSize;
hadSuccessInThisProcess = true;
measureTable.logLine({
newProcess: getNewProcessValue(),
type: chalk.red("Error"),
gpuLayers: String(lastGpuLayers),
contextSize: previousContextSizeCheck != null
? String(previousContextSizeCheck)
: chalk.red(result.error),
estimatedModelVram: previousContextSizeCheck == null
? undefined
: chalk.red(result.error)
});
}
else if (result.type === "success") {
previousContextSizeCheck = result.contextSize;
hadSuccessInThisProcess = true;
const modelResourceEstimation = ggufInsights.estimateModelResourceRequirements({
gpuLayers: lastGpuLayers,
useMmap
});
const modelVramEstimation = modelResourceEstimation.gpuVram;
const modelVramEstimationDiffBytes = (modelVramEstimation < result.modelVramUsage ? "-" : "") +
toBytes(Math.abs(result.modelVramUsage - modelVramEstimation));
const modelVramEstimationDiffText = modelVramEstimationDiffBytes.padEnd(9, " ") + " " +
padStartAnsi("(" + renderDiffPercentageWithColors(((modelVramEstimation / result.modelVramUsage) - 1) * 100) + ")", 9);
const modelRamEstimation = modelResourceEstimation.cpuRam;
const modelRamEstimationDiffBytes = (modelRamEstimation < result.modelRamUsage ? "-" : "") +
toBytes(Math.abs(result.modelRamUsage - modelRamEstimation));
const modelRamEstimationDiffText = modelRamEstimationDiffBytes.padEnd(9, " ") + " " +
padStartAnsi("(" + renderDiffPercentageWithColors(((modelRamEstimation / result.modelRamUsage) - 1) * 100) + ")", 9);
const contextResourceEstimation = previousContextSizeCheck == null
? undefined
: ggufInsights.estimateContextResourceRequirements({
contextSize: previousContextSizeCheck,
modelGpuLayers: lastGpuLayers,
flashAttention,
swaFullCache,
batchSize
});
const contextVramEstimation = contextResourceEstimation?.gpuVram;
const contextVramEstimationDiffBytes = (result.contextVramUsage == null || contextVramEstimation == null)
? undefined
: ((contextVramEstimation < result.contextVramUsage ? "-" : "") +
toBytes(Math.abs(result.contextVramUsage - contextVramEstimation)));
const contextVramEstimationDiffText = (contextVramEstimation == null || contextVramEstimationDiffBytes == null || result.contextVramUsage == null)
? undefined
: (contextVramEstimationDiffBytes.padEnd(9, " ") + " " +
padStartAnsi("(" + renderDiffPercentageWithColors(((contextVramEstimation / result.contextVramUsage) - 1) * 100) + ")", 9));
const contextRamEstimation = contextResourceEstimation?.cpuRam;
const contextRamEstimationDiffBytes = (result.contextRamUsage == null || contextRamEstimation == null)
? undefined
: ((contextRamEstimation < result.contextRamUsage ? "-" : "") +
toBytes(Math.abs(result.contextRamUsage - contextRamEstimation)));
const contextRamEstimationDiffText = (contextRamEstimation == null || contextRamEstimationDiffBytes == null || result.contextRamUsage == null)
? undefined
: (contextRamEstimationDiffBytes.padEnd(9, " ") + " " +
padStartAnsi("(" + renderDiffPercentageWithColors(((contextRamEstimation / result.contextRamUsage) - 1) * 100) + ")", 9));
measureTable.logLine({
newProcess: getNewProcessValue(),
type: previousContextSizeCheck == null
? "Model"
: "Context",
gpuLayers: String(lastGpuLayers),
contextSize: previousContextSizeCheck != null
? String(previousContextSizeCheck)
: undefined,
estimatedModelVram: toBytes(modelVramEstimation),
actualModelVram: toBytes(result.modelVramUsage),
modelVramEstimationDiff: modelVramEstimationDiffText,
estimatedModelRam: toBytes(modelRamEstimation),
actualModelRam: toBytes(result.modelRamUsage),
modelRamEstimationDiff: modelRamEstimationDiffText,
estimatedContextVram: contextVramEstimation == null
? undefined
: toBytes(contextVramEstimation),
actualContextVram: result.contextVramUsage == null
? undefined
: toBytes(result.contextVramUsage),
contextVramEstimationDiff: contextVramEstimationDiffText,
totalVramUsage: ((result.totalVramUsage / totalVram) * 100).toFixed(2).padStart(5, "0") + "% " +
chalk.gray("(" + toBytes(result.totalVramUsage) + "/" + toBytes(totalVram) + ")"),
estimatedContextRam: contextRamEstimation == null
? undefined
: toBytes(contextRamEstimation),
actualContextRam: result.contextRamUsage == null
? undefined
: toBytes(result.contextRamUsage),
contextRamEstimationDiff: contextRamEstimationDiffText,
totalRamUsage: ((result.totalRamUsage / totalRam) * 100).toFixed(2).padStart(5, "0") + "% " +
chalk.gray("(" + toBytes(result.totalRamUsage) + "/" + toBytes(totalRam) + ")")
});
}
}
});
if (done)
break;
}
}
};
function getMeasureTable(memoryType) {
return new ConsoleTable([{
key: "newProcess",
title: " ",
width: 1
}, {
key: "type",
title: "Type",
width: Math.max("Type".length, "Model".length, "Context".length),
canSpanOverEmptyColumns: true
}, {
key: "gpuLayers",
title: "Layers",
width: "Layers".length,
canSpanOverEmptyColumns: true
}, {
key: "contextSize",
title: "Context size",
width: "Context size".length,
canSpanOverEmptyColumns: true
}, {
key: "estimatedModelVram",
visible: memoryType === "vram" || memoryType === "all",
title: "Estimated model VRAM",
width: "Estimated model VRAM".length,
canSpanOverEmptyColumns: true
}, {
key: "actualModelVram",
visible: memoryType === "vram" || memoryType === "all",
title: "Model VRAM",
width: "Model VRAM".length
}, {
key: "modelVramEstimationDiff",
visible: memoryType === "vram" || memoryType === "all",
title: "Diff",
width: Math.max("Diff".length, 9 + 1 + 9)
}, {
key: "estimatedModelRam",
visible: memoryType === "ram" || memoryType === "all",
title: "Estimated model RAM",
width: "Estimated model RAM".length,
canSpanOverEmptyColumns: true
}, {
key: "actualModelRam",
visible: memoryType === "ram" || memoryType === "all",
title: "Model RAM",
width: "Model RAM".length
}, {
key: "modelRamEstimationDiff",
visible: memoryType === "ram" || memoryType === "all",
title: "Diff",
width: Math.max("Diff".length, 9 + 1 + 9)
}, {
key: "estimatedContextVram",
visible: memoryType === "vram" || memoryType === "all",
title: "Estimated context VRAM",
width: "Estimated context VRAM".length
}, {
key: "actualContextVram",
visible: memoryType === "vram" || memoryType === "all",
title: "Context VRAM",
width: "Context VRAM".length
}, {
key: "contextVramEstimationDiff",
visible: memoryType === "vram" || memoryType === "all",
title: "Diff",
width: Math.max("Diff".length, 9 + 1 + 9)
}, {
key: "totalVramUsage",
visible: memoryType === "vram" || memoryType === "all",
title: "VRAM usage",
width: Math.max("VRAM usage".length, 8 + 1 + 8 + 1 + 8)
}, {
key: "estimatedContextRam",
visible: memoryType === "ram" || memoryType === "all",
title: "Estimated context RAM",
width: "Estimated context RAM".length
}, {
key: "actualContextRam",
visible: memoryType === "ram" || memoryType === "all",
title: "Context RAM",
width: "Context RAM".length
}, {
key: "contextRamEstimationDiff",
visible: memoryType === "ram" || memoryType === "all",
title: "Diff",
width: Math.max("Diff".length, 9 + 1 + 9)
}, {
key: "totalRamUsage",
visible: memoryType === "ram" || memoryType === "all",
title: "RAM usage",
width: Math.max("RAM usage".length, 8 + 1 + 8 + 1 + 8)
}]);
}
function renderDiffPercentageWithColors(percentage, { greenBright = 2, green = 6, yellow = 10, yellowBright = 14 } = {}) {
const percentageText = percentage.toFixed(2).padStart(5, "0") + "%";
const absPercentage = Math.abs(percentage);
if (absPercentage < greenBright)
return chalk.greenBright(percentageText);
else if (absPercentage < green)
return chalk.green(percentageText);
else if (absPercentage < yellow)
return chalk.yellow(percentageText);
else if (absPercentage < yellowBright)
return chalk.yellowBright(percentageText);
return chalk.red(percentageText);
}
const __filename = fileURLToPath(import.meta.url);
const detectedFileName = path.basename(__filename);
const expectedFileName = "InspectMeasureCommand";
async function measureModel({ modelPath, useMmap, useDirectIo, gpu, tests, initialMaxContextSize, maxContextSize, minContextSize, maxGpuLayers, minGpuLayers, flashAttention, swaFullCache, batchSize, evaluateText, exitAfterMeasurement = false, onInfo }) {
if (!detectedFileName.startsWith(expectedFileName)) {
console.warn(getConsoleLogPrefix() +
`"${expectedFileName}.js" file is not independent, so running sub-process tests cannot be done with it\n` +
getConsoleLogPrefix() +
'To resolve this issue, make sure that "node-llama-cpp" is not bundled together with other code.');
throw new Error("Sub-process tests cannot be done with the current file");
}
const subProcess = fork(__filename, [], {
detached: false,
stdio: [null, null, null, "ipc"],
env: {
...process.env,
MEASURE_MODEL_CP: "true",
MEASURE_MODEL_CP_GPU: gpu == null
? undefined
: JSON.stringify(gpu)
}
});
let isPlannedExit = false;
let isDone = false;
let forkSucceeded = false;
let timeoutHandle = null;
const processCreationTimeout = 1000 * 60 * 5;
const stdTexts = [];
let lastGpuLayers = maxGpuLayers;
function cleanup() {
if (subProcess.exitCode == null)
subProcess.kill("SIGKILL");
if (timeoutHandle != null)
clearTimeout(timeoutHandle);
process.off("exit", cleanup);
}
process.on("exit", cleanup);
subProcess.stdout?.on("data", (data) => {
stdTexts.push(data.toString());
});
subProcess.stderr?.on("data", (data) => {
stdTexts.push(data.toString());
});
return Promise.race([
new Promise((_, reject) => {
timeoutHandle = setTimeout(() => {
if (!forkSucceeded) {
reject(new Error("Measuring using a sub-process timed out"));
cleanup();
}
}, processCreationTimeout);
}),
new Promise((resolve, reject) => {
function done() {
if (!forkSucceeded)
reject(new Error(`Measuring a model failed to run a sub-process via file "${__filename}"`));
else if (isPlannedExit)
resolve(isPlannedExit && isDone);
cleanup();
}
subProcess.on("message", (message) => {
if (message.type === "ready") {
forkSucceeded = true;
subProcess.send({
type: "start",
modelPath,
useMmap,
useDirectIo,
tests,
initialMaxContextSize,
maxContextSize,
minContextSize,
maxGpuLayers,
minGpuLayers,
flashAttention,
swaFullCache,
batchSize,
evaluateText,
exitAfterMeasurement
});
if (timeoutHandle != null) {
clearTimeout(timeoutHandle);
timeoutHandle = null;
}
}
else if (message.type === "done") {
isPlannedExit = true;
isDone = true;
subProcess.send({ type: "exit" });
}
else if (message.type === "exit") {
isPlannedExit = true;
subProcess.send({ type: "exit" });
}
else if (message.type === "error") {
lastGpuLayers = message.gpuLayers;
onInfo({
gpuLayers: lastGpuLayers,
result: {
type: "error",
error: message.error,
contextSize: message.contextSize
}
});
}
else if (message.type === "stats") {
lastGpuLayers = message.gpuLayers;
onInfo({
gpuLayers: message.gpuLayers,
result: {
type: "success",
modelVramUsage: message.modelVramUsage,
modelRamUsage: message.modelRamUsage,
contextSize: message.contextSize,
contextVramUsage: message.contextVramUsage,
contextRamUsage: message.contextRamUsage,
contextStateSize: message.contextStateSize,
totalVramUsage: message.totalVramUsage,
totalRamUsage: message.totalRamUsage
}
});
}
});
subProcess.on("exit", (code) => {
if (code !== 0 || !isPlannedExit)
onInfo({
gpuLayers: lastGpuLayers,
result: {
type: "crash",
result: stdTexts.join("")
}
});
done();
});
if (subProcess.killed || subProcess.exitCode != null) {
if (subProcess.exitCode !== 0 || !isPlannedExit)
onInfo({
gpuLayers: lastGpuLayers,
result: {
type: "crash",
result: stdTexts.join("")
}
});
done();
}
})
]);
}
if (process.env.MEASURE_MODEL_CP === "true" && process.send != null) {
void runTestWorkerLogic();
}
async function runTestWorkerLogic() {
const gpuEnvVar = process.env.MEASURE_MODEL_CP_GPU;
const llama = (gpuEnvVar == null || gpuEnvVar === "")
? await getLlama("lastBuild", {
logLevel: LlamaLogLevel.error
})
: await getLlama({
gpu: JSON.parse(gpuEnvVar),
logLevel: LlamaLogLevel.error
});
if (process.send == null)
throw new Error("No IPC channel to parent process");
function sendInfoBack(info) {
if (process.send == null)
process.exit(1);
process.send(info);
}
async function testContextSizes({ model, modelVramUsage, modelRamUsage, startContextSize, maxContextSize, minContextSize, tests, flashAttention, swaFullCache, batchSize, evaluateText, exitAfterMeasurement = false }) {
let measurementsDone = 0;
const contextSizeCheckPlan = getContextSizesCheckPlan(maxContextSize != null
? Math.min(model.trainContextSize, maxContextSize)
: model.trainContextSize, tests, minContextSize);
let currentContextSizeCheck = startContextSize == null
? -1
: getNextItemInCheckContextSizesPlan(contextSizeCheckPlan, startContextSize);
while (currentContextSizeCheck != null) {
if (currentContextSizeCheck === -1)
currentContextSizeCheck = null;
try {
const preContextVramUsage = (await llama.getVramState()).used;
const preContextRamUsage = getMemoryUsage(llama);
const context = await model.createContext({
contextSize: currentContextSizeCheck ?? (maxContextSize != null
? { max: maxContextSize }
: undefined),
ignoreMemorySafetyChecks: currentContextSizeCheck != null,
flashAttention,
swaFullCache,
batchSize,
failedCreationRemedy: false
});
if (evaluateText != null && evaluateText != "") {
const sequence = context.getSequence();
await sequence.evaluateWithoutGeneratingNewTokens(model.tokenize(evaluateText));
}
const postContextVramUsage = (await llama.getVramState()).used;
const postContextRamUsage = getMemoryUsage(llama);
measurementsDone++;
sendInfoBack({
type: "stats",
gpuLayers: model.gpuLayers,
modelVramUsage,
modelRamUsage,
contextSize: context.contextSize,
contextVramUsage: postContextVramUsage - preContextVramUsage,
contextRamUsage: postContextRamUsage - preContextRamUsage,
contextStateSize: context.stateSize,
totalVramUsage: postContextVramUsage,
totalRamUsage: postContextRamUsage
});
currentContextSizeCheck = context.contextSize;
await context.dispose();
}
catch (err) {
sendInfoBack({
type: "error",
error: String(err),
gpuLayers: model.gpuLayers,
contextSize: currentContextSizeCheck == null
? undefined
: currentContextSizeCheck
});
if (currentContextSizeCheck == null) {
currentContextSizeCheck = contextSizeCheckPlan[0];
continue;
}
}
currentContextSizeCheck = getNextItemInCheckContextSizesPlan(contextSizeCheckPlan, currentContextSizeCheck);
if (exitAfterMeasurement)
return measurementsDone;
}
return measurementsDone;
}
async function testWithGpuLayers({ modelPath, useMmap, useDirectIo, gpuLayers, tests, startContextSize, maxContextSize, minContextSize, flashAttention, swaFullCache, batchSize, evaluateText, exitAfterMeasurement = false }) {
try {
const preModelVramUsage = (await llama.getVramState()).used;
const preModelRamUsage = getMemoryUsage(llama);
const model = await llama.loadModel({
modelPath,
useMmap,
useDirectIo,
gpuLayers,
defaultContextFlashAttention: flashAttention,
defaultContextSwaFullCache: swaFullCache,
ignoreMemorySafetyChecks: true
});
const postModelVramUsage = (await llama.getVramState()).used;
const postModelRamUsage = getMemoryUsage(llama);
sendInfoBack({
type: "stats",
gpuLayers: model.gpuLayers,
modelVramUsage: postModelVramUsage - preModelVramUsage,
modelRamUsage: postModelRamUsage - preModelRamUsage,
totalVramUsage: postModelVramUsage,
totalRamUsage: postModelRamUsage
});
const measurementsDone = await testContextSizes({
model,
modelVramUsage: postModelVramUsage - preModelVramUsage,
modelRamUsage: postModelRamUsage - preModelRamUsage,
startContextSize,
maxContextSize,
minContextSize,
flashAttention,
swaFullCache,
batchSize,
tests,
evaluateText,
exitAfterMeasurement
});
await model.dispose();
return measurementsDone;
}
catch (err) {
sendInfoBack({
type: "error",
error: String(err),
gpuLayers: gpuLayers
});
}
return 0;
}
process.on("message", async (message) => {
if (message.type === "start") {
for (let gpuLayers = message.maxGpuLayers; gpuLayers >= (message.minGpuLayers ?? 0); gpuLayers--) {
if (gpuLayers == message.maxGpuLayers && message.initialMaxContextSize != null) {
const ggufInsights = await GgufInsights.from(await readGgufFileInfo(message.modelPath), llama);
const contextSizeCheckPlan = getContextSizesCheckPlan(message.maxContextSize != null
? Math.min(ggufInsights.trainContextSize ?? 4096, message.maxContextSize)
: ggufInsights.trainContextSize ?? 4096, message.tests, message.minContextSize);
const firstContextSizeCheck = getNextItemInCheckContextSizesPlan(contextSizeCheckPlan, message.initialMaxContextSize);
if (firstContextSizeCheck == null)
continue;
}
const measurementsDone = await testWithGpuLayers({
modelPath: message.modelPath,
useMmap: message.useMmap,
useDirectIo: message.useDirectIo,
gpuLayers,
tests: message.tests,
startContextSize: gpuLayers == message.maxGpuLayers
? message.initialMaxContextSize
: undefined,
maxContextSize: message.maxContextSize,
minContextSize: message.minContextSize,
flashAttention: message.flashAttention,
swaFullCache: message.swaFullCache,
batchSize: message.batchSize,
evaluateText: message.evaluateText,
exitAfterMeasurement: message.exitAfterMeasurement
});
if (measurementsDone > 0 && message.exitAfterMeasurement) {
sendInfoBack({ type: "exit" });
return;
}
}
sendInfoBack({ type: "done" });
}
else if (message.type === "exit") {
await llama.dispose();
process.exit(0);
}
});
process.send({ type: "ready" });
}
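// Builds a descending list of context sizes to measure: common sizes (256-4096) are covered first
// when they fit, then the remaining measurements are spread evenly up to the train context size,
// with every candidate padded to a safe context size.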
function getContextSizesCheckPlan(trainContextSize, tests = 10, minContextSize) {
const res = [];
let shouldStop = false;
const attemptToCoverSizes = [256, 512, 1024, 2048, 4096];
function addSize(size) {
if (size > trainContextSize) {
size = trainContextSize;
shouldStop = true;
}
if (size < 2)
size = 2;
size = padSafeContextSize(size, "up");
if (res[res.length - 1] === size) {
shouldStop = true;
return;
}
res.push(size);
}
while (!shouldStop && res.length < tests) {
const lastSize = res[res.length - 1];
if (lastSize == null) {
addSize(Math.max(minContextSize ?? 0, Math.min(attemptToCoverSizes[0], trainContextSize / tests)));
continue;
}
const stepSizesLeft = Math.floor((trainContextSize - Math.min(lastSize, attemptToCoverSizes[attemptToCoverSizes.length - 1])) / (tests - res.length));
let stopAddingAttemptedSizes = false;
for (const size of attemptToCoverSizes) {
if (stepSizesLeft > lastSize && lastSize < size && size <= trainContextSize) {
addSize(size);
stopAddingAttemptedSizes = true;
break;
}
}
if (stopAddingAttemptedSizes)
continue;
addSize(lastSize + stepSizesLeft);
}
return res.reverse();
}
function getNextItemInCheckContextSizesPlan(plan, currentSize) {
for (const size of plan) {
if (size < currentSize)
return size;
}
return null;
}
function padStartAnsi(text, length, padChar = " ") {
const textWithoutAnsi = stripAnsi(text);
return padChar.repeat(Math.max(0, length - textWithoutAnsi.length)) + text;
}
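// Returns the RAM usage figure used for the measurements: VRAM that resides in unified memory is
// subtracted from the reported total so that it is not counted as both RAM and VRAM.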
function getMemoryUsage(llama) {
const totalMemoryUsage = llama._bindings.getMemoryInfo().total;
const vramUsage = llama._bindings.getGpuVramInfo();
let memoryUsage = totalMemoryUsage;
const unifiedMemoryVramUsage = Math.min(vramUsage.unifiedSize, vramUsage.used);
if (unifiedMemoryVramUsage <= memoryUsage)
memoryUsage -= unifiedMemoryVramUsage;
return memoryUsage;
}
//# sourceMappingURL=InspectMeasureCommand.js.map
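At its core, every measurement taken by the worker above is a before/after diff of getVramState().used (and of a similar RAM figure) around loading a model and around creating a context. A stripped-down sketch of that technique, using a hypothetical model path and fixed layer/context values:

import {getLlama} from "node-llama-cpp";

const llama = await getLlama();

const preModelVram = (await llama.getVramState()).used;
const model = await llama.loadModel({
    modelPath: "./models/example-model.gguf", // hypothetical path
    gpuLayers: 20
});
const postModelVram = (await llama.getVramState()).used;
console.info("Model VRAM:", postModelVram - preModelVram, "bytes");

const preContextVram = (await llama.getVramState()).used;
const context = await model.createContext({contextSize: 2048});
const postContextVram = (await llama.getVramState()).used;
console.info("Context VRAM:", postContextVram - preContextVram, "bytes");

await context.dispose();
await model.dispose();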

node_modules/node-llama-cpp/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map generated vendored Normal file
File diff suppressed because one or more lines are too long

node_modules/node-llama-cpp/dist/cli/commands/source/SourceCommand.d.ts generated vendored Normal file
View File

@@ -0,0 +1,4 @@
import { CommandModule } from "yargs";
type SourceCommand = {};
export declare const SourceCommand: CommandModule<object, SourceCommand>;
export {};

node_modules/node-llama-cpp/dist/cli/commands/source/SourceCommand.js generated vendored Normal file
View File

@@ -0,0 +1,19 @@
import { withCliCommandDescriptionDocsUrl } from "../../utils/withCliCommandDescriptionDocsUrl.js";
import { documentationPageUrls } from "../../../config.js";
import { DownloadCommand } from "./commands/DownloadCommand.js";
import { BuildCommand } from "./commands/BuildCommand.js";
import { ClearCommand } from "./commands/ClearCommand.js";
export const SourceCommand = {
command: "source <command>",
describe: withCliCommandDescriptionDocsUrl("Manage `llama.cpp` source code", documentationPageUrls.CLI.Source.index),
builder(yargs) {
return yargs
.command(DownloadCommand)
.command(BuildCommand)
.command(ClearCommand);
},
async handler() {
// this function must exist, even though we do nothing here
}
};
//# sourceMappingURL=SourceCommand.js.map
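The source command is a thin yargs command group: the subcommands are registered in builder() and the empty handler exists only because yargs requires one. A minimal sketch of the same pattern with hypothetical command names:

import yargs from "yargs";
import {hideBin} from "yargs/helpers";

const ExampleSubcommand = {
    command: "download",
    describe: "An example subcommand",
    handler() {
        console.info("download invoked");
    }
};
const ExampleGroupCommand = {
    command: "source <command>",
    describe: "An example command group",
    builder: (y) => y.command(ExampleSubcommand),
    handler() {
        // must exist, even though the group itself does nothing
    }
};

await yargs(hideBin(process.argv))
    .command(ExampleGroupCommand)
    .demandCommand(1)
    .strict()
    .parse();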

node_modules/node-llama-cpp/dist/cli/commands/source/SourceCommand.js.map generated vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"SourceCommand.js","sourceRoot":"","sources":["../../../../src/cli/commands/source/SourceCommand.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,gCAAgC,EAAC,MAAM,iDAAiD,CAAC;AACjG,OAAO,EAAC,qBAAqB,EAAC,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAC,eAAe,EAAC,MAAM,+BAA+B,CAAC;AAC9D,OAAO,EAAC,YAAY,EAAC,MAAM,4BAA4B,CAAC;AACxD,OAAO,EAAC,YAAY,EAAC,MAAM,4BAA4B,CAAC;AAMxD,MAAM,CAAC,MAAM,aAAa,GAAyC;IAC/D,OAAO,EAAE,kBAAkB;IAC3B,QAAQ,EAAE,gCAAgC,CACtC,gCAAgC,EAChC,qBAAqB,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CACzC;IACD,OAAO,CAAC,KAAK;QACT,OAAO,KAAK;aACP,OAAO,CAAC,eAAe,CAAC;aACxB,OAAO,CAAC,YAAY,CAAC;aACrB,OAAO,CAAC,YAAY,CAAC,CAAC;IAC/B,CAAC;IACD,KAAK,CAAC,OAAO;QACT,2DAA2D;IAC/D,CAAC;CACJ,CAAC"}

node_modules/node-llama-cpp/dist/cli/commands/source/commands/BuildCommand.d.ts generated vendored Normal file
View File

@@ -0,0 +1,16 @@
import process from "process";
import { CommandModule } from "yargs";
import { BuildGpu } from "../../../../bindings/types.js";
type BuildCommand = {
arch?: typeof process.arch;
nodeTarget?: string;
gpu?: BuildGpu | "auto";
noUsageExample?: boolean;
};
export declare const BuildCommand: CommandModule<object, BuildCommand>;
export declare function BuildLlamaCppCommand({ arch, nodeTarget, gpu, noUsageExample,
/** @internal */
noCustomCmakeBuildOptionsInBinaryFolderName,
/** @internal */
ciMode }: BuildCommand): Promise<void>;
export {};

node_modules/node-llama-cpp/dist/cli/commands/source/commands/BuildCommand.js generated vendored Normal file
View File

@@ -0,0 +1,148 @@
import process from "process";
import chalk from "chalk";
import { compileLlamaCpp } from "../../../../bindings/utils/compileLLamaCpp.js";
import withOra from "../../../../utils/withOra.js";
import { clearTempFolder } from "../../../../utils/clearTempFolder.js";
import { builtinLlamaCppGitHubRepo, builtinLlamaCppRelease, isCI, defaultLlamaCppGpuSupport, documentationPageUrls } from "../../../../config.js";
import { downloadCmakeIfNeeded } from "../../../../utils/cmake.js";
import withStatusLogs from "../../../../utils/withStatusLogs.js";
import { logBinaryUsageExampleToConsole } from "../../../../bindings/utils/logBinaryUsageExampleToConsole.js";
import { getPlatform } from "../../../../bindings/utils/getPlatform.js";
import { resolveCustomCmakeOptions } from "../../../../bindings/utils/resolveCustomCmakeOptions.js";
import { getClonedLlamaCppRepoReleaseInfo, isLlamaCppRepoCloned } from "../../../../bindings/utils/cloneLlamaCppRepo.js";
import { nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption } from "../../../../bindings/types.js";
import { logUsedGpuTypeOption } from "../../../utils/logUsedGpuTypeOption.js";
import { getGpuTypesToUseForOption } from "../../../../bindings/utils/getGpuTypesToUseForOption.js";
import { getConsoleLogPrefix } from "../../../../utils/getConsoleLogPrefix.js";
import { getPrettyBuildGpuName } from "../../../../bindings/consts.js";
import { getPlatformInfo } from "../../../../bindings/utils/getPlatformInfo.js";
import { withCliCommandDescriptionDocsUrl } from "../../../utils/withCliCommandDescriptionDocsUrl.js";
export const BuildCommand = {
command: "build",
aliases: ["compile"],
describe: withCliCommandDescriptionDocsUrl("Compile the currently downloaded `llama.cpp` source code", documentationPageUrls.CLI.Source.Build),
builder(yargs) {
return yargs
.option("arch", {
alias: "a",
type: "string",
coerce: (value) => value,
description: "The architecture to compile llama.cpp for"
})
.option("nodeTarget", {
alias: "t",
type: "string",
description: "The Node.js version to compile llama.cpp for. Example: `v18.0.0`"
})
.option("gpu", {
type: "string",
default: defaultLlamaCppGpuSupport,
// yargs types don't support passing `false` as a choice, although it is supported by yargs
choices: nodeLlamaCppGpuOptions,
coerce: parseNodeLlamaCppGpuOption,
description: "Compute layer implementation type to use for llama.cpp"
})
.option("noUsageExample", {
alias: "nu",
type: "boolean",
default: false,
description: "Don't print code usage example after building"
})
.option("noCustomCmakeBuildOptionsInBinaryFolderName", {
type: "boolean",
hidden: true, // this is only for the CI to use
default: false,
description: "Don't include custom CMake build options in build folder name"
})
.option("ciMode", {
type: "boolean",
hidden: true, // this is only for the CI to use
default: false,
description: "Enable CI only build options"
});
},
handler: BuildLlamaCppCommand
};
export async function BuildLlamaCppCommand({ arch = undefined, nodeTarget = undefined, gpu = defaultLlamaCppGpuSupport, noUsageExample = false,
/** @internal */
noCustomCmakeBuildOptionsInBinaryFolderName = false,
/** @internal */
ciMode = false }) {
if (!(await isLlamaCppRepoCloned())) {
console.log(chalk.red('llama.cpp is not downloaded. Please run "node-llama-cpp source download" first'));
process.exit(1);
}
const includeBuildOptionsInBinaryFolderName = !noCustomCmakeBuildOptionsInBinaryFolderName || !isCI;
const clonedLlamaCppRepoReleaseInfo = await getClonedLlamaCppRepoReleaseInfo();
const platform = getPlatform();
const platformInfo = await getPlatformInfo();
const customCmakeOptions = resolveCustomCmakeOptions();
const buildGpusToTry = await getGpuTypesToUseForOption(gpu, { platform, arch });
let downloadedCmake = false;
for (let i = 0; i < buildGpusToTry.length; i++) {
const gpuToTry = buildGpusToTry[i];
const isLastItem = i === buildGpusToTry.length - 1;
if (gpuToTry == null)
continue;
logUsedGpuTypeOption(gpuToTry);
if (!downloadedCmake) {
await downloadCmakeIfNeeded(true);
downloadedCmake = true;
}
const buildOptions = {
customCmakeOptions,
progressLogs: true,
platform,
platformInfo,
arch: arch
? arch
: process.arch,
gpu: gpuToTry,
llamaCpp: {
repo: clonedLlamaCppRepoReleaseInfo?.llamaCppGithubRepo ?? builtinLlamaCppGitHubRepo,
release: clonedLlamaCppRepoReleaseInfo?.tag ?? builtinLlamaCppRelease
}
};
try {
await withStatusLogs({
loading: chalk.blue("Compiling llama.cpp"),
success: chalk.blue("Compiled llama.cpp"),
fail: chalk.blue("Failed to compile llama.cpp")
}, async () => {
await compileLlamaCpp(buildOptions, {
nodeTarget: nodeTarget ? nodeTarget : undefined,
updateLastBuildInfo: true,
downloadCmakeIfNeeded: false,
ensureLlamaCppRepoIsCloned: false,
includeBuildOptionsInBinaryFolderName,
ciMode: isCI && ciMode
});
});
}
catch (err) {
console.error(getConsoleLogPrefix() +
`Failed to build llama.cpp with ${getPrettyBuildGpuName(gpuToTry)} support. ` +
(!isLastItem
? `falling back to building llama.cpp with ${getPrettyBuildGpuName(buildGpusToTry[i + 1])} support. `
: "") +
"Error:", err);
if (isLastItem)
throw err;
continue;
}
await withOra({
loading: chalk.blue("Removing temporary files"),
success: chalk.blue("Removed temporary files"),
fail: chalk.blue("Failed to remove temporary files")
}, async () => {
await clearTempFolder();
});
if (!noUsageExample) {
console.log();
logBinaryUsageExampleToConsole(buildOptions, gpu !== "auto", true);
console.log();
}
break;
}
}
//# sourceMappingURL=BuildCommand.js.map
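After a successful build, the resulting binary is what getLlama("lastBuild", ...) picks up elsewhere in this CLI, and requesting a specific compute layer from code mirrors the --gpu flag above. A minimal sketch, assuming getLlama and LlamaLogLevel are exported from the package root and that a binary for the requested compute layer is available:

import {getLlama, LlamaLogLevel} from "node-llama-cpp";

// request a specific compute layer ("cuda", "vulkan", "metal", or false for CPU-only);
// this mirrors the --gpu flag of the build command above
const llama = await getLlama({
    gpu: "cuda",
    logLevel: LlamaLogLevel.error
});
console.info("Using GPU:", llama.gpu);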

node_modules/node-llama-cpp/dist/cli/commands/source/commands/BuildCommand.js.map generated vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"BuildCommand.js","sourceRoot":"","sources":["../../../../../src/cli/commands/source/commands/BuildCommand.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAC,eAAe,EAAC,MAAM,+CAA+C,CAAC;AAC9E,OAAO,OAAO,MAAM,8BAA8B,CAAC;AACnD,OAAO,EAAC,eAAe,EAAC,MAAM,sCAAsC,CAAC;AACrE,OAAO,EAAC,yBAAyB,EAAE,sBAAsB,EAAE,IAAI,EAAE,yBAAyB,EAAE,qBAAqB,EAAC,MAAM,uBAAuB,CAAC;AAChJ,OAAO,EAAC,qBAAqB,EAAC,MAAM,4BAA4B,CAAC;AACjE,OAAO,cAAc,MAAM,qCAAqC,CAAC;AACjE,OAAO,EAAC,8BAA8B,EAAC,MAAM,8DAA8D,CAAC;AAC5G,OAAO,EAAC,WAAW,EAAC,MAAM,2CAA2C,CAAC;AACtE,OAAO,EAAC,yBAAyB,EAAC,MAAM,yDAAyD,CAAC;AAClG,OAAO,EAAC,gCAAgC,EAAE,oBAAoB,EAAC,MAAM,iDAAiD,CAAC;AACvH,OAAO,EAAyB,sBAAsB,EAAE,0BAA0B,EAAC,MAAM,+BAA+B,CAAC;AACzH,OAAO,EAAC,oBAAoB,EAAC,MAAM,wCAAwC,CAAC;AAC5E,OAAO,EAAC,yBAAyB,EAAC,MAAM,yDAAyD,CAAC;AAClG,OAAO,EAAC,mBAAmB,EAAC,MAAM,0CAA0C,CAAC;AAC7E,OAAO,EAAC,qBAAqB,EAAC,MAAM,gCAAgC,CAAC;AACrE,OAAO,EAAC,eAAe,EAAC,MAAM,+CAA+C,CAAC;AAC9E,OAAO,EAAC,gCAAgC,EAAC,MAAM,oDAAoD,CAAC;AAepG,MAAM,CAAC,MAAM,YAAY,GAAwC;IAC7D,OAAO,EAAE,OAAO;IAChB,OAAO,EAAE,CAAC,SAAS,CAAC;IACpB,QAAQ,EAAE,gCAAgC,CACtC,0DAA0D,EAC1D,qBAAqB,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CACzC;IACD,OAAO,CAAC,KAAK;QACT,OAAO,KAAK;aACP,MAAM,CAAC,MAAM,EAAE;YACZ,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,QAAQ;YACd,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK;YACxB,WAAW,EAAE,2CAA2C;SAC3D,CAAC;aACD,MAAM,CAAC,YAAY,EAAE;YAClB,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,kEAAkE;SAClF,CAAC;aACD,MAAM,CAAC,KAAK,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,yBAAyB;YAElC,2FAA2F;YAC3F,OAAO,EAAE,sBAAwF;YACjG,MAAM,EAAE,0BAA0B;YAClC,WAAW,EAAE,wDAAwD;SACxE,CAAC;aACD,MAAM,CAAC,gBAAgB,EAAE;YACtB,KAAK,EAAE,IAAI;YACX,IAAI,EAAE,SAAS;YACf,OAAO,EAAE,KAAK;YACd,WAAW,EAAE,+CAA+C;SAC/D,CAAC;aACD,MAAM,CAAC,6CAA6C,EAAE;YACnD,IAAI,EAAE,SAAS;YACf,MAAM,EAAE,IAAI,EAAE,iCAAiC;YAC/C,OAAO,EAAE,KAAK;YACd,WAAW,EAAE,+DAA+D;SAC/E,CAAC;aACD,MAAM,CAAC,QAAQ,EAAE;YACd,IAAI,EAAE,SAAS;YACf,MAAM,EAAE,IAAI,EAAE,iCAAiC;YAC/C,OAAO,EAAE,KAAK;YACd,WAAW,EAAE,8BAA8B;SAC9C,CAAC,CAAC;IACX,CAAC;IACD,OAAO,EAAE,oBAAoB;CAChC,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,EACvC,IAAI,GAAG,SAAS,EAChB,UAAU,GAAG,SAAS,EACtB,GAAG,GAAG,yBAAyB,EAC/B,cAAc,GAAG,KAAK;AAEtB,gBAAgB;AAChB,2CAA2C,GAAG,KAAK;AAEnD,gBAAgB;AAChB,MAAM,GAAG,KAAK,EACH;IACX,IAAI,CAAC,CAAC,MAAM,oBAAoB,EAAE,CAAC,EAAE,CAAC;QAClC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,gFAAgF,CAAC,CAAC,CAAC;QACzG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IAED,MAAM,qCAAqC,GAAG,CAAC,2CAA2C,IAAI,CAAC,IAAI,CAAC;IAEpG,MAAM,6BAA6B,GAAG,MAAM,gCAAgC,EAAE,CAAC;IAE/E,MAAM,QAAQ,GAAG,WAAW,EAAE,CAAC;IAC/B,MAAM,YAAY,GAAG,MAAM,eAAe,EAAE,CAAC;IAC7C,MAAM,kBAAkB,GAAG,yBAAyB,EAAE,CAAC;IACvD,MAAM,cAAc,GAAe,MAAM,yBAAyB,CAAC,GAAG,EAAE,EAAC,QAAQ,EAAE,IAAI,EAAC,CAAC,CAAC;IAC1F,IAAI,eAAe,GAAG,KAAK,CAAC;IAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,MAAM,QAAQ,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;QACnC,MAAM,UAAU,GAAG,CAAC,KAAK,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC;QAEnD,IAAI,QAAQ,IAAI,IAAI;YAChB,SAAS;QAEb,oBAAoB,CAAC,QAAQ,CAAC,CAAC;QAE/B,IAAI,CAAC,eAAe,EAAE,CAAC;YACnB,MAAM,qBAAqB,CAAC,IAAI,CAAC,CAAC;YAClC,eAAe,GAAG,IAAI,CAAC;QAC3B,CAAC;QAED,MAAM,YAAY,GAAiB;YAC/B,kBAAkB;YAClB,YAAY,EAAE,IAAI;YAClB,QAAQ;YACR,YAAY;YACZ,IAAI,EAAE,IAAI;gBACN,CAAC,CAAC,IAA2B;gBAC7B,CAAC,CAAC,OAAO,CAAC,IAAI;YAClB,GAAG,EAAE,QAAQ;YACb,QAAQ,EAAE;gBACN,IAAI,EAAE,6BAA6B,EAAE,kBAAkB,IAAI,yBAAyB;gBACpF,OAAO,EAAE,6BAA6B,EAAE,GAAG,IAAI,sBAAsB;aACxE;SACJ,CAAC;QAEF,IAAI,CAAC;YACD,MAAM,cAAc,CAAC;gBACjB,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC;gBAC1C,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,oBAAoB,CAA
C;gBACzC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,6BAA6B,CAAC;aAClD,EAAE,KAAK,IAAI,EAAE;gBACV,MAAM,eAAe,CAAC,YAAY,EAAE;oBAChC,UAAU,EAAE,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS;oBAC/C,mBAAmB,EAAE,IAAI;oBACzB,qBAAqB,EAAE,KAAK;oBAC5B,0BAA0B,EAAE,KAAK;oBACjC,qCAAqC;oBACrC,MAAM,EAAE,IAAI,IAAI,MAAM;iBACzB,CAAC,CAAC;YACP,CAAC,CAAC,CAAC;QACP,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CACT,mBAAmB,EAAE;gBACrB,kCAAkC,qBAAqB,CAAC,QAAQ,CAAC,YAAY;gBAC7E,CACI,CAAC,UAAU;oBACP,CAAC,CAAC,2CAA2C,qBAAqB,CAAC,cAAc,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,YAAY;oBACrG,CAAC,CAAC,EAAE,CACX;gBACD,QAAQ,EACR,GAAG,CACN,CAAC;YAEF,IAAI,UAAU;gBACV,MAAM,GAAG,CAAC;YAEd,SAAS;QACb,CAAC;QAED,MAAM,OAAO,CAAC;YACV,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,0BAA0B,CAAC;YAC/C,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,yBAAyB,CAAC;YAC9C,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,kCAAkC,CAAC;SACvD,EAAE,KAAK,IAAI,EAAE;YACV,MAAM,eAAe,EAAE,CAAC;QAC5B,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,cAAc,EAAE,CAAC;YAClB,OAAO,CAAC,GAAG,EAAE,CAAC;YACd,8BAA8B,CAAC,YAAY,EAAE,GAAG,KAAK,MAAM,EAAE,IAAI,CAAC,CAAC;YACnE,OAAO,CAAC,GAAG,EAAE,CAAC;QAClB,CAAC;QAED,MAAM;IACV,CAAC;AACL,CAAC"}

View File

@@ -0,0 +1,7 @@
import { CommandModule } from "yargs";
type ClearCommand = {
type: "source" | "builds" | "cmake" | "all";
};
export declare const ClearCommand: CommandModule<object, ClearCommand>;
export declare function ClearLlamaCppBuildCommand({ type }: ClearCommand): Promise<void>;
export {};

View File

@@ -0,0 +1,54 @@
import fs from "fs-extra";
import chalk from "chalk";
import { documentationPageUrls, llamaCppDirectory, llamaCppDirectoryInfoFilePath } from "../../../../config.js";
import withOra from "../../../../utils/withOra.js";
import { clearAllLocalBuilds } from "../../../../bindings/utils/clearAllLocalBuilds.js";
import { clearLocalCmake, fixXpackPermissions } from "../../../../utils/cmake.js";
import { withCliCommandDescriptionDocsUrl } from "../../../utils/withCliCommandDescriptionDocsUrl.js";
export const ClearCommand = {
command: "clear [type]",
aliases: ["clean"],
describe: withCliCommandDescriptionDocsUrl("Clear files created by `node-llama-cpp`", documentationPageUrls.CLI.Source.Clear),
builder(yargs) {
return yargs
.option("type", {
type: "string",
choices: ["source", "builds", "cmake", "all"],
default: "all",
description: "Files to clear"
});
},
handler: ClearLlamaCppBuildCommand
};
export async function ClearLlamaCppBuildCommand({ type }) {
if (type === "source" || type === "all") {
await withOra({
loading: chalk.blue("Clearing source"),
success: chalk.blue("Cleared source"),
fail: chalk.blue("Failed to clear source")
}, async () => {
await fs.remove(llamaCppDirectory);
await fs.remove(llamaCppDirectoryInfoFilePath);
});
}
if (type === "builds" || type === "all") {
await withOra({
loading: chalk.blue("Clearing all builds"),
success: chalk.blue("Cleared all builds"),
fail: chalk.blue("Failed to clear all builds")
}, async () => {
await clearAllLocalBuilds();
});
}
if (type === "cmake" || type === "all") {
await withOra({
loading: chalk.blue("Clearing internal cmake"),
success: chalk.blue("Cleared internal cmake"),
fail: chalk.blue("Failed to clear internal cmake")
}, async () => {
await fixXpackPermissions();
await clearLocalCmake();
});
}
}
//# sourceMappingURL=ClearCommand.js.map
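For context, the exported handler above can also be invoked directly from a script rather than through yargs. A minimal sketch, assuming the deep dist path is importable (it is not a documented entry point of the package):

// clear-builds.ts — illustrative only; the deep import path is an assumption
import { ClearLlamaCppBuildCommand } from "node-llama-cpp/dist/cli/commands/source/commands/ClearCommand.js";

// Remove only the locally compiled binaries, keeping the cloned llama.cpp source and the internal cmake
await ClearLlamaCppBuildCommand({ type: "builds" });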

View File

@@ -0,0 +1 @@
{"version":3,"file":"ClearCommand.js","sourceRoot":"","sources":["../../../../../src/cli/commands/source/commands/ClearCommand.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAC,qBAAqB,EAAE,iBAAiB,EAAE,6BAA6B,EAAC,MAAM,uBAAuB,CAAC;AAC9G,OAAO,OAAO,MAAM,8BAA8B,CAAC;AACnD,OAAO,EAAC,mBAAmB,EAAC,MAAM,mDAAmD,CAAC;AACtF,OAAO,EAAC,eAAe,EAAE,mBAAmB,EAAC,MAAM,4BAA4B,CAAC;AAChF,OAAO,EAAC,gCAAgC,EAAC,MAAM,oDAAoD,CAAC;AAMpG,MAAM,CAAC,MAAM,YAAY,GAAwC;IAC7D,OAAO,EAAE,cAAc;IACvB,OAAO,EAAE,CAAC,OAAO,CAAC;IAClB,QAAQ,EAAE,gCAAgC,CACtC,yCAAyC,EACzC,qBAAqB,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CACzC;IACD,OAAO,CAAC,KAAK;QACT,OAAO,KAAK;aACP,MAAM,CAAC,MAAM,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,CAAkC;YAC9E,OAAO,EAAE,KAA6B;YACtC,WAAW,EAAE,gBAAgB;SAChC,CAAC,CAAC;IACX,CAAC;IACD,OAAO,EAAE,yBAAyB;CACrC,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAAC,EAAC,IAAI,EAAe;IAChE,IAAI,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;QACtC,MAAM,OAAO,CAAC;YACV,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC;YACtC,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC;YACrC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,wBAAwB,CAAC;SAC7C,EAAE,KAAK,IAAI,EAAE;YACV,MAAM,EAAE,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC;YACnC,MAAM,EAAE,CAAC,MAAM,CAAC,6BAA6B,CAAC,CAAC;QACnD,CAAC,CAAC,CAAC;IACP,CAAC;IAED,IAAI,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;QACtC,MAAM,OAAO,CAAC;YACV,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC;YAC1C,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,oBAAoB,CAAC;YACzC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,4BAA4B,CAAC;SACjD,EAAE,KAAK,IAAI,EAAE;YACV,MAAM,mBAAmB,EAAE,CAAC;QAChC,CAAC,CAAC,CAAC;IACP,CAAC;IAED,IAAI,IAAI,KAAK,OAAO,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;QACrC,MAAM,OAAO,CAAC;YACV,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,yBAAyB,CAAC;YAC9C,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,wBAAwB,CAAC;YAC7C,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,gCAAgC,CAAC;SACrD,EAAE,KAAK,IAAI,EAAE;YACV,MAAM,mBAAmB,EAAE,CAAC;YAC5B,MAAM,eAAe,EAAE,CAAC;QAC5B,CAAC,CAAC,CAAC;IACP,CAAC;AACL,CAAC"}

View File

@@ -0,0 +1,16 @@
import process from "process";
import { CommandModule } from "yargs";
import { BuildGpu } from "../../../../bindings/types.js";
type DownloadCommandArgs = {
repo?: string;
release?: "latest" | string;
arch?: typeof process.arch;
nodeTarget?: string;
gpu?: BuildGpu | "auto";
skipBuild?: boolean;
noBundle?: boolean;
noUsageExample?: boolean;
};
export declare const DownloadCommand: CommandModule<object, DownloadCommandArgs>;
export declare function DownloadLlamaCppCommand(args: DownloadCommandArgs): Promise<void>;
export {};

View File

@@ -0,0 +1,219 @@
import process from "process";
import fs from "fs-extra";
import chalk from "chalk";
import { defaultLlamaCppGitHubRepo, defaultLlamaCppRelease, isCI, llamaCppDirectory, llamaCppDirectoryInfoFilePath, defaultLlamaCppGpuSupport, documentationPageUrls } from "../../../../config.js";
import { compileLlamaCpp } from "../../../../bindings/utils/compileLLamaCpp.js";
import withOra from "../../../../utils/withOra.js";
import { clearTempFolder } from "../../../../utils/clearTempFolder.js";
import { setBinariesGithubRelease } from "../../../../bindings/utils/binariesGithubRelease.js";
import { downloadCmakeIfNeeded } from "../../../../utils/cmake.js";
import withStatusLogs from "../../../../utils/withStatusLogs.js";
import { getIsInDocumentationMode } from "../../../../state.js";
import { getGitBundlePathForRelease, unshallowAndSquashCurrentRepoAndSaveItAsReleaseBundle } from "../../../../utils/gitReleaseBundles.js";
import { cloneLlamaCppRepo } from "../../../../bindings/utils/cloneLlamaCppRepo.js";
import { getPlatform } from "../../../../bindings/utils/getPlatform.js";
import { resolveCustomCmakeOptions } from "../../../../bindings/utils/resolveCustomCmakeOptions.js";
import { logBinaryUsageExampleToConsole } from "../../../../bindings/utils/logBinaryUsageExampleToConsole.js";
import { resolveGithubRelease } from "../../../../utils/resolveGithubRelease.js";
import { nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption } from "../../../../bindings/types.js";
import { logUsedGpuTypeOption } from "../../../utils/logUsedGpuTypeOption.js";
import { getGpuTypesToUseForOption } from "../../../../bindings/utils/getGpuTypesToUseForOption.js";
import { getConsoleLogPrefix } from "../../../../utils/getConsoleLogPrefix.js";
import { getPrettyBuildGpuName } from "../../../../bindings/consts.js";
import { getPlatformInfo } from "../../../../bindings/utils/getPlatformInfo.js";
import { withCliCommandDescriptionDocsUrl } from "../../../utils/withCliCommandDescriptionDocsUrl.js";
export const DownloadCommand = {
command: "download",
describe: withCliCommandDescriptionDocsUrl("Download a release of `llama.cpp` and compile it", documentationPageUrls.CLI.Source.Download),
builder(yargs) {
const isInDocumentationMode = getIsInDocumentationMode();
return yargs
.option("repo", {
type: "string",
default: defaultLlamaCppGitHubRepo,
description: "The GitHub repository to download a release of llama.cpp from. Can also be set via the `NODE_LLAMA_CPP_REPO` environment variable"
})
.option("release", {
type: "string",
default: isInDocumentationMode ? "<current build>" : defaultLlamaCppRelease,
description: "The tag of the llama.cpp release to download. Set to `latest` to download the latest release. Can also be set via the `NODE_LLAMA_CPP_REPO_RELEASE` environment variable"
})
.option("arch", {
alias: "a",
type: "string",
coerce: (value) => value,
description: "The architecture to compile llama.cpp for"
})
.option("nodeTarget", {
alias: "t",
type: "string",
description: "The Node.js version to compile llama.cpp for. Example: `v18.0.0`"
})
.option("gpu", {
type: "string",
default: defaultLlamaCppGpuSupport,
// yargs types don't support passing `false` as a choice, although it is supported by yargs
choices: nodeLlamaCppGpuOptions,
coerce: parseNodeLlamaCppGpuOption,
description: "Compute layer implementation type to use for llama.cpp"
})
.option("skipBuild", {
alias: "sb",
type: "boolean",
default: false,
description: "Skip building llama.cpp after downloading it"
})
.option("noBundle", {
alias: "nb",
type: "boolean",
default: false,
description: "Download a llama.cpp release only from GitHub, even if a local git bundle exists for the release"
})
.option("noUsageExample", {
alias: "nu",
type: "boolean",
default: false,
description: "Don't print code usage example after building"
})
.option("updateBinariesReleaseMetadataAndSaveGitBundle", {
type: "boolean",
hidden: true, // this is only for the CI to use
default: false,
description: "Update the binariesGithubRelease.json file with the release of llama.cpp that was downloaded"
});
},
handler: DownloadLlamaCppCommand
};
export async function DownloadLlamaCppCommand(args) {
const { repo = defaultLlamaCppGitHubRepo, release = defaultLlamaCppRelease, arch = undefined, nodeTarget = undefined, gpu = defaultLlamaCppGpuSupport, skipBuild = false, noBundle = false, noUsageExample = false, updateBinariesReleaseMetadataAndSaveGitBundle = false } = args;
const useBundle = noBundle != true;
const platform = getPlatform();
const platformInfo = await getPlatformInfo();
const customCmakeOptions = resolveCustomCmakeOptions();
const buildGpusToTry = skipBuild
? []
: await getGpuTypesToUseForOption(gpu, { platform, arch });
const [githubOwner, githubRepo] = repo.split("/");
if (githubOwner == null || githubRepo == null)
throw new Error(`Invalid GitHub repository: ${repo}`);
let downloadedCmake = false;
console.log(`${chalk.yellow("Repo:")} ${repo}`);
console.log(`${chalk.yellow("Release:")} ${release}`);
if (!skipBuild) {
logUsedGpuTypeOption(buildGpusToTry[0]);
}
console.log();
let githubReleaseTag = (useBundle && (await getGitBundlePathForRelease(githubOwner, githubRepo, release)) != null)
? release
: null;
if (githubReleaseTag == null)
await withOra({
loading: chalk.blue("Fetching llama.cpp info"),
success: chalk.blue("Fetched llama.cpp info"),
fail: chalk.blue("Failed to fetch llama.cpp info")
}, async () => {
githubReleaseTag = await resolveGithubRelease(githubOwner, githubRepo, release);
});
await clearTempFolder();
await withOra({
loading: chalk.blue("Removing existing llama.cpp directory"),
success: chalk.blue("Removed existing llama.cpp directory"),
fail: chalk.blue("Failed to remove existing llama.cpp directory")
}, async () => {
await fs.remove(llamaCppDirectory);
await fs.remove(llamaCppDirectoryInfoFilePath);
});
await cloneLlamaCppRepo(githubOwner, githubRepo, githubReleaseTag, useBundle);
if (!skipBuild) {
for (let i = 0; i < buildGpusToTry.length; i++) {
const gpuToTry = buildGpusToTry[i];
const isLastItem = i === buildGpusToTry.length - 1;
if (gpuToTry == null)
continue;
if (i > 0) // we already logged the first gpu before
logUsedGpuTypeOption(gpuToTry);
if (!downloadedCmake) {
await downloadCmakeIfNeeded(true);
downloadedCmake = true;
}
const buildOptions = {
customCmakeOptions,
progressLogs: true,
platform,
platformInfo,
arch: arch
? arch
: process.arch,
gpu: gpuToTry,
llamaCpp: {
repo,
release: githubReleaseTag
}
};
try {
await withStatusLogs({
loading: chalk.blue("Compiling llama.cpp"),
success: chalk.blue("Compiled llama.cpp"),
fail: chalk.blue("Failed to compile llama.cpp")
}, async () => {
await compileLlamaCpp(buildOptions, {
nodeTarget: nodeTarget ? nodeTarget : undefined,
updateLastBuildInfo: true,
downloadCmakeIfNeeded: false,
ensureLlamaCppRepoIsCloned: false,
includeBuildOptionsInBinaryFolderName: true
});
});
}
catch (err) {
console.error(getConsoleLogPrefix() +
`Failed to build llama.cpp with ${getPrettyBuildGpuName(gpuToTry)} support. ` +
(!isLastItem
? `falling back to building llama.cpp with ${getPrettyBuildGpuName(buildGpusToTry[i + 1])} support. `
: "") +
"Error:", err);
if (isLastItem)
throw err;
continue;
}
if (!noUsageExample) {
console.log();
console.log();
logBinaryUsageExampleToConsole(buildOptions, gpu !== "auto", true);
}
break;
}
}
else if (!noUsageExample) {
const buildOptions = {
customCmakeOptions,
progressLogs: true,
platform,
platformInfo,
arch: arch
? arch
: process.arch,
gpu: buildGpusToTry[0],
llamaCpp: {
repo,
release: githubReleaseTag
}
};
console.log();
console.log();
logBinaryUsageExampleToConsole(buildOptions, gpu !== "auto", true);
}
if (isCI && updateBinariesReleaseMetadataAndSaveGitBundle) {
await setBinariesGithubRelease(githubReleaseTag);
await unshallowAndSquashCurrentRepoAndSaveItAsReleaseBundle();
}
console.log();
console.log();
console.log(`${chalk.yellow("Repo:")} ${repo}`);
console.log(chalk.yellow("Release:") + " " + release + (release === "latest"
? (" " + chalk.gray("(" + githubReleaseTag + ")"))
: ""));
console.log();
console.log(chalk.green("Done"));
}
//# sourceMappingURL=DownloadCommand.js.map
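The handler accepts the same options as the CLI flags defined above, so it can be driven programmatically as well. A minimal sketch (the deep dist import is an assumption; the supported path is running `node-llama-cpp source download`):

// download-and-build.ts — illustrative only; the deep import path is an assumption
import { DownloadLlamaCppCommand } from "node-llama-cpp/dist/cli/commands/source/commands/DownloadCommand.js";

// Download the latest llama.cpp release and compile it, auto-detecting the GPU type to build for
await DownloadLlamaCppCommand({
    release: "latest",
    gpu: "auto"
});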

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,7 @@
export type ProjectTemplateOption = {
title: string;
name: string;
titleFormat?(title: string): string;
description?: string;
};
export declare const projectTemplates: ProjectTemplateOption[];

View File

@@ -0,0 +1,10 @@
export const projectTemplates = [{
title: "Node + TypeScript",
name: "node-typescript",
description: "A Node.js project with TypeScript using vite-node, some ESLint configuration, basic setup with a selected model file, and a working example of a simple usage of node-llama-cpp with the model"
}, {
title: "Electron + TypeScript + React",
name: "electron-typescript-react",
description: "An Electron project with TypeScript and React using Vite-Electron, some ESLint configuration, basic setup with a selected model file, and a working example of a simple usage of node-llama-cpp with the model"
}];
//# sourceMappingURL=projectTemplates.js.map
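A small sketch of how these template options could be rendered in a picker, honoring the optional `titleFormat` from the declaration above (the output format and relative import path are illustrative):

// list-templates.ts — illustrative only; the relative import path is an assumption
import { projectTemplates, type ProjectTemplateOption } from "./projectTemplates.js";

function renderTemplate(template: ProjectTemplateOption): string {
    const title = template.titleFormat != null
        ? template.titleFormat(template.title)
        : template.title;
    return `${title} (${template.name})`;
}

for (const template of projectTemplates)
    console.info(renderTemplate(template));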

View File

@@ -0,0 +1 @@
{"version":3,"file":"projectTemplates.js","sourceRoot":"","sources":["../../src/cli/projectTemplates.ts"],"names":[],"mappings":"AAMA,MAAM,CAAC,MAAM,gBAAgB,GAA4B,CAAC;QACtD,KAAK,EAAE,mBAAmB;QAC1B,IAAI,EAAE,iBAAiB;QACvB,WAAW,EAAE,gMAAgM;KAChN,EAAE;QACC,KAAK,EAAE,+BAA+B;QACtC,IAAI,EAAE,2BAA2B;QACjC,WAAW,EAAE,gNAAgN;KAChO,CAAC,CAAC"}

View File

@@ -0,0 +1,2 @@
import { ModelRecommendation } from "./utils/resolveModelRecommendationFileOptions.js";
export declare const recommendedModels: ModelRecommendation[];

View File

@@ -0,0 +1,428 @@
export const recommendedModels = [{
name: "gpt-oss 20B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "gpt-oss models were created by OpenAI and are using chain of though (CoT) to reason across a wide variety of topics, and utilize a Mixture of Experts architecture.\n" +
"It's optimized for agentic cases, with native support for function calling.\n" +
"Mixtures of Experts (MoE) is a technique where different models, each skilled in solving a particular kind of problem, work together to the improve the overall performance on complex tasks.\n" +
"This model only has 3.6B active parameters, thus making it very fast.\n" +
"This is the 20 billion parameters version of the model.",
fileOptions: [
"hf:giladgd/gpt-oss-20b-GGUF/gpt-oss-20b.MXFP4.gguf"
]
}, {
name: "gpt-oss 120B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "gpt-oss models were created by OpenAI and are using chain of though (CoT) to reason across a wide variety of topics, and utilize a Mixture of Experts architecture.\n" +
"It's optimized for agentic cases, with native support for function calling.\n" +
"Mixtures of Experts (MoE) is a technique where different models, each skilled in solving a particular kind of problem, work together to the improve the overall performance on complex tasks.\n" +
"This model only has 5.1B active parameters, thus making it very fast.\n" +
"This is the 120 billion parameters version of the model.",
fileOptions: [
"hf:giladgd/gpt-oss-120b-GGUF/gpt-oss-120b.MXFP4-00001-of-00002.gguf"
]
}, {
name: "Qwen 3 32B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "Qwen model was created by Alibaba and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with native support for function calling.\n" +
"This model is censored, but its responses quality on many topics is extremely high.\n" +
"This is the 32 billion parameters version of the model.\n" +
"Its performance is comparable and even surpasses DeepSeek R1 and GPT-o1.",
fileOptions: [
"hf:Qwen/Qwen3-32B-GGUF:Q8_0",
"hf:Qwen/Qwen3-32B-GGUF:Q6_K",
"hf:Qwen/Qwen3-32B-GGUF:Q5_K_M",
"hf:Qwen/Qwen3-32B-GGUF:Q4_K_M"
]
}, {
name: "Qwen 3 14B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "Qwen model was created by Alibaba and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with native support for function calling.\n" +
"This model is censored, but its responses quality on many topics is extremely high compared to its size.\n" +
"This is the 14 billion parameters version of the model.",
fileOptions: [
"hf:Qwen/Qwen3-14B-GGUF:Q8_0",
"hf:Qwen/Qwen3-14B-GGUF:Q6_K",
"hf:Qwen/Qwen3-14B-GGUF:Q5_K_M",
"hf:Qwen/Qwen3-14B-GGUF:Q4_K_M"
]
}, {
name: "Qwen 3 8B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "Qwen model was created by Alibaba and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with native support for function calling.\n" +
"This model is censored, but its responses quality on many topics is extremely high compared to its size.\n" +
"This is the 8 billion parameters version of the model.",
fileOptions: [
"hf:Qwen/Qwen3-8B-GGUF:Q8_0",
"hf:Qwen/Qwen3-8B-GGUF:Q6_K",
"hf:Qwen/Qwen3-8B-GGUF:Q5_K_M",
"hf:Qwen/Qwen3-8B-GGUF:Q4_K_M"
]
}, {
name: "Qwen 3 4B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "Qwen model was created by Alibaba and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with native support for function calling.\n" +
"This model is censored, but its responses quality on many topics is extremely high compared to its size.\n" +
"This is the 4 billion parameters version of the model, and is suitable for simpler tasks and can run on lower-end hardware, as well as be very fast on higher-end hardware.",
fileOptions: [
"hf:Qwen/Qwen3-4B-GGUF:Q8_0",
"hf:Qwen/Qwen3-4B-GGUF:Q6_K",
"hf:Qwen/Qwen3-4B-GGUF:Q5_K_M",
"hf:Qwen/Qwen3-4B-GGUF:Q4_K_M"
]
}, {
name: "Qwen 3 0.6B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "Qwen model was created by Alibaba and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with native support for function calling.\n" +
"This model is censored, but its responses quality on many topics is very high compared to its small size.\n" +
"This is the 0.6B billion parameters version of the model and is suitable for very simple tasks and can run on very resource-constraint hardware.\n",
fileOptions: [
"hf:Qwen/Qwen3-0.6B-GGUF:Q8_0"
]
}, {
name: "Seed OSS 36B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "The Seed OSS model was created by ByteDance and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for agentic use cases, with native support for function calling and flexible control of the thinking budget (via `SeedChatWrapper` options).\n" +
"This model can support a context size of up to 512K tokens (if you have enough VRAM to accommodate it).\n" +
"This is a 36 billion parameters model.",
fileOptions: [
"hf:giladgd/Seed-OSS-36B-Instruct-GGUF:Q8_0",
"hf:giladgd/Seed-OSS-36B-Instruct-GGUF:Q6_K",
"hf:giladgd/Seed-OSS-36B-Instruct-GGUF:Q5_K_M",
"hf:giladgd/Seed-OSS-36B-Instruct-GGUF:Q4_K_M"
]
}, {
name: "DeepSeek R1 Distill Qwen 7B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "DeepSeek R1 model was created by DeepSeek and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This model is censored, but its responses quality on many topics is extremely high.\n" +
"This is the 7 billion parameters version of the model - a fine tuned Qwen 2.5 7B base model with distillation from the 671B DeepSeek R1 version.",
fileOptions: [
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q8_0",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q5_K_M",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q5_K_S",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M"
]
}, {
name: "DeepSeek R1 Distill Qwen 14B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "DeepSeek R1 model was created by DeepSeek and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This model is censored, but its responses quality on many topics is extremely high.\n" +
"This is the 14 billion parameters version of the model - a fine tuned Qwen 2.5 14B base model with distillation from the 671B DeepSeek R1 version.",
fileOptions: [
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-14B-GGUF:Q8_0",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-14B-GGUF:Q6_K",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-14B-GGUF:Q5_K_M",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-14B-GGUF:Q5_K_S",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-14B-GGUF:Q4_K_M"
]
}, {
name: "DeepSeek R1 Distill Qwen 32B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "DeepSeek R1 model was created by DeepSeek and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This model is censored, but its responses quality on many topics is extremely high.\n" +
"This is the 32 billion parameters version of the model - a fine tuned Qwen 2.5 32B base model with distillation from the 671B DeepSeek R1 version.",
fileOptions: [
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q8_0",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q6_K",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q5_K_M",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q5_K_S",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M"
]
}, {
name: "DeepSeek R1 Distill Llama 8B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "DeepSeek R1 model was created by DeepSeek and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This model is censored, even though it's based on Llama 3.1.\n" +
"This is the 8 billion parameters version of the model - a fine tuned Llama 3.1 8B base model with distillation from the 671B DeepSeek R1 version.",
fileOptions: [
"hf:mradermacher/DeepSeek-R1-Distill-Llama-8B-GGUF:Q8_0",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-8B-GGUF:Q6_K",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-8B-GGUF:Q5_K_M",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-8B-GGUF:Q5_K_S",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-8B-GGUF:Q4_K_M"
]
}, {
name: "DeepSeek R1 Distill Llama 70B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "DeepSeek R1 model was created by DeepSeek and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This model is censored, even though it's based on Llama 3.3.\n" +
"This is the 70 billion parameters version of the model - a fine tuned Llama 3.3 70B base model with distillation from the 671B DeepSeek R1 version.",
fileOptions: [
"hf:mradermacher/DeepSeek-R1-Distill-Llama-70B-GGUF/DeepSeek-R1-Distill-Llama-70B.Q8_0.gguf.part1of2",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-70B-GGUF/DeepSeek-R1-Distill-Llama-70B.Q6_K.gguf.part1of2",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-70B-GGUF:Q5_K_M",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-70B-GGUF:Q5_K_S",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-70B-GGUF:Q4_K_M"
]
}, {
name: "Qwen 3 30B A3B MoE",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "Qwen model was created by Alibaba and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with native support for function calling.\n" +
"This version of the model utilizes a Mixture of Experts architecture, with only 3B active parameters, thus making it very fast.\n" +
"Mixtures of Experts (MoE) is a technique where different models, each skilled in solving a particular kind of problem, work together to the improve the overall performance on complex tasks.\n" +
"This model is censored, but its responses quality on many topics is high compared to its high generation speed.\n" +
"This is the 30 billion parameters Mixtures of Experts (MoE) version of the model.\n" +
"Its performance is comparable and even surpasses DeepSeek V3 and GPT-4o.",
fileOptions: [
"hf:Qwen/Qwen3-30B-A3B-GGUF:Q8_0",
"hf:Qwen/Qwen3-30B-A3B-GGUF:Q6_K",
"hf:Qwen/Qwen3-30B-A3B-GGUF:Q5_K_M",
"hf:Qwen/Qwen3-30B-A3B-GGUF:Q4_K_M"
]
}, {
name: "QwQ 32B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "QwQ model was created by Alibaba and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with native support for function calling.\n" +
"This model is censored, but its responses quality on many topics is extremely high.\n" +
"Its performance is comparable to DeepSeek R1 671B.",
fileOptions: [
"hf:Qwen/QwQ-32B-GGUF:Q8_0",
"hf:Qwen/QwQ-32B-GGUF:Q6_K",
"hf:Qwen/QwQ-32B-GGUF:Q5_K_M",
"hf:Qwen/QwQ-32B-GGUF:Q4_K_M"
]
}, {
name: "Llama 3.1 8B",
abilities: ["chat", "complete", "functionCalling"],
description: "Llama 3.1 model was created by Meta and is optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This is the 8 billion parameters version of the model.",
fileOptions: [
"hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q8_0",
"hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q6_K",
"hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M"
]
}, {
name: "Llama 3.1 70B",
abilities: ["chat", "complete", "functionCalling"],
description: "Llama 3.1 model was created by Meta and is optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This is the 70 billion parameters version of the model. " +
"You need a GPU with a lot of VRAM to use this version.",
fileOptions: [
"hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2",
"hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q6_K.gguf.part1of2",
"hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF:Q4_K_M",
"hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF:Q4_K_S"
]
}, {
name: "Llama 3.1 405B",
abilities: ["chat", "complete", "functionCalling"],
description: "Llama 3.1 model was created by Meta and is optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This is the 405 billion parameters version of the model, and its capabilities are comparable and sometimes even surpass GPT-4o and Claude 3.5 Sonnet.\n" +
"You need a GPU with a lot of VRAM to use this version of Llama 3.1.",
fileOptions: [
"hf:mradermacher/Meta-Llama-3.1-405B-Instruct-GGUF/Meta-Llama-3.1-405B-Instruct.Q3_K_L.gguf.part1of5",
"hf:mradermacher/Meta-Llama-3.1-405B-Instruct-GGUF/Meta-Llama-3.1-405B-Instruct.Q3_K_M.gguf.part1of4"
]
}, {
name: "Phi 4 14B",
abilities: ["chat", "complete", "functionCalling"],
description: "Phi 4 model was created by Microsoft and is optimized for complex reasoning in areas such as math.",
fileOptions: [
"hf:mradermacher/phi-4-GGUF:Q8_0",
"hf:mradermacher/phi-4-GGUF:Q6_K",
"hf:mradermacher/phi-4-GGUF:Q4_K_M",
"hf:mradermacher/phi-4-GGUF:Q4_K_S"
]
}, {
name: "Mistral Nemo 12B",
abilities: ["chat", "complete", "functionCalling"],
description: "Mistral Nemo model was created by Mistral AI and was trained on large proportion of multilingual and code data, with support for function calling.\n" +
"It was trained jointly by Mistral AI and NVIDIA.\n" +
"This is a 12 billion parameters model.",
fileOptions: [
"hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF:Q8_0",
"hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF:Q6_K",
"hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M",
"hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_S"
]
}, {
name: "Llama 3.2 3B",
abilities: ["chat", "complete", "functionCalling"],
description: "Llama 3.2 3B model was created by Meta and is optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This model is smarter than the 1B model, but is still relatively small and can run on less capable machines.",
fileOptions: [
"hf:mradermacher/Llama-3.2-3B-Instruct-GGUF:Q8_0",
"hf:mradermacher/Llama-3.2-3B-Instruct-GGUF:Q6_K",
"hf:mradermacher/Llama-3.2-3B-Instruct-GGUF:Q4_K_M",
"hf:mradermacher/Llama-3.2-3B-Instruct-GGUF:Q4_K_S"
]
}, {
name: "Phi 3 3.8B",
abilities: ["chat", "complete", "functionCalling"],
description: "Phi 3 model was created by Microsoft and is optimized for strong reasoning (especially math and logic).\n" +
"This is the small version of the model.",
fileOptions: [
"hf:bartowski/Phi-3.1-mini-4k-instruct-GGUF:Q8_0",
"hf:bartowski/Phi-3.1-mini-4k-instruct-GGUF:Q4_K_M"
]
}, {
name: "OLMoE 1B 7B MoE",
abilities: ["chat"],
description: "OLMoE models were created by AllenAI, and are fully open source models that utilize a Mixture of Experts architecture.\n" +
"Mixtures of Experts (MoE) is a technique where different models, each skilled in solving a particular kind of problem, work together to the improve the overall performance on complex tasks.\n" +
"This model includes 64 expert models, with a total of 7 billion parameters.\n" +
"This model generates output extremely fast.",
fileOptions: [
"hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf",
"hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q6_k.gguf",
"hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q5_k_m.gguf",
"hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q4_k_s.gguf",
"hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q4_k_m.gguf"
]
}, {
name: "Mixtral 8x7B MoE",
abilities: ["chat", "complete"],
description: "Mixtral models were created by Mistal AI and are general purpose models that utilize a Mixture of Experts architecture.\n" +
"Mixtures of Experts (MoE) is a technique where different models, each skilled in solving a particular kind of problem, work together to the improve the overall performance on complex tasks.\n" +
"This model includes 8 expert models, each with 7 billion parameters.",
fileOptions: [
"hf:TheBloke/Mixtral-8x7B-v0.1-GGUF:Q5_K_M",
"hf:TheBloke/Mixtral-8x7B-v0.1-GGUF:Q4_K_M"
]
}, {
name: "Mistral 7B Instruct v0.2",
abilities: ["chat", "complete"],
description: "Mistral models were created by Mistal AI and are general purpose models.\n" +
"This is the 7 billion parameters version of the model.",
fileOptions: [
"hf:TheBloke/Mistral-7B-Instruct-v0.2-GGUF:Q5_K_M",
"hf:TheBloke/Mistral-7B-Instruct-v0.2-GGUF:Q4_K_M"
]
}, {
name: "Dolphin 2.5 Mixtral 8x7B MoE",
abilities: ["chat", "complete"],
description: "This Dolphin Mixtral model was created by Eric Hartford and is an uncensored model based on Mixtral, with really good coding skills.\n" +
"See the Mixtral model above for more information about Mixtral models.\n" +
"This model includes 8 expert models, each with 7 billion parameters.",
fileOptions: [
"hf:TheBloke/dolphin-2.5-mixtral-8x7b-GGUF:Q5_K_M",
"hf:TheBloke/dolphin-2.5-mixtral-8x7b-GGUF:Q4_K_M"
]
}, {
name: "Gemma 2 9B",
abilities: ["chat", "complete"],
description: "Gemma models were created by Google and are optimized suited for variety of text generation tasks, " +
"including question answering, summarization, and reasoning, with a focus on responsible responses.\n" +
"This is the 9 billion parameters version of the model.",
fileOptions: [
"hf:bartowski/gemma-2-9b-it-GGUF:Q6_K_L",
"hf:bartowski/gemma-2-9b-it-GGUF:Q6_K",
"hf:bartowski/gemma-2-9b-it-GGUF:Q5_K_M",
"hf:bartowski/gemma-2-9b-it-GGUF:Q5_K_S",
"hf:bartowski/gemma-2-9b-it-GGUF:Q4_K_M"
]
}, {
name: "Gemma 2 2B",
abilities: ["chat", "complete"],
description: "Gemma models were created by Google and are optimized suited for variety of text generation tasks, " +
"including question answering, summarization, and reasoning, with a focus on responsible responses.\n" +
"This is the 2 billion parameters version of the model and is significantly less powerful than the 9B version.",
fileOptions: [
"hf:bartowski/gemma-2-2b-it-GGUF:Q6_K_L",
"hf:bartowski/gemma-2-2b-it-GGUF:Q6_K",
"hf:bartowski/gemma-2-2b-it-GGUF:Q5_K_M",
"hf:bartowski/gemma-2-2b-it-GGUF:Q5_K_S",
"hf:bartowski/gemma-2-2b-it-GGUF:Q4_K_M"
]
}, {
name: "Gemma 2 27B",
abilities: ["chat", "complete"],
description: "Gemma models were created by Google and are optimized suited for varoety of text generation tasks, " +
"including question answering, summarization, and reasoning, with a focus on responsible responses.\n" +
"This is the 27 billion parameters version of the model.\n" +
"Since the model is relatively big, it may not run well on your machine",
fileOptions: [
"hf:bartowski/gemma-2-27b-it-GGUF:Q6_K_L",
"hf:bartowski/gemma-2-27b-it-GGUF:Q6_K",
"hf:bartowski/gemma-2-27b-it-GGUF:Q5_K_M",
"hf:bartowski/gemma-2-27b-it-GGUF:Q5_K_S",
"hf:bartowski/gemma-2-27b-it-GGUF:Q4_K_M"
]
}, {
name: "Orca 2 13B",
abilities: ["chat", "complete"],
description: "Orca 2 model was created by Microsoft and is optimized for reasoning over given data, reading comprehensions, math problem solving and text summarization.\n" +
"This is the 13 billion parameters version of the model.",
fileOptions: [
"hf:TheBloke/Orca-2-13B-GGUF:Q5_K_M",
"hf:TheBloke/Orca-2-13B-GGUF:Q4_K_M"
]
}, {
name: "Code Llama 7B",
abilities: ["chat", "complete", "infill"],
description: "Code Llama model was created by Meta based on Llama 2 and is optimized for coding tasks.\n" +
"This is the 7 billion parameters version of the model.",
fileOptions: [
"hf:TheBloke/CodeLlama-7B-GGUF:Q5_K_M",
"hf:TheBloke/CodeLlama-7B-GGUF:Q4_K_M"
]
}, {
name: "Code Llama 13B",
abilities: ["chat", "complete", "infill"],
description: "Code Llama model was created by Meta based on Llama 2 and is optimized for coding tasks.\n" +
"This is the 13 billion parameters version of the model.",
fileOptions: [
"hf:TheBloke/CodeLlama-13B-GGUF:Q5_K_M",
"hf:TheBloke/CodeLlama-13B-GGUF:Q4_K_M"
]
}, {
name: "Code Llama 34B",
abilities: ["chat", "complete", "infill"],
description: "Code Llama model was created by Meta based on Llama 2 and is optimized for coding tasks.\n" +
"This is the 34 billion parameters version of the model.\n" +
"You need a GPU with handful of VRAM to use this version.",
fileOptions: [
"hf:TheBloke/CodeLlama-34B-GGUF:Q5_K_M",
"hf:TheBloke/CodeLlama-34B-GGUF:Q4_K_M"
]
}, {
name: "CodeGemma 2B",
abilities: ["code", "complete", "infill"],
description: "CodeGemma models were created by Google and are optimized for code completion, code generation, " +
"natual language understanding, mathematical reasoning, and instruction following.\n" +
"This model is not suited for chat.\n" +
"This is the 2 billion parameters version of the model.\n",
fileOptions: [
"hf:bartowski/codegemma-2b-GGUF:Q8_0",
"hf:bartowski/codegemma-2b-GGUF:Q6_K",
"hf:bartowski/codegemma-2b-GGUF:Q5_K_M",
"hf:bartowski/codegemma-2b-GGUF:Q5_K_S",
"hf:bartowski/codegemma-2b-GGUF:Q4_K_M"
]
}, {
name: "CodeGemma 7B",
abilities: ["code", "complete", "infill"],
description: "CodeGemma models were created by Google and are optimized for code completion, code generation, " +
"natual language understanding, mathematical reasoning, and instruction following.\n" +
"This model is not suited for chat.\n" +
"This is the 7 billion parameters version of the model.\n",
fileOptions: [
"hf:bartowski/codegemma-1.1-7b-it-GGUF:Q6_K",
"hf:bartowski/codegemma-1.1-7b-it-GGUF:Q5_K_M",
"hf:bartowski/codegemma-1.1-7b-it-GGUF:Q5_K_S",
"hf:bartowski/codegemma-1.1-7b-it-GGUF:Q4_K_M"
]
}, {
name: "Stable Code Instruct 3B",
abilities: ["chat", "complete", "infill"],
description: "Stable Code models were created by Stability AI and are optimized for code completion.",
fileOptions: [
"hf:stabilityai/stable-code-instruct-3b/stable-code-3b-q5_k_m.gguf",
"hf:stabilityai/stable-code-instruct-3b/stable-code-3b-q4_k_m.gguf"
]
}];
//# sourceMappingURL=recommendedModels.js.map
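Since every entry is plain data, the list can be queried directly. A minimal sketch that collects the first (preferred) download URI of each model that advertises reasoning support, assuming `ModelRecommendation` exposes the `name`, `abilities`, and `fileOptions` fields seen above (the relative import path is an assumption):

// list-reasoning-models.ts — illustrative only
import { recommendedModels } from "./recommendedModels.js";

// The first entry in fileOptions is the highest-quality quantization listed for that model
const reasoningModels = recommendedModels
    .filter((model) => model.abilities.includes("reasoning"))
    .map((model) => `${model.name}: ${model.fileOptions[0]}`);

for (const line of reasoningModels)
    console.info(line);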

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,2 @@
#!/usr/bin/env node
export {};

26
node_modules/node-llama-cpp/dist/cli/startCreateCli.js generated vendored Normal file
View File

@@ -0,0 +1,26 @@
#!/usr/bin/env node
import yargs from "yargs";
import { hideBin } from "yargs/helpers";
import { setIsRunningFromCLI } from "../state.js";
import { CreateCliCommand } from "./commands/InitCommand.js";
/** @internal */
export function _startCreateCli({ cliBinName, packageVersion, _enable }) {
if (_enable !== Symbol.for("internal"))
return;
setIsRunningFromCLI(true);
const yarg = yargs(hideBin(process.argv));
yarg
.scriptName(cliBinName)
.usage("Usage: $0 [options]")
.command(CreateCliCommand)
.demandCommand(1)
.strict()
.strictCommands()
.alias("v", "version")
.help("h")
.alias("h", "help")
.version(packageVersion)
.wrap(Math.min(100, yarg.terminalWidth()))
.parse();
}
//# sourceMappingURL=startCreateCli.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"startCreateCli.js","sourceRoot":"","sources":["../../src/cli/startCreateCli.ts"],"names":[],"mappings":";AAEA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAC,OAAO,EAAC,MAAM,eAAe,CAAC;AACtC,OAAO,EAAC,mBAAmB,EAAC,MAAM,aAAa,CAAC;AAChD,OAAO,EAAC,gBAAgB,EAAC,MAAM,2BAA2B,CAAC;AAE3D,gBAAgB;AAChB,MAAM,UAAU,eAAe,CAAC,EAC5B,UAAU,EACV,cAAc,EACd,OAAO,EAKV;IACG,IAAI,OAAO,KAAK,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC;QAClC,OAAO;IAEX,mBAAmB,CAAC,IAAI,CAAC,CAAC;IAE1B,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;IAE1C,IAAI;SACC,UAAU,CAAC,UAAU,CAAC;SACtB,KAAK,CAAC,qBAAqB,CAAC;SAC5B,OAAO,CAAC,gBAAgB,CAAC;SACzB,aAAa,CAAC,CAAC,CAAC;SAChB,MAAM,EAAE;SACR,cAAc,EAAE;SAChB,KAAK,CAAC,GAAG,EAAE,SAAS,CAAC;SACrB,IAAI,CAAC,GAAG,CAAC;SACT,KAAK,CAAC,GAAG,EAAE,MAAM,CAAC;SAClB,OAAO,CAAC,cAAc,CAAC;SACvB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC;SACzC,KAAK,EAAE,CAAC;AACjB,CAAC"}

View File

@@ -0,0 +1,22 @@
export declare const enum ConsoleInteractionKey {
ctrlC = "\u0003",
upArrow = "\u001B[A",
downArrow = "\u001B[B",
enter = "\r"
}
export declare class ConsoleInteraction {
constructor({ stdin }?: {
stdin?: NodeJS.ReadStream;
});
get isActive(): boolean;
start(): void;
stop(): void;
onKey(key: string | ConsoleInteractionKey | (string | ConsoleInteractionKey)[], callback: () => void): ConsoleInteractionOnKeyHandle;
static yesNoQuestion(question: string): Promise<boolean>;
}
export declare class ConsoleInteractionOnKeyHandle {
private constructor();
dispose(): void;
[Symbol.dispose](): void;
get disposed(): boolean;
}

View File

@@ -0,0 +1,122 @@
import process from "process";
import chalk from "chalk";
export var ConsoleInteractionKey;
(function (ConsoleInteractionKey) {
ConsoleInteractionKey["ctrlC"] = "\u0003";
ConsoleInteractionKey["upArrow"] = "\u001B[A";
ConsoleInteractionKey["downArrow"] = "\u001B[B";
ConsoleInteractionKey["enter"] = "\r";
})(ConsoleInteractionKey || (ConsoleInteractionKey = {}));
export class ConsoleInteraction {
/** @internal */ _keyCallbacks = new Map();
/** @internal */ _stdin;
/** @internal */ _isActive = false;
constructor({ stdin = process.stdin } = {}) {
this._stdin = stdin;
this._onData = this._onData.bind(this);
}
get isActive() {
return this._isActive;
}
start() {
if (this._isActive)
return;
this._isActive = true;
if (this._stdin.isTTY)
this._stdin.setRawMode(true);
this._stdin.on("data", this._onData);
this._stdin.resume();
}
stop() {
if (!this._isActive)
return;
this._isActive = false;
if (this._stdin.isTTY)
this._stdin.setRawMode(false);
this._stdin.off("data", this._onData);
this._stdin.pause();
}
onKey(key, callback) {
if (typeof key === "string")
key = [key];
for (const k of key) {
if (!this._keyCallbacks.has(k))
this._keyCallbacks.set(k, []);
this._keyCallbacks.get(k).push(callback);
}
return ConsoleInteractionOnKeyHandle._create(() => {
for (const k of key) {
const callbacks = this._keyCallbacks.get(k);
if (callbacks == null)
continue;
const index = callbacks.indexOf(callback);
if (index >= 0)
callbacks.splice(index, 1);
}
});
}
/** @internal */
_onData(data) {
if (!this._isActive)
return;
const key = data.toString();
const callbacks = this._keyCallbacks.get(key) ?? [];
if (callbacks.length === 0 && key === ConsoleInteractionKey.ctrlC) {
process.stdout.write("\n");
this.stop();
process.exit(0);
}
for (const callback of callbacks) {
try {
callback();
}
catch (err) {
console.error(err);
}
}
}
static yesNoQuestion(question) {
return new Promise((resolve) => {
const interaction = new ConsoleInteraction();
interaction.onKey(["Y", "y"], () => {
resolve(true);
interaction.stop();
process.stdout.write("\n");
});
interaction.onKey(["N", "n"], () => {
resolve(false);
interaction.stop();
process.stdout.write("\n");
});
console.log();
process.stdout.write(question + " " + chalk.gray("(Y/n) "));
interaction.start();
});
}
}
export class ConsoleInteractionOnKeyHandle {
/** @internal */
_dispose;
constructor(dispose) {
this._dispose = dispose;
this.dispose = this.dispose.bind(this);
this[Symbol.dispose] = this[Symbol.dispose].bind(this);
}
dispose() {
if (this._dispose != null) {
this._dispose();
this._dispose = null;
}
}
[Symbol.dispose]() {
this.dispose();
}
get disposed() {
return this._dispose == null;
}
/** @internal */
static _create(dispose) {
return new ConsoleInteractionOnKeyHandle(dispose);
}
}
//# sourceMappingURL=ConsoleInteraction.js.map
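A brief usage sketch based on the declared API: the static `yesNoQuestion` helper for a one-off prompt, and `onKey` with its disposable handle for raw key handling (the prompt text and relative import path are illustrative):

// prompt-example.ts — illustrative only
import { ConsoleInteraction, ConsoleInteractionKey } from "./ConsoleInteraction.js";

// One-off confirmation; resolves to true on "y"/"Y" and false on "n"/"N"
const proceed = await ConsoleInteraction.yesNoQuestion("Clear all local builds?");
console.info(proceed ? "Clearing..." : "Aborted");

// Manual key handling: react to Enter until the handle is disposed
const interaction = new ConsoleInteraction();
const handle = interaction.onKey(ConsoleInteractionKey.enter, () => {
    console.info("Enter pressed");
    handle.dispose();
    interaction.stop();
});
interaction.start();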

View File

@@ -0,0 +1 @@
{"version":3,"file":"ConsoleInteraction.js","sourceRoot":"","sources":["../../../src/cli/utils/ConsoleInteraction.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,MAAM,CAAN,IAAkB,qBAKjB;AALD,WAAkB,qBAAqB;IACnC,yCAAgB,CAAA;IAChB,6CAAoB,CAAA;IACpB,+CAAsB,CAAA;IACtB,qCAAY,CAAA;AAChB,CAAC,EALiB,qBAAqB,KAArB,qBAAqB,QAKtC;AAED,MAAM,OAAO,kBAAkB;IAC3B,gBAAgB,CAAkB,aAAa,GAAwD,IAAI,GAAG,EAAE,CAAC;IACjH,gBAAgB,CAAkB,MAAM,CAAoB;IAC5D,gBAAgB,CAAS,SAAS,GAAY,KAAK,CAAC;IAEpD,YAAmB,EAAC,KAAK,GAAG,OAAO,CAAC,KAAK,KAAiC,EAAE;QACxE,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC3C,CAAC;IAED,IAAW,QAAQ;QACf,OAAO,IAAI,CAAC,SAAS,CAAC;IAC1B,CAAC;IAEM,KAAK;QACR,IAAI,IAAI,CAAC,SAAS;YACd,OAAO;QAEX,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QAEtB,IAAI,IAAI,CAAC,MAAM,CAAC,KAAK;YACjB,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QAEjC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QACrC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;IACzB,CAAC;IAEM,IAAI;QACP,IAAI,CAAC,IAAI,CAAC,SAAS;YACf,OAAO;QAEX,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;QAEvB,IAAI,IAAI,CAAC,MAAM,CAAC,KAAK;YACjB,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QAElC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QACtC,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;IACxB,CAAC;IAEM,KAAK,CAAC,GAAwE,EAAE,QAAoB;QACvG,IAAI,OAAO,GAAG,KAAK,QAAQ;YACvB,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC;QAEhB,KAAK,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;YAClB,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC;gBAC1B,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAElC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC9C,CAAC;QAED,OAAO,6BAA6B,CAAC,OAAO,CAAC,GAAG,EAAE;YAC9C,KAAK,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;gBAClB,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;gBAE5C,IAAI,SAAS,IAAI,IAAI;oBACjB,SAAS;gBAEb,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;gBAE1C,IAAI,KAAK,IAAI,CAAC;oBACV,SAAS,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;YACnC,CAAC;QACL,CAAC,CAAC,CAAC;IACP,CAAC;IAED,gBAAgB;IACR,OAAO,CAAC,IAAY;QACxB,IAAI,CAAC,IAAI,CAAC,SAAS;YACf,OAAO;QAEX,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;QAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QAEpD,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,IAAI,GAAG,KAAK,qBAAqB,CAAC,KAAK,EAAE,CAAC;YAChE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC3B,IAAI,CAAC,IAAI,EAAE,CAAC;YACZ,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;QAED,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YAC/B,IAAI,CAAC;gBACD,QAAQ,EAAE,CAAC;YACf,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACX,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACvB,CAAC;QACL,CAAC;IACL,CAAC;IAEM,MAAM,CAAC,aAAa,CAAC,QAAgB;QACxC,OAAO,IAAI,OAAO,CAAU,CAAC,OAAO,EAAE,EAAE;YACpC,MAAM,WAAW,GAAG,IAAI,kBAAkB,EAAE,CAAC;YAE7C,WAAW,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC;gBACd,WAAW,CAAC,IAAI,EAAE,CAAC;gBACnB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC/B,CAAC,CAAC,CAAC;YACH,WAAW,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE;gBAC/B,OAAO,CAAC,KAAK,CAAC,CAAC;gBACf,WAAW,CAAC,IAAI,EAAE,CAAC;gBACnB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC/B,CAAC,CAAC,CAAC;YAEH,OAAO,CAAC,GAAG,EAAE,CAAC;YACd,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC5D,WAAW,CAAC,KAAK,EAAE,CAAC;QACxB,CAAC,CAAC,CAAC;IACP,CAAC;CACJ;AAED,MAAM,OAAO,6BAA6B;IACtC,gBAAgB;IACR,QAAQ,CAAsB;IAEtC,YAAoB,OAAmB;QACnC,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;QAExB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC
,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC3D,CAAC;IAEM,OAAO;QACV,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE,CAAC;YACxB,IAAI,CAAC,QAAQ,EAAE,CAAC;YAChB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACzB,CAAC;IACL,CAAC;IAEM,CAAC,MAAM,CAAC,OAAO,CAAC;QACnB,IAAI,CAAC,OAAO,EAAE,CAAC;IACnB,CAAC;IAED,IAAW,QAAQ;QACf,OAAO,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC;IACjC,CAAC;IAED,gBAAgB;IACT,MAAM,CAAC,OAAO,CAAC,OAAmB;QACrC,OAAO,IAAI,6BAA6B,CAAC,OAAO,CAAC,CAAC;IACtD,CAAC;CACJ"}

View File

@@ -0,0 +1,24 @@
export declare class ConsoleTable<const T extends readonly ConsoleTableColumn[]> {
private readonly _columns;
private readonly _columnSeparator;
private readonly _drawHeaderRowSeparator;
constructor(columns: T, { columnSeparator, drawHeaderRowSeparator }?: {
columnSeparator?: string;
drawHeaderRowSeparator?: boolean;
});
logHeader({ drawRowSeparator }?: {
drawRowSeparator?: boolean;
}): void;
logLine(data: {
[key in T[number]["key"]]?: string;
}): void;
}
export type ConsoleTableColumn<K extends string = string> = {
readonly key: K;
readonly title?: string;
readonly titleFormatter?: (value: string) => string;
readonly width?: number;
readonly valueFormatter?: (value: string) => string;
readonly canSpanOverEmptyColumns?: boolean;
readonly visible?: boolean;
};

View File

@@ -0,0 +1,90 @@
import chalk from "chalk";
import sliceAnsi from "slice-ansi";
import stripAnsi from "strip-ansi";
export class ConsoleTable {
_columns;
_columnSeparator;
_drawHeaderRowSeparator;
constructor(columns, { columnSeparator = chalk.gray(" | "), drawHeaderRowSeparator = true } = {}) {
this._columns = filterHiddenColumns(columns);
this._columnSeparator = columnSeparator;
this._drawHeaderRowSeparator = drawHeaderRowSeparator;
}
logHeader({ drawRowSeparator = this._drawHeaderRowSeparator } = {}) {
let logLine = "";
for (let i = 0; i < this._columns.length; i++) {
const column = this._columns[i];
const canSpanOverEmptyColumns = column.canSpanOverEmptyColumns ?? false;
let title = column.title ?? " ";
let columnSize = getColumnWidth(column);
title = toOneLine(title);
title = (column.titleFormatter ?? defaultTitleFormatter)(title);
while (title.length > columnSize && canSpanOverEmptyColumns && i < this._columns.length - 1) {
i++;
const nextColumn = this._columns[i];
if (nextColumn.title != null) {
i--;
break;
}
columnSize += stripAnsi(this._columnSeparator).length + getColumnWidth(nextColumn);
}
const moreText = "...";
if (stripAnsi(title).length > columnSize)
title = sliceAnsi(title, 0, columnSize - moreText.length) + chalk.gray(moreText);
title = title + " ".repeat(Math.max(0, columnSize - stripAnsi(title).length));
title = sliceAnsi(title, 0, columnSize);
if (i < this._columns.length - 1)
title += this._columnSeparator;
logLine += title;
}
console.info(logLine);
if (drawRowSeparator)
console.info(chalk.gray("-".repeat(stripAnsi(logLine).length)));
}
logLine(data) {
let logLine = "";
for (let i = 0; i < this._columns.length; i++) {
const column = this._columns[i];
let value = data[column.key];
const canSpanOverEmptyColumns = column.canSpanOverEmptyColumns ?? false;
if (value != null && column.valueFormatter != null)
value = column.valueFormatter(value);
if (value == null)
value = "";
value = toOneLine(value);
const valueWithoutAnsi = stripAnsi(value);
let columnSize = getColumnWidth(column);
while (valueWithoutAnsi.length > columnSize && canSpanOverEmptyColumns && i < this._columns.length - 1) {
i++;
const nextColumn = this._columns[i];
const nextValue = data[nextColumn.key];
if (nextValue != null) {
i--;
break;
}
columnSize += stripAnsi(this._columnSeparator).length + getColumnWidth(nextColumn);
}
const moreText = "...";
if (valueWithoutAnsi.length > columnSize)
value = sliceAnsi(value, 0, columnSize - moreText.length) + chalk.gray(moreText);
value = value + " ".repeat(Math.max(0, columnSize - valueWithoutAnsi.length));
value = sliceAnsi(value, 0, columnSize);
if (i < this._columns.length - 1)
value += this._columnSeparator;
logLine += value;
}
console.info(logLine);
}
}
const defaultTitleFormatter = (value) => chalk.bold(value);
function getColumnWidth(column) {
return column.width ?? stripAnsi(toOneLine(column.title ?? " ")).length;
}
function toOneLine(text) {
return text.replaceAll("\n", chalk.gray("\\n"));
}
function filterHiddenColumns(columns) {
return columns
.filter((column) => column.visible !== false);
}
//# sourceMappingURL=ConsoleTable.js.map
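A short sketch built from the declared column type: two fixed-width columns, a header, and one data row (the column names, widths, and relative import path are illustrative):

// table-example.ts — illustrative only
import { ConsoleTable } from "./ConsoleTable.js";

const table = new ConsoleTable([{
    key: "model",
    title: "Model",
    width: 24
}, {
    key: "size",
    title: "Size",
    width: 10
}] as const);

table.logHeader();
table.logLine({ model: "gpt-oss 20B", size: "12.1GB" });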

View File

@@ -0,0 +1 @@
{"version":3,"file":"ConsoleTable.js","sourceRoot":"","sources":["../../../src/cli/utils/ConsoleTable.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,SAAS,MAAM,YAAY,CAAC;AACnC,OAAO,SAAS,MAAM,YAAY,CAAC;AAEnC,MAAM,OAAO,YAAY;IACJ,QAAQ,CAAI;IACZ,gBAAgB,CAAS;IACzB,uBAAuB,CAAU;IAElD,YAAmB,OAAU,EAAE,EAC3B,eAAe,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EACnC,sBAAsB,GAAG,IAAI,KAI7B,EAAE;QACF,IAAI,CAAC,QAAQ,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC;QAC7C,IAAI,CAAC,gBAAgB,GAAG,eAAe,CAAC;QACxC,IAAI,CAAC,uBAAuB,GAAG,sBAAsB,CAAC;IAC1D,CAAC;IAEM,SAAS,CAAC,EAAC,gBAAgB,GAAG,IAAI,CAAC,uBAAuB,KAAkC,EAAE;QACjG,IAAI,OAAO,GAAG,EAAE,CAAC;QAEjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5C,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC;YACjC,MAAM,uBAAuB,GAAG,MAAM,CAAC,uBAAuB,IAAI,KAAK,CAAC;YACxE,IAAI,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,GAAG,CAAC;YAChC,IAAI,UAAU,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC;YAExC,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;YAEzB,KAAK,GAAG,CAAC,MAAM,CAAC,cAAc,IAAI,qBAAqB,CAAC,CAAC,KAAK,CAAC,CAAC;YAEhE,OAAO,KAAK,CAAC,MAAM,GAAG,UAAU,IAAI,uBAAuB,IAAI,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1F,CAAC,EAAE,CAAC;gBACJ,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC;gBAErC,IAAI,UAAU,CAAC,KAAK,IAAI,IAAI,EAAE,CAAC;oBAC3B,CAAC,EAAE,CAAC;oBACJ,MAAM;gBACV,CAAC;gBAED,UAAU,IAAI,SAAS,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,MAAM,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;YACvF,CAAC;YAED,MAAM,QAAQ,GAAG,KAAK,CAAC;YACvB,IAAI,SAAS,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,UAAU;gBACpC,KAAK,GAAG,SAAS,CAAC,KAAK,EAAE,CAAC,EAAE,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAErF,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;YAC9E,KAAK,GAAG,SAAS,CAAC,KAAK,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;YAExC,IAAI,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC;gBAC5B,KAAK,IAAI,IAAI,CAAC,gBAAgB,CAAC;YAEnC,OAAO,IAAI,KAAK,CAAC;QACrB,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEtB,IAAI,gBAAgB;YAChB,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IACxE,CAAC;IAEM,OAAO,CAAC,IAA0C;QACrD,IAAI,OAAO,GAAG,EAAE,CAAC;QAEjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5C,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC;YACjC,IAAI,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,GAAwB,CAAC,CAAC;YAClD,MAAM,uBAAuB,GAAG,MAAM,CAAC,uBAAuB,IAAI,KAAK,CAAC;YAExE,IAAI,KAAK,IAAI,IAAI,IAAI,MAAM,CAAC,cAAc,IAAI,IAAI;gBAC9C,KAAK,GAAG,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;YAEzC,IAAI,KAAK,IAAI,IAAI;gBACb,KAAK,GAAG,EAAE,CAAC;YAEf,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;YAEzB,MAAM,gBAAgB,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;YAC1C,IAAI,UAAU,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC;YAExC,OAAO,gBAAgB,CAAC,MAAM,GAAG,UAAU,IAAI,uBAAuB,IAAI,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrG,CAAC,EAAE,CAAC;gBACJ,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC;gBACrC,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,CAAC,GAAwB,CAAC,CAAC;gBAE5D,IAAI,SAAS,IAAI,IAAI,EAAE,CAAC;oBACpB,CAAC,EAAE,CAAC;oBACJ,MAAM;gBACV,CAAC;gBAED,UAAU,IAAI,SAAS,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,MAAM,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;YACvF,CAAC;YAED,MAAM,QAAQ,GAAG,KAAK,CAAC;YACvB,IAAI,gBAAgB,CAAC,MAAM,GAAG,UAAU;gBACpC,KAAK,GAAG,SAAS,CAAC,KAAK,EAAE,CAAC,EAAE,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAErF,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC;YAC9E,KAAK,GAAG,SAAS,CAAC,KAAK,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;YAExC,IAAI,CAAC,GAAG,IAAI,C
AAC,QAAQ,CAAC,MAAM,GAAG,CAAC;gBAC5B,KAAK,IAAI,IAAI,CAAC,gBAAgB,CAAC;YAEnC,OAAO,IAAI,KAAK,CAAC;QACrB,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC1B,CAAC;CACJ;AAED,MAAM,qBAAqB,GAAG,CAAC,KAAa,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AAYnE,SAAS,cAAc,CAAC,MAA0B;IAC9C,OAAO,MAAM,CAAC,KAAK,IAAI,SAAS,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,IAAI,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;AAC5E,CAAC;AAED,SAAS,SAAS,CAAC,IAAY;IAC3B,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;AACpD,CAAC;AAED,SAAS,mBAAmB,CAAgD,OAAU;IAClF,OAAO,OAAO;SACT,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,KAAK,KAAK,CAAuC,CAAC;AAC5F,CAAC"}

View File

@@ -0,0 +1,13 @@
export declare function basicChooseFromListConsoleInteraction<T>({ title, footer, items, renderItem, canFocusItem, canSelectItem, initialFocusIndex, aboveItemsPadding, belowItemsPadding, renderSummaryOnExit, exitOnCtrlC }: {
title: string | ((focusedItem: T, rerender: () => void) => string);
footer?: string | ((focusedItem: T, rerender: () => void) => string | undefined);
items: T[];
renderItem(item: T, focused: boolean, rerender: () => void): string;
canFocusItem?(item: T): boolean;
canSelectItem?(item: T): boolean;
initialFocusIndex?: number;
aboveItemsPadding?: number;
belowItemsPadding?: number;
renderSummaryOnExit?(item: T | null): string;
exitOnCtrlC?: boolean;
}): Promise<T | null>;

View File

@@ -0,0 +1,111 @@
import process from "process";
import UpdateManager from "stdout-update";
import stripAnsi from "strip-ansi";
import sliceAnsi from "slice-ansi";
import chalk from "chalk";
import { ConsoleInteraction, ConsoleInteractionKey } from "./ConsoleInteraction.js";
import { splitAnsiToLines } from "./splitAnsiToLines.js";
export async function basicChooseFromListConsoleInteraction({ title, footer, items, renderItem, canFocusItem, canSelectItem, initialFocusIndex = 0, aboveItemsPadding = 1, belowItemsPadding = 1, renderSummaryOnExit = (item) => (item == null ? "" : renderItem(item, false, () => void 0)), exitOnCtrlC = true }) {
const updateManager = UpdateManager.getInstance();
let focusIndex = initialFocusIndex;
let scrollOffset = 0;
let rerenderTimeout = undefined;
let isDone = false;
function adjustScrollOffset(screenLines) {
if (focusIndex < scrollOffset + aboveItemsPadding)
scrollOffset = Math.max(0, focusIndex - aboveItemsPadding);
else if (focusIndex > scrollOffset + screenLines - belowItemsPadding)
scrollOffset = Math.min(Math.max(0, focusIndex - screenLines + belowItemsPadding), items.length - 1 - screenLines);
}
function scheduleRerender() {
if (isDone)
return;
if (rerenderTimeout == null)
rerenderTimeout = setTimeout(renderScreen, 0);
}
function renderScreen() {
clearTimeout(rerenderTimeout);
rerenderTimeout = undefined;
if (isDone)
return;
while (canFocusItem != null && focusIndex > 0 && !canFocusItem(items[focusIndex]))
focusIndex--;
while (canFocusItem != null && focusIndex < items.length - 1 && !canFocusItem(items[focusIndex]))
focusIndex++;
const maxWidth = (process.stdout.columns ?? 80) - 2;
const maxHeight = (process.stdout.rows ?? 24) - 2;
const focusedItem = items[focusIndex];
const titleLines = splitAnsiToLines(title instanceof Function ? title(focusedItem, scheduleRerender) : title, maxWidth);
const footerLines = splitAnsiToLines(footer instanceof Function ? footer(focusedItem, scheduleRerender) : footer, maxWidth);
const reservedLinesCount = titleLines.length + footerLines.length;
const maxItemLinesCount = Math.max(1, maxHeight - reservedLinesCount);
adjustScrollOffset(maxItemLinesCount);
updateManager.update([
...titleLines,
...items
.slice(scrollOffset, scrollOffset + maxItemLinesCount + 1)
.map((item, index) => (renderSingleLine(renderItem(item, scrollOffset + index === focusIndex, scheduleRerender), maxWidth))),
...footerLines
]);
}
updateManager.hook();
const consoleInteraction = new ConsoleInteraction();
try {
consoleInteraction.onKey(ConsoleInteractionKey.upArrow, () => {
let newFocusIndex = Math.max(0, focusIndex - 1);
while (newFocusIndex > 0 && canFocusItem != null && !canFocusItem(items[newFocusIndex]))
newFocusIndex--;
if (canFocusItem == null || canFocusItem(items[newFocusIndex])) {
focusIndex = newFocusIndex;
renderScreen();
}
});
consoleInteraction.onKey(ConsoleInteractionKey.downArrow, () => {
let newFocusIndex = Math.min(items.length - 1, focusIndex + 1);
while (newFocusIndex < items.length - 1 && canFocusItem != null && !canFocusItem(items[newFocusIndex]))
newFocusIndex++;
if (canFocusItem == null || canFocusItem(items[newFocusIndex])) {
focusIndex = newFocusIndex;
renderScreen();
}
});
process.on("SIGWINCH", renderScreen);
renderScreen();
const res = await new Promise((resolve) => {
consoleInteraction.onKey(ConsoleInteractionKey.enter, () => {
if (canSelectItem == null || canSelectItem(items[focusIndex]))
resolve(items[focusIndex]);
});
consoleInteraction.onKey(ConsoleInteractionKey.ctrlC, () => {
if (exitOnCtrlC) {
updateManager.update([""]);
consoleInteraction.stop();
updateManager.unhook(true);
process.exit(0);
}
resolve(null);
});
consoleInteraction.start();
});
isDone = true;
clearTimeout(rerenderTimeout);
rerenderTimeout = undefined;
process.off("SIGWINCH", renderScreen);
updateManager.update([
renderSummaryOnExit(res)
]);
return res;
}
finally {
consoleInteraction.stop();
updateManager.unhook(true);
}
}
function renderSingleLine(text, maxWidth) {
const textWithoutAnsi = stripAnsi(text);
const moreText = "...";
if (textWithoutAnsi.length > maxWidth)
return sliceAnsi(text, 0, maxWidth - moreText.length) + chalk.gray(moreText);
return text;
}
//# sourceMappingURL=basicChooseFromListConsoleInteraction.js.map
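
A minimal usage sketch of this picker, assuming it is imported from this internal module (it is a CLI-internal helper, not part of the package's public API); the item list and labels are made up for illustration:

import chalk from "chalk";
import {basicChooseFromListConsoleInteraction} from "./basicChooseFromListConsoleInteraction.js";

const flavors = ["vanilla", "chocolate", "pistachio"];
const selected = await basicChooseFromListConsoleInteraction({
    title: chalk.bold("Pick a flavor:"),
    items: flavors,
    renderItem: (item, focused) => (focused ? chalk.cyan("> " + item) : "  " + item),
    exitOnCtrlC: false
});
// `selected` is the focused string when Enter is pressed, or null when the user presses Ctrl+C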

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,6 @@
export declare function consolePromptQuestion(question: string, { validate, renderSummaryOnExit, exitOnCtrlC, defaultValue }?: {
validate?: (input: string) => string | null | Promise<string | null>;
renderSummaryOnExit?: (item: string | null) => string;
exitOnCtrlC?: boolean;
defaultValue?: string;
}): Promise<string | null>;

View File

@@ -0,0 +1,81 @@
import readline from "readline";
import process from "process";
import chalk from "chalk";
import { splitAnsiToLines } from "./splitAnsiToLines.js";
export async function consolePromptQuestion(question, { validate, renderSummaryOnExit, exitOnCtrlC = true, defaultValue } = {}) {
let lastErrorText = "";
let lastResponse = "";
process.stdout.moveCursor(0, -1);
while (true) {
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout
});
let res = await new Promise((accept) => {
const initialCursorPosition = rl.getCursorPos();
function onSigInt() {
rl.off("SIGINT", onSigInt);
rl.close();
const linesUsed = splitAnsiToLines(lastErrorText, process.stdout.columns).length +
rl.getCursorPos().rows - initialCursorPosition.rows + 1;
clearLastLines(linesUsed);
if (exitOnCtrlC) {
rl.close();
process.exit(0);
}
else
accept(null);
}
rl.on("SIGINT", onSigInt);
rl.question(question, (res) => {
rl.off("SIGINT", onSigInt);
rl.close();
accept(res);
});
rl.write(lastResponse);
});
const linesUsed = splitAnsiToLines(lastErrorText + question + res, process.stdout.columns).length + (res != null ? 1 : 0);
if (res == null) {
clearLastLines(linesUsed);
if (renderSummaryOnExit != null) {
const summary = renderSummaryOnExit(null);
if (summary !== "")
process.stdout.write(summary + "\n");
}
return null;
}
if (res === "" && defaultValue != null)
res = defaultValue;
lastResponse = res;
const validationError = await validate?.(res);
if (validationError != null) {
clearLastLines(linesUsed);
lastErrorText = chalk.red(validationError) + "\n";
process.stdout.write(lastErrorText);
continue;
}
else if (renderSummaryOnExit != null) {
clearLastLines(linesUsed);
const summary = renderSummaryOnExit(res);
if (summary !== "")
process.stdout.write(summary + "\n");
}
else if (lastErrorText !== "") {
clearLastLines(linesUsed);
process.stdout.write(question + res + "\n");
}
return res;
}
}
function clearLastLines(linesCount) {
if (linesCount === 0)
return;
process.stdout.write("\n");
for (let i = 0; i < linesCount; i++) {
process.stdout.moveCursor(0, -1);
process.stdout.clearLine(0);
}
process.stdout.write("\n");
process.stdout.moveCursor(0, -1);
}
//# sourceMappingURL=consolePromptQuestion.js.map
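
A usage sketch of this prompt helper, assuming the same relative import; the validation rule and default value are illustrative only:

import chalk from "chalk";
import {consolePromptQuestion} from "./consolePromptQuestion.js";

const projectName = await consolePromptQuestion(chalk.bold("Project name: "), {
    defaultValue: "my-project",
    exitOnCtrlC: false,
    validate: (input) => (input.trim() === "" ? "The project name cannot be empty" : null)
});
// Resolves to the entered text (or "my-project" when left empty), or null if cancelled with Ctrl+C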

View File

@@ -0,0 +1 @@
{"version":3,"file":"consolePromptQuestion.js","sourceRoot":"","sources":["../../../src/cli/utils/consolePromptQuestion.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,UAAU,CAAC;AAChC,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAC,gBAAgB,EAAC,MAAM,uBAAuB,CAAC;AAGvD,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,QAAgB,EAAE,EAC1D,QAAQ,EACR,mBAAmB,EACnB,WAAW,GAAG,IAAI,EAClB,YAAY,KAMZ,EAAE;IACF,IAAI,aAAa,GAAG,EAAE,CAAC;IACvB,IAAI,YAAY,GAAG,EAAE,CAAC;IAEtB,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAEjC,OAAO,IAAI,EAAE,CAAC;QACV,MAAM,EAAE,GAAG,QAAQ,CAAC,eAAe,CAAC;YAChC,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;SACzB,CAAC,CAAC;QAEH,IAAI,GAAG,GAAG,MAAM,IAAI,OAAO,CAAgB,CAAC,MAAM,EAAE,EAAE;YAClD,MAAM,qBAAqB,GAAG,EAAE,CAAC,YAAY,EAAE,CAAC;YAChD,SAAS,QAAQ;gBACb,EAAE,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;gBAC3B,EAAE,CAAC,KAAK,EAAE,CAAC;gBAEX,MAAM,SAAS,GAAG,gBAAgB,CAAC,aAAa,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM;oBAC5E,EAAE,CAAC,YAAY,EAAE,CAAC,IAAI,GAAG,qBAAqB,CAAC,IAAI,GAAG,CAAC,CAAC;gBAC5D,cAAc,CAAC,SAAS,CAAC,CAAC;gBAE1B,IAAI,WAAW,EAAE,CAAC;oBACd,EAAE,CAAC,KAAK,EAAE,CAAC;oBACX,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBACpB,CAAC;;oBACG,MAAM,CAAC,IAAI,CAAC,CAAC;YACrB,CAAC;YAED,EAAE,CAAC,EAAE,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;YAE1B,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC,GAAG,EAAE,EAAE;gBAC1B,EAAE,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;gBAC3B,EAAE,CAAC,KAAK,EAAE,CAAC;gBAEX,MAAM,CAAC,GAAG,CAAC,CAAC;YAChB,CAAC,CAAC,CAAC;YACH,EAAE,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;QAC3B,CAAC,CAAC,CAAC;QAEH,MAAM,SAAS,GAAG,gBAAgB,CAAC,aAAa,GAAG,QAAQ,GAAG,GAAG,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAE1H,IAAI,GAAG,IAAI,IAAI,EAAE,CAAC;YACd,cAAc,CAAC,SAAS,CAAC,CAAC;YAE1B,IAAI,mBAAmB,IAAI,IAAI,EAAE,CAAC;gBAC9B,MAAM,OAAO,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;gBAE1C,IAAI,OAAO,KAAK,EAAE;oBACd,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC;YAC7C,CAAC;YAED,OAAO,IAAI,CAAC;QAChB,CAAC;QAED,IAAI,GAAG,KAAK,EAAE,IAAI,YAAY,IAAI,IAAI;YAClC,GAAG,GAAG,YAAY,CAAC;QAEvB,YAAY,GAAG,GAAG,CAAC;QAEnB,MAAM,eAAe,GAAG,MAAM,QAAQ,EAAE,CAAC,GAAG,CAAC,CAAC;QAE9C,IAAI,eAAe,IAAI,IAAI,EAAE,CAAC;YAC1B,cAAc,CAAC,SAAS,CAAC,CAAC;YAC1B,aAAa,GAAG,KAAK,CAAC,GAAG,CAAC,eAAe,CAAC,GAAG,IAAI,CAAC;YAClD,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;YAEpC,SAAS;QACb,CAAC;aAAM,IAAI,mBAAmB,IAAI,IAAI,EAAE,CAAC;YACrC,cAAc,CAAC,SAAS,CAAC,CAAC;YAE1B,MAAM,OAAO,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC;YAEzC,IAAI,OAAO,KAAK,EAAE;gBACd,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC;QAC7C,CAAC;aAAM,IAAI,aAAa,KAAK,EAAE,EAAE,CAAC;YAC9B,cAAc,CAAC,SAAS,CAAC,CAAC;YAC1B,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,GAAG,IAAI,CAAC,CAAC;QAChD,CAAC;QAED,OAAO,GAAG,CAAC;IACf,CAAC;AACL,CAAC;AAED,SAAS,cAAc,CAAC,UAAkB;IACtC,IAAI,UAAU,KAAK,CAAC;QAChB,OAAO;IAEX,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAE3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACjC,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC;IAED,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC3B,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;AACrC,CAAC"}

View File

@@ -0,0 +1 @@
export declare function getReadablePath(fsPath: string): string;

View File

@@ -0,0 +1,14 @@
import os from "os";
import path from "path";
export function getReadablePath(fsPath) {
const resolvedPath = path.resolve(process.cwd(), fsPath);
if (process.platform === "win32" || process.platform === "cygwin")
return resolvedPath;
let homedir = os.homedir();
if (!homedir.endsWith("/"))
homedir += "/";
if (resolvedPath.startsWith(homedir))
return "~" + resolvedPath.slice(homedir.length - "/".length);
return resolvedPath;
}
//# sourceMappingURL=getReadablePath.js.map
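
Illustrative calls (the exact output depends on the platform and home directory, which are assumptions here):

import {getReadablePath} from "./getReadablePath.js";

getReadablePath("/home/alice/models/llama-3.gguf");  // "~/models/llama-3.gguf" when the home directory is /home/alice
getReadablePath("models/llama-3.gguf");              // resolved against process.cwd() first, then shortened the same way
getReadablePath("C:\\models\\llama-3.gguf");         // returned as-is on Windows, where no "~" substitution is applied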

View File

@@ -0,0 +1 @@
{"version":3,"file":"getReadablePath.js","sourceRoot":"","sources":["../../../src/cli/utils/getReadablePath.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,MAAM,UAAU,eAAe,CAAC,MAAc;IAC1C,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC,CAAC;IAEzD,IAAI,OAAO,CAAC,QAAQ,KAAK,OAAO,IAAI,OAAO,CAAC,QAAQ,KAAK,QAAQ;QAC7D,OAAO,YAAY,CAAC;IAExB,IAAI,OAAO,GAAG,EAAE,CAAC,OAAO,EAAE,CAAC;IAC3B,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC;QACtB,OAAO,IAAI,GAAG,CAAC;IAEnB,IAAI,YAAY,CAAC,UAAU,CAAC,OAAO,CAAC;QAChC,OAAO,GAAG,GAAG,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC;IAEjE,OAAO,YAAY,CAAC;AACxB,CAAC"}

View File

@@ -0,0 +1,10 @@
import { Llama } from "../../bindings/Llama.js";
export declare function interactivelyAskForModel({ llama, modelsDirectory, allowLocalModels, downloadIntent, flashAttention, swaFullCache, useMmap }: {
llama: Llama;
modelsDirectory?: string;
allowLocalModels?: boolean;
downloadIntent?: boolean;
flashAttention?: boolean;
swaFullCache?: boolean;
useMmap?: boolean;
}): Promise<string>;

View File

@@ -0,0 +1,471 @@
import path from "path";
import process from "process";
import chalk from "chalk";
import fs from "fs-extra";
import stripAnsi from "strip-ansi";
import logSymbols from "log-symbols";
import { getReadableContextSize } from "../../utils/getReadableContextSize.js";
import { arrowChar } from "../../consts.js";
import { getGgufSplitPartsInfo } from "../../gguf/utils/resolveSplitGgufParts.js";
import { withProgressLog } from "../../utils/withProgressLog.js";
import { GgufInsights } from "../../gguf/insights/GgufInsights.js";
import { readGgufFileInfo } from "../../gguf/readGgufFileInfo.js";
import { getPrettyBuildGpuName } from "../../bindings/consts.js";
import { isUrl } from "../../utils/isUrl.js";
import { isModelUri, parseModelUri } from "../../utils/parseModelUri.js";
import { resolveModelRecommendationFileOptions } from "./resolveModelRecommendationFileOptions.js";
import { getReadablePath } from "./getReadablePath.js";
import { basicChooseFromListConsoleInteraction } from "./basicChooseFromListConsoleInteraction.js";
import { splitAnsiToLines } from "./splitAnsiToLines.js";
import { consolePromptQuestion } from "./consolePromptQuestion.js";
import { renderInfoLine } from "./printInfoLine.js";
import { renderModelCompatibilityPercentageWithColors } from "./renderModelCompatibilityPercentageWithColors.js";
import { toBytes } from "./toBytes.js";
const vramStateUpdateInterval = 1000;
export async function interactivelyAskForModel({ llama, modelsDirectory, allowLocalModels = true, downloadIntent = true, flashAttention = false, swaFullCache = false, useMmap }) {
let localModelFileOptions = [];
const recommendedModelOptions = [];
const activeInteractionController = new AbortController();
let scheduledTitleRerenderTimeout = undefined;
let vramState = await llama.getVramState();
const canUseGpu = vramState.total > 0;
    if (allowLocalModels && modelsDirectory != null && await fs.pathExists(modelsDirectory)) {
const ggufFileNames = (await fs.readdir(modelsDirectory))
.filter((fileName) => {
if (!fileName.endsWith(".gguf"))
return false;
const partsInfo = getGgufSplitPartsInfo(fileName);
return partsInfo == null || partsInfo.part === 1;
});
let readItems = 0;
const renderProgress = () => ("(" + String(readItems)
.padStart(String(ggufFileNames.length).length, "0") + "/" + ggufFileNames.length + ")");
if (ggufFileNames.length > 0)
await withProgressLog({
loadingText: "Reading local models directory",
failText: "Failed to read local models directory",
successText: "Read local models directory",
noSuccessLiveStatus: true,
initialProgressBarText: renderProgress()
}, async (progressUpdater) => {
localModelFileOptions = await Promise.all(ggufFileNames.map(async (fileName) => {
const filePath = path.join(modelsDirectory, fileName);
let ggufInsights = undefined;
try {
const ggufFileInfo = await readGgufFileInfo(filePath, {
sourceType: "filesystem",
signal: activeInteractionController.signal
});
ggufInsights = await GgufInsights.from(ggufFileInfo, llama);
}
catch (err) {
// do nothing
}
readItems++;
progressUpdater.setProgress(readItems / ggufFileNames.length, renderProgress());
const compatibilityScore = await ggufInsights?.configurationResolver.scoreModelConfigurationCompatibility({
flashAttention: flashAttention && ggufInsights?.flashAttentionSupported,
swaFullCache,
useMmap
});
return {
type: "localModel",
title: fileName,
path: filePath,
addedDate: (await fs.stat(filePath)).birthtimeMs,
ggufInsights: ggufInsights,
compatibilityScore: compatibilityScore?.compatibilityScore,
compatibilityBonusScore: compatibilityScore?.bonusScore,
compatibilityContextSize: compatibilityScore?.resolvedValues.contextSize
};
}));
localModelFileOptions = localModelFileOptions.sort((a, b) => {
if (a.compatibilityScore == null && b.compatibilityScore == null)
return b.addedDate - a.addedDate;
else if (a.compatibilityScore == null)
return -1;
else if (b.compatibilityScore == null)
return 1;
else if (b.compatibilityScore === a.compatibilityScore &&
b.compatibilityBonusScore != null && a.compatibilityBonusScore != null)
return b.compatibilityBonusScore - a.compatibilityBonusScore;
return b.compatibilityScore - a.compatibilityScore;
});
});
}
try {
// if this file gets very big, we don't want to load it on every CLI usage
const { recommendedModels } = await import("../recommendedModels.js");
for (const recommendedModel of recommendedModels) {
const potentialUris = resolveModelRecommendationFileOptions(recommendedModel);
if (potentialUris.length > 0)
recommendedModelOptions.push({
type: "recommendedModel",
title: recommendedModel.name,
potentialUris,
description: recommendedModel.description
});
}
}
catch (err) {
// do nothing
}
let initialFocusIndex = 3; // first model option
const options = [
{
type: "action",
text: allowLocalModels
? "Enter a model URI or file path..."
: "Enter a model URI...",
key: "getPath"
},
...((localModelFileOptions.length === 0 || modelsDirectory == null)
? []
: [
{
type: "separator",
text: () => " " + chalk.gray("-".repeat(4))
},
{
type: "separator",
text: " " + chalk.bold("Downloaded models") + " " + chalk.dim(`(${getReadablePath(modelsDirectory)})`)
},
...localModelFileOptions
]),
...(recommendedModelOptions.length === 0
? []
: [
{
type: "separator",
text: () => " " + chalk.gray("-".repeat(4))
},
{
type: "separator",
text: " " + chalk.bold("Recommended models") + (downloadIntent
? (" " + chalk.dim("(select to download)"))
: "")
},
...recommendedModelOptions
])
];
try {
while (true) {
const minWidth = Math.min(80 + (flashAttention ? 26 : 0), process.stdout.columns - 1);
const selectedItem = await basicChooseFromListConsoleInteraction({
title(item, rerender) {
const title = chalk.bold("Select a model:") + " ";
const vramStateText = vramState.total === 0
? chalk.bgGray(" " +
"No GPU" +
" ")
: (chalk.bgGray(" " +
chalk.yellow("GPU:") + " " + getPrettyBuildGpuName(llama.gpu) +
" ") +
" " +
chalk.bgGray(" " +
chalk.yellow("VRAM usage:") + " " +
(String(Math.floor((vramState.used / vramState.total) * 100 * 100) / 100) + "%") + " " +
chalk.dim("(" + toBytes(vramState.used) + "/" + toBytes(vramState.total) + ")") +
" ") + (!flashAttention
? ""
: (" " +
chalk.bgGray(" " +
chalk.yellow("Flash attention:") + " " + "enabled" +
" "))));
const pad = Math.max(0, minWidth - (stripAnsi(title).length + stripAnsi(vramStateText).length));
clearTimeout(scheduledTitleRerenderTimeout);
scheduledTitleRerenderTimeout = setTimeout(async () => {
const newVramState = await llama.getVramState();
if (vramState.used !== newVramState.used || vramState.total !== newVramState.total) {
vramState = newVramState;
rerender();
}
}, vramStateUpdateInterval);
return [
title,
" ".repeat(pad),
vramStateText
].join("");
},
footer(item) {
if (item.type !== "recommendedModel" || item.description == null)
return undefined;
const leftPad = 3;
const maxWidth = Math.max(1, process.stdout.columns - 2 - leftPad);
const lines = splitAnsiToLines(item.description, maxWidth);
return " \n" +
" ".repeat(leftPad) + chalk.bold.gray("Model description") + "\n" +
lines.map((line) => (" ".repeat(leftPad) + line))
.join("\n") + "\n" +
splitAnsiToLines(renderRecommendedModelTechnicalInfo(item.selectedUri, maxWidth, canUseGpu), maxWidth)
.map((line) => (" ".repeat(leftPad) + line))
.join("\n");
},
items: options,
renderItem(item, focused, rerender) {
return renderSelectionItem(item, focused, rerender, activeInteractionController.signal, llama, flashAttention, swaFullCache, useMmap);
},
canFocusItem(item) {
return item.type === "recommendedModel" || item.type === "localModel" || item.type === "action";
},
canSelectItem(item) {
if (item.type === "recommendedModel")
return item.selectedUri != null;
return item.type === "localModel" || item.type === "action";
},
initialFocusIndex: Math.min(initialFocusIndex, options.length - 1),
aboveItemsPadding: 1,
belowItemsPadding: 1,
renderSummaryOnExit(item) {
if (item == null || item.type === "action" || item.type === "separator")
return "";
else if (item.type === "localModel") {
const modelTitle = item.title instanceof Function
? item.title()
: item.title;
return logSymbols.success + " Selected model " + chalk.blue(modelTitle);
}
else if (item.type === "recommendedModel") {
const modelTitle = item.title instanceof Function
? item.title()
: item.title;
return logSymbols.success + " Selected model " + chalk.blue(modelTitle);
}
void item;
return "";
},
exitOnCtrlC: true
});
if (selectedItem == null || selectedItem.type === "separator")
continue;
else if (selectedItem.type === "localModel")
return selectedItem.path;
else if (selectedItem.type === "recommendedModel" && selectedItem.selectedUri != null)
return selectedItem.selectedUri.uri;
else if (selectedItem.type === "action") {
if (selectedItem.key === "getPath") {
initialFocusIndex = 0;
const selectedModelUriOrPath = await askForModelUriOrPath(allowLocalModels);
if (selectedModelUriOrPath == null)
continue;
return selectedModelUriOrPath;
}
}
}
}
finally {
activeInteractionController.abort();
}
}
async function askForModelUriOrPath(allowLocalModels) {
return await consolePromptQuestion(allowLocalModels
? chalk.bold("Enter a model URI or file path: ")
: chalk.bold("Enter a model URI: "), {
exitOnCtrlC: false,
async validate(input) {
if (isUrl(input, false)) {
try {
new URL(input);
}
catch (err) {
return "Invalid URL";
}
return null;
}
try {
if (parseModelUri(input) != null)
return null;
}
catch (err) {
return err instanceof Error
? (err?.message || "Invalid model URI")
: "Invalid model URI";
}
if (!allowLocalModels)
return "Only URIs are allowed";
try {
if (await fs.pathExists(input))
return null;
return "File does not exist";
}
catch (err) {
return "Invalid path";
}
},
renderSummaryOnExit(item) {
if (item == null)
return "";
if (isUrl(item, false))
return logSymbols.success + " Entered model URL " + chalk.blue(item);
else if (isModelUri(item)) {
return logSymbols.success + " Entered model URI " + chalk.blue(item);
}
else
return logSymbols.success + " Entered model path " + chalk.blue(item);
}
});
}
function renderSelectionItem(item, focused, rerender, abortSignal, llama, flashAttention, swaFullCache, useMmap) {
if (item.type === "localModel") {
let modelText = item.title instanceof Function
? item.title()
: item.title;
if (item.ggufInsights != null)
modelText += " " + renderModelCompatibility(item.ggufInsights, item.compatibilityScore, item.compatibilityContextSize);
else
modelText += " " + chalk.bgGray.yellow(" Cannot read metadata ");
return renderSelectableItem(modelText, focused);
}
else if (item.type === "recommendedModel") {
let modelText = item.title instanceof Function
? item.title()
: item.title;
if (item.selectedUri == null) {
if (item.uriSelectionLoadingState == null) {
item.uriSelectionLoadingState = "loading";
void selectFileForModelRecommendation({
recommendedModelOption: item,
abortSignal,
rerenderOption: rerender,
llama,
flashAttention,
swaFullCache,
useMmap
});
}
if (item.uriSelectionLoadingState === "loading")
modelText += " " + chalk.bgGray.yellow(" Loading info ");
else if (item.uriSelectionLoadingState === "done")
modelText += " " + chalk.bgGray.yellow(" Failed to load info ");
else
void item.uriSelectionLoadingState;
}
else
modelText += " " + renderModelCompatibility(item.selectedUri.ggufInsights, item.selectedUri.compatibilityScore.compatibilityScore, item.selectedUri.compatibilityScore.resolvedValues.contextSize);
return renderSelectableItem(modelText, focused);
}
else if (item.type === "separator") {
return item.text instanceof Function
? item.text()
: item.text;
}
else if (item.type === "action") {
const actionText = item.text instanceof Function
? item.text()
: item.text;
return renderSelectableItem(actionText, focused);
}
void item;
return "";
}
function renderSelectableItem(text, focused) {
if (focused)
return " " + chalk.cyan(arrowChar) + " " + chalk.cyan(text);
return " * " + text;
}
function renderModelCompatibility(ggufInsights, compatibilityScore, compatibilityContextSize) {
const info = [];
if (compatibilityScore != null)
info.push(renderModelCompatibilityPercentageWithColors(compatibilityScore * 100) + chalk.whiteBright(" compatibility")
+ (compatibilityContextSize == null
? ""
: (chalk.gray(" | ") + chalk.yellow(getReadableContextSize(compatibilityContextSize)) + chalk.whiteBright(" context"))));
info.push(chalk.yellow("Size:") + " " + chalk.whiteBright(toBytes(ggufInsights.modelSize)));
return info
.map((item) => chalk.bgGray(" " + item + " "))
.join(" ");
}
function renderRecommendedModelTechnicalInfo(modelSelectedUri, maxWidth, canUseGpu) {
if (modelSelectedUri == null)
return " \n" + chalk.bgGray.yellow(" Loading info ") + "\n ";
const ggufInsights = modelSelectedUri.ggufInsights;
const compatibilityScore = modelSelectedUri.compatibilityScore;
const longestTitle = Math.max("Model info".length, "Resolved config".length) + 1;
return " \n" + [
renderInfoLine({
title: "Model info",
padTitle: longestTitle,
separateLines: false,
maxWidth,
info: [{
title: "Size",
value: toBytes(ggufInsights.modelSize)
}, {
show: ggufInsights.trainContextSize != null,
title: "Train context size",
value: () => getReadableContextSize(ggufInsights.trainContextSize ?? 0)
}]
}),
renderInfoLine({
title: "Resolved config",
padTitle: longestTitle,
separateLines: false,
maxWidth,
info: [{
title: "",
value: renderModelCompatibilityPercentageWithColors(compatibilityScore.compatibilityScore * 100) + " compatibility"
}, {
show: ggufInsights.trainContextSize != null,
title: "Context size",
value: getReadableContextSize(compatibilityScore.resolvedValues.contextSize)
}, {
show: canUseGpu,
title: "GPU layers",
value: () => (compatibilityScore.resolvedValues.gpuLayers + "/" + ggufInsights.totalLayers + " " +
chalk.dim(`(${Math.floor((compatibilityScore.resolvedValues.gpuLayers / ggufInsights.totalLayers) * 100)}%)`))
}, {
show: canUseGpu,
title: "VRAM usage",
value: () => toBytes(compatibilityScore.resolvedValues.totalVramUsage)
}, {
title: "RAM usage",
value: () => toBytes(compatibilityScore.resolvedValues.totalRamUsage)
}]
})
].join("\n");
}
async function selectFileForModelRecommendation({ recommendedModelOption, llama, abortSignal, rerenderOption, flashAttention, swaFullCache, useMmap }) {
try {
let bestScore = undefined;
let bestScoreSelectedUri = undefined;
for (const potentialUri of recommendedModelOption.potentialUris) {
if (abortSignal.aborted)
return;
try {
const ggufFileInfo = await readGgufFileInfo(potentialUri, {
sourceType: "network",
signal: abortSignal
});
const ggufInsights = await GgufInsights.from(ggufFileInfo, llama);
if (abortSignal.aborted)
return;
const compatibilityScore = await ggufInsights.configurationResolver.scoreModelConfigurationCompatibility({
flashAttention,
swaFullCache,
useMmap
});
if (bestScore == null || compatibilityScore.compatibilityScore > bestScore) {
bestScore = compatibilityScore.compatibilityScore;
bestScoreSelectedUri = {
uri: potentialUri,
ggufInsights,
compatibilityScore
};
if (bestScore === 1)
break;
}
}
catch (err) {
// do nothing
}
}
recommendedModelOption.selectedUri = bestScoreSelectedUri;
recommendedModelOption.uriSelectionLoadingState = "done";
rerenderOption();
}
catch (err) {
recommendedModelOption.uriSelectionLoadingState = "done";
rerenderOption();
}
}
//# sourceMappingURL=interactivelyAskForModel.js.map
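
A sketch of how this picker might be driven, assuming getLlama from the package's public API supplies the llama instance; the models directory and option values are illustrative, and the deep import is an internal path rather than a supported entry point:

import {getLlama} from "node-llama-cpp";
import {interactivelyAskForModel} from "./interactivelyAskForModel.js";

const llama = await getLlama();
const modelUriOrPath = await interactivelyAskForModel({
    llama,
    modelsDirectory: "./models",
    allowLocalModels: true,
    downloadIntent: true,
    flashAttention: false
});
// Resolves to a local .gguf path, a typed-in URI or path, or the URI of a selected recommended model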

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
export declare function isRunningUnderRosetta(): Promise<boolean>;

View File

@@ -0,0 +1,20 @@
import path from "path";
import { fileURLToPath } from "url";
import process from "process";
import { getPlatform } from "../../bindings/utils/getPlatform.js";
import { spawnCommand } from "../../utils/spawnCommand.js";
const __dirname = path.dirname(fileURLToPath(import.meta.url));
export async function isRunningUnderRosetta() {
const platform = getPlatform();
    // only check for Rosetta on macOS when x64 is detected
if (platform !== "mac" || process.arch !== "x64")
return false;
try {
const res = await spawnCommand("sysctl", ["-n", "sysctl.proc_translated"], __dirname, process.env, false);
return res.combinedStd.trim() === "1";
}
catch (err) {
return false;
}
}
//# sourceMappingURL=isRunningUnderRosetta.js.map
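
A sketch of the kind of check this enables (the warning text is made up):

import {isRunningUnderRosetta} from "./isRunningUnderRosetta.js";

if (await isRunningUnderRosetta())
    console.warn("Running an x64 build under Rosetta translation; an arm64 Node.js build would avoid the overhead.");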

View File

@@ -0,0 +1 @@
{"version":3,"file":"isRunningUnderRosetta.js","sourceRoot":"","sources":["../../../src/cli/utils/isRunningUnderRosetta.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAC,aAAa,EAAC,MAAM,KAAK,CAAC;AAClC,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAC,WAAW,EAAC,MAAM,qCAAqC,CAAC;AAChE,OAAO,EAAC,YAAY,EAAC,MAAM,6BAA6B,CAAC;AAEzD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE/D,MAAM,CAAC,KAAK,UAAU,qBAAqB;IACvC,MAAM,QAAQ,GAAG,WAAW,EAAE,CAAC;IAE/B,0DAA0D;IAC1D,IAAI,QAAQ,KAAK,KAAK,IAAI,OAAO,CAAC,IAAI,KAAK,KAAK;QAC5C,OAAO,KAAK,CAAC;IAEjB,IAAI,CAAC;QACD,MAAM,GAAG,GAAG,MAAM,YAAY,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,wBAAwB,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAE1G,OAAO,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,KAAK,GAAG,CAAC;IAC1C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACX,OAAO,KAAK,CAAC;IACjB,CAAC;AACL,CAAC"}

View File

@@ -0,0 +1,2 @@
import { BuildGpu } from "../../bindings/types.js";
export declare function logUsedGpuTypeOption(gpu: BuildGpu): void;

View File

@@ -0,0 +1,9 @@
import chalk from "chalk";
import { getPrettyBuildGpuName } from "../../bindings/consts.js";
export function logUsedGpuTypeOption(gpu) {
if (gpu == false)
console.log(`${chalk.yellow("GPU:")} disabled`);
else
console.log(`${chalk.yellow("GPU:")} ${getPrettyBuildGpuName(gpu)}`);
}
//# sourceMappingURL=logUsedGpuTypeOption.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"logUsedGpuTypeOption.js","sourceRoot":"","sources":["../../../src/cli/utils/logUsedGpuTypeOption.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,OAAO,EAAC,qBAAqB,EAAC,MAAM,0BAA0B,CAAC;AAE/D,MAAM,UAAU,oBAAoB,CAAC,GAAa;IAC9C,IAAI,GAAG,IAAI,KAAK;QACZ,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;;QAEhD,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,qBAAqB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;AAC7E,CAAC"}

View File

@@ -0,0 +1,12 @@
import { LlamaContext } from "../../evaluator/LlamaContext/LlamaContext.js";
export declare function printCommonInfoLines({ context, draftContext, minTitleLength, useMmap, useDirectIo, logBatchSize, tokenMeterEnabled, printBos, printEos }: {
context: LlamaContext;
draftContext?: LlamaContext;
minTitleLength?: number;
useMmap?: boolean;
useDirectIo?: boolean;
logBatchSize?: boolean;
tokenMeterEnabled?: boolean;
printBos?: boolean;
printEos?: boolean;
}): Promise<number>;

View File

@@ -0,0 +1,154 @@
import chalk from "chalk";
import { getPrettyBuildGpuName } from "../../bindings/consts.js";
import { getPlatform } from "../../bindings/utils/getPlatform.js";
import { printInfoLine } from "./printInfoLine.js";
import { toBytes } from "./toBytes.js";
export async function printCommonInfoLines({ context, draftContext, minTitleLength = 0, useMmap, useDirectIo, logBatchSize = false, tokenMeterEnabled = false, printBos = false, printEos = false }) {
const platform = getPlatform();
const llama = context._llama;
const model = context.model;
const padTitle = Math.max(minTitleLength, "Context".length + 1, draftContext != null
? ("Draft context".length + 1)
: 0);
if (llama.gpu !== false) {
const [vramState, deviceNames] = await Promise.all([
llama.getVramState(),
llama.getGpuDeviceNames()
]);
printInfoLine({
title: "GPU",
padTitle: padTitle,
info: [{
title: "Type",
value: getPrettyBuildGpuName(llama.gpu)
}, {
title: "VRAM",
value: toBytes(vramState.total)
}, {
title: "Name",
value: toOneLine(deviceNames.join(", "))
}]
});
}
printInfoLine({
title: "Model",
padTitle: padTitle,
info: [{
title: "Type",
value: toOneLine(model.typeDescription)
}, {
title: "Size",
value: toBytes(model.size)
}, {
show: llama.gpu !== false,
title: "GPU layers",
value: `${model.gpuLayers}/${model.fileInsights.totalLayers} offloaded ${chalk.dim(`(${Math.floor((model.gpuLayers / model.fileInsights.totalLayers) * 100)}%)`)}`
}, {
title: "mmap",
value: !model._llama.supportsMmap
? "unsupported"
: (useMmap || useMmap == null)
? "enabled"
: "disabled"
}, {
title: "Direct I/O",
show: platform !== "mac", // Direct IO is not supported on macOS
value: platform === "mac"
? "unsupported"
: (useDirectIo || useDirectIo == null)
? "enabled"
: "disabled"
}, {
show: printBos,
title: "BOS",
value: () => toOneLine(String(model.tokens.bosString))
}, {
show: printEos,
title: "EOS",
value: () => toOneLine(String(model.tokens.eosString))
}, {
title: "Train context size",
value: model.trainContextSize.toLocaleString("en-US")
}]
});
printInfoLine({
title: "Context",
padTitle: padTitle,
info: [{
title: "Size",
value: context.contextSize.toLocaleString("en-US")
}, {
title: "Threads",
value: context.currentThreads.toLocaleString("en-US")
}, {
show: logBatchSize,
title: "Batch size",
value: context.batchSize.toLocaleString("en-US")
}, {
show: context.flashAttention,
title: "Flash attention",
value: "enabled"
}, {
show: tokenMeterEnabled,
title: "Token meter",
value: "enabled"
}]
});
if (draftContext != null) {
const draftModel = draftContext.model;
printInfoLine({
title: "Draft model",
padTitle: padTitle,
info: [{
title: "Type",
value: toOneLine(draftModel.typeDescription)
}, {
title: "Size",
value: toBytes(draftModel.size)
}, {
show: llama.gpu !== false,
title: "GPU layers",
value: `${draftModel.gpuLayers}/${draftModel.fileInsights.totalLayers} offloaded ${chalk.dim(`(${Math.floor((draftModel.gpuLayers / draftModel.fileInsights.totalLayers) * 100)}%)`)}`
}, {
show: printBos,
title: "BOS",
value: () => toOneLine(String(draftModel.tokens.bosString))
}, {
show: printEos,
title: "EOS",
value: () => toOneLine(String(draftModel.tokens.eosString))
}, {
title: "Train context size",
value: draftModel.trainContextSize.toLocaleString("en-US")
}]
});
printInfoLine({
title: "Draft context",
padTitle: padTitle,
info: [{
title: "Size",
value: draftContext.contextSize.toLocaleString("en-US")
}, {
title: "Threads",
value: draftContext.currentThreads.toLocaleString("en-US")
}, {
show: logBatchSize,
title: "Batch size",
value: draftContext.batchSize.toLocaleString("en-US")
}, {
show: draftContext.flashAttention,
title: "Flash attention",
value: "enabled"
}, {
show: tokenMeterEnabled,
title: "Token meter",
value: "enabled"
}]
});
}
return padTitle;
}
function toOneLine(text) {
return text.replaceAll("\n", chalk.gray("\\n"));
}
//# sourceMappingURL=printCommonInfoLines.js.map
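
A sketch of a typical call, assuming a LlamaContext created elsewhere; the option values are illustrative:

import {printCommonInfoLines} from "./printCommonInfoLines.js";

const padTitle = await printCommonInfoLines({
    context,              // an existing LlamaContext instance, e.g. from model.createContext()
    logBatchSize: true,
    tokenMeterEnabled: false,
    printBos: true,
    printEos: true
});
// The returned padding width lets follow-up printInfoLine({padTitle, ...}) calls stay aligned with these lines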

View File

@@ -0,0 +1 @@
{"version":3,"file":"printCommonInfoLines.js","sourceRoot":"","sources":["../../../src/cli/utils/printCommonInfoLines.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAC,qBAAqB,EAAC,MAAM,0BAA0B,CAAC;AAE/D,OAAO,EAAC,WAAW,EAAC,MAAM,qCAAqC,CAAC;AAChE,OAAO,EAAC,aAAa,EAAC,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAC,OAAO,EAAC,MAAM,cAAc,CAAC;AAErC,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,EACvC,OAAO,EACP,YAAY,EACZ,cAAc,GAAG,CAAC,EAClB,OAAO,EACP,WAAW,EACX,YAAY,GAAG,KAAK,EACpB,iBAAiB,GAAG,KAAK,EACzB,QAAQ,GAAG,KAAK,EAChB,QAAQ,GAAG,KAAK,EAWnB;IACG,MAAM,QAAQ,GAAG,WAAW,EAAE,CAAC;IAC/B,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;IAC7B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;IAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CACrB,cAAc,EACd,SAAS,CAAC,MAAM,GAAG,CAAC,EACpB,YAAY,IAAI,IAAI;QAChB,CAAC,CAAC,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC,CACV,CAAC;IAEF,IAAI,KAAK,CAAC,GAAG,KAAK,KAAK,EAAE,CAAC;QACtB,MAAM,CACF,SAAS,EACT,WAAW,CACd,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAClB,KAAK,CAAC,YAAY,EAAE;YACpB,KAAK,CAAC,iBAAiB,EAAE;SAC5B,CAAC,CAAC;QAEH,aAAa,CAAC;YACV,KAAK,EAAE,KAAK;YACZ,QAAQ,EAAE,QAAQ;YAClB,IAAI,EAAE,CAAC;oBACH,KAAK,EAAE,MAAM;oBACb,KAAK,EAAE,qBAAqB,CAAC,KAAK,CAAC,GAAG,CAAC;iBAC1C,EAAE;oBACC,KAAK,EAAE,MAAM;oBACb,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK,CAAC;iBAClC,EAAE;oBACC,KAAK,EAAE,MAAM;oBACb,KAAK,EAAE,SAAS,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC3C,CAAC;SACL,CAAC,CAAC;IACP,CAAC;IACD,aAAa,CAAC;QACV,KAAK,EAAE,OAAO;QACd,QAAQ,EAAE,QAAQ;QAClB,IAAI,EAAE,CAAC;gBACH,KAAK,EAAE,MAAM;gBACb,KAAK,EAAE,SAAS,CAAC,KAAK,CAAC,eAAe,CAAC;aAC1C,EAAE;gBACC,KAAK,EAAE,MAAM;gBACb,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC;aAC7B,EAAE;gBACC,IAAI,EAAE,KAAK,CAAC,GAAG,KAAK,KAAK;gBACzB,KAAK,EAAE,YAAY;gBACnB,KAAK,EAAE,GAAG,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,YAAY,CAAC,WAAW,cACvD,KAAK,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,SAAS,GAAG,KAAK,CAAC,YAAY,CAAC,WAAW,CAAC,GAAG,GAAG,CAAC,IAAI,CAC1F,EAAE;aACL,EAAE;gBACC,KAAK,EAAE,MAAM;gBACb,KAAK,EAAE,CAAC,KAAK,CAAC,MAAM,CAAC,YAAY;oBAC7B,CAAC,CAAC,aAAa;oBACf,CAAC,CAAC,CAAC,OAAO,IAAI,OAAO,IAAI,IAAI,CAAC;wBAC1B,CAAC,CAAC,SAAS;wBACX,CAAC,CAAC,UAAU;aACvB,EAAE;gBACC,KAAK,EAAE,YAAY;gBACnB,IAAI,EAAE,QAAQ,KAAK,KAAK,EAAE,sCAAsC;gBAChE,KAAK,EAAE,QAAQ,KAAK,KAAK;oBACrB,CAAC,CAAC,aAAa;oBACf,CAAC,CAAC,CAAC,WAAW,IAAI,WAAW,IAAI,IAAI,CAAC;wBAClC,CAAC,CAAC,SAAS;wBACX,CAAC,CAAC,UAAU;aACvB,EAAE;gBACC,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,KAAK;gBACZ,KAAK,EAAE,GAAG,EAAE,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;aACzD,EAAE;gBACC,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,KAAK;gBACZ,KAAK,EAAE,GAAG,EAAE,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;aACzD,EAAE;gBACC,KAAK,EAAE,oBAAoB;gBAC3B,KAAK,EAAE,KAAK,CAAC,gBAAgB,CAAC,cAAc,CAAC,OAAO,CAAC;aACxD,CAAC;KACL,CAAC,CAAC;IACH,aAAa,CAAC;QACV,KAAK,EAAE,SAAS;QAChB,QAAQ,EAAE,QAAQ;QAClB,IAAI,EAAE,CAAC;gBACH,KAAK,EAAE,MAAM;gBACb,KAAK,EAAE,OAAO,CAAC,WAAW,CAAC,cAAc,CAAC,OAAO,CAAC;aACrD,EAAE;gBACC,KAAK,EAAE,SAAS;gBAChB,KAAK,EAAE,OAAO,CAAC,cAAc,CAAC,cAAc,CAAC,OAAO,CAAC;aACxD,EAAE;gBACC,IAAI,EAAE,YAAY;gBAClB,KAAK,EAAE,YAAY;gBACnB,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,cAAc,CAAC,OAAO,CAAC;aACnD,EAAE;gBACC,IAAI,EAAE,OAAO,CAAC,cAAc;gBAC5B,KAAK,EAAE,iBAAiB;gBACxB,KAAK,EAAE,SAAS;aACnB,EAAE;gBACC,IAAI,EAAE,iBAAiB;gBACvB,KAAK,EAAE,aAAa;gBACpB,KAAK,EAAE,SAAS;aACnB,CAAC;KACL,CAAC,CAAC;IAEH,IAAI,YAAY,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,UAAU,GAAG,YAAY,CAAC,KAAK,CAAC;QAEtC,aAAa,CAAC;YACV,KAAK,EAAE,aAAa;YACpB,QAAQ,EAAE,QAAQ;YAClB,IAAI,EAAE,CAAC;oBACH,KAAK,EAAE,MAAM;oBACb,KAAK,EAAE,SAAS,CAAC,UAAU,CAAC,eAAe,CAAC;iBAC/C,EAAE;oBACC,KAAK,EAAE,MAAM;oBACb,KAAK,EAAE,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;iBAClC,EAAE;oBACC,IAAI,E
AAE,KAAK,CAAC,GAAG,KAAK,KAAK;oBACzB,KAAK,EAAE,YAAY;oBACnB,KAAK,EAAE,GAAG,UAAU,CAAC,SAAS,IAAI,UAAU,CAAC,YAAY,CAAC,WAAW,cACjE,KAAK,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,UAAU,CAAC,SAAS,GAAG,UAAU,CAAC,YAAY,CAAC,WAAW,CAAC,GAAG,GAAG,CAAC,IAAI,CACpG,EAAE;iBACL,EAAE;oBACC,IAAI,EAAE,QAAQ;oBACd,KAAK,EAAE,KAAK;oBACZ,KAAK,EAAE,GAAG,EAAE,CAAC,SAAS,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;iBAC9D,EAAE;oBACC,IAAI,EAAE,QAAQ;oBACd,KAAK,EAAE,KAAK;oBACZ,KAAK,EAAE,GAAG,EAAE,CAAC,SAAS,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;iBAC9D,EAAE;oBACC,KAAK,EAAE,oBAAoB;oBAC3B,KAAK,EAAE,UAAU,CAAC,gBAAgB,CAAC,cAAc,CAAC,OAAO,CAAC;iBAC7D,CAAC;SACL,CAAC,CAAC;QACH,aAAa,CAAC;YACV,KAAK,EAAE,eAAe;YACtB,QAAQ,EAAE,QAAQ;YAClB,IAAI,EAAE,CAAC;oBACH,KAAK,EAAE,MAAM;oBACb,KAAK,EAAE,YAAY,CAAC,WAAW,CAAC,cAAc,CAAC,OAAO,CAAC;iBAC1D,EAAE;oBACC,KAAK,EAAE,SAAS;oBAChB,KAAK,EAAE,YAAY,CAAC,cAAc,CAAC,cAAc,CAAC,OAAO,CAAC;iBAC7D,EAAE;oBACC,IAAI,EAAE,YAAY;oBAClB,KAAK,EAAE,YAAY;oBACnB,KAAK,EAAE,YAAY,CAAC,SAAS,CAAC,cAAc,CAAC,OAAO,CAAC;iBACxD,EAAE;oBACC,IAAI,EAAE,YAAY,CAAC,cAAc;oBACjC,KAAK,EAAE,iBAAiB;oBACxB,KAAK,EAAE,SAAS;iBACnB,EAAE;oBACC,IAAI,EAAE,iBAAiB;oBACvB,KAAK,EAAE,aAAa;oBACpB,KAAK,EAAE,SAAS;iBACnB,CAAC;SACL,CAAC,CAAC;IACP,CAAC;IAED,OAAO,QAAQ,CAAC;AACpB,CAAC;AAED,SAAS,SAAS,CAAC,IAAY;IAC3B,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;AACpD,CAAC"}

View File

@@ -0,0 +1,12 @@
export declare function printInfoLine(options: Parameters<typeof renderInfoLine>[0]): void;
export declare function renderInfoLine({ title, padTitle, separateLines, info, maxWidth }: {
title?: string;
padTitle?: number;
separateLines?: boolean;
info: Array<{
title: string;
value: string | (() => string);
show?: boolean;
}>;
maxWidth?: number;
}): string;

View File

@@ -0,0 +1,54 @@
import chalk from "chalk";
import stripAnsi from "strip-ansi";
export function printInfoLine(options) {
console.info(renderInfoLine(options));
}
export function renderInfoLine({ title, padTitle = 0, separateLines = false, info, maxWidth = process.stdout.columns - 1 }) {
const res = [];
const items = [];
if (separateLines) {
if (title != null && title.length > 0)
res.push(chalk.yellowBright(`${title.trim()}`));
for (const { title, value, show } of info) {
if (show === false)
continue;
if (title == null || title === "")
items.push(value instanceof Function ? value() : value);
else
items.push(`${chalk.yellow(title + ":")} ${value instanceof Function ? value() : value}`);
}
const itemPrefix = `${chalk.dim("|")} `;
res.push(itemPrefix + items.join("\n" + itemPrefix));
return res.join("\n") + "\n";
}
else {
if (title != null && title.length > 0)
res.push(chalk.yellowBright(`${title.padEnd(padTitle, " ")}`));
for (const { title, value, show } of info) {
if (show === false)
continue;
if (title == null || title === "")
items.push(chalk.bgGray(` ${value instanceof Function ? value() : value} `));
else
items.push(chalk.bgGray(` ${chalk.yellow(title + ":")} ${value instanceof Function ? value() : value} `));
}
const startPad = stripAnsi(res.join(" ")).length + (res.length > 0 ? " ".length : 0);
res.push(splitItemsIntoLines(items, maxWidth - startPad).join("\n" + " ".repeat(startPad)));
return res.join(" ");
}
}
function splitItemsIntoLines(items, maxLineLength) {
const lines = [];
let currentLine = [];
for (const item of items) {
if (stripAnsi([...currentLine, item].join(" ")).length > maxLineLength) {
lines.push(currentLine.join(" "));
currentLine = [];
}
currentLine.push(item);
}
if (currentLine.length > 0)
lines.push(currentLine.join(" "));
return lines;
}
//# sourceMappingURL=printInfoLine.js.map
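
An illustrative call of the line renderer (titles and values are made up):

import {printInfoLine} from "./printInfoLine.js";

printInfoLine({
    title: "Context",
    padTitle: 14,
    info: [
        {title: "Size", value: "4,096"},
        {title: "Threads", value: "6"},
        {title: "Flash attention", value: "enabled", show: false}  // skipped because show === false
    ]
});
// Prints "Context" padded to 14 characters, followed by a gray-background badge per visible item,
// wrapped so no line exceeds the terminal width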

View File

@@ -0,0 +1 @@
{"version":3,"file":"printInfoLine.js","sourceRoot":"","sources":["../../../src/cli/utils/printInfoLine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,SAAS,MAAM,YAAY,CAAC;AAEnC,MAAM,UAAU,aAAa,CAAC,OAA6C;IACvE,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,EAC3B,KAAK,EAAE,QAAQ,GAAG,CAAC,EAAE,aAAa,GAAG,KAAK,EAAE,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,OAAO,GAAG,CAAC,EAW1F;IACG,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,aAAa,EAAE,CAAC;QAChB,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YACjC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,GAAG,KAAK,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC;QAEpD,KAAK,MAAM,EAAC,KAAK,EAAE,KAAK,EAAE,IAAI,EAAC,IAAI,IAAI,EAAE,CAAC;YACtC,IAAI,IAAI,KAAK,KAAK;gBACd,SAAS;YAEb,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,KAAK,EAAE;gBAC7B,KAAK,CAAC,IAAI,CAAC,KAAK,YAAY,QAAQ,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;;gBAExD,KAAK,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,KAAK,GAAG,GAAG,CAAC,IAAI,KAAK,YAAY,QAAQ,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QAClG,CAAC;QAED,MAAM,UAAU,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC;QACxC,GAAG,CAAC,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC;QACrD,OAAO,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IACjC,CAAC;SAAM,CAAC;QACJ,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YACjC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;QAEnE,KAAK,MAAM,EAAC,KAAK,EAAE,KAAK,EAAE,IAAI,EAAC,IAAI,IAAI,EAAE,CAAC;YACtC,IAAI,IAAI,KAAK,KAAK;gBACd,SAAS;YAEb,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,KAAK,EAAE;gBAC7B,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,KAAK,YAAY,QAAQ,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;;gBAE7E,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,GAAG,GAAG,CAAC,IAAI,KAAK,YAAY,QAAQ,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;QAClH,CAAC;QAED,MAAM,QAAQ,GAAG,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrF,GAAG,CAAC,IAAI,CAAC,mBAAmB,CAAC,KAAK,EAAE,QAAQ,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;QAC5F,OAAO,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACzB,CAAC;AACL,CAAC;AAED,SAAS,mBAAmB,CAAC,KAAe,EAAE,aAAqB;IAC/D,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,WAAW,GAAa,EAAE,CAAC;IAE/B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACvB,IAAI,SAAS,CAAC,CAAC,GAAG,WAAW,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,GAAG,aAAa,EAAE,CAAC;YACrE,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YAClC,WAAW,GAAG,EAAE,CAAC;QACrB,CAAC;QAED,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC3B,CAAC;IAED,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC;QACtB,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAEtC,OAAO,KAAK,CAAC;AACjB,CAAC"}

View File

@@ -0,0 +1,2 @@
import { ResolveModelDestination } from "../../utils/resolveModelDestination.js";
export declare function printModelDestination(modelDestination: ResolveModelDestination): void;

View File

@@ -0,0 +1,11 @@
import chalk from "chalk";
import { getReadablePath } from "./getReadablePath.js";
export function printModelDestination(modelDestination) {
if (modelDestination.type === "url")
console.info(`${chalk.yellow("URL:")} ${modelDestination.url}`);
else if (modelDestination.type === "uri")
console.info(`${chalk.yellow("URI:")} ${modelDestination.uri}`);
else
console.info(`${chalk.yellow("File:")} ${getReadablePath(modelDestination.path)}`);
}
//# sourceMappingURL=printModelDestination.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"printModelDestination.js","sourceRoot":"","sources":["../../../src/cli/utils/printModelDestination.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,OAAO,EAAC,eAAe,EAAC,MAAM,sBAAsB,CAAC;AAErD,MAAM,UAAU,qBAAqB,CAAC,gBAAyC;IAC3E,IAAI,gBAAgB,CAAC,IAAI,KAAK,KAAK;QAC/B,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,gBAAgB,CAAC,GAAG,EAAE,CAAC,CAAC;SAC/D,IAAI,gBAAgB,CAAC,IAAI,KAAK,KAAK;QACpC,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,gBAAgB,CAAC,GAAG,EAAE,CAAC,CAAC;;QAEhE,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,eAAe,CAAC,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AAC3F,CAAC"}

View File

@@ -0,0 +1,19 @@
export declare const enum ProjectTemplateParameter {
ProjectName = "projectName",
CurrentModuleVersion = "currentNodeLlamaCppModuleVersion",
ModelUriOrUrl = "modelUriOrUrl",
ModelUriOrFilename = "modelUriOrFilename"
}
export type PackagedFileEntry = {
path: string[];
content: string;
};
export type ProjectTemplate = {
files: PackagedFileEntry[];
};
export declare function getProjectTemplateParameterText(parameter: ProjectTemplateParameter, escapeText?: boolean | 0 | 1 | 2): string;
export declare function scaffoldProjectTemplate({ template, parameters, directoryPath }: {
template: ProjectTemplate;
parameters: Record<ProjectTemplateParameter, string>;
directoryPath: string;
}): Promise<void>;

View File

@@ -0,0 +1,47 @@
import path from "path";
import fs from "fs-extra";
export var ProjectTemplateParameter;
(function (ProjectTemplateParameter) {
ProjectTemplateParameter["ProjectName"] = "projectName";
ProjectTemplateParameter["CurrentModuleVersion"] = "currentNodeLlamaCppModuleVersion";
ProjectTemplateParameter["ModelUriOrUrl"] = "modelUriOrUrl";
ProjectTemplateParameter["ModelUriOrFilename"] = "modelUriOrFilename";
})(ProjectTemplateParameter || (ProjectTemplateParameter = {}));
export function getProjectTemplateParameterText(parameter, escapeText = true) {
let escapes = "";
if (escapeText === true || escapeText === 1)
escapes = "|escape";
else if (escapeText === 2)
escapes = "|escape|escape";
return "{{" + parameter + escapes + "}}";
}
function applyProjectTemplateParameters(template, parameters) {
for (const [parameter, value] of Object.entries(parameters)) {
template = template.split(getProjectTemplateParameterText(parameter, 0)).join(String(value));
template = template.split(getProjectTemplateParameterText(parameter, 1)).join(JSON.stringify(String(value)).slice(1, -1));
template = template.split(getProjectTemplateParameterText(parameter, 2)).join(JSON.stringify(JSON.stringify(String(value)).slice(1, -1)).slice(1, -1));
}
return template;
}
export async function scaffoldProjectTemplate({ template, parameters, directoryPath }) {
for (const file of template.files) {
const filePath = path.join(directoryPath, ...file.path);
const fileContent = transformFileContent({
content: applyProjectTemplateParameters(file.content, parameters),
originalPath: file.path,
parameters
});
await fs.ensureDir(path.dirname(filePath));
await fs.writeFile(filePath, fileContent, "utf8");
}
}
function transformFileContent({ content, originalPath, parameters }) {
if (originalPath.length === 1 && originalPath[0] === "package.json") {
const packageJson = JSON.parse(content);
if (parameters[ProjectTemplateParameter.ProjectName] != null)
packageJson.name = parameters[ProjectTemplateParameter.ProjectName];
return JSON.stringify(packageJson, null, 2);
}
return content;
}
//# sourceMappingURL=projectTemplates.js.map
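
A sketch of scaffolding a template; the template files and parameter values are made up, and a real ProjectTemplate would normally come from a packaged template rather than being built inline:

import {ProjectTemplateParameter, getProjectTemplateParameterText, scaffoldProjectTemplate} from "./projectTemplates.js";

const template = {
    files: [{
        path: ["package.json"],
        content: '{"name": "placeholder", "private": true}'
    }, {
        path: ["src", "config.ts"],
        content: 'export const modelUri = "' + getProjectTemplateParameterText(ProjectTemplateParameter.ModelUriOrUrl) + '";\n'
    }]
};

await scaffoldProjectTemplate({
    template,
    parameters: {
        [ProjectTemplateParameter.ProjectName]: "my-llama-app",
        [ProjectTemplateParameter.CurrentModuleVersion]: "3.0.0",
        [ProjectTemplateParameter.ModelUriOrUrl]: "hf:user/model:Q4_K_M",
        [ProjectTemplateParameter.ModelUriOrFilename]: "model.gguf"
    },
    directoryPath: "./my-llama-app"
});
// Each {{parameter}}, {{parameter|escape}} and {{parameter|escape|escape}} placeholder is replaced,
// and the "name" field of package.json is set to "my-llama-app"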

View File

@@ -0,0 +1 @@
{"version":3,"file":"projectTemplates.js","sourceRoot":"","sources":["../../../src/cli/utils/projectTemplates.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,UAAU,CAAC;AAE1B,MAAM,CAAN,IAAkB,wBAKjB;AALD,WAAkB,wBAAwB;IACtC,uDAA2B,CAAA;IAC3B,qFAAyD,CAAA;IACzD,2DAA+B,CAAA;IAC/B,qEAAyC,CAAA;AAC7C,CAAC,EALiB,wBAAwB,KAAxB,wBAAwB,QAKzC;AAWD,MAAM,UAAU,+BAA+B,CAAC,SAAmC,EAAE,aAAkC,IAAI;IACvH,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,IAAI,UAAU,KAAK,IAAI,IAAI,UAAU,KAAK,CAAC;QACvC,OAAO,GAAG,SAAS,CAAC;SACnB,IAAI,UAAU,KAAK,CAAC;QACrB,OAAO,GAAG,gBAAgB,CAAC;IAE/B,OAAO,IAAI,GAAG,SAAS,GAAG,OAAO,GAAG,IAAI,CAAC;AAC7C,CAAC;AAED,SAAS,8BAA8B,CAAC,QAAgB,EAAE,UAAoD;IAC1G,KAAK,MAAM,CAAC,SAAS,EAAE,KAAK,CAAC,IAAK,MAAM,CAAC,OAAO,CAAC,UAAU,CAA0C,EAAE,CAAC;QACpG,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,+BAA+B,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QAC7F,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,+BAA+B,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1H,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,+BAA+B,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CACzE,IAAI,CAAC,SAAS,CACV,IAAI,CAAC,SAAS,CACV,MAAM,CAAC,KAAK,CAAC,CAChB,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CACjB,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CACjB,CAAC;IACN,CAAC;IAED,OAAO,QAAQ,CAAC;AACpB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAAC,EAC1C,QAAQ,EAAE,UAAU,EAAE,aAAa,EAKtC;IACG,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;QACxD,MAAM,WAAW,GAAG,oBAAoB,CAAC;YACrC,OAAO,EAAE,8BAA8B,CAAC,IAAI,CAAC,OAAO,EAAE,UAAU,CAAC;YACjE,YAAY,EAAE,IAAI,CAAC,IAAI;YACvB,UAAU;SACb,CAAC,CAAC;QAEH,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;QAC3C,MAAM,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;IACtD,CAAC;AACL,CAAC;AAED,SAAS,oBAAoB,CAAC,EAC1B,OAAO,EAAE,YAAY,EAAE,UAAU,EAGpC;IACG,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,IAAI,YAAY,CAAC,CAAC,CAAC,KAAK,cAAc,EAAE,CAAC;QAClE,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAExC,IAAI,UAAU,CAAC,wBAAwB,CAAC,WAAW,CAAC,IAAI,IAAI;YACxD,WAAW,CAAC,IAAI,GAAG,UAAU,CAAC,wBAAwB,CAAC,WAAW,CAAC,CAAC;QAExE,OAAO,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IAChD,CAAC;IAED,OAAO,OAAO,CAAC;AACnB,CAAC"}

View File

@@ -0,0 +1,6 @@
export declare function renderModelCompatibilityPercentageWithColors(percentage: number, { greenBright, green, yellow, yellowBright }?: {
greenBright?: number;
green?: number;
yellow?: number;
yellowBright?: number;
}): string;

View File

@@ -0,0 +1,14 @@
import chalk from "chalk";
export function renderModelCompatibilityPercentageWithColors(percentage, { greenBright = 100, green = 95, yellow = 85, yellowBright = 75 } = {}) {
const percentageText = String(Math.floor(percentage)) + "%";
if (percentage >= greenBright)
return chalk.greenBright(percentageText);
else if (percentage >= green)
return chalk.green(percentageText);
else if (percentage >= yellow)
return chalk.yellow(percentageText);
else if (percentage >= yellowBright)
return chalk.yellowBright(percentageText);
return chalk.red(percentageText);
}
//# sourceMappingURL=renderModelCompatibilityPercentageWithColors.js.map
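
Illustrative outputs with the default thresholds:

import {renderModelCompatibilityPercentageWithColors} from "./renderModelCompatibilityPercentageWithColors.js";

renderModelCompatibilityPercentageWithColors(100);   // "100%" in greenBright
renderModelCompatibilityPercentageWithColors(97.4);  // "97%" in green (text is floored; 97.4 >= 95)
renderModelCompatibilityPercentageWithColors(80);    // "80%" in yellowBright (75 <= 80 < 85)
renderModelCompatibilityPercentageWithColors(42);    // "42%" in red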

View File

@@ -0,0 +1 @@
{"version":3,"file":"renderModelCompatibilityPercentageWithColors.js","sourceRoot":"","sources":["../../../src/cli/utils/renderModelCompatibilityPercentageWithColors.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,MAAM,UAAU,4CAA4C,CAAC,UAAkB,EAAE,EAC7E,WAAW,GAAG,GAAG,EACjB,KAAK,GAAG,EAAE,EACV,MAAM,GAAG,EAAE,EACX,YAAY,GAAG,EAAE,KAMjB,EAAE;IACF,MAAM,cAAc,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,GAAG,GAAG,CAAC;IAE5D,IAAI,UAAU,IAAI,WAAW;QACzB,OAAO,KAAK,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC;SACxC,IAAI,UAAU,IAAI,KAAK;QACxB,OAAO,KAAK,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;SAClC,IAAI,UAAU,IAAI,MAAM;QACzB,OAAO,KAAK,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC;SACnC,IAAI,UAAU,IAAI,YAAY;QAC/B,OAAO,KAAK,CAAC,YAAY,CAAC,cAAc,CAAC,CAAC;IAE9C,OAAO,KAAK,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;AACrC,CAAC"}

View File

@@ -0,0 +1,16 @@
import { Llama } from "../../bindings/Llama.js";
export declare function resolveCommandGgufPath(ggufPath: string | undefined, llama: Llama, fetchHeaders?: Record<string, string>, { targetDirectory, flashAttention, swaFullCache, useMmap, consoleTitle }?: {
targetDirectory?: string;
flashAttention?: boolean;
swaFullCache?: boolean;
useMmap?: boolean;
consoleTitle?: string;
}): Promise<string>;
export declare function tryCoercingModelUri(ggufPath: string): {
uri: string;
modifiedRegion: {
start: number;
end: number;
};
} | undefined;
export declare function printDidYouMeanUri(ggufPath: string): void;
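
Only the declaration is shown above, so the following call is a sketch based on the signature alone: the URI, options, and the assumption that it resolves to a local model file path are illustrative, and `llama` is assumed to be an existing Llama instance in scope.

const resolvedGgufPath = await resolveCommandGgufPath("hf:user/model:Q4_K_M", llama, undefined, {
    targetDirectory: "./models",
    flashAttention: false,
    useMmap: true
});
// Resolves to a string per the declaration, presumably a local .gguf path the command can then load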

Some files were not shown because too many files have changed in this diff