First upload version 0.0.1
546 node_modules/node-llama-cpp/dist/bindings/Llama.js (generated, vendored, normal file)
@@ -0,0 +1,546 @@
import os from "os";
import path from "path";
import chalk from "chalk";
import { DisposedError, EventRelay, withLock } from "lifecycle-utils";
import { getConsoleLogPrefix } from "../utils/getConsoleLogPrefix.js";
import { LlamaModel } from "../evaluator/LlamaModel/LlamaModel.js";
import { DisposeGuard } from "../utils/DisposeGuard.js";
import { LlamaJsonSchemaGrammar } from "../evaluator/LlamaJsonSchemaGrammar.js";
import { LlamaGrammar } from "../evaluator/LlamaGrammar.js";
import { ThreadsSplitter } from "../utils/ThreadsSplitter.js";
import { getLlamaClasses } from "../utils/getLlamaClasses.js";
import { LlamaLocks, LlamaLogLevel, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual } from "./types.js";
import { MemoryOrchestrator } from "./utils/MemoryOrchestrator.js";
export const LlamaLogLevelToAddonLogLevel = new Map([
    [LlamaLogLevel.disabled, 0],
    [LlamaLogLevel.fatal, 1],
    [LlamaLogLevel.error, 2],
    [LlamaLogLevel.warn, 3],
    [LlamaLogLevel.info, 4],
    [LlamaLogLevel.log, 5],
    [LlamaLogLevel.debug, 6]
]);
const addonLogLevelToLlamaLogLevel = new Map([...LlamaLogLevelToAddonLogLevel.entries()].map(([key, value]) => [value, key]));
const defaultLogLevel = 5;
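// Note: addon log level 5 corresponds to `LlamaLogLevel.log` in the map above,
// so unmapped levels fall back to regular log output.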
const defaultCPUMinThreadSplitterThreads = 4;
export class Llama {
    /** @internal */ _bindings;
    /** @internal */ _backendDisposeGuard = new DisposeGuard();
    /** @internal */ _memoryLock = {};
    /** @internal */ _consts;
    /** @internal */ _vramOrchestrator;
    /** @internal */ _vramPadding;
    /** @internal */ _ramOrchestrator;
    /** @internal */ _ramPadding;
    /** @internal */ _swapOrchestrator;
    /** @internal */ _debug;
    /** @internal */ _threadsSplitter;
    /** @internal */ _hadErrorLogs = false;
    /** @internal */ _gpu;
    /** @internal */ _numa;
    /** @internal */ _buildType;
    /** @internal */ _cmakeOptions;
    /** @internal */ _supportsGpuOffloading;
    /** @internal */ _supportsMmap;
    /** @internal */ _gpuSupportsMmap;
    /** @internal */ _supportsMlock;
    /** @internal */ _mathCores;
    /** @internal */ _llamaCppRelease;
    /** @internal */ _logger;
    /** @internal */ _logLevel;
    /** @internal */ _pendingLog = null;
    /** @internal */ _pendingLogLevel = null;
    /** @internal */ _logDispatchQueuedMicrotasks = 0;
    /** @internal */ _previousLog = null;
    /** @internal */ _previousLogLevel = null;
    /** @internal */ _nextLogNeedNewLine = false;
    /** @internal */ _disposed = false;
    _classes;
    onDispose = new EventRelay();
    constructor({ bindings, bindingPath, extBackendsPath, logLevel, logger, buildType, cmakeOptions, llamaCppRelease, debug, numa, buildGpu, maxThreads, vramOrchestrator, vramPadding, ramOrchestrator, ramPadding, swapOrchestrator, skipLlamaInit }) {
        this._dispatchPendingLogMicrotask = this._dispatchPendingLogMicrotask.bind(this);
        this._onAddonLog = this._onAddonLog.bind(this);
        this._bindings = bindings;
        this._debug = debug;
        this._numa = numa ?? false;
        this._logLevel = this._debug
            ? LlamaLogLevel.debug
            : (logLevel ?? LlamaLogLevel.debug);
        const previouslyLoaded = bindings.markLoaded();
        if (!this._debug && (!skipLlamaInit || !previouslyLoaded)) {
            this._bindings.setLogger(this._onAddonLog);
            this._bindings.setLoggerLogLevel(LlamaLogLevelToAddonLogLevel.get(this._logLevel) ?? defaultLogLevel);
        }
        bindings.loadBackends();
        let loadedGpu = bindings.getGpuType();
        if (loadedGpu == null || (loadedGpu === false && buildGpu !== false)) {
            const backendsPath = path.dirname(bindingPath);
            const fallbackBackendsDir = path.join(extBackendsPath ?? backendsPath, "fallback");
            bindings.loadBackends(backendsPath);
            loadedGpu = bindings.getGpuType();
            if (loadedGpu == null || (loadedGpu === false && buildGpu !== false))
                bindings.loadBackends(fallbackBackendsDir);
        }
        bindings.ensureGpuDeviceIsSupported();
        if (this._numa !== false)
            bindings.setNuma(numa);
        this._gpu = bindings.getGpuType() ?? false;
        this._supportsGpuOffloading = bindings.getSupportsGpuOffloading();
        this._supportsMmap = bindings.getSupportsMmap();
        this._gpuSupportsMmap = bindings.getGpuSupportsMmap();
        this._supportsMlock = bindings.getSupportsMlock();
        this._mathCores = Math.floor(bindings.getMathCores());
        this._consts = bindings.getConsts();
        this._vramOrchestrator = vramOrchestrator;
        this._vramPadding = vramPadding;
        this._ramOrchestrator = ramOrchestrator;
        this._ramPadding = ramPadding;
        this._swapOrchestrator = swapOrchestrator;
        this._threadsSplitter = new ThreadsSplitter(maxThreads ?? (this._gpu === false
            ? Math.max(defaultCPUMinThreadSplitterThreads, this._mathCores)
            : 0));
        this._logger = logger;
        this._buildType = buildType;
        this._cmakeOptions = Object.freeze({ ...cmakeOptions });
        this._llamaCppRelease = Object.freeze({
            repo: llamaCppRelease.repo,
            release: llamaCppRelease.release
        });
        this._onExit = this._onExit.bind(this);
        process.on("exit", this._onExit);
    }
    async dispose() {
        if (this._disposed)
            return;
        this._disposed = true;
        this.onDispose.dispatchEvent();
        await this._backendDisposeGuard.acquireDisposeLock();
        await this._bindings.dispose();
    }
    /** @hidden */
    async [Symbol.asyncDispose]() {
        await this.dispose();
    }
    get disposed() {
        return this._disposed;
    }
    get classes() {
        if (this._classes == null)
            this._classes = getLlamaClasses();
        return this._classes;
    }
    get gpu() {
        return this._gpu;
    }
    get supportsGpuOffloading() {
        return this._supportsGpuOffloading;
    }
    get supportsMmap() {
        return this._supportsMmap;
    }
    get gpuSupportsMmap() {
        return this._gpuSupportsMmap;
    }
    get supportsMlock() {
        return this._supportsMlock;
    }
    /** The number of CPU cores that are useful for math */
    get cpuMathCores() {
        return this._mathCores;
    }
    /**
     * The maximum number of threads that can be used by the Llama instance.
     *
     * If set to `0`, the Llama instance will have no limit on the number of threads.
     *
     * See the `maxThreads` option of `getLlama` for more information.
     */
    get maxThreads() {
        return this._threadsSplitter.maxThreads;
    }
    set maxThreads(value) {
        this._threadsSplitter.maxThreads = Math.floor(Math.max(0, value));
    }
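    /*
     * Usage sketch (illustrative, not part of the library code; assumes a
     * `llama` instance obtained via `getLlama()`):
     *
     *     console.log(llama.maxThreads); // `0` means no limit on threads
     *     llama.maxThreads = 4;          // cap CPU work at 4 threads
     */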
    /**
     * See the `numa` option of `getLlama` for more information
     */
    get numa() {
        return this._numa;
    }
    get logLevel() {
        return this._logLevel;
    }
    set logLevel(value) {
        this._ensureNotDisposed();
        if (value === this._logLevel || this._debug)
            return;
        this._bindings.setLoggerLogLevel(LlamaLogLevelToAddonLogLevel.get(value) ?? defaultLogLevel);
        this._logLevel = value;
    }
    get logger() {
        return this._logger;
    }
    set logger(value) {
        this._logger = value;
        if (value !== Llama.defaultConsoleLogger)
            this._nextLogNeedNewLine = false;
    }
    get buildType() {
        return this._buildType;
    }
    get cmakeOptions() {
        return this._cmakeOptions;
    }
    get llamaCppRelease() {
        return this._llamaCppRelease;
    }
    get systemInfo() {
        this._ensureNotDisposed();
        return this._bindings.systemInfo();
    }
    /**
     * VRAM padding used for memory size calculations, as these calculations are not always accurate.
     * This is set by default to ensure stability, but can be configured when you call `getLlama`.
     *
     * See `vramPadding` on `getLlama` for more information.
     */
    get vramPaddingSize() {
        return this._vramPadding.size;
    }
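    /*
     * Sketch (illustrative): the padding itself is reserved when the instance is
     * created (see the `vramPadding` option of `getLlama` mentioned above); this
     * getter only reports the currently reserved size:
     *
     *     const reservedVramPadding = llama.vramPaddingSize;
     */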
    /**
     * The total amount of VRAM that is currently being used.
     *
     * `unifiedSize` represents the amount of VRAM that is shared between the CPU and GPU.
     * On SoC devices, this is usually the same as `total`.
     */
    async getVramState() {
        this._ensureNotDisposed();
        const { total, used, unifiedSize } = this._bindings.getGpuVramInfo();
        return {
            total,
            used,
            free: Math.max(0, total - used),
            unifiedSize
        };
    }
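    /*
     * Usage sketch (illustrative):
     *
     *     const {total, used, free, unifiedSize} = await llama.getVramState();
     *     console.log(`VRAM: ${used} of ${total} bytes used, ${free} free`);
     */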
    /**
     * Get the state of the swap memory.
     *
     * **`maxSize`** - The maximum size of the swap memory that the system can allocate.
     * If the swap size is dynamic (like on macOS), this will be `Infinity`.
     *
     * **`allocated`** - The total size allocated by the system for swap memory.
     *
     * **`used`** - The amount of swap memory that is currently being used from the `allocated` size.
     *
     * On Windows, this will return the info for the page file.
     */
    async getSwapState() {
        this._ensureNotDisposed();
        const { total, maxSize, free } = this._bindings.getSwapInfo();
        return {
            maxSize: maxSize === -1
                ? Infinity
                : maxSize,
            allocated: total,
            used: total - free
        };
    }
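    /*
     * Usage sketch (illustrative):
     *
     *     const swap = await llama.getSwapState();
     *     if (swap.maxSize === Infinity)
     *         console.log("swap size is dynamic on this system");
     *     console.log(`${swap.used} of ${swap.allocated} bytes of swap in use`);
     */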
    async getGpuDeviceNames() {
        this._ensureNotDisposed();
        const { deviceNames } = this._bindings.getGpuDeviceInfo();
        return deviceNames;
    }
    async loadModel(options) {
        this._ensureNotDisposed();
        return await withLock([this._memoryLock, LlamaLocks.loadToMemory], options.loadSignal, async () => {
            this._ensureNotDisposed();
            const preventDisposalHandle = this._backendDisposeGuard.createPreventDisposalHandle();
            try {
                return await LlamaModel._create(options, { _llama: this });
            }
            finally {
                preventDisposalHandle.dispose();
            }
        });
    }
    /* eslint-disable @stylistic/max-len */
    /**
     * @see [Using a JSON Schema Grammar](https://node-llama-cpp.withcat.ai/guide/grammar#json-schema) tutorial
     * @see [Reducing Hallucinations When Using JSON Schema Grammar](https://node-llama-cpp.withcat.ai/guide/grammar#reducing-json-schema-hallucinations) tutorial
     */
    async createGrammarForJsonSchema(schema) {
        return new LlamaJsonSchemaGrammar(this, schema);
    }
    /* eslint-enable @stylistic/max-len */
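    /*
     * Usage sketch for `createGrammarForJsonSchema` above (illustrative; the
     * exact schema shape follows the JSON schema grammar tutorial linked in its
     * doc comment):
     *
     *     const grammar = await llama.createGrammarForJsonSchema({
     *         type: "object",
     *         properties: {answer: {type: "string"}}
     *     });
     */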
    async getGrammarFor(type) {
        return await LlamaGrammar.getFor(this, type);
    }
    /**
     * @see [Using Grammar](https://node-llama-cpp.withcat.ai/guide/grammar) tutorial
     */
    async createGrammar(options) {
        return new LlamaGrammar(this, options);
    }
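    /*
     * Usage sketch for `createGrammar` (illustrative; assumes a GBNF grammar
     * string, as described in the Using Grammar tutorial linked above):
     *
     *     const grammar = await llama.createGrammar({
     *         grammar: 'root ::= "yes" | "no"'
     *     });
     */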
    /** @internal */
    async _init() {
        await this._bindings.init();
    }
    /**
     * Log messages related to the Llama instance
     * @internal
     */
    _log(level, message) {
        this._onAddonLog(LlamaLogLevelToAddonLogLevel.get(level) ?? defaultLogLevel, message + "\n");
    }
    /** @internal */
    _onAddonLog(level, message) {
        const llamaLogLevel = addonLogLevelToLlamaLogLevel.get(level) ?? LlamaLogLevel.fatal;
        if (this._pendingLog != null && this._pendingLogLevel != null && this._pendingLogLevel != llamaLogLevel) {
            this._callLogger(this._pendingLogLevel, this._pendingLog);
            this._pendingLog = null;
        }
        const sourceMessage = (this._pendingLog ?? "") + message;
        const lastNewLineIndex = sourceMessage.lastIndexOf("\n");
        const currentLog = lastNewLineIndex < 0
            ? sourceMessage
            : sourceMessage.slice(0, lastNewLineIndex);
        const nextLog = lastNewLineIndex < 0
            ? ""
            : sourceMessage.slice(lastNewLineIndex + 1);
        if (currentLog !== "")
            this._callLogger(llamaLogLevel, currentLog);
        if (nextLog !== "") {
            this._pendingLog = nextLog;
            this._pendingLogLevel = llamaLogLevel;
            queueMicrotask(this._dispatchPendingLogMicrotask);
            this._logDispatchQueuedMicrotasks++;
        }
        else
            this._pendingLog = null;
    }
    /** @internal */
    _dispatchPendingLogMicrotask() {
        this._logDispatchQueuedMicrotasks--;
        if (this._logDispatchQueuedMicrotasks !== 0)
            return;
        if (this._pendingLog != null && this._pendingLogLevel != null) {
            this._callLogger(this._pendingLogLevel, this._pendingLog);
            this._pendingLog = null;
        }
    }
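    /*
     * Note on `_onAddonLog` and `_dispatchPendingLogMicrotask` above: addon log
     * output can arrive in fragments. When a chunk contains a newline, only the
     * text up to the last "\n" is forwarded right away; the remainder is kept in
     * `_pendingLog` (flushing first if its level differs from the incoming one)
     * and is emitted by a queued microtask, or by `_onExit`, if no later chunk
     * completes it.
     */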
    /** @internal */
    _callLogger(level, message) {
        // llama.cpp uses dots to indicate progress, so we don't want to print them as different lines,
        // and instead, append to the same log line
        if (logMessageIsOnlyDots(message) && this._logger === Llama.defaultConsoleLogger) {
            if (logMessageIsOnlyDots(this._previousLog) && level === this._previousLogLevel) {
                process.stdout.write(message);
            }
            else {
                this._nextLogNeedNewLine = true;
                process.stdout.write(prefixAndColorMessage(message, getColorForLogLevel(level)));
            }
        }
        else {
            if (this._nextLogNeedNewLine) {
                process.stdout.write("\n");
                this._nextLogNeedNewLine = false;
            }
            try {
                const transformedLogLevel = getTransformedLogLevel(level, message, this.gpu);
                if (LlamaLogLevelGreaterThanOrEqual(transformedLogLevel, this._logLevel))
                    this._logger(transformedLogLevel, message);
            }
            catch (err) {
                // the native addon code calls this function, so there's no use to throw an error here
            }
        }
        this._previousLog = message;
        this._previousLogLevel = level;
        if (!this._hadErrorLogs && LlamaLogLevelGreaterThan(level, LlamaLogLevel.error))
            this._hadErrorLogs = true;
    }
    /** @internal */
    _onExit() {
        if (this._pendingLog != null && this._pendingLogLevel != null) {
            this._callLogger(this._pendingLogLevel, this._pendingLog);
            this._pendingLog = null;
        }
    }
    /** @internal */
    _ensureNotDisposed() {
        if (this._disposed)
            throw new DisposedError();
    }
    /** @internal */
    static async _create({ bindings, bindingPath, extBackendsPath, buildType, buildMetadata, logLevel, logger, vramPadding, ramPadding, maxThreads, skipLlamaInit = false, debug, numa }) {
        const vramOrchestrator = new MemoryOrchestrator(() => {
            const { total, used, unifiedSize } = bindings.getGpuVramInfo();
            return {
                total,
                free: Math.max(0, total - used),
                unifiedSize
            };
        });
        const ramOrchestrator = new MemoryOrchestrator(() => {
            const used = process.memoryUsage().rss;
            const total = os.totalmem();
            return {
                total,
                free: Math.max(0, total - used),
                unifiedSize: total
            };
        });
        const swapOrchestrator = new MemoryOrchestrator(() => {
            const { total, maxSize, free } = bindings.getSwapInfo();
            const used = total - free;
            if (maxSize === -1)
                return {
                    total: Infinity,
                    free: Infinity,
                    unifiedSize: Infinity
                };
            return {
                total: maxSize,
                free: maxSize - used,
                unifiedSize: maxSize
            };
        });
        let resolvedRamPadding;
        if (ramPadding instanceof Function)
            resolvedRamPadding = ramOrchestrator.reserveMemory(ramPadding((await ramOrchestrator.getMemoryState()).total));
        else
            resolvedRamPadding = ramOrchestrator.reserveMemory(ramPadding);
        const llama = new Llama({
            bindings,
            bindingPath,
            extBackendsPath,
            buildType,
            cmakeOptions: buildMetadata.buildOptions.customCmakeOptions,
            llamaCppRelease: {
                repo: buildMetadata.buildOptions.llamaCpp.repo,
                release: buildMetadata.buildOptions.llamaCpp.release
            },
            logLevel,
            logger,
            debug,
            numa,
            buildGpu: buildMetadata.buildOptions.gpu,
            vramOrchestrator,
            maxThreads,
            vramPadding: vramOrchestrator.reserveMemory(0),
            ramOrchestrator,
            ramPadding: resolvedRamPadding,
            swapOrchestrator,
            skipLlamaInit
        });
        if (llama.gpu === false || vramPadding === 0) {
            // do nothing since `llama._vramPadding` is already set to 0
        }
        else if (vramPadding instanceof Function) {
            const currentVramPadding = llama._vramPadding;
            llama._vramPadding = vramOrchestrator.reserveMemory(vramPadding((await vramOrchestrator.getMemoryState()).total));
            currentVramPadding.dispose();
        }
        else {
            const currentVramPadding = llama._vramPadding;
            llama._vramPadding = vramOrchestrator.reserveMemory(vramPadding);
            currentVramPadding.dispose();
        }
        if (!skipLlamaInit)
            await llama._init();
        return llama;
    }
    static defaultConsoleLogger(level, message) {
        switch (level) {
            case LlamaLogLevel.disabled:
                break;
            case LlamaLogLevel.fatal:
                // we don't use console.error here because it prints the stack trace
                console.warn(prefixAndColorMessage(message, getColorForLogLevel(level)));
                break;
            case LlamaLogLevel.error:
                // we don't use console.error here because it prints the stack trace
                console.warn(prefixAndColorMessage(message, getColorForLogLevel(level)));
                break;
            case LlamaLogLevel.warn:
                console.warn(prefixAndColorMessage(message, getColorForLogLevel(level)));
                break;
            case LlamaLogLevel.info:
                console.info(prefixAndColorMessage(message, getColorForLogLevel(level)));
                break;
            case LlamaLogLevel.log:
                console.info(prefixAndColorMessage(message, getColorForLogLevel(level)));
                break;
            case LlamaLogLevel.debug:
                console.debug(prefixAndColorMessage(message, getColorForLogLevel(level)));
                break;
            default:
                void level;
                console.warn(getConsoleLogPrefix() + getColorForLogLevel(LlamaLogLevel.warn)(`Unknown log level: ${level}`));
                console.log(prefixAndColorMessage(message, getColorForLogLevel(level)));
        }
    }
}
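/*
 * Minimal end-to-end sketch (illustrative only, not part of this file; assumes
 * the `getLlama` factory referenced in the doc comments above and that
 * `loadModel` accepts a `modelPath` option pointing at a local GGUF file):
 *
 *     import {getLlama} from "node-llama-cpp";
 *
 *     const llama = await getLlama();
 *     console.log("GPU:", llama.gpu, "math cores:", llama.cpuMathCores);
 *     const model = await llama.loadModel({modelPath: "./model.gguf"});
 *     // ...use `model`, then clean up
 *     await llama.dispose();
 */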
function getColorForLogLevel(level) {
    switch (level) {
        case LlamaLogLevel.disabled: return chalk.whiteBright;
        case LlamaLogLevel.fatal: return chalk.redBright;
        case LlamaLogLevel.error: return chalk.red;
        case LlamaLogLevel.warn: return chalk.yellow;
        case LlamaLogLevel.info: return chalk.whiteBright;
        case LlamaLogLevel.log: return chalk.white;
        case LlamaLogLevel.debug: return chalk.gray;
        default:
            void level;
            return chalk.whiteBright;
    }
}
function prefixAndColorMessage(message, color) {
    return getConsoleLogPrefix() + (message
        .split("\n")
        .map((line) => color(line))
        .join("\n" + getConsoleLogPrefix()));
}
function logMessageIsOnlyDots(message) {
    if (message == null)
        return false;
    for (let i = 0; i < message.length; i++) {
        if (message[i] !== ".")
            return false;
    }
    return true;
}
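// The function below downgrades known benign llama.cpp messages (Metal/CUDA
// capability notices, backend load logs, and similar) to a less severe level so
// they are not surfaced as warnings by default.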
function getTransformedLogLevel(level, message, gpu) {
    if (level === LlamaLogLevel.warn && message.endsWith("the full capacity of the model will not be utilized"))
        return LlamaLogLevel.info;
    else if (level === LlamaLogLevel.warn && message.startsWith("ggml_metal_init: skipping kernel_") && message.endsWith("(not supported)"))
        return LlamaLogLevel.log;
    else if (level === LlamaLogLevel.warn && message.startsWith("ggml_cuda_init: GGML_CUDA_FORCE_") && message.endsWith(" no"))
        return LlamaLogLevel.log;
    else if (level === LlamaLogLevel.info && message.startsWith("load_backend: loaded "))
        return LlamaLogLevel.log;
    else if (level === LlamaLogLevel.warn && message.startsWith("make_cpu_buft_list: disabling extra buffer types"))
        return LlamaLogLevel.info;
    else if (level === LlamaLogLevel.warn && message.startsWith("llama_context: non-unified KV cache requires ggml_set_rows() - forcing unified KV cache"))
        return LlamaLogLevel.info;
    else if (level === LlamaLogLevel.warn && message.startsWith("llama_kv_cache_unified: LLAMA_SET_ROWS=0, using old ggml_cpy() method for backwards compatibility"))
        return LlamaLogLevel.info;
    else if (level === LlamaLogLevel.warn && message.startsWith("init: embeddings required but some input tokens were not marked as outputs -> overriding"))
        return LlamaLogLevel.info;
    else if (level === LlamaLogLevel.warn && message.startsWith("load: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list"))
        return LlamaLogLevel.info;
    else if (level === LlamaLogLevel.warn && message.startsWith("llama_init_from_model: model default pooling_type is [0], but [-1] was specified"))
        return LlamaLogLevel.info;
    else if (level === LlamaLogLevel.warn && message.startsWith("llama_model_loader: direct I/O is enabled, disabling mmap"))
        return LlamaLogLevel.info;
    else if (level === LlamaLogLevel.warn && message.startsWith("llama_model_loader: direct I/O is not available, using mmap"))
        return LlamaLogLevel.info;
    else if (gpu === false && level === LlamaLogLevel.warn && message.startsWith("llama_adapter_lora_init_impl: lora for '") && message.endsWith("' cannot use buft 'CPU_REPACK', fallback to CPU"))
        return LlamaLogLevel.info;
    else if (gpu === "metal" && level === LlamaLogLevel.warn && message.startsWith("ggml_metal_device_init: tensor API disabled for"))
        return LlamaLogLevel.info;
    return level;
}
//# sourceMappingURL=Llama.js.map