First upload version 0.0.1

Neyra
2026-02-05 15:27:49 +08:00
commit 8e9b7201ed
4182 changed files with 593136 additions and 0 deletions


@@ -0,0 +1,311 @@
import { EventRelay } from "lifecycle-utils";
import { Token, Tokenizer } from "../../types.js";
import { ModelTypeDescription } from "../../bindings/AddonTypes.js";
import { LlamaVocabularyType } from "../../bindings/types.js";
import { GgufFileInfo } from "../../gguf/types/GgufFileInfoTypes.js";
import { GgufInsights } from "../../gguf/insights/GgufInsights.js";
import { LlamaContextOptions } from "../LlamaContext/types.js";
import { LlamaContext } from "../LlamaContext/LlamaContext.js";
import { LlamaEmbeddingContext, LlamaEmbeddingContextOptions } from "../LlamaEmbeddingContext.js";
import { GgufMetadata } from "../../gguf/types/GgufMetadataTypes.js";
import { OverridesObject } from "../../utils/OverridesObject.js";
import { LlamaRankingContext, LlamaRankingContextOptions } from "../LlamaRankingContext.js";
import { TokenAttributes } from "./utils/TokenAttributes.js";
import type { Llama } from "../../bindings/Llama.js";
import type { BuiltinSpecialTokenValue } from "../../utils/LlamaText.js";
export type LlamaModelOptions = {
/** path to the model on the filesystem */
modelPath: string;
/**
* Number of layers to store in VRAM.
* - **`"auto"`** - adapt to the current VRAM state and try to fit as many layers as possible in it.
* Takes into account the VRAM required to create a context with a `contextSize` set to `"auto"`.
* - **`"max"`** - store all layers in VRAM. If there's not enough VRAM, an error will be thrown. Use with caution.
* - **`number`** - store the specified number of layers in VRAM. If there's not enough VRAM, an error will be thrown. Use with caution.
* - **`{min?: number, max?: number, fitContext?: {contextSize: number}}`** - adapt to the current VRAM state and try to fit as
* many layers as possible in it, but at least `min` and at most `max` layers. Set `fitContext` to the parameters of a context you
* intend to create with the model, so it'll take it into account in the calculations and leave enough memory for such a context.
*
* If GPU support is disabled, this will be set to `0` automatically.
*
* Defaults to `"auto"`.
*/
gpuLayers?: "auto" | "max" | number | {
min?: number;
max?: number;
fitContext?: {
contextSize?: number;
/**
* Defaults to `false`.
*/
embeddingContext?: boolean;
};
};
/**
* Only load the vocabulary, not weight tensors.
*
* Useful when you only want to use the model's tokenizer, but not evaluate anything with it.
*
* Defaults to `false`.
*/
vocabOnly?: boolean;
/**
* Use mmap (memory-mapped file) to load the model.
*
* Using mmap allows the OS to load the model tensors directly from the file on the filesystem,
* and makes it easier for the system to manage memory.
*
* When using mmap, you might notice a delay the first time you actually use the model,
* which is caused by the OS itself loading the model into memory.
*
* Defaults to `true` if the current system supports it.
*/
useMmap?: boolean;
/**
* Direct I/O is a method of reading and writing data to and from the storage device directly to the application memory,
* bypassing OS in-memory caches.
*
* It leads to improved model loading times and reduced RAM usage,
* at the expense of higher loading times when the model is unloaded and loaded again repeatedly within a short period of time.
*
* When this option is enabled and Direct I/O is supported by the system (and for the given file),
* it will be used and mmap will be disabled.
*
* Unsupported on macOS.
*
* Defaults to `true`.
*/
useDirectIo?: boolean;
/**
* Force the system to keep the model in the RAM/VRAM.
* Use with caution as this can crash your system if the available resources are insufficient.
*/
useMlock?: boolean;
/**
* Check for tensor validity before actually loading the model.
* Using it increases the time it takes to load the model.
*
* Defaults to `false`.
*/
checkTensors?: boolean;
/**
* Enable flash attention by default for contexts created with this model.
* Only works with models that support flash attention.
*
* Flash attention is an optimization in the attention mechanism that makes inference faster and more efficient, and reduces memory usage.
*
* The support for flash attention is currently experimental and may not always work as expected.
* Use with caution.
*
* This option will be ignored if flash attention is not supported by the model.
*
* Enabling this affects the calculations of default values for the model and contexts created with it
* as flash attention reduces the amount of memory required,
* which allows for more layers to be offloaded to the GPU and for context sizes to be bigger.
*
* Defaults to `false`.
*
* Once flash attention exits its experimental status, the default value will become `true`.
*/
defaultContextFlashAttention?: boolean;
/**
* When using SWA (Sliding Window Attention) on a supported model,
* extend the sliding window size to the current context size (effectively disabling SWA)
* by default for contexts created with this model.
*
* See the `swaFullCache` option of the `.createContext()` method for more information.
*
* Defaults to `false`.
*/
defaultContextSwaFullCache?: boolean;
/**
* Called with the load percentage when the model is being loaded.
* @param loadProgress - a number between 0 (exclusive) and 1 (inclusive).
*/
onLoadProgress?(loadProgress: number): void;
/** An abort signal to abort the model load */
loadSignal?: AbortSignal;
/**
* Ignore insufficient memory errors and continue with the model load.
* Can cause the process to crash if there's not enough VRAM to fit the model.
*
* Defaults to `false`.
*/
ignoreMemorySafetyChecks?: boolean;
/**
* Metadata overrides to load the model with.
*
* > **Note:** Most metadata value overrides aren't supported and overriding them will have no effect on `llama.cpp`.
* > Only use this for metadata values that `llama.cpp` explicitly documents as supporting overrides,
* > and only when it's crucial, as this is not guaranteed to always work as expected.
*/
metadataOverrides?: OverridesObject<GgufMetadata, number | bigint | boolean | string>;
};
export declare class LlamaModel {
readonly tokenizer: Tokenizer;
readonly onDispose: EventRelay<void>;
private constructor();
dispose(): Promise<void>;
/** @hidden */
[Symbol.asyncDispose](): Promise<void>;
get disposed(): boolean;
get llama(): Llama;
get tokens(): LlamaModelTokens;
get filename(): string | undefined;
get fileInfo(): GgufFileInfo;
get fileInsights(): GgufInsights;
/**
* Number of layers offloaded to the GPU.
* If GPU support is disabled, this will always be `0`.
*/
get gpuLayers(): number;
/**
* Total model size in memory in bytes.
*
* When using mmap, actual memory usage may be higher than this value due to `llama.cpp`'s performance optimizations.
*/
get size(): number;
get flashAttentionSupported(): boolean;
get defaultContextFlashAttention(): boolean;
get defaultContextSwaFullCache(): boolean;
/**
* Transform text into tokens that can be fed to the model
* @param text - the text to tokenize
* @param [specialTokens] - if set to `true`, text that corresponds to special tokens will be tokenized to those tokens.
* For example, `<s>` will be tokenized to the BOS token if `specialTokens` is set to `true`,
* otherwise it will be tokenized to tokens that correspond to the plaintext `<s>` string.
* @param [options] - additional options for tokenization.
* If set to `"trimLeadingSpace"`, a leading space will be trimmed from the tokenized output if the output has an
* additional space at the beginning.
*/
tokenize(text: string, specialTokens?: boolean, options?: "trimLeadingSpace"): Token[];
tokenize(text: BuiltinSpecialTokenValue, specialTokens: "builtin"): Token[];
/**
* Transform tokens into text
* @param tokens - the tokens to detokenize.
* @param [specialTokens] - if set to `true`, special tokens will be detokenized to their corresponding token text representation.
*
* Recommended for debugging purposes only.
*
* > **Note:** there may be additional spaces around special tokens that were not present in the original text - this is not a bug,
* this is [how the tokenizer is supposed to work](https://github.com/ggml-org/llama.cpp/pull/7697#issuecomment-2144003246).
*
* Defaults to `false`.
* @param [lastTokens] - the last few tokens that preceded the tokens to detokenize.
* If provided, the last few tokens will be used to determine whether a space has to be added before the current tokens or not,
* and apply other detokenizer-specific heuristics to provide the correct text continuation to the existing tokens.
*
* Using it may have no effect with some models, but it is still recommended.
*/
detokenize(tokens: readonly Token[], specialTokens?: boolean, lastTokens?: readonly Token[]): string;
getTokenAttributes(token: Token): TokenAttributes;
/** Check whether the given token is a special token (a control-type token or a token with no normal text representation) */
isSpecialToken(token: Token | undefined): boolean;
iterateAllTokens(): Generator<Token, void, unknown>;
/** Check whether the given token is an EOG (End Of Generation) token, like EOS or EOT. */
isEogToken(token: Token | undefined): boolean;
createContext(options?: LlamaContextOptions): Promise<LlamaContext>;
/**
* @see [Using Embedding](https://node-llama-cpp.withcat.ai/guide/embedding) tutorial
*/
createEmbeddingContext(options?: LlamaEmbeddingContextOptions): Promise<LlamaEmbeddingContext>;
/**
* @see [Reranking Documents](https://node-llama-cpp.withcat.ai/guide/embedding#reranking) tutorial
*/
createRankingContext(options?: LlamaRankingContextOptions): Promise<LlamaRankingContext>;
/**
* Get warnings about the model file that would affect its usage.
*
* These warnings include all the warnings generated by `GgufInsights`, but are more comprehensive.
*/
getWarnings(): string[];
/** @hidden `ModelTypeDescription` type alias is too long in the documentation */
get typeDescription(): ModelTypeDescription;
/** The context size the model was trained on */
get trainContextSize(): number;
/** The size of an embedding vector the model can produce */
get embeddingVectorSize(): number;
get vocabularyType(): LlamaVocabularyType;
}
export declare class LlamaModelTokens {
private constructor();
/**
* @returns The infill tokens.
*/
get infill(): LlamaModelInfillTokens;
/**
* @returns The BOS (Beginning Of Sequence) token.
*/
get bos(): Token | null;
/**
* @returns The EOS (End Of Sequence) token.
*/
get eos(): Token | null;
/**
* @returns The EOT (End Of Turn) token.
*/
get eot(): Token | null;
/**
* @returns The SEP (Sentence Separator) token.
*/
get sep(): Token | null;
/**
* @returns The NL (New Line) token.
*/
get nl(): Token | null;
/**
* @returns The BOS (Beginning Of Sequence) token text representation.
*/
get bosString(): string | null;
/**
* @returns The EOS (End Of Sequence) token text representation.
*/
get eosString(): string | null;
/**
* @returns The EOT (End Of Turn) token text representation.
*/
get eotString(): string | null;
/**
* @returns The SEP (Sentence Separator) token text representation.
*/
get sepString(): string | null;
/**
* @returns The NL (New Line) token text representation.
*/
get nlString(): string | null;
/**
* @returns Whether we should prepend a BOS (Beginning Of Sequence) token for evaluations with this model.
*/
get shouldPrependBosToken(): boolean;
/**
* @returns Whether we should append an EOS (End Of Sequence) token for evaluations with this model.
*/
get shouldAppendEosToken(): boolean;
}
export declare class LlamaModelInfillTokens {
private constructor();
/**
* @returns The beginning of infill prefix token.
*/
get prefix(): Token | null;
/**
* @returns The beginning of infill middle token.
*/
get middle(): Token | null;
/**
* @returns The beginning of infill suffix token.
*/
get suffix(): Token | null;
/**
* @returns The beginning of infill prefix token as a string.
*/
get prefixString(): string | null;
/**
* @returns The beginning of infill middle token as a string.
*/
get middleString(): string | null;
/**
* @returns The beginning of infill suffix token as a string.
*/
get suffixString(): string | null;
}
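
As a usage illustration (not part of this commit), here is a minimal sketch of how the options and tokenizer API declared above are typically used. It assumes the package's top-level `getLlama()` and `llama.loadModel()` entry points and a placeholder model path, none of which are declared in this file.

import {getLlama} from "node-llama-cpp"; // assumed entry point, not declared in this file

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: "models/example.gguf",   // placeholder path
    gpuLayers: "auto",                  // the default: fit as many layers in VRAM as possible
    useMmap: true,                      // the default when the system supports it
    defaultContextFlashAttention: false // experimental; contexts created from this model inherit it
});

// the model instance doubles as its own tokenizer
const tokens = model.tokenize("Hello world");
console.log(tokens.length, model.detokenize(tokens));

const context = await model.createContext();
// ... evaluate with the context ...
await context.dispose();
await model.dispose();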


@@ -0,0 +1,832 @@
import process from "process";
import path from "path";
import { AsyncDisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
import { removeNullFields } from "../../utils/removeNullFields.js";
import { DisposeGuard } from "../../utils/DisposeGuard.js";
import { LlamaLocks, LlamaLogLevel, LlamaVocabularyType, LlamaVocabularyTypeValues } from "../../bindings/types.js";
import { readGgufFileInfo } from "../../gguf/readGgufFileInfo.js";
import { GgufInsights } from "../../gguf/insights/GgufInsights.js";
import { getConsoleLogPrefix } from "../../utils/getConsoleLogPrefix.js";
import { getReadablePath } from "../../cli/utils/getReadablePath.js";
import { LlamaContext } from "../LlamaContext/LlamaContext.js";
import { LlamaEmbeddingContext } from "../LlamaEmbeddingContext.js";
import { GgufArchitectureType } from "../../gguf/types/GgufMetadataTypes.js";
import { maxRecentDetokenizerTokens } from "../../consts.js";
import { LlamaRankingContext } from "../LlamaRankingContext.js";
import { TokenAttribute, TokenAttributes } from "./utils/TokenAttributes.js";
const defaultUseMmap = true;
const defaultUseDirectIo = true;
const defaultContextFlashAttentionEnabled = false;
const defaultContextSwaFullCache = false;
export class LlamaModel {
/** @internal */ _llama;
/** @internal */ _model;
/** @internal */ _backendModelDisposeGuard;
/** @internal */ _tokens;
/** @internal */ _modelPath;
/** @internal */ _fileInfo;
/** @internal */ _fileInsights;
/** @internal */ _gpuLayers;
/** @internal */ _vocabOnly;
/** @internal */ _filename;
/** @internal */ _disposedState = { disposed: false };
/** @internal */ _disposeAggregator = new AsyncDisposeAggregator();
/** @internal */ _llamaPreventDisposalHandle;
/** @internal */ _defaultContextFlashAttentionOptionEnabled;
/** @internal */ _defaultContextFlashAttention;
/** @internal */ _defaultContextSwaFullCache;
/** @internal */ _flashAttentionSupported;
/** @internal */ _loraAdapters = new Map();
/** @internal */ _typeDescription;
/** @internal */ _trainContextSize;
/** @internal */ _embeddingVectorSize;
/** @internal */ _vocabularyType;
tokenizer;
onDispose = new EventRelay();
constructor({ modelPath, gpuLayers, vocabOnly = false, useMmap, useDirectIo, useMlock, checkTensors, onLoadProgress, loadSignal, metadataOverrides }, { _llama, _fileInfo, _fileInsights, _defaultContextFlashAttentionOptionEnabled, _defaultContextFlashAttention, _defaultContextSwaFullCache, _flashAttentionSupported }) {
this._llama = _llama;
this._fileInfo = _fileInfo;
this._modelPath = path.resolve(process.cwd(), modelPath);
this._fileInsights = _fileInsights;
this._gpuLayers = gpuLayers;
this._vocabOnly = vocabOnly ?? false;
this._backendModelDisposeGuard = new DisposeGuard([this._llama._backendDisposeGuard]);
this._llamaPreventDisposalHandle = this._llama._backendDisposeGuard.createPreventDisposalHandle();
this._defaultContextFlashAttentionOptionEnabled = _defaultContextFlashAttentionOptionEnabled;
this._defaultContextFlashAttention = _defaultContextFlashAttention;
this._defaultContextSwaFullCache = _defaultContextSwaFullCache;
this._flashAttentionSupported = _flashAttentionSupported;
const overridesList = ggufMetadataOverridesToList(metadataOverrides);
this._model = new this._llama._bindings.AddonModel(this._modelPath, removeNullFields({
addonExports: this._llama._bindings,
gpuLayers,
vocabOnly: this._vocabOnly,
useMmap,
useDirectIo,
useMlock: _llama.supportsMlock
? useMlock
: undefined,
checkTensors: checkTensors ?? false,
onLoadProgress: onLoadProgress == null
? undefined
: (loadPercentage) => {
try {
onLoadProgress(loadPercentage);
}
catch (err) {
// the native addon code calls this function, so there's no point in throwing an error here
console.error(err);
}
},
hasLoadAbortSignal: loadSignal != null,
overridesList: overridesList.length > 0
? overridesList
: undefined
}));
this._tokens = LlamaModelTokens._create(this._model, this._disposedState);
this._filename = path.basename(modelPath);
this._disposeAggregator.add(() => {
this._disposedState.disposed = true;
});
this._disposeAggregator.add(this.onDispose.dispatchEvent);
this._disposeAggregator.add(this._llama.onDispose.createListener(disposeModelIfReferenced.bind(null, new WeakRef(this))));
this._disposeAggregator.add(async () => {
await this._backendModelDisposeGuard.acquireDisposeLock();
await this._model.dispose();
this._llamaPreventDisposalHandle.dispose();
});
this.tokenize = this.tokenize.bind(this);
this.detokenize = this.detokenize.bind(this);
this.isSpecialToken = this.isSpecialToken.bind(this);
this.isEogToken = this.isEogToken.bind(this);
this.tokenize.detokenize = this.detokenize;
this.tokenize.isSpecialToken = this.isSpecialToken;
this.tokenize.isEogToken = this.isEogToken;
Object.freeze(this.tokenize);
this.tokenizer = this.tokenize;
}
async dispose() {
if (this._disposedState.disposed)
return;
this._disposedState.disposed = true;
await this._disposeAggregator.dispose();
}
/** @hidden */
async [Symbol.asyncDispose]() {
await this.dispose();
}
get disposed() {
return this._disposedState.disposed;
}
get llama() {
return this._llama;
}
get tokens() {
return this._tokens;
}
get filename() {
return this._filename;
}
get fileInfo() {
return this._fileInfo;
}
get fileInsights() {
return this._fileInsights;
}
/**
* Number of layers offloaded to the GPU.
* If GPU support is disabled, this will always be `0`.
*/
get gpuLayers() {
return this._gpuLayers;
}
/**
* Total model size in memory in bytes.
*
* When using mmap, actual memory usage may be higher than this value due to `llama.cpp`'s performance optimizations.
*/
get size() {
this._ensureNotDisposed();
return this._model.getModelSize();
}
get flashAttentionSupported() {
return this._flashAttentionSupported;
}
get defaultContextFlashAttention() {
return this._defaultContextFlashAttention;
}
get defaultContextSwaFullCache() {
return this._defaultContextSwaFullCache;
}
tokenize(text, specialTokens = false, options) {
this._ensureNotDisposed();
if (text === "")
return [];
if (specialTokens === "builtin") {
const builtinToken = text;
switch (builtinToken) {
case "BOS": return this.tokens.bos == null ? [] : [this.tokens.bos];
case "EOS": return this.tokens.eos == null ? [] : [this.tokens.eos];
case "NL": return this.tokens.nl == null ? [] : [this.tokens.nl];
case "EOT": return this.tokens.eot == null ? [] : [this.tokens.eot];
case "SEP": return this.tokens.sep == null ? [] : [this.tokens.sep];
}
void builtinToken;
throw new Error(`Unknown builtin special token: ${builtinToken}`);
}
if (options === "trimLeadingSpace") {
if (specialTokens) {
const countLeadingSpaces = (text) => {
let count = 0;
for (; count < text.length; count++) {
if (text[count] !== " ")
break;
}
return count;
};
const textLeadingSpaces = countLeadingSpaces(text);
const [workaroundToken, workaroundTokenString] = (this.tokens.bos != null && this.tokens.bosString != null)
? [this.tokens.bos, this.tokens.bosString]
: (this.tokens.eos != null && this.tokens.eosString != null)
? [this.tokens.eos, this.tokens.eosString]
: (this.tokens.nl != null && this.tokens.nlString != null)
? [this.tokens.nl, this.tokens.nlString]
: (this.tokens.eot != null && this.tokens.eotString != null)
? [this.tokens.eot, this.tokens.eotString]
: [null, null];
if (workaroundToken != null && workaroundTokenString != null) {
const tokens = Array.from(this._model.tokenize(workaroundTokenString + text, true));
const workaroundTokenIndex = tokens.indexOf(workaroundToken);
// only use the tokenized output if it can be corrected, otherwise fall back to the default tokenization
if (workaroundTokenIndex >= 0 && workaroundTokenIndex <= 1) {
tokens.splice(0, workaroundTokenIndex + 1);
if (countLeadingSpaces(this.detokenize(tokens, true)) === textLeadingSpaces)
return tokens;
}
}
const workaroundTokensString = "\n";
const workaroundTokens = Array.from(this._model.tokenize(workaroundTokensString, true));
if (text.startsWith(workaroundTokensString)) {
const tokens = Array.from(this._model.tokenize(text, true));
if (this.detokenize(tokens, true).startsWith(workaroundTokensString))
return tokens;
}
const tokens = Array.from(this._model.tokenize(workaroundTokensString + text, true));
// only use the tokenized output if it can be corrected, otherwise fall back to the default tokenization
if (workaroundTokens.length > 0 && workaroundTokens.every((token, index) => tokens[index] === token)) {
tokens.splice(0, workaroundTokens.length);
if (countLeadingSpaces(this.detokenize(tokens, true)) === textLeadingSpaces)
return tokens;
}
}
else {
const workaroundTokensString = "\n";
const workaroundTokens = Array.from(this._model.tokenize(workaroundTokensString, false));
if (text.startsWith(workaroundTokensString)) {
const tokens = Array.from(this._model.tokenize(text, false));
if (this.detokenize(tokens, false).startsWith(workaroundTokensString))
return tokens;
}
const tokens = Array.from(this._model.tokenize(workaroundTokensString + text, false));
// only use the tokenized output if it can be corrected, otherwise fall back to the default tokenization
if (workaroundTokens.length > 0 && workaroundTokens.every((token, index) => tokens[index] === token)) {
tokens.splice(0, workaroundTokens.length);
return tokens;
}
}
}
return Array.from(this._model.tokenize(text, specialTokens));
}
/**
* Transform tokens into text
* @param tokens - the tokens to detokenize.
* @param [specialTokens] - if set to `true`, special tokens will be detokenized to their corresponding token text representation.
*
* Recommended for debugging purposes only.
*
* > **Note:** there may be additional spaces around special tokens that were not present in the original text - this is not a bug,
* this is [how the tokenizer is supposed to work](https://github.com/ggml-org/llama.cpp/pull/7697#issuecomment-2144003246).
*
* Defaults to `false`.
* @param [lastTokens] - the last few tokens that preceded the tokens to detokenize.
* If provided, the last few tokens will be used to determine whether a space has to be added before the current tokens or not,
* and apply other detokenizer-specific heuristics to provide the correct text continuation to the existing tokens.
*
* Using it may have no effect with some models, but it is still recommended.
*/
detokenize(tokens, specialTokens = false, lastTokens) {
this._ensureNotDisposed();
if (tokens.length === 0)
return "";
if (lastTokens == null || lastTokens.length === 0)
return this._model.detokenize(Uint32Array.from(tokens), Boolean(specialTokens));
const addedTokens = lastTokens.slice(-maxRecentDetokenizerTokens);
const addedTokensText = this._model.detokenize(Uint32Array.from(addedTokens), Boolean(specialTokens));
if (addedTokensText === "")
return this._model.detokenize(Uint32Array.from(tokens), Boolean(specialTokens));
const text = this._model.detokenize(Uint32Array.from([...addedTokens, ...tokens]), Boolean(specialTokens));
if (text.startsWith(addedTokensText))
return text.slice(addedTokensText.length);
return this._model.detokenize(Uint32Array.from(tokens), Boolean(specialTokens));
}
getTokenAttributes(token) {
if (token == null)
throw new Error("Token cannot be null");
if (this.vocabularyType === LlamaVocabularyType.none)
return TokenAttributes._create(token, TokenAttribute.undefined);
return TokenAttributes._create(token, this._model.getTokenAttributes(token));
}
/** Check whether the given token is a special token (a control-type token or a token with no normal text representation) */
isSpecialToken(token) {
if (token == null)
return false;
if (this.getTokenAttributes(token).control)
return true;
const normalText = this.detokenize([token], false);
if (normalText === "")
return this.detokenize([token], true) !== "";
return false;
}
*iterateAllTokens() {
if (this.vocabularyType === LlamaVocabularyType.none)
return;
const totalTokens = this.fileInfo.metadata?.tokenizer?.ggml?.tokens?.length;
if (typeof totalTokens !== "number")
return;
for (let i = 0; i < totalTokens; i++)
yield i;
}
/** Check whether the given token is an EOG (End Of Generation) token, like EOS or EOT. */
isEogToken(token) {
if (token == null)
return false;
return token === this.tokens.eos || token === this.tokens.eot || this._model.isEogToken(token);
}
async createContext(options = {}) {
if (this._vocabOnly)
throw new Error("Model is loaded in vocabOnly mode, so no context can be created");
return await withLock([this._llama._memoryLock, LlamaLocks.loadToMemory], options.createSignal, async () => {
const preventDisposalHandle = this._backendModelDisposeGuard.createPreventDisposalHandle();
try {
return await LlamaContext._create(options, { _model: this });
}
finally {
preventDisposalHandle.dispose();
}
});
}
/**
* @see [Using Embedding](https://node-llama-cpp.withcat.ai/guide/embedding) tutorial
*/
async createEmbeddingContext(options = {}) {
if (this._vocabOnly)
throw new Error("Model is loaded in vocabOnly mode, so no context can be created");
return await LlamaEmbeddingContext._create({ _model: this }, options);
}
/**
* @see [Reranking Documents](https://node-llama-cpp.withcat.ai/guide/embedding#reranking) tutorial
*/
async createRankingContext(options = {}) {
if (this._vocabOnly)
throw new Error("Model is loaded in vocabOnly mode, so no context can be created");
return await LlamaRankingContext._create({ _model: this }, options);
}
/**
* Get warnings about the model file that would affect its usage.
*
* These warnings include all the warnings generated by `GgufInsights`, but are more comprehensive.
*/
getWarnings() {
this._ensureNotDisposed();
const warnings = this._fileInsights.getWarnings(this._modelPath);
const modelFilePathText = `("${getReadablePath(this._modelPath)}")`;
try {
const beforeTextNoSpecialTokens = "some test text here";
const afterTextNoSpecialTokens = this.detokenize(this.tokenize(beforeTextNoSpecialTokens, false, "trimLeadingSpace"), false);
if (beforeTextNoSpecialTokens !== afterTextNoSpecialTokens)
warnings.push(`Using this model ${modelFilePathText} to tokenize text and then detokenize it resulted in a different text. ` +
"There might be an issue with the model or the tokenizer implementation. " +
"Using this model may not work as intended");
}
catch (err) {
// do nothing
}
try {
if (this._defaultContextFlashAttentionOptionEnabled && !this._flashAttentionSupported) {
if (this.fileInfo.metadata?.general?.architecture === GgufArchitectureType.grok)
warnings.push("Flash attention is incompatible with Grok and thus was turned off");
else if (this.fileInfo.metadata?.general?.architecture === GgufArchitectureType.gemma2)
warnings.push("Flash attention is incompatible with Gemma2 and thus was turned off");
else {
const nHead = this.fileInfo.architectureMetadata?.attention?.head_count ?? 0;
const nEmbd = this.fileInfo.architectureMetadata?.embedding_length ?? 0;
const nEmbdHeadK = this.fileInfo.architectureMetadata?.attention?.key_length ?? ((nHead == 0) ? 0 : (nEmbd / nHead));
const nEmbdHeadV = this.fileInfo.architectureMetadata?.attention?.value_length ?? ((nHead == 0) ? 0 : nEmbd / nHead);
if (nEmbdHeadK !== nEmbdHeadV)
warnings.push("Flash attention is incompatible with this model and thus was turned off");
}
}
}
catch (err) {
// do nothing
}
return warnings;
}
/** @hidden `ModelTypeDescription` type alias is too long in the documentation */
get typeDescription() {
this._ensureNotDisposed();
if (this._typeDescription == null)
this._typeDescription = this._model.getModelDescription();
return this._typeDescription;
}
/** The context size the model was trained on */
get trainContextSize() {
this._ensureNotDisposed();
if (this._trainContextSize == null)
this._trainContextSize = this._model.getTrainContextSize();
return this._trainContextSize;
}
/** The size of an embedding vector the model can produce */
get embeddingVectorSize() {
this._ensureNotDisposed();
if (this._embeddingVectorSize == null)
this._embeddingVectorSize = this._model.getEmbeddingVectorSize();
return this._embeddingVectorSize;
}
get vocabularyType() {
this._ensureNotDisposed();
if (this._vocabularyType == null) {
const vocabType = this._model.getVocabularyType();
this._vocabularyType = LlamaVocabularyTypeValues[vocabType];
if (this._vocabularyType == null) {
console.warn(getConsoleLogPrefix() + "Unknown vocabulary type:", vocabType);
this._vocabularyType = LlamaVocabularyType.none;
}
}
return this._vocabularyType;
}
/** @internal */
_ensureNotDisposed() {
if (this._disposedState.disposed)
throw new DisposedError();
}
/** @internal */
async _getOrLoadLora(filePath) {
const resolvedPath = path.resolve(process.cwd(), filePath);
if (this._loraAdapters.has(resolvedPath))
return this._loraAdapters.get(resolvedPath);
return await withLock([this._loraAdapters, "modify"], async () => {
if (this._loraAdapters.has(resolvedPath))
return this._loraAdapters.get(resolvedPath);
const lora = new this._llama._bindings.AddonModelLora(this._model, resolvedPath);
await this._model.loadLora(lora);
this._loraAdapters.set(resolvedPath, lora);
return lora;
});
}
/** @internal */
static async _create(modelOptions, { _llama }) {
const { loadSignal, defaultContextFlashAttention } = modelOptions;
const useMmap = _llama.supportsMmap && (modelOptions.useMmap ?? defaultUseMmap);
const useDirectIo = modelOptions.useDirectIo ?? defaultUseDirectIo;
const fileInfo = await readGgufFileInfo(modelOptions.modelPath, {
sourceType: "filesystem",
signal: loadSignal
});
applyGgufMetadataOverrides(fileInfo, modelOptions.metadataOverrides);
const ggufInsights = await GgufInsights.from(fileInfo, _llama);
const flashAttentionSupported = ggufInsights.flashAttentionSupported;
const resolvedDefaultContextFlashAttention = flashAttentionSupported
? (defaultContextFlashAttention ?? defaultContextFlashAttentionEnabled)
: false;
const resolvedDefaultContextSwaFullCache = modelOptions.defaultContextSwaFullCache ?? defaultContextSwaFullCache;
const gpuLayers = await ggufInsights.configurationResolver.resolveModelGpuLayers(modelOptions.gpuLayers, {
ignoreMemorySafetyChecks: modelOptions.ignoreMemorySafetyChecks,
defaultContextFlashAttention: resolvedDefaultContextFlashAttention,
defaultContextSwaFullCache: resolvedDefaultContextSwaFullCache,
useMmap
});
const resourceRequirementsEstimation = ggufInsights.estimateModelResourceRequirements({
gpuLayers: gpuLayers,
useMmap
});
const model = new LlamaModel({ ...modelOptions, gpuLayers, useMmap, useDirectIo }, {
_fileInfo: fileInfo,
_fileInsights: ggufInsights,
_llama,
_defaultContextFlashAttentionOptionEnabled: defaultContextFlashAttention ?? false,
_flashAttentionSupported: flashAttentionSupported,
_defaultContextFlashAttention: resolvedDefaultContextFlashAttention,
_defaultContextSwaFullCache: resolvedDefaultContextSwaFullCache
});
const modelCreationVramReservation = modelOptions.ignoreMemorySafetyChecks
? null
: _llama._vramOrchestrator.reserveMemory(resourceRequirementsEstimation.gpuVram);
const modelCreationRamReservation = modelOptions.ignoreMemorySafetyChecks
? null
: _llama._ramOrchestrator.reserveMemory(resourceRequirementsEstimation.cpuRam);
const loggedWarnings = new Set();
function onAbort() {
model._model.abortActiveModelLoad();
loadSignal?.removeEventListener("abort", onAbort);
}
function logWarnings(warnings) {
for (const warning of warnings) {
if (loggedWarnings.has(warning))
continue;
_llama._log(LlamaLogLevel.warn, warning);
loggedWarnings.add(warning);
}
}
if (loadSignal != null) {
if (loadSignal.aborted)
throw loadSignal.reason;
loadSignal.addEventListener("abort", onAbort);
}
logWarnings(ggufInsights.getWarnings(modelOptions.modelPath));
try {
const modelLoaded = await model._model.init();
if (loadSignal?.aborted) {
if (modelLoaded)
await model._model.dispose();
throw loadSignal.reason;
}
else if (!modelLoaded)
throw new Error("Failed to load model");
loadSignal?.removeEventListener("abort", onAbort);
logWarnings(model.getWarnings());
return model;
}
finally {
loadSignal?.removeEventListener("abort", onAbort);
modelCreationVramReservation?.dispose?.();
modelCreationRamReservation?.dispose?.();
}
}
}
export class LlamaModelTokens {
/** @internal */ _model;
/** @internal */ _disposedState;
/** @internal */ _infillTokens;
/** @internal */ _bosToken;
/** @internal */ _eosToken;
/** @internal */ _eotToken;
/** @internal */ _sepToken;
/** @internal */ _nlToken;
/** @internal */ _bosString;
/** @internal */ _eosString;
/** @internal */ _eotString;
/** @internal */ _sepString;
/** @internal */ _nlString;
/** @internal */ _shouldPrependBosToken;
/** @internal */ _shouldAppendEosToken;
constructor(model, disposedState) {
this._model = model;
this._disposedState = disposedState;
}
/**
* @returns The infill tokens.
*/
get infill() {
this._ensureNotDisposed();
if (this._infillTokens == null)
this._infillTokens = LlamaModelInfillTokens._create(this._model, this._disposedState);
return this._infillTokens;
}
/**
* @returns The BOS (Beginning Of Sequence) token.
*/
get bos() {
this._ensureNotDisposed();
if (this._bosToken == null)
this._bosToken = this._model.tokenBos();
if (this._bosToken === -1)
return null;
return this._bosToken;
}
/**
* @returns The EOS (End Of Sequence) token.
*/
get eos() {
this._ensureNotDisposed();
if (this._eosToken == null)
this._eosToken = this._model.tokenEos();
if (this._eosToken === -1)
return null;
return this._eosToken;
}
/**
* @returns The EOT (End Of Turn) token.
*/
get eot() {
this._ensureNotDisposed();
if (this._eotToken == null)
this._eotToken = this._model.eotToken();
if (this._eotToken === -1)
return null;
return this._eotToken;
}
/**
* @returns The SEP (Sentence Separator) token.
*/
get sep() {
this._ensureNotDisposed();
if (this._sepToken == null)
this._sepToken = this._model.sepToken();
if (this._sepToken === -1)
return null;
return this._sepToken;
}
/**
* @returns The NL (New Line) token.
*/
get nl() {
this._ensureNotDisposed();
if (this._nlToken == null)
this._nlToken = this._model.tokenNl();
if (this._nlToken === -1)
return null;
return this._nlToken;
}
/**
* @returns The BOS (Beginning Of Sequence) token text representation.
*/
get bosString() {
this._ensureNotDisposed();
const bosToken = this.bos;
if (bosToken == null)
return null;
if (this._bosString == null)
this._bosString = this._model.getTokenString(bosToken);
return this._bosString;
}
/**
* @returns The EOS (End Of Sequence) token text representation.
*/
get eosString() {
this._ensureNotDisposed();
const eosToken = this.eos;
if (eosToken == null)
return null;
if (this._eosString == null)
this._eosString = this._model.getTokenString(eosToken);
return this._eosString;
}
/**
* @returns The EOT (End Of Turn) token text representation.
*/
get eotString() {
this._ensureNotDisposed();
const eotToken = this.eot;
if (eotToken == null)
return null;
if (this._eotString == null)
this._eotString = this._model.getTokenString(eotToken);
return this._eotString;
}
/**
* @returns The SEP (Sentence Separator) token text representation.
*/
get sepString() {
this._ensureNotDisposed();
const sepToken = this.sep;
if (sepToken == null)
return null;
if (this._sepString == null)
this._sepString = this._model.getTokenString(sepToken);
return this._sepString;
}
/**
* @returns The NL (New Line) token text representation.
*/
get nlString() {
this._ensureNotDisposed();
const nlToken = this.nl;
if (nlToken == null)
return null;
if (this._nlString == null)
this._nlString = this._model.getTokenString(nlToken);
return this._nlString;
}
/**
* @returns Whether we should prepend a BOS (Beginning Of Sequence) token for evaluations with this model.
*/
get shouldPrependBosToken() {
this._ensureNotDisposed();
if (this._shouldPrependBosToken == null)
this._shouldPrependBosToken = this.bos != null && this._model.shouldPrependBosToken();
return this._shouldPrependBosToken;
}
/**
* @returns Whether we should append an EOS (End Of Sequence) token for evaluations with this model.
*/
get shouldAppendEosToken() {
this._ensureNotDisposed();
if (this._shouldAppendEosToken == null)
this._shouldAppendEosToken = this.eos != null && this._model.shouldAppendEosToken();
return this._shouldAppendEosToken;
}
/** @internal */
_ensureNotDisposed() {
if (this._disposedState.disposed)
throw new DisposedError();
}
/** @internal */
static _create(model, disposedState) {
return new LlamaModelTokens(model, disposedState);
}
}
export class LlamaModelInfillTokens {
/** @internal */ _model;
/** @internal */ _disposedState;
/** @internal */ _prefixToken;
/** @internal */ _middleToken;
/** @internal */ _suffixToken;
/** @internal */ _prefixString;
/** @internal */ _middleString;
/** @internal */ _suffixString;
constructor(model, disposedState) {
this._model = model;
this._disposedState = disposedState;
}
/**
* @returns The beginning of infill prefix token.
*/
get prefix() {
this._ensureNotDisposed();
if (this._prefixToken == null)
this._prefixToken = this._resolveSpecialToken(this._model.prefixToken(), ["<fim_prefix>"]);
if (this._prefixToken === -1)
return null;
return this._prefixToken;
}
/**
* @returns The beginning of infill middle token.
*/
get middle() {
this._ensureNotDisposed();
if (this._middleToken == null)
this._middleToken = this._resolveSpecialToken(this._model.middleToken(), ["<fim_middle>"]);
if (this._middleToken === -1)
return null;
return this._middleToken;
}
/**
* @returns The beginning of infill suffix token.
*/
get suffix() {
this._ensureNotDisposed();
if (this._suffixToken == null)
this._suffixToken = this._resolveSpecialToken(this._model.suffixToken(), ["<fim_suffix>"]);
if (this._suffixToken === -1)
return null;
return this._suffixToken;
}
/**
* @returns The beginning of infill prefix token as a string.
*/
get prefixString() {
this._ensureNotDisposed();
const prefixToken = this.prefix;
if (prefixToken == null)
return null;
if (this._prefixString == null)
this._prefixString = this._model.getTokenString(prefixToken);
return this._prefixString;
}
/**
* @returns The beginning of infill middle token as a string.
*/
get middleString() {
this._ensureNotDisposed();
const middleToken = this.middle;
if (middleToken == null)
return null;
if (this._middleString == null)
this._middleString = this._model.getTokenString(middleToken);
return this._middleString;
}
/**
* @returns The beginning of infill suffix token as a string.
*/
get suffixString() {
this._ensureNotDisposed();
const suffixToken = this.suffix;
if (suffixToken == null)
return null;
if (this._suffixString == null)
this._suffixString = this._model.getTokenString(suffixToken);
return this._suffixString;
}
/** @internal */
_ensureNotDisposed() {
if (this._disposedState.disposed)
throw new DisposedError();
}
/** @internal */
_resolveSpecialToken(token, fallbackTexts) {
if (token != null && token !== -1)
return token;
for (const text of fallbackTexts) {
const tokens = this._model.tokenize(text, true);
if (tokens.length !== 1)
continue;
return tokens[0];
}
return -1;
}
/** @internal */
static _create(model, disposedState) {
return new LlamaModelInfillTokens(model, disposedState);
}
}
function applyGgufMetadataOverrides(ggufFileInfo, overrides) {
function applyOverride(object, override) {
if (override == null || object == null)
return;
if (object instanceof Array || typeof object !== "object" || typeof override !== "object")
return;
for (const [key, value] of Object.entries(override)) {
if (value instanceof Array || typeof value !== "object" || (typeof value === "object" && typeof object[key] !== "object"))
object[key] = value;
else
applyOverride(object[key], value);
}
}
applyOverride(ggufFileInfo.metadata, overrides);
}
function ggufMetadataOverridesToList(overrides) {
const maxStringLength = 127;
const maxKeyLength = 127;
const res = [];
function addItem(object, path) {
if (object == null || object instanceof Array)
return;
if (typeof object !== "object") {
if (typeof object === "string" && object.length > maxStringLength)
throw new Error(`Metadata key "${path.join(".")}" override string value (${JSON.stringify(object)}) is longer than ${maxStringLength} characters`);
const key = path.join(".");
if (key.length > maxKeyLength)
throw new Error(`Metadata key "${key}" override path is longer than ${maxKeyLength} characters`);
let type = undefined;
if (typeof object === "number") {
if (typeof object === "bigint" || Number.isInteger(object))
type = 0;
else
type = 1;
}
res.push([key, object, type]);
return;
}
for (const [key, value] of Object.entries(object))
addItem(value, [...path, key]);
}
addItem(overrides ?? {}, []);
return res;
}
function disposeModelIfReferenced(modelRef) {
const model = modelRef.deref();
if (model != null)
void model.dispose();
}
//# sourceMappingURL=LlamaModel.js.map
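
To illustrate the `lastTokens` parameter of `detokenize()` documented above, here is a small sketch (not from this commit) of stream-safe detokenization: keep a short window of previously emitted tokens and pass it along so the detokenizer can insert joining spaces correctly. The `Token` alias and the window size are simplifications; the real implementation uses a branded token type and the internal `maxRecentDetokenizerTokens` constant.

type Token = number; // simplified; the library uses a branded numeric token type

function createStreamDetokenizer(model: {
    detokenize(tokens: readonly Token[], specialTokens?: boolean, lastTokens?: readonly Token[]): string;
}) {
    const recentTokens: Token[] = [];
    const windowSize = 4; // illustrative; the library clamps to `maxRecentDetokenizerTokens` internally

    return function onTokens(newTokens: readonly Token[]): string {
        // passing the recent history lets the detokenizer decide whether a leading
        // space (or other joining text) is needed before the new tokens
        const text = model.detokenize(newTokens, false, recentTokens);
        recentTokens.push(...newTokens);
        if (recentTokens.length > windowSize)
            recentTokens.splice(0, recentTokens.length - windowSize);
        return text;
    };
}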

File diff suppressed because one or more lines are too long


@@ -0,0 +1,29 @@
import { Token } from "../../../types.js";
export declare const enum TokenAttribute {
undefined = 0,
unknown = 1,
unused = 2,
normal = 4,
control = 8, // SPECIAL
userDefined = 16,
byte = 32,
normalized = 64,
lstrip = 128,
rstrip = 256,
singleWord = 512
}
export declare class TokenAttributes {
readonly token: Token;
private constructor();
get undefined(): boolean;
get unknown(): boolean;
get unused(): boolean;
get normal(): boolean;
get control(): boolean;
get userDefined(): boolean;
get byte(): boolean;
get normalized(): boolean;
get lstrip(): boolean;
get rstrip(): boolean;
get singleWord(): boolean;
}
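
For orientation, a standalone sketch (not part of this commit) of how these flag values behave: each attribute is a single bit, a raw attributes number can carry several of them at once, and each getter on `TokenAttributes` is a bit test like the `_hasAttribute` check in the implementation that follows.

// values copied from the TokenAttribute enum above
const control = 8;
const rstrip = 256;

const rawAttributes = control | rstrip; // e.g. a control token that also right-strips whitespace

const isControl = (rawAttributes & control) === control; // true
const isRstrip = (rawAttributes & rstrip) === rstrip;    // true
const isUndefined = rawAttributes === 0;                 // false: 0 means "no attribute information"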


@@ -0,0 +1,65 @@
// updated against `enum llama_token_attr` from `llama.h`
export var TokenAttribute;
(function (TokenAttribute) {
TokenAttribute[TokenAttribute["undefined"] = 0] = "undefined";
TokenAttribute[TokenAttribute["unknown"] = 1] = "unknown";
TokenAttribute[TokenAttribute["unused"] = 2] = "unused";
TokenAttribute[TokenAttribute["normal"] = 4] = "normal";
TokenAttribute[TokenAttribute["control"] = 8] = "control";
TokenAttribute[TokenAttribute["userDefined"] = 16] = "userDefined";
TokenAttribute[TokenAttribute["byte"] = 32] = "byte";
TokenAttribute[TokenAttribute["normalized"] = 64] = "normalized";
TokenAttribute[TokenAttribute["lstrip"] = 128] = "lstrip";
TokenAttribute[TokenAttribute["rstrip"] = 256] = "rstrip";
TokenAttribute[TokenAttribute["singleWord"] = 512] = "singleWord";
})(TokenAttribute || (TokenAttribute = {}));
export class TokenAttributes {
token;
/** @internal */ _attributes;
constructor(token, attributes) {
this.token = token;
this._attributes = attributes;
}
get undefined() {
return this._attributes === TokenAttribute.undefined;
}
get unknown() {
return this._hasAttribute(TokenAttribute.unknown);
}
get unused() {
return this._hasAttribute(TokenAttribute.unused);
}
get normal() {
return this._hasAttribute(TokenAttribute.normal);
}
get control() {
return this._hasAttribute(TokenAttribute.control);
}
get userDefined() {
return this._hasAttribute(TokenAttribute.userDefined);
}
get byte() {
return this._hasAttribute(TokenAttribute.byte);
}
get normalized() {
return this._hasAttribute(TokenAttribute.normalized);
}
get lstrip() {
return this._hasAttribute(TokenAttribute.lstrip);
}
get rstrip() {
return this._hasAttribute(TokenAttribute.rstrip);
}
get singleWord() {
return this._hasAttribute(TokenAttribute.singleWord);
}
/** @internal */
_hasAttribute(attribute) {
return (this._attributes & attribute) === attribute;
}
/** @internal */
static _create(token, attributes) {
return new TokenAttributes(token, attributes);
}
}
//# sourceMappingURL=TokenAttributes.js.map


@@ -0,0 +1 @@
{"version":3,"file":"TokenAttributes.js","sourceRoot":"","sources":["../../../../src/evaluator/LlamaModel/utils/TokenAttributes.ts"],"names":[],"mappings":"AAEA,yDAAyD;AACzD,MAAM,CAAN,IAAkB,cAYjB;AAZD,WAAkB,cAAc;IAC5B,6DAAa,CAAA;IACb,yDAAgB,CAAA;IAChB,uDAAe,CAAA;IACf,uDAAe,CAAA;IACf,yDAAgB,CAAA;IAChB,kEAAoB,CAAA;IACpB,oDAAa,CAAA;IACb,gEAAmB,CAAA;IACnB,yDAAe,CAAA;IACf,yDAAe,CAAA;IACf,iEAAmB,CAAA;AACvB,CAAC,EAZiB,cAAc,KAAd,cAAc,QAY/B;AAED,MAAM,OAAO,eAAe;IACR,KAAK,CAAQ;IAC7B,gBAAgB,CAAkB,WAAW,CAAiB;IAE9D,YAAoB,KAAY,EAAE,UAA0B;QACxD,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC;IAClC,CAAC;IAED,IAAW,SAAS;QAChB,OAAO,IAAI,CAAC,WAAW,KAAK,cAAc,CAAC,SAAS,CAAC;IACzD,CAAC;IAED,IAAW,OAAO;QACd,OAAO,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,IAAW,MAAM;QACb,OAAO,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;IACrD,CAAC;IAED,IAAW,MAAM;QACb,OAAO,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;IACrD,CAAC;IAED,IAAW,OAAO;QACd,OAAO,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,IAAW,WAAW;QAClB,OAAO,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC;IAC1D,CAAC;IAED,IAAW,IAAI;QACX,OAAO,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;IACnD,CAAC;IAED,IAAW,UAAU;QACjB,OAAO,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC;IACzD,CAAC;IAED,IAAW,MAAM;QACb,OAAO,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;IACrD,CAAC;IAED,IAAW,MAAM;QACb,OAAO,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;IACrD,CAAC;IAED,IAAW,UAAU;QACjB,OAAO,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC;IACzD,CAAC;IAED,gBAAgB;IACR,aAAa,CAAC,SAAyB;QAC3C,OAAO,CAAC,IAAI,CAAC,WAAW,GAAG,SAAS,CAAC,KAAK,SAAS,CAAC;IACxD,CAAC;IAED,gBAAgB;IACT,MAAM,CAAC,OAAO,CAAC,KAAY,EAAE,UAA0B;QAC1D,OAAO,IAAI,eAAe,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;IAClD,CAAC;CACJ"}