First upload version 0.0.1

This commit is contained in:
Neyra
2026-02-05 15:27:49 +08:00
commit 8e9b7201ed
4182 changed files with 593136 additions and 0 deletions


@@ -0,0 +1,67 @@
import { Llama } from "../../bindings/Llama.js";
import { GgufFileInfo } from "../types/GgufFileInfoTypes.js";
import { GgufInsightsConfigurationResolver } from "./GgufInsightsConfigurationResolver.js";
import { GgufInsightsTokens } from "./GgufInsightsTokens.js";
export type GgufInsightsResourceRequirements = {
cpuRam: number;
gpuVram: number;
};
export declare class GgufInsights {
private constructor();
/**
* Get warnings about the model file that would affect its usage.
*
* Most of these warnings are also generated by `llama.cpp`
*/
getWarnings(modelFilePath?: string): string[];
get ggufFileInfo(): GgufFileInfo;
get configurationResolver(): GgufInsightsConfigurationResolver;
get tokens(): GgufInsightsTokens;
/** The context size the model was trained on */
get trainContextSize(): number | undefined;
/** The size of an embedding vector the model can produce */
get embeddingVectorSize(): number | undefined;
get totalLayers(): number;
get modelSize(): number;
get flashAttentionSupported(): boolean;
get hasEncoder(): boolean;
get hasDecoder(): boolean;
get isRecurrent(): boolean;
get supportsRanking(): boolean;
/**
* The size of the SWA (Sliding Window Attention).
*
* When `undefined`, the model does not use sliding window attention.
*/
get swaSize(): number | undefined;
estimateModelResourceRequirements({ gpuLayers, useMmap, gpuSupportsMmap }: {
gpuLayers: number;
useMmap?: boolean;
gpuSupportsMmap?: boolean;
}): GgufInsightsResourceRequirements;
/**
* Estimates the memory required to create a context of the given parameters based on the implementation details of `llama.cpp`.
* The calculation doesn't include a precise estimation of the graph overhead memory, so it uses a rough estimate for that.
* The graph overhead estimation will be made more precise in the future, but it's good enough for now.
*/
estimateContextResourceRequirements({ contextSize, modelGpuLayers, batchSize, sequences, isEmbeddingContext, includeGraphOverhead, flashAttention, swaFullCache }: {
contextSize: number;
modelGpuLayers: number;
batchSize?: number;
sequences?: number;
isEmbeddingContext?: boolean;
flashAttention?: boolean;
includeGraphOverhead?: boolean;
swaFullCache?: boolean;
}): GgufInsightsResourceRequirements;
/**
* @param ggufFileInfo
* @param llama - If you already have a `Llama` instance, pass it to reuse it for the `GgufInsights` instance.
* If you don't pass a `Llama` instance, a basic `Llama` instance is created as a fallback - it's a slim instance that
* doesn't instantiate a `llama.cpp` backend, so it won't utilize the GPU at all, and will be shared with other `GgufInsights` instances
* that need a fallback `Llama` instance.
*/
static from(ggufFileInfo: GgufFileInfo, llama?: Llama): Promise<GgufInsights>;
}
export declare function parseRankingTemplate(template: string | undefined | null): string | undefined;
export declare function isRankingTemplateValid(template: string | undefined | null): boolean;
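
A minimal usage sketch of the API declared above. It assumes `GgufInsights`, `readGgufFileInfo`, and `getLlama` are available from the package's public entry point, and the model path is hypothetical:

import {getLlama, readGgufFileInfo, GgufInsights} from "node-llama-cpp";

// hypothetical GGUF file path - replace with a real model file
const ggufFileInfo = await readGgufFileInfo("./models/model.gguf");
const llama = await getLlama();
const insights = await GgufInsights.from(ggufFileInfo, llama);

// estimated memory for loading the model with all layers offloaded to the GPU
const modelCost = insights.estimateModelResourceRequirements({
    gpuLayers: insights.totalLayers
});

// estimated additional memory for a context on top of that model
const contextCost = insights.estimateContextResourceRequirements({
    contextSize: insights.trainContextSize ?? 4096,
    modelGpuLayers: insights.totalLayers
});

console.log("model VRAM:", modelCost.gpuVram, "context VRAM:", contextCost.gpuVram);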


@@ -0,0 +1,653 @@
import { getLlamaWithoutBackend } from "../../bindings/utils/getLlamaWithoutBackend.js";
import { getDefaultContextBatchSize, getDefaultContextSequences } from "../../evaluator/LlamaContext/LlamaContext.js";
import { GgufArchitectureType } from "../types/GgufMetadataTypes.js";
import { getReadablePath } from "../../cli/utils/getReadablePath.js";
import { padSafeContextSize } from "../../evaluator/LlamaContext/utils/padSafeContextSize.js";
import { GgufInsightsConfigurationResolver } from "./GgufInsightsConfigurationResolver.js";
import { GgufInsightsTokens } from "./GgufInsightsTokens.js";
export class GgufInsights {
/** @internal */ _llama;
/** @internal */ _modelSize;
/** @internal */ _totalFileLayers = null;
/** @internal */ _supportsRanking;
/** @internal */ _ggufFileInfo;
/** @internal */ _configurationResolver;
/** @internal */ _tokens;
constructor(ggufFileInfo, llama) {
this._llama = llama;
this._ggufFileInfo = ggufFileInfo;
this._modelSize = calculateTensorsSize(ggufFileInfo.fullTensorInfo ?? [], llama, true, true);
this._configurationResolver = GgufInsightsConfigurationResolver._create(this);
this._tokens = GgufInsightsTokens._create(this);
}
/**
* Get warnings about the model file that would affect its usage.
*
* Most of these warnings are also generated by `llama.cpp`
*/
getWarnings(modelFilePath) {
const warnings = [];
const modelFilePathText = (modelFilePath != null && modelFilePath !== "")
? ` ("${getReadablePath(modelFilePath)}")`
: "";
if (this._ggufFileInfo?.metadata?.tokenizer?.ggml?.model === "gpt2" &&
this._ggufFileInfo?.metadata?.tokenizer?.ggml?.pre == null) {
// equivalent to the warning in `llama.cpp` under `llm_load_vocab`: "missing pre-tokenizer type, using: 'default'"
warnings.push(`This model file${modelFilePathText} is missing a pre-tokenizer configuration. ` +
"This may cause incorrect tokenization and thus degrade the generation quality. " +
"Consider using a newer model or regenerating this GGUF model file");
}
return warnings;
}
get ggufFileInfo() {
return this._ggufFileInfo;
}
get configurationResolver() {
return this._configurationResolver;
}
get tokens() {
return this._tokens;
}
/** The context size the model was trained on */
get trainContextSize() {
return this._ggufFileInfo.architectureMetadata.context_length;
}
/** The size of an embedding vector the model can produce */
get embeddingVectorSize() {
return this._ggufFileInfo.architectureMetadata.embedding_length;
}
get totalLayers() {
const outputLayers = 1;
return this._getTotalFileLayers() + outputLayers;
}
get modelSize() {
return this._modelSize;
}
get flashAttentionSupported() {
// source: `llama_new_context_with_model` in `llama.cpp`
if (this._ggufFileInfo.metadata?.general?.architecture === GgufArchitectureType.grok)
return false;
else if (this._ggufFileInfo.metadata?.general?.architecture === GgufArchitectureType.gemma2)
return false;
else {
const nHead = this._ggufFileInfo.architectureMetadata?.attention?.head_count ?? 0;
const nEmbd = this._ggufFileInfo.architectureMetadata?.embedding_length ?? 0;
const nEmbdHeadK = this._ggufFileInfo.architectureMetadata?.attention?.key_length ?? ((nHead == 0) ? 0 : (nEmbd / nHead));
const nEmbdHeadV = this._ggufFileInfo.architectureMetadata?.attention?.value_length ?? ((nHead == 0) ? 0 : nEmbd / nHead);
if (nEmbdHeadK !== nEmbdHeadV)
return false;
}
return true;
}
get hasEncoder() {
switch (this._ggufFileInfo.metadata?.general?.architecture) {
case GgufArchitectureType.t5:
case GgufArchitectureType.t5encoder:
return true;
}
return false;
}
get hasDecoder() {
switch (this._ggufFileInfo.metadata?.general?.architecture) {
case GgufArchitectureType.t5encoder:
return false;
}
return true;
}
get isRecurrent() {
switch (this._ggufFileInfo.metadata?.general?.architecture) {
case GgufArchitectureType.mamba:
case GgufArchitectureType.mamba2:
case GgufArchitectureType.rwkv6:
case GgufArchitectureType.rwkv6qwen2:
case GgufArchitectureType.rwkv7:
case GgufArchitectureType.arwkv7:
return true;
}
return false;
}
get supportsRanking() {
if (this._supportsRanking != null)
return this._supportsRanking;
const layers = this._ggufFileInfo.fullTensorInfo ?? [];
for (let i = layers.length - 1; i >= 0; i--) {
const tensor = layers[i];
if (tensor == null)
continue;
if (tensor.name === "cls.weight" || tensor.name === "cls.output.weight") {
this._supportsRanking = this.tokens.sepToken != null || this.tokens.eosToken != null ||
isRankingTemplateValid(parseRankingTemplate(this._ggufFileInfo.metadata?.tokenizer?.["chat_template.rerank"]));
this._supportsRanking &&= !(this.hasEncoder && this.hasDecoder); // encoder-decoder models are not supported
return this._supportsRanking;
}
}
this._supportsRanking = false;
return this._supportsRanking;
}
/**
* The size of the SWA (Sliding Window Attention).
*
* When `undefined`, the model does not use sliding window attention.
*/
get swaSize() {
const slidingWindow = this._ggufFileInfo?.architectureMetadata?.attention?.sliding_window;
if (slidingWindow == null || slidingWindow <= 0)
return undefined;
const trainContextSize = this.trainContextSize;
if (trainContextSize != null && slidingWindow >= trainContextSize)
return undefined;
return slidingWindow;
}
estimateModelResourceRequirements({ gpuLayers, useMmap = this._llama.supportsMmap, gpuSupportsMmap = this._llama.gpuSupportsMmap }) {
const { cpu, gpu } = this._getTensorResourceSplit(gpuLayers);
return {
cpuRam: calculateTensorsSize(cpu, this._llama, false),
gpuVram: calculateTensorsSize(gpu, this._llama, useMmap && gpuSupportsMmap)
};
}
/**
* Estimates the memory required to create a context of the given parameters based on the implementation details of `llama.cpp`.
* The calculation doesn't include a precise estimation of the graph overhead memory, so it uses a rough estimate for that.
* The graph overhead estimation will be made more precise in the future, but it's good enough for now.
*/
estimateContextResourceRequirements({ contextSize, modelGpuLayers, batchSize, sequences, isEmbeddingContext = false, includeGraphOverhead = true, flashAttention = false, swaFullCache = false }) {
if (sequences == null)
sequences = getDefaultContextSequences();
if (batchSize == null)
batchSize = getDefaultContextBatchSize({ contextSize, sequences });
const llmData = this._ggufFileInfo.architectureMetadata;
const tensorInfo = this._ggufFileInfo.fullTensorInfo ?? [];
const slidingWindow = this.swaSize ?? 0;
const kvUnified = false;
const usingSWA = !swaFullCache && slidingWindow > 0 && slidingWindow < contextSize &&
(this.trainContextSize == null || slidingWindow < this.trainContextSize);
const swaPattern = getSwaPatternForArchitecture(this._ggufFileInfo.metadata?.general?.architecture);
const nonSwaPercent = swaPattern <= 1
? 1
: (1 / (swaPattern + (flashAttention ? -0.5 : -1)));
// source: `llama_kv_cache_unified::get_padding` in `llama-kv-cache.cpp`
const kvCachePadding = 1;
const actualContextSize = kvUnified
? padSafeContextSize(sequences * contextSize, "up")
: sequences * padSafeContextSize(contextSize, "up");
const kvSize = usingSWA
? ((1 - nonSwaPercent) * Math.min(actualContextSize, ggmlPad(sequences * slidingWindow + batchSize, kvCachePadding)) +
nonSwaPercent * actualContextSize)
: actualContextSize;
const totalFileLayers = this._getTotalFileLayers();
const finalGpuLayers = Math.max(0, Math.min(modelGpuLayers ?? totalFileLayers, totalFileLayers));
const finalCpuLayers = totalFileLayers - finalGpuLayers;
const usingGpu = finalGpuLayers !== 0;
const vocabularySize = llmData.vocab_size ?? this._ggufFileInfo.metadata.tokenizer?.ggml?.tokens?.length ?? 0;
const embeddingSize = llmData.embedding_length ?? 0;
const floatBytes = 4; // sizeof(float)
const int32TBytes = 4; // sizeof(int32_t)
const estimateOutput = (nOutputs) => {
// source: `llama_context::output_reserve` in `llama-context.cpp`
const nOutputsMax = Math.max(batchSize, nOutputs);
const isT5 = this._ggufFileInfo.metadata.general?.architecture === GgufArchitectureType.t5;
const hasLogits = isT5 || !isEmbeddingContext;
const hasEmbd = isT5 || isEmbeddingContext;
const logitsSize = hasLogits
? (vocabularySize * nOutputsMax)
: 0;
const embdSize = hasEmbd
? (embeddingSize * nOutputsMax)
: 0;
const outputBufferSize = (logitsSize + embdSize) * floatBytes;
const outputIdsArr = int32TBytes * batchSize;
return outputBufferSize + outputIdsArr;
};
const estimateGraphOverheadMemory = () => {
const s1MB = Math.pow(1024, 2);
const tensorInfo = this._ggufFileInfo.fullTensorInfo ?? [];
const expertCount = llmData?.expert_count ?? 0;
const headCount = llmData?.attention?.head_count ?? 0;
const embeddingLength = llmData?.embedding_length ?? 0;
let defaultCalculationAdjustment = 0;
if (batchSize == null)
return 0;
if (this._ggufFileInfo.metadata.general?.architecture === GgufArchitectureType.llama) {
if (expertCount > 0) {
const expertsUsedCount = this._ggufFileInfo.architectureMetadata.expert_used_count ?? 2;
return int32TBytes * batchSize * (((expertsUsedCount + 1) * embeddingLength) + (kvSize * headCount));
}
return int32TBytes * batchSize * (embeddingLength + (kvSize * headCount));
}
else if (this._ggufFileInfo.metadata.general?.architecture === GgufArchitectureType.qwen2) {
if (modelGpuLayers === this.totalLayers) {
defaultCalculationAdjustment -= (s1MB * 340) * (this.trainContextSize == null
? 1
: kvSize / this.trainContextSize);
}
else {
defaultCalculationAdjustment -= (s1MB * 250) + ((s1MB * 50) * (this.trainContextSize == null
? 1
: kvSize / this.trainContextSize));
}
}
else if (this._ggufFileInfo.metadata.general?.architecture === GgufArchitectureType.gemma) {
// only works properly when all layers are on the GPU, which is why it's commented out:
// return int32TBytes * batchSize * ((llmData.embedding_length ?? 0));
if (modelGpuLayers === this.totalLayers) {
defaultCalculationAdjustment += (s1MB * 40) - ((s1MB * 270) * (this.trainContextSize == null
? 1
: kvSize / this.trainContextSize));
}
else {
defaultCalculationAdjustment += -(s1MB * 550) + ((s1MB * 150) * (this.trainContextSize == null
? 1
: Math.max(0, (1 - (kvSize / this.trainContextSize)))));
}
}
else if (this._ggufFileInfo.metadata.general?.architecture === GgufArchitectureType.stablelm) {
const headCount = this._ggufFileInfo.architectureMetadata.attention?.head_count ?? 0;
return (int32TBytes * batchSize * kvSize * headCount) - (50 * s1MB);
// if (modelGpuLayers === this.totalLayers) {
// defaultCalculationAdjustment += -(s1MB * 20) + (
// (s1MB * 250) * (
// this.trainContextSize == null
// ? 1
// : kvSize / this.trainContextSize
// )
// );
// } else {
// defaultCalculationAdjustment += -(s1MB * 40) + (
// (s1MB * 300) * (
// this.trainContextSize == null
// ? 1
// : kvSize / this.trainContextSize
// )
// );
// }
}
else if (this._ggufFileInfo.metadata.general?.architecture === GgufArchitectureType.qwen3) {
return int32TBytes * batchSize * (embeddingLength + (kvSize * headCount));
}
else if (expertCount > 0) {
const expertsUsedCount = this._ggufFileInfo.architectureMetadata.expert_used_count ?? 2;
return int32TBytes * batchSize * (((expertsUsedCount + 1) * embeddingLength) + (kvSize * headCount));
}
const totalElements = tensorInfo.length === 0
? this.totalLayers * (((llmData.embedding_length ?? 0) +
(llmData.feed_forward_length ?? 0)) / 2)
: tensorInfo.reduce((res, tensor) => {
return res + tensor.dimensions.reduce((res, dim) => res + Number(dim), 0);
}, 0);
if (this._ggufFileInfo.metadata.general?.architecture === GgufArchitectureType.phi3) {
// magic numbers for estimation. will be improved in the future
return (totalElements * 123 * (kvSize / 4096)) + defaultCalculationAdjustment;
}
// magic numbers for estimation. will be improved in the future
return (totalElements * 77.655 * (kvSize / 4096)) + defaultCalculationAdjustment;
};
const gpuKVCacheSize = usingGpu
? this._estimateKvMemorySizeInBytes(kvSize, finalGpuLayers < totalFileLayers
? (finalGpuLayers + 1)
: finalGpuLayers)
: 0;
const cpuKVCacheSize = this._estimateKvMemorySizeInBytes(kvSize, finalCpuLayers);
// source: `llama_context::graph_max_nodes` in `llama-context.cpp`
const getMaxNodesMultiplier = (arch, nTokens) => {
if (arch === GgufArchitectureType.qwen3next)
return {
min: nTokens * 40,
multiplier: 32
};
return {
min: 1024,
multiplier: 8
};
};
const maxNodesMultiplier = getMaxNodesMultiplier(this._ggufFileInfo.metadata?.general?.architecture, Math.min(actualContextSize, batchSize));
const maxNodes = Math.max(maxNodesMultiplier.min, maxNodesMultiplier.multiplier * tensorInfo.length);
const cpuNodes = maxNodesMultiplier.multiplier * (tensorInfo.length * (finalCpuLayers / totalFileLayers));
const gpuNodes = maxNodes - cpuNodes;
const gpuComputeBufferSize = (this._llama._consts.ggmlTensorOverhead * gpuNodes) +
this._llama._bindings.getGgmlGraphOverheadCustom(gpuNodes, false);
const cpuComputeBufferSize = (this._llama._consts.ggmlTensorOverhead * cpuNodes) +
this._llama._bindings.getGgmlGraphOverheadCustom(cpuNodes, false);
const graphOverheadMemory = (flashAttention || !includeGraphOverhead)
? 0
: estimateGraphOverheadMemory();
const graphOverheadGpuSize = usingGpu
? Math.round(graphOverheadMemory * (finalGpuLayers / totalFileLayers))
: 0;
const graphOverheadCpuSize = graphOverheadMemory - graphOverheadGpuSize;
const outputBufferSize = estimateOutput(sequences);
const gpuVram = gpuKVCacheSize + gpuComputeBufferSize + graphOverheadGpuSize + outputBufferSize;
const cpuRam = cpuKVCacheSize + cpuComputeBufferSize + graphOverheadCpuSize + outputBufferSize;
return {
cpuRam,
gpuVram: usingGpu
? gpuVram
: 0
};
}
/**
* Get the split tensor resources for CPU and GPU based on the number of GPU layers
* @internal
*/
_getTensorResourceSplit(gpuLayers) {
const tensorInfo = this._ggufFileInfo.fullTensorInfo ?? [];
const architecture = this._ggufFileInfo.metadata?.general?.architecture;
if (gpuLayers === 0) {
return {
cpu: tensorInfo,
gpu: []
};
}
const fileLayers = this._getFileLayers();
const startGpuLayer = Math.max(0, fileLayers - gpuLayers);
const gpuTensors = [];
const cpuTensors = [];
let tokenEmbedLayer;
let mainOutputLayer;
for (const singleTensorInfo of tensorInfo) {
if (isMainOutputLayer(singleTensorInfo.name))
mainOutputLayer = singleTensorInfo;
else if (isTokenEmbedLayer(singleTensorInfo.name))
tokenEmbedLayer = singleTensorInfo;
// in the implementation of `llm_load_tensors`, layers with `LLM_TENSOR_LAYER_INPUT` are always
// loaded with `model.dev_input`, which is always set to the CPU
if (isInputLayer(singleTensorInfo.name)) {
cpuTensors.push(singleTensorInfo);
continue;
// in the implementation of `llm_load_tensors`, layers with `LLM_TENSOR_LAYER_OUTPUT` are always
// loaded with `model.dev_output`, which is set to the GPU only if all the layers are on the GPU
}
else if (isOutputLayer(singleTensorInfo.name)) {
if (gpuLayers === this.totalLayers) {
gpuTensors.push(singleTensorInfo);
continue;
}
else {
cpuTensors.push(singleTensorInfo);
continue;
}
}
const { layerNumber } = parseTensorName(singleTensorInfo.name);
if (gpuLayers !== this.totalLayers) {
if (architecture === GgufArchitectureType.qwen2 || architecture === GgufArchitectureType.gemma) {
if (layerNumber != null && layerNumber >= startGpuLayer)
gpuTensors.push(singleTensorInfo);
else
cpuTensors.push(singleTensorInfo);
continue;
}
}
if (layerNumber == null || layerNumber >= startGpuLayer)
gpuTensors.push(singleTensorInfo);
else
cpuTensors.push(singleTensorInfo);
}
if (mainOutputLayer == null && tokenEmbedLayer != null && gpuLayers === this.totalLayers && !gpuTensors.includes(tokenEmbedLayer))
gpuTensors.push(tokenEmbedLayer);
return {
cpu: cpuTensors,
gpu: gpuTensors
};
}
/** @internal */
_determineNumberOfLayersFromTensorInfo() {
const layerNumbers = new Set();
for (const singleTensorInfo of (this._ggufFileInfo.fullTensorInfo ?? [])) {
const { layerNumber } = parseTensorName(singleTensorInfo.name);
if (layerNumber != null)
layerNumbers.add(layerNumber);
}
return layerNumbers.size;
}
/** @internal */
_getFileLayers() {
return this._ggufFileInfo.architectureMetadata.block_count ?? this._determineNumberOfLayersFromTensorInfo();
}
/** @internal */
_estimateKvMemorySizeInBytes(kvSize, layers) {
// source: `llama_kv_cache_init` in `llama.cpp`
const nHead = this._ggufFileInfo.architectureMetadata.attention?.head_count ?? 0;
const nEmbd = this._ggufFileInfo.architectureMetadata.embedding_length ?? 0;
const nEmbdHeadK = this._ggufFileInfo.architectureMetadata.attention?.key_length ?? ((nHead == 0) ? 0 : (nEmbd / nHead));
const nHeadKv = this._ggufFileInfo.architectureMetadata.attention?.head_count_kv ?? nHead;
const nEmbdHeadV = this._ggufFileInfo.architectureMetadata.attention?.value_length ?? ((nHead == 0) ? 0 : nEmbd / nHead);
const ssmDConv = this._ggufFileInfo.architectureMetadata.ssm?.conv_kernel ?? 0;
const ssmDInner = this._ggufFileInfo.architectureMetadata.ssm?.inner_size ?? 0;
const modelNEmbdKS = (this._ggufFileInfo.architectureMetadata.wkv?.head_size ?? 0) !== 0
? (this._ggufFileInfo.architectureMetadata.token_shift_count ?? 0) * nEmbd
: (ssmDConv > 0 ? (ssmDConv - 1) : 0) * ssmDInner;
const ssmDState = this._ggufFileInfo.architectureMetadata.ssm?.state_size ?? 0;
const modelNEmbdVS = (this._ggufFileInfo.architectureMetadata.wkv?.head_size ?? 0) !== 0
? nEmbd * (this._ggufFileInfo.architectureMetadata.wkv?.head_size ?? 0)
: ssmDState * ssmDInner;
let totalElementsK = 0;
let totalElementsV = 0;
for (let i = 0; i < layers; i++) {
const nHeadKvArrayItem = (typeof nHeadKv === "number")
? nHeadKv
: nHeadKv[i] !== 0
? nHeadKv[i]
: nHead;
const nEmbdKGqa = nEmbdHeadK * nHeadKvArrayItem;
const nEmbdVGqa = nEmbdHeadV * nHeadKvArrayItem;
const totalNEmbdKGqa = nEmbdKGqa + modelNEmbdKS;
const totalNEmbdVGqa = nEmbdVGqa + modelNEmbdVS;
totalElementsK += totalNEmbdKGqa * kvSize;
totalElementsV += totalNEmbdVGqa * kvSize;
}
const keyTypeSize = this._ggufFileInfo.metadata.general?.architecture === GgufArchitectureType.mamba
// if `type_k` of `llama_context_params` changes to be configurable in `LlamaContext`,
// this would have to depend on that value
? this._llama._consts.ggmlTypeF32Size
: this._llama._consts.ggmlTypeF16Size;
const valueTypeSize = this._ggufFileInfo.metadata.general?.architecture === GgufArchitectureType.mamba
// if `type_v` of `llama_context_params` changes to be configurable in `LlamaContext`,
// this would have to depend on that value
? this._llama._consts.ggmlTypeF32Size
: this._llama._consts.ggmlTypeF16Size;
return ((totalElementsK * keyTypeSize) +
(totalElementsV * valueTypeSize));
}
/** @internal */
_getTotalFileLayers() {
if (this._totalFileLayers != null)
return this._totalFileLayers;
this._totalFileLayers = this._getFileLayers();
return this._totalFileLayers;
}
/**
* @param ggufFileInfo
* @param llama - If you already have a `Llama` instance, pass it to reuse it for the `GgufInsights` instance.
* If you don't pass a `Llama` instance, a basic `Llama` instance is created as a fallback - it's a slim instance that
* doesn't instantiate a `llama.cpp` backend, so it won't utilize the GPU at all, and will be shared with other `GgufInsights` instances
* that need a fallback `Llama` instance.
*/
static async from(ggufFileInfo, llama) {
let resolvedLlama = llama;
if (resolvedLlama == null)
resolvedLlama = await getLlamaWithoutBackend();
return new GgufInsights(ggufFileInfo, resolvedLlama);
}
}
function parseTensorName(tensorName) {
if (tensorName == null)
return { layerNumber: undefined };
const layerTensorPrefix = "blk.";
if (!tensorName.startsWith(layerTensorPrefix))
return { layerNumber: undefined };
const dotIndex = tensorName.indexOf(".", layerTensorPrefix.length);
const layerNumberString = tensorName.slice(layerTensorPrefix.length, dotIndex < 0
? tensorName.length
: dotIndex);
const layerNumber = parseInt(layerNumberString);
if (Number.isFinite(layerNumber))
return { layerNumber };
return { layerNumber: undefined };
}
function calculateTensorsSize(tensorsInfo, llama, useMmap, startFromTensorDataOffset = false) {
if (!useMmap) {
let size = 0;
for (const tensorInfo of tensorsInfo)
size += calculateTensorSize(tensorInfo, llama);
return size;
}
const fileStats = new Map();
for (const tensorInfo of tensorsInfo) {
let stats = fileStats.get(tensorInfo.filePart);
if (stats == null) {
stats = {
tensorsSize: 0
};
fileStats.set(tensorInfo.filePart, stats);
}
const tensorSize = calculateTensorSize(tensorInfo, llama);
stats.tensorsSize += tensorSize;
const startOffset = tensorInfo.offset;
const endOffset = typeof startOffset === "number"
? startOffset + tensorSize
: startOffset + BigInt(tensorSize);
if (startFromTensorDataOffset)
stats.startOffset = Number(BigInt(tensorInfo.fileOffset) - BigInt(tensorInfo.offset));
else if (stats.startOffset == null || startOffset < stats.startOffset)
stats.startOffset = startOffset;
if (stats.endOffset == null || endOffset > stats.endOffset)
stats.endOffset = endOffset;
}
let size = 0;
for (const [, stats] of fileStats) {
const offsetSize = (stats.endOffset == null || stats.startOffset == null)
? 0
: Number(BigInt(stats.endOffset) - BigInt(stats.startOffset));
const tensorsSize = stats.tensorsSize;
size += Math.max(offsetSize, tensorsSize);
}
return size;
}
function calculateTensorSize(tensor, llama) {
const typeSize = llama._bindings.getTypeSizeForGgmlType(tensor.ggmlType);
const blockSize = llama._bindings.getBlockSizeForGgmlType(tensor.ggmlType);
const ggmlMaxDims = llama._consts.ggmlMaxDims;
if (typeSize == null || blockSize == null)
throw new Error("Invalid type or block size");
const { ne, nb } = getTensorNeAndNb(tensor, { typeSize, blockSize, ggmlMaxDims });
if (blockSize === 1) {
let totalBytes = typeSize;
for (let i = 0; i < ggmlMaxDims; i++) {
totalBytes += (ne[i] - 1) * nb[i];
}
return totalBytes;
}
else {
let totalBytes = Math.floor((ne[0] * nb[0]) / blockSize);
for (let i = 1; i < ggmlMaxDims; i++) {
totalBytes += (ne[i] - 1) * nb[i];
}
return totalBytes;
}
}
function getTensorNeAndNb(tensor, { typeSize, blockSize, ggmlMaxDims }) {
// number of elements
// source: `ggml_new_tensor_impl` in `ggml.c`
const ne = [
...tensor.dimensions,
...(Array(Math.max(0, ggmlMaxDims - tensor.dimensions.length)).fill(1))
].slice(0, ggmlMaxDims);
// number of bytes
// source: `ggml_new_tensor_impl` in `ggml.c`
const nb = [
typeSize,
Math.floor(typeSize * (ne[0] / blockSize)),
...Array(ggmlMaxDims - 2).fill(0)
];
for (let i = 2; i < ggmlMaxDims; i++) {
nb[i] = nb[i - 1] * ne[i - 1];
}
return {
ne,
nb
};
}
function isInputLayer(layerName) {
const [firstPart] = layerName.split(".");
if (firstPart == null)
return false;
// source: in `llama.cpp`, all tensor names from `LLM_TENSOR_NAMES` that
// have a mapping to `LLM_TENSOR_LAYER_INPUT` in `llm_tensor_info_mapping`
switch (firstPart) {
case "token_embd":
case "token_embd_norm":
case "token_types":
case "position_embd":
return true;
}
return false;
}
function isOutputLayer(layerName) {
const [firstPart, secondPart] = layerName.split(".");
if (firstPart == null)
return false;
// source: in `llama.cpp`, all tensor names from `LLM_TENSOR_NAMES` that
// have a mapping to `LLM_TENSOR_LAYER_OUTPUT` in `llm_tensor_info_mapping`
switch (firstPart) {
case "output":
case "output_norm":
case "cls":
return true;
}
if (secondPart == null)
return false;
// source: in `llama.cpp`, all tensor names from `LLM_TENSOR_NAMES` that
// have a mapping to `LLM_TENSOR_LAYER_OUTPUT` in `llm_tensor_info_mapping`
switch (firstPart + "." + secondPart) {
case "cls.output":
case "dec.output_norm":
case "enc.output_norm":
return true;
}
return false;
}
function isMainOutputLayer(layerName) {
const [firstPart] = layerName.split(".");
return firstPart === "output";
}
function isTokenEmbedLayer(layerName) {
const [firstPart] = layerName.split(".");
return firstPart === "token_embd";
}
function ggmlPad(value, padding) {
return ((value + padding - 1) & ~(padding - 1));
}
function getSwaPatternForArchitecture(architecture) {
// source: `llama_model::load_hparams` in `llama-model.cpp` - calls to `hparams.set_swa_pattern`
switch (architecture) {
case GgufArchitectureType.llama4:
return 4;
case GgufArchitectureType.phi3:
return 1;
case GgufArchitectureType.gemma2:
return 2;
case GgufArchitectureType.gemma3:
return 6;
case GgufArchitectureType.gemma3n:
return 5;
case GgufArchitectureType.cohere2:
return 4;
case GgufArchitectureType.exaone4:
return 4;
case GgufArchitectureType.gptOss:
return 2;
case GgufArchitectureType.smallthinker:
return 4;
}
return 1;
}
export function parseRankingTemplate(template) {
if (template == null)
return undefined;
return template
.replaceAll("{query}", "{{query}}")
.replaceAll("{document}", "{{document}}");
}
export function isRankingTemplateValid(template) {
return template != null && template.includes("{{query}}") && template.includes("{{document}}");
}
//# sourceMappingURL=GgufInsights.js.map
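
As a quick illustration of the two exported helpers above, `parseRankingTemplate` normalizes single-brace placeholders to the double-brace form that `isRankingTemplateValid` checks for (the template string below is made up):

// hypothetical rerank template, as could appear under the `tokenizer.chat_template.rerank` metadata key
const rawTemplate = "query: {query}\ndocument: {document}\nrelevance:";

const normalized = parseRankingTemplate(rawTemplate);
// -> "query: {{query}}\ndocument: {{document}}\nrelevance:"

isRankingTemplateValid(normalized);              // true - both placeholders are present
isRankingTemplateValid("score this: {{query}}"); // false - missing {{document}}
isRankingTemplateValid(undefined);               // false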

File diff suppressed because one or more lines are too long


@@ -0,0 +1,194 @@
import { BuildGpu } from "../../bindings/types.js";
import { LlamaModelOptions } from "../../evaluator/LlamaModel/LlamaModel.js";
import { LlamaContextOptions } from "../../evaluator/LlamaContext/types.js";
import type { GgufInsights } from "./GgufInsights.js";
export declare const defaultTrainContextSizeForEstimationPurposes = 4096;
export declare class GgufInsightsConfigurationResolver {
private constructor();
get ggufInsights(): GgufInsights;
/**
* Resolve the best configuration for loading a model and creating a context using the current hardware.
*
* Specifying a `targetGpuLayers` and/or `targetContextSize` will ensure the resolved configuration matches those values,
* but note it can lower the compatibility score if the hardware doesn't support it.
*
* Overriding hardware values is possible by configuring `hardwareOverrides`.
* @param options
* @param hardwareOverrides
*/
resolveAndScoreConfig({ targetGpuLayers, targetContextSize, embeddingContext, flashAttention, swaFullCache, useMmap }?: {
targetGpuLayers?: number | "max";
targetContextSize?: number;
embeddingContext?: boolean;
flashAttention?: boolean;
swaFullCache?: boolean;
useMmap?: boolean;
}, { getVramState, getRamState, getSwapState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading }?: {
getVramState?(): Promise<{
total: number;
free: number;
unifiedSize: number;
}>;
getRamState?(): Promise<{
total: number;
free: number;
}>;
getSwapState?(): Promise<{
total: number;
free: number;
}>;
llamaVramPaddingSize?: number;
llamaGpu?: BuildGpu;
llamaSupportsGpuOffloading?: boolean;
}): Promise<{
/**
* A number between `0` (inclusive) and `1` (inclusive) representing the compatibility score.
*/
compatibilityScore: number;
/**
* A number starting at `0` with no upper limit representing the bonus score.
* For each multiplier of the specified `contextSize` that the resolved context size is larger by, 1 bonus point is given.
*/
bonusScore: number;
/**
* The total score, which is the sum of the compatibility and bonus scores.
*/
totalScore: number;
/**
* The resolved values used to calculate the scores.
*/
resolvedValues: {
gpuLayers: number;
contextSize: number;
modelRamUsage: number;
contextRamUsage: number;
totalRamUsage: number;
modelVramUsage: number;
contextVramUsage: number;
totalVramUsage: number;
};
}>;
/**
* Score the compatibility of the model configuration with the current GPU and VRAM state.
* Assumes a model is loaded with the default `"auto"` configurations.
* Scored based on the following criteria:
* - The number of GPU layers that can be offloaded to the GPU (only when a GPU is available; otherwise, by how small the model is)
* - Whether all layers can be offloaded to the GPU (gives additional points)
* - Whether the resolved context size is at least as large as the specified `contextSize`
*
* If the resolved context size is larger than the specified context size, for each multiplier of the specified `contextSize`
* that the resolved context size is larger by, 1 bonus point is given in the `bonusScore`.
*
* `maximumFittedContextSizeMultiplier` is used to improve the proportionality of the bonus score between models.
* Set this to any value higher than `<max compared model context size> / contextSize`.
* Defaults to `100`.
*
* `maximumUnfitConfigurationResourceMultiplier` is used to improve the proportionality of the bonus score between unfit models.
* Set this to any value higher than `<max compared model resource usage> / <total available resources>`.
* Defaults to `100`.
*
* `contextSize` defaults to `4096` (if the model train context size is lower than this, the model train context size is used instead).
*/
scoreModelConfigurationCompatibility({ contextSize, embeddingContext, flashAttention, swaFullCache, maximumFittedContextSizeMultiplier, maximumUnfitConfigurationResourceMultiplier, forceStrictContextSize, forceGpuLayers, useMmap }?: {
contextSize?: number;
embeddingContext?: boolean;
flashAttention?: boolean;
swaFullCache?: boolean;
maximumFittedContextSizeMultiplier?: number;
maximumUnfitConfigurationResourceMultiplier?: number;
/**
* Do not resolve a context size larger than the specified `contextSize`.
*
* Defaults to `false`.
*/
forceStrictContextSize?: boolean;
forceGpuLayers?: number | "max";
useMmap?: boolean;
}, { getVramState, getRamState, getSwapState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading }?: {
getVramState?(): Promise<{
total: number;
free: number;
unifiedSize: number;
}>;
getRamState?(): Promise<{
total: number;
free: number;
}>;
getSwapState?(): Promise<{
total: number;
free: number;
}>;
llamaVramPaddingSize?: number;
llamaGpu?: BuildGpu;
llamaSupportsGpuOffloading?: boolean;
}): Promise<{
/**
* A number between `0` (inclusive) and `1` (inclusive) representing the compatibility score.
*/
compatibilityScore: number;
/**
* A number starting at `0` with no upper limit representing the bonus score.
* For each multiplier of the specified `contextSize` that the resolved context size is larger by, 1 bonus point is given.
*/
bonusScore: number;
/**
* The total score, which is the sum of the compatibility and bonus scores.
*/
totalScore: number;
/**
* The resolved values used to calculate the scores.
*/
resolvedValues: {
gpuLayers: number;
contextSize: number;
modelRamUsage: number;
contextRamUsage: number;
totalRamUsage: number;
modelVramUsage: number;
contextVramUsage: number;
totalVramUsage: number;
};
}>;
resolveModelGpuLayers(gpuLayers?: LlamaModelOptions["gpuLayers"], { ignoreMemorySafetyChecks, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention, defaultContextSwaFullCache, useMmap }?: {
ignoreMemorySafetyChecks?: boolean;
getVramState?(): Promise<{
total: number;
free: number;
}>;
llamaVramPaddingSize?: number;
llamaGpu?: BuildGpu;
llamaSupportsGpuOffloading?: boolean;
defaultContextFlashAttention?: boolean;
defaultContextSwaFullCache?: boolean;
useMmap?: boolean;
}): Promise<number>;
/**
* Resolve a context size option for the given options and constraints.
*
* If there's no context size that can fit the available resources, an `InsufficientMemoryError` is thrown.
*/
resolveContextContextSize(contextSize: LlamaContextOptions["contextSize"], { modelGpuLayers, batchSize, modelTrainContextSize, flashAttention, swaFullCache, getVramState, getRamState, getSwapState, llamaGpu, ignoreMemorySafetyChecks, isEmbeddingContext, sequences }: {
modelGpuLayers: number;
modelTrainContextSize: number;
flashAttention?: boolean;
swaFullCache?: boolean;
batchSize?: LlamaContextOptions["batchSize"];
sequences?: number;
getVramState?(): Promise<{
total: number;
free: number;
unifiedSize: number;
}>;
getRamState?(): Promise<{
total: number;
free: number;
}>;
getSwapState?(): Promise<{
total: number;
free: number;
}>;
llamaGpu?: BuildGpu;
ignoreMemorySafetyChecks?: boolean;
isEmbeddingContext?: boolean;
}): Promise<number>;
}
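
A short sketch of how the resolver declared above might be used; it assumes an existing `GgufInsights` instance named `insights`:

// `insights` is assumed to be a GgufInsights instance created beforehand
const resolver = insights.configurationResolver;

const {compatibilityScore, totalScore, resolvedValues} = await resolver.resolveAndScoreConfig({
    targetContextSize: 8192,
    flashAttention: false
});

console.log(
    "compatibility:", compatibilityScore.toFixed(2),
    "total:", totalScore.toFixed(2),
    "gpuLayers:", resolvedValues.gpuLayers,
    "contextSize:", resolvedValues.contextSize,
    "estimated VRAM:", resolvedValues.totalVramUsage
);

// resolve only the number of GPU layers using the default "auto" behavior
const gpuLayers = await resolver.resolveModelGpuLayers("auto");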


@@ -0,0 +1,272 @@
import { getDefaultContextSequences } from "../../evaluator/LlamaContext/LlamaContext.js";
import { InsufficientMemoryError } from "../../utils/InsufficientMemoryError.js";
import { resolveModelGpuLayersOption } from "./utils/resolveModelGpuLayersOption.js";
import { resolveContextContextSizeOption } from "./utils/resolveContextContextSizeOption.js";
import { scoreLevels } from "./utils/scoreLevels.js";
import { getRamUsageFromUnifiedVram } from "./utils/getRamUsageFromUnifiedVram.js";
export const defaultTrainContextSizeForEstimationPurposes = 4096;
const defaultContextSizeForUnfitContextSizeConfiguration = 2048;
export class GgufInsightsConfigurationResolver {
/** @internal */ _ggufInsights;
constructor(ggufInsights) {
this._ggufInsights = ggufInsights;
}
get ggufInsights() {
return this._ggufInsights;
}
/**
* Resolve the best configuration for loading a model and creating a context using the current hardware.
*
* Specifying a `targetGpuLayers` and/or `targetContextSize` will ensure the resolved configuration matches those values,
* but note it can lower the compatibility score if the hardware doesn't support it.
*
* Overriding hardware values is possible by configuring `hardwareOverrides`.
* @param options
* @param hardwareOverrides
*/
async resolveAndScoreConfig({ targetGpuLayers, targetContextSize, embeddingContext = false, flashAttention = false, swaFullCache = false, useMmap = this._ggufInsights._llama.supportsMmap } = {}, { getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), getRamState = (async () => this._ggufInsights._llama._ramOrchestrator.getMemoryState()), getSwapState = (() => this._ggufInsights._llama._swapOrchestrator.getMemoryState()), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading } = {}) {
const compatibilityScore = await this.scoreModelConfigurationCompatibility({
flashAttention,
swaFullCache,
contextSize: targetContextSize,
embeddingContext,
forceGpuLayers: targetGpuLayers,
forceStrictContextSize: targetContextSize != null,
useMmap
}, {
getVramState,
getRamState,
getSwapState,
llamaVramPaddingSize,
llamaGpu,
llamaSupportsGpuOffloading
});
return compatibilityScore;
}
/**
* Score the compatibility of the model configuration with the current GPU and VRAM state.
* Assumes a model is loaded with the default `"auto"` configurations.
* Scored based on the following criteria:
* - The number of GPU layers that can be offloaded to the GPU (only when a GPU is available; otherwise, by how small the model is)
* - Whether all layers can be offloaded to the GPU (gives additional points)
* - Whether the resolved context size is at least as large as the specified `contextSize`
*
* If the resolved context size is larger than the specified context size, for each multiplier of the specified `contextSize`
* that the resolved context size is larger by, 1 bonus point is given in the `bonusScore`.
*
* `maximumFittedContextSizeMultiplier` is used to improve the proportionality of the bonus score between models.
* Set this to any value higher than `<max compared model context size> / contextSize`.
* Defaults to `100`.
*
* `maximumUnfitConfigurationResourceMultiplier` is used to improve the proportionality of the bonus score between unfit models.
* Set this to any value higher than `<max compared model resource usage> / <total available resources>`.
* Defaults to `100`.
*
* `contextSize` defaults to `4096` (if the model train context size is lower than this, the model train context size is used instead).
*/
async scoreModelConfigurationCompatibility({ contextSize = Math.min(4096, this._ggufInsights.trainContextSize ?? 4096), embeddingContext = false, flashAttention = false, swaFullCache = false, maximumFittedContextSizeMultiplier = 100, maximumUnfitConfigurationResourceMultiplier = 100, forceStrictContextSize = false, forceGpuLayers, useMmap = this._ggufInsights._llama.supportsMmap } = {}, { getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), getRamState = (async () => this._ggufInsights._llama._ramOrchestrator.getMemoryState()), getSwapState = (() => this._ggufInsights._llama._swapOrchestrator.getMemoryState()), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading } = {}) {
const [vramState, ramState, swapState] = await Promise.all([
getVramState(),
getRamState(),
getSwapState()
]);
let resolvedGpuLayers = (forceGpuLayers == null || forceGpuLayers == "max")
? this.ggufInsights.totalLayers
: forceGpuLayers;
let gpuLayersFitMemory = false;
try {
resolvedGpuLayers = await this.resolveModelGpuLayers(forceGpuLayers != null
? forceGpuLayers
: embeddingContext
? {
fitContext: {
embeddingContext: true,
contextSize: forceStrictContextSize
? contextSize
: undefined
}
}
: forceStrictContextSize
? { fitContext: { contextSize } }
: "auto", {
getVramState: async () => vramState,
llamaVramPaddingSize,
llamaGpu,
llamaSupportsGpuOffloading,
defaultContextFlashAttention: flashAttention,
defaultContextSwaFullCache: swaFullCache,
ignoreMemorySafetyChecks: forceGpuLayers != null,
useMmap
});
gpuLayersFitMemory = true;
}
catch (err) {
if (!(err instanceof InsufficientMemoryError))
throw err;
}
const canUseGpu = llamaSupportsGpuOffloading && llamaGpu !== false;
const estimatedModelResourceUsage = this._ggufInsights.estimateModelResourceRequirements({
gpuLayers: resolvedGpuLayers,
useMmap
});
let resolvedContextSize = forceStrictContextSize
? contextSize
: Math.min(this.ggufInsights.trainContextSize ?? defaultContextSizeForUnfitContextSizeConfiguration, defaultContextSizeForUnfitContextSizeConfiguration);
let contextFitsMemory = false;
try {
resolvedContextSize = await this.resolveContextContextSize("auto", {
getVramState: async () => ({
total: vramState.total,
free: Math.max(0, vramState.free - estimatedModelResourceUsage.gpuVram),
unifiedSize: vramState.unifiedSize
}),
getRamState: async () => ({
total: ramState.total,
free: Math.max(0, ramState.free - estimatedModelResourceUsage.cpuRam +
(-getRamUsageFromUnifiedVram(estimatedModelResourceUsage.gpuVram, vramState)))
}),
getSwapState: async () => ({
total: swapState.total,
free: Math.max(0, swapState.free - Math.max(0, estimatedModelResourceUsage.cpuRam +
(-getRamUsageFromUnifiedVram(estimatedModelResourceUsage.gpuVram, vramState)) +
(-ramState.free)))
}),
llamaGpu,
isEmbeddingContext: embeddingContext,
modelGpuLayers: resolvedGpuLayers,
modelTrainContextSize: this._ggufInsights.trainContextSize ?? defaultTrainContextSizeForEstimationPurposes,
ignoreMemorySafetyChecks: forceStrictContextSize,
flashAttention,
swaFullCache
});
contextFitsMemory = true;
if (forceStrictContextSize && resolvedContextSize < contextSize) {
contextFitsMemory = false;
resolvedContextSize = contextSize;
}
else if (forceStrictContextSize && resolvedContextSize > contextSize) {
resolvedContextSize = contextSize;
}
}
catch (err) {
if (!(err instanceof InsufficientMemoryError))
throw err;
}
const estimatedContextResourceUsage = this._ggufInsights.estimateContextResourceRequirements({
contextSize: resolvedContextSize,
isEmbeddingContext: embeddingContext,
modelGpuLayers: resolvedGpuLayers,
flashAttention,
swaFullCache
});
const rankPoints = {
gpuLayers: 60,
allLayersAreOffloaded: 10,
contextSize: 30,
ramUsageFitsInRam: 10,
cpuOnlySmallModelSize: 70, // also defined inside `scoreModelSizeForCpuOnlyUsage`
bonusContextSize: 10
};
const gpuLayersPoints = rankPoints.gpuLayers * Math.min(1, resolvedGpuLayers / this._ggufInsights.totalLayers);
const allLayersAreOffloadedPoints = rankPoints.allLayersAreOffloaded * (resolvedGpuLayers === this._ggufInsights.totalLayers ? 1 : 0);
const contextSizePoints = contextFitsMemory
? rankPoints.contextSize * Math.min(1, resolvedContextSize / contextSize)
: 0;
const ramUsageFitsInRamPoints = rankPoints.ramUsageFitsInRam * (estimatedModelResourceUsage.cpuRam <= ramState.free
? 1
: estimatedModelResourceUsage.cpuRam <= ramState.free + swapState.free
? 0.8
: estimatedModelResourceUsage.cpuRam <= ramState.total
? 0.5
: (0.5 - Math.min(0.5, 0.5 * ((estimatedModelResourceUsage.cpuRam - ramState.total) / ramState.total))));
const bonusContextSizePoints = contextFitsMemory
? (10 * Math.min(1, (Math.max(0, resolvedContextSize - contextSize) / contextSize) / maximumFittedContextSizeMultiplier))
: 0;
let compatibilityScore = canUseGpu
? ((gpuLayersPoints + allLayersAreOffloadedPoints + contextSizePoints + ramUsageFitsInRamPoints) /
(rankPoints.gpuLayers + rankPoints.allLayersAreOffloaded + rankPoints.contextSize + rankPoints.ramUsageFitsInRam))
: ((contextSizePoints + ramUsageFitsInRamPoints + scoreModelSizeForCpuOnlyUsage(this._ggufInsights.modelSize)) /
(rankPoints.contextSize + rankPoints.ramUsageFitsInRam + rankPoints.cpuOnlySmallModelSize));
let bonusScore = bonusContextSizePoints / rankPoints.bonusContextSize;
if (!gpuLayersFitMemory || !contextFitsMemory ||
estimatedModelResourceUsage.gpuVram + estimatedContextResourceUsage.gpuVram > vramState.total ||
estimatedModelResourceUsage.cpuRam + estimatedContextResourceUsage.cpuRam > ramState.total + swapState.total) {
const totalVramRequirement = estimatedModelResourceUsage.gpuVram + estimatedContextResourceUsage.gpuVram;
const totalRamRequirement = estimatedModelResourceUsage.cpuRam + estimatedContextResourceUsage.cpuRam;
compatibilityScore = 0;
bonusScore = ((1 - (totalVramRequirement / (vramState.total * maximumUnfitConfigurationResourceMultiplier))) +
(1 - (totalRamRequirement / ((ramState.total + swapState.total) * maximumUnfitConfigurationResourceMultiplier)))) / 2;
}
return {
compatibilityScore,
bonusScore,
totalScore: compatibilityScore + bonusScore,
resolvedValues: {
gpuLayers: resolvedGpuLayers,
contextSize: resolvedContextSize,
modelRamUsage: estimatedModelResourceUsage.cpuRam,
contextRamUsage: estimatedContextResourceUsage.cpuRam,
totalRamUsage: estimatedModelResourceUsage.cpuRam + estimatedContextResourceUsage.cpuRam,
modelVramUsage: estimatedModelResourceUsage.gpuVram,
contextVramUsage: estimatedContextResourceUsage.gpuVram,
totalVramUsage: estimatedModelResourceUsage.gpuVram + estimatedContextResourceUsage.gpuVram
}
};
}
async resolveModelGpuLayers(gpuLayers, { ignoreMemorySafetyChecks = false, getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading, defaultContextFlashAttention = false, defaultContextSwaFullCache = false, useMmap = this._ggufInsights._llama.supportsMmap } = {}) {
return resolveModelGpuLayersOption(gpuLayers, {
ggufInsights: this._ggufInsights,
ignoreMemorySafetyChecks,
getVramState,
llamaVramPaddingSize,
llamaGpu,
llamaSupportsGpuOffloading,
defaultContextFlashAttention,
defaultContextSwaFullCache,
useMmap
});
}
/**
* Resolve a context size option for the given options and constraints.
*
* If there's no context size that can fit the available resources, an `InsufficientMemoryError` is thrown.
*/
async resolveContextContextSize(contextSize, { modelGpuLayers, batchSize, modelTrainContextSize, flashAttention = false, swaFullCache = false, getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), getRamState = (async () => this._ggufInsights._llama._ramOrchestrator.getMemoryState()), getSwapState = (() => this._ggufInsights._llama._swapOrchestrator.getMemoryState()), llamaGpu = this._ggufInsights._llama.gpu, ignoreMemorySafetyChecks = false, isEmbeddingContext = false, sequences = getDefaultContextSequences() }) {
return await resolveContextContextSizeOption({
contextSize,
batchSize,
sequences,
modelFileInsights: this._ggufInsights,
modelGpuLayers,
modelTrainContextSize,
flashAttention,
swaFullCache,
getVramState,
getRamState,
getSwapState,
llamaGpu,
ignoreMemorySafetyChecks,
isEmbeddingContext
});
}
/** @internal */
static _create(ggufInsights) {
return new GgufInsightsConfigurationResolver(ggufInsights);
}
}
function scoreModelSizeForCpuOnlyUsage(modelSize) {
const s1GB = Math.pow(1024, 3);
return 70 - scoreLevels(modelSize, [{
start: s1GB,
end: s1GB * 2.5,
points: 46
}, {
start: s1GB * 2.5,
end: s1GB * 4,
points: 17
}, {
start: s1GB * 4,
points: 7
}]);
}
//# sourceMappingURL=GgufInsightsConfigurationResolver.js.map
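
To make the scoring arithmetic above concrete, here is a worked example (with hypothetical numbers) of the GPU branch of `compatibilityScore`:

// hypothetical resolution: all 33 layers offloaded, the full requested context size of 4096 resolved,
// and the model's CPU RAM usage fits in the free RAM
const gpuLayersPoints = 60 * Math.min(1, 33 / 33);       // 60
const allLayersAreOffloadedPoints = 10 * 1;              // 10
const contextSizePoints = 30 * Math.min(1, 4096 / 4096); // 30
const ramUsageFitsInRamPoints = 10 * 1;                  // 10

const compatibilityScore =
    (gpuLayersPoints + allLayersAreOffloadedPoints + contextSizePoints + ramUsageFitsInRamPoints) /
    (60 + 10 + 30 + 10);
// (60 + 10 + 30 + 10) / 110 = 1 - a perfect fit; partial offloading or a smaller resolved context scales it down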

File diff suppressed because one or more lines are too long


@@ -0,0 +1,5 @@
export declare class GgufInsightsTokens {
private constructor();
get sepToken(): number | null;
get eosToken(): number | null;
}


@@ -0,0 +1,40 @@
export class GgufInsightsTokens {
/** @internal */ _ggufInsights;
constructor(ggufInsights) {
this._ggufInsights = ggufInsights;
}
get sepToken() {
const tokenizerModel = this._ggufInsights._ggufFileInfo?.metadata?.tokenizer?.ggml?.model;
const totalTokens = this._ggufInsights._ggufFileInfo?.metadata?.tokenizer?.ggml?.tokens?.length;
let sepTokenId = this._ggufInsights._ggufFileInfo?.metadata?.tokenizer?.ggml?.["seperator_token_id"];
if (sepTokenId == null && tokenizerModel === "bert") {
sepTokenId = 102; // source: `llama_vocab::impl::load` in `llama-vocab.cpp`
}
if (totalTokens != null && sepTokenId != null && sepTokenId >= totalTokens)
return null;
return sepTokenId ?? null;
}
get eosToken() {
const tokenizerModel = this._ggufInsights._ggufFileInfo?.metadata?.tokenizer?.ggml?.model;
const totalTokens = this._ggufInsights._ggufFileInfo?.metadata?.tokenizer?.ggml?.tokens?.length;
const eosTokenId = this._ggufInsights._ggufFileInfo?.metadata?.tokenizer?.ggml?.["eos_token_id"];
if (eosTokenId != null && totalTokens != null && eosTokenId < totalTokens)
return eosTokenId;
switch (tokenizerModel) {
case "no_vocab": return null;
case "none": return null;
case "bert": return null;
case "rwkv": return null;
case "llama": return 2;
case "gpt2": return 11;
case "t5": return 1;
case "plamo2": return 2;
}
return 2; // source: `llama_vocab::impl::load` in `llama-vocab.cpp`
}
/** @internal */
static _create(ggufInsights) {
return new GgufInsightsTokens(ggufInsights);
}
}
//# sourceMappingURL=GgufInsightsTokens.js.map
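
A brief sketch of how these token getters behave, assuming an existing `GgufInsights` instance named `insights`:

// `insights` is assumed to be a GgufInsights instance
const {sepToken, eosToken} = insights.tokens;

// e.g. for a BERT-style tokenizer with no explicit separator id in the metadata,
// `sepToken` falls back to 102; for a "llama"-style tokenizer with no explicit
// `eos_token_id`, `eosToken` falls back to 2; both are `null` when out of range or unsupported
console.log({sepToken, eosToken});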


@@ -0,0 +1 @@
{"version":3,"file":"GgufInsightsTokens.js","sourceRoot":"","sources":["../../../src/gguf/insights/GgufInsightsTokens.ts"],"names":[],"mappings":"AAGA,MAAM,OAAO,kBAAkB;IAC3B,gBAAgB,CAAkB,aAAa,CAAe;IAE9D,YAAoB,YAA0B;QAC1C,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;IACtC,CAAC;IAED,IAAW,QAAQ;QACf,MAAM,cAAc,GAAG,IAAI,CAAC,aAAa,CAAC,aAAa,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,CAAC;QAC1F,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,CAAC,aAAa,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC;QAEhG,IAAI,UAAU,GAAG,IAAI,CAAC,aAAa,CAAC,aAAa,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,oBAAoB,CAAC,CAAC;QACrG,IAAI,UAAU,IAAI,IAAI,IAAI,cAAc,KAAK,MAAM,EAAE,CAAC;YAClD,UAAU,GAAG,GAAG,CAAC,CAAC,yDAAyD;QAC/E,CAAC;QAED,IAAI,WAAW,IAAI,IAAI,IAAI,UAAU,IAAI,IAAI,IAAI,UAAU,IAAI,WAAW;YACtE,OAAO,IAAI,CAAC;QAEhB,OAAO,UAAU,IAAI,IAAI,CAAC;IAC9B,CAAC;IAED,IAAW,QAAQ;QACf,MAAM,cAAc,GAAG,IAAI,CAAC,aAAa,CAAC,aAAa,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,CAAC;QAC1F,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,CAAC,aAAa,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC;QAEhG,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,CAAC,aAAa,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,cAAc,CAAC,CAAC;QACjG,IAAI,UAAU,IAAI,IAAI,IAAI,WAAW,IAAI,IAAI,IAAI,UAAU,GAAG,WAAW;YACrE,OAAO,UAAU,CAAC;QAEtB,QAAQ,cAAc,EAAE,CAAC;YACrB,KAAK,UAAU,CAAC,CAAC,OAAO,IAAI,CAAC;YAC7B,KAAK,MAAM,CAAC,CAAC,OAAO,IAAI,CAAC;YACzB,KAAK,MAAM,CAAC,CAAC,OAAO,IAAI,CAAC;YACzB,KAAK,MAAM,CAAC,CAAC,OAAO,IAAI,CAAC;YACzB,KAAK,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC;YACvB,KAAK,MAAM,CAAC,CAAC,OAAO,EAAE,CAAC;YACvB,KAAK,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC;YACpB,KAAK,QAAQ,CAAC,CAAC,OAAO,CAAC,CAAC;QAC5B,CAAC;QACD,OAAO,CAAC,CAAC,CAAC,yDAAyD;IACvE,CAAC;IAED,gBAAgB;IACT,MAAM,CAAC,OAAO,CAAC,YAA0B;QAC5C,OAAO,IAAI,kBAAkB,CAAC,YAAY,CAAC,CAAC;IAChD,CAAC;CACJ"}


@@ -0,0 +1,5 @@
export declare function getRamUsageFromUnifiedVram(vramUsage: number, vramState: {
total: number;
free: number;
unifiedSize: number;
}): number;


@@ -0,0 +1,7 @@
export function getRamUsageFromUnifiedVram(vramUsage, vramState) {
const onlyVramSize = vramState.total - vramState.unifiedSize;
const existingUsage = Math.max(0, vramState.total - vramState.free);
const unifiedRamUsage = Math.min(vramState.unifiedSize, Math.max(0, vramUsage - Math.max(0, onlyVramSize - existingUsage)));
return unifiedRamUsage;
}
//# sourceMappingURL=getRamUsageFromUnifiedVram.js.map
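
A small worked example of the formula above, with made-up numbers for a machine that has partially unified memory:

// hypothetical state: 16 GiB total VRAM, of which 8 GiB is unified with RAM, and 12 GiB is currently free
const GiB = Math.pow(1024, 3);
const vramState = {total: 16 * GiB, free: 12 * GiB, unifiedSize: 8 * GiB};

// onlyVramSize = 16 - 8 = 8 GiB; existingUsage = 16 - 12 = 4 GiB.
// a 6 GiB allocation first fills the remaining 8 - 4 = 4 GiB of dedicated VRAM,
// so only the other 2 GiB is counted as RAM usage coming from unified memory
getRamUsageFromUnifiedVram(6 * GiB, vramState); // 2 GiB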


@@ -0,0 +1 @@
{"version":3,"file":"getRamUsageFromUnifiedVram.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/getRamUsageFromUnifiedVram.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,0BAA0B,CAAC,SAAiB,EAAE,SAA6D;IACvH,MAAM,YAAY,GAAG,SAAS,CAAC,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC;IAC7D,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAEpE,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,YAAY,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IAE5H,OAAO,eAAe,CAAC;AAC3B,CAAC"}


@@ -0,0 +1,30 @@
import { LlamaContextOptions } from "../../../evaluator/LlamaContext/types.js";
import { GgufInsights } from "../GgufInsights.js";
import { BuildGpu } from "../../../bindings/types.js";
export declare function resolveContextContextSizeOption({ contextSize, batchSize, sequences, modelFileInsights, modelGpuLayers, modelTrainContextSize, flashAttention, swaFullCache, getVramState, getRamState, getSwapState, ignoreMemorySafetyChecks, isEmbeddingContext, maxContextSizeSwapUse }: {
contextSize?: LlamaContextOptions["contextSize"];
batchSize?: LlamaContextOptions["batchSize"];
sequences: number;
modelFileInsights: GgufInsights;
modelGpuLayers: number;
modelTrainContextSize: number;
flashAttention: boolean;
swaFullCache: boolean;
getVramState(): Promise<{
total: number;
free: number;
unifiedSize: number;
}>;
getRamState(): Promise<{
total: number;
free: number;
}>;
getSwapState(): Promise<{
total: number;
free: number;
}>;
llamaGpu: BuildGpu;
ignoreMemorySafetyChecks?: boolean;
isEmbeddingContext?: boolean;
maxContextSizeSwapUse?: number;
}): Promise<number>;


@@ -0,0 +1,111 @@
import { minAllowedContextSizeInCalculations } from "../../../config.js";
import { getDefaultContextBatchSize, getDefaultModelContextSize } from "../../../evaluator/LlamaContext/LlamaContext.js";
import { InsufficientMemoryError } from "../../../utils/InsufficientMemoryError.js";
import { getRamUsageFromUnifiedVram } from "./getRamUsageFromUnifiedVram.js";
const defaultMaxContextSizeSwapUse = 2048;
export async function resolveContextContextSizeOption({ contextSize, batchSize, sequences, modelFileInsights, modelGpuLayers, modelTrainContextSize, flashAttention, swaFullCache, getVramState, getRamState, getSwapState, ignoreMemorySafetyChecks = false, isEmbeddingContext = false, maxContextSizeSwapUse = defaultMaxContextSizeSwapUse }) {
if (contextSize == null)
contextSize = "auto";
if (typeof contextSize === "number") {
const resolvedContextSize = Math.max(1, Math.floor(contextSize));
if (ignoreMemorySafetyChecks)
return resolvedContextSize;
const [vramState, ramState, swapState] = await Promise.all([
getVramState(),
getRamState(),
getSwapState()
]);
const contextResourceRequirements = modelFileInsights.estimateContextResourceRequirements({
contextSize: resolvedContextSize,
batchSize: batchSize ?? getDefaultContextBatchSize({ contextSize: resolvedContextSize, sequences }),
modelGpuLayers: modelGpuLayers,
sequences,
flashAttention,
swaFullCache,
isEmbeddingContext
});
if (contextResourceRequirements.gpuVram > vramState.free)
throw new InsufficientMemoryError(`A context size of ${resolvedContextSize}${sequences > 1 ? ` with ${sequences} sequences` : ""} is too large for the available VRAM`);
else if (contextResourceRequirements.cpuRam > (ramState.free + swapState.free - getRamUsageFromUnifiedVram(contextResourceRequirements.gpuVram, vramState)))
throw new InsufficientMemoryError(`A context size of ${resolvedContextSize}${sequences > 1 ? ` with ${sequences} sequences` : ""} is too large for the available RAM${swapState.total > 0 ? " (including swap)" : ""}`);
return resolvedContextSize;
}
else if (contextSize === "auto" || typeof contextSize === "object") {
const [vramState, ramState, swapState] = await Promise.all([
getVramState(),
getRamState(),
getSwapState()
]);
const maxContextSize = contextSize === "auto"
? getDefaultModelContextSize({ trainContextSize: modelTrainContextSize })
: Math.min(contextSize.max ?? getDefaultModelContextSize({ trainContextSize: modelTrainContextSize }), getDefaultModelContextSize({ trainContextSize: modelTrainContextSize }));
const minContextSize = contextSize === "auto"
? minAllowedContextSizeInCalculations
: Math.max(contextSize.min ?? minAllowedContextSizeInCalculations, minAllowedContextSizeInCalculations);
let highestCompatibleContextSize = null;
let step = -Math.max(1, Math.floor((maxContextSize - minContextSize) / 4));
for (let testContextSize = maxContextSize; testContextSize >= minContextSize && testContextSize <= maxContextSize;) {
const contextResourceRequirements = modelFileInsights.estimateContextResourceRequirements({
contextSize: testContextSize,
batchSize: batchSize ?? getDefaultContextBatchSize({ contextSize: testContextSize, sequences }),
modelGpuLayers: modelGpuLayers,
sequences,
flashAttention,
swaFullCache,
isEmbeddingContext
});
if (contextResourceRequirements.gpuVram <= vramState.free &&
contextResourceRequirements.cpuRam <= (ramState.free - getRamUsageFromUnifiedVram(contextResourceRequirements.gpuVram, vramState) + (testContextSize <= maxContextSizeSwapUse
? swapState.free
: 0))) {
if (highestCompatibleContextSize == null || testContextSize >= highestCompatibleContextSize) {
highestCompatibleContextSize = testContextSize;
if (step === -1)
break;
else if (step < 0)
step = Math.max(1, Math.floor(-step / 2));
}
}
else if (step > 0)
step = -Math.max(1, Math.floor(step / 2));
if (testContextSize === minContextSize && step === -1)
break;
testContextSize += step;
if (testContextSize < minContextSize) {
testContextSize = minContextSize;
step = Math.max(1, Math.floor(Math.abs(step) / 2));
}
else if (testContextSize > maxContextSize) {
testContextSize = maxContextSize;
step = -Math.max(1, Math.floor(Math.abs(step) / 2));
}
}
if (highestCompatibleContextSize != null)
return highestCompatibleContextSize;
if (ignoreMemorySafetyChecks)
return minContextSize;
const minContextSizeResourceRequirements = modelFileInsights.estimateContextResourceRequirements({
contextSize: minContextSize,
batchSize: batchSize ?? getDefaultContextBatchSize({ contextSize: minContextSize, sequences }),
modelGpuLayers: modelGpuLayers,
sequences,
flashAttention,
swaFullCache,
isEmbeddingContext
});
const unifiedRamUsage = getRamUsageFromUnifiedVram(minContextSizeResourceRequirements.gpuVram, vramState);
if (minContextSizeResourceRequirements.gpuVram > vramState.free &&
minContextSizeResourceRequirements.cpuRam > ramState.free + swapState.free - unifiedRamUsage)
throw new InsufficientMemoryError(`A context size of ${minContextSize}${sequences > 1 ? ` with ${sequences} sequences` : ""} is too large for the available VRAM and RAM${swapState.total > 0 ? " (including swap)" : ""}`);
else if (minContextSizeResourceRequirements.gpuVram > vramState.free)
throw new InsufficientMemoryError(`A context size of ${minContextSize}${sequences > 1 ? ` with ${sequences} sequences` : ""} is too large for the available VRAM`);
else if (minContextSizeResourceRequirements.cpuRam > ramState.free + swapState.free - unifiedRamUsage)
throw new InsufficientMemoryError(`A context size of ${minContextSize}${sequences > 1 ? ` with ${sequences} sequences` : ""} is too large for the available RAM${swapState.total > 0 ? " (including swap)" : ""}`);
else if (minContextSizeResourceRequirements.cpuRam > ramState.free - unifiedRamUsage)
throw new InsufficientMemoryError(`A context size of ${minContextSize}${sequences > 1 ? ` with ${sequences} sequences` : ""} is too large for the available RAM`);
else
throw new InsufficientMemoryError(`A context size of ${minContextSize}${sequences > 1 ? ` with ${sequences} sequences` : ""} is too large for the available resources`);
}
throw new Error(`Invalid context size: "${contextSize}"`);
}
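// A minimal usage sketch (kept as a comment, not part of the module). The insights object, the resolved GPU layer
// count and the async memory-state getters are assumed to be supplied by the surrounding model/context creation
// code that calls this function, and the values shown are illustrative only:
//
// const contextSize = await resolveContextContextSizeOption({
//     contextSize: "auto",
//     sequences: 1,
//     modelFileInsights: ggufInsights,
//     modelGpuLayers: resolvedGpuLayers,
//     modelTrainContextSize: ggufInsights.trainContextSize ?? 4096,
//     flashAttention: false,
//     swaFullCache: false,
//     getVramState, getRamState, getSwapState
// });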
//# sourceMappingURL=resolveContextContextSizeOption.js.map

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,17 @@
import { LlamaModelOptions } from "../../../evaluator/LlamaModel/LlamaModel.js";
import { BuildGpu } from "../../../bindings/types.js";
import type { GgufInsights } from "../GgufInsights.js";
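/**
 * Resolves the model `gpuLayers` option (`"auto"`, `"max"`, a number, or a `{min, max, fitContext}` object)
 * into a concrete number of layers to offload to the GPU, based on the currently free VRAM.
 */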
export declare function resolveModelGpuLayersOption(gpuLayers: LlamaModelOptions["gpuLayers"], { ggufInsights, ignoreMemorySafetyChecks, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention, defaultContextSwaFullCache, useMmap }: {
ggufInsights: GgufInsights;
ignoreMemorySafetyChecks?: boolean;
getVramState(): Promise<{
total: number;
free: number;
}>;
llamaVramPaddingSize: number;
llamaGpu: BuildGpu;
llamaSupportsGpuOffloading: boolean;
defaultContextFlashAttention: boolean;
defaultContextSwaFullCache: boolean;
useMmap?: boolean;
}): Promise<number>;

View File

@@ -0,0 +1,239 @@
import { InsufficientMemoryError } from "../../../utils/InsufficientMemoryError.js";
import { findBestOption } from "../../../utils/findBestOption.js";
import { getDefaultContextBatchSize, getDefaultModelContextSize } from "../../../evaluator/LlamaContext/LlamaContext.js";
import { minAllowedContextSizeInCalculations } from "../../../config.js";
import { scoreLevels } from "./scoreLevels.js";
const fitContextExtraMemoryPaddingPercentage = 0.5;
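/**
 * Resolves the `gpuLayers` model option into a concrete number of GPU layers.
 *
 * `"max"` and numeric values are clamped to the model's total layer count and validated against the free VRAM
 * (unless `ignoreMemorySafetyChecks` is set). `"auto"` and `{min, max, fitContext}` objects trigger a scored search
 * over the allowed layer counts, picking the combination of offloaded layers and achievable context size that
 * scores highest for the free VRAM.
 */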
export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ignoreMemorySafetyChecks = false, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention, defaultContextSwaFullCache, useMmap }) {
if (gpuLayers == null)
gpuLayers = "auto";
if (!llamaSupportsGpuOffloading)
return 0;
if (gpuLayers === "max" || typeof gpuLayers === "number") {
const resolvedGpuLayers = typeof gpuLayers === "number"
? Math.max(0, Math.min(ggufInsights.totalLayers, gpuLayers))
: ggufInsights.totalLayers;
if (ignoreMemorySafetyChecks)
return resolvedGpuLayers;
const vramState = await getVramState();
const maxLayersRequirements = getVramRequiredForGpuLayers({
gpuLayers: resolvedGpuLayers,
ggufInsights,
currentVram: vramState.free,
defaultContextFlashAttention,
defaultContextSwaFullCache,
useMmap
});
if (maxLayersRequirements == null)
throw new InsufficientMemoryError("Not enough VRAM to fit the model with the specified settings");
return resolvedGpuLayers;
}
else if (gpuLayers === "auto" || typeof gpuLayers === "object") {
if (llamaGpu === false)
return 0;
const vramState = await getVramState();
if (vramState.total === 0)
return 0;
let freeVram = vramState.free;
if (typeof gpuLayers === "object" && gpuLayers.fitContext?.contextSize != null) {
freeVram -= llamaVramPaddingSize * fitContextExtraMemoryPaddingPercentage;
if (freeVram < 0)
freeVram = 0;
}
const bestGpuLayersOption = getBestGpuLayersForFreeVram({
ggufInsights,
freeVram,
fitContext: typeof gpuLayers === "object"
? gpuLayers.fitContext
: undefined,
minGpuLayers: typeof gpuLayers === "object"
? gpuLayers.min
: undefined,
maxGpuLayers: typeof gpuLayers === "object"
? gpuLayers.max
: undefined,
defaultContextFlashAttention,
defaultContextSwaFullCache,
useMmap
});
const hasGpuLayersRequirements = typeof gpuLayers === "object" &&
(gpuLayers.min != null || gpuLayers.max != null || gpuLayers.fitContext?.contextSize != null);
if (!ignoreMemorySafetyChecks && bestGpuLayersOption == null && hasGpuLayersRequirements)
throw new InsufficientMemoryError("Not enough VRAM to fit the model with the specified settings");
return bestGpuLayersOption ?? 0;
}
throw new Error(`Invalid gpuLayers value: ${gpuLayers}`);
}
function getBestGpuLayersForFreeVram({ ggufInsights, freeVram, fitContext, minGpuLayers, maxGpuLayers, defaultContextFlashAttention, defaultContextSwaFullCache, useMmap }) {
return findBestOption({
*generator() {
const minLayers = Math.floor(Math.max(0, minGpuLayers ?? 0));
const maxLayers = Math.floor(Math.min(ggufInsights.totalLayers, maxGpuLayers ?? ggufInsights.totalLayers));
for (let layers = maxLayers; layers >= minLayers; layers--) {
yield {
gpuLayers: layers
};
}
},
score(option) {
const layersRequirements = getVramRequiredForGpuLayers({
gpuLayers: option.gpuLayers,
ggufInsights,
currentVram: freeVram,
fitContext,
defaultContextFlashAttention,
defaultContextSwaFullCache,
useMmap
});
if (layersRequirements == null)
return null;
return scoreGpuLayersAndContextCombination({ gpuLayers: option.gpuLayers, contextSize: layersRequirements.contextSize }, {
totalGpuLayers: ggufInsights.totalLayers,
trainContextSize: getDefaultModelContextSize({ trainContextSize: ggufInsights.trainContextSize })
});
}
})?.gpuLayers ?? null;
}
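/**
 * Heuristic score for the "auto" resolution above: offloading the first layer earns a small bonus, each additional
 * layer adds points linearly, and offloading every layer adds a further bonus; larger achievable context sizes also
 * earn points, but the higher context tiers only pay off once enough of the model is offloaded.
 * For example, with 33 of 33 layers offloaded and a 2048 token context the combined score is 44 + 6 = 50.
 */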
function scoreGpuLayersAndContextCombination({ gpuLayers, contextSize }, { totalGpuLayers, trainContextSize }) {
function scoreGpuLayers() {
return scoreLevels(gpuLayers, [{
start: 0,
points: 4
}, {
start: 1,
points: 26
}, {
start: totalGpuLayers,
points: 14,
end: totalGpuLayers
}]);
}
function scoreContextSize() {
const gpuLayersPercentage = gpuLayers / totalGpuLayers;
return scoreLevels(contextSize, [{
start: 0,
points: 2
}, {
start: 1024,
points: 4
}, {
start: 2048,
points: gpuLayersPercentage < 0.1 ? 1 : 8
}, {
start: 4096,
points: gpuLayersPercentage < 0.3 ? 4 : 16
}, {
start: 8192,
points: gpuLayersPercentage < 0.6 ? 1 : 8,
end: Math.max(trainContextSize, 16384)
}]);
}
return scoreGpuLayers() + scoreContextSize();
}
function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fitContext, defaultContextFlashAttention = false, defaultContextSwaFullCache = false, useMmap }) {
const modelVram = ggufInsights.estimateModelResourceRequirements({
gpuLayers,
useMmap
}).gpuVram;
if (modelVram > currentVram)
return null;
if (fitContext != null && fitContext.contextSize != null) {
const contextVram = ggufInsights.estimateContextResourceRequirements({
contextSize: fitContext.contextSize,
batchSize: getDefaultContextBatchSize({ contextSize: fitContext.contextSize, sequences: 1 }),
modelGpuLayers: gpuLayers,
sequences: 1,
isEmbeddingContext: fitContext.embeddingContext ?? false,
flashAttention: defaultContextFlashAttention,
swaFullCache: defaultContextSwaFullCache
}).gpuVram;
const totalVram = modelVram + contextVram;
if (totalVram > currentVram)
return null;
return {
contextSize: fitContext.contextSize,
contextVram,
totalVram
};
}
const maxContext = findMaxPossibleContextSizeForVram({
gpuLayers,
ggufInsights,
vram: currentVram - modelVram,
isEmbeddingContext: fitContext?.embeddingContext ?? false,
flashAttention: defaultContextFlashAttention,
swaFullCache: defaultContextSwaFullCache
});
if (maxContext == null || modelVram + maxContext.vram > currentVram)
return null;
return {
contextSize: maxContext.contextSize,
contextVram: maxContext.vram,
totalVram: modelVram + maxContext.vram
};
}
function findMaxPossibleContextSizeForVram({ gpuLayers, ggufInsights, vram, isEmbeddingContext, flashAttention, swaFullCache }) {
const maxContextSize = getDefaultModelContextSize({ trainContextSize: ggufInsights.trainContextSize });
return findMaxValidValue({
maxValue: maxContextSize,
minValue: minAllowedContextSizeInCalculations,
minStep: 1,
test(contextSize) {
const contextVram = ggufInsights.estimateContextResourceRequirements({
contextSize,
batchSize: getDefaultContextBatchSize({ contextSize, sequences: 1 }),
modelGpuLayers: gpuLayers,
sequences: 1,
isEmbeddingContext,
flashAttention,
swaFullCache
}).gpuVram;
if (contextVram <= vram)
return {
contextSize,
vram: contextVram
};
return null;
}
});
}
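/**
 * Generic adaptive search used above: probes values between `minValue` and `maxValue`, halving and reversing the
 * step whenever the `test` outcome contradicts the current direction of travel, and returns the `test` result of
 * the largest value for which `test` did not return `null` (or `null` if no value passed).
 */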
function findMaxValidValue({ maxValue, minValue, minStep = 1, test }) {
let step = -Math.max(minStep, Math.floor((maxValue - minValue) / 4));
let bestValue = null;
for (let value = maxValue; value >= minValue;) {
const result = (bestValue != null && value === bestValue.value)
? bestValue.result
: test(value);
if (result != null) {
if (bestValue == null || value >= bestValue.value) {
bestValue = { value: value, result: result };
if (step === -minStep)
break;
else if (step < 0)
step = Math.max(minStep, Math.floor(-step / 2));
}
}
else if (bestValue != null && value < bestValue.value) {
value = bestValue.value;
step = Math.max(minStep, Math.floor(Math.abs(step) / 2));
continue;
}
else if (step > 0)
step = -Math.max(minStep, Math.floor(step / 2));
if (value === minValue && step === -minStep)
break;
value += step;
if (value < minValue) {
value = minValue;
step = Math.max(minStep, Math.floor(Math.abs(step) / 2));
}
else if (value > maxValue) {
value = maxValue;
step = -Math.max(minStep, Math.floor(Math.abs(step) / 2));
}
}
if (bestValue != null)
return bestValue.result;
return null;
}
//# sourceMappingURL=resolveModelGpuLayersOption.js.map

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,5 @@
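/**
 * Sums the points of every level whose range the given number has fully passed, and linearly interpolates the
 * points of the level the number currently falls inside (a level spans from its `start` to its `end`, which
 * defaults to the next level's `start`; the last level grants its full points once its `start` is reached).
 */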
export declare function scoreLevels(num: number, levels: {
start: number;
end?: number;
points: number;
}[]): number;

View File

@@ -0,0 +1,16 @@
export function scoreLevels(num, levels) {
let res = 0;
for (let i = 0; i < levels.length; i++) {
const level = levels[i];
const start = level.start;
const end = level.end ?? levels[i + 1]?.start ?? Math.max(start, num);
if (num < start)
break;
else if (num >= end)
res += level.points;
else
res += level.points * ((num - start) / (end - start));
}
return res;
}
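// For example, scoreLevels(1536, [{start: 0, points: 2}, {start: 1024, points: 4, end: 2048}]) returns
// 2 + 4 * ((1536 - 1024) / (2048 - 1024)) = 4: the first level is fully passed and the second is halfway through.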
//# sourceMappingURL=scoreLevels.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"scoreLevels.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/scoreLevels.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,WAAW,CAAC,GAAW,EAAE,MAAuD;IAC5F,IAAI,GAAG,GAAG,CAAC,CAAC;IAEZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC;QACzB,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC;QAC1B,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,IAAI,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAEtE,IAAI,GAAG,GAAG,KAAK;YACX,MAAM;aACL,IAAI,GAAG,IAAI,GAAG;YACf,GAAG,IAAI,KAAK,CAAC,MAAM,CAAC;;YAEpB,GAAG,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC;IAC9D,CAAC;IAED,OAAO,GAAG,CAAC;AACf,CAAC"}