First upload version 0.0.1

2026-02-05 15:27:49 +08:00
commit 8e9b7201ed
4182 changed files with 593136 additions and 0 deletions
--- a/node_modules/node-llama-cpp/dist/gguf/insights/utils/resolveModelGpuLayersOption.js
+++ b/node_modules/node-llama-cpp/dist/gguf/insights/utils/resolveModelGpuLayersOption.js
@@ -0,0 +1,239 @@
+import { InsufficientMemoryError } from "../../../utils/InsufficientMemoryError.js";
+import { findBestOption } from "../../../utils/findBestOption.js";
+import { getDefaultContextBatchSize, getDefaultModelContextSize } from "../../../evaluator/LlamaContext/LlamaContext.js";
+import { minAllowedContextSizeInCalculations } from "../../../config.js";
+import { scoreLevels } from "./scoreLevels.js";
+const fitContextExtraMemoryPaddingPercentage = 0.5; // used as a 0..1 multiplier (not 0-100): this share of llamaVramPaddingSize is subtracted from free VRAM when fitContext.contextSize is set
+export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ignoreMemorySafetyChecks = false, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention, defaultContextSwaFullCache, useMmap }) { // Resolves `gpuLayers` (null/undefined, "auto", "max", a number, or an options object) to a concrete layer count; throws InsufficientMemoryError when an explicit request does not fit in free VRAM, and Error for any other value
+    if (gpuLayers == null) // null and undefined both fall back to "auto"
+        gpuLayers = "auto";
+    if (!llamaSupportsGpuOffloading) // when this flag is falsy the result is always 0, whatever was requested
+        return 0;
+    if (gpuLayers === "max" || typeof gpuLayers === "number") {
+        const resolvedGpuLayers = typeof gpuLayers === "number"
+            ? Math.max(0, Math.min(ggufInsights.totalLayers, gpuLayers)) // clamp an explicit number into [0, totalLayers]
+            : ggufInsights.totalLayers; // "max" means every layer
+        if (ignoreMemorySafetyChecks) // caller opted out of the VRAM fit check below
+            return resolvedGpuLayers;
+        const vramState = await getVramState();
+        const maxLayersRequirements = getVramRequiredForGpuLayers({ // no fitContext is passed, so this looks for any context size that fits next to the model
+            gpuLayers: resolvedGpuLayers,
+            ggufInsights,
+            currentVram: vramState.free,
+            defaultContextFlashAttention,
+            defaultContextSwaFullCache,
+            useMmap
+        });
+        if (maxLayersRequirements == null) // null: the model plus a context does not fit in vramState.free
+            throw new InsufficientMemoryError("Not enough VRAM to fit the model with the specified settings");
+        return resolvedGpuLayers;
+    }
+    else if (gpuLayers === "auto" || typeof gpuLayers === "object") {
+        if (llamaGpu === false) // llamaGpu is exactly false — presumably GPU use is disabled; confirm against caller
+            return 0;
+        const vramState = await getVramState();
+        if (vramState.total === 0) // zero total VRAM reported: resolve to 0 layers
+            return 0;
+        let freeVram = vramState.free;
+        if (typeof gpuLayers === "object" && gpuLayers.fitContext?.contextSize != null) {
+            freeVram -= llamaVramPaddingSize * fitContextExtraMemoryPaddingPercentage; // shrink the budget by an extra share of the padding when a specific context size must fit
+            if (freeVram < 0) // never pass a negative budget on
+                freeVram = 0;
+        }
+        const bestGpuLayersOption = getBestGpuLayersForFreeVram({ // a layer count, or null when no candidate fits
+            ggufInsights,
+            freeVram,
+            fitContext: typeof gpuLayers === "object"
+                ? gpuLayers.fitContext
+                : undefined,
+            minGpuLayers: typeof gpuLayers === "object"
+                ? gpuLayers.min
+                : undefined,
+            maxGpuLayers: typeof gpuLayers === "object"
+                ? gpuLayers.max
+                : undefined,
+            defaultContextFlashAttention,
+            defaultContextSwaFullCache,
+            useMmap
+        });
+        const hasGpuLayersRequirements = typeof gpuLayers === "object" && // true only when the caller set min, max or fitContext.contextSize
+            (gpuLayers.min != null || gpuLayers.max != null || gpuLayers.fitContext?.contextSize != null);
+        if (!ignoreMemorySafetyChecks && bestGpuLayersOption == null && hasGpuLayersRequirements) // explicit requirements that cannot be met are an error
+            throw new InsufficientMemoryError("Not enough VRAM to fit the model with the specified settings");
+        return bestGpuLayersOption ?? 0; // without explicit requirements (or with checks ignored), "nothing fits" resolves to 0 layers
+    }
+    throw new Error(`Invalid gpuLayers value: ${gpuLayers}`); // reached for any other value, e.g. an unrecognized string
+}
+function getBestGpuLayersForFreeVram({ ggufInsights, freeVram, fitContext, minGpuLayers, maxGpuLayers, defaultContextFlashAttention, defaultContextSwaFullCache, useMmap }) { // Uses findBestOption to choose a layer count between the min and max bounds that fits in freeVram; returns that count, or null when no option was selected
+    return findBestOption({ // NOTE(review): presumably returns the highest-scoring option; findBestOption is defined outside this file — confirm
+        *generator() { // candidates are yielded from the highest layer count down to the lowest
+            const minLayers = Math.floor(Math.max(0, minGpuLayers ?? 0)); // lower bound defaults to 0 and is never negative
+            const maxLayers = Math.floor(Math.min(ggufInsights.totalLayers, maxGpuLayers ?? ggufInsights.totalLayers)); // upper bound defaults to, and is capped at, totalLayers
+            for (let layers = maxLayers; layers >= minLayers; layers--) {
+                yield {
+                    gpuLayers: layers
+                };
+            }
+        },
+        score(option) { // null for a candidate that does not fit, otherwise its combined layers + context score
+            const layersRequirements = getVramRequiredForGpuLayers({
+                gpuLayers: option.gpuLayers,
+                ggufInsights,
+                currentVram: freeVram,
+                fitContext,
+                defaultContextFlashAttention,
+                defaultContextSwaFullCache,
+                useMmap
+            });
+            if (layersRequirements == null) // model + context do not fit in freeVram at this layer count
+                return null;
+            return scoreGpuLayersAndContextCombination({ gpuLayers: option.gpuLayers, contextSize: layersRequirements.contextSize }, {
+                totalGpuLayers: ggufInsights.totalLayers,
+                trainContextSize: getDefaultModelContextSize({ trainContextSize: ggufInsights.trainContextSize })
+            });
+        }
+    })?.gpuLayers ?? null; // normalize "no option" (null/undefined) to null
+}
+function scoreGpuLayersAndContextCombination({ gpuLayers, contextSize }, { totalGpuLayers, trainContextSize }) { // Scores one (gpuLayers, contextSize) pair as the sum of a layers score and a context-size score, both computed by scoreLevels
+    function scoreGpuLayers() {
+        return scoreLevels(gpuLayers, [{ // level table; the exact meaning of start/points/end is defined in scoreLevels.js, which is not visible here
+                start: 0,
+                points: 4
+            }, {
+                start: 1,
+                points: 26
+            }, {
+                start: totalGpuLayers, // final level starts and ends at totalGpuLayers, i.e. it concerns full offload only
+                points: 14,
+                end: totalGpuLayers
+            }]);
+    }
+    function scoreContextSize() {
+        const gpuLayersPercentage = gpuLayers / totalGpuLayers; // a plain ratio compared against 0.1/0.3/0.6 below, not a 0-100 percentage
+        return scoreLevels(contextSize, [{
+                start: 0,
+                points: 2
+            }, {
+                start: 1024,
+                points: 4
+            }, {
+                start: 2048,
+                points: gpuLayersPercentage < 0.1 ? 1 : 8 // from this level up, fewer points are awarded when only a small share of layers is offloaded
+            }, {
+                start: 4096,
+                points: gpuLayersPercentage < 0.3 ? 4 : 16
+            }, {
+                start: 8192,
+                points: gpuLayersPercentage < 0.6 ? 1 : 8,
+                end: Math.max(trainContextSize, 16384) // last level ends at the train context size, but never below 16384
+            }]);
+    }
+    return scoreGpuLayers() + scoreContextSize();
+}
+function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fitContext, defaultContextFlashAttention = false, defaultContextSwaFullCache = false, useMmap }) { // Checks whether the model at `gpuLayers` plus a context fits in `currentVram`; returns { contextSize, contextVram, totalVram }, or null when it does not fit
+    const modelVram = ggufInsights.estimateModelResourceRequirements({
+        gpuLayers,
+        useMmap
+    }).gpuVram;
+    if (modelVram > currentVram) // the model alone already exceeds the budget
+        return null;
+    if (fitContext != null && fitContext.contextSize != null) { // a specific context size was requested: check exactly that size
+        const contextVram = ggufInsights.estimateContextResourceRequirements({
+            contextSize: fitContext.contextSize,
+            batchSize: getDefaultContextBatchSize({ contextSize: fitContext.contextSize, sequences: 1 }),
+            modelGpuLayers: gpuLayers,
+            sequences: 1, // the estimate is always made for a single sequence
+            isEmbeddingContext: fitContext.embeddingContext ?? false,
+            flashAttention: defaultContextFlashAttention,
+            swaFullCache: defaultContextSwaFullCache
+        }).gpuVram;
+        const totalVram = modelVram + contextVram;
+        if (totalVram > currentVram)
+            return null;
+        return {
+            contextSize: fitContext.contextSize,
+            contextVram,
+            totalVram
+        };
+    }
+    const maxContext = findMaxPossibleContextSizeForVram({ // no fixed size requested: search for a context size that fits in what the model leaves free
+        gpuLayers,
+        ggufInsights,
+        vram: currentVram - modelVram,
+        isEmbeddingContext: fitContext?.embeddingContext ?? false, // fitContext may still carry embeddingContext without a contextSize
+        flashAttention: defaultContextFlashAttention,
+        swaFullCache: defaultContextSwaFullCache
+    });
+    if (maxContext == null || modelVram + maxContext.vram > currentVram) // NOTE(review): the second check repeats the search's own `contextVram <= vram` test — possibly a guard against floating-point rounding; confirm
+        return null;
+    return {
+        contextSize: maxContext.contextSize,
+        contextVram: maxContext.vram,
+        totalVram: modelVram + maxContext.vram
+    };
+}
+function findMaxPossibleContextSizeForVram({ gpuLayers, ggufInsights, vram, isEmbeddingContext, flashAttention, swaFullCache }) { // Searches between minAllowedContextSizeInCalculations and the default model context size for a context size whose estimated GPU memory is <= vram; returns { contextSize, vram } or null
+    const maxContextSize = getDefaultModelContextSize({ trainContextSize: ggufInsights.trainContextSize }); // upper bound of the search
+    return findMaxValidValue({
+        maxValue: maxContextSize,
+        minValue: minAllowedContextSizeInCalculations,
+        minStep: 1,
+        test(contextSize) { // a size passes when its estimated context VRAM fits the budget
+            const contextVram = ggufInsights.estimateContextResourceRequirements({
+                contextSize,
+                batchSize: getDefaultContextBatchSize({ contextSize, sequences: 1 }),
+                modelGpuLayers: gpuLayers,
+                sequences: 1,
+                isEmbeddingContext,
+                flashAttention,
+                swaFullCache
+            }).gpuVram;
+            if (contextVram <= vram)
+                return {
+                    contextSize,
+                    vram: contextVram // the `vram` field of the result is the estimated usage, not the budget passed in
+                };
+            return null; // null marks this size as not fitting
+        }
+    });
+}
+function findMaxValidValue({ maxValue, minValue, minStep = 1, test }) { // Probes values in [minValue, maxValue] with a shrinking step and returns the `test` result of the largest passing value probed, or null if none passed; presumably assumes `test` passes for every value below some threshold — not enforced here
+    let step = -Math.max(minStep, Math.floor((maxValue - minValue) / 4)); // start by moving down from maxValue in quarter-range steps; the sign of `step` is the direction
+    let bestValue = null; // { value, result } of the largest passing probe so far
+    for (let value = maxValue; value >= minValue;) { // no update clause: `value` is advanced manually below
+        const result = (bestValue != null && value === bestValue.value)
+            ? bestValue.result // reuse the cached result instead of calling test again
+            : test(value);
+        if (result != null) {
+            if (bestValue == null || value >= bestValue.value) { // only a probe at or above the current best replaces it
+                bestValue = { value: value, result: result };
+                if (step === -minStep) // passed while stepping down at the finest step: stop here
+                    break;
+                else if (step < 0)
+                    step = Math.max(minStep, Math.floor(-step / 2)); // passed while moving down: turn around and move up with half the step
+            }
+        }
+        else if (bestValue != null && value < bestValue.value) { // a failure below the best passing value: jump back to the best value and continue upward with half the step
+            value = bestValue.value;
+            step = Math.max(minStep, Math.floor(Math.abs(step) / 2));
+            continue;
+        }
+        else if (step > 0)
+            step = -Math.max(minStep, Math.floor(step / 2)); // failed while moving up: turn around with half the step
+        if (value === minValue && step === -minStep) // at the floor, heading down at the finest step: nothing left to try
+            break;
+        value += step;
+        if (value < minValue) { // overshot the floor: clamp and head upward with half the step
+            value = minValue;
+            step = Math.max(minStep, Math.floor(Math.abs(step) / 2));
+        }
+        else if (value > maxValue) { // overshot the ceiling: clamp and head downward with half the step
+            value = maxValue;
+            step = -Math.max(minStep, Math.floor(Math.abs(step) / 2));
+        }
+    }
+    if (bestValue != null)
+        return bestValue.result;
+    return null;
+}
+//# sourceMappingURL=resolveModelGpuLayersOption.js.map