First upload version 0.0.1
node_modules/node-llama-cpp/dist/gguf/insights/utils/getRamUsageFromUnifiedVram.d.ts (5 lines, generated, vendored, normal file)
@@ -0,0 +1,5 @@
export declare function getRamUsageFromUnifiedVram(vramUsage: number, vramState: {
    total: number;
    free: number;
    unifiedSize: number;
}): number;
node_modules/node-llama-cpp/dist/gguf/insights/utils/getRamUsageFromUnifiedVram.js (7 lines, generated, vendored, normal file)
@@ -0,0 +1,7 @@
export function getRamUsageFromUnifiedVram(vramUsage, vramState) {
    const onlyVramSize = vramState.total - vramState.unifiedSize;
    const existingUsage = Math.max(0, vramState.total - vramState.free);
    const unifiedRamUsage = Math.min(vramState.unifiedSize, Math.max(0, vramUsage - Math.max(0, onlyVramSize - existingUsage)));
    return unifiedRamUsage;
}
//# sourceMappingURL=getRamUsageFromUnifiedVram.js.map
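On machines with unified memory (for example Apple Silicon), VRAM allocations also consume system RAM, and this helper computes that overlap so the resolvers below can subtract it from the free RAM. A minimal usage sketch with made-up sizes (in MB for readability; not part of this commit):

// Hypothetical machine: 16 GB of fully unified memory, 10 GB currently free.
const vramState = {total: 16_000, free: 10_000, unifiedSize: 16_000};
// onlyVramSize = 16000 - 16000 = 0; existingUsage = 16000 - 10000 = 6000;
// so a planned 4000 MB VRAM allocation comes entirely out of shared RAM:
getRamUsageFromUnifiedVram(4_000, vramState); // => 4000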
node_modules/node-llama-cpp/dist/gguf/insights/utils/getRamUsageFromUnifiedVram.js.map (1 line, generated, vendored, normal file)
@@ -0,0 +1 @@
{"version":3,"file":"getRamUsageFromUnifiedVram.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/getRamUsageFromUnifiedVram.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,0BAA0B,CAAC,SAAiB,EAAE,SAA6D;IACvH,MAAM,YAAY,GAAG,SAAS,CAAC,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC;IAC7D,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAEpE,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,YAAY,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IAE5H,OAAO,eAAe,CAAC;AAC3B,CAAC"}
node_modules/node-llama-cpp/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts (30 lines, generated, vendored, normal file)
@@ -0,0 +1,30 @@
import { LlamaContextOptions } from "../../../evaluator/LlamaContext/types.js";
import { GgufInsights } from "../GgufInsights.js";
import { BuildGpu } from "../../../bindings/types.js";
export declare function resolveContextContextSizeOption({ contextSize, batchSize, sequences, modelFileInsights, modelGpuLayers, modelTrainContextSize, flashAttention, swaFullCache, getVramState, getRamState, getSwapState, ignoreMemorySafetyChecks, isEmbeddingContext, maxContextSizeSwapUse }: {
    contextSize?: LlamaContextOptions["contextSize"];
    batchSize?: LlamaContextOptions["batchSize"];
    sequences: number;
    modelFileInsights: GgufInsights;
    modelGpuLayers: number;
    modelTrainContextSize: number;
    flashAttention: boolean;
    swaFullCache: boolean;
    getVramState(): Promise<{
        total: number;
        free: number;
        unifiedSize: number;
    }>;
    getRamState(): Promise<{
        total: number;
        free: number;
    }>;
    getSwapState(): Promise<{
        total: number;
        free: number;
    }>;
    llamaGpu: BuildGpu;
    ignoreMemorySafetyChecks?: boolean;
    isEmbeddingContext?: boolean;
    maxContextSizeSwapUse?: number;
}): Promise<number>;
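Per the declaration above and the branches in the implementation that follows, the `contextSize` option comes in three shapes. A brief sketch (the concrete values are illustrative):

const fixed: LlamaContextOptions["contextSize"] = 4096;                    // exact size, validated against free memory
const auto: LlamaContextOptions["contextSize"] = "auto";                   // largest size that fits
const ranged: LlamaContextOptions["contextSize"] = {min: 2048, max: 8192}; // auto-fit within a range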
node_modules/node-llama-cpp/dist/gguf/insights/utils/resolveContextContextSizeOption.js (111 lines, generated, vendored, normal file)
@@ -0,0 +1,111 @@
import { minAllowedContextSizeInCalculations } from "../../../config.js";
import { getDefaultContextBatchSize, getDefaultModelContextSize } from "../../../evaluator/LlamaContext/LlamaContext.js";
import { InsufficientMemoryError } from "../../../utils/InsufficientMemoryError.js";
import { getRamUsageFromUnifiedVram } from "./getRamUsageFromUnifiedVram.js";
const defaultMaxContextSizeSwapUse = 2048;
export async function resolveContextContextSizeOption({ contextSize, batchSize, sequences, modelFileInsights, modelGpuLayers, modelTrainContextSize, flashAttention, swaFullCache, getVramState, getRamState, getSwapState, ignoreMemorySafetyChecks = false, isEmbeddingContext = false, maxContextSizeSwapUse = defaultMaxContextSizeSwapUse }) {
    if (contextSize == null)
        contextSize = "auto";
    if (typeof contextSize === "number") {
        const resolvedContextSize = Math.max(1, Math.floor(contextSize));
        if (ignoreMemorySafetyChecks)
            return resolvedContextSize;
        const [vramState, ramState, swapState] = await Promise.all([
            getVramState(),
            getRamState(),
            getSwapState()
        ]);
        const contextResourceRequirements = modelFileInsights.estimateContextResourceRequirements({
            contextSize: resolvedContextSize,
            batchSize: batchSize ?? getDefaultContextBatchSize({ contextSize: resolvedContextSize, sequences }),
            modelGpuLayers: modelGpuLayers,
            sequences,
            flashAttention,
            swaFullCache,
            isEmbeddingContext
        });
        if (contextResourceRequirements.gpuVram > vramState.free)
            throw new InsufficientMemoryError(`A context size of ${resolvedContextSize}${sequences > 1 ? ` with ${sequences} sequences` : ""} is too large for the available VRAM`);
        else if (contextResourceRequirements.cpuRam > (ramState.free + swapState.free - getRamUsageFromUnifiedVram(contextResourceRequirements.gpuVram, vramState)))
            throw new InsufficientMemoryError(`A context size of ${resolvedContextSize}${sequences > 1 ? ` with ${sequences} sequences` : ""} is too large for the available RAM${swapState.total > 0 ? " (including swap)" : ""}`);
        return resolvedContextSize;
    }
    else if (contextSize === "auto" || typeof contextSize === "object") {
        const [vramState, ramState, swapState] = await Promise.all([
            getVramState(),
            getRamState(),
            getSwapState()
        ]);
        const maxContextSize = contextSize === "auto"
            ? getDefaultModelContextSize({ trainContextSize: modelTrainContextSize })
            : Math.min(contextSize.max ?? getDefaultModelContextSize({ trainContextSize: modelTrainContextSize }), getDefaultModelContextSize({ trainContextSize: modelTrainContextSize }));
        const minContextSize = contextSize === "auto"
            ? minAllowedContextSizeInCalculations
            : Math.max(contextSize.min ?? minAllowedContextSizeInCalculations, minAllowedContextSizeInCalculations);
        let highestCompatibleContextSize = null;
        // Adaptive search: start at the largest size and move in coarse steps,
        // halving the step each time the fit/no-fit boundary is crossed,
        // until a step of 1 pins down the largest compatible size.
        let step = -Math.max(1, Math.floor((maxContextSize - minContextSize) / 4));
        for (let testContextSize = maxContextSize; testContextSize >= minContextSize && testContextSize <= maxContextSize;) {
            const contextResourceRequirements = modelFileInsights.estimateContextResourceRequirements({
                contextSize: testContextSize,
                batchSize: batchSize ?? getDefaultContextBatchSize({ contextSize: testContextSize, sequences }),
                modelGpuLayers: modelGpuLayers,
                sequences,
                flashAttention,
                swaFullCache,
                isEmbeddingContext
            });
            // Swap space is only counted toward small contexts (<= maxContextSizeSwapUse)
            if (contextResourceRequirements.gpuVram <= vramState.free &&
                contextResourceRequirements.cpuRam <= (ramState.free - getRamUsageFromUnifiedVram(contextResourceRequirements.gpuVram, vramState) + (testContextSize <= maxContextSizeSwapUse
                    ? swapState.free
                    : 0))) {
                if (highestCompatibleContextSize == null || testContextSize >= highestCompatibleContextSize) {
                    highestCompatibleContextSize = testContextSize;
                    if (step === -1)
                        break;
                    else if (step < 0)
                        step = Math.max(1, Math.floor(-step / 2));
                }
            }
            else if (step > 0)
                step = -Math.max(1, Math.floor(step / 2));
            if (testContextSize === minContextSize && step === -1)
                break;
            testContextSize += step;
            if (testContextSize < minContextSize) {
                testContextSize = minContextSize;
                step = Math.max(1, Math.floor(Math.abs(step) / 2));
            }
            else if (testContextSize > maxContextSize) {
                testContextSize = maxContextSize;
                step = -Math.max(1, Math.floor(Math.abs(step) / 2));
            }
        }
        if (highestCompatibleContextSize != null)
            return highestCompatibleContextSize;
        if (ignoreMemorySafetyChecks)
            return minContextSize;
        // Nothing fit: estimate the minimum size purely to produce a precise error message.
        const minContextSizeResourceRequirements = modelFileInsights.estimateContextResourceRequirements({
            contextSize: minContextSize,
            batchSize: batchSize ?? getDefaultContextBatchSize({ contextSize: minContextSize, sequences }),
            modelGpuLayers: modelGpuLayers,
            sequences,
            flashAttention,
            swaFullCache,
            isEmbeddingContext
        });
        const unifiedRamUsage = getRamUsageFromUnifiedVram(minContextSizeResourceRequirements.gpuVram, vramState);
        if (minContextSizeResourceRequirements.gpuVram > vramState.free &&
            minContextSizeResourceRequirements.cpuRam > ramState.free + swapState.free - unifiedRamUsage)
            throw new InsufficientMemoryError(`A context size of ${minContextSize}${sequences > 1 ? ` with ${sequences} sequences` : ""} is too large for the available VRAM and RAM${swapState.total > 0 ? " (including swap)" : ""}`);
        else if (minContextSizeResourceRequirements.gpuVram > vramState.free)
            throw new InsufficientMemoryError(`A context size of ${minContextSize}${sequences > 1 ? ` with ${sequences} sequences` : ""} is too large for the available VRAM`);
        else if (minContextSizeResourceRequirements.cpuRam > ramState.free + swapState.free - unifiedRamUsage)
            throw new InsufficientMemoryError(`A context size of ${minContextSize}${sequences > 1 ? ` with ${sequences} sequences` : ""} is too large for the available RAM${swapState.total > 0 ? " (including swap)" : ""}`);
        else if (minContextSizeResourceRequirements.cpuRam > ramState.free - unifiedRamUsage)
            throw new InsufficientMemoryError(`A context size of ${minContextSize}${sequences > 1 ? ` with ${sequences} sequences` : ""} is too large for the available RAM`);
        else
            throw new InsufficientMemoryError(`A context size of ${minContextSize}${sequences > 1 ? ` with ${sequences} sequences` : ""} is too large for the available resources`);
    }
    throw new Error(`Invalid context size: "${contextSize}"`);
}
//# sourceMappingURL=resolveContextContextSizeOption.js.map
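A hedged invocation sketch of the resolver above, with the memory-state callbacks stubbed out (the `ggufInsights` instance and all sizes are illustrative, not part of this commit):

const contextSize = await resolveContextContextSizeOption({
    contextSize: "auto",
    sequences: 1,
    modelFileInsights: ggufInsights,   // an existing GgufInsights instance
    modelGpuLayers: 33,
    modelTrainContextSize: 8192,
    flashAttention: false,
    swaFullCache: false,
    getVramState: async () => ({total: 8e9, free: 6e9, unifiedSize: 0}),
    getRamState: async () => ({total: 32e9, free: 20e9}),
    getSwapState: async () => ({total: 8e9, free: 8e9})
});
// => the largest context size (between the minimum allowed and the train
//    context size) whose estimated VRAM and RAM needs fit the state above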
node_modules/node-llama-cpp/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map (1 line, generated, vendored, normal file)
File diff suppressed because one or more lines are too long
node_modules/node-llama-cpp/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts (17 lines, generated, vendored, normal file)
@@ -0,0 +1,17 @@
import { LlamaModelOptions } from "../../../evaluator/LlamaModel/LlamaModel.js";
import { BuildGpu } from "../../../bindings/types.js";
import type { GgufInsights } from "../GgufInsights.js";
export declare function resolveModelGpuLayersOption(gpuLayers: LlamaModelOptions["gpuLayers"], { ggufInsights, ignoreMemorySafetyChecks, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention, defaultContextSwaFullCache, useMmap }: {
    ggufInsights: GgufInsights;
    ignoreMemorySafetyChecks?: boolean;
    getVramState(): Promise<{
        total: number;
        free: number;
    }>;
    llamaVramPaddingSize: number;
    llamaGpu: BuildGpu;
    llamaSupportsGpuOffloading: boolean;
    defaultContextFlashAttention: boolean;
    defaultContextSwaFullCache: boolean;
    useMmap?: boolean;
}): Promise<number>;
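As with `contextSize`, the `gpuLayers` option accepted here takes several shapes, matching the branches of the implementation below (values illustrative):

const all: LlamaModelOptions["gpuLayers"] = "max";   // offload every layer, or fail
const some: LlamaModelOptions["gpuLayers"] = 20;     // exact count, clamped to the model's total layers
const auto: LlamaModelOptions["gpuLayers"] = "auto"; // pick the best-scoring count that fits
const constrained: LlamaModelOptions["gpuLayers"] = {min: 10, fitContext: {contextSize: 4096}};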
node_modules/node-llama-cpp/dist/gguf/insights/utils/resolveModelGpuLayersOption.js (239 lines, generated, vendored, normal file)
@@ -0,0 +1,239 @@
import { InsufficientMemoryError } from "../../../utils/InsufficientMemoryError.js";
import { findBestOption } from "../../../utils/findBestOption.js";
import { getDefaultContextBatchSize, getDefaultModelContextSize } from "../../../evaluator/LlamaContext/LlamaContext.js";
import { minAllowedContextSizeInCalculations } from "../../../config.js";
import { scoreLevels } from "./scoreLevels.js";
const fitContextExtraMemoryPaddingPercentage = 0.5;
export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ignoreMemorySafetyChecks = false, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention, defaultContextSwaFullCache, useMmap }) {
    if (gpuLayers == null)
        gpuLayers = "auto";
    if (!llamaSupportsGpuOffloading)
        return 0;
    if (gpuLayers === "max" || typeof gpuLayers === "number") {
        const resolvedGpuLayers = typeof gpuLayers === "number"
            ? Math.max(0, Math.min(ggufInsights.totalLayers, gpuLayers))
            : ggufInsights.totalLayers;
        if (ignoreMemorySafetyChecks)
            return resolvedGpuLayers;
        const vramState = await getVramState();
        const maxLayersRequirements = getVramRequiredForGpuLayers({
            gpuLayers: resolvedGpuLayers,
            ggufInsights,
            currentVram: vramState.free,
            defaultContextFlashAttention,
            defaultContextSwaFullCache,
            useMmap
        });
        if (maxLayersRequirements == null)
            throw new InsufficientMemoryError("Not enough VRAM to fit the model with the specified settings");
        return resolvedGpuLayers;
    }
    else if (gpuLayers === "auto" || typeof gpuLayers === "object") {
        if (llamaGpu === false)
            return 0;
        const vramState = await getVramState();
        if (vramState.total === 0)
            return 0;
        let freeVram = vramState.free;
        if (typeof gpuLayers === "object" && gpuLayers.fitContext?.contextSize != null) {
            freeVram -= llamaVramPaddingSize * fitContextExtraMemoryPaddingPercentage;
            if (freeVram < 0)
                freeVram = 0;
        }
        const bestGpuLayersOption = getBestGpuLayersForFreeVram({
            ggufInsights,
            freeVram,
            fitContext: typeof gpuLayers === "object"
                ? gpuLayers.fitContext
                : undefined,
            minGpuLayers: typeof gpuLayers === "object"
                ? gpuLayers.min
                : undefined,
            maxGpuLayers: typeof gpuLayers === "object"
                ? gpuLayers.max
                : undefined,
            defaultContextFlashAttention,
            defaultContextSwaFullCache,
            useMmap
        });
        const hasGpuLayersRequirements = typeof gpuLayers === "object" &&
            (gpuLayers.min != null || gpuLayers.max != null || gpuLayers.fitContext?.contextSize != null);
        if (!ignoreMemorySafetyChecks && bestGpuLayersOption == null && hasGpuLayersRequirements)
            throw new InsufficientMemoryError("Not enough VRAM to fit the model with the specified settings");
        return bestGpuLayersOption ?? 0;
    }
    throw new Error(`Invalid gpuLayers value: ${gpuLayers}`);
}
// Enumerates layer counts from high to low and picks the best-scoring one
// that still fits in the free VRAM.
function getBestGpuLayersForFreeVram({ ggufInsights, freeVram, fitContext, minGpuLayers, maxGpuLayers, defaultContextFlashAttention, defaultContextSwaFullCache, useMmap }) {
    return findBestOption({
        *generator() {
            const minLayers = Math.floor(Math.max(0, minGpuLayers ?? 0));
            const maxLayers = Math.floor(Math.min(ggufInsights.totalLayers, maxGpuLayers ?? ggufInsights.totalLayers));
            for (let layers = maxLayers; layers >= minLayers; layers--) {
                yield {
                    gpuLayers: layers
                };
            }
        },
        score(option) {
            const layersRequirements = getVramRequiredForGpuLayers({
                gpuLayers: option.gpuLayers,
                ggufInsights,
                currentVram: freeVram,
                fitContext,
                defaultContextFlashAttention,
                defaultContextSwaFullCache,
                useMmap
            });
            if (layersRequirements == null)
                return null;
            return scoreGpuLayersAndContextCombination({ gpuLayers: option.gpuLayers, contextSize: layersRequirements.contextSize }, {
                totalGpuLayers: ggufInsights.totalLayers,
                trainContextSize: getDefaultModelContextSize({ trainContextSize: ggufInsights.trainContextSize })
            });
        }
    })?.gpuLayers ?? null;
}
// Heuristic score balancing more offloaded layers against the context size
// they leave room for.
function scoreGpuLayersAndContextCombination({ gpuLayers, contextSize }, { totalGpuLayers, trainContextSize }) {
    function scoreGpuLayers() {
        return scoreLevels(gpuLayers, [{
            start: 0,
            points: 4
        }, {
            start: 1,
            points: 26
        }, {
            start: totalGpuLayers,
            points: 14,
            end: totalGpuLayers
        }]);
    }
    function scoreContextSize() {
        const gpuLayersPercentage = gpuLayers / totalGpuLayers;
        return scoreLevels(contextSize, [{
            start: 0,
            points: 2
        }, {
            start: 1024,
            points: 4
        }, {
            start: 2048,
            points: gpuLayersPercentage < 0.1 ? 1 : 8
        }, {
            start: 4096,
            points: gpuLayersPercentage < 0.3 ? 4 : 16
        }, {
            start: 8192,
            points: gpuLayersPercentage < 0.6 ? 1 : 8,
            end: Math.max(trainContextSize, 16384)
        }]);
    }
    return scoreGpuLayers() + scoreContextSize();
}
// Returns the VRAM cost of a layer count plus a context that fits alongside it,
// or null if nothing fits in the given VRAM.
function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fitContext, defaultContextFlashAttention = false, defaultContextSwaFullCache = false, useMmap }) {
    const modelVram = ggufInsights.estimateModelResourceRequirements({
        gpuLayers,
        useMmap
    }).gpuVram;
    if (modelVram > currentVram)
        return null;
    if (fitContext != null && fitContext.contextSize != null) {
        const contextVram = ggufInsights.estimateContextResourceRequirements({
            contextSize: fitContext.contextSize,
            batchSize: getDefaultContextBatchSize({ contextSize: fitContext.contextSize, sequences: 1 }),
            modelGpuLayers: gpuLayers,
            sequences: 1,
            isEmbeddingContext: fitContext.embeddingContext ?? false,
            flashAttention: defaultContextFlashAttention,
            swaFullCache: defaultContextSwaFullCache
        }).gpuVram;
        const totalVram = modelVram + contextVram;
        if (totalVram > currentVram)
            return null;
        return {
            contextSize: fitContext.contextSize,
            contextVram,
            totalVram
        };
    }
    const maxContext = findMaxPossibleContextSizeForVram({
        gpuLayers,
        ggufInsights,
        vram: currentVram - modelVram,
        isEmbeddingContext: fitContext?.embeddingContext ?? false,
        flashAttention: defaultContextFlashAttention,
        swaFullCache: defaultContextSwaFullCache
    });
    if (maxContext == null || modelVram + maxContext.vram > currentVram)
        return null;
    return {
        contextSize: maxContext.contextSize,
        contextVram: maxContext.vram,
        totalVram: modelVram + maxContext.vram
    };
}
// Largest context size whose estimated VRAM cost fits in the given budget.
function findMaxPossibleContextSizeForVram({ gpuLayers, ggufInsights, vram, isEmbeddingContext, flashAttention, swaFullCache }) {
    const maxContextSize = getDefaultModelContextSize({ trainContextSize: ggufInsights.trainContextSize });
    return findMaxValidValue({
        maxValue: maxContextSize,
        minValue: minAllowedContextSizeInCalculations,
        minStep: 1,
        test(contextSize) {
            const contextVram = ggufInsights.estimateContextResourceRequirements({
                contextSize,
                batchSize: getDefaultContextBatchSize({ contextSize, sequences: 1 }),
                modelGpuLayers: gpuLayers,
                sequences: 1,
                isEmbeddingContext,
                flashAttention,
                swaFullCache
            }).gpuVram;
            if (contextVram <= vram)
                return {
                    contextSize,
                    vram: contextVram
                };
            return null;
        }
    });
}
// Generic adaptive search for the largest value in [minValue, maxValue] that
// `test` accepts: probe downward in coarse steps, halving the step on each
// direction change until it reaches minStep.
function findMaxValidValue({ maxValue, minValue, minStep = 1, test }) {
    let step = -Math.max(minStep, Math.floor((maxValue - minValue) / 4));
    let bestValue = null;
    for (let value = maxValue; value >= minValue;) {
        const result = (bestValue != null && value === bestValue.value)
            ? bestValue.result
            : test(value);
        if (result != null) {
            if (bestValue == null || value >= bestValue.value) {
                bestValue = { value: value, result: result };
                if (step === -minStep)
                    break;
                else if (step < 0)
                    step = Math.max(minStep, Math.floor(-step / 2));
            }
        }
        else if (bestValue != null && value < bestValue.value) {
            value = bestValue.value;
            step = Math.max(minStep, Math.floor(Math.abs(step) / 2));
            continue;
        }
        else if (step > 0)
            step = -Math.max(minStep, Math.floor(step / 2));
        if (value === minValue && step === -minStep)
            break;
        value += step;
        if (value < minValue) {
            value = minValue;
            step = Math.max(minStep, Math.floor(Math.abs(step) / 2));
        }
        else if (value > maxValue) {
            value = maxValue;
            step = -Math.max(minStep, Math.floor(Math.abs(step) / 2));
        }
    }
    if (bestValue != null)
        return bestValue.result;
    return null;
}
//# sourceMappingURL=resolveModelGpuLayersOption.js.map
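`findMaxValidValue` above is a generic "largest value whose `test` passes" search, module-private here, so the following is illustrative only. A toy sketch of the pattern with a hypothetical predicate (not from this commit): find the largest integer up to 100 whose square stays at or under 2000.

const result = findMaxValidValue({
    maxValue: 100,
    minValue: 1,
    minStep: 1,
    test: (value) => value * value <= 2000
        ? {value, square: value * value}
        : null
});
// => {value: 44, square: 1936}, found in far fewer than 100 probes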
node_modules/node-llama-cpp/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map (1 line, generated, vendored, normal file)
File diff suppressed because one or more lines are too long
node_modules/node-llama-cpp/dist/gguf/insights/utils/scoreLevels.d.ts (5 lines, generated, vendored, normal file)
@@ -0,0 +1,5 @@
export declare function scoreLevels(num: number, levels: {
    start: number;
    end?: number;
    points: number;
}[]): number;
node_modules/node-llama-cpp/dist/gguf/insights/utils/scoreLevels.js (16 lines, generated, vendored, normal file)
@@ -0,0 +1,16 @@
export function scoreLevels(num, levels) {
    let res = 0;
    for (let i = 0; i < levels.length; i++) {
        const level = levels[i];
        const start = level.start;
        const end = level.end ?? levels[i + 1]?.start ?? Math.max(start, num);
        if (num < start)
            break;
        else if (num >= end)
            res += level.points;
        else
            res += level.points * ((num - start) / (end - start));
    }
    return res;
}
//# sourceMappingURL=scoreLevels.js.map
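A worked sketch of the scoring above: each level spans from its `start` to its explicit `end`, else the next level's `start`, else `max(start, num)`; full `points` are awarded past a span and a proportional share inside it.

const levels = [{start: 0, points: 2}, {start: 1024, points: 4}];
scoreLevels(1536, levels); // => 6: 2 for clearing [0, 1024), plus 4 for the second
                           //    level, whose end defaults to max(1024, 1536) = 1536
scoreLevels(512, levels);  // => 1: 2 * ((512 - 0) / (1024 - 0)); second level not reached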
node_modules/node-llama-cpp/dist/gguf/insights/utils/scoreLevels.js.map (1 line, generated, vendored, normal file)
@@ -0,0 +1 @@
{"version":3,"file":"scoreLevels.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/scoreLevels.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,WAAW,CAAC,GAAW,EAAE,MAAuD;IAC5F,IAAI,GAAG,GAAG,CAAC,CAAC;IAEZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC;QACzB,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC;QAC1B,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,IAAI,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAEtE,IAAI,GAAG,GAAG,KAAK;YACX,MAAM;aACL,IAAI,GAAG,IAAI,GAAG;YACf,GAAG,IAAI,KAAK,CAAC,MAAM,CAAC;;YAEpB,GAAG,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC;IAC9D,CAAC;IAED,OAAO,GAAG,CAAC;AACf,CAAC"}