First upload version 0.0.1

This commit is contained in: Neyra
2026-02-05 15:27:49 +08:00
commit 8e9b7201ed
4182 changed files with 593136 additions and 0 deletions

46
node_modules/node-llama-cpp/llama/.clang-format generated vendored Normal file

@@ -0,0 +1,46 @@
BasedOnStyle: Google
IndentWidth: 4
UseTab: Never
TabWidth: 4
ColumnLimit: 140
AllowShortCaseLabelsOnASingleLine: true
AllowShortFunctionsOnASingleLine: false
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlignTrailingComments: false
SpaceAfterTemplateKeyword: false
AllowShortBlocksOnASingleLine: false
MaxEmptyLinesToKeep: 3
NamespaceIndentation: None
CommentPragmas: '^[^ ]'
FixNamespaceComments: false
IndentAccessModifiers: true
SpaceAfterCStyleCast: false
PointerAlignment: Left
IndentCaseLabels: true
BinPackArguments: false
BinPackParameters: false
Cpp11BracedListStyle: false
SpaceBeforeCpp11BracedList: true
SpaceInEmptyBlock: true
KeepEmptyLinesAtTheStartOfBlocks: false
DerivePointerAlignment: false
AlwaysBreakTemplateDeclarations: No
DeriveLineEnding: false
UseCRLF: false
AllowAllArgumentsOnNextLine: true
PackConstructorInitializers: CurrentLine
AlignAfterOpenBracket: BlockIndent
BraceWrapping:
AfterStruct: false
AfterClass: false
AfterUnion: false
AfterEnum: false
AfterControlStatement: false
AfterFunction: false
AfterNamespace: false
AfterExternBlock: false
BeforeElse: false
SplitEmptyFunction: false
SplitEmptyRecord: false
SplitEmptyNamespace: false
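
For illustration, a short hypothetical snippet (not part of the package) formatted the way this configuration dictates: 4-space indentation, left-aligned pointers, attached braces, and block-indented arguments when a call is broken after the opening bracket.

#include <cstdio>

int countMatches(const int* values, int count, int target) {
    int matches = 0;
    for (int i = 0; i < count; i++) {
        if (values[i] == target) {
            matches++;
        }
    }
    return matches;
}

void reportMatches(const int* values, int count, int target) {
    printf(
        "found %d matches for %d\n",
        countMatches(values, count, target),
        target
    );
}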

141
node_modules/node-llama-cpp/llama/CMakeLists.txt generated vendored Normal file

@@ -0,0 +1,141 @@
cmake_minimum_required(VERSION 3.19)
if (NLC_CURRENT_PLATFORM STREQUAL "win-x64" OR NLC_CURRENT_PLATFORM STREQUAL "win-arm64")
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()
include("./cmake/addVariantSuffix.cmake")
if (NLC_CURRENT_PLATFORM STREQUAL "win-x64")
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebugDLL" CACHE STRING "" FORCE)
else()
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDLL" CACHE STRING "" FORCE)
endif()
endif()
if (NLC_TARGET_PLATFORM STREQUAL "win-arm64" AND (CMAKE_GENERATOR STREQUAL "Ninja" OR CMAKE_GENERATOR STREQUAL "Ninja Multi-Config") AND NOT MINGW)
if(NLC_CURRENT_PLATFORM STREQUAL "win-x64")
include("./profiles/llvm.win32.host-x64.target-arm64.cmake")
elseif(NLC_CURRENT_PLATFORM STREQUAL "win-arm64")
include("./profiles/llvm.win32.host-arm64.target-arm64.cmake")
endif()
elseif (NLC_CURRENT_PLATFORM STREQUAL "win-x64" AND NLC_TARGET_PLATFORM STREQUAL "win-x64" AND (CMAKE_GENERATOR STREQUAL "Ninja" OR CMAKE_GENERATOR STREQUAL "Ninja Multi-Config") AND NOT MINGW)
include("./profiles/llvm.win32.host-x64.target-x64.cmake")
endif()
project("llama-addon" C CXX)
if (MSVC)
if (GGML_STATIC)
add_link_options(-static)
if (MINGW)
add_link_options(-static-libgcc -static-libstdc++)
endif()
endif()
# add_compile_options(/EHsc)
else()
add_compile_options(-fexceptions)
endif()
add_definitions(-DNAPI_VERSION=7)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_PLATFORM_NO_VERSIONED_SONAME ON)
set(LLAMA_BUILD_COMMON ON)
if (MINGW)
set(GGML_BACKEND_DL OFF)
set(BUILD_SHARED_LIBS ON)
endif()
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
add_compile_options(-Wno-c++17-extensions)
endif()
if(APPLE)
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
set(CMAKE_BUILD_RPATH "@loader_path")
set(CMAKE_INSTALL_RPATH "@loader_path")
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
else()
set(CMAKE_BUILD_RPATH_USE_ORIGIN ON)
if (CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "Android")
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
set(CMAKE_BUILD_RPATH "$ORIGIN")
set(CMAKE_INSTALL_RPATH "$ORIGIN")
set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
endif()
endif()
execute_process(COMMAND node -p "require('node-addon-api').include.slice(1,-1)"
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE NODE_ADDON_API_DIR
OUTPUT_STRIP_TRAILING_WHITESPACE)
include_directories(${NODE_ADDON_API_DIR} ${CMAKE_JS_INC})
if (DEFINED GGML_NATIVE)
set(NLC_GGML_NATIVE ${GGML_NATIVE})
elseif(CMAKE_CROSSCOMPILING OR DEFINED ENV{SOURCE_DATE_EPOCH})
set(NLC_GGML_NATIVE OFF)
else()
set(NLC_GGML_NATIVE ON)
endif()
add_subdirectory("llama.cpp")
include_directories("llama.cpp")
include_directories("./llama.cpp/common")
# This is needed to use methods in "llama-grammar.h" and "unicode.h"
target_include_directories(llama PUBLIC "./llama.cpp/src")
unset(GPU_INFO_HEADERS)
unset(GPU_INFO_SOURCES)
unset(GPU_INFO_EXTRA_LIBS)
if (GGML_VULKAN OR GGML_KOMPUTE)
find_package(Vulkan)
if (Vulkan_FOUND)
if (GGML_VULKAN)
message(STATUS "Using Vulkan for GPU info")
elseif (GGML_KOMPUTE)
message(STATUS "Using Vulkan for GPU info because Kompute is enabled")
endif()
list(APPEND GPU_INFO_HEADERS gpuInfo/vulkan-gpu-info.h)
list(APPEND GPU_INFO_SOURCES gpuInfo/vulkan-gpu-info.cpp)
add_compile_definitions(GPU_INFO_USE_VULKAN)
list(APPEND GPU_INFO_EXTRA_LIBS Vulkan::Vulkan)
else()
message(FATAL_ERROR "Vulkan was not found")
endif()
endif()
list(REMOVE_DUPLICATES GPU_INFO_HEADERS)
list(REMOVE_DUPLICATES GPU_INFO_SOURCES)
list(REMOVE_DUPLICATES GPU_INFO_EXTRA_LIBS)
addVariantSuffix(llama ${NLC_VARIANT})
addVariantSuffix(ggml ${NLC_VARIANT})
file(GLOB SOURCE_FILES "addon/*.cpp" "addon/**/*.cpp" ${GPU_INFO_SOURCES})
add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC} ${GPU_INFO_HEADERS})
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB})
target_link_libraries(${PROJECT_NAME} "llama")
target_link_libraries(${PROJECT_NAME} "common")
if (DEFINED GPU_INFO_EXTRA_LIBS)
target_link_libraries(${PROJECT_NAME} ${GPU_INFO_EXTRA_LIBS})
endif()
if(MSVC AND CMAKE_JS_NODELIB_DEF AND CMAKE_JS_NODELIB_TARGET)
# Generate node.lib
execute_process(COMMAND ${CMAKE_AR} /def:${CMAKE_JS_NODELIB_DEF} /out:${CMAKE_JS_NODELIB_TARGET} ${CMAKE_STATIC_LINKER_FLAGS})
endif()

985
node_modules/node-llama-cpp/llama/addon/AddonContext.cpp generated vendored Normal file

@@ -0,0 +1,985 @@
#include <thread>
#include <algorithm>
#include <cmath>
#include "common/common.h"
#include "llama-vocab.h"
#include "llama.h"
#include "addonGlobals.h"
#include "AddonModel.h"
#include "AddonModelLora.h"
#include "AddonGrammarEvaluationState.h"
#include "AddonContext.h"
static uint64_t calculateBatchMemorySize(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
uint64_t totalSize = 0;
if (embd) {
totalSize += sizeof(float) * n_tokens_alloc * embd;
} else {
totalSize += sizeof(llama_token) * n_tokens_alloc;
}
totalSize += sizeof(llama_pos) * n_tokens_alloc;
totalSize += sizeof(int32_t) * n_tokens_alloc;
totalSize += sizeof(llama_seq_id *) * (n_tokens_alloc + 1);
totalSize += sizeof(llama_seq_id) * n_seq_max * n_tokens_alloc;
totalSize += sizeof(int8_t) * n_tokens_alloc;
return totalSize;
}
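// Standalone sketch (illustration only, not part of this file): the same
// formula evaluated with explicit sizes, assuming 4-byte llama_token,
// llama_pos and llama_seq_id and 8-byte pointers, as on typical 64-bit
// builds. For n_tokens_alloc = 512, embd = 0 and n_seq_max = 1 this comes
// to 12,808 bytes (~12.5 KiB).
static uint64_t exampleBatchMemorySize() {
    const uint64_t n_tokens = 512;
    const uint64_t n_seq_max = 1;
    return 4 * n_tokens              // token ids (embd == 0)
         + 4 * n_tokens              // positions
         + 4 * n_tokens              // n_seq_id counts
         + 8 * (n_tokens + 1)        // seq_id pointer array
         + 4 * n_seq_max * n_tokens  // seq_id values
         + 1 * n_tokens;             // logits flags
}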
class AddonContextDecodeBatchWorker : public Napi::AsyncWorker {
public:
AddonContext* ctx;
AddonContextDecodeBatchWorker(const Napi::Env& env, AddonContext* ctx)
: Napi::AsyncWorker(env, "AddonContextDecodeBatchWorker"),
ctx(ctx),
deferred(Napi::Promise::Deferred::New(env)) {
ctx->Ref();
}
~AddonContextDecodeBatchWorker() {
ctx->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
// Perform the evaluation using llama_decode.
int r = llama_decode(ctx->ctx, ctx->batch);
if (r != 0) {
if (r == 1) {
SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
} else {
SetError("Eval has failed");
}
return;
}
llama_synchronize(ctx->ctx);
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_decode\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
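// Illustration only (not part of this file): every worker below follows the
// same node-addon-api shape as AddonContextDecodeBatchWorker above. It Ref()s
// the wrapped object for the worker's lifetime, runs the blocking llama.cpp
// call in Execute() on a worker thread (no JS values may be touched there),
// and settles a Napi::Promise::Deferred from OnOK()/OnError() back on the JS
// thread. A minimal sketch of that pattern, with doBlockingWork() as a
// hypothetical stand-in for the llama.cpp call:
class ExampleAsyncWorker : public Napi::AsyncWorker {
    public:
        ExampleAsyncWorker(const Napi::Env& env)
            : Napi::AsyncWorker(env, "ExampleAsyncWorker"),
              deferred(Napi::Promise::Deferred::New(env)) { }

        Napi::Promise GetPromise() {
            return deferred.Promise();
        }

    protected:
        Napi::Promise::Deferred deferred;

        void Execute() override {
            // Worker thread: blocking work goes here, e.g. doBlockingWork();
            // call SetError(...) to route failures to OnError().
        }
        void OnOK() override {
            deferred.Resolve(Env().Undefined()); // back on the JS thread
        }
        void OnError(const Napi::Error& err) override {
            deferred.Reject(err.Value());
        }
};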
class AddonContextLoadContextWorker : public Napi::AsyncWorker {
public:
AddonContext* context;
AddonContextLoadContextWorker(const Napi::Env& env, AddonContext* context)
: Napi::AsyncWorker(env, "AddonContextLoadContextWorker"),
context(context),
deferred(Napi::Promise::Deferred::New(env)) {
context->Ref();
}
~AddonContextLoadContextWorker() {
context->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
context->ctx = llama_init_from_model(context->model->model, context->context_params);
context->contextLoaded = context->ctx != nullptr;
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_init_from_model\"");
}
}
void OnOK() {
if (context->contextLoaded) {
uint64_t contextMemorySize = llama_state_get_size(context->ctx);
adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
context->loadedContextMemorySize = contextMemorySize;
}
deferred.Resolve(Napi::Boolean::New(Env(), context->contextLoaded));
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
public:
AddonContext* context;
AddonContextUnloadContextWorker(const Napi::Env& env, AddonContext* context)
: Napi::AsyncWorker(env, "AddonContextUnloadContextWorker"),
context(context),
deferred(Napi::Promise::Deferred::New(env)) {
context->Ref();
}
~AddonContextUnloadContextWorker() {
context->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
llama_free(context->ctx);
context->contextLoaded = false;
try {
if (context->has_batch) {
llama_batch_free(context->batch);
context->has_batch = false;
context->batch_n_tokens = 0;
}
context->dispose();
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_batch_free\"");
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_free\"");
}
}
void OnOK() {
adjustNapiExternalMemorySubtract(Env(), context->loadedContextMemorySize);
context->loadedContextMemorySize = 0;
adjustNapiExternalMemorySubtract(Env(), context->batchMemorySize);
context->batchMemorySize = 0;
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
public:
AddonContext* ctx;
AddonSampler* sampler;
bool arrayResult = false;
bool returnProbabilities = false;
bool returnConfidence = false;
float tokenConfidence = -1;
bool has_probabilities = false;
size_t probabilities_size;
llama_token * probabilities_tokens;
float * probabilities_probs;
int32_t batchLogitIndex;
llama_token result;
bool no_output = false;
AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
: Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
ctx(ctx),
deferred(Napi::Promise::Deferred::New(info.Env())) {
ctx->Ref();
batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
sampler = Napi::ObjectWrap<AddonSampler>::Unwrap(info[1].As<Napi::Object>());
arrayResult = info.Length() > 2 && info[2].IsBoolean();
returnProbabilities = arrayResult ? info[2].As<Napi::Boolean>().Value() : false;
returnConfidence = arrayResult && info.Length() > 3 && info[3].IsBoolean() ? info[3].As<Napi::Boolean>().Value() : false;
sampler->Ref();
}
~AddonContextSampleTokenWorker() {
ctx->Unref();
sampler->Unref();
if (has_probabilities) {
delete[] probabilities_tokens;
delete[] probabilities_probs;
}
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
SampleToken();
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"SampleToken\"");
}
}
void SampleToken() {
if (llama_get_logits(ctx->ctx) == nullptr) {
SetError("This model does not support token generation");
return;
}
sampler->rebuildChainIfNeeded();
const auto * logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
const int n_vocab = llama_vocab_n_tokens(ctx->model->vocab);
auto & candidates = sampler->tokenCandidates;
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
candidates[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
}
llama_token_data_array cur_p = {
/* .data = */ candidates.data(),
/* .size = */ candidates.size(),
/* .selected = */ -1,
/* .sorted = */ false,
};
llama_sampler_apply(sampler->chain, &cur_p);
if (!(cur_p.selected >= 0 && cur_p.selected < (int32_t)cur_p.size)) {
no_output = true;
return;
}
auto new_token_id = cur_p.data[cur_p.selected].id;
if (returnProbabilities || returnConfidence) {
if (!cur_p.sorted) {
std::sort(cur_p.data, cur_p.data + cur_p.size, [](const llama_token_data & a, const llama_token_data & b) {
return a.logit > b.logit;
});
cur_p.sorted = true;
for (size_t i = 0; i < cur_p.size; i++) {
if (cur_p.data[i].id == new_token_id) {
cur_p.selected = i;
break;
}
}
}
}
if (returnProbabilities) {
probabilities_size = cur_p.size;
probabilities_tokens = new llama_token[probabilities_size];
probabilities_probs = new float[probabilities_size];
float maxLogit = cur_p.size > 0 ? cur_p.data[0].logit : -INFINITY;
for (size_t i = 0; i < cur_p.size; i++) {
auto logit = cur_p.data[i].logit;
probabilities_tokens[i] = cur_p.data[i].id;
probabilities_probs[i] = logit;
if (logit > maxLogit) {
maxLogit = logit;
}
}
if (probabilities_size > 0 && maxLogit != -INFINITY) {
float sum = 0.0f;
for (size_t i = 0; i < probabilities_size; i++) {
float prob = expf(probabilities_probs[i] - maxLogit);
probabilities_probs[i] = prob;
sum += prob;
}
for (size_t i = 0; i < probabilities_size; i++) {
probabilities_probs[i] /= sum;
}
}
has_probabilities = true;
}
if (returnConfidence) {
if (has_probabilities && cur_p.selected < probabilities_size) {
tokenConfidence = probabilities_probs[cur_p.selected];
} else {
float maxLogit = cur_p.data[0].logit;
float sum = 0.0f;
for (size_t i = 0; i < cur_p.size; i++) {
auto logit = cur_p.data[i].logit;
if (logit > maxLogit) {
maxLogit = logit;
}
}
for (size_t i = 0; i < cur_p.size; i++) {
sum += expf(cur_p.data[i].logit - maxLogit);
}
tokenConfidence = expf(cur_p.data[cur_p.selected].logit - maxLogit) / sum;
}
}
try {
sampler->acceptToken(new_token_id);
result = new_token_id;
} catch (const std::exception& e) {
SetError(std::string("Failed to accept token in sampler: ") + e.what());
} catch(...) {
SetError("Unknown error when calling \"acceptToken\"");
}
}
void OnOK() {
Napi::Number resultToken;
if (no_output) {
resultToken = Napi::Number::New(Env(), -1);
} else {
resultToken = Napi::Number::New(Env(), static_cast<uint32_t>(result));
}
if (!arrayResult) {
deferred.Resolve(resultToken);
return;
}
Napi::Array resultArray = Napi::Array::New(Env(), 2);
resultArray.Set(Napi::Number::New(Env(), 0), resultToken);
if (has_probabilities) {
Napi::Array probabilities = Napi::Array::New(Env(), probabilities_size * 2);
for (size_t i = 0; i < probabilities_size; i++) {
probabilities.Set(i * 2, Napi::Number::New(Env(), probabilities_tokens[i]));
probabilities.Set(i * 2 + 1, Napi::Number::New(Env(), probabilities_probs[i]));
}
resultArray.Set(1, probabilities);
}
if (returnConfidence && tokenConfidence != -1) {
resultArray.Set(2, Napi::Number::New(Env(), tokenConfidence));
}
deferred.Resolve(resultArray);
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
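// Illustration only (not part of this file): the probability and confidence
// code above is the usual numerically stable softmax. Subtracting the maximum
// logit before expf() keeps the exponentials from overflowing, and the shift
// cancels out in the final division, so the probabilities are unchanged.
// Reduced to a standalone helper:
static void exampleStableSoftmax(float* values, size_t count) {
    if (count == 0) {
        return;
    }
    float maxLogit = values[0];
    for (size_t i = 1; i < count; i++) {
        if (values[i] > maxLogit) {
            maxLogit = values[i];
        }
    }
    float sum = 0.0f;
    for (size_t i = 0; i < count; i++) {
        values[i] = expf(values[i] - maxLogit);
        sum += values[i];
    }
    for (size_t i = 0; i < count; i++) {
        values[i] /= sum; // values now hold probabilities that sum to 1
    }
}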
AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
model->Ref();
context_params = llama_context_default_params();
context_params.n_ctx = 4096;
context_params.n_threads = std::max(cpu_get_num_math(), 1);
context_params.n_threads_batch = context_params.n_threads;
context_params.no_perf = true;
context_params.swa_full = false;
if (info.Length() > 1 && info[1].IsObject()) {
Napi::Object options = info[1].As<Napi::Object>();
if (options.Has("contextSize")) {
context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().Uint32Value();
}
if (options.Has("batchSize")) {
context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
context_params.n_ubatch = context_params.n_batch; // the batch queue is managed on the JS side, so there's no need to manage it on the C++ side
}
if (options.Has("sequences")) {
context_params.n_seq_max = options.Get("sequences").As<Napi::Number>().Uint32Value();
}
if (options.Has("embeddings")) {
context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
}
if (options.Has("ranking") && options.Get("ranking").As<Napi::Boolean>().Value()) {
context_params.pooling_type = LLAMA_POOLING_TYPE_RANK;
}
if (options.Has("flashAttention")) {
bool flashAttention = options.Get("flashAttention").As<Napi::Boolean>().Value();
context_params.flash_attn_type = flashAttention ? LLAMA_FLASH_ATTN_TYPE_ENABLED : LLAMA_FLASH_ATTN_TYPE_DISABLED;
}
if (options.Has("threads")) {
const auto n_threads = options.Get("threads").As<Napi::Number>().Int32Value();
const auto resolved_n_threads = n_threads == 0 ? std::max((int32_t)std::thread::hardware_concurrency(), context_params.n_threads) : n_threads;
context_params.n_threads = resolved_n_threads;
context_params.n_threads_batch = resolved_n_threads;
}
if (options.Has("performanceTracking")) {
context_params.no_perf = !(options.Get("performanceTracking").As<Napi::Boolean>().Value());
}
if (options.Has("swaFullCache")) {
context_params.swa_full = options.Get("swaFullCache").As<Napi::Boolean>().Value();
}
}
}
AddonContext::~AddonContext() {
dispose();
}
void AddonContext::dispose() {
if (disposed) {
return;
}
disposed = true;
if (contextLoaded) {
contextLoaded = false;
llama_free(ctx);
adjustNapiExternalMemorySubtract(Env(), loadedContextMemorySize);
loadedContextMemorySize = 0;
}
model->Unref();
disposeBatch();
}
void AddonContext::disposeBatch() {
if (!has_batch) {
return;
}
llama_batch_free(batch);
has_batch = false;
batch_n_tokens = 0;
adjustNapiExternalMemorySubtract(Env(), batchMemorySize);
batchMemorySize = 0;
}
Napi::Value AddonContext::Init(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
AddonContextLoadContextWorker* worker = new AddonContextLoadContextWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonContext::Dispose(const Napi::CallbackInfo& info) {
if (disposed) {
return info.Env().Undefined();
}
if (contextLoaded) {
contextLoaded = false;
AddonContextUnloadContextWorker* worker = new AddonContextUnloadContextWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
} else {
dispose();
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
deferred.Resolve(info.Env().Undefined());
return deferred.Promise();
}
}
Napi::Value AddonContext::GetContextSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_n_ctx(ctx));
}
Napi::Value AddonContext::InitBatch(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
if (has_batch) {
llama_batch_free(batch);
}
int32_t n_tokens = info[0].As<Napi::Number>().Int32Value();
batch = llama_batch_init(n_tokens, 0, 1);
has_batch = true;
batch_n_tokens = n_tokens;
uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_model_n_embd(model->model), context_params.n_batch);
if (newBatchMemorySize > batchMemorySize) {
adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
batchMemorySize = newBatchMemorySize;
} else if (newBatchMemorySize < batchMemorySize) {
adjustNapiExternalMemorySubtract(Env(), batchMemorySize - newBatchMemorySize);
batchMemorySize = newBatchMemorySize;
}
return info.Env().Undefined();
}
Napi::Value AddonContext::DisposeBatch(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
disposeBatch();
return info.Env().Undefined();
}
Napi::Value AddonContext::AddToBatch(const Napi::CallbackInfo& info) {
if (!has_batch) {
Napi::Error::New(info.Env(), "No batch is initialized").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
int32_t firstTokenContextIndex = info[1].As<Napi::Number>().Int32Value();
Napi::Uint32Array tokens = info[2].As<Napi::Uint32Array>();
Napi::Uint32Array tokenLogitIndexes = info[3].As<Napi::Uint32Array>();
auto tokensLength = tokens.ElementLength();
auto tokenLogitIndexesLength = tokenLogitIndexes.ElementLength();
GGML_ASSERT(batch.n_tokens + tokensLength <= batch_n_tokens);
Napi::Uint32Array resLogitIndexes = Napi::Uint32Array::New(info.Env(), tokenLogitIndexesLength);
for (size_t i = 0, l = 0; i < tokensLength; i++) {
if (l < tokenLogitIndexesLength && tokenLogitIndexes[l] == i) {
common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, true);
resLogitIndexes[l] = batch.n_tokens - 1;
l++;
} else {
common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, false);
}
}
return resLogitIndexes;
}
Napi::Value AddonContext::DisposeSequence(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
bool result = llama_memory_seq_rm(llama_get_memory(ctx), sequenceId, -1, -1);
if (!result) {
Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return info.Env().Undefined();
}
Napi::Value AddonContext::RemoveTokenCellsFromSequence(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
bool result = llama_memory_seq_rm(llama_get_memory(ctx), sequenceId, startPos, endPos);
return Napi::Boolean::New(info.Env(), result);
}
Napi::Value AddonContext::ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
llama_memory_seq_add(llama_get_memory(ctx), sequenceId, startPos, endPos, shiftDelta);
return info.Env().Undefined();
}
Napi::Value AddonContext::GetSequenceKvCacheMinPosition(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
const auto minPosition = llama_memory_seq_pos_min(llama_get_memory(ctx), sequenceId);
return Napi::Number::New(info.Env(), minPosition);
}
Napi::Value AddonContext::GetSequenceKvCacheMaxPosition(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
const auto maxPosition = llama_memory_seq_pos_max(llama_get_memory(ctx), sequenceId);
return Napi::Number::New(info.Env(), maxPosition);
}
Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
AddonContextSampleTokenWorker* worker = new AddonContextSampleTokenWorker(info, this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
int32_t maxVectorSize = (info.Length() > 1 && info[1].IsNumber()) ? info[1].As<Napi::Number>().Int32Value() : 0;
if (inputTokensLength <= 0) {
Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
const int n_embd = llama_model_n_embd(model->model);
const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
const auto* embeddings = pooling_type == LLAMA_POOLING_TYPE_NONE ? NULL : llama_get_embeddings_seq(ctx, 0);
if (embeddings == NULL) {
embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
}
if (embeddings == NULL) {
Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
return info.Env().Undefined();
}
size_t resultSize = maxVectorSize == 0 ? n_embd : std::min(n_embd, maxVectorSize);
Napi::Float64Array result = Napi::Float64Array::New(info.Env(), resultSize);
for (size_t i = 0; i < resultSize; i++) {
result[i] = embeddings[i];
}
return result;
}
Napi::Value AddonContext::GetStateSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_state_get_size(ctx));
}
Napi::Value AddonContext::GetThreads(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_n_threads(ctx));
}
Napi::Value AddonContext::SetThreads(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
const auto threads = info[0].As<Napi::Number>().Int32Value();
const auto resolvedThreads = threads == 0
? std::max((int32_t)std::thread::hardware_concurrency(), std::max(cpu_get_num_math(), 1))
: threads;
if (llama_n_threads(ctx) != resolvedThreads) {
llama_set_n_threads(ctx, resolvedThreads, resolvedThreads);
}
return info.Env().Undefined();
}
class AddonContextSaveSequenceStateToFileWorker : public Napi::AsyncWorker {
public:
AddonContext* context;
std::string filepath;
llama_seq_id sequenceId;
std::vector<llama_token> tokens;
size_t savedFileSize = 0;
AddonContextSaveSequenceStateToFileWorker(const Napi::CallbackInfo& info, AddonContext* context)
: Napi::AsyncWorker(info.Env(), "AddonContextSaveSequenceStateToFileWorker"),
context(context),
deferred(Napi::Promise::Deferred::New(info.Env())) {
context->Ref();
filepath = info[0].As<Napi::String>().Utf8Value();
sequenceId = info[1].As<Napi::Number>().Int32Value();
Napi::Uint32Array inputTokens = info[2].As<Napi::Uint32Array>();
tokens.resize(inputTokens.ElementLength());
for (size_t i = 0; i < tokens.size(); i++) {
tokens[i] = inputTokens[i];
}
}
~AddonContextSaveSequenceStateToFileWorker() {
context->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
savedFileSize = llama_state_seq_save_file(context->ctx, filepath.c_str(), sequenceId, tokens.data(), tokens.size());
if (savedFileSize == 0) {
SetError("Failed to save state to file");
return;
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_state_seq_save_file\"");
}
}
void OnOK() {
deferred.Resolve(Napi::Number::New(Env(), savedFileSize));
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
Napi::Value AddonContext::SaveSequenceStateToFile(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
AddonContextSaveSequenceStateToFileWorker* worker = new AddonContextSaveSequenceStateToFileWorker(info, this);
worker->Queue();
return worker->GetPromise();
}
class AddonContextLoadSequenceStateFromFileWorker : public Napi::AsyncWorker {
public:
AddonContext* context;
std::string filepath;
llama_seq_id sequenceId;
size_t maxContextSize;
std::vector<llama_token> tokens;
AddonContextLoadSequenceStateFromFileWorker(const Napi::CallbackInfo& info, AddonContext* context)
: Napi::AsyncWorker(info.Env(), "AddonContextLoadSequenceStateFromFileWorker"),
context(context),
deferred(Napi::Promise::Deferred::New(info.Env())) {
context->Ref();
filepath = info[0].As<Napi::String>().Utf8Value();
sequenceId = info[1].As<Napi::Number>().Int32Value();
maxContextSize = info[2].As<Napi::Number>().Uint32Value();
tokens.resize(maxContextSize);
}
~AddonContextLoadSequenceStateFromFileWorker() {
context->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
size_t tokenCount = 0;
const size_t fileSize = llama_state_seq_load_file(context->ctx, filepath.c_str(), sequenceId, tokens.data(), tokens.size(), &tokenCount);
if (fileSize == 0) {
SetError("Failed to load state from file. Current context sequence size may be smaller that the state of the file");
return;
}
tokens.resize(tokenCount);
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_state_seq_load_file\"");
}
}
void OnOK() {
size_t tokenCount = tokens.size();
Napi::Uint32Array result = Napi::Uint32Array::New(Env(), tokenCount);
for (size_t i = 0; i < tokenCount; i++) {
result[i] = tokens[i];
}
deferred.Resolve(result);
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
Napi::Value AddonContext::LoadSequenceStateFromFile(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
AddonContextLoadSequenceStateFromFileWorker* worker = new AddonContextLoadSequenceStateFromFileWorker(info, this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonContext::PrintTimings(const Napi::CallbackInfo& info) {
llama_perf_context_print(ctx);
llama_perf_context_reset(ctx);
return info.Env().Undefined();
}
Napi::Value AddonContext::EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info) {
constexpr auto vocabSizeMaxDifference = 128; // SPEC_VOCAB_MAX_SIZE_DIFFERENCE
constexpr auto vocabCheckStartTokenId = 5; // SPEC_VOCAB_CHECK_START_TOKEN_ID
const AddonContext * draftContext = Napi::ObjectWrap<AddonContext>::Unwrap(info[0].As<Napi::Object>());
const auto currentCtx = ctx;
const auto draftCtx = draftContext->ctx;
const auto currentModel = model->model;
const auto draftModel = draftContext->model->model;
const auto currentVocab = model->vocab;
const auto draftVocab = draftContext->model->vocab;
if (llama_vocab_type(currentVocab) != llama_vocab_type(draftVocab)) {
Napi::Error::New(info.Env(), "Speculative draft model vocabulary type must match the target model vocabulary type").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
if (llama_vocab_get_add_bos(currentVocab) != llama_vocab_get_add_bos(draftVocab) ||
llama_vocab_get_add_eos(currentVocab) != llama_vocab_get_add_eos(draftVocab) ||
llama_vocab_bos(currentVocab) != llama_vocab_bos(draftVocab) ||
llama_vocab_eos(currentVocab) != llama_vocab_eos(draftVocab)
) {
Napi::Error::New(info.Env(), "Speculative draft model special tokens must match the target model special tokens").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
const int currentModelVocabSize = llama_vocab_n_tokens(currentVocab);
const int draftModelVocabSize = llama_vocab_n_tokens(draftVocab);
const int vocabDiff = std::abs(currentModelVocabSize - draftModelVocabSize);
if (vocabDiff > vocabSizeMaxDifference) {
Napi::Error::New(
info.Env(),
std::string("Speculative draft model vocabulary must closely match the target model vocabulary size (vocabulary size difference: ") +
std::to_string(vocabDiff) + std::string(", max allowed: ") + std::to_string(vocabSizeMaxDifference) + std::string(")")
).ThrowAsJavaScriptException();
return info.Env().Undefined();
}
const int minVocabSize = std::min(currentModelVocabSize, draftModelVocabSize);
for (int i = vocabCheckStartTokenId; i < minVocabSize; ++i) {
const char * currentTokenText = llama_vocab_get_text(currentVocab, i);
const char * draftTokenText = llama_vocab_get_text(draftVocab, i);
if (std::strcmp(currentTokenText, draftTokenText) != 0) {
Napi::Error::New(
info.Env(),
std::string("Speculative draft model vocabulary must match the target model vocabulary, but token ") +
std::to_string(i) + std::string(" content differs. Target: \"") + std::string(currentTokenText) +
std::string("\", Draft: \"") + std::string(draftTokenText) + std::string("")
).ThrowAsJavaScriptException();
return info.Env().Undefined();
}
}
return info.Env().Undefined();
}
Napi::Value AddonContext::SetLora(const Napi::CallbackInfo& info) {
AddonModelLora* lora = Napi::ObjectWrap<AddonModelLora>::Unwrap(info[0].As<Napi::Object>());
float scale = info[1].As<Napi::Number>().FloatValue();
llama_set_adapter_lora(ctx, lora->lora_adapter, scale);
return info.Env().Undefined();
}
void AddonContext::init(Napi::Object exports) {
exports.Set(
"AddonContext",
DefineClass(
exports.Env(),
"AddonContext",
{
InstanceMethod("init", &AddonContext::Init),
InstanceMethod("getContextSize", &AddonContext::GetContextSize),
InstanceMethod("initBatch", &AddonContext::InitBatch),
InstanceMethod("addToBatch", &AddonContext::AddToBatch),
InstanceMethod("disposeSequence", &AddonContext::DisposeSequence),
InstanceMethod("removeTokenCellsFromSequence", &AddonContext::RemoveTokenCellsFromSequence),
InstanceMethod("shiftSequenceTokenCells", &AddonContext::ShiftSequenceTokenCells),
InstanceMethod("getSequenceKvCacheMinPosition", &AddonContext::GetSequenceKvCacheMinPosition),
InstanceMethod("getSequenceKvCacheMaxPosition", &AddonContext::GetSequenceKvCacheMaxPosition),
InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
InstanceMethod("sampleToken", &AddonContext::SampleToken),
InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
InstanceMethod("getStateSize", &AddonContext::GetStateSize),
InstanceMethod("getThreads", &AddonContext::GetThreads),
InstanceMethod("setThreads", &AddonContext::SetThreads),
InstanceMethod("printTimings", &AddonContext::PrintTimings),
InstanceMethod("ensureDraftContextIsCompatibleForSpeculative", &AddonContext::EnsureDraftContextIsCompatibleForSpeculative),
InstanceMethod("saveSequenceStateToFile", &AddonContext::SaveSequenceStateToFile),
InstanceMethod("loadSequenceStateFromFile", &AddonContext::LoadSequenceStateFromFile),
InstanceMethod("setLora", &AddonContext::SetLora),
InstanceMethod("dispose", &AddonContext::Dispose),
}
)
);
}
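
AddonContext::init above only registers the AddonContext class on whatever exports object it is handed; the wiring into a Node.js module happens in the addon's entry point, which is not part of this excerpt. A hypothetical minimal entry point (illustrative sketch only, not the package's actual addon.cpp) would look roughly like:

#include "napi.h"
#include "AddonContext.h"

static Napi::Object InitModule(Napi::Env env, Napi::Object exports) {
    AddonContext::init(exports); // the other Addon* classes would be registered here too
    return exports;
}

NODE_API_MODULE(llama_addon, InitModule)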

58
node_modules/node-llama-cpp/llama/addon/AddonContext.h generated vendored Normal file

@@ -0,0 +1,58 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
#include "AddonSampler.h"
class AddonContext : public Napi::ObjectWrap<AddonContext> {
public:
AddonModel* model;
llama_context_params context_params;
llama_context* ctx;
llama_batch batch;
uint64_t batchMemorySize = 0;
bool has_batch = false;
int32_t batch_n_tokens = 0;
int n_cur = 0;
uint64_t loadedContextMemorySize = 0;
bool contextLoaded = false;
bool disposed = false;
AddonContext(const Napi::CallbackInfo& info);
~AddonContext();
void dispose();
void disposeBatch();
Napi::Value Init(const Napi::CallbackInfo& info);
Napi::Value Dispose(const Napi::CallbackInfo& info);
Napi::Value GetContextSize(const Napi::CallbackInfo& info);
Napi::Value InitBatch(const Napi::CallbackInfo& info);
Napi::Value DisposeBatch(const Napi::CallbackInfo& info);
Napi::Value AddToBatch(const Napi::CallbackInfo& info);
Napi::Value DisposeSequence(const Napi::CallbackInfo& info);
Napi::Value RemoveTokenCellsFromSequence(const Napi::CallbackInfo& info);
Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info);
Napi::Value GetSequenceKvCacheMinPosition(const Napi::CallbackInfo& info);
Napi::Value GetSequenceKvCacheMaxPosition(const Napi::CallbackInfo& info);
Napi::Value DecodeBatch(const Napi::CallbackInfo& info);
Napi::Value SampleToken(const Napi::CallbackInfo& info);
Napi::Value GetEmbedding(const Napi::CallbackInfo& info);
Napi::Value GetStateSize(const Napi::CallbackInfo& info);
Napi::Value GetThreads(const Napi::CallbackInfo& info);
Napi::Value SetThreads(const Napi::CallbackInfo& info);
Napi::Value SaveSequenceStateToFile(const Napi::CallbackInfo& info);
Napi::Value LoadSequenceStateFromFile(const Napi::CallbackInfo& info);
Napi::Value PrintTimings(const Napi::CallbackInfo& info);
Napi::Value EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info);
Napi::Value SetLora(const Napi::CallbackInfo& info);
static void init(Napi::Object exports);
};

92
node_modules/node-llama-cpp/llama/addon/AddonGrammar.cpp generated vendored Normal file

@@ -0,0 +1,92 @@
#include "addonGlobals.h"
#include "AddonGrammar.h"
AddonGrammar::AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
grammarCode = info[0].As<Napi::String>().Utf8Value();
if (info.Length() > 1 && info[1].IsObject()) {
Napi::Object options = info[1].As<Napi::Object>();
if (options.Has("addonExports")) {
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
hasAddonExportsRef = true;
}
if (options.Has("rootRuleName")) {
rootRuleName = options.Get("rootRuleName").As<Napi::String>().Utf8Value();
}
}
auto parsed_grammar = llama_grammar_init_impl(nullptr, grammarCode.c_str(), rootRuleName.c_str(), false, nullptr, 0, nullptr, 0);
// will be null if there are parse errors
if (parsed_grammar == nullptr) {
Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
return;
}
llama_grammar_free_impl(parsed_grammar);
}
AddonGrammar::~AddonGrammar() {
if (hasAddonExportsRef) {
addonExportsRef.Unref();
hasAddonExportsRef = false;
}
}
Napi::Value AddonGrammar::isTextCompatible(const Napi::CallbackInfo& info) {
const std::string testText = info[0].As<Napi::String>().Utf8Value();
auto parsed_grammar = llama_grammar_init_impl(nullptr, grammarCode.c_str(), rootRuleName.c_str(), false, nullptr, 0, nullptr, 0);
// will be null if there are parse errors
if (parsed_grammar == nullptr) {
Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
return Napi::Boolean::New(info.Env(), false);
}
const auto cpts = unicode_cpts_from_utf8(testText);
llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(parsed_grammar);
for (const auto & cpt : cpts) {
try {
llama_grammar_accept(parsed_grammar, cpt);
} catch (const std::exception & e) {
llama_grammar_free_impl(parsed_grammar);
return Napi::Boolean::New(info.Env(), false);
} catch (...) {
llama_grammar_free_impl(parsed_grammar);
return Napi::Boolean::New(info.Env(), false);
}
if (stacks_cur.empty()) {
// no stacks means that the grammar failed to match at this point
llama_grammar_free_impl(parsed_grammar);
return Napi::Boolean::New(info.Env(), false);
}
}
for (const auto & stack : stacks_cur) {
if (stack.empty()) {
// an empty stack means that the grammar has been completed
llama_grammar_free_impl(parsed_grammar);
return Napi::Boolean::New(info.Env(), true);
}
}
llama_grammar_free_impl(parsed_grammar);
return Napi::Boolean::New(info.Env(), false);
}
void AddonGrammar::init(Napi::Object exports) {
exports.Set(
"AddonGrammar",
DefineClass(
exports.Env(),
"AddonGrammar",
{
InstanceMethod("isTextCompatible", &AddonGrammar::isTextCompatible),
}
)
);
}

22
node_modules/node-llama-cpp/llama/addon/AddonGrammar.h generated vendored Normal file

@@ -0,0 +1,22 @@
#pragma once
#include "llama.h"
#include "common/common.h"
#include "llama-grammar.h"
#include "src/unicode.h"
#include "napi.h"
#include "addonGlobals.h"
class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
public:
std::string grammarCode = "";
std::string rootRuleName = "root";
Napi::Reference<Napi::Object> addonExportsRef;
bool hasAddonExportsRef = false;
AddonGrammar(const Napi::CallbackInfo& info);
~AddonGrammar();
Napi::Value isTextCompatible(const Napi::CallbackInfo& info);
static void init(Napi::Object exports);
};

36
node_modules/node-llama-cpp/llama/addon/AddonGrammarEvaluationState.cpp generated vendored Normal file

@@ -0,0 +1,36 @@
#include <sstream>
#include "addonGlobals.h"
#include "common/common.h"
#include "llama.h"
#include "AddonGrammarEvaluationState.h"
#include "AddonGrammar.h"
AddonGrammarEvaluationState::AddonGrammarEvaluationState(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammarEvaluationState>(info) {
if (info.Length() == 1) {
AddonGrammarEvaluationState* existingState = Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
model = existingState->model;
model->Ref();
grammarDef = existingState->grammarDef;
grammarDef->Ref();
sampler = llama_sampler_clone(existingState->sampler);
} else {
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
model->Ref();
grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[1].As<Napi::Object>());
grammarDef->Ref();
sampler = llama_sampler_init_grammar(model->vocab, grammarDef->grammarCode.c_str(), grammarDef->rootRuleName.c_str());
}
}
AddonGrammarEvaluationState::~AddonGrammarEvaluationState() {
llama_sampler_free(sampler);
grammarDef->Unref();
model->Unref();
}
void AddonGrammarEvaluationState::init(Napi::Object exports) {
exports.Set("AddonGrammarEvaluationState", DefineClass(exports.Env(), "AddonGrammarEvaluationState", {}));
}
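
The copy-constructor path above relies on llama_sampler_clone to duplicate the grammar sampler together with its accumulated state, so a forked evaluation state continues from the same grammar position instead of re-parsing the grammar. A hedged standalone sketch of the same idea (the grammar string and helper name are illustrative only):

#include "llama.h"

// Returns a grammar sampler: a clone of an existing one when available
// (keeping its accumulated state), otherwise a fresh one parsed from a
// hypothetical GBNF grammar string.
static llama_sampler* exampleForkGrammarSampler(const llama_vocab* vocab, llama_sampler* existing) {
    if (existing != nullptr) {
        return llama_sampler_clone(existing);
    }
    return llama_sampler_init_grammar(vocab, "root ::= \"yes\" | \"no\"", "root");
}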

17
node_modules/node-llama-cpp/llama/addon/AddonGrammarEvaluationState.h generated vendored Normal file

@@ -0,0 +1,17 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
#include "AddonModel.h"
class AddonGrammarEvaluationState : public Napi::ObjectWrap<AddonGrammarEvaluationState> {
public:
AddonModel* model;
AddonGrammar* grammarDef;
llama_sampler * sampler = nullptr;
AddonGrammarEvaluationState(const Napi::CallbackInfo& info);
~AddonGrammarEvaluationState();
static void init(Napi::Object exports);
};

691
node_modules/node-llama-cpp/llama/addon/AddonModel.cpp generated vendored Normal file

@@ -0,0 +1,691 @@
#include <sstream>
#include "addonGlobals.h"
#include "globals/addonLog.h"
#include "globals/addonProgress.h"
#include "common/common.h"
#include "llama.h"
#include "AddonModel.h"
#include "AddonModelData.h"
#include "AddonModelLora.h"
static Napi::Value getNapiToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
if (token < 0 || token == LLAMA_TOKEN_NULL) {
return Napi::Number::From(info.Env(), -1);
}
auto tokenAttributes = llama_vocab_get_attr(vocab, token);
if (tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED || tokenAttributes & LLAMA_TOKEN_ATTR_UNKNOWN) {
return Napi::Number::From(info.Env(), -1);
}
return Napi::Number::From(info.Env(), token);
}
static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
if (token < 0) {
return Napi::Number::From(info.Env(), -1);
}
auto tokenAttributes = llama_vocab_get_attr(vocab, token);
if (!(tokenAttributes & LLAMA_TOKEN_ATTR_CONTROL) && !(tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED)) {
return Napi::Number::From(info.Env(), -1);
}
return Napi::Number::From(info.Env(), token);
}
static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
AddonModel* addonModel = (AddonModel *) user_data;
unsigned percentage = (unsigned) (100 * progress);
if (percentage > addonModel->modelLoadPercentage) {
addonModel->modelLoadPercentage = percentage;
// original llama.cpp logs
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
if (percentage >= 100) {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
}
}
if (progress > addonModel->rawModelLoadPercentage) {
addonModel->rawModelLoadPercentage = progress;
if (addonModel->onLoadProgressEventCallbackSet) {
addon_progress_event* data = new addon_progress_event {
progress
};
auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);
if (status != napi_ok) {
delete data;
}
}
}
return !(addonModel->abortModelLoad);
}
class AddonModelLoadModelWorker : public Napi::AsyncWorker {
public:
AddonModel* model;
AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
: Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
model(model),
deferred(Napi::Promise::Deferred::New(env)) {
model->Ref();
}
~AddonModelLoadModelWorker() {
model->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
model->model = llama_model_load_from_file(model->modelPath.c_str(), model->model_params);
model->vocab = llama_model_get_vocab(model->model);
model->modelLoaded = model->model != nullptr;
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_model_load_from_file\"");
}
}
void OnOK() {
if (model->modelLoaded) {
uint64_t modelSize = llama_model_size(model->model);
adjustNapiExternalMemoryAdd(Env(), modelSize);
model->loadedModelSize = modelSize;
}
deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
if (model->onLoadProgressEventCallbackSet) {
model->addonThreadSafeOnLoadProgressEventCallback.Release();
}
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
public:
AddonModel* model;
AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
: Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
model(model),
deferred(Napi::Promise::Deferred::New(env)) {
model->Ref();
}
~AddonModelUnloadModelWorker() {
model->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
llama_model_free(model->model);
model->modelLoaded = false;
model->dispose();
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_model_free\"");
}
}
void OnOK() {
adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
model->loadedModelSize = 0;
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
public:
AddonModelLora* modelLora;
AddonModelLoadLoraWorker(
const Napi::Env& env,
AddonModelLora* modelLora
)
: Napi::AsyncWorker(env, "AddonModelLoadLoraWorker"),
modelLora(modelLora),
deferred(Napi::Promise::Deferred::New(env)) {
modelLora->model->Ref();
modelLora->Ref();
}
~AddonModelLoadLoraWorker() {
modelLora->model->Unref();
modelLora->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
const auto loraAdapter = llama_adapter_lora_init(modelLora->model->model, modelLora->loraFilePath.c_str());
if (loraAdapter == nullptr) {
SetError(
std::string(
std::string("Failed to initialize LoRA adapter \"" + modelLora->loraFilePath + "\"")
)
);
return;
}
modelLora->lora_adapter = loraAdapter;
modelLora->model->Ref();
if (modelLora->model->data != nullptr) {
modelLora->model->data->loraAdapters.insert(modelLora);
} else {
modelLora->dispose(true);
SetError("Model data is not initialized");
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_adapter_lora_init\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
AddonModel::AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
data = new AddonModelData();
model_params = llama_model_default_params();
// Get the model path
modelPath = info[0].As<Napi::String>().Utf8Value();
if (info.Length() > 1 && info[1].IsObject()) {
Napi::Object options = info[1].As<Napi::Object>();
if (options.Has("addonExports")) {
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
hasAddonExportsRef = true;
}
if (options.Has("gpuLayers")) {
model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
}
if (options.Has("vocabOnly")) {
model_params.vocab_only = options.Get("vocabOnly").As<Napi::Boolean>().Value();
}
if (options.Has("useMmap")) {
model_params.use_mmap = options.Get("useMmap").As<Napi::Boolean>().Value();
}
if (options.Has("useDirectIo")) {
model_params.use_direct_io = options.Get("useDirectIo").As<Napi::Boolean>().Value();
}
if (options.Has("useMlock")) {
model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
}
if (options.Has("checkTensors")) {
model_params.check_tensors = options.Get("checkTensors").As<Napi::Boolean>().Value();
}
if (options.Has("onLoadProgress")) {
auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
if (onLoadProgressJSCallback.IsFunction()) {
AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
info.Env(),
onLoadProgressJSCallback,
"onLoadProgressCallback",
0,
1,
context,
[](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
addonModel->onLoadProgressEventCallbackSet = false;
delete ctx;
},
this
);
onLoadProgressEventCallbackSet = true;
}
}
if (options.Has("hasLoadAbortSignal")) {
hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
}
if (options.Has("overridesList")) {
Napi::Array overridesList = options.Get("overridesList").As<Napi::Array>();
kv_overrides.reserve(overridesList.Length());
for (uint32_t i = 0; i < overridesList.Length(); i++) {
Napi::Array overrideItem = overridesList.Get(i).As<Napi::Array>();
auto key = overrideItem.Get((uint32_t)0).As<Napi::String>().Utf8Value();
auto value = overrideItem.Get((uint32_t)1);
if (key.length() > 127) {
continue;
}
llama_model_kv_override kvo;
std::strncpy(kvo.key, key.c_str(), key.length());
kvo.key[key.length()] = 0;
if (value.IsString()) {
auto valueString = value.As<Napi::String>().Utf8Value();
if (valueString.length() > 127) {
continue;
}
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
std::strncpy(kvo.val_str, valueString.c_str(), valueString.length());
kvo.val_str[valueString.length()] = 0;
fputs(std::string("Override: " + key + " = " + valueString + "\n").c_str(), stdout);
fflush(stdout);
} else if (value.IsNumber() || value.IsBigInt()) {
auto numberType = overrideItem.Get((uint32_t)2).As<Napi::Number>().Int32Value();
if (numberType == 0) {
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
kvo.val_i64 = value.As<Napi::Number>().Int64Value();
} else {
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
kvo.val_f64 = value.As<Napi::Number>().DoubleValue();
}
continue;
} else if (value.IsBoolean()) {
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
kvo.val_bool = value.As<Napi::Boolean>().Value();
}
kv_overrides.emplace_back(std::move(kvo));
}
if (!kv_overrides.empty()) {
kv_overrides.emplace_back();
kv_overrides.back().key[0] = 0;
}
model_params.kv_overrides = kv_overrides.data();
}
if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
model_params.progress_callback_user_data = &(*this);
model_params.progress_callback = llamaModelParamsProgressCallback;
}
}
}
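// Illustration only (not part of this file): the overridesList handling above
// produces a llama_model_kv_override array for model_params.kv_overrides.
// Each entry copies the key into the fixed 128-byte buffer, the tag selects
// which union member is read, and an entry with an empty key terminates the
// list. A hedged sketch with a hypothetical key and value:
static std::vector<llama_model_kv_override> exampleBuildOverrides() {
    std::vector<llama_model_kv_override> overrides;

    llama_model_kv_override kvo;
    const std::string key = "tokenizer.ggml.add_bos_token"; // hypothetical override
    std::strncpy(kvo.key, key.c_str(), key.length());
    kvo.key[key.length()] = 0;
    kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
    kvo.val_bool = false;
    overrides.emplace_back(kvo);

    // terminator entry: an empty key marks the end of the list
    overrides.emplace_back();
    overrides.back().key[0] = 0;
    return overrides;
}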
AddonModel::~AddonModel() {
dispose();
}
void AddonModel::dispose() {
if (disposed) {
return;
}
disposed = true;
if (data != nullptr) {
auto currentData = data;
data = nullptr;
delete currentData;
}
if (modelLoaded) {
modelLoaded = false;
llama_model_free(model);
adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
loadedModelSize = 0;
}
if (hasAddonExportsRef) {
addonExportsRef.Unref();
hasAddonExportsRef = false;
}
}
Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
AddonModelLoadModelWorker* worker = new AddonModelLoadModelWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonModel::LoadLora(const Napi::CallbackInfo& info) {
AddonModelLora* modelLora = Napi::ObjectWrap<AddonModelLora>::Unwrap(info[0].As<Napi::Object>());
AddonModelLoadLoraWorker* worker = new AddonModelLoadLoraWorker(this->Env(), modelLora);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonModel::AbortActiveModelLoad(const Napi::CallbackInfo& info) {
abortModelLoad = true;
return info.Env().Undefined();
}
Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
if (disposed) {
return info.Env().Undefined();
}
if (modelLoaded) {
modelLoaded = false;
AddonModelUnloadModelWorker* worker = new AddonModelUnloadModelWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
} else {
dispose();
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
deferred.Resolve(info.Env().Undefined());
return deferred.Promise();
}
}
Napi::Value AddonModel::Tokenize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
std::string text = info[0].As<Napi::String>().Utf8Value();
bool specialTokens = info[1].As<Napi::Boolean>().Value();
std::vector<llama_token> tokens = common_tokenize(vocab, text, false, specialTokens);
Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
for (size_t i = 0; i < tokens.size(); ++i) {
result[i] = static_cast<uint32_t>(tokens[i]);
}
return result;
}
Napi::Value AddonModel::Detokenize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
bool decodeSpecialTokens = info.Length() > 1
? info[1].As<Napi::Boolean>().Value()
: false;
std::string result;
result.resize(std::max(result.capacity(), tokens.ElementLength()));
int n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
if (n_chars < 0) {
result.resize(-n_chars);
n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
GGML_ASSERT(n_chars <= result.size()); // whitespace trimming is performed after per-token detokenization
}
result.resize(n_chars);
return Napi::String::New(info.Env(), result);
}
Napi::Value AddonModel::GetTrainContextSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_model_n_ctx_train(model));
}
Napi::Value AddonModel::GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_model_n_embd(model));
}
Napi::Value AddonModel::GetTotalSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_model_size(model));
}
Napi::Value AddonModel::GetTotalParameters(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_model_n_params(model));
}
Napi::Value AddonModel::GetModelDescription(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
char model_desc[128];
int actual_length = llama_model_desc(model, model_desc, sizeof(model_desc));
return Napi::String::New(info.Env(), model_desc, actual_length);
}
Napi::Value AddonModel::TokenBos(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiControlToken(info, vocab, llama_vocab_bos(vocab));
}
Napi::Value AddonModel::TokenEos(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiControlToken(info, vocab, llama_vocab_eos(vocab));
}
Napi::Value AddonModel::TokenNl(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_nl(vocab));
}
Napi::Value AddonModel::PrefixToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_fim_pre(vocab));
}
Napi::Value AddonModel::MiddleToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_fim_mid(vocab));
}
Napi::Value AddonModel::SuffixToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_fim_suf(vocab));
}
Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_eot(vocab));
}
Napi::Value AddonModel::SepToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_sep(vocab));
}
Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int token = info[0].As<Napi::Number>().Int32Value();
std::stringstream ss;
const char* str = llama_vocab_get_text(vocab, token);
if (str == nullptr) {
return info.Env().Undefined();
}
ss << str;
return Napi::String::New(info.Env(), ss.str());
}
Napi::Value AddonModel::GetTokenAttributes(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
if (info[0].IsNumber() == false) {
return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_ATTR_UNDEFINED));
}
int token = info[0].As<Napi::Number>().Int32Value();
auto tokenAttributes = llama_vocab_get_attr(vocab, token);
return Napi::Number::From(info.Env(), int32_t(tokenAttributes));
}
Napi::Value AddonModel::IsEogToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
if (info[0].IsNumber() == false) {
return Napi::Boolean::New(info.Env(), false);
}
int token = info[0].As<Napi::Number>().Int32Value();
return Napi::Boolean::New(info.Env(), llama_vocab_is_eog(vocab, token));
}
Napi::Value AddonModel::GetVocabularyType(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
auto vocabularyType = llama_vocab_type(vocab);
return Napi::Number::From(info.Env(), int32_t(vocabularyType));
}
Napi::Value AddonModel::ShouldPrependBosToken(const Napi::CallbackInfo& info) {
const bool addBos = llama_vocab_get_add_bos(vocab);
return Napi::Boolean::New(info.Env(), addBos);
}
Napi::Value AddonModel::ShouldAppendEosToken(const Napi::CallbackInfo& info) {
const bool addEos = llama_vocab_get_add_eos(vocab);
return Napi::Boolean::New(info.Env(), addEos);
}
Napi::Value AddonModel::GetModelSize(const Napi::CallbackInfo& info) {
return Napi::Number::From(info.Env(), llama_model_size(model));
}
void AddonModel::init(Napi::Object exports) {
exports.Set(
"AddonModel",
DefineClass(
exports.Env(),
"AddonModel",
{
InstanceMethod("init", &AddonModel::Init),
InstanceMethod("loadLora", &AddonModel::LoadLora),
InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
InstanceMethod("tokenize", &AddonModel::Tokenize),
InstanceMethod("detokenize", &AddonModel::Detokenize),
InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
InstanceMethod("getEmbeddingVectorSize", &AddonModel::GetEmbeddingVectorSize),
InstanceMethod("getTotalSize", &AddonModel::GetTotalSize),
InstanceMethod("getTotalParameters", &AddonModel::GetTotalParameters),
InstanceMethod("getModelDescription", &AddonModel::GetModelDescription),
InstanceMethod("tokenBos", &AddonModel::TokenBos),
InstanceMethod("tokenEos", &AddonModel::TokenEos),
InstanceMethod("tokenNl", &AddonModel::TokenNl),
InstanceMethod("prefixToken", &AddonModel::PrefixToken),
InstanceMethod("middleToken", &AddonModel::MiddleToken),
InstanceMethod("suffixToken", &AddonModel::SuffixToken),
InstanceMethod("eotToken", &AddonModel::EotToken),
InstanceMethod("sepToken", &AddonModel::SepToken),
InstanceMethod("getTokenString", &AddonModel::GetTokenString),
InstanceMethod("getTokenAttributes", &AddonModel::GetTokenAttributes),
InstanceMethod("isEogToken", &AddonModel::IsEogToken),
InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
InstanceMethod("shouldAppendEosToken", &AddonModel::ShouldAppendEosToken),
InstanceMethod("getModelSize", &AddonModel::GetModelSize),
InstanceMethod("dispose", &AddonModel::Dispose),
}
)
);
}

64
node_modules/node-llama-cpp/llama/addon/AddonModel.h generated vendored Normal file
View File

@@ -0,0 +1,64 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
#include "globals/addonProgress.h"
class AddonModel : public Napi::ObjectWrap<AddonModel> {
public:
llama_model_params model_params;
std::vector<llama_model_kv_override> kv_overrides;
llama_model* model;
const llama_vocab* vocab;
uint64_t loadedModelSize = 0;
Napi::Reference<Napi::Object> addonExportsRef;
bool hasAddonExportsRef = false;
AddonModelData* data;
std::string modelPath;
bool modelLoaded = false;
bool abortModelLoad = false;
bool model_load_stopped = false;
float rawModelLoadPercentage = 0;
unsigned modelLoadPercentage = 0;
AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
bool onLoadProgressEventCallbackSet = false;
bool hasLoadAbortSignal = false;
bool disposed = false;
AddonModel(const Napi::CallbackInfo& info);
~AddonModel();
void dispose();
Napi::Value Init(const Napi::CallbackInfo& info);
Napi::Value LoadLora(const Napi::CallbackInfo& info);
Napi::Value AbortActiveModelLoad(const Napi::CallbackInfo& info);
Napi::Value Dispose(const Napi::CallbackInfo& info);
Napi::Value Tokenize(const Napi::CallbackInfo& info);
Napi::Value Detokenize(const Napi::CallbackInfo& info);
Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info);
Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info);
Napi::Value GetTotalSize(const Napi::CallbackInfo& info);
Napi::Value GetTotalParameters(const Napi::CallbackInfo& info);
Napi::Value GetModelDescription(const Napi::CallbackInfo& info);
Napi::Value TokenBos(const Napi::CallbackInfo& info);
Napi::Value TokenEos(const Napi::CallbackInfo& info);
Napi::Value TokenNl(const Napi::CallbackInfo& info);
Napi::Value PrefixToken(const Napi::CallbackInfo& info);
Napi::Value MiddleToken(const Napi::CallbackInfo& info);
Napi::Value SuffixToken(const Napi::CallbackInfo& info);
Napi::Value EotToken(const Napi::CallbackInfo& info);
Napi::Value SepToken(const Napi::CallbackInfo& info);
Napi::Value GetTokenString(const Napi::CallbackInfo& info);
Napi::Value GetTokenAttributes(const Napi::CallbackInfo& info);
Napi::Value IsEogToken(const Napi::CallbackInfo& info);
Napi::Value GetVocabularyType(const Napi::CallbackInfo& info);
Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info);
Napi::Value ShouldAppendEosToken(const Napi::CallbackInfo& info);
Napi::Value GetModelSize(const Napi::CallbackInfo& info);
static void init(Napi::Object exports);
};

View File

@@ -0,0 +1,25 @@
#include <iostream>
#include "addonGlobals.h"
#include "AddonModelData.h"
#include "AddonModelLora.h"
AddonModelData::AddonModelData() {
}
AddonModelData::~AddonModelData() {
std::set<AddonModelLora *> currentLoraAdapters;
currentLoraAdapters.swap(loraAdapters);
for (auto lora : currentLoraAdapters) {
lora->dispose(true);
}
currentLoraAdapters.clear();
}
void AddonModelData::removeLora(AddonModelLora* lora) {
auto pos = loraAdapters.find(lora);
if (pos != loraAdapters.end()) {
loraAdapters.erase(pos);
}
}

View File

@@ -0,0 +1,15 @@
#pragma once
#include <set>
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
class AddonModelData {
public:
std::set<AddonModelLora *> loraAdapters;
AddonModelData();
~AddonModelData();
void removeLora(AddonModelLora* lora);
};

View File

@@ -0,0 +1,103 @@
#include "addonGlobals.h"
#include "AddonModel.h"
#include "AddonModelData.h"
#include "AddonModelLora.h"
class AddonModelLoraUnloadLoraWorker : public Napi::AsyncWorker {
public:
AddonModelLora* addonLora;
AddonModelLoraUnloadLoraWorker(const Napi::Env& env, AddonModelLora* addonLora)
: Napi::AsyncWorker(env, "AddonModelLoraUnloadLoraWorker"),
addonLora(addonLora),
deferred(Napi::Promise::Deferred::New(env)) {
addonLora->Ref();
}
~AddonModelLoraUnloadLoraWorker() {
addonLora->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
addonLora->dispose();
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_adapter_lora_free\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
AddonModelLora::AddonModelLora(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModelLora>(info) {
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
loraFilePath = info[1].As<Napi::String>().Utf8Value();
lora_adapter = nullptr;
}
AddonModelLora::~AddonModelLora() {
dispose();
}
void AddonModelLora::dispose(bool skipErase) {
if (lora_adapter != nullptr) {
lora_adapter = nullptr;
if (!skipErase && model->data != nullptr) {
model->data->removeLora(this);
}
model->Unref();
}
}
Napi::Value AddonModelLora::GetFilePath(const Napi::CallbackInfo& info) {
return Napi::String::New(info.Env(), loraFilePath);
}
Napi::Value AddonModelLora::GetUsages(const Napi::CallbackInfo& info) {
return Napi::Number::From(info.Env(), usages);
}
void AddonModelLora::SetUsages(const Napi::CallbackInfo& info, const Napi::Value &value) {
usages = value.As<Napi::Number>().Uint32Value();
}
Napi::Value AddonModelLora::Dispose(const Napi::CallbackInfo& info) {
AddonModelLoraUnloadLoraWorker* worker = new AddonModelLoraUnloadLoraWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonModelLora::GetDisposed(const Napi::CallbackInfo& info) {
return Napi::Boolean::New(info.Env(), lora_adapter == nullptr);
}
void AddonModelLora::init(Napi::Object exports) {
exports.Set(
"AddonModelLora",
DefineClass(
exports.Env(),
"AddonModelLora",
{
InstanceAccessor("usages", &AddonModelLora::GetUsages, &AddonModelLora::SetUsages),
InstanceAccessor("filePath", &AddonModelLora::GetFilePath, nullptr),
InstanceAccessor("disposed", &AddonModelLora::GetDisposed, nullptr),
InstanceMethod("dispose", &AddonModelLora::Dispose),
}
)
);
}

View File

@@ -0,0 +1,28 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
class AddonModelLora : public Napi::ObjectWrap<AddonModelLora> {
public:
AddonModel* model;
llama_adapter_lora * lora_adapter;
std::string loraFilePath;
uint32_t usages = 0;
AddonModelLora(const Napi::CallbackInfo& info);
~AddonModelLora();
void dispose(bool skipErase = false);
Napi::Value GetFilePath(const Napi::CallbackInfo& info);
Napi::Value GetUsages(const Napi::CallbackInfo& info);
void SetUsages(const Napi::CallbackInfo& info, const Napi::Value &value);
Napi::Value GetDisposed(const Napi::CallbackInfo& info);
Napi::Value Dispose(const Napi::CallbackInfo& info);
static void init(Napi::Object exports);
};

View File

@@ -0,0 +1,511 @@
#include <cmath>
#include <ctime>
#include "common/common.h"
#include "globals/addonLog.h"
#include "ggml.h"
#include "llama.h"
#include "AddonGrammarEvaluationState.h"
#include "AddonSampler.h"
AddonSampler::AddonSampler(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonSampler>(info) {
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
model->Ref();
tokenCandidates.resize(llama_vocab_n_tokens(model->vocab));
tokenCandidates.reserve(llama_vocab_n_tokens(model->vocab));
}
AddonSampler::~AddonSampler() {
dispose();
}
void AddonSampler::dispose() {
if (disposed) {
return;
}
disposed = true;
model->Unref();
freeChain();
if (temperatureSampler != nullptr) {
llama_sampler_free(temperatureSampler);
temperatureSampler = nullptr;
}
if (greedySampler != nullptr) {
llama_sampler_free(greedySampler);
greedySampler = nullptr;
}
if (minPSampler != nullptr) {
llama_sampler_free(minPSampler);
minPSampler = nullptr;
}
if (topKSampler != nullptr) {
llama_sampler_free(topKSampler);
topKSampler = nullptr;
}
if (topPSampler != nullptr) {
llama_sampler_free(topPSampler);
topPSampler = nullptr;
}
if (seedSampler != nullptr) {
llama_sampler_free(seedSampler);
seedSampler = nullptr;
}
if (repeatPenaltySampler != nullptr) {
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;
}
if (tokenBiasSampler != nullptr) {
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}
if (grammarEvaluationState != nullptr) {
grammarEvaluationState->Unref();
grammarEvaluationState = nullptr;
}
}
void AddonSampler::freeChain() {
if (chain == nullptr) {
return;
}
// ensure existing state of samplers isn't cleared
while (llama_sampler_chain_n(chain) > 0) {
llama_sampler_chain_remove(chain, 0);
}
llama_sampler_free(chain);
chain = nullptr;
}
void AddonSampler::rebuildChainIfNeeded() {
if (disposed) {
throw std::runtime_error("Sampler is disposed");
}
if (chain != nullptr) {
return;
}
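// lazily rebuild the chain from the currently configured samplers; the chain only references samplers owned by this class (freeChain detaches them before freeing the chain)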
auto sampler_params = llama_sampler_chain_default_params();
chain = llama_sampler_chain_init(sampler_params);
if (tokenBiasSampler != nullptr) {
llama_sampler_chain_add(chain, tokenBiasSampler);
}
if (repeatPenaltySampler != nullptr) {
llama_sampler_chain_add(chain, repeatPenaltySampler);
}
if (grammarEvaluationState != nullptr) {
llama_sampler_chain_add(chain, grammarEvaluationState->sampler);
}
if (greedySampler != nullptr) {
llama_sampler_chain_add(chain, greedySampler);
} else {
if (topKSampler != nullptr) {
llama_sampler_chain_add(chain, topKSampler);
}
if (topPSampler != nullptr) {
llama_sampler_chain_add(chain, topPSampler);
}
if (minPSampler != nullptr) {
llama_sampler_chain_add(chain, minPSampler);
}
if (temperatureSampler != nullptr) {
llama_sampler_chain_add(chain, temperatureSampler);
}
if (seedSampler != nullptr) {
llama_sampler_chain_add(chain, seedSampler);
}
}
}
void AddonSampler::acceptToken(llama_token token) {
if (repeatPenaltySampler != nullptr) {
llama_sampler_accept(repeatPenaltySampler, token);
repeatPenalty_lastTokens.push_back(token);
}
if (grammarEvaluationState != nullptr && grammarEvaluationState->sampler != nullptr && !llama_vocab_is_eog(model->vocab, token)) {
llama_sampler_accept(grammarEvaluationState->sampler, token);
}
}
Napi::Value AddonSampler::Dispose(const Napi::CallbackInfo& info) {
dispose();
return info.Env().Undefined();
}
Napi::Value AddonSampler::ApplyConfig(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Sampler is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
const int32_t n_probs = 0; // Number of probabilities to keep - 0 = disabled
size_t min_keep = std::max(1, n_probs);
Napi::Object config = info[0].As<Napi::Object>();
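// each sampler below is recreated only when its configuration value changed; otherwise the existing sampler instance (and its internal state) is kept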
if (config.Has("temperature")) {
auto temperature = config.Get("temperature").As<Napi::Number>().FloatValue();
if (temperature != temperatureSampler_temperature || !temperatureSampler_initialized) {
temperatureSampler_initialized = true;
temperatureSampler_temperature = temperature;
freeChain();
if (temperatureSampler != nullptr) {
llama_sampler_free(temperatureSampler);
temperatureSampler = nullptr;
}
if (temperatureSampler_temperature <= 0) {
greedySampler = llama_sampler_init_greedy();
} else {
temperatureSampler = llama_sampler_init_temp(temperatureSampler_temperature);
if (greedySampler != nullptr) {
llama_sampler_free(greedySampler);
greedySampler = nullptr;
}
}
}
} else {
if (temperatureSampler != nullptr) {
freeChain();
llama_sampler_free(temperatureSampler);
temperatureSampler = nullptr;
}
if (greedySampler == nullptr) {
greedySampler = llama_sampler_init_greedy();
}
}
if (config.Has("minP")) {
auto minP = config.Get("minP").As<Napi::Number>().FloatValue();
if (minP != minPSampler_minP) {
minPSampler_minP = minP;
freeChain();
if (minPSampler != nullptr) {
llama_sampler_free(minPSampler);
minPSampler = nullptr;
}
if (minPSampler_minP != 0) {
minPSampler = llama_sampler_init_min_p(minPSampler_minP, min_keep);
}
}
} else if (minPSampler != nullptr) {
freeChain();
llama_sampler_free(minPSampler);
minPSampler = nullptr;
}
if (config.Has("topK")) {
auto topK = config.Get("topK").As<Napi::Number>().Int32Value();
if (topK != topKSampler_topK || !topKSampler_initialized) {
topKSampler_initialized = true;
topKSampler_topK = topK;
freeChain();
if (topKSampler != nullptr) {
llama_sampler_free(topKSampler);
topKSampler = nullptr;
}
const int32_t resolved_top_k = topKSampler_topK <= 0
? llama_vocab_n_tokens(model->vocab)
: std::min(topKSampler_topK, llama_vocab_n_tokens(model->vocab));
topKSampler = llama_sampler_init_top_k(resolved_top_k);
}
} else if (topKSampler != nullptr) {
freeChain();
llama_sampler_free(topKSampler);
topKSampler = nullptr;
}
if (config.Has("topP")) {
auto topP = config.Get("topP").As<Napi::Number>().FloatValue();
if (topP != topPSampler_topP) {
topPSampler_topP = topP;
freeChain();
if (topPSampler != nullptr) {
llama_sampler_free(topPSampler);
topPSampler = nullptr;
}
if (topPSampler_topP < 1.0f) {
topPSampler = llama_sampler_init_top_p(topPSampler_topP, min_keep);
}
}
} else if (topPSampler != nullptr) {
freeChain();
llama_sampler_free(topPSampler);
topPSampler = nullptr;
}
if (config.Has("seed")) {
auto seed = config.Get("seed").As<Napi::Number>().Uint32Value();
if (seed != seedSampler_seed || seedSampler == nullptr) {
seedSampler_seed = seed;
freeChain();
if (seedSampler != nullptr) {
llama_sampler_free(seedSampler);
seedSampler = nullptr;
}
seedSampler = llama_sampler_init_dist(seedSampler_seed);
}
} else if (seedSampler == nullptr) {
freeChain();
seedSampler = llama_sampler_init_dist(time(NULL));
}
if (config.Has("repeatPenaltyTokens")) {
Napi::Uint32Array repeat_penalty_tokens_uint32_array = config.Get("repeatPenaltyTokens").As<Napi::Uint32Array>();
auto repeatPenalty = config.Has("repeatPenalty")
? config.Get("repeatPenalty").As<Napi::Number>().FloatValue()
: 1;
auto repeatPenaltyMaxTokens = config.Has("repeatPenaltyMaxTokens")
? config.Get("repeatPenaltyMaxTokens").As<Napi::Number>().Int32Value()
: 64;
auto repeatPenaltyPresencePenalty = config.Has("repeatPenaltyPresencePenalty")
? config.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue()
: 0;
auto repeatPenaltyFrequencyPenalty = config.Has("repeatPenaltyFrequencyPenalty")
? config.Get("repeatPenaltyFrequencyPenalty").As<Napi::Number>().FloatValue()
: 0;
auto repeatPenaltyEnabled = repeatPenalty != 1 && repeatPenaltyMaxTokens > 0;
bool shouldCreateSampler = false;
if (!repeatPenaltyEnabled) {
if (repeatPenaltySampler != nullptr) {
freeChain();
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;
}
} else if (repeatPenaltySampler == nullptr) {
freeChain();
shouldCreateSampler = true;
} else {
bool existingSamplerMatchesConfig = true;
existingSamplerMatchesConfig &= repeatPenalty_maxTokens == repeatPenaltyMaxTokens;
existingSamplerMatchesConfig &= repeatPenalty_penalty == repeatPenalty;
existingSamplerMatchesConfig &= repeatPenalty_presencePenalty == repeatPenaltyPresencePenalty;
existingSamplerMatchesConfig &= repeatPenalty_frequencyPenalty == repeatPenaltyFrequencyPenalty;
if (existingSamplerMatchesConfig) {
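// check whether the provided token history is a continuation of the tokens already accepted by the existing sampler; only new tokens are accepted incrementally, and any mismatch forces the sampler to be recreated below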
if (repeat_penalty_tokens_uint32_array.ElementLength() > 0) {
const auto firstToken = static_cast<llama_token>(repeat_penalty_tokens_uint32_array[0]);
if (repeatPenalty_lastTokens.rat(0) != firstToken &&
repeatPenalty_lastTokens.size() == repeatPenalty_maxTokens &&
repeat_penalty_tokens_uint32_array.ElementLength() == repeatPenalty_maxTokens
) {
const auto lastToken = static_cast<llama_token>(repeat_penalty_tokens_uint32_array[repeat_penalty_tokens_uint32_array.ElementLength() - 1]);
llama_sampler_accept(repeatPenaltySampler, lastToken);
repeatPenalty_lastTokens.push_back(lastToken);
}
}
for (size_t i = 0; i < repeat_penalty_tokens_uint32_array.ElementLength() && existingSamplerMatchesConfig; i++) {
auto token = static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]);
if (i < repeatPenalty_lastTokens.size()) {
existingSamplerMatchesConfig &= repeatPenalty_lastTokens.rat(i) == token;
} else {
llama_sampler_accept(repeatPenaltySampler, token);
repeatPenalty_lastTokens.push_back(token);
}
}
}
if (!existingSamplerMatchesConfig) {
freeChain();
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;
shouldCreateSampler = true;
}
}
if (shouldCreateSampler) {
repeatPenaltySampler = llama_sampler_init_penalties(
repeatPenaltyMaxTokens,
repeatPenalty,
repeatPenaltyFrequencyPenalty,
repeatPenaltyPresencePenalty
);
repeatPenalty_lastTokens = RingBuffer<llama_token>(repeatPenaltyMaxTokens);
for (size_t i = 0; i < repeat_penalty_tokens_uint32_array.ElementLength(); i++) {
llama_sampler_accept(repeatPenaltySampler, static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]));
repeatPenalty_lastTokens.push_back(static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]));
}
repeatPenalty_maxTokens = repeatPenaltyMaxTokens;
repeatPenalty_penalty = repeatPenalty;
repeatPenalty_presencePenalty = repeatPenaltyPresencePenalty;
repeatPenalty_frequencyPenalty = repeatPenaltyFrequencyPenalty;
}
} else if (repeatPenaltySampler != nullptr) {
freeChain();
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;
}
if (config.Has("tokenBiasKeys") && config.Has("tokenBiasValues")) {
Napi::Uint32Array tokenBiasKeys = config.Get("tokenBiasKeys").As<Napi::Uint32Array>();
Napi::Float32Array tokenBiasValues = config.Get("tokenBiasValues").As<Napi::Float32Array>();
if (tokenBiasKeys.ElementLength() == tokenBiasValues.ElementLength() && tokenBiasKeys.ElementLength() > 0) {
bool existingSamplerMatchesConfig = tokenBiasSampler != nullptr;
if (tokenBiasSampler != nullptr && tokenBiasSampler_biases.size() == tokenBiasKeys.ElementLength()) {
for (size_t i = 0; i < tokenBiasKeys.ElementLength() && existingSamplerMatchesConfig; i++) {
existingSamplerMatchesConfig &= tokenBiasSampler_biases[i].token == static_cast<llama_token>(tokenBiasKeys[i]);
existingSamplerMatchesConfig &= tokenBiasSampler_biases[i].bias == tokenBiasValues[i];
}
} else {
existingSamplerMatchesConfig = false;
}
if (!existingSamplerMatchesConfig) {
if (tokenBiasSampler != nullptr) {
freeChain();
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}
tokenBiasSampler_biases.clear();
tokenBiasSampler_biases.reserve(tokenBiasKeys.ElementLength());
for (size_t i = 0; i < tokenBiasKeys.ElementLength(); i++) {
tokenBiasSampler_biases.emplace_back(llama_logit_bias { static_cast<llama_token>(tokenBiasKeys[i]), tokenBiasValues[i] });
}
tokenBiasSampler = llama_sampler_init_logit_bias(
llama_vocab_n_tokens(model->vocab),
tokenBiasSampler_biases.size(),
tokenBiasSampler_biases.data()
);
}
} else if (tokenBiasSampler != nullptr) {
freeChain();
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}
} else if (tokenBiasSampler != nullptr) {
freeChain();
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}
if (config.Has("grammarEvaluationState")) {
const auto configGrammarEvaluationState =
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(config.Get("grammarEvaluationState").As<Napi::Object>());
if (grammarEvaluationState != configGrammarEvaluationState) {
freeChain();
if (grammarEvaluationState != nullptr) {
grammarEvaluationState->Unref();
grammarEvaluationState = nullptr;
}
grammarEvaluationState = configGrammarEvaluationState;
grammarEvaluationState->Ref();
}
} else if (grammarEvaluationState != nullptr) {
freeChain();
grammarEvaluationState->Unref();
grammarEvaluationState = nullptr;
}
return info.Env().Undefined();
}
Napi::Value AddonSampler::AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
AddonGrammarEvaluationState* grammar_evaluation_state =
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
if ((grammar_evaluation_state)->sampler != nullptr) {
try {
llama_sampler_accept((grammar_evaluation_state)->sampler, tokenId);
} catch (const std::exception & e) {
Napi::Error::New(info.Env(), std::string("Failed to accept token in grammar sampler: ") + e.what()).ThrowAsJavaScriptException();
return info.Env().Undefined();
} catch (...) {
Napi::Error::New(info.Env(), "Failed to accept token in grammar sampler").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
}
return info.Env().Undefined();
}
Napi::Value AddonSampler::CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info) {
AddonGrammarEvaluationState* grammar_evaluation_state =
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
if ((grammar_evaluation_state)->sampler != nullptr) {
std::vector<llama_token_data> candidates;
candidates.reserve(1);
candidates.emplace_back(llama_token_data { tokenId, 1, 0.0f });
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
try {
llama_sampler_apply((grammar_evaluation_state)->sampler, &candidates_p);
} catch (const std::exception & e) {
addonLog(GGML_LOG_LEVEL_DEBUG, std::string("Failed to apply grammar sampler: ") + e.what());
return Napi::Boolean::New(info.Env(), false);
} catch (...) {
return Napi::Boolean::New(info.Env(), false);
}
if (candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) {
return Napi::Boolean::New(info.Env(), false);
}
return Napi::Boolean::New(info.Env(), true);
}
return Napi::Boolean::New(info.Env(), false);
}
void AddonSampler::init(Napi::Object exports) {
exports.Set(
"AddonSampler",
DefineClass(
exports.Env(),
"AddonSampler",
{
InstanceMethod("dispose", &AddonSampler::Dispose),
InstanceMethod("applyConfig", &AddonSampler::ApplyConfig),
StaticMethod("acceptGrammarEvaluationStateToken", &AddonSampler::AcceptGrammarEvaluationStateToken),
StaticMethod("canBeNextTokenForGrammarEvaluationState", &AddonSampler::CanBeNextTokenForGrammarEvaluationState),
}
)
);
}

63
node_modules/node-llama-cpp/llama/addon/AddonSampler.h generated vendored Normal file
View File

@@ -0,0 +1,63 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "RingBuffer.h"
#include "addonGlobals.h"
#include "AddonModel.h"
class AddonSampler : public Napi::ObjectWrap<AddonSampler> {
public:
AddonModel* model;
llama_sampler * chain = nullptr;
llama_sampler * temperatureSampler = nullptr;
bool temperatureSampler_initialized = false;
float temperatureSampler_temperature = 0.0f; // 0.0f = disabled
llama_sampler * greedySampler = nullptr;
llama_sampler * minPSampler = nullptr;
float minPSampler_minP = 0.0f; // Min p sampling <=0.0f = disabled
llama_sampler * topKSampler = nullptr;
bool topKSampler_initialized = false;
int topKSampler_topK = 0;
llama_sampler * topPSampler = nullptr;
float topPSampler_topP = 0.0f; // Top p sampling >=1.0 = disabled
llama_sampler * seedSampler = nullptr;
uint32_t seedSampler_seed = 0;
llama_sampler * repeatPenaltySampler = nullptr;
RingBuffer<llama_token> repeatPenalty_lastTokens = RingBuffer<llama_token>(64);
int32_t repeatPenalty_maxTokens = 64;
float repeatPenalty_penalty = 1.10f; // 1.0 = disabled
float repeatPenalty_presencePenalty = 0.00f; // 0.0 = disabled
float repeatPenalty_frequencyPenalty = 0.00f; // 0.0 = disabled
llama_sampler * tokenBiasSampler = nullptr;
std::vector<llama_logit_bias> tokenBiasSampler_biases;
AddonGrammarEvaluationState* grammarEvaluationState = nullptr;
std::vector<llama_token_data> tokenCandidates;
bool disposed = false;
AddonSampler(const Napi::CallbackInfo& info);
~AddonSampler();
void dispose();
void freeChain();
void rebuildChainIfNeeded();
void acceptToken(llama_token token);
Napi::Value Dispose(const Napi::CallbackInfo& info);
Napi::Value ApplyConfig(const Napi::CallbackInfo& info);
static Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info);
static Napi::Value CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info);
static void init(Napi::Object exports);
};

109
node_modules/node-llama-cpp/llama/addon/RingBuffer.h generated vendored Normal file
View File

@@ -0,0 +1,109 @@
#pragma once
// copied from llama-impl.h
#include <cstddef>
#include <stdexcept>
#include <vector>
template<typename T>
struct RingBuffer {
RingBuffer(size_t cap) : capacity(cap), data(cap) {}
T & front() {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[first];
}
const T & front() const {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[first];
}
T & back() {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[pos];
}
const T & back() const {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[pos];
}
void push_back(const T & value) {
if (capacity == 0) {
throw std::runtime_error("ring buffer: capacity is zero");
}
if (sz == capacity) {
// advance the start when buffer is full
first = (first + 1) % capacity;
} else {
sz++;
}
data[pos] = value;
pos = (pos + 1) % capacity;
}
T pop_front() {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
T value = data[first];
first = (first + 1) % capacity;
sz--;
return value;
}
//T & operator[](size_t i) {
// if (i >= sz) {
// throw std::runtime_error("ring buffer: index out of bounds");
// }
// return data[(first + i) % capacity];
//}
//const T & at(size_t i) const {
// if (i >= sz) {
// throw std::runtime_error("ring buffer: index out of bounds");
// }
// return data[(first + i) % capacity];
//}
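// reverse access: rat(0) is the most recently pushed element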
const T & rat(size_t i) const {
if (i >= sz) {
throw std::runtime_error("ring buffer: index out of bounds");
}
return data[(first + sz - i - 1) % capacity];
}
std::vector<T> to_vector() const {
std::vector<T> result;
result.reserve(sz);
for (size_t i = 0; i < sz; i++) {
result.push_back(data[(first + i) % capacity]);
}
return result;
}
void clear() {
// here only reset the status of the buffer
sz = 0;
first = 0;
pos = 0;
}
bool empty() const {
return sz == 0;
}
size_t size() const {
return sz;
}
size_t capacity = 0;
size_t sz = 0;
size_t first = 0;
size_t pos = 0;
std::vector<T> data;
};

314
node_modules/node-llama-cpp/llama/addon/addon.cpp generated vendored Normal file
View File

@@ -0,0 +1,314 @@
#include "addonGlobals.h"
#include "AddonModel.h"
#include "AddonModelLora.h"
#include "AddonGrammar.h"
#include "AddonGrammarEvaluationState.h"
#include "AddonSampler.h"
#include "AddonContext.h"
#include "globals/addonLog.h"
#include "globals/addonProgress.h"
#include "globals/getGpuInfo.h"
#include "globals/getSwapInfo.h"
#include "globals/getMemoryInfo.h"
#include <atomic>
bool backendInitialized = false;
bool backendDisposed = false;
Napi::Value systemInfo(const Napi::CallbackInfo& info) {
return Napi::String::From(info.Env(), llama_print_system_info());
}
Napi::Value addonGetSupportsGpuOffloading(const Napi::CallbackInfo& info) {
return Napi::Boolean::New(info.Env(), llama_supports_gpu_offload());
}
Napi::Value addonGetSupportsMmap(const Napi::CallbackInfo& info) {
return Napi::Boolean::New(info.Env(), llama_supports_mmap());
}
Napi::Value addonGetGpuSupportsMmap(const Napi::CallbackInfo& info) {
const auto llamaSupportsMmap = llama_supports_mmap();
const auto gpuDevice = getGpuDevice().first;
if (gpuDevice == nullptr) {
return Napi::Boolean::New(info.Env(), false);
}
ggml_backend_dev_props props;
ggml_backend_dev_get_props(gpuDevice, &props);
const bool gpuSupportsMmap = llamaSupportsMmap && props.caps.buffer_from_host_ptr;
return Napi::Boolean::New(info.Env(), gpuSupportsMmap);
}
Napi::Value addonGetSupportsMlock(const Napi::CallbackInfo& info) {
return Napi::Boolean::New(info.Env(), llama_supports_mlock());
}
Napi::Value addonGetMathCores(const Napi::CallbackInfo& info) {
return Napi::Number::New(info.Env(), cpu_get_num_math());
}
Napi::Value addonGetBlockSizeForGgmlType(const Napi::CallbackInfo& info) {
const int ggmlType = info[0].As<Napi::Number>().Int32Value();
if (ggmlType < 0 || ggmlType >= GGML_TYPE_COUNT) {
return info.Env().Undefined();
}
const auto blockSize = ggml_blck_size(static_cast<ggml_type>(ggmlType));
return Napi::Number::New(info.Env(), blockSize);
}
Napi::Value addonGetTypeSizeForGgmlType(const Napi::CallbackInfo& info) {
const int ggmlType = info[0].As<Napi::Number>().Int32Value();
if (ggmlType < 0 || ggmlType >= GGML_TYPE_COUNT) {
return info.Env().Undefined();
}
const auto typeSize = ggml_type_size(static_cast<ggml_type>(ggmlType));
return Napi::Number::New(info.Env(), typeSize);
}
Napi::Value addonGetGgmlGraphOverheadCustom(const Napi::CallbackInfo& info) {
if (info.Length() < 2 || !info[0].IsNumber() || !info[1].IsBoolean()) {
return Napi::Number::New(info.Env(), 0);
}
const size_t size = info[0].As<Napi::Number>().Uint32Value();
const bool grads = info[1].As<Napi::Boolean>().Value();
const auto graphOverhead = ggml_graph_overhead_custom(size, grads);
return Napi::Number::New(info.Env(), graphOverhead);
}
Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
Napi::Object consts = Napi::Object::New(info.Env());
consts.Set("ggmlMaxDims", Napi::Number::New(info.Env(), GGML_MAX_DIMS));
consts.Set("ggmlTypeF16Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F16)));
consts.Set("ggmlTypeF32Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F32)));
consts.Set("ggmlTensorOverhead", Napi::Number::New(info.Env(), ggml_tensor_overhead()));
consts.Set("llamaPosSize", Napi::Number::New(info.Env(), sizeof(llama_pos)));
consts.Set("llamaSeqIdSize", Napi::Number::New(info.Env(), sizeof(llama_seq_id)));
return consts;
}
class AddonBackendLoadWorker : public Napi::AsyncWorker {
public:
AddonBackendLoadWorker(const Napi::Env& env)
: Napi::AsyncWorker(env, "AddonBackendLoadWorker"),
deferred(Napi::Promise::Deferred::New(env)) {
}
~AddonBackendLoadWorker() {
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
llama_backend_init();
try {
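// if dispose() was requested while the backend was initializing, free it immediately instead of marking it as initialized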
if (backendDisposed) {
llama_backend_free();
} else {
backendInitialized = true;
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_backend_free\"");
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_backend_init\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonBackendUnloadWorker : public Napi::AsyncWorker {
public:
AddonBackendUnloadWorker(const Napi::Env& env)
: Napi::AsyncWorker(env, "AddonBackendUnloadWorker"),
deferred(Napi::Promise::Deferred::New(env)) {
}
~AddonBackendUnloadWorker() {
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
if (backendInitialized) {
backendInitialized = false;
llama_backend_free();
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_backend_free\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
Napi::Value addonLoadBackends(const Napi::CallbackInfo& info) {
const std::string forceLoadLibrariesSearchPath = info.Length() == 0
? ""
: info[0].IsString()
? info[0].As<Napi::String>().Utf8Value()
: "";
ggml_backend_reg_count();
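// the call above ensures the ggml backend registry is initialized; its return value is not needed here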
if (forceLoadLibrariesSearchPath.length() > 0) {
ggml_backend_load_all_from_path(forceLoadLibrariesSearchPath.c_str());
}
return info.Env().Undefined();
}
Napi::Value addonSetNuma(const Napi::CallbackInfo& info) {
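// no argument or an explicit false skips NUMA initialization; otherwise a strategy name string is expected below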
const bool numaDisabled = info.Length() == 0
? true
: info[0].IsBoolean()
? !info[0].As<Napi::Boolean>().Value()
: false;
if (numaDisabled)
return info.Env().Undefined();
const auto numaType = info[0].IsString()
? info[0].As<Napi::String>().Utf8Value()
: "";
if (numaType == "distribute") {
llama_numa_init(GGML_NUMA_STRATEGY_DISTRIBUTE);
} else if (numaType == "isolate") {
llama_numa_init(GGML_NUMA_STRATEGY_ISOLATE);
} else if (numaType == "numactl") {
llama_numa_init(GGML_NUMA_STRATEGY_NUMACTL);
} else if (numaType == "mirror") {
llama_numa_init(GGML_NUMA_STRATEGY_MIRROR);
} else {
Napi::Error::New(info.Env(), std::string("Invalid NUMA strategy \"") + numaType + "\"").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return info.Env().Undefined();
}
Napi::Value markLoaded(const Napi::CallbackInfo& info) {
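// returns the previous value of the flag: false on the first call in this process, true on subsequent calls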
static std::atomic_bool loaded = false;
return Napi::Boolean::New(info.Env(), loaded.exchange(true));
}
Napi::Value addonInit(const Napi::CallbackInfo& info) {
if (backendInitialized) {
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
deferred.Resolve(info.Env().Undefined());
return deferred.Promise();
}
AddonBackendLoadWorker* worker = new AddonBackendLoadWorker(info.Env());
worker->Queue();
return worker->GetPromise();
}
Napi::Value addonDispose(const Napi::CallbackInfo& info) {
if (backendDisposed) {
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
deferred.Resolve(info.Env().Undefined());
return deferred.Promise();
}
backendDisposed = true;
AddonBackendUnloadWorker* worker = new AddonBackendUnloadWorker(info.Env());
worker->Queue();
return worker->GetPromise();
}
static void addonFreeLlamaBackend(Napi::Env env, int* data) {
if (backendDisposed) {
return;
}
backendDisposed = true;
if (backendInitialized) {
backendInitialized = false;
llama_backend_free();
}
}
Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
exports.DefineProperties({
Napi::PropertyDescriptor::Function("markLoaded", markLoaded),
Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
Napi::PropertyDescriptor::Function("getSupportsGpuOffloading", addonGetSupportsGpuOffloading),
Napi::PropertyDescriptor::Function("getSupportsMmap", addonGetSupportsMmap),
Napi::PropertyDescriptor::Function("getGpuSupportsMmap", addonGetGpuSupportsMmap),
Napi::PropertyDescriptor::Function("getSupportsMlock", addonGetSupportsMlock),
Napi::PropertyDescriptor::Function("getMathCores", addonGetMathCores),
Napi::PropertyDescriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
Napi::PropertyDescriptor::Function("getTypeSizeForGgmlType", addonGetTypeSizeForGgmlType),
Napi::PropertyDescriptor::Function("getGgmlGraphOverheadCustom", addonGetGgmlGraphOverheadCustom),
Napi::PropertyDescriptor::Function("getConsts", addonGetConsts),
Napi::PropertyDescriptor::Function("setLogger", setLogger),
Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
Napi::PropertyDescriptor::Function("ensureGpuDeviceIsSupported", ensureGpuDeviceIsSupported),
Napi::PropertyDescriptor::Function("getSwapInfo", getSwapInfo),
Napi::PropertyDescriptor::Function("getMemoryInfo", getMemoryInfo),
Napi::PropertyDescriptor::Function("loadBackends", addonLoadBackends),
Napi::PropertyDescriptor::Function("setNuma", addonSetNuma),
Napi::PropertyDescriptor::Function("init", addonInit),
Napi::PropertyDescriptor::Function("dispose", addonDispose),
});
AddonModel::init(exports);
AddonModelLora::init(exports);
AddonGrammar::init(exports);
AddonGrammarEvaluationState::init(exports);
AddonContext::init(exports);
AddonSampler::init(exports);
llama_log_set(addonLlamaCppLogCallback, nullptr);
exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));
return exports;
}
NODE_API_MODULE(NODE_GYP_MODULE_NAME, registerCallback)

View File

@@ -0,0 +1,22 @@
#include <algorithm>
#include <limits>
#include <sstream>
#include <vector>
#include "addonGlobals.h"
#include "napi.h"
void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size) {
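// AdjustExternalMemory takes a signed 64-bit delta, so very large sizes are reported in chunks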
const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
while (size > 0) {
int64_t adjustSize = std::min(size, chunkSize);
Napi::MemoryManagement::AdjustExternalMemory(env, adjustSize);
size -= adjustSize;
}
}
void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
while (size > 0) {
int64_t adjustSize = std::min(size, chunkSize);
Napi::MemoryManagement::AdjustExternalMemory(env, -adjustSize);
size -= adjustSize;
}
}

12
node_modules/node-llama-cpp/llama/addon/addonGlobals.h generated vendored Normal file
View File

@@ -0,0 +1,12 @@
#pragma once
#include "napi.h"
class AddonModel;
class AddonModelLora;
class AddonModelData;
class AddonContext;
class AddonGrammar;
class AddonGrammarEvaluationState;
void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size);
void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size);

View File

@@ -0,0 +1,143 @@
#include <sstream>
#include "addonLog.h"
AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
bool addonJsLoggerCallbackSet = false;
int addonLoggerLogLevel = 5;
int addonLastLoggerLogLevel = 6;
static int addonGetGgmlLogLevelNumber(ggml_log_level level) {
switch (level) {
case GGML_LOG_LEVEL_ERROR: return 2;
case GGML_LOG_LEVEL_WARN: return 3;
case GGML_LOG_LEVEL_INFO: return 4;
case GGML_LOG_LEVEL_NONE: return 5;
case GGML_LOG_LEVEL_DEBUG: return 6;
case GGML_LOG_LEVEL_CONT: return addonLastLoggerLogLevel;
}
return 1;
}
void addonCallJsLogCallback(
Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
) {
bool called = false;
if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
try {
callback.Call({
Napi::Number::New(env, data->logLevelNumber),
Napi::String::New(env, data->stringStream->str()),
});
called = true;
} catch (const Napi::Error& e) {
called = false;
}
}
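// fall back to writing directly to stdout/stderr when the JS callback could not be invoked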
if (!called && data != nullptr) {
if (data->logLevelNumber == 2) {
fputs(data->stringStream->str().c_str(), stderr);
fflush(stderr);
} else {
fputs(data->stringStream->str().c_str(), stdout);
fflush(stdout);
}
}
if (data != nullptr) {
delete data->stringStream;
delete data;
}
}
void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data) {
int logLevelNumber = addonGetGgmlLogLevelNumber(level);
addonLastLoggerLogLevel = logLevelNumber;
if (logLevelNumber > addonLoggerLogLevel) {
return;
}
if (addonJsLoggerCallbackSet) {
std::stringstream* stringStream = new std::stringstream();
if (text != nullptr) {
*stringStream << text;
}
addon_logger_log* data = new addon_logger_log {
logLevelNumber,
stringStream,
};
auto status = addonThreadSafeLoggerCallback.NonBlockingCall(data);
if (status == napi_ok) {
return;
} else {
delete stringStream;
delete data;
}
}
if (text != nullptr) {
if (logLevelNumber == 2) {
fputs(text, stderr);
fflush(stderr);
} else {
fputs(text, stdout);
fflush(stdout);
}
}
}
Napi::Value setLogger(const Napi::CallbackInfo& info) {
if (addonJsLoggerCallbackSet) {
addonJsLoggerCallbackSet = false;
addonThreadSafeLoggerCallback.Release();
}
if (info.Length() < 1 || !info[0].IsFunction()) {
return info.Env().Undefined();
}
auto addonLoggerJSCallback = info[0].As<Napi::Function>();
AddonThreadSafeLogCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
addonThreadSafeLoggerCallback = AddonThreadSafeLogCallbackFunction::New(
info.Env(),
addonLoggerJSCallback,
"loggerCallback",
0,
1,
context,
[](Napi::Env, void*, AddonThreadSafeLogCallbackFunctionContext* ctx) {
addonJsLoggerCallbackSet = false;
delete ctx;
}
);
addonJsLoggerCallbackSet = true;
// prevent blocking the main node process from exiting due to active resources
addonThreadSafeLoggerCallback.Unref(info.Env());
return info.Env().Undefined();
}
Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
if (info.Length() < 1 || !info[0].IsNumber()) {
addonLoggerLogLevel = 5;
return info.Env().Undefined();
}
addonLoggerLogLevel = info[0].As<Napi::Number>().Int32Value();
return info.Env().Undefined();
}
void addonLog(ggml_log_level level, const std::string text) {
addonLlamaCppLogCallback(level, std::string("[addon] " + text + "\n").c_str(), nullptr);
}

View File

@@ -0,0 +1,24 @@
#pragma once
#include <sstream>
#include <string>
#include "llama.h"
#include "napi.h"
struct addon_logger_log {
public:
const int logLevelNumber;
const std::stringstream* stringStream;
};
void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);
using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
void addonCallJsLogCallback(
Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
);
using AddonThreadSafeLogCallbackFunction =
Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
Napi::Value setLogger(const Napi::CallbackInfo& info);
Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info);
void addonLog(ggml_log_level level, const std::string text);

View File

@@ -0,0 +1,15 @@
#include "addonProgress.h"
void addonCallJsProgressCallback(
Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
) {
if (env != nullptr && callback != nullptr) {
try {
callback.Call({Napi::Number::New(env, data->progress)});
} catch (const Napi::Error& e) {}
}
if (data != nullptr) {
delete data;
}
}

View File

@@ -0,0 +1,15 @@
#pragma once
#include "napi.h"
struct addon_progress_event {
public:
const float progress;
};
using AddonThreadSafeProgressCallbackFunctionContext = Napi::Reference<Napi::Value>;
void addonCallJsProgressCallback(
Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
);
using AddonThreadSafeProgressEventCallbackFunction =
Napi::TypedThreadSafeFunction<AddonThreadSafeProgressCallbackFunctionContext, addon_progress_event, addonCallJsProgressCallback>;

View File

@@ -0,0 +1,146 @@
#include <vector>
#include "getGpuInfo.h"
#include "addonLog.h"
#ifdef __APPLE__
#include <TargetConditionals.h>
#endif
#ifdef GPU_INFO_USE_VULKAN
# include "../../gpuInfo/vulkan-gpu-info.h"
#endif
#ifdef GPU_INFO_USE_VULKAN
void logVulkanWarning(const char* message) {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr);
}
#endif
Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
ggml_backend_dev_t device = NULL;
size_t deviceTotal = 0;
size_t deviceFree = 0;
uint64_t total = 0;
uint64_t used = 0;
uint64_t unifiedVramSize = 0;
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
device = ggml_backend_dev_get(i);
auto deviceType = ggml_backend_dev_type(device);
if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) {
deviceTotal = 0;
deviceFree = 0;
ggml_backend_dev_memory(device, &deviceFree, &deviceTotal);
total += deviceTotal;
used += deviceTotal - deviceFree;
#if defined(__arm64__) || defined(__aarch64__)
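// on Apple Silicon the Metal device shares unified memory with the CPU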
if (std::string(ggml_backend_dev_name(device)) == "Metal") {
unifiedVramSize += deviceTotal;
}
#endif
}
}
#ifdef GPU_INFO_USE_VULKAN
uint64_t vulkanDeviceTotal = 0;
uint64_t vulkanDeviceUsed = 0;
uint64_t vulkanDeviceUnifiedVramSize = 0;
const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, &vulkanDeviceUnifiedVramSize, logVulkanWarning);
if (vulkanDeviceSupportsMemoryBudgetExtension) {
if (vulkanDeviceUnifiedVramSize > total) {
// this means that we counted memory from devices that aren't used by llama.cpp
vulkanDeviceUnifiedVramSize = 0;
}
unifiedVramSize += vulkanDeviceUnifiedVramSize;
}
if (used == 0 && vulkanDeviceUsed != 0) {
used = vulkanDeviceUsed;
}
#endif
Napi::Object result = Napi::Object::New(info.Env());
result.Set("total", Napi::Number::From(info.Env(), total));
result.Set("used", Napi::Number::From(info.Env(), used));
result.Set("unifiedSize", Napi::Number::From(info.Env(), unifiedVramSize));
return result;
}
Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
std::vector<std::string> deviceNames;
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
ggml_backend_dev_t device = ggml_backend_dev_get(i);
auto deviceType = ggml_backend_dev_type(device);
if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) {
deviceNames.push_back(std::string(ggml_backend_dev_description(device)));
}
}
Napi::Object result = Napi::Object::New(info.Env());
Napi::Array deviceNamesNapiArray = Napi::Array::New(info.Env(), deviceNames.size());
for (size_t i = 0; i < deviceNames.size(); ++i) {
deviceNamesNapiArray[i] = Napi::String::New(info.Env(), deviceNames[i]);
}
result.Set("deviceNames", deviceNamesNapiArray);
return result;
}
std::pair<ggml_backend_dev_t, std::string> getGpuDevice() {
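// prefer a known GPU backend (Metal, Vulkan, CUDA/ROCm/MUSA); fall back to the CPU device when none is found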
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
ggml_backend_dev_t device = ggml_backend_dev_get(i);
const auto deviceName = std::string(ggml_backend_dev_name(device));
if (deviceName == "Metal") {
return std::pair<ggml_backend_dev_t, std::string>(device, "metal");
} else if (std::string(deviceName).find("Vulkan") == 0) {
return std::pair<ggml_backend_dev_t, std::string>(device, "vulkan");
} else if (std::string(deviceName).find("CUDA") == 0 || std::string(deviceName).find("ROCm") == 0 || std::string(deviceName).find("MUSA") == 0) {
return std::pair<ggml_backend_dev_t, std::string>(device, "cuda");
}
}
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
ggml_backend_dev_t device = ggml_backend_dev_get(i);
const auto deviceName = std::string(ggml_backend_dev_name(device));
if (deviceName == "CPU") {
return std::pair<ggml_backend_dev_t, std::string>(device, "cpu");
}
}
return std::pair<ggml_backend_dev_t, std::string>(nullptr, "");
}
Napi::Value getGpuType(const Napi::CallbackInfo& info) {
const auto gpuDeviceRes = getGpuDevice();
const auto device = gpuDeviceRes.first;
const auto deviceType = gpuDeviceRes.second;
if (deviceType == "cpu") {
return Napi::Boolean::New(info.Env(), false);
} else if (device != nullptr && deviceType != "") {
return Napi::String::New(info.Env(), deviceType);
}
return info.Env().Undefined();
}
Napi::Value ensureGpuDeviceIsSupported(const Napi::CallbackInfo& info) {
#ifdef GPU_INFO_USE_VULKAN
if (!checkIsVulkanEnvSupported(logVulkanWarning)) {
Napi::Error::New(info.Env(), "Vulkan device is not supported").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
#endif
return info.Env().Undefined();
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <utility>
#include <string>
#include "napi.h"
#include "llama.h"
Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info);
Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info);
std::pair<ggml_backend_dev_t, std::string> getGpuDevice();
Napi::Value getGpuType(const Napi::CallbackInfo& info);
Napi::Value ensureGpuDeviceIsSupported(const Napi::CallbackInfo& info);

View File

@@ -0,0 +1,63 @@
#include "getMemoryInfo.h"
#include "addonLog.h"
#ifdef __APPLE__
#include <iostream>
#include <mach/mach.h>
#include <sys/sysctl.h>
#elif __linux__
#include <fstream>
#include <sstream>
#include <string>
#elif _WIN32
#include <iostream>
#include <windows.h>
#include <psapi.h>
#endif
Napi::Value getMemoryInfo(const Napi::CallbackInfo& info) {
uint64_t totalMemoryUsage = 0;
#ifdef __APPLE__
struct mach_task_basic_info taskInfo;
mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&taskInfo, &infoCount) == KERN_SUCCESS) {
totalMemoryUsage = taskInfo.virtual_size;
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get memory usage info").c_str(), nullptr);
}
#elif __linux__
std::ifstream procStatus("/proc/self/status");
std::string line;
bool foundMemoryUsage = false;
while (std::getline(procStatus, line)) {
if (line.rfind("VmSize:", 0) == 0) { // VmSize: total virtual memory size of the process
std::istringstream iss(line);
std::string key, unit;
size_t value;
if (iss >> key >> value >> unit) {
totalMemoryUsage = value * 1024; // Convert from kB to bytes
foundMemoryUsage = true;
}
break;
}
}
if (!foundMemoryUsage) {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get memory usage info").c_str(), nullptr);
}
#elif _WIN32
PROCESS_MEMORY_COUNTERS_EX memCounters;
if (GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&memCounters, sizeof(memCounters))) {
totalMemoryUsage = memCounters.PrivateUsage;
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get memory usage info").c_str(), nullptr);
}
#endif
Napi::Object obj = Napi::Object::New(info.Env());
obj.Set("total", Napi::Number::New(info.Env(), totalMemoryUsage));
return obj;
}

View File

@@ -0,0 +1,4 @@
#pragma once
#include "napi.h"
Napi::Value getMemoryInfo(const Napi::CallbackInfo& info);

View File

@@ -0,0 +1,69 @@
#include "getSwapInfo.h"
#include "addonLog.h"
#ifdef __APPLE__
#include <iostream>
#include <mach/mach.h>
#include <sys/sysctl.h>
#elif __linux__
#include <iostream>
#include <sys/sysinfo.h>
#elif _WIN32
#include <iostream>
#include <windows.h>
#include <psapi.h>
#endif
Napi::Value getSwapInfo(const Napi::CallbackInfo& info) {
uint64_t totalSwap = 0;
uint64_t freeSwap = 0;
uint64_t maxSize = 0;
bool maxSizeSet = true;
#ifdef __APPLE__
struct xsw_usage swapInfo;
size_t size = sizeof(swapInfo);
if (sysctlbyname("vm.swapusage", &swapInfo, &size, NULL, 0) == 0) {
totalSwap = swapInfo.xsu_total;
freeSwap = swapInfo.xsu_avail;
maxSizeSet = false;
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get swap info").c_str(), nullptr);
}
#elif __linux__
struct sysinfo sysInfo;
if (sysinfo(&sysInfo) == 0) {
totalSwap = sysInfo.totalswap;
freeSwap = sysInfo.freeswap;
maxSize = sysInfo.totalswap;
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get swap info").c_str(), nullptr);
}
#elif _WIN32
MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
if (GlobalMemoryStatusEx(&memInfo)) {
PERFORMANCE_INFORMATION perfInfo;
perfInfo.cb = sizeof(PERFORMANCE_INFORMATION);
if (GetPerformanceInfo(&perfInfo, sizeof(perfInfo))) {
totalSwap = memInfo.ullTotalPageFile;
freeSwap = memInfo.ullAvailPageFile;
maxSize = perfInfo.CommitLimit * perfInfo.PageSize;
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get max pagefile size").c_str(), nullptr);
}
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get pagefile info").c_str(), nullptr);
}
#endif
Napi::Object obj = Napi::Object::New(info.Env());
obj.Set("total", Napi::Number::New(info.Env(), totalSwap));
obj.Set("free", Napi::Number::New(info.Env(), freeSwap));
obj.Set("maxSize", maxSizeSet ? Napi::Number::New(info.Env(), maxSize) : Napi::Number::New(info.Env(), -1));
return obj;
}

View File

@@ -0,0 +1,4 @@
#pragma once
#include "napi.h"
Napi::Value getSwapInfo(const Napi::CallbackInfo& info);

View File

@@ -0,0 +1,3 @@
{
"release": "b7836"
}

View File

@@ -0,0 +1,21 @@
function(addVariantSuffix originalTarget variantSuffix)
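# give the target's output files a variant-specific name so multiple build variants can coexist side by side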
if (NOT TARGET ${originalTarget} OR "${variantSuffix}" STREQUAL "")
return()
endif()
set(_name "${originalTarget}.${variantSuffix}")
set_target_properties(${originalTarget} PROPERTIES
OUTPUT_NAME "${_name}"
RUNTIME_OUTPUT_NAME "${_name}" # Windows .dll
LIBRARY_OUTPUT_NAME "${_name}" # Unix shared lib
ARCHIVE_OUTPUT_NAME "${_name}" # static / import lib
)
if (APPLE)
set_target_properties(${originalTarget} PROPERTIES
MACOSX_RPATH ON
INSTALL_NAME_DIR "@rpath"
)
endif()
endfunction()

View File

@@ -0,0 +1,68 @@
function(ensureNinjaPath)
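# when the generator is Ninja but CMAKE_MAKE_PROGRAM is unset, search common install locations (PATH, Chocolatey, WinGet, Visual Studio) for ninja.exe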
if ((NOT DEFINED CMAKE_MAKE_PROGRAM OR NOT EXISTS "${CMAKE_MAKE_PROGRAM}" OR NOT CMAKE_MAKE_PROGRAM) AND (CMAKE_GENERATOR STREQUAL "Ninja" OR CMAKE_GENERATOR STREQUAL "Ninja Multi-Config"))
find_program(NINJA_EXECUTABLE ninja)
set(CMAKE_MAKE_PROGRAM "")
set(CMAKE_MAKE_PROGRAM "" PARENT_SCOPE)
if(NINJA_EXECUTABLE AND EXISTS "${NINJA_EXECUTABLE}")
set(CMAKE_MAKE_PROGRAM "${NINJA_EXECUTABLE}")
set(CMAKE_MAKE_PROGRAM "${NINJA_EXECUTABLE}" CACHE FILEPATH "Make program")
set(CMAKE_MAKE_PROGRAM "${NINJA_EXECUTABLE}" PARENT_SCOPE)
endif()
if (NOT CMAKE_MAKE_PROGRAM OR NOT EXISTS "${CMAKE_MAKE_PROGRAM}")
set(PROGRAMDATA_PATH "$ENV{ProgramData}")
file(TO_CMAKE_PATH "${PROGRAMDATA_PATH}" PROGRAMDATA_PATH)
if (PROGRAMDATA_PATH AND EXISTS "${PROGRAMDATA_PATH}")
file(GLOB_RECURSE FOUND_NINJA_EXE "${PROGRAMDATA_PATH}/chocolatey/bin/ninja.exe")
if(FOUND_NINJA_EXE)
list(GET FOUND_NINJA_EXE 0 FOUND_CMAKE_MAKE_PROGRAM)
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}")
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}" CACHE FILEPATH "Make program")
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}" PARENT_SCOPE)
endif()
endif()
endif()
if (NOT CMAKE_MAKE_PROGRAM OR NOT EXISTS "${CMAKE_MAKE_PROGRAM}")
set(LOCALAPPDATA_PATH "$ENV{LOCALAPPDATA}")
file(TO_CMAKE_PATH "${LOCALAPPDATA_PATH}" LOCALAPPDATA_PATH)
if (LOCALAPPDATA_PATH AND EXISTS "${LOCALAPPDATA_PATH}")
file(GLOB_RECURSE FOUND_NINJA_EXE "${LOCALAPPDATA_PATH}/Microsoft/WinGet/Packages/Ninja-build.Ninja_Microsoft.Winget.*/ninja.exe")
if(FOUND_NINJA_EXE)
list(GET FOUND_NINJA_EXE 0 FOUND_CMAKE_MAKE_PROGRAM)
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}")
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}" CACHE FILEPATH "Make program")
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}" PARENT_SCOPE)
endif()
endif()
endif()
if (NOT CMAKE_MAKE_PROGRAM OR NOT EXISTS "${CMAKE_MAKE_PROGRAM}")
foreach(PATH IN LISTS PROGRAMFILES_PATHS)
file(GLOB_RECURSE FOUND_NINJA_EXE
"${PATH}/Microsoft Visual Studio/*/CMake/Ninja/ninja.exe"
"${PATH}/Microsoft Visual Studio/**/*/CMake/Ninja/ninja.exe"
"${PATH}/Microsoft Visual Studio/*/Common7/IDE/CommonExtensions/Microsoft/CMake/Ninja/ninja.exe"
"${PATH}/Microsoft Visual Studio/**/*/Common7/IDE/CommonExtensions/Microsoft/CMake/Ninja/ninja.exe")
if(FOUND_NINJA_EXE)
list(GET FOUND_NINJA_EXE 0 FOUND_CMAKE_MAKE_PROGRAM)
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}")
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}" CACHE FILEPATH "Make program")
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}" PARENT_SCOPE)
break()
endif()
endforeach()
endif()
if (NOT CMAKE_MAKE_PROGRAM OR NOT EXISTS "${CMAKE_MAKE_PROGRAM}")
message(FATAL_ERROR "Ninja build system not found. Please install Ninja or Visual Studio Build Tools.")
endif()
endif()
endfunction()

View File

@@ -0,0 +1,34 @@
function(ensureNodeLib HOST_ARCH TARGET_ARCH)
if (CMAKE_JS_NODELIB_DEF AND CMAKE_JS_NODELIB_TARGET)
if (NOT DEFINED NODE_LIB_CMAKE_AR)
foreach(PATH IN LISTS PROGRAMFILES_PATHS)
if(NODE_LIB_CMAKE_AR)
break()
endif()
file(GLOB_RECURSE FOUND_LIB_EXE
"${PATH}/Microsoft Visual Studio/*/VC/Tools/MSVC/*/bin/Host${HOST_ARCH}/${TARGET_ARCH}/lib.exe"
"${PATH}/Microsoft Visual Studio/**/*/VC/Tools/MSVC/*/bin/Host${HOST_ARCH}/${TARGET_ARCH}/lib.exe")
if(FOUND_LIB_EXE)
list(GET FOUND_LIB_EXE 0 NODE_LIB_CMAKE_AR)
break()
endif()
endforeach()
endif()
set(NODE_LIB_CMAKE_AR_MACHINE_FLAG "")
if (TARGET_ARCH STREQUAL "x64")
set(NODE_LIB_CMAKE_AR_MACHINE_FLAG "/MACHINE:X64")
elseif (TARGET_ARCH STREQUAL "arm64")
set(NODE_LIB_CMAKE_AR_MACHINE_FLAG "/MACHINE:ARM64")
endif()
if (EXISTS "${NODE_LIB_CMAKE_AR}")
# Generate node.lib
execute_process(COMMAND ${NODE_LIB_CMAKE_AR} /def:${CMAKE_JS_NODELIB_DEF} /out:${CMAKE_JS_NODELIB_TARGET} ${CMAKE_STATIC_LINKER_FLAGS} ${NODE_LIB_CMAKE_AR_MACHINE_FLAG} /nologo)
else()
message(FATAL_ERROR "Windows Resource Compiler (lib.exe) not found. Please install Visual Studio Build Tools.")
endif()
endif()
endfunction()

View File

@@ -0,0 +1,12 @@
function(llvmApplyGnuModeAdaptations)
# adapt cmake-js to work with llvm in GNU mode
if (NOT CMAKE_SHARED_LINKER_FLAGS MATCHES "-Xlinker /DELAYLOAD:NODE.EXE")
string(REPLACE "/DELAYLOAD:NODE.EXE" "-Xlinker /DELAYLOAD:NODE.EXE -Xlinker /defaultlib:delayimp"
UPDATED_CMAKE_SHARED_LINKER_FLAGS
"${CMAKE_SHARED_LINKER_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${UPDATED_CMAKE_SHARED_LINKER_FLAGS}" PARENT_SCOPE)
endif()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Xclang --dependent-lib=msvcrt" PARENT_SCOPE)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Xclang --dependent-lib=msvcrt" PARENT_SCOPE)
endfunction()

View File

@@ -0,0 +1,37 @@
function(llvmEnsureCmakeAr CURRENT_ARCH)
set (LLVM_DIR_ARCH_NAME "")
if (CURRENT_ARCH STREQUAL "x64")
set (LLVM_DIR_ARCH_NAME "x64")
elseif (CURRENT_ARCH STREQUAL "arm64")
set (LLVM_DIR_ARCH_NAME "ARM64")
endif()
if (NOT DEFINED CMAKE_AR OR NOT EXISTS "${CMAKE_AR}")
set(LLVM_INSTALL_PATHS "")
foreach(PATH IN LISTS PROGRAMFILES_PATHS)
list(APPEND LLVM_INSTALL_PATHS "${PATH}/LLVM")
file(GLOB_RECURSE FOUND_LLVM_ROOT LIST_DIRECTORIES true
"${PATH}/Microsoft Visual Studio/*/VC/Tools/Llvm/${LLVM_DIR_ARCH_NAME}"
"${PATH}/Microsoft Visual Studio/**/*/VC/Tools/Llvm/${LLVM_DIR_ARCH_NAME}")
list(FILTER FOUND_LLVM_ROOT INCLUDE REGEX "VC/Tools/Llvm/${LLVM_DIR_ARCH_NAME}$")
if(FOUND_LLVM_ROOT)
list(APPEND LLVM_INSTALL_PATHS ${FOUND_LLVM_ROOT})
endif()
endforeach()
if(DEFINED LLVM_ROOT AND EXISTS "${LLVM_ROOT}")
list(INSERT LLVM_INSTALL_PATHS 0 "${LLVM_ROOT}")
endif()
list(REMOVE_DUPLICATES LLVM_INSTALL_PATHS)
foreach(PATH IN LISTS LLVM_INSTALL_PATHS)
if(EXISTS "${PATH}/bin/llvm-ar.exe" AND EXISTS "${PATH}/bin/llvm-ar.exe")
set(CMAKE_AR "${PATH}/bin/llvm-ar.exe" PARENT_SCOPE)
break()
endif()
endforeach()
endif()
endfunction()

View File

@@ -0,0 +1,87 @@
function(llvmUseGnuModeCompilers CURRENT_ARCH)
set(LLVM_INSTALLATION_URL "https://github.com/llvm/llvm-project/releases/tag/llvmorg-19.1.5")
set(CMAKE_C_COMPILER clang)
set(CMAKE_C_COMPILER clang PARENT_SCOPE)
set(CMAKE_CXX_COMPILER clang++)
set(CMAKE_CXX_COMPILER clang++ PARENT_SCOPE)
set(CMAKE_RC_COMPILER llvm-rc)
set(CMAKE_RC_COMPILER llvm-rc PARENT_SCOPE)
set (LLVM_DIR_ARCH_NAME "")
if (CURRENT_ARCH STREQUAL "x64")
set (LLVM_DIR_ARCH_NAME "x64")
elseif (CURRENT_ARCH STREQUAL "arm64")
set (LLVM_DIR_ARCH_NAME "ARM64")
endif()
set(LLVM_INSTALL_PATHS "")
foreach(PATH IN LISTS PROGRAMFILES_PATHS)
list(APPEND LLVM_INSTALL_PATHS "${PATH}/LLVM")
file(GLOB_RECURSE FOUND_LLVM_ROOT LIST_DIRECTORIES true
"${PATH}/Microsoft Visual Studio/*/VC/Tools/Llvm/${LLVM_DIR_ARCH_NAME}"
"${PATH}/Microsoft Visual Studio/**/*/VC/Tools/Llvm/${LLVM_DIR_ARCH_NAME}")
list(FILTER FOUND_LLVM_ROOT INCLUDE REGEX "VC/Tools/Llvm/${LLVM_DIR_ARCH_NAME}$")
if(FOUND_LLVM_ROOT)
list(APPEND LLVM_INSTALL_PATHS ${FOUND_LLVM_ROOT})
endif()
endforeach()
if(DEFINED LLVM_ROOT AND EXISTS "${LLVM_ROOT}")
list(INSERT LLVM_INSTALL_PATHS 0 "${LLVM_ROOT}")
endif()
list(REMOVE_DUPLICATES LLVM_INSTALL_PATHS)
set(LLVM_ROOT "")
set(LLVM_ROOT "" PARENT_SCOPE)
foreach(PATH IN LISTS LLVM_INSTALL_PATHS)
if(EXISTS "${PATH}/bin/clang.exe" AND EXISTS "${PATH}/bin/clang++.exe" AND EXISTS "${PATH}/bin/llvm-rc.exe")
set(LLVM_ROOT "${PATH}")
set(LLVM_ROOT "${PATH}" PARENT_SCOPE)
break()
endif()
endforeach()
if(LLVM_ROOT STREQUAL "")
if (CURRENT_ARCH STREQUAL "arm64")
message(FATAL_ERROR "LLVM installation was not found. Please install LLVM for WoA (Windows on Arm): ${LLVM_INSTALLATION_URL}")
else()
message(FATAL_ERROR "LLVM installation was not found. Please install LLVM: ${LLVM_INSTALLATION_URL}")
endif()
endif()
if (NOT EXISTS "${CMAKE_C_COMPILER}" OR NOT EXISTS "${CMAKE_CXX_COMPILER}" OR NOT EXISTS "${CMAKE_RC_COMPILER}")
set(CMAKE_C_COMPILER "${LLVM_ROOT}/bin/clang.exe")
set(CMAKE_C_COMPILER "${LLVM_ROOT}/bin/clang.exe" PARENT_SCOPE)
set(CMAKE_CXX_COMPILER "${LLVM_ROOT}/bin/clang++.exe")
set(CMAKE_CXX_COMPILER "${LLVM_ROOT}/bin/clang++.exe" PARENT_SCOPE)
set(CMAKE_RC_COMPILER "${LLVM_ROOT}/bin/llvm-rc.exe")
set(CMAKE_RC_COMPILER "${LLVM_ROOT}/bin/llvm-rc.exe" PARENT_SCOPE)
endif()
if (NOT EXISTS "${CMAKE_C_COMPILER}")
if (CURRENT_ARCH STREQUAL "arm64")
message(FATAL_ERROR "Clang compiler not found at ${CMAKE_C_COMPILER}. Please reinstall LLVM for WoA (Windows on Arm): ${LLVM_INSTALLATION_URL}")
else()
message(FATAL_ERROR "Clang compiler not found at ${CMAKE_C_COMPILER}. Please reinstall LLVM: ${LLVM_INSTALLATION_URL}")
endif()
endif()
if (NOT EXISTS "${CMAKE_CXX_COMPILER}")
if (CURRENT_ARCH STREQUAL "arm64")
message(FATAL_ERROR "Clang++ compiler not found at ${CMAKE_CXX_COMPILER}. Please reinstall LLVM for WoA (Windows on Arm): ${LLVM_INSTALLATION_URL}")
else()
message(FATAL_ERROR "Clang++ compiler not found at ${CMAKE_CXX_COMPILER}. Please reinstall LLVM: ${LLVM_INSTALLATION_URL}")
endif()
endif()
if (NOT EXISTS "${CMAKE_RC_COMPILER}")
if (CURRENT_ARCH STREQUAL "arm64")
message(FATAL_ERROR "LLVM Resource Compiler not found at ${CMAKE_RC_COMPILER}. Please reinstall LLVM for WoA (Windows on Arm): ${LLVM_INSTALLATION_URL}")
else()
message(FATAL_ERROR "LLVM Resource Compiler not found at ${CMAKE_RC_COMPILER}. Please reinstall LLVM: ${LLVM_INSTALLATION_URL}")
endif()
endif()
endfunction()

View File

@@ -0,0 +1,35 @@
function(setProgramFilesPaths CURRENT_ARCH)
set(PROGRAMFILES_X86_ENV_NAME "ProgramFiles(x86)")
set(PROGRAMFILES "$ENV{ProgramFiles}")
set(PROGRAMFILES_X86 "$ENV{${PROGRAMFILES_X86_ENV_NAME}}")
file(TO_CMAKE_PATH "${PROGRAMFILES}" PROGRAMFILES)
file(TO_CMAKE_PATH "${PROGRAMFILES_X86}" PROGRAMFILES_X86)
if(CURRENT_ARCH STREQUAL "arm64")
set(PROGRAMFILES_ARM64_ENV_NAME "ProgramFiles(Arm)")
set(PROGRAMFILES_ARM64 "$ENV{${PROGRAMFILES_ARM64_ENV_NAME}}")
file(TO_CMAKE_PATH "${PROGRAMFILES_ARM64}" PROGRAMFILES_ARM64)
set(PROGRAMFILES_PATHS_LIST
"${PROGRAMFILES_ARM64}"
"${PROGRAMFILES}"
"${PROGRAMFILES_X86}"
"C:/Program Files (Arm)"
"C:/Program Files"
"C:/Program Files (x86)"
)
list(REMOVE_DUPLICATES PROGRAMFILES_PATHS_LIST)
set(PROGRAMFILES_PATHS ${PROGRAMFILES_PATHS_LIST} PARENT_SCOPE)
else()
set(PROGRAMFILES_PATHS_LIST
"${PROGRAMFILES}"
"${PROGRAMFILES_X86}"
"C:/Program Files"
"C:/Program Files (x86)"
)
list(REMOVE_DUPLICATES PROGRAMFILES_PATHS_LIST)
set(PROGRAMFILES_PATHS ${PROGRAMFILES_PATHS_LIST} PARENT_SCOPE)
endif()
endfunction()

BIN
node_modules/node-llama-cpp/llama/gitRelease.bundle generated vendored Normal file

Binary file not shown.

View File

@@ -0,0 +1,200 @@
#include <stddef.h>
#include <map>
#include <vector>
#include <vulkan/vulkan.hpp>
constexpr std::uint32_t VK_VENDOR_ID_AMD = 0x1002;
constexpr std::uint32_t VK_VENDOR_ID_APPLE = 0x106b;
constexpr std::uint32_t VK_VENDOR_ID_INTEL = 0x8086;
constexpr std::uint32_t VK_VENDOR_ID_NVIDIA = 0x10de;
typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
static vk::Instance vulkanInstance() {
vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2);
vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {});
return vk::createInstance(createInfo);
}
static std::vector<vk::PhysicalDevice> dedupedDevices() {
vk::Instance instance = vulkanInstance();
auto physicalDevices = instance.enumeratePhysicalDevices();
std::vector<vk::PhysicalDevice> dedupedDevices;
dedupedDevices.reserve(physicalDevices.size());
// adapted from `ggml_vk_instance_init` in `ggml-vulkan.cpp`
for (const auto& device : physicalDevices) {
vk::PhysicalDeviceProperties2 newProps;
vk::PhysicalDeviceDriverProperties newDriver;
vk::PhysicalDeviceIDProperties newId;
newProps.pNext = &newDriver;
newDriver.pNext = &newId;
device.getProperties2(&newProps);
auto oldDevice = std::find_if(
dedupedDevices.begin(),
dedupedDevices.end(),
[&newId](const vk::PhysicalDevice& oldDevice) {
vk::PhysicalDeviceProperties2 oldProps;
vk::PhysicalDeviceDriverProperties oldDriver;
vk::PhysicalDeviceIDProperties oldId;
oldProps.pNext = &oldDriver;
oldDriver.pNext = &oldId;
oldDevice.getProperties2(&oldProps);
bool equals = std::equal(std::begin(oldId.deviceUUID), std::end(oldId.deviceUUID), std::begin(newId.deviceUUID));
equals = equals || (
oldId.deviceLUIDValid && newId.deviceLUIDValid &&
std::equal(std::begin(oldId.deviceLUID), std::end(oldId.deviceLUID), std::begin(newId.deviceLUID))
);
return equals;
}
);
if (oldDevice == dedupedDevices.end()) {
dedupedDevices.push_back(device);
continue;
}
vk::PhysicalDeviceProperties2 oldProps;
vk::PhysicalDeviceDriverProperties oldDriver;
oldProps.pNext = &oldDriver;
oldDevice->getProperties2(&oldProps);
std::map<vk::DriverId, int> driverPriorities {};
int oldPriority = 1000;
int newPriority = 1000;
switch (oldProps.properties.vendorID) {
case VK_VENDOR_ID_AMD:
driverPriorities[vk::DriverId::eMesaRadv] = 1;
driverPriorities[vk::DriverId::eAmdOpenSource] = 2;
driverPriorities[vk::DriverId::eAmdProprietary] = 3;
break;
case VK_VENDOR_ID_INTEL:
driverPriorities[vk::DriverId::eIntelOpenSourceMESA] = 1;
driverPriorities[vk::DriverId::eIntelProprietaryWindows] = 2;
break;
case VK_VENDOR_ID_NVIDIA:
driverPriorities[vk::DriverId::eNvidiaProprietary] = 1;
#if defined(VK_API_VERSION_1_3) && VK_HEADER_VERSION >= 235
driverPriorities[vk::DriverId::eMesaNvk] = 2;
#endif
break;
}
driverPriorities[vk::DriverId::eMesaDozen] = 4;
if (driverPriorities.count(oldDriver.driverID)) {
oldPriority = driverPriorities[oldDriver.driverID];
}
if (driverPriorities.count(newDriver.driverID)) {
newPriority = driverPriorities[newDriver.driverID];
}
if (newPriority < oldPriority) {
dedupedDevices.erase(std::remove(dedupedDevices.begin(), dedupedDevices.end(), *oldDevice), dedupedDevices.end());
dedupedDevices.push_back(device);
}
}
return dedupedDevices;
}
static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedMemorySize, bool addDeviceNames, std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback, bool * checkSupported) {
auto physicalDevices = dedupedDevices();
size_t usedMem = 0;
size_t totalMem = 0;
size_t totalUnifiedMemorySize = 0;
for (size_t i = 0; i < physicalDevices.size(); i++) {
vk::PhysicalDevice physicalDevice = physicalDevices[i];
vk::PhysicalDeviceMemoryProperties memProps = physicalDevice.getMemoryProperties();
vk::PhysicalDeviceProperties deviceProps = physicalDevice.getProperties();
if (deviceProps.deviceType == vk::PhysicalDeviceType::eCpu) {
// ignore CPU devices, as we don't want to count RAM from the CPU as VRAM
continue;
}
std::vector<vk::ExtensionProperties> extensionProperties = physicalDevice.enumerateDeviceExtensionProperties();
bool memoryBudgetExtensionSupported =
std::any_of(
extensionProperties.begin(),
extensionProperties.end(),
[](const vk::ExtensionProperties& ext) { return std::string(ext.extensionName.data()) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME;}
);
if (memoryBudgetExtensionSupported) {
vk::PhysicalDeviceMemoryBudgetPropertiesEXT memoryBudgetProperties;
vk::PhysicalDeviceMemoryProperties2 memProps2 = {};
memProps2.pNext = &memoryBudgetProperties;
physicalDevice.getMemoryProperties2(&memProps2);
for (uint32_t i = 0; i < memProps.memoryHeapCount; ++i) {
const auto heap = memProps2.memoryProperties.memoryHeaps[i];
if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
totalMem += heap.size;
usedMem += memoryBudgetProperties.heapUsage[i] + (heap.size - memoryBudgetProperties.heapBudget[i]);
if (heap.flags & vk::MemoryHeapFlagBits::eMultiInstance) {
totalUnifiedMemorySize += heap.size;
}
if (heap.size > 0 && addDeviceNames) {
(*deviceNames).push_back(std::string(deviceProps.deviceName.data()));
}
if (checkSupported != nullptr && checkSupported) {
VkPhysicalDeviceFeatures2 features2 = {};
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
VkPhysicalDeviceVulkan11Features vk11Features = {};
vk11Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
features2.pNext = &vk11Features;
vkGetPhysicalDeviceFeatures2(physicalDevice, &features2);
if (!vk11Features.storageBuffer16BitAccess) {
*checkSupported = false;
}
}
}
}
} else {
// VK_EXT_memory_budget extension is not supported, so we cannot determine used memory
warningLogCallback(
(
"Vulkan VK_EXT_memory_budget extension not supported for device \"" +
std::string(deviceProps.deviceName.data()) + "\", so VRAM info cannot be determined for it"
).c_str()
);
return false;
}
}
*total = totalMem;
*used = usedMem;
*unifiedMemorySize = totalUnifiedMemorySize;
return true;
}
bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, size_t* unifiedMemorySize, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
return enumerateVulkanDevices(total, used, unifiedMemorySize, false, nullptr, warningLogCallback, nullptr);
}
bool checkIsVulkanEnvSupported(gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
size_t total = 0;
size_t used = 0;
size_t unifiedMemorySize = 0;
bool isSupported = true;
enumerateVulkanDevices(&total, &used, &unifiedMemorySize, false, nullptr, warningLogCallback, &isSupported);
return isSupported;
}

View File

@@ -0,0 +1,9 @@
#pragma once
#include <stddef.h>
#include <vector>
typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, size_t* unifiedMemorySize, gpuInfoVulkanWarningLogCallback_t warningLogCallback);
bool checkIsVulkanEnvSupported(gpuInfoVulkanWarningLogCallback_t warningLogCallback);

409
node_modules/node-llama-cpp/llama/grammars/README.md generated vendored Normal file
View File

@@ -0,0 +1,409 @@
# GBNF Guide
GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis. GBNF grammars are supported in various ways in `tools/cli`, `tools/completion` and `tools/server`.
## Background
[Backus-Naur Form (BNF)](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form) is a notation for describing the syntax of formal languages like programming languages, file formats, and protocols. GBNF is an extension of BNF that primarily adds a few modern regex-like features.
## Basics
In GBNF, we define *production rules* that specify how a *non-terminal* (rule name) can be replaced with sequences of *terminals* (characters, specifically Unicode [code points](https://en.wikipedia.org/wiki/Code_point)) and other non-terminals. The basic format of a production rule is `nonterminal ::= sequence...`.
## Example
Before going deeper, let's look at some of the features demonstrated in `grammars/chess.gbnf`, a small chess notation grammar:
```
# `root` specifies the pattern for the overall output
root ::= (
# it must start with the characters "1. " followed by a sequence
# of characters that match the `move` rule, followed by a space, followed
# by another move, and then a newline
"1. " move " " move "\n"
# it's followed by one or more subsequent moves, numbered with one or two digits
([1-9] [0-9]? ". " move " " move "\n")+
)
# `move` is an abstract representation, which can be a pawn, nonpawn, or castle.
# The `[+#]?` denotes an optional check or checkmate sign after a move
move ::= (pawn | nonpawn | castle) [+#]?
pawn ::= ...
nonpawn ::= ...
castle ::= ...
```
## Non-Terminals and Terminals
Non-terminal symbols (rule names) stand for a pattern of terminals and other non-terminals. Each must be a dashed lowercase word, like `move`, `castle`, or `check-mate`.
Terminals are actual characters ([code points](https://en.wikipedia.org/wiki/Code_point)). They can be specified as a sequence like `"1"` or `"O-O"` or as ranges like `[1-9]` or `[NBKQR]`.
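As a tiny illustration (a hypothetical fragment, not one of the bundled grammars), the rule below defines the non-terminal `square` in terms of a terminal range and another non-terminal, `rank`:
```
# `square` combines the terminal range [a-h] with the non-terminal `rank`
square ::= [a-h] rank
rank ::= [1-8]
```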
## Characters and character ranges
Terminals support the full range of Unicode. Unicode characters can be specified directly in the grammar, for example `hiragana ::= [ぁ-ゟ]`, or with escapes: 8-bit (`\xXX`), 16-bit (`\uXXXX`) or 32-bit (`\UXXXXXXXX`).
Character ranges can be negated with `^`:
```
single-line ::= [^\n]+ "\n"
```
## Sequences and Alternatives
The order of symbols in a sequence matters. For example, in `"1. " move " " move "\n"`, the `"1. "` must come before the first `move`, etc.
Alternatives, denoted by `|`, give different sequences that are acceptable. For example, in `move ::= pawn | nonpawn | castle`, `move` can be a `pawn` move, a `nonpawn` move, or a `castle`.
Parentheses `()` can be used to group sequences, which allows for embedding alternatives in a larger rule or applying repetition and optional symbols (below) to a sequence.
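As a short sketch (hypothetical, not one of the bundled grammars), the rule below combines a fixed sequence with grouped alternatives:
```
# the output must be a single sentence naming one of three colors
root ::= "The answer is " ("red" | "green" | "blue") ".\n"
```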
## Repetition and Optional Symbols
- `*` after a symbol or sequence means that it can be repeated zero or more times (equivalent to `{0,}`).
- `+` denotes that the symbol or sequence should appear one or more times (equivalent to `{1,}`).
- `?` makes the preceding symbol or sequence optional (equivalent to `{0,1}`).
- `{m}` repeats the preceding symbol or sequence exactly `m` times
- `{m,}` repeats the preceding symbol or sequence at least `m` times
- `{m,n}` repeats the preceding symbol or sequence between `m` and `n` times (inclusive)
- `{0,n}` repeats the preceding symbol or sequence at most `n` times (inclusive), as in the sketch below
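Here is a small hypothetical rule (not taken from the bundled grammars) that combines several of these operators:
```
# an optional sign followed by three to five digits
signed-num ::= ("-" | "+")? [0-9]{3,5}
# one such number followed by zero or more comma-separated numbers
num-list ::= signed-num ("," signed-num)*
```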
## Tokens
Tokens allow grammars to match specific tokenizer tokens rather than character sequences. This is useful for constraining outputs based on special tokens (like `<think>` or `</think>`).
Tokens can be specified in two ways:
1. **Token ID**: Use angle brackets with the token ID in square brackets: `<[token-id]>`. For example, `<[1000]>` matches the token with ID 1000.
2. **Token string**: Use angle brackets with the token text directly: `<token>`. For example, `<think>` will match the token whose text is exactly `<think>`. This only works if the string tokenizes to exactly one token in the vocabulary, otherwise the grammar will fail to parse.
You can negate token matches using the `!` prefix: `!<[1000]>` or `!<think>` matches any token *except* the specified one.
```
# Match a thinking block: <think>...</think>
# Using token strings (requires these to be single tokens in the vocab)
root ::= <think> thinking </think> .*
thinking ::= !</think>*
# Equivalent grammar using explicit token IDs
# Assumes token 1000 = <think>, token 1001 = </think>
root ::= <[1000]> thinking <[1001]> .*
thinking ::= !<[1001]>*
```
## Comments and newlines
Comments can be specified with `#`:
```
# defines whitespace (one or more spaces, tabs, or newlines)
ws ::= [ \t\n]+
```
Newlines are allowed between rules and between symbols or sequences nested inside parentheses. Additionally, a newline after an alternate marker `|` will continue the current rule, even outside of parentheses.
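For instance (a hypothetical fragment), a rule can be continued on the next line after the `|` marker:
```
# alternatives may continue on the following line after `|`
direction ::= "north" | "south" |
"east" | "west"
```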
## The root rule
In a full grammar, the `root` rule always defines the starting point of the grammar. In other words, it specifies what the entire output must match.
```
# a grammar for lists
root ::= ("- " item)+
item ::= [^\n]+ "\n"
```
## Next steps
This guide provides a brief overview. Check out the GBNF files in this directory (`grammars/`) for examples of full grammars. You can try them out with:
```
./llama-cli -m <model> --grammar-file grammars/some-grammar.gbnf -p 'Some prompt'
```
`llama.cpp` can also convert JSON schemas to grammars either ahead of time or at each request, see below.
## Troubleshooting
Grammars currently have performance gotchas (see https://github.com/ggml-org/llama.cpp/issues/4218).
### Efficient optional repetitions
A common pattern is to allow repetitions of a pattern `x` up to N times.
While semantically correct, the syntax `x? x? x? ... x?` (with N repetitions) may result in extremely slow sampling. Instead, you can write `x{0,N}` (or `(x (x (x ... (x)?...)?)?)?` w/ N-deep nesting in earlier llama.cpp versions).
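As a sketch of the difference, a hypothetical rule matching one to five lowercase letters could be written either way; the bounded form is the one to prefer:
```
# slow: chained optionals can make sampling extremely slow for large N
# word ::= [a-z] [a-z]? [a-z]? [a-z]? [a-z]?
# fast: bounded repetition expresses the same pattern efficiently
word ::= [a-z] [a-z]{0,4}
```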
## Using GBNF grammars
You can use GBNF grammars:
- In [llama-server](../tools/server)'s completion endpoints, passed as the `grammar` body field
- In [llama-cli](../tools/cli) and [llama-completion](../tools/completion), passed as the `--grammar` & `--grammar-file` flags
- With [test-gbnf-validator](../tests/test-gbnf-validator.cpp), to test them against strings.
## JSON Schemas → GBNF
`llama.cpp` supports converting a subset of https://json-schema.org/ to GBNF grammars:
- In [llama-server](../tools/server):
- For any completion endpoints, passed as the `json_schema` body field
- For the `/chat/completions` endpoint, passed inside the `response_format` body field (e.g. `{"type": "json_object", "schema": {"items": {}}}` or `{ type: "json_schema", json_schema: {"schema": ...} }`)
- In [llama-cli](../tools/cli) and [llama-completion](../tools/completion), passed as the `--json` / `-j` flag
- To convert to a grammar ahead of time:
- in CLI, with [examples/json_schema_to_grammar.py](../examples/json_schema_to_grammar.py)
- in JavaScript with [json-schema-to-grammar.mjs](../tools/server/public_legacy/json-schema-to-grammar.mjs) (this is used by the [server](../tools/server)'s Web UI)
> [!NOTE]
> The JSON schema is only used to constrain the model output and is not injected into the prompt. The model has no visibility into the schema, so if you want it to understand the expected structure, describe it explicitly in your prompt. This does not apply to tool calling, where schemas are injected into the prompt.
Take a look at [tests](../tests/test-json-schema-to-grammar.cpp) to see which features are likely supported (you'll also find usage examples in https://github.com/ggml-org/llama.cpp/pull/5978, https://github.com/ggml-org/llama.cpp/pull/6659 & https://github.com/ggml-org/llama.cpp/pull/6555).
```bash
llama-cli \
-hfr bartowski/Phi-3-medium-128k-instruct-GGUF \
-hff Phi-3-medium-128k-instruct-Q8_0.gguf \
-j '{
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"minLength": 1,
"maxLength": 100
},
"age": {
"type": "integer",
"minimum": 0,
"maximum": 150
}
},
"required": ["name", "age"],
"additionalProperties": false
},
"minItems": 10,
"maxItems": 100
}' \
-p 'Generate a {name, age}[] JSON array with famous actors of all ages.'
```
<details>
<summary>Show grammar</summary>
You can convert any schema in command-line with:
```bash
examples/json_schema_to_grammar.py name-age-schema.json
```
```
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
item ::= "{" space item-name-kv "," space item-age-kv "}" space
item-age ::= ([0-9] | ([1-8] [0-9] | [9] [0-9]) | "1" ([0-4] [0-9] | [5] "0")) space
item-age-kv ::= "\"age\"" space ":" space item-age
item-name ::= "\"" char{1,100} "\"" space
item-name-kv ::= "\"name\"" space ":" space item-name
root ::= "[" space item ("," space item){9,99} "]" space
space ::= | " " | "\n" [ \t]{0,20}
```
</details>
Here is also a list of known limitations (contributions welcome):
- `additionalProperties` defaults to `false` (produces faster grammars + reduces hallucinations).
- `"additionalProperties": true` may produce keys that contain unescaped newlines.
- Unsupported features are skipped silently. It is currently advised to use the command-line Python converter (see above) to see any warnings, and to inspect the resulting grammar / test it w/ [llama-gbnf-validator](../examples/gbnf-validator/gbnf-validator.cpp).
- Can't mix `properties` w/ `anyOf` / `oneOf` in the same type (https://github.com/ggml-org/llama.cpp/issues/7703)
- [prefixItems](https://json-schema.org/draft/2020-12/json-schema-core#name-prefixitems) is broken (but [items](https://json-schema.org/draft/2020-12/json-schema-core#name-items) works)
- `minimum`, `exclusiveMinimum`, `maximum`, `exclusiveMaximum`: only supported for `"type": "integer"` for now, not `number`
- Nested `$ref`s are broken (https://github.com/ggml-org/llama.cpp/issues/8073)
- [pattern](https://json-schema.org/draft/2020-12/json-schema-validation#name-pattern)s must start with `^` and end with `$`
- Remote `$ref`s not supported in the C++ version (Python & JavaScript versions fetch https refs)
- `string` [formats](https://json-schema.org/draft/2020-12/json-schema-validation#name-defined-formats) lack `uri`, `email`
- No [`patternProperties`](https://json-schema.org/draft/2020-12/json-schema-core#name-patternproperties)
And a non-exhaustive list of other unsupported features that are unlikely to be implemented (hard and/or too slow to support w/ stateless grammars):
- [`uniqueItems`](https://json-schema.org/draft/2020-12/json-schema-validation#name-uniqueitems)
- [`contains`](https://json-schema.org/draft/2020-12/json-schema-core#name-contains) / `minContains`
- `$anchor` (cf. [dereferencing](https://json-schema.org/draft/2020-12/json-schema-core#name-dereferencing))
- [`not`](https://json-schema.org/draft/2020-12/json-schema-core#name-not)
- [Conditionals](https://json-schema.org/draft/2020-12/json-schema-core#name-keywords-for-applying-subsche) `if` / `then` / `else` / `dependentSchemas`
### A word about additionalProperties
> [!WARNING]
> The JSON schemas spec states `object`s accept [additional properties](https://json-schema.org/understanding-json-schema/reference/object#additionalproperties) by default.
> Since this is slow and seems prone to hallucinations, we default to no additional properties.
> You can set `"additionalProperties": true` in the schema of any object to explicitly allow additional properties.
If you're using [Pydantic](https://pydantic.dev/) to generate schemas, you can enable additional properties with the `extra` config on each model class:
```python
# pip install pydantic
import json
from typing import Annotated, List
from pydantic import BaseModel, Extra, Field
class QAPair(BaseModel):
class Config:
extra = 'allow' # triggers additionalProperties: true in the JSON schema
question: str
concise_answer: str
justification: str
class Summary(BaseModel):
class Config:
extra = 'allow'
key_facts: List[Annotated[str, Field(pattern='- .{5,}')]]
question_answers: List[Annotated[List[QAPair], Field(min_items=5)]]
print(json.dumps(Summary.model_json_schema(), indent=2))
```
<details>
<summary>Show JSON schema & grammar</summary>
```json
{
"$defs": {
"QAPair": {
"additionalProperties": true,
"properties": {
"question": {
"title": "Question",
"type": "string"
},
"concise_answer": {
"title": "Concise Answer",
"type": "string"
},
"justification": {
"title": "Justification",
"type": "string"
}
},
"required": [
"question",
"concise_answer",
"justification"
],
"title": "QAPair",
"type": "object"
}
},
"additionalProperties": true,
"properties": {
"key_facts": {
"items": {
"pattern": "^- .{5,}$",
"type": "string"
},
"title": "Key Facts",
"type": "array"
},
"question_answers": {
"items": {
"items": {
"$ref": "#/$defs/QAPair"
},
"minItems": 5,
"type": "array"
},
"title": "Question Answers",
"type": "array"
}
},
"required": [
"key_facts",
"question_answers"
],
"title": "Summary",
"type": "object"
}
```
```
QAPair ::= "{" space QAPair-question-kv "," space QAPair-concise-answer-kv "," space QAPair-justification-kv ( "," space ( QAPair-additional-kv ( "," space QAPair-additional-kv )* ) )? "}" space
QAPair-additional-k ::= ["] ( [c] ([o] ([n] ([c] ([i] ([s] ([e] ([_] ([a] ([n] ([s] ([w] ([e] ([r] char+ | [^"r] char*) | [^"e] char*) | [^"w] char*) | [^"s] char*) | [^"n] char*) | [^"a] char*) | [^"_] char*) | [^"e] char*) | [^"s] char*) | [^"i] char*) | [^"c] char*) | [^"n] char*) | [^"o] char*) | [j] ([u] ([s] ([t] ([i] ([f] ([i] ([c] ([a] ([t] ([i] ([o] ([n] char+ | [^"n] char*) | [^"o] char*) | [^"i] char*) | [^"t] char*) | [^"a] char*) | [^"c] char*) | [^"i] char*) | [^"f] char*) | [^"i] char*) | [^"t] char*) | [^"s] char*) | [^"u] char*) | [q] ([u] ([e] ([s] ([t] ([i] ([o] ([n] char+ | [^"n] char*) | [^"o] char*) | [^"i] char*) | [^"t] char*) | [^"s] char*) | [^"e] char*) | [^"u] char*) | [^"cjq] char* )? ["] space
QAPair-additional-kv ::= QAPair-additional-k ":" space value
QAPair-concise-answer-kv ::= "\"concise_answer\"" space ":" space string
QAPair-justification-kv ::= "\"justification\"" space ":" space string
QAPair-question-kv ::= "\"question\"" space ":" space string
additional-k ::= ["] ( [k] ([e] ([y] ([_] ([f] ([a] ([c] ([t] ([s] char+ | [^"s] char*) | [^"t] char*) | [^"c] char*) | [^"a] char*) | [^"f] char*) | [^"_] char*) | [^"y] char*) | [^"e] char*) | [q] ([u] ([e] ([s] ([t] ([i] ([o] ([n] ([_] ([a] ([n] ([s] ([w] ([e] ([r] ([s] char+ | [^"s] char*) | [^"r] char*) | [^"e] char*) | [^"w] char*) | [^"s] char*) | [^"n] char*) | [^"a] char*) | [^"_] char*) | [^"n] char*) | [^"o] char*) | [^"i] char*) | [^"t] char*) | [^"s] char*) | [^"e] char*) | [^"u] char*) | [^"kq] char* )? ["] space
additional-kv ::= additional-k ":" space value
array ::= "[" space ( value ("," space value)* )? "]" space
boolean ::= ("true" | "false") space
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
dot ::= [^\x0A\x0D]
integral-part ::= [0] | [1-9] [0-9]{0,15}
key-facts ::= "[" space (key-facts-item ("," space key-facts-item)*)? "]" space
key-facts-item ::= "\"" "- " key-facts-item-1{5,} "\"" space
key-facts-item-1 ::= dot
key-facts-kv ::= "\"key_facts\"" space ":" space key-facts
null ::= "null" space
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
question-answers ::= "[" space (question-answers-item ("," space question-answers-item)*)? "]" space
question-answers-item ::= "[" space question-answers-item-item ("," space question-answers-item-item){4,} "]" space
question-answers-item-item ::= QAPair
question-answers-kv ::= "\"question_answers\"" space ":" space question-answers
root ::= "{" space key-facts-kv "," space question-answers-kv ( "," space ( additional-kv ( "," space additional-kv )* ) )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
value ::= object | array | string | number | boolean | null
```
</details>
If you're using [Zod](https://zod.dev/), you can make your objects explicitly allow extra properties w/ `nonstrict()` / `passthrough()` (or explicitly disallow extra props w/ `z.object(...).strict()` or `z.strictObject(...)`), but note that [zod-to-json-schema](https://github.com/StefanTerdell/zod-to-json-schema) currently always sets `"additionalProperties": false` anyway.
```js
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
const Foo = z.object({
age: z.number().positive(),
email: z.string().email(),
}).strict();
console.log(zodToJsonSchema(Foo));
```
<details>
<summary>Show JSON schema & grammar</summary>
```json
{
"type": "object",
"properties": {
"age": {
"type": "number",
"exclusiveMinimum": 0
},
"email": {
"type": "string",
"format": "email"
}
},
"required": [
"age",
"email"
],
"additionalProperties": false,
"$schema": "http://json-schema.org/draft-07/schema#"
}
```
```
age-kv ::= "\"age\"" space ":" space number
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
email-kv ::= "\"email\"" space ":" space string
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space age-kv "," space email-kv "}" space
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
```
</details>

View File

@@ -0,0 +1,6 @@
root ::= (expr "=" ws term "\n")+
expr ::= term ([-+*/] term)*
term ::= ident | num | "(" ws expr ")" ws
ident ::= [a-z] [a-z0-9_]* ws
num ::= [0-9]+ ws
ws ::= [ \t\n]*

42
node_modules/node-llama-cpp/llama/grammars/c.gbnf generated vendored Normal file
View File

@@ -0,0 +1,42 @@
root ::= (declaration)*
declaration ::= dataType identifier "(" parameter? ")" "{" statement* "}"
dataType ::= "int" ws | "float" ws | "char" ws
identifier ::= [a-zA-Z_] [a-zA-Z_0-9]*
parameter ::= dataType identifier
statement ::=
( dataType identifier ws "=" ws expression ";" ) |
( identifier ws "=" ws expression ";" ) |
( identifier ws "(" argList? ")" ";" ) |
( "return" ws expression ";" ) |
( "while" "(" condition ")" "{" statement* "}" ) |
( "for" "(" forInit ";" ws condition ";" ws forUpdate ")" "{" statement* "}" ) |
( "if" "(" condition ")" "{" statement* "}" ("else" "{" statement* "}")? ) |
( singleLineComment ) |
( multiLineComment )
forInit ::= dataType identifier ws "=" ws expression | identifier ws "=" ws expression
forUpdate ::= identifier ws "=" ws expression
condition ::= expression relationOperator expression
relationOperator ::= ("<=" | "<" | "==" | "!=" | ">=" | ">")
expression ::= term (("+" | "-") term)*
term ::= factor(("*" | "/") factor)*
factor ::= identifier | number | unaryTerm | funcCall | parenExpression
unaryTerm ::= "-" factor
funcCall ::= identifier "(" argList? ")"
parenExpression ::= "(" ws expression ws ")"
argList ::= expression ("," ws expression)*
number ::= [0-9]+
singleLineComment ::= "//" [^\n]* "\n"
multiLineComment ::= "/*" ( [^*] | ("*" [^/]) )* "*/"
ws ::= ([ \t\n]+)

13
node_modules/node-llama-cpp/llama/grammars/chess.gbnf generated vendored Normal file
View File

@@ -0,0 +1,13 @@
# Specifies chess moves as a list in algebraic notation, using PGN conventions
# Force first move to "1. ", then any 1-2 digit number after, relying on model to follow the pattern
root ::= "1. " move " " move "\n" ([1-9] [0-9]? ". " move " " move "\n")+
move ::= (pawn | nonpawn | castle) [+#]?
# piece type, optional file/rank, optional capture, dest file & rank
nonpawn ::= [NBKQR] [a-h]? [1-8]? "x"? [a-h] [1-8]
# optional file & capture, dest file & rank, optional promotion
pawn ::= ([a-h] "x")? [a-h] [1-8] ("=" [NBKQR])?
castle ::= "O-O" "-O"?

View File

@@ -0,0 +1,6 @@
# note: this might be incomplete, mostly an example
root ::= en-char+ ([ \t\n] en-char+)*
en-char ::= letter | digit | punctuation
letter ::= [a-zA-Z]
digit ::= [0-9]
punctuation ::= [!"#$%&'()*+,-./:;<=>?@[\\\]^_`{|}~]

View File

@@ -0,0 +1,7 @@
# A probably incorrect grammar for Japanese
root ::= jp-char+ ([ \t\n] jp-char+)*
jp-char ::= hiragana | katakana | punctuation | cjk
hiragana ::= [ぁ-ゟ]
katakana ::= [ァ-ヿ]
punctuation ::= [、-〾]
cjk ::= [一-鿿]

25
node_modules/node-llama-cpp/llama/grammars/json.gbnf generated vendored Normal file
View File

@@ -0,0 +1,25 @@
root ::= object
value ::= object | array | string | number | ("true" | "false" | "null") ws
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\\x7F\x00-\x1F] |
"\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [0-9] [1-9]{0,15})? ws
# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= | " " | "\n" [ \t]{0,20}

View File

@@ -0,0 +1,34 @@
# This is the same as json.gbnf but we restrict whitespaces at the end of the root array
# Useful for generating JSON arrays
root ::= arr
value ::= object | array | string | number | ("true" | "false" | "null") ws
arr ::=
"[\n" ws (
value
(",\n" ws value)*
)? "]"
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\\x7F\x00-\x1F] |
"\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [1-9] [0-9]{0,15})? ws
# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= | " " | "\n" [ \t]{0,20}

4
node_modules/node-llama-cpp/llama/grammars/list.gbnf generated vendored Normal file
View File

@@ -0,0 +1,4 @@
root ::= item+
# Excludes various line break characters
item ::= "- " [^\r\n\x0b\x0c\x85\u2028\u2029]+ "\n"

View File

@@ -0,0 +1,4 @@
{
"tag": "b7836",
"llamaCppGithubRepo": "ggml-org/llama.cpp"
}

5
node_modules/node-llama-cpp/llama/package.json generated vendored Normal file
View File

@@ -0,0 +1,5 @@
{
"binary": {
"napi_versions": [7]
}
}

View File

@@ -0,0 +1,14 @@
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.programFilesPaths.cmake")
setProgramFilesPaths("arm64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNodeLib.cmake")
ensureNodeLib("arm64" "arm64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmApplyGnuModeAdaptations.cmake")
llvmApplyGnuModeAdaptations()
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmEnsureCmakeAr.cmake")
llvmEnsureCmakeAr("arm64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNinjaPath.cmake")
ensureNinjaPath()

View File

@@ -0,0 +1,14 @@
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.programFilesPaths.cmake")
setProgramFilesPaths("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNodeLib.cmake")
ensureNodeLib("x64" "arm64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmApplyGnuModeAdaptations.cmake")
llvmApplyGnuModeAdaptations()
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmEnsureCmakeAr.cmake")
llvmEnsureCmakeAr("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNinjaPath.cmake")
ensureNinjaPath()

View File

@@ -0,0 +1,14 @@
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.programFilesPaths.cmake")
setProgramFilesPaths("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNodeLib.cmake")
ensureNodeLib("x64" "x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmApplyGnuModeAdaptations.cmake")
llvmApplyGnuModeAdaptations()
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmEnsureCmakeAr.cmake")
llvmEnsureCmakeAr("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNinjaPath.cmake")
ensureNinjaPath()

View File

@@ -0,0 +1,8 @@
set(CMAKE_SYSTEM_NAME Darwin) # macOS
set(CMAKE_SYSTEM_PROCESSOR arm64)
set(CMAKE_C_COMPILER clang)
set(CMAKE_CXX_COMPILER clang++)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch arm64")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch arm64")

View File

@@ -0,0 +1,5 @@
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR x86_64)
set(CMAKE_C_COMPILER x86_64-linux-gnu-gcc)
set(CMAKE_CXX_COMPILER x86_64-linux-gnu-g++)

View File

@@ -0,0 +1,5 @@
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc)
set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++)

View File

@@ -0,0 +1,5 @@
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR arm)
set(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc)
set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++)

View File

@@ -0,0 +1,20 @@
set(CMAKE_SYSTEM_NAME Windows)
set(CMAKE_SYSTEM_PROCESSOR x86_64)
set(target x86_64-pc-windows-msvc)
set(CMAKE_C_COMPILER_TARGET ${target})
set(CMAKE_CXX_COMPILER_TARGET ${target})
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.programFilesPaths.cmake")
setProgramFilesPaths("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmUseGnuModeCompilers.cmake")
llvmUseGnuModeCompilers("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNinjaPath.cmake")
ensureNinjaPath()
set(arch_c_flags "-march=native")
set(CMAKE_C_FLAGS_INIT "${arch_c_flags}")
set(CMAKE_CXX_FLAGS_INIT "${arch_c_flags}")

View File

@@ -0,0 +1,21 @@
set(CMAKE_SYSTEM_NAME Windows)
set(CMAKE_SYSTEM_PROCESSOR arm64)
set(target arm64-pc-windows-msvc)
set(CMAKE_C_COMPILER_TARGET ${target})
set(CMAKE_CXX_COMPILER_TARGET ${target})
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.programFilesPaths.cmake")
setProgramFilesPaths("arm64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmUseGnuModeCompilers.cmake")
llvmUseGnuModeCompilers("arm64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNinjaPath.cmake")
ensureNinjaPath()
set(arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only")
set(warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments")
set(CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}")
set(CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}")

View File

@@ -0,0 +1,21 @@
set(CMAKE_SYSTEM_NAME Windows)
set(CMAKE_SYSTEM_PROCESSOR arm64)
set(target arm64-pc-windows-msvc)
set(CMAKE_C_COMPILER_TARGET ${target})
set(CMAKE_CXX_COMPILER_TARGET ${target})
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.programFilesPaths.cmake")
setProgramFilesPaths("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmUseGnuModeCompilers.cmake")
llvmUseGnuModeCompilers("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNinjaPath.cmake")
ensureNinjaPath()
set(arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only")
set(warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments")
set(CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}")
set(CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}")

10
node_modules/node-llama-cpp/llama/xpack/package.json generated vendored Normal file
View File

@@ -0,0 +1,10 @@
{
"xpack": {
"minimumXpmRequired": "0.16.3",
"dependencies": {},
"devDependencies": {},
"properties": {},
"actions": {},
"buildConfigurations": {}
}
}