First upload version 0.0.1

This commit is contained in: Neyra
2026-02-05 15:27:49 +08:00
commit 8e9b7201ed
4182 changed files with 593136 additions and 0 deletions

46
node_modules/node-llama-cpp/llama/.clang-format generated vendored Normal file

@@ -0,0 +1,46 @@
BasedOnStyle: Google
IndentWidth: 4
UseTab: Never
TabWidth: 4
ColumnLimit: 140
AllowShortCaseLabelsOnASingleLine: true
AllowShortFunctionsOnASingleLine: false
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlignTrailingComments: false
SpaceAfterTemplateKeyword: false
AllowShortBlocksOnASingleLine: false
MaxEmptyLinesToKeep: 3
NamespaceIndentation: None
CommentPragmas: '^[^ ]'
FixNamespaceComments: false
IndentAccessModifiers: true
SpaceAfterCStyleCast: false
PointerAlignment: Left
IndentCaseLabels: true
BinPackArguments: false
BinPackParameters: false
Cpp11BracedListStyle: false
SpaceBeforeCpp11BracedList: true
SpaceInEmptyBlock: true
KeepEmptyLinesAtTheStartOfBlocks: false
DerivePointerAlignment: false
AlwaysBreakTemplateDeclarations: No
DeriveLineEnding: false
UseCRLF: false
AllowAllArgumentsOnNextLine: true
PackConstructorInitializers: CurrentLine
AlignAfterOpenBracket: BlockIndent
BraceWrapping:
AfterStruct: false
AfterClass: false
AfterUnion: false
AfterEnum: false
AfterControlStatement: false
AfterFunction: false
AfterNamespace: false
AfterExternBlock: false
BeforeElse: false
SplitEmptyFunction: false
SplitEmptyRecord: false
SplitEmptyNamespace: false
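
For illustration, a short hypothetical snippet (not part of the package) formatted the way this configuration dictates: 4-space indentation, left-aligned pointers, attached braces, and block-indented arguments when a call is broken after the opening bracket.

#include <cstdio>

int countMatches(const int* values, int count, int target) {
    int matches = 0;
    for (int i = 0; i < count; i++) {
        if (values[i] == target) {
            matches++;
        }
    }
    return matches;
}

void reportMatches(const int* values, int count, int target) {
    printf(
        "found %d matches for %d\n",
        countMatches(values, count, target),
        target
    );
}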

141
node_modules/node-llama-cpp/llama/CMakeLists.txt generated vendored Normal file

@@ -0,0 +1,141 @@
cmake_minimum_required(VERSION 3.19)
if (NLC_CURRENT_PLATFORM STREQUAL "win-x64" OR NLC_CURRENT_PLATFORM STREQUAL "win-arm64")
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()
include("./cmake/addVariantSuffix.cmake")
if (NLC_CURRENT_PLATFORM STREQUAL "win-x64")
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebugDLL" CACHE STRING "" FORCE)
else()
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDLL" CACHE STRING "" FORCE)
endif()
endif()
if (NLC_TARGET_PLATFORM STREQUAL "win-arm64" AND (CMAKE_GENERATOR STREQUAL "Ninja" OR CMAKE_GENERATOR STREQUAL "Ninja Multi-Config") AND NOT MINGW)
if(NLC_CURRENT_PLATFORM STREQUAL "win-x64")
include("./profiles/llvm.win32.host-x64.target-arm64.cmake")
elseif(NLC_CURRENT_PLATFORM STREQUAL "win-arm64")
include("./profiles/llvm.win32.host-arm64.target-arm64.cmake")
endif()
elseif (NLC_CURRENT_PLATFORM STREQUAL "win-x64" AND NLC_TARGET_PLATFORM STREQUAL "win-x64" AND (CMAKE_GENERATOR STREQUAL "Ninja" OR CMAKE_GENERATOR STREQUAL "Ninja Multi-Config") AND NOT MINGW)
include("./profiles/llvm.win32.host-x64.target-x64.cmake")
endif()
project("llama-addon" C CXX)
if (MSVC)
if (GGML_STATIC)
add_link_options(-static)
if (MINGW)
add_link_options(-static-libgcc -static-libstdc++)
endif()
endif()
# add_compile_options(/EHsc)
else()
add_compile_options(-fexceptions)
endif()
add_definitions(-DNAPI_VERSION=7)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_PLATFORM_NO_VERSIONED_SONAME ON)
set(LLAMA_BUILD_COMMON ON)
if (MINGW)
set(GGML_BACKEND_DL OFF)
set(BUILD_SHARED_LIBS ON)
endif()
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
add_compile_options(-Wno-c++17-extensions)
endif()
if(APPLE)
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
set(CMAKE_BUILD_RPATH "@loader_path")
set(CMAKE_INSTALL_RPATH "@loader_path")
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
else()
set(CMAKE_BUILD_RPATH_USE_ORIGIN ON)
if (CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "Android")
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
set(CMAKE_BUILD_RPATH "$ORIGIN")
set(CMAKE_INSTALL_RPATH "$ORIGIN")
set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
endif()
endif()
execute_process(COMMAND node -p "require('node-addon-api').include.slice(1,-1)"
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE NODE_ADDON_API_DIR
OUTPUT_STRIP_TRAILING_WHITESPACE)
include_directories(${NODE_ADDON_API_DIR} ${CMAKE_JS_INC})
if (DEFINED GGML_NATIVE)
set(NLC_GGML_NATIVE ${GGML_NATIVE})
elseif(CMAKE_CROSSCOMPILING OR DEFINED ENV{SOURCE_DATE_EPOCH})
set(NLC_GGML_NATIVE OFF)
else()
set(NLC_GGML_NATIVE ON)
endif()
add_subdirectory("llama.cpp")
include_directories("llama.cpp")
include_directories("./llama.cpp/common")
# This is needed to use methods in "llama-grammar.h" and "unicode.h"
target_include_directories(llama PUBLIC "./llama.cpp/src")
unset(GPU_INFO_HEADERS)
unset(GPU_INFO_SOURCES)
unset(GPU_INFO_EXTRA_LIBS)
if (GGML_VULKAN OR GGML_KOMPUTE)
find_package(Vulkan)
if (Vulkan_FOUND)
if (GGML_VULKAN)
message(STATUS "Using Vulkan for GPU info")
elseif (GGML_KOMPUTE)
message(STATUS "Using Vulkan for GPU info because Kompute is enabled")
endif()
list(APPEND GPU_INFO_HEADERS gpuInfo/vulkan-gpu-info.h)
list(APPEND GPU_INFO_SOURCES gpuInfo/vulkan-gpu-info.cpp)
add_compile_definitions(GPU_INFO_USE_VULKAN)
list(APPEND GPU_INFO_EXTRA_LIBS Vulkan::Vulkan)
else()
message(FATAL_ERROR "Vulkan was not found")
endif()
endif()
list(REMOVE_DUPLICATES GPU_INFO_HEADERS)
list(REMOVE_DUPLICATES GPU_INFO_SOURCES)
list(REMOVE_DUPLICATES GPU_INFO_EXTRA_LIBS)
addVariantSuffix(llama ${NLC_VARIANT})
addVariantSuffix(ggml ${NLC_VARIANT})
file(GLOB SOURCE_FILES "addon/*.cpp" "addon/**/*.cpp" ${GPU_INFO_SOURCES})
add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC} ${GPU_INFO_HEADERS})
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB})
target_link_libraries(${PROJECT_NAME} "llama")
target_link_libraries(${PROJECT_NAME} "common")
if (DEFINED GPU_INFO_EXTRA_LIBS)
target_link_libraries(${PROJECT_NAME} ${GPU_INFO_EXTRA_LIBS})
endif()
if(MSVC AND CMAKE_JS_NODELIB_DEF AND CMAKE_JS_NODELIB_TARGET)
# Generate node.lib
execute_process(COMMAND ${CMAKE_AR} /def:${CMAKE_JS_NODELIB_DEF} /out:${CMAKE_JS_NODELIB_TARGET} ${CMAKE_STATIC_LINKER_FLAGS})
endif()

985
node_modules/node-llama-cpp/llama/addon/AddonContext.cpp generated vendored Normal file

@@ -0,0 +1,985 @@
#include <thread>
#include <algorithm>
#include <cmath>
#include "common/common.h"
#include "llama-vocab.h"
#include "llama.h"
#include "addonGlobals.h"
#include "AddonModel.h"
#include "AddonModelLora.h"
#include "AddonGrammarEvaluationState.h"
#include "AddonContext.h"
static uint64_t calculateBatchMemorySize(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
uint64_t totalSize = 0;
if (embd) {
totalSize += sizeof(float) * n_tokens_alloc * embd;
} else {
totalSize += sizeof(llama_token) * n_tokens_alloc;
}
totalSize += sizeof(llama_pos) * n_tokens_alloc;
totalSize += sizeof(int32_t) * n_tokens_alloc;
totalSize += sizeof(llama_seq_id *) * (n_tokens_alloc + 1);
totalSize += sizeof(llama_seq_id) * n_seq_max * n_tokens_alloc;
totalSize += sizeof(int8_t) * n_tokens_alloc;
return totalSize;
}
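// Standalone sketch (illustration only, not part of this file): the same
// formula evaluated with explicit sizes, assuming 4-byte llama_token,
// llama_pos and llama_seq_id and 8-byte pointers, as on typical 64-bit
// builds. For n_tokens_alloc = 512, embd = 0 and n_seq_max = 1 this comes
// to 12,808 bytes (~12.5 KiB).
static uint64_t exampleBatchMemorySize() {
    const uint64_t n_tokens = 512;
    const uint64_t n_seq_max = 1;
    return 4 * n_tokens              // token ids (embd == 0)
         + 4 * n_tokens              // positions
         + 4 * n_tokens              // n_seq_id counts
         + 8 * (n_tokens + 1)        // seq_id pointer array
         + 4 * n_seq_max * n_tokens  // seq_id values
         + 1 * n_tokens;             // logits flags
}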
class AddonContextDecodeBatchWorker : public Napi::AsyncWorker {
public:
AddonContext* ctx;
AddonContextDecodeBatchWorker(const Napi::Env& env, AddonContext* ctx)
: Napi::AsyncWorker(env, "AddonContextDecodeBatchWorker"),
ctx(ctx),
deferred(Napi::Promise::Deferred::New(env)) {
ctx->Ref();
}
~AddonContextDecodeBatchWorker() {
ctx->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
// Perform the evaluation using llama_decode.
int r = llama_decode(ctx->ctx, ctx->batch);
if (r != 0) {
if (r == 1) {
SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
} else {
SetError("Eval has failed");
}
return;
}
llama_synchronize(ctx->ctx);
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_decode\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
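// Illustration only (not part of this file): every worker below follows the
// same node-addon-api shape as AddonContextDecodeBatchWorker above. It Ref()s
// the wrapped object for the worker's lifetime, runs the blocking llama.cpp
// call in Execute() on a worker thread (no JS values may be touched there),
// and settles a Napi::Promise::Deferred from OnOK()/OnError() back on the JS
// thread. A minimal sketch of that pattern, with doBlockingWork() as a
// hypothetical stand-in for the llama.cpp call:
class ExampleAsyncWorker : public Napi::AsyncWorker {
    public:
        ExampleAsyncWorker(const Napi::Env& env)
            : Napi::AsyncWorker(env, "ExampleAsyncWorker"),
              deferred(Napi::Promise::Deferred::New(env)) { }

        Napi::Promise GetPromise() {
            return deferred.Promise();
        }

    protected:
        Napi::Promise::Deferred deferred;

        void Execute() override {
            // Worker thread: blocking work goes here, e.g. doBlockingWork();
            // call SetError(...) to route failures to OnError().
        }
        void OnOK() override {
            deferred.Resolve(Env().Undefined()); // back on the JS thread
        }
        void OnError(const Napi::Error& err) override {
            deferred.Reject(err.Value());
        }
};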
class AddonContextLoadContextWorker : public Napi::AsyncWorker {
public:
AddonContext* context;
AddonContextLoadContextWorker(const Napi::Env& env, AddonContext* context)
: Napi::AsyncWorker(env, "AddonContextLoadContextWorker"),
context(context),
deferred(Napi::Promise::Deferred::New(env)) {
context->Ref();
}
~AddonContextLoadContextWorker() {
context->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
context->ctx = llama_init_from_model(context->model->model, context->context_params);
context->contextLoaded = context->ctx != nullptr;
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_init_from_model\"");
}
}
void OnOK() {
if (context->contextLoaded) {
uint64_t contextMemorySize = llama_state_get_size(context->ctx);
adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
context->loadedContextMemorySize = contextMemorySize;
}
deferred.Resolve(Napi::Boolean::New(Env(), context->contextLoaded));
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
public:
AddonContext* context;
AddonContextUnloadContextWorker(const Napi::Env& env, AddonContext* context)
: Napi::AsyncWorker(env, "AddonContextUnloadContextWorker"),
context(context),
deferred(Napi::Promise::Deferred::New(env)) {
context->Ref();
}
~AddonContextUnloadContextWorker() {
context->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
llama_free(context->ctx);
context->contextLoaded = false;
try {
if (context->has_batch) {
llama_batch_free(context->batch);
context->has_batch = false;
context->batch_n_tokens = 0;
}
context->dispose();
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_batch_free\"");
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_free\"");
}
}
void OnOK() {
adjustNapiExternalMemorySubtract(Env(), context->loadedContextMemorySize);
context->loadedContextMemorySize = 0;
adjustNapiExternalMemorySubtract(Env(), context->batchMemorySize);
context->batchMemorySize = 0;
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
public:
AddonContext* ctx;
AddonSampler* sampler;
bool arrayResult = false;
bool returnProbabilities = false;
bool returnConfidence = false;
float tokenConfidence = -1;
bool has_probabilities = false;
size_t probabilities_size;
llama_token * probabilities_tokens;
float * probabilities_probs;
int32_t batchLogitIndex;
llama_token result;
bool no_output = false;
AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
: Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
ctx(ctx),
deferred(Napi::Promise::Deferred::New(info.Env())) {
ctx->Ref();
batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
sampler = Napi::ObjectWrap<AddonSampler>::Unwrap(info[1].As<Napi::Object>());
arrayResult = info.Length() > 2 && info[2].IsBoolean();
returnProbabilities = arrayResult ? info[2].As<Napi::Boolean>().Value() : false;
returnConfidence = arrayResult && info.Length() > 3 && info[3].IsBoolean() ? info[3].As<Napi::Boolean>().Value() : false;
sampler->Ref();
}
~AddonContextSampleTokenWorker() {
ctx->Unref();
sampler->Unref();
if (has_probabilities) {
delete[] probabilities_tokens;
delete[] probabilities_probs;
}
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
SampleToken();
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"SampleToken\"");
}
}
void SampleToken() {
if (llama_get_logits(ctx->ctx) == nullptr) {
SetError("This model does not support token generation");
return;
}
sampler->rebuildChainIfNeeded();
const auto * logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
const int n_vocab = llama_vocab_n_tokens(ctx->model->vocab);
auto & candidates = sampler->tokenCandidates;
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
candidates[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
}
llama_token_data_array cur_p = {
/* .data = */ candidates.data(),
/* .size = */ candidates.size(),
/* .selected = */ -1,
/* .sorted = */ false,
};
llama_sampler_apply(sampler->chain, &cur_p);
if (!(cur_p.selected >= 0 && cur_p.selected < (int32_t)cur_p.size)) {
no_output = true;
return;
}
auto new_token_id = cur_p.data[cur_p.selected].id;
if (returnProbabilities || returnConfidence) {
if (!cur_p.sorted) {
std::sort(cur_p.data, cur_p.data + cur_p.size, [](const llama_token_data & a, const llama_token_data & b) {
return a.logit > b.logit;
});
cur_p.sorted = true;
for (size_t i = 0; i < cur_p.size; i++) {
if (cur_p.data[i].id == new_token_id) {
cur_p.selected = i;
break;
}
}
}
}
if (returnProbabilities) {
probabilities_size = cur_p.size;
probabilities_tokens = new llama_token[probabilities_size];
probabilities_probs = new float[probabilities_size];
float maxLogit = cur_p.size > 0 ? cur_p.data[0].logit : -INFINITY;
for (size_t i = 0; i < cur_p.size; i++) {
auto logit = cur_p.data[i].logit;
probabilities_tokens[i] = cur_p.data[i].id;
probabilities_probs[i] = logit;
if (logit > maxLogit) {
maxLogit = logit;
}
}
if (probabilities_size > 0 && maxLogit != -INFINITY) {
float sum = 0.0f;
for (size_t i = 0; i < probabilities_size; i++) {
float prob = expf(probabilities_probs[i] - maxLogit);
probabilities_probs[i] = prob;
sum += prob;
}
for (size_t i = 0; i < probabilities_size; i++) {
probabilities_probs[i] /= sum;
}
}
has_probabilities = true;
}
if (returnConfidence) {
if (has_probabilities && cur_p.selected < probabilities_size) {
tokenConfidence = probabilities_probs[cur_p.selected];
} else {
float maxLogit = cur_p.data[0].logit;
float sum = 0.0f;
for (size_t i = 0; i < cur_p.size; i++) {
auto logit = cur_p.data[i].logit;
if (logit > maxLogit) {
maxLogit = logit;
}
}
for (size_t i = 0; i < cur_p.size; i++) {
sum += expf(cur_p.data[i].logit - maxLogit);
}
tokenConfidence = expf(cur_p.data[cur_p.selected].logit - maxLogit) / sum;
}
}
try {
sampler->acceptToken(new_token_id);
result = new_token_id;
} catch (const std::exception& e) {
SetError(std::string("Failed to accept token in sampler: ") + e.what());
} catch(...) {
SetError("Unknown error when calling \"acceptToken\"");
}
}
void OnOK() {
Napi::Number resultToken;
if (no_output) {
resultToken = Napi::Number::New(Env(), -1);
} else {
resultToken = Napi::Number::New(Env(), static_cast<uint32_t>(result));
}
if (!arrayResult) {
deferred.Resolve(resultToken);
return;
}
Napi::Array resultArray = Napi::Array::New(Env(), 2);
resultArray.Set(Napi::Number::New(Env(), 0), resultToken);
if (has_probabilities) {
Napi::Array probabilities = Napi::Array::New(Env(), probabilities_size * 2);
for (size_t i = 0; i < probabilities_size; i++) {
probabilities.Set(i * 2, Napi::Number::New(Env(), probabilities_tokens[i]));
probabilities.Set(i * 2 + 1, Napi::Number::New(Env(), probabilities_probs[i]));
}
resultArray.Set(1, probabilities);
}
if (returnConfidence && tokenConfidence != -1) {
resultArray.Set(2, Napi::Number::New(Env(), tokenConfidence));
}
deferred.Resolve(resultArray);
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
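// Illustration only (not part of this file): the probability and confidence
// code above is the usual numerically stable softmax. Subtracting the maximum
// logit before expf() keeps the exponentials from overflowing, and the shift
// cancels out in the final division, so the probabilities are unchanged.
// Reduced to a standalone helper:
static void exampleStableSoftmax(float* values, size_t count) {
    if (count == 0) {
        return;
    }
    float maxLogit = values[0];
    for (size_t i = 1; i < count; i++) {
        if (values[i] > maxLogit) {
            maxLogit = values[i];
        }
    }
    float sum = 0.0f;
    for (size_t i = 0; i < count; i++) {
        values[i] = expf(values[i] - maxLogit);
        sum += values[i];
    }
    for (size_t i = 0; i < count; i++) {
        values[i] /= sum; // values now hold probabilities that sum to 1
    }
}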
AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
model->Ref();
context_params = llama_context_default_params();
context_params.n_ctx = 4096;
context_params.n_threads = std::max(cpu_get_num_math(), 1);
context_params.n_threads_batch = context_params.n_threads;
context_params.no_perf = true;
context_params.swa_full = false;
if (info.Length() > 1 && info[1].IsObject()) {
Napi::Object options = info[1].As<Napi::Object>();
if (options.Has("contextSize")) {
context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().Uint32Value();
}
if (options.Has("batchSize")) {
context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
context_params.n_ubatch = context_params.n_batch; // the batch queue is managed on the JS side, so there's no need to manage it on the C++ side
}
if (options.Has("sequences")) {
context_params.n_seq_max = options.Get("sequences").As<Napi::Number>().Uint32Value();
}
if (options.Has("embeddings")) {
context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
}
if (options.Has("ranking") && options.Get("ranking").As<Napi::Boolean>().Value()) {
context_params.pooling_type = LLAMA_POOLING_TYPE_RANK;
}
if (options.Has("flashAttention")) {
bool flashAttention = options.Get("flashAttention").As<Napi::Boolean>().Value();
context_params.flash_attn_type = flashAttention ? LLAMA_FLASH_ATTN_TYPE_ENABLED : LLAMA_FLASH_ATTN_TYPE_DISABLED;
}
if (options.Has("threads")) {
const auto n_threads = options.Get("threads").As<Napi::Number>().Int32Value();
const auto resolved_n_threads = n_threads == 0 ? std::max((int32_t)std::thread::hardware_concurrency(), context_params.n_threads) : n_threads;
context_params.n_threads = resolved_n_threads;
context_params.n_threads_batch = resolved_n_threads;
}
if (options.Has("performanceTracking")) {
context_params.no_perf = !(options.Get("performanceTracking").As<Napi::Boolean>().Value());
}
if (options.Has("swaFullCache")) {
context_params.swa_full = options.Get("swaFullCache").As<Napi::Boolean>().Value();
}
}
}
AddonContext::~AddonContext() {
dispose();
}
void AddonContext::dispose() {
if (disposed) {
return;
}
disposed = true;
if (contextLoaded) {
contextLoaded = false;
llama_free(ctx);
adjustNapiExternalMemorySubtract(Env(), loadedContextMemorySize);
loadedContextMemorySize = 0;
}
model->Unref();
disposeBatch();
}
void AddonContext::disposeBatch() {
if (!has_batch) {
return;
}
llama_batch_free(batch);
has_batch = false;
batch_n_tokens = 0;
adjustNapiExternalMemorySubtract(Env(), batchMemorySize);
batchMemorySize = 0;
}
Napi::Value AddonContext::Init(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
AddonContextLoadContextWorker* worker = new AddonContextLoadContextWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonContext::Dispose(const Napi::CallbackInfo& info) {
if (disposed) {
return info.Env().Undefined();
}
if (contextLoaded) {
contextLoaded = false;
AddonContextUnloadContextWorker* worker = new AddonContextUnloadContextWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
} else {
dispose();
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
deferred.Resolve(info.Env().Undefined());
return deferred.Promise();
}
}
Napi::Value AddonContext::GetContextSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_n_ctx(ctx));
}
Napi::Value AddonContext::InitBatch(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
if (has_batch) {
llama_batch_free(batch);
}
int32_t n_tokens = info[0].As<Napi::Number>().Int32Value();
batch = llama_batch_init(n_tokens, 0, 1);
has_batch = true;
batch_n_tokens = n_tokens;
uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_model_n_embd(model->model), context_params.n_batch);
if (newBatchMemorySize > batchMemorySize) {
adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
batchMemorySize = newBatchMemorySize;
} else if (newBatchMemorySize < batchMemorySize) {
adjustNapiExternalMemorySubtract(Env(), batchMemorySize - newBatchMemorySize);
batchMemorySize = newBatchMemorySize;
}
return info.Env().Undefined();
}
Napi::Value AddonContext::DisposeBatch(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
disposeBatch();
return info.Env().Undefined();
}
Napi::Value AddonContext::AddToBatch(const Napi::CallbackInfo& info) {
if (!has_batch) {
Napi::Error::New(info.Env(), "No batch is initialized").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
int32_t firstTokenContextIndex = info[1].As<Napi::Number>().Int32Value();
Napi::Uint32Array tokens = info[2].As<Napi::Uint32Array>();
Napi::Uint32Array tokenLogitIndexes = info[3].As<Napi::Uint32Array>();
auto tokensLength = tokens.ElementLength();
auto tokenLogitIndexesLength = tokenLogitIndexes.ElementLength();
GGML_ASSERT(batch.n_tokens + tokensLength <= batch_n_tokens);
Napi::Uint32Array resLogitIndexes = Napi::Uint32Array::New(info.Env(), tokenLogitIndexesLength);
for (size_t i = 0, l = 0; i < tokensLength; i++) {
if (l < tokenLogitIndexesLength && tokenLogitIndexes[l] == i) {
common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, true);
resLogitIndexes[l] = batch.n_tokens - 1;
l++;
} else {
common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, false);
}
}
return resLogitIndexes;
}
Napi::Value AddonContext::DisposeSequence(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
bool result = llama_memory_seq_rm(llama_get_memory(ctx), sequenceId, -1, -1);
if (!result) {
Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return info.Env().Undefined();
}
Napi::Value AddonContext::RemoveTokenCellsFromSequence(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
bool result = llama_memory_seq_rm(llama_get_memory(ctx), sequenceId, startPos, endPos);
return Napi::Boolean::New(info.Env(), result);
}
Napi::Value AddonContext::ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
llama_memory_seq_add(llama_get_memory(ctx), sequenceId, startPos, endPos, shiftDelta);
return info.Env().Undefined();
}
Napi::Value AddonContext::GetSequenceKvCacheMinPosition(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
const auto minPosition = llama_memory_seq_pos_min(llama_get_memory(ctx), sequenceId);
return Napi::Number::New(info.Env(), minPosition);
}
Napi::Value AddonContext::GetSequenceKvCacheMaxPosition(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
const auto maxPosition = llama_memory_seq_pos_max(llama_get_memory(ctx), sequenceId);
return Napi::Number::New(info.Env(), maxPosition);
}
Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
AddonContextSampleTokenWorker* worker = new AddonContextSampleTokenWorker(info, this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
int32_t maxVectorSize = (info.Length() > 1 && info[1].IsNumber()) ? info[1].As<Napi::Number>().Int32Value() : 0;
if (inputTokensLength <= 0) {
Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
const int n_embd = llama_model_n_embd(model->model);
const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
const auto* embeddings = pooling_type == LLAMA_POOLING_TYPE_NONE ? NULL : llama_get_embeddings_seq(ctx, 0);
if (embeddings == NULL) {
embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
}
if (embeddings == NULL) {
Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
return info.Env().Undefined();
}
size_t resultSize = maxVectorSize == 0 ? n_embd : std::min(n_embd, maxVectorSize);
Napi::Float64Array result = Napi::Float64Array::New(info.Env(), resultSize);
for (size_t i = 0; i < resultSize; i++) {
result[i] = embeddings[i];
}
return result;
}
Napi::Value AddonContext::GetStateSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_state_get_size(ctx));
}
Napi::Value AddonContext::GetThreads(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_n_threads(ctx));
}
Napi::Value AddonContext::SetThreads(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
const auto threads = info[0].As<Napi::Number>().Int32Value();
const auto resolvedThreads = threads == 0
? std::max((int32_t)std::thread::hardware_concurrency(), std::max(cpu_get_num_math(), 1))
: threads;
if (llama_n_threads(ctx) != resolvedThreads) {
llama_set_n_threads(ctx, resolvedThreads, resolvedThreads);
}
return info.Env().Undefined();
}
class AddonContextSaveSequenceStateToFileWorker : public Napi::AsyncWorker {
public:
AddonContext* context;
std::string filepath;
llama_seq_id sequenceId;
std::vector<llama_token> tokens;
size_t savedFileSize = 0;
AddonContextSaveSequenceStateToFileWorker(const Napi::CallbackInfo& info, AddonContext* context)
: Napi::AsyncWorker(info.Env(), "AddonContextSaveSequenceStateToFileWorker"),
context(context),
deferred(Napi::Promise::Deferred::New(info.Env())) {
context->Ref();
filepath = info[0].As<Napi::String>().Utf8Value();
sequenceId = info[1].As<Napi::Number>().Int32Value();
Napi::Uint32Array inputTokens = info[2].As<Napi::Uint32Array>();
tokens.resize(inputTokens.ElementLength());
for (size_t i = 0; i < tokens.size(); i++) {
tokens[i] = inputTokens[i];
}
}
~AddonContextSaveSequenceStateToFileWorker() {
context->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
savedFileSize = llama_state_seq_save_file(context->ctx, filepath.c_str(), sequenceId, tokens.data(), tokens.size());
if (savedFileSize == 0) {
SetError("Failed to save state to file");
return;
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_state_seq_save_file\"");
}
}
void OnOK() {
deferred.Resolve(Napi::Number::New(Env(), savedFileSize));
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
Napi::Value AddonContext::SaveSequenceStateToFile(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
AddonContextSaveSequenceStateToFileWorker* worker = new AddonContextSaveSequenceStateToFileWorker(info, this);
worker->Queue();
return worker->GetPromise();
}
class AddonContextLoadSequenceStateFromFileWorker : public Napi::AsyncWorker {
public:
AddonContext* context;
std::string filepath;
llama_seq_id sequenceId;
size_t maxContextSize;
std::vector<llama_token> tokens;
AddonContextLoadSequenceStateFromFileWorker(const Napi::CallbackInfo& info, AddonContext* context)
: Napi::AsyncWorker(info.Env(), "AddonContextLoadSequenceStateFromFileWorker"),
context(context),
deferred(Napi::Promise::Deferred::New(info.Env())) {
context->Ref();
filepath = info[0].As<Napi::String>().Utf8Value();
sequenceId = info[1].As<Napi::Number>().Int32Value();
maxContextSize = info[2].As<Napi::Number>().Uint32Value();
tokens.resize(maxContextSize);
}
~AddonContextLoadSequenceStateFromFileWorker() {
context->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
size_t tokenCount = 0;
const size_t fileSize = llama_state_seq_load_file(context->ctx, filepath.c_str(), sequenceId, tokens.data(), tokens.size(), &tokenCount);
if (fileSize == 0) {
SetError("Failed to load state from file. Current context sequence size may be smaller that the state of the file");
return;
}
tokens.resize(tokenCount);
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_state_seq_load_file\"");
}
}
void OnOK() {
size_t tokenCount = tokens.size();
Napi::Uint32Array result = Napi::Uint32Array::New(Env(), tokenCount);
for (size_t i = 0; i < tokenCount; i++) {
result[i] = tokens[i];
}
deferred.Resolve(result);
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
Napi::Value AddonContext::LoadSequenceStateFromFile(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
AddonContextLoadSequenceStateFromFileWorker* worker = new AddonContextLoadSequenceStateFromFileWorker(info, this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonContext::PrintTimings(const Napi::CallbackInfo& info) {
llama_perf_context_print(ctx);
llama_perf_context_reset(ctx);
return info.Env().Undefined();
}
Napi::Value AddonContext::EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info) {
constexpr auto vocabSizeMaxDifference = 128; // SPEC_VOCAB_MAX_SIZE_DIFFERENCE
constexpr auto vocabCheckStartTokenId = 5; // SPEC_VOCAB_CHECK_START_TOKEN_ID
const AddonContext * draftContext = Napi::ObjectWrap<AddonContext>::Unwrap(info[0].As<Napi::Object>());
const auto currentCtx = ctx;
const auto draftCtx = draftContext->ctx;
const auto currentModel = model->model;
const auto draftModel = draftContext->model->model;
const auto currentVocab = model->vocab;
const auto draftVocab = draftContext->model->vocab;
if (llama_vocab_type(currentVocab) != llama_vocab_type(draftVocab)) {
Napi::Error::New(info.Env(), "Speculative draft model vocabulary type must match the target model vocabulary type").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
if (llama_vocab_get_add_bos(currentVocab) != llama_vocab_get_add_bos(draftVocab) ||
llama_vocab_get_add_eos(currentVocab) != llama_vocab_get_add_eos(draftVocab) ||
llama_vocab_bos(currentVocab) != llama_vocab_bos(draftVocab) ||
llama_vocab_eos(currentVocab) != llama_vocab_eos(draftVocab)
) {
Napi::Error::New(info.Env(), "Speculative draft model special tokens must match the target model special tokens").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
const int currentModelVocabSize = llama_vocab_n_tokens(currentVocab);
const int draftModelVocabSize = llama_vocab_n_tokens(draftVocab);
const int vocabDiff = std::abs(currentModelVocabSize - draftModelVocabSize);
if (vocabDiff > vocabSizeMaxDifference) {
Napi::Error::New(
info.Env(),
std::string("Speculative draft model vocabulary must closely match the target model vocabulary size (vocabulary size difference: ") +
std::to_string(vocabDiff) + std::string(", max allowed: ") + std::to_string(vocabSizeMaxDifference) + std::string(")")
).ThrowAsJavaScriptException();
return info.Env().Undefined();
}
const int minVocabSize = std::min(currentModelVocabSize, draftModelVocabSize);
for (int i = vocabCheckStartTokenId; i < minVocabSize; ++i) {
const char * currentTokenText = llama_vocab_get_text(currentVocab, i);
const char * draftTokenText = llama_vocab_get_text(draftVocab, i);
if (std::strcmp(currentTokenText, draftTokenText) != 0) {
Napi::Error::New(
info.Env(),
std::string("Speculative draft model vocabulary must match the target model vocabulary, but token ") +
std::to_string(i) + std::string(" content differs. Target: \"") + std::string(currentTokenText) +
std::string("\", Draft: \"") + std::string(draftTokenText) + std::string("")
).ThrowAsJavaScriptException();
return info.Env().Undefined();
}
}
return info.Env().Undefined();
}
Napi::Value AddonContext::SetLora(const Napi::CallbackInfo& info) {
AddonModelLora* lora = Napi::ObjectWrap<AddonModelLora>::Unwrap(info[0].As<Napi::Object>());
float scale = info[1].As<Napi::Number>().FloatValue();
llama_set_adapter_lora(ctx, lora->lora_adapter, scale);
return info.Env().Undefined();
}
void AddonContext::init(Napi::Object exports) {
exports.Set(
"AddonContext",
DefineClass(
exports.Env(),
"AddonContext",
{
InstanceMethod("init", &AddonContext::Init),
InstanceMethod("getContextSize", &AddonContext::GetContextSize),
InstanceMethod("initBatch", &AddonContext::InitBatch),
InstanceMethod("addToBatch", &AddonContext::AddToBatch),
InstanceMethod("disposeSequence", &AddonContext::DisposeSequence),
InstanceMethod("removeTokenCellsFromSequence", &AddonContext::RemoveTokenCellsFromSequence),
InstanceMethod("shiftSequenceTokenCells", &AddonContext::ShiftSequenceTokenCells),
InstanceMethod("getSequenceKvCacheMinPosition", &AddonContext::GetSequenceKvCacheMinPosition),
InstanceMethod("getSequenceKvCacheMaxPosition", &AddonContext::GetSequenceKvCacheMaxPosition),
InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
InstanceMethod("sampleToken", &AddonContext::SampleToken),
InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
InstanceMethod("getStateSize", &AddonContext::GetStateSize),
InstanceMethod("getThreads", &AddonContext::GetThreads),
InstanceMethod("setThreads", &AddonContext::SetThreads),
InstanceMethod("printTimings", &AddonContext::PrintTimings),
InstanceMethod("ensureDraftContextIsCompatibleForSpeculative", &AddonContext::EnsureDraftContextIsCompatibleForSpeculative),
InstanceMethod("saveSequenceStateToFile", &AddonContext::SaveSequenceStateToFile),
InstanceMethod("loadSequenceStateFromFile", &AddonContext::LoadSequenceStateFromFile),
InstanceMethod("setLora", &AddonContext::SetLora),
InstanceMethod("dispose", &AddonContext::Dispose),
}
)
);
}
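
AddonContext::init above only registers the AddonContext class on whatever exports object it is handed; the wiring into a Node.js module happens in the addon's entry point, which is not part of this excerpt. A hypothetical minimal entry point (illustrative sketch only, not the package's actual addon.cpp) would look roughly like:

#include "napi.h"
#include "AddonContext.h"

static Napi::Object InitModule(Napi::Env env, Napi::Object exports) {
    AddonContext::init(exports); // the other Addon* classes would be registered here too
    return exports;
}

NODE_API_MODULE(llama_addon, InitModule)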

58
node_modules/node-llama-cpp/llama/addon/AddonContext.h generated vendored Normal file

@@ -0,0 +1,58 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
#include "AddonSampler.h"
class AddonContext : public Napi::ObjectWrap<AddonContext> {
public:
AddonModel* model;
llama_context_params context_params;
llama_context* ctx;
llama_batch batch;
uint64_t batchMemorySize = 0;
bool has_batch = false;
int32_t batch_n_tokens = 0;
int n_cur = 0;
uint64_t loadedContextMemorySize = 0;
bool contextLoaded = false;
bool disposed = false;
AddonContext(const Napi::CallbackInfo& info);
~AddonContext();
void dispose();
void disposeBatch();
Napi::Value Init(const Napi::CallbackInfo& info);
Napi::Value Dispose(const Napi::CallbackInfo& info);
Napi::Value GetContextSize(const Napi::CallbackInfo& info);
Napi::Value InitBatch(const Napi::CallbackInfo& info);
Napi::Value DisposeBatch(const Napi::CallbackInfo& info);
Napi::Value AddToBatch(const Napi::CallbackInfo& info);
Napi::Value DisposeSequence(const Napi::CallbackInfo& info);
Napi::Value RemoveTokenCellsFromSequence(const Napi::CallbackInfo& info);
Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info);
Napi::Value GetSequenceKvCacheMinPosition(const Napi::CallbackInfo& info);
Napi::Value GetSequenceKvCacheMaxPosition(const Napi::CallbackInfo& info);
Napi::Value DecodeBatch(const Napi::CallbackInfo& info);
Napi::Value SampleToken(const Napi::CallbackInfo& info);
Napi::Value GetEmbedding(const Napi::CallbackInfo& info);
Napi::Value GetStateSize(const Napi::CallbackInfo& info);
Napi::Value GetThreads(const Napi::CallbackInfo& info);
Napi::Value SetThreads(const Napi::CallbackInfo& info);
Napi::Value SaveSequenceStateToFile(const Napi::CallbackInfo& info);
Napi::Value LoadSequenceStateFromFile(const Napi::CallbackInfo& info);
Napi::Value PrintTimings(const Napi::CallbackInfo& info);
Napi::Value EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info);
Napi::Value SetLora(const Napi::CallbackInfo& info);
static void init(Napi::Object exports);
};

92
node_modules/node-llama-cpp/llama/addon/AddonGrammar.cpp generated vendored Normal file

@@ -0,0 +1,92 @@
#include "addonGlobals.h"
#include "AddonGrammar.h"
AddonGrammar::AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
grammarCode = info[0].As<Napi::String>().Utf8Value();
if (info.Length() > 1 && info[1].IsObject()) {
Napi::Object options = info[1].As<Napi::Object>();
if (options.Has("addonExports")) {
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
hasAddonExportsRef = true;
}
if (options.Has("rootRuleName")) {
rootRuleName = options.Get("rootRuleName").As<Napi::String>().Utf8Value();
}
}
auto parsed_grammar = llama_grammar_init_impl(nullptr, grammarCode.c_str(), rootRuleName.c_str(), false, nullptr, 0, nullptr, 0);
// will be null if there are parse errors
if (parsed_grammar == nullptr) {
Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
return;
}
llama_grammar_free_impl(parsed_grammar);
}
AddonGrammar::~AddonGrammar() {
if (hasAddonExportsRef) {
addonExportsRef.Unref();
hasAddonExportsRef = false;
}
}
Napi::Value AddonGrammar::isTextCompatible(const Napi::CallbackInfo& info) {
const std::string testText = info[0].As<Napi::String>().Utf8Value();
auto parsed_grammar = llama_grammar_init_impl(nullptr, grammarCode.c_str(), rootRuleName.c_str(), false, nullptr, 0, nullptr, 0);
// will be null if there are parse errors
if (parsed_grammar == nullptr) {
Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
return Napi::Boolean::New(info.Env(), false);
}
const auto cpts = unicode_cpts_from_utf8(testText);
llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(parsed_grammar);
for (const auto & cpt : cpts) {
try {
llama_grammar_accept(parsed_grammar, cpt);
} catch (const std::exception & e) {
llama_grammar_free_impl(parsed_grammar);
return Napi::Boolean::New(info.Env(), false);
} catch (...) {
llama_grammar_free_impl(parsed_grammar);
return Napi::Boolean::New(info.Env(), false);
}
if (stacks_cur.empty()) {
// no stacks means that the grammar failed to match at this point
llama_grammar_free_impl(parsed_grammar);
return Napi::Boolean::New(info.Env(), false);
}
}
for (const auto & stack : stacks_cur) {
if (stack.empty()) {
// an empty stack means that the grammar has been completed
llama_grammar_free_impl(parsed_grammar);
return Napi::Boolean::New(info.Env(), true);
}
}
llama_grammar_free_impl(parsed_grammar);
return Napi::Boolean::New(info.Env(), false);
}
void AddonGrammar::init(Napi::Object exports) {
exports.Set(
"AddonGrammar",
DefineClass(
exports.Env(),
"AddonGrammar",
{
InstanceMethod("isTextCompatible", &AddonGrammar::isTextCompatible),
}
)
);
}

22
node_modules/node-llama-cpp/llama/addon/AddonGrammar.h generated vendored Normal file

@@ -0,0 +1,22 @@
#pragma once
#include "llama.h"
#include "common/common.h"
#include "llama-grammar.h"
#include "src/unicode.h"
#include "napi.h"
#include "addonGlobals.h"
class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
public:
std::string grammarCode = "";
std::string rootRuleName = "root";
Napi::Reference<Napi::Object> addonExportsRef;
bool hasAddonExportsRef = false;
AddonGrammar(const Napi::CallbackInfo& info);
~AddonGrammar();
Napi::Value isTextCompatible(const Napi::CallbackInfo& info);
static void init(Napi::Object exports);
};

36
node_modules/node-llama-cpp/llama/addon/AddonGrammarEvaluationState.cpp generated vendored Normal file

@@ -0,0 +1,36 @@
#include <sstream>
#include "addonGlobals.h"
#include "common/common.h"
#include "llama.h"
#include "AddonGrammarEvaluationState.h"
#include "AddonGrammar.h"
AddonGrammarEvaluationState::AddonGrammarEvaluationState(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammarEvaluationState>(info) {
if (info.Length() == 1) {
AddonGrammarEvaluationState* existingState = Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
model = existingState->model;
model->Ref();
grammarDef = existingState->grammarDef;
grammarDef->Ref();
sampler = llama_sampler_clone(existingState->sampler);
} else {
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
model->Ref();
grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[1].As<Napi::Object>());
grammarDef->Ref();
sampler = llama_sampler_init_grammar(model->vocab, grammarDef->grammarCode.c_str(), grammarDef->rootRuleName.c_str());
}
}
AddonGrammarEvaluationState::~AddonGrammarEvaluationState() {
llama_sampler_free(sampler);
grammarDef->Unref();
model->Unref();
}
void AddonGrammarEvaluationState::init(Napi::Object exports) {
exports.Set("AddonGrammarEvaluationState", DefineClass(exports.Env(), "AddonGrammarEvaluationState", {}));
}
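
The copy-constructor path above relies on llama_sampler_clone to duplicate the grammar sampler together with its accumulated state, so a forked evaluation state continues from the same grammar position instead of re-parsing the grammar. A hedged standalone sketch of the same idea (the grammar string and helper name are illustrative only):

#include "llama.h"

// Returns a grammar sampler: a clone of an existing one when available
// (keeping its accumulated state), otherwise a fresh one parsed from a
// hypothetical GBNF grammar string.
static llama_sampler* exampleForkGrammarSampler(const llama_vocab* vocab, llama_sampler* existing) {
    if (existing != nullptr) {
        return llama_sampler_clone(existing);
    }
    return llama_sampler_init_grammar(vocab, "root ::= \"yes\" | \"no\"", "root");
}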

17
node_modules/node-llama-cpp/llama/addon/AddonGrammarEvaluationState.h generated vendored Normal file

@@ -0,0 +1,17 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
#include "AddonModel.h"
class AddonGrammarEvaluationState : public Napi::ObjectWrap<AddonGrammarEvaluationState> {
public:
AddonModel* model;
AddonGrammar* grammarDef;
llama_sampler * sampler = nullptr;
AddonGrammarEvaluationState(const Napi::CallbackInfo& info);
~AddonGrammarEvaluationState();
static void init(Napi::Object exports);
};

691
node_modules/node-llama-cpp/llama/addon/AddonModel.cpp generated vendored Normal file

@@ -0,0 +1,691 @@
#include <sstream>
#include "addonGlobals.h"
#include "globals/addonLog.h"
#include "globals/addonProgress.h"
#include "common/common.h"
#include "llama.h"
#include "AddonModel.h"
#include "AddonModelData.h"
#include "AddonModelLora.h"
static Napi::Value getNapiToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
if (token < 0 || token == LLAMA_TOKEN_NULL) {
return Napi::Number::From(info.Env(), -1);
}
auto tokenAttributes = llama_vocab_get_attr(vocab, token);
if (tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED || tokenAttributes & LLAMA_TOKEN_ATTR_UNKNOWN) {
return Napi::Number::From(info.Env(), -1);
}
return Napi::Number::From(info.Env(), token);
}
static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
if (token < 0) {
return Napi::Number::From(info.Env(), -1);
}
auto tokenAttributes = llama_vocab_get_attr(vocab, token);
if (!(tokenAttributes & LLAMA_TOKEN_ATTR_CONTROL) && !(tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED)) {
return Napi::Number::From(info.Env(), -1);
}
return Napi::Number::From(info.Env(), token);
}
static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
AddonModel* addonModel = (AddonModel *) user_data;
unsigned percentage = (unsigned) (100 * progress);
if (percentage > addonModel->modelLoadPercentage) {
addonModel->modelLoadPercentage = percentage;
// original llama.cpp logs
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
if (percentage >= 100) {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
}
}
if (progress > addonModel->rawModelLoadPercentage) {
addonModel->rawModelLoadPercentage = progress;
if (addonModel->onLoadProgressEventCallbackSet) {
addon_progress_event* data = new addon_progress_event {
progress
};
auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);
if (status != napi_ok) {
delete data;
}
}
}
return !(addonModel->abortModelLoad);
}
class AddonModelLoadModelWorker : public Napi::AsyncWorker {
public:
AddonModel* model;
AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
: Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
model(model),
deferred(Napi::Promise::Deferred::New(env)) {
model->Ref();
}
~AddonModelLoadModelWorker() {
model->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
model->model = llama_model_load_from_file(model->modelPath.c_str(), model->model_params);
model->vocab = llama_model_get_vocab(model->model);
model->modelLoaded = model->model != nullptr;
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_model_load_from_file\"");
}
}
void OnOK() {
if (model->modelLoaded) {
uint64_t modelSize = llama_model_size(model->model);
adjustNapiExternalMemoryAdd(Env(), modelSize);
model->loadedModelSize = modelSize;
}
deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
if (model->onLoadProgressEventCallbackSet) {
model->addonThreadSafeOnLoadProgressEventCallback.Release();
}
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
public:
AddonModel* model;
AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
: Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
model(model),
deferred(Napi::Promise::Deferred::New(env)) {
model->Ref();
}
~AddonModelUnloadModelWorker() {
model->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
llama_model_free(model->model);
model->modelLoaded = false;
model->dispose();
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_model_free\"");
}
}
void OnOK() {
adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
model->loadedModelSize = 0;
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
public:
AddonModelLora* modelLora;
AddonModelLoadLoraWorker(
const Napi::Env& env,
AddonModelLora* modelLora
)
: Napi::AsyncWorker(env, "AddonModelLoadLoraWorker"),
modelLora(modelLora),
deferred(Napi::Promise::Deferred::New(env)) {
modelLora->model->Ref();
modelLora->Ref();
}
~AddonModelLoadLoraWorker() {
modelLora->model->Unref();
modelLora->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
const auto loraAdapter = llama_adapter_lora_init(modelLora->model->model, modelLora->loraFilePath.c_str());
if (loraAdapter == nullptr) {
SetError(
std::string(
std::string("Failed to initialize LoRA adapter \"" + modelLora->loraFilePath + "\"")
)
);
return;
}
modelLora->lora_adapter = loraAdapter;
modelLora->model->Ref();
if (modelLora->model->data != nullptr) {
modelLora->model->data->loraAdapters.insert(modelLora);
} else {
modelLora->dispose(true);
SetError("Model data is not initialized");
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_adapter_lora_init\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
AddonModel::AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
data = new AddonModelData();
model_params = llama_model_default_params();
// Get the model path
modelPath = info[0].As<Napi::String>().Utf8Value();
if (info.Length() > 1 && info[1].IsObject()) {
Napi::Object options = info[1].As<Napi::Object>();
if (options.Has("addonExports")) {
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
hasAddonExportsRef = true;
}
if (options.Has("gpuLayers")) {
model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
}
if (options.Has("vocabOnly")) {
model_params.vocab_only = options.Get("vocabOnly").As<Napi::Boolean>().Value();
}
if (options.Has("useMmap")) {
model_params.use_mmap = options.Get("useMmap").As<Napi::Boolean>().Value();
}
if (options.Has("useDirectIo")) {
model_params.use_direct_io = options.Get("useDirectIo").As<Napi::Boolean>().Value();
}
if (options.Has("useMlock")) {
model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
}
if (options.Has("checkTensors")) {
model_params.check_tensors = options.Get("checkTensors").As<Napi::Boolean>().Value();
}
if (options.Has("onLoadProgress")) {
auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
if (onLoadProgressJSCallback.IsFunction()) {
AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
info.Env(),
onLoadProgressJSCallback,
"onLoadProgressCallback",
0,
1,
context,
[](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
addonModel->onLoadProgressEventCallbackSet = false;
delete ctx;
},
this
);
onLoadProgressEventCallbackSet = true;
}
}
if (options.Has("hasLoadAbortSignal")) {
hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
}
if (options.Has("overridesList")) {
Napi::Array overridesList = options.Get("overridesList").As<Napi::Array>();
kv_overrides.reserve(overridesList.Length());
for (uint32_t i = 0; i < overridesList.Length(); i++) {
Napi::Array overrideItem = overridesList.Get(i).As<Napi::Array>();
auto key = overrideItem.Get((uint32_t)0).As<Napi::String>().Utf8Value();
auto value = overrideItem.Get((uint32_t)1);
if (key.length() > 127) {
continue;
}
llama_model_kv_override kvo;
std::strncpy(kvo.key, key.c_str(), key.length());
kvo.key[key.length()] = 0;
if (value.IsString()) {
auto valueString = value.As<Napi::String>().Utf8Value();
if (valueString.length() > 127) {
continue;
}
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
std::strncpy(kvo.val_str, valueString.c_str(), valueString.length());
kvo.val_str[valueString.length()] = 0;
fputs(std::string("Override: " + key + " = " + valueString + "\n").c_str(), stdout);
fflush(stdout);
} else if (value.IsNumber() || value.IsBigInt()) {
auto numberType = overrideItem.Get((uint32_t)2).As<Napi::Number>().Int32Value();
if (numberType == 0) {
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
kvo.val_i64 = value.As<Napi::Number>().Int64Value();
} else {
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
kvo.val_f64 = value.As<Napi::Number>().DoubleValue();
}
continue;
} else if (value.IsBoolean()) {
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
kvo.val_bool = value.As<Napi::Boolean>().Value();
}
kv_overrides.emplace_back(std::move(kvo));
}
if (!kv_overrides.empty()) {
kv_overrides.emplace_back();
kv_overrides.back().key[0] = 0;
}
model_params.kv_overrides = kv_overrides.data();
}
if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
model_params.progress_callback_user_data = &(*this);
model_params.progress_callback = llamaModelParamsProgressCallback;
}
}
}
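// Illustration only (not part of this file): the overridesList handling above
// produces a llama_model_kv_override array for model_params.kv_overrides.
// Each entry copies the key into the fixed 128-byte buffer, the tag selects
// which union member is read, and an entry with an empty key terminates the
// list. A hedged sketch with a hypothetical key and value:
static std::vector<llama_model_kv_override> exampleBuildOverrides() {
    std::vector<llama_model_kv_override> overrides;

    llama_model_kv_override kvo;
    const std::string key = "tokenizer.ggml.add_bos_token"; // hypothetical override
    std::strncpy(kvo.key, key.c_str(), key.length());
    kvo.key[key.length()] = 0;
    kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
    kvo.val_bool = false;
    overrides.emplace_back(kvo);

    // terminator entry: an empty key marks the end of the list
    overrides.emplace_back();
    overrides.back().key[0] = 0;
    return overrides;
}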
AddonModel::~AddonModel() {
dispose();
}
void AddonModel::dispose() {
if (disposed) {
return;
}
disposed = true;
if (data != nullptr) {
auto currentData = data;
data = nullptr;
delete currentData;
}
if (modelLoaded) {
modelLoaded = false;
llama_model_free(model);
adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
loadedModelSize = 0;
}
if (hasAddonExportsRef) {
addonExportsRef.Unref();
hasAddonExportsRef = false;
}
}
Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
AddonModelLoadModelWorker* worker = new AddonModelLoadModelWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonModel::LoadLora(const Napi::CallbackInfo& info) {
AddonModelLora* modelLora = Napi::ObjectWrap<AddonModelLora>::Unwrap(info[0].As<Napi::Object>());
AddonModelLoadLoraWorker* worker = new AddonModelLoadLoraWorker(this->Env(), modelLora);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonModel::AbortActiveModelLoad(const Napi::CallbackInfo& info) {
abortModelLoad = true;
return info.Env().Undefined();
}
Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
if (disposed) {
return info.Env().Undefined();
}
if (modelLoaded) {
modelLoaded = false;
AddonModelUnloadModelWorker* worker = new AddonModelUnloadModelWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
} else {
dispose();
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
deferred.Resolve(info.Env().Undefined());
return deferred.Promise();
}
}
Napi::Value AddonModel::Tokenize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
std::string text = info[0].As<Napi::String>().Utf8Value();
bool specialTokens = info[1].As<Napi::Boolean>().Value();
std::vector<llama_token> tokens = common_tokenize(vocab, text, false, specialTokens);
Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
for (size_t i = 0; i < tokens.size(); ++i) {
result[i] = static_cast<uint32_t>(tokens[i]);
}
return result;
}
Napi::Value AddonModel::Detokenize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
bool decodeSpecialTokens = info.Length() > 1
? info[1].As<Napi::Boolean>().Value()
: false;
std::string result;
result.resize(std::max(result.capacity(), tokens.ElementLength()));
int n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
if (n_chars < 0) {
result.resize(-n_chars);
n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
GGML_ASSERT(n_chars <= result.size()); // whitespace trimming is performed after per-token detokenization
}
result.resize(n_chars);
return Napi::String::New(info.Env(), result);
}
Napi::Value AddonModel::GetTrainContextSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_model_n_ctx_train(model));
}
Napi::Value AddonModel::GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_model_n_embd(model));
}
Napi::Value AddonModel::GetTotalSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_model_size(model));
}
Napi::Value AddonModel::GetTotalParameters(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_model_n_params(model));
}
Napi::Value AddonModel::GetModelDescription(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
char model_desc[128];
int actual_length = llama_model_desc(model, model_desc, sizeof(model_desc));
return Napi::String::New(info.Env(), model_desc, actual_length);
}
Napi::Value AddonModel::TokenBos(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiControlToken(info, vocab, llama_vocab_bos(vocab));
}
Napi::Value AddonModel::TokenEos(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiControlToken(info, vocab, llama_vocab_eos(vocab));
}
Napi::Value AddonModel::TokenNl(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_nl(vocab));
}
Napi::Value AddonModel::PrefixToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_fim_pre(vocab));
}
Napi::Value AddonModel::MiddleToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_fim_mid(vocab));
}
Napi::Value AddonModel::SuffixToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_fim_suf(vocab));
}
Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_eot(vocab));
}
Napi::Value AddonModel::SepToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_sep(vocab));
}
Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int token = info[0].As<Napi::Number>().Int32Value();
std::stringstream ss;
const char* str = llama_vocab_get_text(vocab, token);
if (str == nullptr) {
return info.Env().Undefined();
}
ss << str;
return Napi::String::New(info.Env(), ss.str());
}
Napi::Value AddonModel::GetTokenAttributes(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
if (info[0].IsNumber() == false) {
return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_ATTR_UNDEFINED));
}
int token = info[0].As<Napi::Number>().Int32Value();
auto tokenAttributes = llama_vocab_get_attr(vocab, token);
return Napi::Number::From(info.Env(), int32_t(tokenAttributes));
}
Napi::Value AddonModel::IsEogToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
if (info[0].IsNumber() == false) {
return Napi::Boolean::New(info.Env(), false);
}
int token = info[0].As<Napi::Number>().Int32Value();
return Napi::Boolean::New(info.Env(), llama_vocab_is_eog(vocab, token));
}
Napi::Value AddonModel::GetVocabularyType(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
auto vocabularyType = llama_vocab_type(vocab);
return Napi::Number::From(info.Env(), int32_t(vocabularyType));
}
Napi::Value AddonModel::ShouldPrependBosToken(const Napi::CallbackInfo& info) {
const bool addBos = llama_vocab_get_add_bos(vocab);
return Napi::Boolean::New(info.Env(), addBos);
}
Napi::Value AddonModel::ShouldAppendEosToken(const Napi::CallbackInfo& info) {
const bool addEos = llama_vocab_get_add_eos(vocab);
return Napi::Boolean::New(info.Env(), addEos);
}
Napi::Value AddonModel::GetModelSize(const Napi::CallbackInfo& info) {
return Napi::Number::From(info.Env(), llama_model_size(model));
}
void AddonModel::init(Napi::Object exports) {
exports.Set(
"AddonModel",
DefineClass(
exports.Env(),
"AddonModel",
{
InstanceMethod("init", &AddonModel::Init),
InstanceMethod("loadLora", &AddonModel::LoadLora),
InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
InstanceMethod("tokenize", &AddonModel::Tokenize),
InstanceMethod("detokenize", &AddonModel::Detokenize),
InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
InstanceMethod("getEmbeddingVectorSize", &AddonModel::GetEmbeddingVectorSize),
InstanceMethod("getTotalSize", &AddonModel::GetTotalSize),
InstanceMethod("getTotalParameters", &AddonModel::GetTotalParameters),
InstanceMethod("getModelDescription", &AddonModel::GetModelDescription),
InstanceMethod("tokenBos", &AddonModel::TokenBos),
InstanceMethod("tokenEos", &AddonModel::TokenEos),
InstanceMethod("tokenNl", &AddonModel::TokenNl),
InstanceMethod("prefixToken", &AddonModel::PrefixToken),
InstanceMethod("middleToken", &AddonModel::MiddleToken),
InstanceMethod("suffixToken", &AddonModel::SuffixToken),
InstanceMethod("eotToken", &AddonModel::EotToken),
InstanceMethod("sepToken", &AddonModel::SepToken),
InstanceMethod("getTokenString", &AddonModel::GetTokenString),
InstanceMethod("getTokenAttributes", &AddonModel::GetTokenAttributes),
InstanceMethod("isEogToken", &AddonModel::IsEogToken),
InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
InstanceMethod("shouldAppendEosToken", &AddonModel::ShouldAppendEosToken),
InstanceMethod("getModelSize", &AddonModel::GetModelSize),
InstanceMethod("dispose", &AddonModel::Dispose),
}
)
);
}

64
node_modules/node-llama-cpp/llama/addon/AddonModel.h generated vendored Normal file
View File

@@ -0,0 +1,64 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
#include "globals/addonProgress.h"
class AddonModel : public Napi::ObjectWrap<AddonModel> {
public:
llama_model_params model_params;
std::vector<llama_model_kv_override> kv_overrides;
llama_model* model;
const llama_vocab* vocab;
uint64_t loadedModelSize = 0;
Napi::Reference<Napi::Object> addonExportsRef;
bool hasAddonExportsRef = false;
AddonModelData* data;
std::string modelPath;
bool modelLoaded = false;
bool abortModelLoad = false;
bool model_load_stopped = false;
float rawModelLoadPercentage = 0;
unsigned modelLoadPercentage = 0;
AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
bool onLoadProgressEventCallbackSet = false;
bool hasLoadAbortSignal = false;
bool disposed = false;
AddonModel(const Napi::CallbackInfo& info);
~AddonModel();
void dispose();
Napi::Value Init(const Napi::CallbackInfo& info);
Napi::Value LoadLora(const Napi::CallbackInfo& info);
Napi::Value AbortActiveModelLoad(const Napi::CallbackInfo& info);
Napi::Value Dispose(const Napi::CallbackInfo& info);
Napi::Value Tokenize(const Napi::CallbackInfo& info);
Napi::Value Detokenize(const Napi::CallbackInfo& info);
Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info);
Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info);
Napi::Value GetTotalSize(const Napi::CallbackInfo& info);
Napi::Value GetTotalParameters(const Napi::CallbackInfo& info);
Napi::Value GetModelDescription(const Napi::CallbackInfo& info);
Napi::Value TokenBos(const Napi::CallbackInfo& info);
Napi::Value TokenEos(const Napi::CallbackInfo& info);
Napi::Value TokenNl(const Napi::CallbackInfo& info);
Napi::Value PrefixToken(const Napi::CallbackInfo& info);
Napi::Value MiddleToken(const Napi::CallbackInfo& info);
Napi::Value SuffixToken(const Napi::CallbackInfo& info);
Napi::Value EotToken(const Napi::CallbackInfo& info);
Napi::Value SepToken(const Napi::CallbackInfo& info);
Napi::Value GetTokenString(const Napi::CallbackInfo& info);
Napi::Value GetTokenAttributes(const Napi::CallbackInfo& info);
Napi::Value IsEogToken(const Napi::CallbackInfo& info);
Napi::Value GetVocabularyType(const Napi::CallbackInfo& info);
Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info);
Napi::Value ShouldAppendEosToken(const Napi::CallbackInfo& info);
Napi::Value GetModelSize(const Napi::CallbackInfo& info);
static void init(Napi::Object exports);
};

View File

@@ -0,0 +1,25 @@
#include <iostream>
#include "addonGlobals.h"
#include "AddonModelData.h"
#include "AddonModelLora.h"
AddonModelData::AddonModelData() {
}
AddonModelData::~AddonModelData() {
std::set<AddonModelLora *> currentLoraAdapters;
currentLoraAdapters.swap(loraAdapters);
for (auto lora : currentLoraAdapters) {
lora->dispose(true);
}
currentLoraAdapters.clear();
}
void AddonModelData::removeLora(AddonModelLora* lora) {
auto pos = loraAdapters.find(lora);
if (pos != loraAdapters.end()) {
loraAdapters.erase(pos);
}
}

View File

@@ -0,0 +1,15 @@
#pragma once
#include <set>
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
class AddonModelData {
public:
std::set<AddonModelLora *> loraAdapters;
AddonModelData();
~AddonModelData();
void removeLora(AddonModelLora* lora);
};

View File

@@ -0,0 +1,103 @@
#include "addonGlobals.h"
#include "AddonModel.h"
#include "AddonModelData.h"
#include "AddonModelLora.h"
class AddonModelLoraUnloadLoraWorker : public Napi::AsyncWorker {
public:
AddonModelLora* addonLora;
AddonModelLoraUnloadLoraWorker(const Napi::Env& env, AddonModelLora* addonLora)
: Napi::AsyncWorker(env, "AddonModelLoraUnloadLoraWorker"),
addonLora(addonLora),
deferred(Napi::Promise::Deferred::New(env)) {
addonLora->Ref();
}
~AddonModelLoraUnloadLoraWorker() {
addonLora->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
addonLora->dispose();
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_adapter_lora_free\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
AddonModelLora::AddonModelLora(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModelLora>(info) {
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
loraFilePath = info[1].As<Napi::String>().Utf8Value();
lora_adapter = nullptr;
}
AddonModelLora::~AddonModelLora() {
dispose();
}
void AddonModelLora::dispose(bool skipErase) {
if (lora_adapter != nullptr) {
lora_adapter = nullptr;
if (!skipErase && model->data != nullptr) {
model->data->removeLora(this);
}
model->Unref();
}
}
Napi::Value AddonModelLora::GetFilePath(const Napi::CallbackInfo& info) {
return Napi::String::New(info.Env(), loraFilePath);
}
Napi::Value AddonModelLora::GetUsages(const Napi::CallbackInfo& info) {
return Napi::Number::From(info.Env(), usages);
}
void AddonModelLora::SetUsages(const Napi::CallbackInfo& info, const Napi::Value &value) {
usages = value.As<Napi::Number>().Uint32Value();
}
Napi::Value AddonModelLora::Dispose(const Napi::CallbackInfo& info) {
AddonModelLoraUnloadLoraWorker* worker = new AddonModelLoraUnloadLoraWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonModelLora::GetDisposed(const Napi::CallbackInfo& info) {
return Napi::Boolean::New(info.Env(), lora_adapter == nullptr);
}
void AddonModelLora::init(Napi::Object exports) {
exports.Set(
"AddonModelLora",
DefineClass(
exports.Env(),
"AddonModelLora",
{
InstanceAccessor("usages", &AddonModelLora::GetUsages, &AddonModelLora::SetUsages),
InstanceAccessor("filePath", &AddonModelLora::GetFilePath, nullptr),
InstanceAccessor("disposed", &AddonModelLora::GetDisposed, nullptr),
InstanceMethod("dispose", &AddonModelLora::Dispose),
}
)
);
}

View File

@@ -0,0 +1,28 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
class AddonModelLora : public Napi::ObjectWrap<AddonModelLora> {
public:
AddonModel* model;
llama_adapter_lora * lora_adapter;
std::string loraFilePath;
uint32_t usages = 0;
AddonModelLora(const Napi::CallbackInfo& info);
~AddonModelLora();
void dispose(bool skipErase = false);
Napi::Value GetFilePath(const Napi::CallbackInfo& info);
Napi::Value GetUsages(const Napi::CallbackInfo& info);
void SetUsages(const Napi::CallbackInfo& info, const Napi::Value &value);
Napi::Value GetDisposed(const Napi::CallbackInfo& info);
Napi::Value Dispose(const Napi::CallbackInfo& info);
static void init(Napi::Object exports);
};

View File

@@ -0,0 +1,511 @@
#include <cmath>
#include <ctime>
#include "common/common.h"
#include "globals/addonLog.h"
#include "ggml.h"
#include "llama.h"
#include "AddonGrammarEvaluationState.h"
#include "AddonSampler.h"
AddonSampler::AddonSampler(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonSampler>(info) {
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
model->Ref();
tokenCandidates.resize(llama_vocab_n_tokens(model->vocab));
tokenCandidates.reserve(llama_vocab_n_tokens(model->vocab));
}
AddonSampler::~AddonSampler() {
dispose();
}
void AddonSampler::dispose() {
if (disposed) {
return;
}
disposed = true;
model->Unref();
freeChain();
if (temperatureSampler != nullptr) {
llama_sampler_free(temperatureSampler);
temperatureSampler = nullptr;
}
if (greedySampler != nullptr) {
llama_sampler_free(greedySampler);
greedySampler = nullptr;
}
if (minPSampler != nullptr) {
llama_sampler_free(minPSampler);
minPSampler = nullptr;
}
if (topKSampler != nullptr) {
llama_sampler_free(topKSampler);
topKSampler = nullptr;
}
if (topPSampler != nullptr) {
llama_sampler_free(topPSampler);
topPSampler = nullptr;
}
if (seedSampler != nullptr) {
llama_sampler_free(seedSampler);
seedSampler = nullptr;
}
if (repeatPenaltySampler != nullptr) {
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;
}
if (tokenBiasSampler != nullptr) {
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}
if (grammarEvaluationState != nullptr) {
grammarEvaluationState->Unref();
grammarEvaluationState = nullptr;
}
}
void AddonSampler::freeChain() {
if (chain == nullptr) {
return;
}
// ensure existing state of samplers isn't cleared
while (llama_sampler_chain_n(chain) > 0) {
llama_sampler_chain_remove(chain, 0);
}
llama_sampler_free(chain);
chain = nullptr;
}
void AddonSampler::rebuildChainIfNeeded() {
if (disposed) {
throw std::runtime_error("Sampler is disposed");
}
if (chain != nullptr) {
return;
}
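// lazily rebuild the chain from the currently configured samplers; the chain only references samplers owned by this class (freeChain detaches them before freeing the chain)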
auto sampler_params = llama_sampler_chain_default_params();
chain = llama_sampler_chain_init(sampler_params);
if (tokenBiasSampler != nullptr) {
llama_sampler_chain_add(chain, tokenBiasSampler);
}
if (repeatPenaltySampler != nullptr) {
llama_sampler_chain_add(chain, repeatPenaltySampler);
}
if (grammarEvaluationState != nullptr) {
llama_sampler_chain_add(chain, grammarEvaluationState->sampler);
}
if (greedySampler != nullptr) {
llama_sampler_chain_add(chain, greedySampler);
} else {
if (topKSampler != nullptr) {
llama_sampler_chain_add(chain, topKSampler);
}
if (topPSampler != nullptr) {
llama_sampler_chain_add(chain, topPSampler);
}
if (minPSampler != nullptr) {
llama_sampler_chain_add(chain, minPSampler);
}
if (temperatureSampler != nullptr) {
llama_sampler_chain_add(chain, temperatureSampler);
}
if (seedSampler != nullptr) {
llama_sampler_chain_add(chain, seedSampler);
}
}
}
void AddonSampler::acceptToken(llama_token token) {
if (repeatPenaltySampler != nullptr) {
llama_sampler_accept(repeatPenaltySampler, token);
repeatPenalty_lastTokens.push_back(token);
}
if (grammarEvaluationState != nullptr && grammarEvaluationState->sampler != nullptr && !llama_vocab_is_eog(model->vocab, token)) {
llama_sampler_accept(grammarEvaluationState->sampler, token);
}
}
Napi::Value AddonSampler::Dispose(const Napi::CallbackInfo& info) {
dispose();
return info.Env().Undefined();
}
Napi::Value AddonSampler::ApplyConfig(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Sampler is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
const int32_t n_probs = 0; // Number of probabilities to keep - 0 = disabled
size_t min_keep = std::max(1, n_probs);
Napi::Object config = info[0].As<Napi::Object>();
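// each sampler below is recreated only when its configuration value changed; otherwise the existing sampler instance (and its internal state) is kept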
if (config.Has("temperature")) {
auto temperature = config.Get("temperature").As<Napi::Number>().FloatValue();
if (temperature != temperatureSampler_temperature || !temperatureSampler_initialized) {
temperatureSampler_initialized = true;
temperatureSampler_temperature = temperature;
freeChain();
if (temperatureSampler != nullptr) {
llama_sampler_free(temperatureSampler);
temperatureSampler = nullptr;
}
if (temperatureSampler_temperature <= 0) {
greedySampler = llama_sampler_init_greedy();
} else {
temperatureSampler = llama_sampler_init_temp(temperatureSampler_temperature);
if (greedySampler != nullptr) {
llama_sampler_free(greedySampler);
greedySampler = nullptr;
}
}
}
} else {
if (temperatureSampler != nullptr) {
freeChain();
llama_sampler_free(temperatureSampler);
temperatureSampler = nullptr;
}
if (greedySampler == nullptr) {
greedySampler = llama_sampler_init_greedy();
}
}
if (config.Has("minP")) {
auto minP = config.Get("minP").As<Napi::Number>().FloatValue();
if (minP != minPSampler_minP) {
minPSampler_minP = minP;
freeChain();
if (minPSampler != nullptr) {
llama_sampler_free(minPSampler);
minPSampler = nullptr;
}
if (minPSampler_minP != 0) {
minPSampler = llama_sampler_init_min_p(minPSampler_minP, min_keep);
}
}
} else if (minPSampler != nullptr) {
freeChain();
llama_sampler_free(minPSampler);
minPSampler = nullptr;
}
if (config.Has("topK")) {
auto topK = config.Get("topK").As<Napi::Number>().Int32Value();
if (topK != topKSampler_topK || !topKSampler_initialized) {
topKSampler_initialized = true;
topKSampler_topK = topK;
freeChain();
if (topKSampler != nullptr) {
llama_sampler_free(topKSampler);
topKSampler = nullptr;
}
const int32_t resolved_top_k = topKSampler_topK <= 0
? llama_vocab_n_tokens(model->vocab)
: std::min(topKSampler_topK, llama_vocab_n_tokens(model->vocab));
topKSampler = llama_sampler_init_top_k(resolved_top_k);
}
} else if (topKSampler != nullptr) {
freeChain();
llama_sampler_free(topKSampler);
topKSampler = nullptr;
}
if (config.Has("topP")) {
auto topP = config.Get("topP").As<Napi::Number>().FloatValue();
if (topP != topPSampler_topP) {
topPSampler_topP = topP;
freeChain();
if (topPSampler != nullptr) {
llama_sampler_free(topPSampler);
topPSampler = nullptr;
}
if (topPSampler_topP < 1.0f) {
topPSampler = llama_sampler_init_top_p(topPSampler_topP, min_keep);
}
}
} else if (topPSampler != nullptr) {
freeChain();
llama_sampler_free(topPSampler);
topPSampler = nullptr;
}
if (config.Has("seed")) {
auto seed = config.Get("seed").As<Napi::Number>().Uint32Value();
if (seed != seedSampler_seed || seedSampler == nullptr) {
seedSampler_seed = seed;
freeChain();
if (seedSampler != nullptr) {
llama_sampler_free(seedSampler);
seedSampler = nullptr;
}
seedSampler = llama_sampler_init_dist(seedSampler_seed);
}
} else if (seedSampler == nullptr) {
freeChain();
seedSampler = llama_sampler_init_dist(time(NULL));
}
if (config.Has("repeatPenaltyTokens")) {
Napi::Uint32Array repeat_penalty_tokens_uint32_array = config.Get("repeatPenaltyTokens").As<Napi::Uint32Array>();
auto repeatPenalty = config.Has("repeatPenalty")
? config.Get("repeatPenalty").As<Napi::Number>().FloatValue()
: 1;
auto repeatPenaltyMaxTokens = config.Has("repeatPenaltyMaxTokens")
? config.Get("repeatPenaltyMaxTokens").As<Napi::Number>().Int32Value()
: 64;
auto repeatPenaltyPresencePenalty = config.Has("repeatPenaltyPresencePenalty")
? config.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue()
: 0;
auto repeatPenaltyFrequencyPenalty = config.Has("repeatPenaltyFrequencyPenalty")
? config.Get("repeatPenaltyFrequencyPenalty").As<Napi::Number>().FloatValue()
: 0;
auto repeatPenaltyEnabled = repeatPenalty != 1 && repeatPenaltyMaxTokens > 0;
bool shouldCreateSampler = false;
if (!repeatPenaltyEnabled) {
if (repeatPenaltySampler != nullptr) {
freeChain();
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;
}
} else if (repeatPenaltySampler == nullptr) {
freeChain();
shouldCreateSampler = true;
} else {
bool existingSamplerMatchesConfig = true;
existingSamplerMatchesConfig &= repeatPenalty_maxTokens == repeatPenaltyMaxTokens;
existingSamplerMatchesConfig &= repeatPenalty_penalty == repeatPenalty;
existingSamplerMatchesConfig &= repeatPenalty_presencePenalty == repeatPenaltyPresencePenalty;
existingSamplerMatchesConfig &= repeatPenalty_frequencyPenalty == repeatPenaltyFrequencyPenalty;
if (existingSamplerMatchesConfig) {
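// check whether the provided token history is a continuation of the tokens already accepted by the existing sampler; only new tokens are accepted incrementally, and any mismatch forces the sampler to be recreated below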
if (repeat_penalty_tokens_uint32_array.ElementLength() > 0) {
const auto firstToken = static_cast<llama_token>(repeat_penalty_tokens_uint32_array[0]);
if (repeatPenalty_lastTokens.rat(0) != firstToken &&
repeatPenalty_lastTokens.size() == repeatPenalty_maxTokens &&
repeat_penalty_tokens_uint32_array.ElementLength() == repeatPenalty_maxTokens
) {
const auto lastToken = static_cast<llama_token>(repeat_penalty_tokens_uint32_array[repeat_penalty_tokens_uint32_array.ElementLength() - 1]);
llama_sampler_accept(repeatPenaltySampler, lastToken);
repeatPenalty_lastTokens.push_back(lastToken);
}
}
for (size_t i = 0; i < repeat_penalty_tokens_uint32_array.ElementLength() && existingSamplerMatchesConfig; i++) {
auto token = static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]);
if (i < repeatPenalty_lastTokens.size()) {
existingSamplerMatchesConfig &= repeatPenalty_lastTokens.rat(i) == token;
} else {
llama_sampler_accept(repeatPenaltySampler, token);
repeatPenalty_lastTokens.push_back(token);
}
}
}
if (!existingSamplerMatchesConfig) {
freeChain();
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;
shouldCreateSampler = true;
}
}
if (shouldCreateSampler) {
repeatPenaltySampler = llama_sampler_init_penalties(
repeatPenaltyMaxTokens,
repeatPenalty,
repeatPenaltyFrequencyPenalty,
repeatPenaltyPresencePenalty
);
repeatPenalty_lastTokens = RingBuffer<llama_token>(repeatPenaltyMaxTokens);
for (size_t i = 0; i < repeat_penalty_tokens_uint32_array.ElementLength(); i++) {
llama_sampler_accept(repeatPenaltySampler, static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]));
repeatPenalty_lastTokens.push_back(static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]));
}
repeatPenalty_maxTokens = repeatPenaltyMaxTokens;
repeatPenalty_penalty = repeatPenalty;
repeatPenalty_presencePenalty = repeatPenaltyPresencePenalty;
repeatPenalty_frequencyPenalty = repeatPenaltyFrequencyPenalty;
}
} else if (repeatPenaltySampler != nullptr) {
freeChain();
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;
}
if (config.Has("tokenBiasKeys") && config.Has("tokenBiasValues")) {
Napi::Uint32Array tokenBiasKeys = config.Get("tokenBiasKeys").As<Napi::Uint32Array>();
Napi::Float32Array tokenBiasValues = config.Get("tokenBiasValues").As<Napi::Float32Array>();
if (tokenBiasKeys.ElementLength() == tokenBiasValues.ElementLength() && tokenBiasKeys.ElementLength() > 0) {
bool existingSamplerMatchesConfig = tokenBiasSampler != nullptr;
if (tokenBiasSampler != nullptr && tokenBiasSampler_biases.size() == tokenBiasKeys.ElementLength()) {
for (size_t i = 0; i < tokenBiasKeys.ElementLength() && existingSamplerMatchesConfig; i++) {
existingSamplerMatchesConfig &= tokenBiasSampler_biases[i].token == static_cast<llama_token>(tokenBiasKeys[i]);
existingSamplerMatchesConfig &= tokenBiasSampler_biases[i].bias == tokenBiasValues[i];
}
} else {
existingSamplerMatchesConfig = false;
}
if (!existingSamplerMatchesConfig) {
if (tokenBiasSampler != nullptr) {
freeChain();
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}
tokenBiasSampler_biases.clear();
tokenBiasSampler_biases.reserve(tokenBiasKeys.ElementLength());
for (size_t i = 0; i < tokenBiasKeys.ElementLength(); i++) {
tokenBiasSampler_biases.emplace_back(llama_logit_bias { static_cast<llama_token>(tokenBiasKeys[i]), tokenBiasValues[i] });
}
tokenBiasSampler = llama_sampler_init_logit_bias(
llama_vocab_n_tokens(model->vocab),
tokenBiasSampler_biases.size(),
tokenBiasSampler_biases.data()
);
}
} else if (tokenBiasSampler != nullptr) {
freeChain();
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}
} else if (tokenBiasSampler != nullptr) {
freeChain();
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}
if (config.Has("grammarEvaluationState")) {
const auto configGrammarEvaluationState =
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(config.Get("grammarEvaluationState").As<Napi::Object>());
if (grammarEvaluationState != configGrammarEvaluationState) {
freeChain();
if (grammarEvaluationState != nullptr) {
grammarEvaluationState->Unref();
grammarEvaluationState = nullptr;
}
grammarEvaluationState = configGrammarEvaluationState;
grammarEvaluationState->Ref();
}
} else if (grammarEvaluationState != nullptr) {
freeChain();
grammarEvaluationState->Unref();
grammarEvaluationState = nullptr;
}
return info.Env().Undefined();
}
Napi::Value AddonSampler::AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
AddonGrammarEvaluationState* grammar_evaluation_state =
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
if ((grammar_evaluation_state)->sampler != nullptr) {
try {
llama_sampler_accept((grammar_evaluation_state)->sampler, tokenId);
} catch (const std::exception & e) {
Napi::Error::New(info.Env(), std::string("Failed to accept token in grammar sampler: ") + e.what()).ThrowAsJavaScriptException();
return info.Env().Undefined();
} catch (...) {
Napi::Error::New(info.Env(), "Failed to accept token in grammar sampler").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
}
return info.Env().Undefined();
}
Napi::Value AddonSampler::CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info) {
AddonGrammarEvaluationState* grammar_evaluation_state =
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
if ((grammar_evaluation_state)->sampler != nullptr) {
std::vector<llama_token_data> candidates;
candidates.reserve(1);
candidates.emplace_back(llama_token_data { tokenId, 1, 0.0f });
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
try {
llama_sampler_apply((grammar_evaluation_state)->sampler, &candidates_p);
} catch (const std::exception & e) {
addonLog(GGML_LOG_LEVEL_DEBUG, std::string("Failed to apply grammar sampler: ") + e.what());
return Napi::Boolean::New(info.Env(), false);
} catch (...) {
return Napi::Boolean::New(info.Env(), false);
}
if (candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) {
return Napi::Boolean::New(info.Env(), false);
}
return Napi::Boolean::New(info.Env(), true);
}
return Napi::Boolean::New(info.Env(), false);
}
void AddonSampler::init(Napi::Object exports) {
exports.Set(
"AddonSampler",
DefineClass(
exports.Env(),
"AddonSampler",
{
InstanceMethod("dispose", &AddonSampler::Dispose),
InstanceMethod("applyConfig", &AddonSampler::ApplyConfig),
StaticMethod("acceptGrammarEvaluationStateToken", &AddonSampler::AcceptGrammarEvaluationStateToken),
StaticMethod("canBeNextTokenForGrammarEvaluationState", &AddonSampler::CanBeNextTokenForGrammarEvaluationState),
}
)
);
}

63
node_modules/node-llama-cpp/llama/addon/AddonSampler.h generated vendored Normal file
View File

@@ -0,0 +1,63 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "RingBuffer.h"
#include "addonGlobals.h"
#include "AddonModel.h"
class AddonSampler : public Napi::ObjectWrap<AddonSampler> {
public:
AddonModel* model;
llama_sampler * chain = nullptr;
llama_sampler * temperatureSampler = nullptr;
bool temperatureSampler_initialized = false;
float temperatureSampler_temperature = 0.0f; // 0.0f = disabled
llama_sampler * greedySampler = nullptr;
llama_sampler * minPSampler = nullptr;
float minPSampler_minP = 0.0f; // Min p sampling <=0.0f = disabled
llama_sampler * topKSampler = nullptr;
bool topKSampler_initialized = false;
int topKSampler_topK = 0;
llama_sampler * topPSampler = nullptr;
float topPSampler_topP = 0.0f; // Top p sampling >=1.0 = disabled
llama_sampler * seedSampler = nullptr;
uint32_t seedSampler_seed = 0;
llama_sampler * repeatPenaltySampler = nullptr;
RingBuffer<llama_token> repeatPenalty_lastTokens = RingBuffer<llama_token>(64);
int32_t repeatPenalty_maxTokens = 64;
float repeatPenalty_penalty = 1.10f; // 1.0 = disabled
float repeatPenalty_presencePenalty = 0.00f; // 0.0 = disabled
float repeatPenalty_frequencyPenalty = 0.00f; // 0.0 = disabled
llama_sampler * tokenBiasSampler = nullptr;
std::vector<llama_logit_bias> tokenBiasSampler_biases;
AddonGrammarEvaluationState* grammarEvaluationState = nullptr;
std::vector<llama_token_data> tokenCandidates;
bool disposed = false;
AddonSampler(const Napi::CallbackInfo& info);
~AddonSampler();
void dispose();
void freeChain();
void rebuildChainIfNeeded();
void acceptToken(llama_token token);
Napi::Value Dispose(const Napi::CallbackInfo& info);
Napi::Value ApplyConfig(const Napi::CallbackInfo& info);
static Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info);
static Napi::Value CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info);
static void init(Napi::Object exports);
};

109
node_modules/node-llama-cpp/llama/addon/RingBuffer.h generated vendored Normal file
View File

@@ -0,0 +1,109 @@
#pragma once
// copied from llama-impl.h
#include <cstddef>
#include <stdexcept>
#include <vector>
template<typename T>
struct RingBuffer {
RingBuffer(size_t cap) : capacity(cap), data(cap) {}
T & front() {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[first];
}
const T & front() const {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[first];
}
T & back() {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[pos];
}
const T & back() const {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[pos];
}
void push_back(const T & value) {
if (capacity == 0) {
throw std::runtime_error("ring buffer: capacity is zero");
}
if (sz == capacity) {
// advance the start when buffer is full
first = (first + 1) % capacity;
} else {
sz++;
}
data[pos] = value;
pos = (pos + 1) % capacity;
}
T pop_front() {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
T value = data[first];
first = (first + 1) % capacity;
sz--;
return value;
}
//T & operator[](size_t i) {
// if (i >= sz) {
// throw std::runtime_error("ring buffer: index out of bounds");
// }
// return data[(first + i) % capacity];
//}
//const T & at(size_t i) const {
// if (i >= sz) {
// throw std::runtime_error("ring buffer: index out of bounds");
// }
// return data[(first + i) % capacity];
//}
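// reverse access: rat(0) is the most recently pushed element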
const T & rat(size_t i) const {
if (i >= sz) {
throw std::runtime_error("ring buffer: index out of bounds");
}
return data[(first + sz - i - 1) % capacity];
}
std::vector<T> to_vector() const {
std::vector<T> result;
result.reserve(sz);
for (size_t i = 0; i < sz; i++) {
result.push_back(data[(first + i) % capacity]);
}
return result;
}
void clear() {
// here only reset the status of the buffer
sz = 0;
first = 0;
pos = 0;
}
bool empty() const {
return sz == 0;
}
size_t size() const {
return sz;
}
size_t capacity = 0;
size_t sz = 0;
size_t first = 0;
size_t pos = 0;
std::vector<T> data;
};

314
node_modules/node-llama-cpp/llama/addon/addon.cpp generated vendored Normal file
View File

@@ -0,0 +1,314 @@
#include "addonGlobals.h"
#include "AddonModel.h"
#include "AddonModelLora.h"
#include "AddonGrammar.h"
#include "AddonGrammarEvaluationState.h"
#include "AddonSampler.h"
#include "AddonContext.h"
#include "globals/addonLog.h"
#include "globals/addonProgress.h"
#include "globals/getGpuInfo.h"
#include "globals/getSwapInfo.h"
#include "globals/getMemoryInfo.h"
#include <atomic>
bool backendInitialized = false;
bool backendDisposed = false;
Napi::Value systemInfo(const Napi::CallbackInfo& info) {
return Napi::String::From(info.Env(), llama_print_system_info());
}
Napi::Value addonGetSupportsGpuOffloading(const Napi::CallbackInfo& info) {
return Napi::Boolean::New(info.Env(), llama_supports_gpu_offload());
}
Napi::Value addonGetSupportsMmap(const Napi::CallbackInfo& info) {
return Napi::Boolean::New(info.Env(), llama_supports_mmap());
}
Napi::Value addonGetGpuSupportsMmap(const Napi::CallbackInfo& info) {
const auto llamaSupportsMmap = llama_supports_mmap();
const auto gpuDevice = getGpuDevice().first;
if (gpuDevice == nullptr) {
return Napi::Boolean::New(info.Env(), false);
}
ggml_backend_dev_props props;
ggml_backend_dev_get_props(gpuDevice, &props);
const bool gpuSupportsMmap = llamaSupportsMmap && props.caps.buffer_from_host_ptr;
return Napi::Boolean::New(info.Env(), gpuSupportsMmap);
}
Napi::Value addonGetSupportsMlock(const Napi::CallbackInfo& info) {
return Napi::Boolean::New(info.Env(), llama_supports_mlock());
}
Napi::Value addonGetMathCores(const Napi::CallbackInfo& info) {
return Napi::Number::New(info.Env(), cpu_get_num_math());
}
Napi::Value addonGetBlockSizeForGgmlType(const Napi::CallbackInfo& info) {
const int ggmlType = info[0].As<Napi::Number>().Int32Value();
if (ggmlType < 0 || ggmlType >= GGML_TYPE_COUNT) {
return info.Env().Undefined();
}
const auto blockSize = ggml_blck_size(static_cast<ggml_type>(ggmlType));
return Napi::Number::New(info.Env(), blockSize);
}
Napi::Value addonGetTypeSizeForGgmlType(const Napi::CallbackInfo& info) {
const int ggmlType = info[0].As<Napi::Number>().Int32Value();
if (ggmlType < 0 || ggmlType >= GGML_TYPE_COUNT) {
return info.Env().Undefined();
}
const auto typeSize = ggml_type_size(static_cast<ggml_type>(ggmlType));
return Napi::Number::New(info.Env(), typeSize);
}
Napi::Value addonGetGgmlGraphOverheadCustom(const Napi::CallbackInfo& info) {
if (info.Length() < 2 || !info[0].IsNumber() || !info[1].IsBoolean()) {
return Napi::Number::New(info.Env(), 0);
}
const size_t size = info[0].As<Napi::Number>().Uint32Value();
const bool grads = info[1].As<Napi::Boolean>().Value();
const auto graphOverhead = ggml_graph_overhead_custom(size, grads);
return Napi::Number::New(info.Env(), graphOverhead);
}
Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
Napi::Object consts = Napi::Object::New(info.Env());
consts.Set("ggmlMaxDims", Napi::Number::New(info.Env(), GGML_MAX_DIMS));
consts.Set("ggmlTypeF16Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F16)));
consts.Set("ggmlTypeF32Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F32)));
consts.Set("ggmlTensorOverhead", Napi::Number::New(info.Env(), ggml_tensor_overhead()));
consts.Set("llamaPosSize", Napi::Number::New(info.Env(), sizeof(llama_pos)));
consts.Set("llamaSeqIdSize", Napi::Number::New(info.Env(), sizeof(llama_seq_id)));
return consts;
}
class AddonBackendLoadWorker : public Napi::AsyncWorker {
public:
AddonBackendLoadWorker(const Napi::Env& env)
: Napi::AsyncWorker(env, "AddonBackendLoadWorker"),
deferred(Napi::Promise::Deferred::New(env)) {
}
~AddonBackendLoadWorker() {
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
llama_backend_init();
try {
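// if dispose() was requested while the backend was initializing, free it immediately instead of marking it as initialized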
if (backendDisposed) {
llama_backend_free();
} else {
backendInitialized = true;
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_backend_free\"");
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_backend_init\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonBackendUnloadWorker : public Napi::AsyncWorker {
public:
AddonBackendUnloadWorker(const Napi::Env& env)
: Napi::AsyncWorker(env, "AddonBackendUnloadWorker"),
deferred(Napi::Promise::Deferred::New(env)) {
}
~AddonBackendUnloadWorker() {
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
if (backendInitialized) {
backendInitialized = false;
llama_backend_free();
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_backend_free\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
Napi::Value addonLoadBackends(const Napi::CallbackInfo& info) {
const std::string forceLoadLibrariesSearchPath = info.Length() == 0
? ""
: info[0].IsString()
? info[0].As<Napi::String>().Utf8Value()
: "";
ggml_backend_reg_count();
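// the call above ensures the ggml backend registry is initialized; its return value is not needed here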
if (forceLoadLibrariesSearchPath.length() > 0) {
ggml_backend_load_all_from_path(forceLoadLibrariesSearchPath.c_str());
}
return info.Env().Undefined();
}
Napi::Value addonSetNuma(const Napi::CallbackInfo& info) {
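// no argument or an explicit false skips NUMA initialization; otherwise a strategy name string is expected below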
const bool numaDisabled = info.Length() == 0
? true
: info[0].IsBoolean()
? !info[0].As<Napi::Boolean>().Value()
: false;
if (numaDisabled)
return info.Env().Undefined();
const auto numaType = info[0].IsString()
? info[0].As<Napi::String>().Utf8Value()
: "";
if (numaType == "distribute") {
llama_numa_init(GGML_NUMA_STRATEGY_DISTRIBUTE);
} else if (numaType == "isolate") {
llama_numa_init(GGML_NUMA_STRATEGY_ISOLATE);
} else if (numaType == "numactl") {
llama_numa_init(GGML_NUMA_STRATEGY_NUMACTL);
} else if (numaType == "mirror") {
llama_numa_init(GGML_NUMA_STRATEGY_MIRROR);
} else {
Napi::Error::New(info.Env(), std::string("Invalid NUMA strategy \"") + numaType + "\"").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return info.Env().Undefined();
}
Napi::Value markLoaded(const Napi::CallbackInfo& info) {
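// returns the previous value of the flag: false on the first call in this process, true on subsequent calls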
static std::atomic_bool loaded = false;
return Napi::Boolean::New(info.Env(), loaded.exchange(true));
}
Napi::Value addonInit(const Napi::CallbackInfo& info) {
if (backendInitialized) {
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
deferred.Resolve(info.Env().Undefined());
return deferred.Promise();
}
AddonBackendLoadWorker* worker = new AddonBackendLoadWorker(info.Env());
worker->Queue();
return worker->GetPromise();
}
Napi::Value addonDispose(const Napi::CallbackInfo& info) {
if (backendDisposed) {
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
deferred.Resolve(info.Env().Undefined());
return deferred.Promise();
}
backendDisposed = true;
AddonBackendUnloadWorker* worker = new AddonBackendUnloadWorker(info.Env());
worker->Queue();
return worker->GetPromise();
}
static void addonFreeLlamaBackend(Napi::Env env, int* data) {
if (backendDisposed) {
return;
}
backendDisposed = true;
if (backendInitialized) {
backendInitialized = false;
llama_backend_free();
}
}
Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
exports.DefineProperties({
Napi::PropertyDescriptor::Function("markLoaded", markLoaded),
Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
Napi::PropertyDescriptor::Function("getSupportsGpuOffloading", addonGetSupportsGpuOffloading),
Napi::PropertyDescriptor::Function("getSupportsMmap", addonGetSupportsMmap),
Napi::PropertyDescriptor::Function("getGpuSupportsMmap", addonGetGpuSupportsMmap),
Napi::PropertyDescriptor::Function("getSupportsMlock", addonGetSupportsMlock),
Napi::PropertyDescriptor::Function("getMathCores", addonGetMathCores),
Napi::PropertyDescriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
Napi::PropertyDescriptor::Function("getTypeSizeForGgmlType", addonGetTypeSizeForGgmlType),
Napi::PropertyDescriptor::Function("getGgmlGraphOverheadCustom", addonGetGgmlGraphOverheadCustom),
Napi::PropertyDescriptor::Function("getConsts", addonGetConsts),
Napi::PropertyDescriptor::Function("setLogger", setLogger),
Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
Napi::PropertyDescriptor::Function("ensureGpuDeviceIsSupported", ensureGpuDeviceIsSupported),
Napi::PropertyDescriptor::Function("getSwapInfo", getSwapInfo),
Napi::PropertyDescriptor::Function("getMemoryInfo", getMemoryInfo),
Napi::PropertyDescriptor::Function("loadBackends", addonLoadBackends),
Napi::PropertyDescriptor::Function("setNuma", addonSetNuma),
Napi::PropertyDescriptor::Function("init", addonInit),
Napi::PropertyDescriptor::Function("dispose", addonDispose),
});
AddonModel::init(exports);
AddonModelLora::init(exports);
AddonGrammar::init(exports);
AddonGrammarEvaluationState::init(exports);
AddonContext::init(exports);
AddonSampler::init(exports);
llama_log_set(addonLlamaCppLogCallback, nullptr);
exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));
return exports;
}
NODE_API_MODULE(NODE_GYP_MODULE_NAME, registerCallback)

View File

@@ -0,0 +1,22 @@
#include <algorithm>
#include <limits>
#include <sstream>
#include <vector>
#include "addonGlobals.h"
#include "napi.h"
void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size) {
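// AdjustExternalMemory takes a signed 64-bit delta, so very large sizes are reported in chunks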
const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
while (size > 0) {
int64_t adjustSize = std::min(size, chunkSize);
Napi::MemoryManagement::AdjustExternalMemory(env, adjustSize);
size -= adjustSize;
}
}
void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
while (size > 0) {
int64_t adjustSize = std::min(size, chunkSize);
Napi::MemoryManagement::AdjustExternalMemory(env, -adjustSize);
size -= adjustSize;
}
}

12
node_modules/node-llama-cpp/llama/addon/addonGlobals.h generated vendored Normal file
View File

@@ -0,0 +1,12 @@
#pragma once
#include "napi.h"
class AddonModel;
class AddonModelLora;
class AddonModelData;
class AddonContext;
class AddonGrammar;
class AddonGrammarEvaluationState;
void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size);
void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size);

View File

@@ -0,0 +1,143 @@
#include <sstream>
#include "addonLog.h"
AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
bool addonJsLoggerCallbackSet = false;
int addonLoggerLogLevel = 5;
int addonLastLoggerLogLevel = 6;
static int addonGetGgmlLogLevelNumber(ggml_log_level level) {
switch (level) {
case GGML_LOG_LEVEL_ERROR: return 2;
case GGML_LOG_LEVEL_WARN: return 3;
case GGML_LOG_LEVEL_INFO: return 4;
case GGML_LOG_LEVEL_NONE: return 5;
case GGML_LOG_LEVEL_DEBUG: return 6;
case GGML_LOG_LEVEL_CONT: return addonLastLoggerLogLevel;
}
return 1;
}
void addonCallJsLogCallback(
Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
) {
bool called = false;
if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
try {
callback.Call({
Napi::Number::New(env, data->logLevelNumber),
Napi::String::New(env, data->stringStream->str()),
});
called = true;
} catch (const Napi::Error& e) {
called = false;
}
}
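// fall back to writing directly to stdout/stderr when the JS callback could not be invoked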
if (!called && data != nullptr) {
if (data->logLevelNumber == 2) {
fputs(data->stringStream->str().c_str(), stderr);
fflush(stderr);
} else {
fputs(data->stringStream->str().c_str(), stdout);
fflush(stdout);
}
}
if (data != nullptr) {
delete data->stringStream;
delete data;
}
}
void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data) {
int logLevelNumber = addonGetGgmlLogLevelNumber(level);
addonLastLoggerLogLevel = logLevelNumber;
if (logLevelNumber > addonLoggerLogLevel) {
return;
}
if (addonJsLoggerCallbackSet) {
std::stringstream* stringStream = new std::stringstream();
if (text != nullptr) {
*stringStream << text;
}
addon_logger_log* data = new addon_logger_log {
logLevelNumber,
stringStream,
};
auto status = addonThreadSafeLoggerCallback.NonBlockingCall(data);
if (status == napi_ok) {
return;
} else {
delete stringStream;
delete data;
}
}
if (text != nullptr) {
if (logLevelNumber == 2) {
fputs(text, stderr);
fflush(stderr);
} else {
fputs(text, stdout);
fflush(stdout);
}
}
}
Napi::Value setLogger(const Napi::CallbackInfo& info) {
if (addonJsLoggerCallbackSet) {
addonJsLoggerCallbackSet = false;
addonThreadSafeLoggerCallback.Release();
}
if (info.Length() < 1 || !info[0].IsFunction()) {
return info.Env().Undefined();
}
auto addonLoggerJSCallback = info[0].As<Napi::Function>();
AddonThreadSafeLogCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
addonThreadSafeLoggerCallback = AddonThreadSafeLogCallbackFunction::New(
info.Env(),
addonLoggerJSCallback,
"loggerCallback",
0,
1,
context,
[](Napi::Env, void*, AddonThreadSafeLogCallbackFunctionContext* ctx) {
addonJsLoggerCallbackSet = false;
delete ctx;
}
);
addonJsLoggerCallbackSet = true;
// prevent blocking the main node process from exiting due to active resources
addonThreadSafeLoggerCallback.Unref(info.Env());
return info.Env().Undefined();
}
Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
if (info.Length() < 1 || !info[0].IsNumber()) {
addonLoggerLogLevel = 5;
return info.Env().Undefined();
}
addonLoggerLogLevel = info[0].As<Napi::Number>().Int32Value();
return info.Env().Undefined();
}
void addonLog(ggml_log_level level, const std::string text) {
addonLlamaCppLogCallback(level, std::string("[addon] " + text + "\n").c_str(), nullptr);
}

View File

@@ -0,0 +1,24 @@
#pragma once
#include <sstream>
#include <string>
#include "llama.h"
#include "napi.h"
struct addon_logger_log {
public:
const int logLevelNumber;
const std::stringstream* stringStream;
};
void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);
using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
void addonCallJsLogCallback(
Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
);
using AddonThreadSafeLogCallbackFunction =
Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
Napi::Value setLogger(const Napi::CallbackInfo& info);
Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info);
void addonLog(ggml_log_level level, const std::string text);

View File

@@ -0,0 +1,15 @@
#include "addonProgress.h"
void addonCallJsProgressCallback(
Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
) {
if (env != nullptr && callback != nullptr) {
try {
callback.Call({Napi::Number::New(env, data->progress)});
} catch (const Napi::Error& e) {}
}
if (data != nullptr) {
delete data;
}
}

View File

@@ -0,0 +1,15 @@
#pragma once
#include "napi.h"
struct addon_progress_event {
public:
const float progress;
};
using AddonThreadSafeProgressCallbackFunctionContext = Napi::Reference<Napi::Value>;
void addonCallJsProgressCallback(
Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
);
using AddonThreadSafeProgressEventCallbackFunction =
Napi::TypedThreadSafeFunction<AddonThreadSafeProgressCallbackFunctionContext, addon_progress_event, addonCallJsProgressCallback>;

View File

@@ -0,0 +1,146 @@
#include <vector>
#include "getGpuInfo.h"
#include "addonLog.h"
#ifdef __APPLE__
#include <TargetConditionals.h>
#endif
#ifdef GPU_INFO_USE_VULKAN
# include "../../gpuInfo/vulkan-gpu-info.h"
#endif
#ifdef GPU_INFO_USE_VULKAN
void logVulkanWarning(const char* message) {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr);
}
#endif
Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
ggml_backend_dev_t device = NULL;
size_t deviceTotal = 0;
size_t deviceFree = 0;
uint64_t total = 0;
uint64_t used = 0;
uint64_t unifiedVramSize = 0;
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
device = ggml_backend_dev_get(i);
auto deviceType = ggml_backend_dev_type(device);
if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) {
deviceTotal = 0;
deviceFree = 0;
ggml_backend_dev_memory(device, &deviceFree, &deviceTotal);
total += deviceTotal;
used += deviceTotal - deviceFree;
#if defined(__arm64__) || defined(__aarch64__)
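// on Apple Silicon the Metal device shares unified memory with the CPU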
if (std::string(ggml_backend_dev_name(device)) == "Metal") {
unifiedVramSize += deviceTotal;
}
#endif
}
}
#ifdef GPU_INFO_USE_VULKAN
uint64_t vulkanDeviceTotal = 0;
uint64_t vulkanDeviceUsed = 0;
uint64_t vulkanDeviceUnifiedVramSize = 0;
const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, &vulkanDeviceUnifiedVramSize, logVulkanWarning);
if (vulkanDeviceSupportsMemoryBudgetExtension) {
if (vulkanDeviceUnifiedVramSize > total) {
// this means that we counted memory from devices that aren't used by llama.cpp
vulkanDeviceUnifiedVramSize = 0;
}
unifiedVramSize += vulkanDeviceUnifiedVramSize;
}
if (used == 0 && vulkanDeviceUsed != 0) {
used = vulkanDeviceUsed;
}
#endif
Napi::Object result = Napi::Object::New(info.Env());
result.Set("total", Napi::Number::From(info.Env(), total));
result.Set("used", Napi::Number::From(info.Env(), used));
result.Set("unifiedSize", Napi::Number::From(info.Env(), unifiedVramSize));
return result;
}
Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
std::vector<std::string> deviceNames;
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
ggml_backend_dev_t device = ggml_backend_dev_get(i);
auto deviceType = ggml_backend_dev_type(device);
if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) {
deviceNames.push_back(std::string(ggml_backend_dev_description(device)));
}
}
Napi::Object result = Napi::Object::New(info.Env());
Napi::Array deviceNamesNapiArray = Napi::Array::New(info.Env(), deviceNames.size());
for (size_t i = 0; i < deviceNames.size(); ++i) {
deviceNamesNapiArray[i] = Napi::String::New(info.Env(), deviceNames[i]);
}
result.Set("deviceNames", deviceNamesNapiArray);
return result;
}
std::pair<ggml_backend_dev_t, std::string> getGpuDevice() {
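// prefer a known GPU backend (Metal, Vulkan, CUDA/ROCm/MUSA); fall back to the CPU device when none is found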
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
ggml_backend_dev_t device = ggml_backend_dev_get(i);
const auto deviceName = std::string(ggml_backend_dev_name(device));
if (deviceName == "Metal") {
return std::pair<ggml_backend_dev_t, std::string>(device, "metal");
} else if (std::string(deviceName).find("Vulkan") == 0) {
return std::pair<ggml_backend_dev_t, std::string>(device, "vulkan");
} else if (std::string(deviceName).find("CUDA") == 0 || std::string(deviceName).find("ROCm") == 0 || std::string(deviceName).find("MUSA") == 0) {
return std::pair<ggml_backend_dev_t, std::string>(device, "cuda");
}
}
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
ggml_backend_dev_t device = ggml_backend_dev_get(i);
const auto deviceName = std::string(ggml_backend_dev_name(device));
if (deviceName == "CPU") {
return std::pair<ggml_backend_dev_t, std::string>(device, "cpu");
}
}
return std::pair<ggml_backend_dev_t, std::string>(nullptr, "");
}
Napi::Value getGpuType(const Napi::CallbackInfo& info) {
const auto gpuDeviceRes = getGpuDevice();
const auto device = gpuDeviceRes.first;
const auto deviceType = gpuDeviceRes.second;
if (deviceType == "cpu") {
return Napi::Boolean::New(info.Env(), false);
} else if (device != nullptr && deviceType != "") {
return Napi::String::New(info.Env(), deviceType);
}
return info.Env().Undefined();
}
Napi::Value ensureGpuDeviceIsSupported(const Napi::CallbackInfo& info) {
#ifdef GPU_INFO_USE_VULKAN
if (!checkIsVulkanEnvSupported(logVulkanWarning)) {
Napi::Error::New(info.Env(), "Vulkan device is not supported").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
#endif
return info.Env().Undefined();
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <utility>
#include <string>
#include "napi.h"
#include "llama.h"
Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info);
Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info);
std::pair<ggml_backend_dev_t, std::string> getGpuDevice();
Napi::Value getGpuType(const Napi::CallbackInfo& info);
Napi::Value ensureGpuDeviceIsSupported(const Napi::CallbackInfo& info);

View File

@@ -0,0 +1,63 @@
#include "getMemoryInfo.h"
#include "addonLog.h"
#ifdef __APPLE__
#include <iostream>
#include <mach/mach.h>
#include <sys/sysctl.h>
#elif __linux__
#include <fstream>
#include <sstream>
#include <string>
#elif _WIN32
#include <iostream>
#include <windows.h>
#include <psapi.h>
#endif
Napi::Value getMemoryInfo(const Napi::CallbackInfo& info) {
uint64_t totalMemoryUsage = 0;
#ifdef __APPLE__
struct mach_task_basic_info taskInfo;
mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&taskInfo, &infoCount) == KERN_SUCCESS) {
totalMemoryUsage = taskInfo.virtual_size;
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get memory usage info").c_str(), nullptr);
}
#elif __linux__
std::ifstream procStatus("/proc/self/status");
std::string line;
bool foundMemoryUsage = false;
while (std::getline(procStatus, line)) {
if (line.rfind("VmSize:", 0) == 0) { // VmSize: total virtual memory size of the process
std::istringstream iss(line);
std::string key, unit;
size_t value;
if (iss >> key >> value >> unit) {
totalMemoryUsage = value * 1024; // Convert from kB to bytes
foundMemoryUsage = true;
}
break;
}
}
if (!foundMemoryUsage) {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get memory usage info").c_str(), nullptr);
}
#elif _WIN32
PROCESS_MEMORY_COUNTERS_EX memCounters;
if (GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&memCounters, sizeof(memCounters))) {
totalMemoryUsage = memCounters.PrivateUsage;
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get memory usage info").c_str(), nullptr);
}
#endif
Napi::Object obj = Napi::Object::New(info.Env());
obj.Set("total", Napi::Number::New(info.Env(), totalMemoryUsage));
return obj;
}

View File

@@ -0,0 +1,4 @@
#pragma once
#include "napi.h"
Napi::Value getMemoryInfo(const Napi::CallbackInfo& info);

View File

@@ -0,0 +1,69 @@
#include "getSwapInfo.h"
#include "addonLog.h"
#ifdef __APPLE__
#include <iostream>
#include <mach/mach.h>
#include <sys/sysctl.h>
#elif __linux__
#include <iostream>
#include <sys/sysinfo.h>
#elif _WIN32
#include <iostream>
#include <windows.h>
#include <psapi.h>
#endif
Napi::Value getSwapInfo(const Napi::CallbackInfo& info) {
uint64_t totalSwap = 0;
uint64_t freeSwap = 0;
uint64_t maxSize = 0;
bool maxSizeSet = true;
#ifdef __APPLE__
struct xsw_usage swapInfo;
size_t size = sizeof(swapInfo);
if (sysctlbyname("vm.swapusage", &swapInfo, &size, NULL, 0) == 0) {
totalSwap = swapInfo.xsu_total;
freeSwap = swapInfo.xsu_avail;
maxSizeSet = false;
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get swap info").c_str(), nullptr);
}
#elif __linux__
struct sysinfo sysInfo;
if (sysinfo(&sysInfo) == 0) {
totalSwap = sysInfo.totalswap;
freeSwap = sysInfo.freeswap;
maxSize = sysInfo.totalswap;
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get swap info").c_str(), nullptr);
}
#elif _WIN32
MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
if (GlobalMemoryStatusEx(&memInfo)) {
PERFORMANCE_INFORMATION perfInfo;
perfInfo.cb = sizeof(PERFORMANCE_INFORMATION);
if (GetPerformanceInfo(&perfInfo, sizeof(perfInfo))) {
totalSwap = memInfo.ullTotalPageFile;
freeSwap = memInfo.ullAvailPageFile;
maxSize = perfInfo.CommitLimit * perfInfo.PageSize;
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get max pagefile size").c_str(), nullptr);
}
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get pagefile info").c_str(), nullptr);
}
#endif
Napi::Object obj = Napi::Object::New(info.Env());
obj.Set("total", Napi::Number::New(info.Env(), totalSwap));
obj.Set("free", Napi::Number::New(info.Env(), freeSwap));
obj.Set("maxSize", maxSizeSet ? Napi::Number::New(info.Env(), maxSize) : Napi::Number::New(info.Env(), -1));
return obj;
}

View File

@@ -0,0 +1,4 @@
#pragma once
#include "napi.h"
Napi::Value getSwapInfo(const Napi::CallbackInfo& info);

View File

@@ -0,0 +1,3 @@
{
"release": "b7836"
}

View File

@@ -0,0 +1,21 @@
function(addVariantSuffix originalTarget variantSuffix)
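# give the target's output files a variant-specific name so multiple build variants can coexist side by side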
if (NOT TARGET ${originalTarget} OR "${variantSuffix}" STREQUAL "")
return()
endif()
set(_name "${originalTarget}.${variantSuffix}")
set_target_properties(${originalTarget} PROPERTIES
OUTPUT_NAME "${_name}"
RUNTIME_OUTPUT_NAME "${_name}" # Windows .dll
LIBRARY_OUTPUT_NAME "${_name}" # Unix shared lib
ARCHIVE_OUTPUT_NAME "${_name}" # static / import lib
)
if (APPLE)
set_target_properties(${originalTarget} PROPERTIES
MACOSX_RPATH ON
INSTALL_NAME_DIR "@rpath"
)
endif()
endfunction()

View File

@@ -0,0 +1,68 @@
function(ensureNinjaPath)
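# when the generator is Ninja but CMAKE_MAKE_PROGRAM is unset, search common install locations (PATH, Chocolatey, WinGet, Visual Studio) for ninja.exe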
if ((NOT DEFINED CMAKE_MAKE_PROGRAM OR NOT EXISTS "${CMAKE_MAKE_PROGRAM}" OR NOT CMAKE_MAKE_PROGRAM) AND (CMAKE_GENERATOR STREQUAL "Ninja" OR CMAKE_GENERATOR STREQUAL "Ninja Multi-Config"))
find_program(NINJA_EXECUTABLE ninja)
set(CMAKE_MAKE_PROGRAM "")
set(CMAKE_MAKE_PROGRAM "" PARENT_SCOPE)
if(NINJA_EXECUTABLE AND EXISTS "${NINJA_EXECUTABLE}")
set(CMAKE_MAKE_PROGRAM "${NINJA_EXECUTABLE}")
set(CMAKE_MAKE_PROGRAM "${NINJA_EXECUTABLE}" CACHE FILEPATH "Make program")
set(CMAKE_MAKE_PROGRAM "${NINJA_EXECUTABLE}" PARENT_SCOPE)
endif()
if (NOT CMAKE_MAKE_PROGRAM OR NOT EXISTS "${CMAKE_MAKE_PROGRAM}")
set(PROGRAMDATA_PATH "$ENV{ProgramData}")
file(TO_CMAKE_PATH "${PROGRAMDATA_PATH}" PROGRAMDATA_PATH)
if (PROGRAMDATA_PATH AND EXISTS "${PROGRAMDATA_PATH}")
file(GLOB_RECURSE FOUND_NINJA_EXE "${PROGRAMDATA_PATH}/chocolatey/bin/ninja.exe")
if(FOUND_NINJA_EXE)
list(GET FOUND_NINJA_EXE 0 FOUND_CMAKE_MAKE_PROGRAM)
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}")
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}" CACHE FILEPATH "Make program")
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}" PARENT_SCOPE)
endif()
endif()
endif()
if (NOT CMAKE_MAKE_PROGRAM OR NOT EXISTS "${CMAKE_MAKE_PROGRAM}")
set(LOCALAPPDATA_PATH "$ENV{LOCALAPPDATA}")
file(TO_CMAKE_PATH "${LOCALAPPDATA_PATH}" LOCALAPPDATA_PATH)
if (LOCALAPPDATA_PATH AND EXISTS "${LOCALAPPDATA_PATH}")
file(GLOB_RECURSE FOUND_NINJA_EXE "${LOCALAPPDATA_PATH}/Microsoft/WinGet/Packages/Ninja-build.Ninja_Microsoft.Winget.*/ninja.exe")
if(FOUND_NINJA_EXE)
list(GET FOUND_NINJA_EXE 0 FOUND_CMAKE_MAKE_PROGRAM)
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}")
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}" CACHE FILEPATH "Make program")
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}" PARENT_SCOPE)
endif()
endif()
endif()
if (NOT CMAKE_MAKE_PROGRAM OR NOT EXISTS "${CMAKE_MAKE_PROGRAM}")
foreach(PATH IN LISTS PROGRAMFILES_PATHS)
file(GLOB_RECURSE FOUND_NINJA_EXE
"${PATH}/Microsoft Visual Studio/*/CMake/Ninja/ninja.exe"
"${PATH}/Microsoft Visual Studio/**/*/CMake/Ninja/ninja.exe"
"${PATH}/Microsoft Visual Studio/*/Common7/IDE/CommonExtensions/Microsoft/CMake/Ninja/ninja.exe"
"${PATH}/Microsoft Visual Studio/**/*/Common7/IDE/CommonExtensions/Microsoft/CMake/Ninja/ninja.exe")
if(FOUND_NINJA_EXE)
list(GET FOUND_NINJA_EXE 0 FOUND_CMAKE_MAKE_PROGRAM)
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}")
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}" CACHE FILEPATH "Make program")
set(CMAKE_MAKE_PROGRAM "${FOUND_CMAKE_MAKE_PROGRAM}" PARENT_SCOPE)
break()
endif()
endforeach()
endif()
if (NOT CMAKE_MAKE_PROGRAM OR NOT EXISTS "${CMAKE_MAKE_PROGRAM}")
message(FATAL_ERROR "Ninja build system not found. Please install Ninja or Visual Studio Build Tools.")
endif()
endif()
endfunction()

View File

@@ -0,0 +1,34 @@
function(ensureNodeLib HOST_ARCH TARGET_ARCH)
if (CMAKE_JS_NODELIB_DEF AND CMAKE_JS_NODELIB_TARGET)
if (NOT DEFINED NODE_LIB_CMAKE_AR)
foreach(PATH IN LISTS PROGRAMFILES_PATHS)
if(NODE_LIB_CMAKE_AR)
break()
endif()
file(GLOB_RECURSE FOUND_LIB_EXE
"${PATH}/Microsoft Visual Studio/*/VC/Tools/MSVC/*/bin/Host${HOST_ARCH}/${TARGET_ARCH}/lib.exe"
"${PATH}/Microsoft Visual Studio/**/*/VC/Tools/MSVC/*/bin/Host${HOST_ARCH}/${TARGET_ARCH}/lib.exe")
if(FOUND_LIB_EXE)
list(GET FOUND_LIB_EXE 0 NODE_LIB_CMAKE_AR)
break()
endif()
endforeach()
endif()
set(NODE_LIB_CMAKE_AR_MACHINE_FLAG "")
if (TARGET_ARCH STREQUAL "x64")
set(NODE_LIB_CMAKE_AR_MACHINE_FLAG "/MACHINE:X64")
elseif (TARGET_ARCH STREQUAL "arm64")
set(NODE_LIB_CMAKE_AR_MACHINE_FLAG "/MACHINE:ARM64")
endif()
if (EXISTS "${NODE_LIB_CMAKE_AR}")
# Generate node.lib
execute_process(COMMAND ${NODE_LIB_CMAKE_AR} /def:${CMAKE_JS_NODELIB_DEF} /out:${CMAKE_JS_NODELIB_TARGET} ${CMAKE_STATIC_LINKER_FLAGS} ${NODE_LIB_CMAKE_AR_MACHINE_FLAG} /nologo)
else()
message(FATAL_ERROR "Windows Resource Compiler (lib.exe) not found. Please install Visual Studio Build Tools.")
endif()
endif()
endfunction()

View File

@@ -0,0 +1,12 @@
function(llvmApplyGnuModeAdaptations)
# adapt cmake-js to work with llvm in GNU mode
if (NOT CMAKE_SHARED_LINKER_FLAGS MATCHES "-Xlinker /DELAYLOAD:NODE.EXE")
string(REPLACE "/DELAYLOAD:NODE.EXE" "-Xlinker /DELAYLOAD:NODE.EXE -Xlinker /defaultlib:delayimp"
UPDATED_CMAKE_SHARED_LINKER_FLAGS
"${CMAKE_SHARED_LINKER_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${UPDATED_CMAKE_SHARED_LINKER_FLAGS}" PARENT_SCOPE)
endif()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Xclang --dependent-lib=msvcrt" PARENT_SCOPE)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Xclang --dependent-lib=msvcrt" PARENT_SCOPE)
endfunction()

View File

@@ -0,0 +1,37 @@
function(llvmEnsureCmakeAr CURRENT_ARCH)
set (LLVM_DIR_ARCH_NAME "")
if (CURRENT_ARCH STREQUAL "x64")
set (LLVM_DIR_ARCH_NAME "x64")
elseif (CURRENT_ARCH STREQUAL "arm64")
set (LLVM_DIR_ARCH_NAME "ARM64")
endif()
if (NOT DEFINED CMAKE_AR OR NOT EXISTS "${CMAKE_AR}")
set(LLVM_INSTALL_PATHS "")
foreach(PATH IN LISTS PROGRAMFILES_PATHS)
list(APPEND LLVM_INSTALL_PATHS "${PATH}/LLVM")
file(GLOB_RECURSE FOUND_LLVM_ROOT LIST_DIRECTORIES true
"${PATH}/Microsoft Visual Studio/*/VC/Tools/Llvm/${LLVM_DIR_ARCH_NAME}"
"${PATH}/Microsoft Visual Studio/**/*/VC/Tools/Llvm/${LLVM_DIR_ARCH_NAME}")
list(FILTER FOUND_LLVM_ROOT INCLUDE REGEX "VC/Tools/Llvm/${LLVM_DIR_ARCH_NAME}$")
if(FOUND_LLVM_ROOT)
list(APPEND LLVM_INSTALL_PATHS ${FOUND_LLVM_ROOT})
endif()
endforeach()
if(DEFINED LLVM_ROOT AND EXISTS "${LLVM_ROOT}")
list(INSERT LLVM_INSTALL_PATHS 0 "${LLVM_ROOT}")
endif()
list(REMOVE_DUPLICATES LLVM_INSTALL_PATHS)
foreach(PATH IN LISTS LLVM_INSTALL_PATHS)
if(EXISTS "${PATH}/bin/llvm-ar.exe" AND EXISTS "${PATH}/bin/llvm-ar.exe")
set(CMAKE_AR "${PATH}/bin/llvm-ar.exe" PARENT_SCOPE)
break()
endif()
endforeach()
endif()
endfunction()

View File

@@ -0,0 +1,87 @@
function(llvmUseGnuModeCompilers CURRENT_ARCH)
set(LLVM_INSTALLATION_URL "https://github.com/llvm/llvm-project/releases/tag/llvmorg-19.1.5")
set(CMAKE_C_COMPILER clang)
set(CMAKE_C_COMPILER clang PARENT_SCOPE)
set(CMAKE_CXX_COMPILER clang++)
set(CMAKE_CXX_COMPILER clang++ PARENT_SCOPE)
set(CMAKE_RC_COMPILER llvm-rc)
set(CMAKE_RC_COMPILER llvm-rc PARENT_SCOPE)
set (LLVM_DIR_ARCH_NAME "")
if (CURRENT_ARCH STREQUAL "x64")
set (LLVM_DIR_ARCH_NAME "x64")
elseif (CURRENT_ARCH STREQUAL "arm64")
set (LLVM_DIR_ARCH_NAME "ARM64")
endif()
set(LLVM_INSTALL_PATHS "")
foreach(PATH IN LISTS PROGRAMFILES_PATHS)
list(APPEND LLVM_INSTALL_PATHS "${PATH}/LLVM")
file(GLOB_RECURSE FOUND_LLVM_ROOT LIST_DIRECTORIES true
"${PATH}/Microsoft Visual Studio/*/VC/Tools/Llvm/${LLVM_DIR_ARCH_NAME}"
"${PATH}/Microsoft Visual Studio/**/*/VC/Tools/Llvm/${LLVM_DIR_ARCH_NAME}")
list(FILTER FOUND_LLVM_ROOT INCLUDE REGEX "VC/Tools/Llvm/${LLVM_DIR_ARCH_NAME}$")
if(FOUND_LLVM_ROOT)
list(APPEND LLVM_INSTALL_PATHS ${FOUND_LLVM_ROOT})
endif()
endforeach()
if(DEFINED LLVM_ROOT AND EXISTS "${LLVM_ROOT}")
list(INSERT LLVM_INSTALL_PATHS 0 "${LLVM_ROOT}")
endif()
list(REMOVE_DUPLICATES LLVM_INSTALL_PATHS)
set(LLVM_ROOT "")
set(LLVM_ROOT "" PARENT_SCOPE)
foreach(PATH IN LISTS LLVM_INSTALL_PATHS)
if(EXISTS "${PATH}/bin/clang.exe" AND EXISTS "${PATH}/bin/clang++.exe" AND EXISTS "${PATH}/bin/llvm-rc.exe")
set(LLVM_ROOT "${PATH}")
set(LLVM_ROOT "${PATH}" PARENT_SCOPE)
break()
endif()
endforeach()
if(LLVM_ROOT STREQUAL "")
if (CURRENT_ARCH STREQUAL "arm64")
message(FATAL_ERROR "LLVM installation was not found. Please install LLVM for WoA (Windows on Arm): ${LLVM_INSTALLATION_URL}")
else()
message(FATAL_ERROR "LLVM installation was not found. Please install LLVM: ${LLVM_INSTALLATION_URL}")
endif()
endif()
if (NOT EXISTS "${CMAKE_C_COMPILER}" OR NOT EXISTS "${CMAKE_CXX_COMPILER}" OR NOT EXISTS "${CMAKE_RC_COMPILER}")
set(CMAKE_C_COMPILER "${LLVM_ROOT}/bin/clang.exe")
set(CMAKE_C_COMPILER "${LLVM_ROOT}/bin/clang.exe" PARENT_SCOPE)
set(CMAKE_CXX_COMPILER "${LLVM_ROOT}/bin/clang++.exe")
set(CMAKE_CXX_COMPILER "${LLVM_ROOT}/bin/clang++.exe" PARENT_SCOPE)
set(CMAKE_RC_COMPILER "${LLVM_ROOT}/bin/llvm-rc.exe")
set(CMAKE_RC_COMPILER "${LLVM_ROOT}/bin/llvm-rc.exe" PARENT_SCOPE)
endif()
if (NOT EXISTS "${CMAKE_C_COMPILER}")
if (CURRENT_ARCH STREQUAL "arm64")
message(FATAL_ERROR "Clang compiler not found at ${CMAKE_C_COMPILER}. Please reinstall LLVM for WoA (Windows on Arm): ${LLVM_INSTALLATION_URL}")
else()
message(FATAL_ERROR "Clang compiler not found at ${CMAKE_C_COMPILER}. Please reinstall LLVM: ${LLVM_INSTALLATION_URL}")
endif()
endif()
if (NOT EXISTS "${CMAKE_CXX_COMPILER}")
if (CURRENT_ARCH STREQUAL "arm64")
message(FATAL_ERROR "Clang++ compiler not found at ${CMAKE_CXX_COMPILER}. Please reinstall LLVM for WoA (Windows on Arm): ${LLVM_INSTALLATION_URL}")
else()
message(FATAL_ERROR "Clang++ compiler not found at ${CMAKE_CXX_COMPILER}. Please reinstall LLVM: ${LLVM_INSTALLATION_URL}")
endif()
endif()
if (NOT EXISTS "${CMAKE_RC_COMPILER}")
if (CURRENT_ARCH STREQUAL "arm64")
message(FATAL_ERROR "LLVM Resource Compiler not found at ${CMAKE_RC_COMPILER}. Please reinstall LLVM for WoA (Windows on Arm): ${LLVM_INSTALLATION_URL}")
else()
message(FATAL_ERROR "LLVM Resource Compiler not found at ${CMAKE_RC_COMPILER}. Please reinstall LLVM: ${LLVM_INSTALLATION_URL}")
endif()
endif()
endfunction()

View File

@@ -0,0 +1,35 @@
function(setProgramFilesPaths CURRENT_ARCH)
set(PROGRAMFILES_X86_ENV_NAME "ProgramFiles(x86)")
set(PROGRAMFILES "$ENV{ProgramFiles}")
set(PROGRAMFILES_X86 "$ENV{${PROGRAMFILES_X86_ENV_NAME}}")
file(TO_CMAKE_PATH "${PROGRAMFILES}" PROGRAMFILES)
file(TO_CMAKE_PATH "${PROGRAMFILES_X86}" PROGRAMFILES_X86)
if(CURRENT_ARCH STREQUAL "arm64")
set(PROGRAMFILES_ARM64_ENV_NAME "ProgramFiles(Arm)")
set(PROGRAMFILES_ARM64 "$ENV{${PROGRAMFILES_ARM64_ENV_NAME}}")
file(TO_CMAKE_PATH "${PROGRAMFILES_ARM64}" PROGRAMFILES_ARM64)
set(PROGRAMFILES_PATHS_LIST
"${PROGRAMFILES_ARM64}"
"${PROGRAMFILES}"
"${PROGRAMFILES_X86}"
"C:/Program Files (Arm)"
"C:/Program Files"
"C:/Program Files (x86)"
)
list(REMOVE_DUPLICATES PROGRAMFILES_PATHS_LIST)
set(PROGRAMFILES_PATHS ${PROGRAMFILES_PATHS_LIST} PARENT_SCOPE)
else()
set(PROGRAMFILES_PATHS_LIST
"${PROGRAMFILES}"
"${PROGRAMFILES_X86}"
"C:/Program Files"
"C:/Program Files (x86)"
)
list(REMOVE_DUPLICATES PROGRAMFILES_PATHS_LIST)
set(PROGRAMFILES_PATHS ${PROGRAMFILES_PATHS_LIST} PARENT_SCOPE)
endif()
endfunction()

BIN
node_modules/node-llama-cpp/llama/gitRelease.bundle generated vendored Normal file

Binary file not shown.

View File

@@ -0,0 +1,200 @@
#include <stddef.h>
#include <map>
#include <vector>
#include <vulkan/vulkan.hpp>
constexpr std::uint32_t VK_VENDOR_ID_AMD = 0x1002;
constexpr std::uint32_t VK_VENDOR_ID_APPLE = 0x106b;
constexpr std::uint32_t VK_VENDOR_ID_INTEL = 0x8086;
constexpr std::uint32_t VK_VENDOR_ID_NVIDIA = 0x10de;
typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
static vk::Instance vulkanInstance() {
vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2);
vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {});
return vk::createInstance(createInfo);
}
static std::vector<vk::PhysicalDevice> dedupedDevices() {
vk::Instance instance = vulkanInstance();
auto physicalDevices = instance.enumeratePhysicalDevices();
std::vector<vk::PhysicalDevice> dedupedDevices;
dedupedDevices.reserve(physicalDevices.size());
// adapted from `ggml_vk_instance_init` in `ggml-vulkan.cpp`
for (const auto& device : physicalDevices) {
vk::PhysicalDeviceProperties2 newProps;
vk::PhysicalDeviceDriverProperties newDriver;
vk::PhysicalDeviceIDProperties newId;
newProps.pNext = &newDriver;
newDriver.pNext = &newId;
device.getProperties2(&newProps);
auto oldDevice = std::find_if(
dedupedDevices.begin(),
dedupedDevices.end(),
[&newId](const vk::PhysicalDevice& oldDevice) {
vk::PhysicalDeviceProperties2 oldProps;
vk::PhysicalDeviceDriverProperties oldDriver;
vk::PhysicalDeviceIDProperties oldId;
oldProps.pNext = &oldDriver;
oldDriver.pNext = &oldId;
oldDevice.getProperties2(&oldProps);
bool equals = std::equal(std::begin(oldId.deviceUUID), std::end(oldId.deviceUUID), std::begin(newId.deviceUUID));
equals = equals || (
oldId.deviceLUIDValid && newId.deviceLUIDValid &&
std::equal(std::begin(oldId.deviceLUID), std::end(oldId.deviceLUID), std::begin(newId.deviceLUID))
);
return equals;
}
);
if (oldDevice == dedupedDevices.end()) {
dedupedDevices.push_back(device);
continue;
}
vk::PhysicalDeviceProperties2 oldProps;
vk::PhysicalDeviceDriverProperties oldDriver;
oldProps.pNext = &oldDriver;
oldDevice->getProperties2(&oldProps);
std::map<vk::DriverId, int> driverPriorities {};
int oldPriority = 1000;
int newPriority = 1000;
switch (oldProps.properties.vendorID) {
case VK_VENDOR_ID_AMD:
driverPriorities[vk::DriverId::eMesaRadv] = 1;
driverPriorities[vk::DriverId::eAmdOpenSource] = 2;
driverPriorities[vk::DriverId::eAmdProprietary] = 3;
break;
case VK_VENDOR_ID_INTEL:
driverPriorities[vk::DriverId::eIntelOpenSourceMESA] = 1;
driverPriorities[vk::DriverId::eIntelProprietaryWindows] = 2;
break;
case VK_VENDOR_ID_NVIDIA:
driverPriorities[vk::DriverId::eNvidiaProprietary] = 1;
#if defined(VK_API_VERSION_1_3) && VK_HEADER_VERSION >= 235
driverPriorities[vk::DriverId::eMesaNvk] = 2;
#endif
break;
}
driverPriorities[vk::DriverId::eMesaDozen] = 4;
if (driverPriorities.count(oldDriver.driverID)) {
oldPriority = driverPriorities[oldDriver.driverID];
}
if (driverPriorities.count(newDriver.driverID)) {
newPriority = driverPriorities[newDriver.driverID];
}
if (newPriority < oldPriority) {
dedupedDevices.erase(std::remove(dedupedDevices.begin(), dedupedDevices.end(), *oldDevice), dedupedDevices.end());
dedupedDevices.push_back(device);
}
}
return dedupedDevices;
}
static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedMemorySize, bool addDeviceNames, std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback, bool * checkSupported) {
auto physicalDevices = dedupedDevices();
size_t usedMem = 0;
size_t totalMem = 0;
size_t totalUnifiedMemorySize = 0;
for (size_t i = 0; i < physicalDevices.size(); i++) {
vk::PhysicalDevice physicalDevice = physicalDevices[i];
vk::PhysicalDeviceMemoryProperties memProps = physicalDevice.getMemoryProperties();
vk::PhysicalDeviceProperties deviceProps = physicalDevice.getProperties();
if (deviceProps.deviceType == vk::PhysicalDeviceType::eCpu) {
// ignore CPU devices, as we don't want to count RAM from the CPU as VRAM
continue;
}
std::vector<vk::ExtensionProperties> extensionProperties = physicalDevice.enumerateDeviceExtensionProperties();
bool memoryBudgetExtensionSupported =
std::any_of(
extensionProperties.begin(),
extensionProperties.end(),
[](const vk::ExtensionProperties& ext) { return std::string(ext.extensionName.data()) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME;}
);
if (memoryBudgetExtensionSupported) {
vk::PhysicalDeviceMemoryBudgetPropertiesEXT memoryBudgetProperties;
vk::PhysicalDeviceMemoryProperties2 memProps2 = {};
memProps2.pNext = &memoryBudgetProperties;
physicalDevice.getMemoryProperties2(&memProps2);
for (uint32_t i = 0; i < memProps.memoryHeapCount; ++i) {
const auto heap = memProps2.memoryProperties.memoryHeaps[i];
if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
totalMem += heap.size;
usedMem += memoryBudgetProperties.heapUsage[i] + (heap.size - memoryBudgetProperties.heapBudget[i]);
if (heap.flags & vk::MemoryHeapFlagBits::eMultiInstance) {
totalUnifiedMemorySize += heap.size;
}
if (heap.size > 0 && addDeviceNames) {
(*deviceNames).push_back(std::string(deviceProps.deviceName.data()));
}
if (checkSupported != nullptr && checkSupported) {
VkPhysicalDeviceFeatures2 features2 = {};
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
VkPhysicalDeviceVulkan11Features vk11Features = {};
vk11Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
features2.pNext = &vk11Features;
vkGetPhysicalDeviceFeatures2(physicalDevice, &features2);
if (!vk11Features.storageBuffer16BitAccess) {
*checkSupported = false;
}
}
}
}
} else {
// VK_EXT_memory_budget extension is not supported, so we cannot determine used memory
warningLogCallback(
(
"Vulkan VK_EXT_memory_budget extension not supported for device \"" +
std::string(deviceProps.deviceName.data()) + "\", so VRAM info cannot be determined for it"
).c_str()
);
return false;
}
}
*total = totalMem;
*used = usedMem;
*unifiedMemorySize = totalUnifiedMemorySize;
return true;
}
bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, size_t* unifiedMemorySize, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
return enumerateVulkanDevices(total, used, unifiedMemorySize, false, nullptr, warningLogCallback, nullptr);
}
bool checkIsVulkanEnvSupported(gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
size_t total = 0;
size_t used = 0;
size_t unifiedMemorySize = 0;
bool isSupported = true;
enumerateVulkanDevices(&total, &used, &unifiedMemorySize, false, nullptr, warningLogCallback, &isSupported);
return isSupported;
}

View File

@@ -0,0 +1,9 @@
#pragma once
#include <stddef.h>
#include <vector>
typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, size_t* unifiedMemorySize, gpuInfoVulkanWarningLogCallback_t warningLogCallback);
bool checkIsVulkanEnvSupported(gpuInfoVulkanWarningLogCallback_t warningLogCallback);

409
node_modules/node-llama-cpp/llama/grammars/README.md generated vendored Normal file
View File

@@ -0,0 +1,409 @@
# GBNF Guide
GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis. GBNF grammars are supported in various ways in `tools/cli`, `tools/completion` and `tools/server`.
## Background
[Backus-Naur Form (BNF)](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form) is a notation for describing the syntax of formal languages like programming languages, file formats, and protocols. GBNF is an extension of BNF that primarily adds a few modern regex-like features.
## Basics
In GBNF, we define *production rules* that specify how a *non-terminal* (rule name) can be replaced with sequences of *terminals* (characters, specifically Unicode [code points](https://en.wikipedia.org/wiki/Code_point)) and other non-terminals. The basic format of a production rule is `nonterminal ::= sequence...`.
## Example
Before going deeper, let's look at some of the features demonstrated in `grammars/chess.gbnf`, a small chess notation grammar:
```
# `root` specifies the pattern for the overall output
root ::= (
# it must start with the characters "1. " followed by a sequence
# of characters that match the `move` rule, followed by a space, followed
# by another move, and then a newline
"1. " move " " move "\n"
# it's followed by one or more subsequent moves, numbered with one or two digits
([1-9] [0-9]? ". " move " " move "\n")+
)
# `move` is an abstract representation, which can be a pawn, nonpawn, or castle.
# The `[+#]?` denotes an optional check or checkmate sign after a move
move ::= (pawn | nonpawn | castle) [+#]?
pawn ::= ...
nonpawn ::= ...
castle ::= ...
```
## Non-Terminals and Terminals
Non-terminal symbols (rule names) stand for a pattern of terminals and other non-terminals. Each must be a dashed lowercase word, like `move`, `castle`, or `check-mate`.
Terminals are actual characters ([code points](https://en.wikipedia.org/wiki/Code_point)). They can be specified as a sequence like `"1"` or `"O-O"` or as ranges like `[1-9]` or `[NBKQR]`.
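As a tiny illustration (a hypothetical fragment, not one of the bundled grammars), the rule below defines the non-terminal `square` in terms of a terminal range and another non-terminal, `rank`:
```
# `square` combines the terminal range [a-h] with the non-terminal `rank`
square ::= [a-h] rank
rank ::= [1-8]
```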
## Characters and character ranges
Terminals support the full range of Unicode. Unicode characters can be specified directly in the grammar, for example `hiragana ::= [ぁ-ゟ]`, or with escapes: 8-bit (`\xXX`), 16-bit (`\uXXXX`) or 32-bit (`\UXXXXXXXX`).
Character ranges can be negated with `^`:
```
single-line ::= [^\n]+ "\n"
```
## Sequences and Alternatives
The order of symbols in a sequence matters. For example, in `"1. " move " " move "\n"`, the `"1. "` must come before the first `move`, etc.
Alternatives, denoted by `|`, give different sequences that are acceptable. For example, in `move ::= pawn | nonpawn | castle`, `move` can be a `pawn` move, a `nonpawn` move, or a `castle`.
Parentheses `()` can be used to group sequences, which allows for embedding alternatives in a larger rule or applying repetition and optional symbols (below) to a sequence.
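As a short sketch (hypothetical, not one of the bundled grammars), the rule below combines a fixed sequence with grouped alternatives:
```
# the output must be a single sentence naming one of three colors
root ::= "The answer is " ("red" | "green" | "blue") ".\n"
```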
## Repetition and Optional Symbols
- `*` after a symbol or sequence means that it can be repeated zero or more times (equivalent to `{0,}`).
- `+` denotes that the symbol or sequence should appear one or more times (equivalent to `{1,}`).
- `?` makes the preceding symbol or sequence optional (equivalent to `{0,1}`).
- `{m}` repeats the preceding symbol or sequence exactly `m` times
- `{m,}` repeats the preceding symbol or sequence at least `m` times
- `{m,n}` repeats the preceding symbol or sequence between `m` and `n` times (inclusive)
- `{0,n}` repeats the preceding symbol or sequence at most `n` times (inclusive), as in the sketch below
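Here is a small hypothetical rule (not taken from the bundled grammars) that combines several of these operators:
```
# an optional sign followed by three to five digits
signed-num ::= ("-" | "+")? [0-9]{3,5}
# one such number followed by zero or more comma-separated numbers
num-list ::= signed-num ("," signed-num)*
```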
## Tokens
Tokens allow grammars to match specific tokenizer tokens rather than character sequences. This is useful for constraining outputs based on special tokens (like `<think>` or `</think>`).
Tokens can be specified in two ways:
1. **Token ID**: Use angle brackets with the token ID in square brackets: `<[token-id]>`. For example, `<[1000]>` matches the token with ID 1000.
2. **Token string**: Use angle brackets with the token text directly: `<token>`. For example, `<think>` will match the token whose text is exactly `<think>`. This only works if the string tokenizes to exactly one token in the vocabulary, otherwise the grammar will fail to parse.
You can negate token matches using the `!` prefix: `!<[1000]>` or `!<think>` matches any token *except* the specified one.
```
# Match a thinking block: <think>...</think>
# Using token strings (requires these to be single tokens in the vocab)
root ::= <think> thinking </think> .*
thinking ::= !</think>*
# Equivalent grammar using explicit token IDs
# Assumes token 1000 = <think>, token 1001 = </think>
root ::= <[1000]> thinking <[1001]> .*
thinking ::= !<[1001]>*
```
## Comments and newlines
Comments can be specified with `#`:
```
# defines whitespace (one or more spaces, tabs, or newlines)
ws ::= [ \t\n]+
```
Newlines are allowed between rules and between symbols or sequences nested inside parentheses. Additionally, a newline after an alternate marker `|` will continue the current rule, even outside of parentheses.
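For instance (a hypothetical fragment), a rule can be continued on the next line after the `|` marker:
```
# alternatives may continue on the following line after `|`
direction ::= "north" | "south" |
"east" | "west"
```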
## The root rule
In a full grammar, the `root` rule always defines the starting point of the grammar. In other words, it specifies what the entire output must match.
```
# a grammar for lists
root ::= ("- " item)+
item ::= [^\n]+ "\n"
```
## Next steps
This guide provides a brief overview. Check out the GBNF files in this directory (`grammars/`) for examples of full grammars. You can try them out with:
```
./llama-cli -m <model> --grammar-file grammars/some-grammar.gbnf -p 'Some prompt'
```
`llama.cpp` can also convert JSON schemas to grammars either ahead of time or at each request, see below.
## Troubleshooting
Grammars currently have performance gotchas (see https://github.com/ggml-org/llama.cpp/issues/4218).
### Efficient optional repetitions
A common pattern is to allow repetitions of a pattern `x` up to N times.
While semantically correct, the syntax `x? x? x? ... x?` (with N repetitions) may result in extremely slow sampling. Instead, you can write `x{0,N}` (or `(x (x (x ... (x)?...)?)?)?` w/ N-deep nesting in earlier llama.cpp versions).
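As a sketch of the difference, a hypothetical rule matching one to five lowercase letters could be written either way; the bounded form is the one to prefer:
```
# slow: chained optionals can make sampling extremely slow for large N
# word ::= [a-z] [a-z]? [a-z]? [a-z]? [a-z]?
# fast: bounded repetition expresses the same pattern efficiently
word ::= [a-z] [a-z]{0,4}
```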
## Using GBNF grammars
You can use GBNF grammars:
- In [llama-server](../tools/server)'s completion endpoints, passed as the `grammar` body field
- In [llama-cli](../tools/cli) and [llama-completion](../tools/completion), passed as the `--grammar` & `--grammar-file` flags
- With [test-gbnf-validator](../tests/test-gbnf-validator.cpp), to test them against strings.
## JSON Schemas → GBNF
`llama.cpp` supports converting a subset of https://json-schema.org/ to GBNF grammars:
- In [llama-server](../tools/server):
- For any completion endpoints, passed as the `json_schema` body field
- For the `/chat/completions` endpoint, passed inside the `response_format` body field (e.g. `{"type": "json_object", "schema": {"items": {}}}` or `{ type: "json_schema", json_schema: {"schema": ...} }`)
- In [llama-cli](../tools/cli) and [llama-completion](../tools/completion), passed as the `--json` / `-j` flag
- To convert to a grammar ahead of time:
- in CLI, with [examples/json_schema_to_grammar.py](../examples/json_schema_to_grammar.py)
- in JavaScript with [json-schema-to-grammar.mjs](../tools/server/public_legacy/json-schema-to-grammar.mjs) (this is used by the [server](../tools/server)'s Web UI)
> [!NOTE]
> The JSON schema is only used to constrain the model output and is not injected into the prompt. The model has no visibility into the schema, so if you want it to understand the expected structure, describe it explicitly in your prompt. This does not apply to tool calling, where schemas are injected into the prompt.
Take a look at [tests](../tests/test-json-schema-to-grammar.cpp) to see which features are likely supported (you'll also find usage examples in https://github.com/ggml-org/llama.cpp/pull/5978, https://github.com/ggml-org/llama.cpp/pull/6659 & https://github.com/ggml-org/llama.cpp/pull/6555).
```bash
llama-cli \
-hfr bartowski/Phi-3-medium-128k-instruct-GGUF \
-hff Phi-3-medium-128k-instruct-Q8_0.gguf \
-j '{
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"minLength": 1,
"maxLength": 100
},
"age": {
"type": "integer",
"minimum": 0,
"maximum": 150
}
},
"required": ["name", "age"],
"additionalProperties": false
},
"minItems": 10,
"maxItems": 100
}' \
-p 'Generate a {name, age}[] JSON array with famous actors of all ages.'
```
<details>
<summary>Show grammar</summary>
You can convert any schema in command-line with:
```bash
examples/json_schema_to_grammar.py name-age-schema.json
```
```
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
item ::= "{" space item-name-kv "," space item-age-kv "}" space
item-age ::= ([0-9] | ([1-8] [0-9] | [9] [0-9]) | "1" ([0-4] [0-9] | [5] "0")) space
item-age-kv ::= "\"age\"" space ":" space item-age
item-name ::= "\"" char{1,100} "\"" space
item-name-kv ::= "\"name\"" space ":" space item-name
root ::= "[" space item ("," space item){9,99} "]" space
space ::= | " " | "\n" [ \t]{0,20}
```
</details>
Here is also a list of known limitations (contributions welcome):
- `additionalProperties` defaults to `false` (produces faster grammars + reduces hallucinations).
- `"additionalProperties": true` may produce keys that contain unescaped newlines.
- Unsupported features are skipped silently. It is currently advised to use the command-line Python converter (see above) to see any warnings, and to inspect the resulting grammar / test it w/ [llama-gbnf-validator](../examples/gbnf-validator/gbnf-validator.cpp).
- Can't mix `properties` w/ `anyOf` / `oneOf` in the same type (https://github.com/ggml-org/llama.cpp/issues/7703)
- [prefixItems](https://json-schema.org/draft/2020-12/json-schema-core#name-prefixitems) is broken (but [items](https://json-schema.org/draft/2020-12/json-schema-core#name-items) works)
- `minimum`, `exclusiveMinimum`, `maximum`, `exclusiveMaximum`: only supported for `"type": "integer"` for now, not `number`
- Nested `$ref`s are broken (https://github.com/ggml-org/llama.cpp/issues/8073)
- [pattern](https://json-schema.org/draft/2020-12/json-schema-validation#name-pattern)s must start with `^` and end with `$`
- Remote `$ref`s not supported in the C++ version (Python & JavaScript versions fetch https refs)
- `string` [formats](https://json-schema.org/draft/2020-12/json-schema-validation#name-defined-formats) lack `uri`, `email`
- No [`patternProperties`](https://json-schema.org/draft/2020-12/json-schema-core#name-patternproperties)
And a non-exhaustive list of other unsupported features that are unlikely to be implemented (hard and/or too slow to support w/ stateless grammars):
- [`uniqueItems`](https://json-schema.org/draft/2020-12/json-schema-validation#name-uniqueitems)
- [`contains`](https://json-schema.org/draft/2020-12/json-schema-core#name-contains) / `minContains`
- `$anchor` (cf. [dereferencing](https://json-schema.org/draft/2020-12/json-schema-core#name-dereferencing))
- [`not`](https://json-schema.org/draft/2020-12/json-schema-core#name-not)
- [Conditionals](https://json-schema.org/draft/2020-12/json-schema-core#name-keywords-for-applying-subsche) `if` / `then` / `else` / `dependentSchemas`
### A word about additionalProperties
> [!WARNING]
> The JSON schemas spec states `object`s accept [additional properties](https://json-schema.org/understanding-json-schema/reference/object#additionalproperties) by default.
> Since this is slow and seems prone to hallucinations, we default to no additional properties.
> You can set `"additionalProperties": true` in the schema of any object to explicitly allow additional properties.
If you're using [Pydantic](https://pydantic.dev/) to generate schemas, you can enable additional properties with the `extra` config on each model class:
```python
# pip install pydantic
import json
from typing import Annotated, List
from pydantic import BaseModel, Extra, Field
class QAPair(BaseModel):
class Config:
extra = 'allow' # triggers additionalProperties: true in the JSON schema
question: str
concise_answer: str
justification: str
class Summary(BaseModel):
class Config:
extra = 'allow'
key_facts: List[Annotated[str, Field(pattern='- .{5,}')]]
question_answers: List[Annotated[List[QAPair], Field(min_items=5)]]
print(json.dumps(Summary.model_json_schema(), indent=2))
```
<details>
<summary>Show JSON schema & grammar</summary>
```json
{
"$defs": {
"QAPair": {
"additionalProperties": true,
"properties": {
"question": {
"title": "Question",
"type": "string"
},
"concise_answer": {
"title": "Concise Answer",
"type": "string"
},
"justification": {
"title": "Justification",
"type": "string"
}
},
"required": [
"question",
"concise_answer",
"justification"
],
"title": "QAPair",
"type": "object"
}
},
"additionalProperties": true,
"properties": {
"key_facts": {
"items": {
"pattern": "^- .{5,}$",
"type": "string"
},
"title": "Key Facts",
"type": "array"
},
"question_answers": {
"items": {
"items": {
"$ref": "#/$defs/QAPair"
},
"minItems": 5,
"type": "array"
},
"title": "Question Answers",
"type": "array"
}
},
"required": [
"key_facts",
"question_answers"
],
"title": "Summary",
"type": "object"
}
```
```
QAPair ::= "{" space QAPair-question-kv "," space QAPair-concise-answer-kv "," space QAPair-justification-kv ( "," space ( QAPair-additional-kv ( "," space QAPair-additional-kv )* ) )? "}" space
QAPair-additional-k ::= ["] ( [c] ([o] ([n] ([c] ([i] ([s] ([e] ([_] ([a] ([n] ([s] ([w] ([e] ([r] char+ | [^"r] char*) | [^"e] char*) | [^"w] char*) | [^"s] char*) | [^"n] char*) | [^"a] char*) | [^"_] char*) | [^"e] char*) | [^"s] char*) | [^"i] char*) | [^"c] char*) | [^"n] char*) | [^"o] char*) | [j] ([u] ([s] ([t] ([i] ([f] ([i] ([c] ([a] ([t] ([i] ([o] ([n] char+ | [^"n] char*) | [^"o] char*) | [^"i] char*) | [^"t] char*) | [^"a] char*) | [^"c] char*) | [^"i] char*) | [^"f] char*) | [^"i] char*) | [^"t] char*) | [^"s] char*) | [^"u] char*) | [q] ([u] ([e] ([s] ([t] ([i] ([o] ([n] char+ | [^"n] char*) | [^"o] char*) | [^"i] char*) | [^"t] char*) | [^"s] char*) | [^"e] char*) | [^"u] char*) | [^"cjq] char* )? ["] space
QAPair-additional-kv ::= QAPair-additional-k ":" space value
QAPair-concise-answer-kv ::= "\"concise_answer\"" space ":" space string
QAPair-justification-kv ::= "\"justification\"" space ":" space string
QAPair-question-kv ::= "\"question\"" space ":" space string
additional-k ::= ["] ( [k] ([e] ([y] ([_] ([f] ([a] ([c] ([t] ([s] char+ | [^"s] char*) | [^"t] char*) | [^"c] char*) | [^"a] char*) | [^"f] char*) | [^"_] char*) | [^"y] char*) | [^"e] char*) | [q] ([u] ([e] ([s] ([t] ([i] ([o] ([n] ([_] ([a] ([n] ([s] ([w] ([e] ([r] ([s] char+ | [^"s] char*) | [^"r] char*) | [^"e] char*) | [^"w] char*) | [^"s] char*) | [^"n] char*) | [^"a] char*) | [^"_] char*) | [^"n] char*) | [^"o] char*) | [^"i] char*) | [^"t] char*) | [^"s] char*) | [^"e] char*) | [^"u] char*) | [^"kq] char* )? ["] space
additional-kv ::= additional-k ":" space value
array ::= "[" space ( value ("," space value)* )? "]" space
boolean ::= ("true" | "false") space
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
dot ::= [^\x0A\x0D]
integral-part ::= [0] | [1-9] [0-9]{0,15}
key-facts ::= "[" space (key-facts-item ("," space key-facts-item)*)? "]" space
key-facts-item ::= "\"" "- " key-facts-item-1{5,} "\"" space
key-facts-item-1 ::= dot
key-facts-kv ::= "\"key_facts\"" space ":" space key-facts
null ::= "null" space
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
question-answers ::= "[" space (question-answers-item ("," space question-answers-item)*)? "]" space
question-answers-item ::= "[" space question-answers-item-item ("," space question-answers-item-item){4,} "]" space
question-answers-item-item ::= QAPair
question-answers-kv ::= "\"question_answers\"" space ":" space question-answers
root ::= "{" space key-facts-kv "," space question-answers-kv ( "," space ( additional-kv ( "," space additional-kv )* ) )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
value ::= object | array | string | number | boolean | null
```
</details>
If you're using [Zod](https://zod.dev/), you can make your objects explicitly allow extra properties w/ `nonstrict()` / `passthrough()` (or explicitly disallow extra props w/ `z.object(...).strict()` or `z.strictObject(...)`), but note that [zod-to-json-schema](https://github.com/StefanTerdell/zod-to-json-schema) currently always sets `"additionalProperties": false` anyway.
```js
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
const Foo = z.object({
age: z.number().positive(),
email: z.string().email(),
}).strict();
console.log(zodToJsonSchema(Foo));
```
<details>
<summary>Show JSON schema & grammar</summary>
```json
{
"type": "object",
"properties": {
"age": {
"type": "number",
"exclusiveMinimum": 0
},
"email": {
"type": "string",
"format": "email"
}
},
"required": [
"age",
"email"
],
"additionalProperties": false,
"$schema": "http://json-schema.org/draft-07/schema#"
}
```
```
age-kv ::= "\"age\"" space ":" space number
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
email-kv ::= "\"email\"" space ":" space string
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space age-kv "," space email-kv "}" space
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
```
</details>

View File

@@ -0,0 +1,6 @@
root ::= (expr "=" ws term "\n")+
expr ::= term ([-+*/] term)*
term ::= ident | num | "(" ws expr ")" ws
ident ::= [a-z] [a-z0-9_]* ws
num ::= [0-9]+ ws
ws ::= [ \t\n]*

42
node_modules/node-llama-cpp/llama/grammars/c.gbnf generated vendored Normal file
View File

@@ -0,0 +1,42 @@
root ::= (declaration)*
declaration ::= dataType identifier "(" parameter? ")" "{" statement* "}"
dataType ::= "int" ws | "float" ws | "char" ws
identifier ::= [a-zA-Z_] [a-zA-Z_0-9]*
parameter ::= dataType identifier
statement ::=
( dataType identifier ws "=" ws expression ";" ) |
( identifier ws "=" ws expression ";" ) |
( identifier ws "(" argList? ")" ";" ) |
( "return" ws expression ";" ) |
( "while" "(" condition ")" "{" statement* "}" ) |
( "for" "(" forInit ";" ws condition ";" ws forUpdate ")" "{" statement* "}" ) |
( "if" "(" condition ")" "{" statement* "}" ("else" "{" statement* "}")? ) |
( singleLineComment ) |
( multiLineComment )
forInit ::= dataType identifier ws "=" ws expression | identifier ws "=" ws expression
forUpdate ::= identifier ws "=" ws expression
condition ::= expression relationOperator expression
relationOperator ::= ("<=" | "<" | "==" | "!=" | ">=" | ">")
expression ::= term (("+" | "-") term)*
term ::= factor(("*" | "/") factor)*
factor ::= identifier | number | unaryTerm | funcCall | parenExpression
unaryTerm ::= "-" factor
funcCall ::= identifier "(" argList? ")"
parenExpression ::= "(" ws expression ws ")"
argList ::= expression ("," ws expression)*
number ::= [0-9]+
singleLineComment ::= "//" [^\n]* "\n"
multiLineComment ::= "/*" ( [^*] | ("*" [^/]) )* "*/"
ws ::= ([ \t\n]+)

13
node_modules/node-llama-cpp/llama/grammars/chess.gbnf generated vendored Normal file
View File

@@ -0,0 +1,13 @@
# Specifies chess moves as a list in algebraic notation, using PGN conventions
# Force first move to "1. ", then any 1-2 digit number after, relying on model to follow the pattern
root ::= "1. " move " " move "\n" ([1-9] [0-9]? ". " move " " move "\n")+
move ::= (pawn | nonpawn | castle) [+#]?
# piece type, optional file/rank, optional capture, dest file & rank
nonpawn ::= [NBKQR] [a-h]? [1-8]? "x"? [a-h] [1-8]
# optional file & capture, dest file & rank, optional promotion
pawn ::= ([a-h] "x")? [a-h] [1-8] ("=" [NBKQR])?
castle ::= "O-O" "-O"?

View File

@@ -0,0 +1,6 @@
# note: this might be incomplete, mostly an example
root ::= en-char+ ([ \t\n] en-char+)*
en-char ::= letter | digit | punctuation
letter ::= [a-zA-Z]
digit ::= [0-9]
punctuation ::= [!"#$%&'()*+,-./:;<=>?@[\\\]^_`{|}~]

View File

@@ -0,0 +1,7 @@
# A probably incorrect grammar for Japanese
root ::= jp-char+ ([ \t\n] jp-char+)*
jp-char ::= hiragana | katakana | punctuation | cjk
hiragana ::= [ぁ-ゟ]
katakana ::= [ァ-ヿ]
punctuation ::= [、-〾]
cjk ::= [一-鿿]

25
node_modules/node-llama-cpp/llama/grammars/json.gbnf generated vendored Normal file
View File

@@ -0,0 +1,25 @@
root ::= object
value ::= object | array | string | number | ("true" | "false" | "null") ws
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\\x7F\x00-\x1F] |
"\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [0-9] [1-9]{0,15})? ws
# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= | " " | "\n" [ \t]{0,20}

View File

@@ -0,0 +1,34 @@
# This is the same as json.gbnf but we restrict whitespaces at the end of the root array
# Useful for generating JSON arrays
root ::= arr
value ::= object | array | string | number | ("true" | "false" | "null") ws
arr ::=
"[\n" ws (
value
(",\n" ws value)*
)? "]"
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\\x7F\x00-\x1F] |
"\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [1-9] [0-9]{0,15})? ws
# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= | " " | "\n" [ \t]{0,20}

4
node_modules/node-llama-cpp/llama/grammars/list.gbnf generated vendored Normal file
View File

@@ -0,0 +1,4 @@
root ::= item+
# Excludes various line break characters
item ::= "- " [^\r\n\x0b\x0c\x85\u2028\u2029]+ "\n"

View File

@@ -0,0 +1,4 @@
{
"tag": "b7836",
"llamaCppGithubRepo": "ggml-org/llama.cpp"
}

5
node_modules/node-llama-cpp/llama/package.json generated vendored Normal file
View File

@@ -0,0 +1,5 @@
{
"binary": {
"napi_versions": [7]
}
}

View File

@@ -0,0 +1,14 @@
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.programFilesPaths.cmake")
setProgramFilesPaths("arm64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNodeLib.cmake")
ensureNodeLib("arm64" "arm64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmApplyGnuModeAdaptations.cmake")
llvmApplyGnuModeAdaptations()
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmEnsureCmakeAr.cmake")
llvmEnsureCmakeAr("arm64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNinjaPath.cmake")
ensureNinjaPath()

View File

@@ -0,0 +1,14 @@
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.programFilesPaths.cmake")
setProgramFilesPaths("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNodeLib.cmake")
ensureNodeLib("x64" "arm64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmApplyGnuModeAdaptations.cmake")
llvmApplyGnuModeAdaptations()
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmEnsureCmakeAr.cmake")
llvmEnsureCmakeAr("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNinjaPath.cmake")
ensureNinjaPath()

View File

@@ -0,0 +1,14 @@
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.programFilesPaths.cmake")
setProgramFilesPaths("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNodeLib.cmake")
ensureNodeLib("x64" "x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmApplyGnuModeAdaptations.cmake")
llvmApplyGnuModeAdaptations()
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmEnsureCmakeAr.cmake")
llvmEnsureCmakeAr("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNinjaPath.cmake")
ensureNinjaPath()

View File

@@ -0,0 +1,8 @@
set(CMAKE_SYSTEM_NAME Darwin) # macOS
set(CMAKE_SYSTEM_PROCESSOR arm64)
set(CMAKE_C_COMPILER clang)
set(CMAKE_CXX_COMPILER clang++)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch arm64")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch arm64")

View File

@@ -0,0 +1,5 @@
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR x86_64)
set(CMAKE_C_COMPILER x86_64-linux-gnu-gcc)
set(CMAKE_CXX_COMPILER x86_64-linux-gnu-g++)

View File

@@ -0,0 +1,5 @@
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc)
set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++)

View File

@@ -0,0 +1,5 @@
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR arm)
set(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc)
set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++)

View File

@@ -0,0 +1,20 @@
set(CMAKE_SYSTEM_NAME Windows)
set(CMAKE_SYSTEM_PROCESSOR x86_64)
set(target x86_64-pc-windows-msvc)
set(CMAKE_C_COMPILER_TARGET ${target})
set(CMAKE_CXX_COMPILER_TARGET ${target})
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.programFilesPaths.cmake")
setProgramFilesPaths("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmUseGnuModeCompilers.cmake")
llvmUseGnuModeCompilers("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNinjaPath.cmake")
ensureNinjaPath()
set(arch_c_flags "-march=native")
set(CMAKE_C_FLAGS_INIT "${arch_c_flags}")
set(CMAKE_CXX_FLAGS_INIT "${arch_c_flags}")

View File

@@ -0,0 +1,21 @@
set(CMAKE_SYSTEM_NAME Windows)
set(CMAKE_SYSTEM_PROCESSOR arm64)
set(target arm64-pc-windows-msvc)
set(CMAKE_C_COMPILER_TARGET ${target})
set(CMAKE_CXX_COMPILER_TARGET ${target})
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.programFilesPaths.cmake")
setProgramFilesPaths("arm64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmUseGnuModeCompilers.cmake")
llvmUseGnuModeCompilers("arm64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNinjaPath.cmake")
ensureNinjaPath()
set(arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only")
set(warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments")
set(CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}")
set(CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}")

View File

@@ -0,0 +1,21 @@
set(CMAKE_SYSTEM_NAME Windows)
set(CMAKE_SYSTEM_PROCESSOR arm64)
set(target arm64-pc-windows-msvc)
set(CMAKE_C_COMPILER_TARGET ${target})
set(CMAKE_CXX_COMPILER_TARGET ${target})
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.programFilesPaths.cmake")
setProgramFilesPaths("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.llvmUseGnuModeCompilers.cmake")
llvmUseGnuModeCompilers("x64")
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/win32.ensureNinjaPath.cmake")
ensureNinjaPath()
set(arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only")
set(warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments")
set(CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}")
set(CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}")

10
node_modules/node-llama-cpp/llama/xpack/package.json generated vendored Normal file
View File

@@ -0,0 +1,10 @@
{
"xpack": {
"minimumXpmRequired": "0.16.3",
"dependencies": {},
"devDependencies": {},
"properties": {},
"actions": {},
"buildConfigurations": {}
}
}