First upload version 0.0.1

Neyra
2026-02-05 15:27:49 +08:00
commit 8e9b7201ed
4182 changed files with 593136 additions and 0 deletions

node_modules/node-llama-cpp/llama/addon/AddonContext.cpp

@@ -0,0 +1,985 @@
#include <thread>
#include <algorithm>
#include <cmath>
#include "common/common.h"
#include "llama-vocab.h"
#include "llama.h"
#include "addonGlobals.h"
#include "AddonModel.h"
#include "AddonModelLora.h"
#include "AddonGrammarEvaluationState.h"
#include "AddonContext.h"
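// Rough estimate of the memory allocated by llama_batch_init for a batch of this size:
// the token (or embedding) buffer, positions, per-token sequence-id counts and lists,
// and the logits flags. The result is reported to the JS GC as external memory.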
static uint64_t calculateBatchMemorySize(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
uint64_t totalSize = 0;
if (embd) {
totalSize += sizeof(float) * n_tokens_alloc * embd;
} else {
totalSize += sizeof(llama_token) * n_tokens_alloc;
}
totalSize += sizeof(llama_pos) * n_tokens_alloc;
totalSize += sizeof(int32_t) * n_tokens_alloc;
totalSize += sizeof(llama_seq_id *) * (n_tokens_alloc + 1);
totalSize += sizeof(llama_seq_id) * n_seq_max * n_tokens_alloc;
totalSize += sizeof(int8_t) * n_tokens_alloc;
return totalSize;
}
class AddonContextDecodeBatchWorker : public Napi::AsyncWorker {
public:
AddonContext* ctx;
AddonContextDecodeBatchWorker(const Napi::Env& env, AddonContext* ctx)
: Napi::AsyncWorker(env, "AddonContextDecodeBatchWorker"),
ctx(ctx),
deferred(Napi::Promise::Deferred::New(env)) {
ctx->Ref();
}
~AddonContextDecodeBatchWorker() {
ctx->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
// Perform the evaluation using llama_decode.
int r = llama_decode(ctx->ctx, ctx->batch);
if (r != 0) {
if (r == 1) {
SetError("could not find a KV slot for the batch (try reducing the batch size or increasing the context size)");
} else {
SetError("Eval has failed");
}
return;
}
llama_synchronize(ctx->ctx);
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_decode\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonContextLoadContextWorker : public Napi::AsyncWorker {
public:
AddonContext* context;
AddonContextLoadContextWorker(const Napi::Env& env, AddonContext* context)
: Napi::AsyncWorker(env, "AddonContextLoadContextWorker"),
context(context),
deferred(Napi::Promise::Deferred::New(env)) {
context->Ref();
}
~AddonContextLoadContextWorker() {
context->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
context->ctx = llama_init_from_model(context->model->model, context->context_params);
context->contextLoaded = context->ctx != nullptr;
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_init_from_model\"");
}
}
void OnOK() {
if (context->contextLoaded) {
uint64_t contextMemorySize = llama_state_get_size(context->ctx);
adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
context->loadedContextMemorySize = contextMemorySize;
}
deferred.Resolve(Napi::Boolean::New(Env(), context->contextLoaded));
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
public:
AddonContext* context;
AddonContextUnloadContextWorker(const Napi::Env& env, AddonContext* context)
: Napi::AsyncWorker(env, "AddonContextUnloadContextWorker"),
context(context),
deferred(Napi::Promise::Deferred::New(env)) {
context->Ref();
}
~AddonContextUnloadContextWorker() {
context->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
llama_free(context->ctx);
context->contextLoaded = false;
try {
if (context->has_batch) {
llama_batch_free(context->batch);
context->has_batch = false;
context->batch_n_tokens = 0;
}
context->dispose();
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_batch_free\"");
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_free\"");
}
}
void OnOK() {
adjustNapiExternalMemorySubtract(Env(), context->loadedContextMemorySize);
context->loadedContextMemorySize = 0;
adjustNapiExternalMemorySubtract(Env(), context->batchMemorySize);
context->batchMemorySize = 0;
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
public:
AddonContext* ctx;
AddonSampler* sampler;
bool arrayResult = false;
bool returnProbabilities = false;
bool returnConfidence = false;
float tokenConfidence = -1;
bool has_probabilities = false;
size_t probabilities_size;
llama_token * probabilities_tokens;
float * probabilities_probs;
int32_t batchLogitIndex;
llama_token result;
bool no_output = false;
AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
: Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
ctx(ctx),
deferred(Napi::Promise::Deferred::New(info.Env())) {
ctx->Ref();
batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
sampler = Napi::ObjectWrap<AddonSampler>::Unwrap(info[1].As<Napi::Object>());
arrayResult = info.Length() > 2 && info[2].IsBoolean();
returnProbabilities = arrayResult ? info[2].As<Napi::Boolean>().Value() : false;
returnConfidence = arrayResult && info.Length() > 3 && info[3].IsBoolean() ? info[3].As<Napi::Boolean>().Value() : false;
sampler->Ref();
}
~AddonContextSampleTokenWorker() {
ctx->Unref();
sampler->Unref();
if (has_probabilities) {
delete[] probabilities_tokens;
delete[] probabilities_probs;
}
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
SampleToken();
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"SampleToken\"");
}
}
void SampleToken() {
if (llama_get_logits(ctx->ctx) == nullptr) {
SetError("This model does not support token generation");
return;
}
sampler->rebuildChainIfNeeded();
const auto * logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
const int n_vocab = llama_vocab_n_tokens(ctx->model->vocab);
auto & candidates = sampler->tokenCandidates;
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
candidates[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
}
llama_token_data_array cur_p = {
/* .data = */ candidates.data(),
/* .size = */ candidates.size(),
/* .selected = */ -1,
/* .sorted = */ false,
};
llama_sampler_apply(sampler->chain, &cur_p);
if (!(cur_p.selected >= 0 && cur_p.selected < (int32_t)cur_p.size)) {
no_output = true;
return;
}
auto new_token_id = cur_p.data[cur_p.selected].id;
if (returnProbabilities || returnConfidence) {
if (!cur_p.sorted) {
std::sort(cur_p.data, cur_p.data + cur_p.size, [](const llama_token_data & a, const llama_token_data & b) {
return a.logit > b.logit;
});
cur_p.sorted = true;
for (size_t i = 0; i < cur_p.size; i++) {
if (cur_p.data[i].id == new_token_id) {
cur_p.selected = i;
break;
}
}
}
}
if (returnProbabilities) {
probabilities_size = cur_p.size;
probabilities_tokens = new llama_token[probabilities_size];
probabilities_probs = new float[probabilities_size];
float maxLogit = cur_p.size > 0 ? cur_p.data[0].logit : -INFINITY;
for (size_t i = 0; i < cur_p.size; i++) {
auto logit = cur_p.data[i].logit;
probabilities_tokens[i] = cur_p.data[i].id;
probabilities_probs[i] = logit;
if (logit > maxLogit) {
maxLogit = logit;
}
}
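// Convert the copied logits into probabilities with a max-subtracted softmax for numerical stability.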
if (probabilities_size > 0 && maxLogit != -INFINITY) {
float sum = 0.0f;
for (size_t i = 0; i < probabilities_size; i++) {
float prob = expf(probabilities_probs[i] - maxLogit);
probabilities_probs[i] = prob;
sum += prob;
}
for (size_t i = 0; i < probabilities_size; i++) {
probabilities_probs[i] /= sum;
}
}
has_probabilities = true;
}
if (returnConfidence) {
if (has_probabilities && cur_p.selected < probabilities_size) {
tokenConfidence = probabilities_probs[cur_p.selected];
} else {
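// No probability array was built, so compute the selected token's softmax probability directly from the logits.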
float maxLogit = cur_p.data[0].logit;
float sum = 0.0f;
for (size_t i = 0; i < cur_p.size; i++) {
auto logit = cur_p.data[i].logit;
if (logit > maxLogit) {
maxLogit = logit;
}
}
for (size_t i = 0; i < cur_p.size; i++) {
sum += expf(cur_p.data[i].logit - maxLogit);
}
tokenConfidence = expf(cur_p.data[cur_p.selected].logit - maxLogit) / sum;
}
}
try {
sampler->acceptToken(new_token_id);
result = new_token_id;
} catch (const std::exception& e) {
SetError(std::string("Failed to accept token in sampler: ") + e.what());
} catch(...) {
SetError("Unknown error when calling \"acceptToken\"");
}
}
void OnOK() {
Napi::Number resultToken;
if (no_output) {
resultToken = Napi::Number::New(Env(), -1);
} else {
resultToken = Napi::Number::New(Env(), static_cast<uint32_t>(result));
}
if (!arrayResult) {
deferred.Resolve(resultToken);
return;
}
Napi::Array resultArray = Napi::Array::New(Env(), 2);
resultArray.Set(Napi::Number::New(Env(), 0), resultToken);
if (has_probabilities) {
Napi::Array probabilities = Napi::Array::New(Env(), probabilities_size * 2);
for (size_t i = 0; i < probabilities_size; i++) {
probabilities.Set(i * 2, Napi::Number::New(Env(), probabilities_tokens[i]));
probabilities.Set(i * 2 + 1, Napi::Number::New(Env(), probabilities_probs[i]));
}
resultArray.Set(1, probabilities);
}
if (returnConfidence && tokenConfidence != -1) {
resultArray.Set(2, Napi::Number::New(Env(), tokenConfidence));
}
deferred.Resolve(resultArray);
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
model->Ref();
context_params = llama_context_default_params();
context_params.n_ctx = 4096;
context_params.n_threads = std::max(cpu_get_num_math(), 1);
context_params.n_threads_batch = context_params.n_threads;
context_params.no_perf = true;
context_params.swa_full = false;
if (info.Length() > 1 && info[1].IsObject()) {
Napi::Object options = info[1].As<Napi::Object>();
if (options.Has("contextSize")) {
context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().Uint32Value();
}
if (options.Has("batchSize")) {
context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
context_params.n_ubatch = context_params.n_batch; // the batch queue is managed on the JS side, so there's no need to also manage it on the C++ side
}
if (options.Has("sequences")) {
context_params.n_seq_max = options.Get("sequences").As<Napi::Number>().Uint32Value();
}
if (options.Has("embeddings")) {
context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
}
if (options.Has("ranking") && options.Get("ranking").As<Napi::Boolean>().Value()) {
context_params.pooling_type = LLAMA_POOLING_TYPE_RANK;
}
if (options.Has("flashAttention")) {
bool flashAttention = options.Get("flashAttention").As<Napi::Boolean>().Value();
context_params.flash_attn_type = flashAttention ? LLAMA_FLASH_ATTN_TYPE_ENABLED : LLAMA_FLASH_ATTN_TYPE_DISABLED;
}
if (options.Has("threads")) {
const auto n_threads = options.Get("threads").As<Napi::Number>().Int32Value();
const auto resolved_n_threads = n_threads == 0 ? std::max((int32_t)std::thread::hardware_concurrency(), context_params.n_threads) : n_threads;
context_params.n_threads = resolved_n_threads;
context_params.n_threads_batch = resolved_n_threads;
}
if (options.Has("performanceTracking")) {
context_params.no_perf = !(options.Get("performanceTracking").As<Napi::Boolean>().Value());
}
if (options.Has("swaFullCache")) {
context_params.swa_full = options.Get("swaFullCache").As<Napi::Boolean>().Value();
}
}
}
AddonContext::~AddonContext() {
dispose();
}
void AddonContext::dispose() {
if (disposed) {
return;
}
disposed = true;
if (contextLoaded) {
contextLoaded = false;
llama_free(ctx);
adjustNapiExternalMemorySubtract(Env(), loadedContextMemorySize);
loadedContextMemorySize = 0;
}
model->Unref();
disposeBatch();
}
void AddonContext::disposeBatch() {
if (!has_batch) {
return;
}
llama_batch_free(batch);
has_batch = false;
batch_n_tokens = 0;
adjustNapiExternalMemorySubtract(Env(), batchMemorySize);
batchMemorySize = 0;
}
Napi::Value AddonContext::Init(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
AddonContextLoadContextWorker* worker = new AddonContextLoadContextWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonContext::Dispose(const Napi::CallbackInfo& info) {
if (disposed) {
return info.Env().Undefined();
}
if (contextLoaded) {
contextLoaded = false;
AddonContextUnloadContextWorker* worker = new AddonContextUnloadContextWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
} else {
dispose();
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
deferred.Resolve(info.Env().Undefined());
return deferred.Promise();
}
}
Napi::Value AddonContext::GetContextSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_n_ctx(ctx));
}
Napi::Value AddonContext::InitBatch(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
if (has_batch) {
llama_batch_free(batch);
}
int32_t n_tokens = info[0].As<Napi::Number>().Int32Value();
batch = llama_batch_init(n_tokens, 0, 1);
has_batch = true;
batch_n_tokens = n_tokens;
uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_model_n_embd(model->model), context_params.n_batch);
if (newBatchMemorySize > batchMemorySize) {
adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
batchMemorySize = newBatchMemorySize;
} else if (newBatchMemorySize < batchMemorySize) {
adjustNapiExternalMemorySubtract(Env(), batchMemorySize - newBatchMemorySize);
batchMemorySize = newBatchMemorySize;
}
return info.Env().Undefined();
}
Napi::Value AddonContext::DisposeBatch(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
disposeBatch();
return info.Env().Undefined();
}
Napi::Value AddonContext::AddToBatch(const Napi::CallbackInfo& info) {
if (!has_batch) {
Napi::Error::New(info.Env(), "No batch is initialized").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
int32_t firstTokenContextIndex = info[1].As<Napi::Number>().Int32Value();
Napi::Uint32Array tokens = info[2].As<Napi::Uint32Array>();
Napi::Uint32Array tokenLogitIndexes = info[3].As<Napi::Uint32Array>();
auto tokensLength = tokens.ElementLength();
auto tokenLogitIndexesLength = tokenLogitIndexes.ElementLength();
GGML_ASSERT(batch.n_tokens + tokensLength <= batch_n_tokens);
Napi::Uint32Array resLogitIndexes = Napi::Uint32Array::New(info.Env(), tokenLogitIndexesLength);
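// tokenLogitIndexes lists the positions within `tokens` that need logits; record the batch row
// assigned to each of them so the caller can later read logits by row index.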
for (size_t i = 0, l = 0; i < tokensLength; i++) {
if (l < tokenLogitIndexesLength && tokenLogitIndexes[l] == i) {
common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, true);
resLogitIndexes[l] = batch.n_tokens - 1;
l++;
} else {
common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, false);
}
}
return resLogitIndexes;
}
Napi::Value AddonContext::DisposeSequence(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
bool result = llama_memory_seq_rm(llama_get_memory(ctx), sequenceId, -1, -1);
if (!result) {
Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return info.Env().Undefined();
}
Napi::Value AddonContext::RemoveTokenCellsFromSequence(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
bool result = llama_memory_seq_rm(llama_get_memory(ctx), sequenceId, startPos, endPos);
return Napi::Boolean::New(info.Env(), result);
}
Napi::Value AddonContext::ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
llama_memory_seq_add(llama_get_memory(ctx), sequenceId, startPos, endPos, shiftDelta);
return info.Env().Undefined();
}
Napi::Value AddonContext::GetSequenceKvCacheMinPosition(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
const auto minPosition = llama_memory_seq_pos_min(llama_get_memory(ctx), sequenceId);
return Napi::Number::New(info.Env(), minPosition);
}
Napi::Value AddonContext::GetSequenceKvCacheMaxPosition(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
const auto maxPosition = llama_memory_seq_pos_max(llama_get_memory(ctx), sequenceId);
return Napi::Number::New(info.Env(), maxPosition);
}
Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
AddonContextSampleTokenWorker* worker = new AddonContextSampleTokenWorker(info, this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
int32_t maxVectorSize = (info.Length() > 1 && info[1].IsNumber()) ? info[1].As<Napi::Number>().Int32Value() : 0;
if (inputTokensLength <= 0) {
Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
const int n_embd = llama_model_n_embd(model->model);
const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
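// When pooling is enabled, read the pooled embedding of sequence 0; otherwise fall back to the embedding of the last evaluated token.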
const auto* embeddings = pooling_type == LLAMA_POOLING_TYPE_NONE ? NULL : llama_get_embeddings_seq(ctx, 0);
if (embeddings == NULL) {
embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
}
if (embeddings == NULL) {
Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
return info.Env().Undefined();
}
size_t resultSize = maxVectorSize == 0 ? n_embd : std::min(n_embd, maxVectorSize);
Napi::Float64Array result = Napi::Float64Array::New(info.Env(), resultSize);
for (size_t i = 0; i < resultSize; i++) {
result[i] = embeddings[i];
}
return result;
}
Napi::Value AddonContext::GetStateSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_state_get_size(ctx));
}
Napi::Value AddonContext::GetThreads(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_n_threads(ctx));
}
Napi::Value AddonContext::SetThreads(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
const auto threads = info[0].As<Napi::Number>().Int32Value();
const auto resolvedThreads = threads == 0
? std::max((int32_t)std::thread::hardware_concurrency(), std::max(cpu_get_num_math(), 1))
: threads;
if (llama_n_threads(ctx) != resolvedThreads) {
llama_set_n_threads(ctx, resolvedThreads, resolvedThreads);
}
return info.Env().Undefined();
}
class AddonContextSaveSequenceStateToFileWorker : public Napi::AsyncWorker {
public:
AddonContext* context;
std::string filepath;
llama_seq_id sequenceId;
std::vector<llama_token> tokens;
size_t savedFileSize = 0;
AddonContextSaveSequenceStateToFileWorker(const Napi::CallbackInfo& info, AddonContext* context)
: Napi::AsyncWorker(info.Env(), "AddonContextSaveSequenceStateToFileWorker"),
context(context),
deferred(Napi::Promise::Deferred::New(info.Env())) {
context->Ref();
filepath = info[0].As<Napi::String>().Utf8Value();
sequenceId = info[1].As<Napi::Number>().Int32Value();
Napi::Uint32Array inputTokens = info[2].As<Napi::Uint32Array>();
tokens.resize(inputTokens.ElementLength());
for (size_t i = 0; i < tokens.size(); i++) {
tokens[i] = inputTokens[i];
}
}
~AddonContextSaveSequenceStateToFileWorker() {
context->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
savedFileSize = llama_state_seq_save_file(context->ctx, filepath.c_str(), sequenceId, tokens.data(), tokens.size());
if (savedFileSize == 0) {
SetError("Failed to save state to file");
return;
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_state_seq_save_file\"");
}
}
void OnOK() {
deferred.Resolve(Napi::Number::New(Env(), savedFileSize));
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
Napi::Value AddonContext::SaveSequenceStateToFile(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
AddonContextSaveSequenceStateToFileWorker* worker = new AddonContextSaveSequenceStateToFileWorker(info, this);
worker->Queue();
return worker->GetPromise();
}
class AddonContextLoadSequenceStateFromFileWorker : public Napi::AsyncWorker {
public:
AddonContext* context;
std::string filepath;
llama_seq_id sequenceId;
size_t maxContextSize;
std::vector<llama_token> tokens;
AddonContextLoadSequenceStateFromFileWorker(const Napi::CallbackInfo& info, AddonContext* context)
: Napi::AsyncWorker(info.Env(), "AddonContextLoadSequenceStateFromFileWorker"),
context(context),
deferred(Napi::Promise::Deferred::New(info.Env())) {
context->Ref();
filepath = info[0].As<Napi::String>().Utf8Value();
sequenceId = info[1].As<Napi::Number>().Int32Value();
maxContextSize = info[2].As<Napi::Number>().Uint32Value();
tokens.resize(maxContextSize);
}
~AddonContextLoadSequenceStateFromFileWorker() {
context->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
size_t tokenCount = 0;
const size_t fileSize = llama_state_seq_load_file(context->ctx, filepath.c_str(), sequenceId, tokens.data(), tokens.size(), &tokenCount);
if (fileSize == 0) {
SetError("Failed to load state from file. Current context sequence size may be smaller than the state saved in the file");
return;
}
tokens.resize(tokenCount);
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_state_seq_load_file\"");
}
}
void OnOK() {
size_t tokenCount = tokens.size();
Napi::Uint32Array result = Napi::Uint32Array::New(Env(), tokenCount);
for (size_t i = 0; i < tokenCount; i++) {
result[i] = tokens[i];
}
deferred.Resolve(result);
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
Napi::Value AddonContext::LoadSequenceStateFromFile(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
AddonContextLoadSequenceStateFromFileWorker* worker = new AddonContextLoadSequenceStateFromFileWorker(info, this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonContext::PrintTimings(const Napi::CallbackInfo& info) {
llama_perf_context_print(ctx);
llama_perf_context_reset(ctx);
return info.Env().Undefined();
}
Napi::Value AddonContext::EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info) {
constexpr auto vocabSizeMaxDifference = 128; // SPEC_VOCAB_MAX_SIZE_DIFFERENCE
constexpr auto vocabCheckStartTokenId = 5; // SPEC_VOCAB_CHECK_START_TOKEN_ID
const AddonContext * draftContext = Napi::ObjectWrap<AddonContext>::Unwrap(info[0].As<Napi::Object>());
const auto currentCtx = ctx;
const auto draftCtx = draftContext->ctx;
const auto currentModel = model->model;
const auto draftModel = draftContext->model->model;
const auto currentVocab = model->vocab;
const auto draftVocab = draftContext->model->vocab;
if (llama_vocab_type(currentVocab) != llama_vocab_type(draftVocab)) {
Napi::Error::New(info.Env(), "Speculative draft model vocabulary type must match the target model vocabulary type").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
if (llama_vocab_get_add_bos(currentVocab) != llama_vocab_get_add_bos(draftVocab) ||
llama_vocab_get_add_eos(currentVocab) != llama_vocab_get_add_eos(draftVocab) ||
llama_vocab_bos(currentVocab) != llama_vocab_bos(draftVocab) ||
llama_vocab_eos(currentVocab) != llama_vocab_eos(draftVocab)
) {
Napi::Error::New(info.Env(), "Speculative draft model special tokens must match the target model special tokens").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
const int currentModelVocabSize = llama_vocab_n_tokens(currentVocab);
const int draftModelVocabSize = llama_vocab_n_tokens(draftVocab);
const int vocabDiff = std::abs(currentModelVocabSize - draftModelVocabSize);
if (vocabDiff > vocabSizeMaxDifference) {
Napi::Error::New(
info.Env(),
std::string("Speculative draft model vocabulary must closely match the target model vocabulary size (vocabulary size difference: ") +
std::to_string(vocabDiff) + std::string(", max allowed: ") + std::to_string(vocabSizeMaxDifference) + std::string(")")
).ThrowAsJavaScriptException();
return info.Env().Undefined();
}
const int minVocabSize = std::min(currentModelVocabSize, draftModelVocabSize);
for (int i = vocabCheckStartTokenId; i < minVocabSize; ++i) {
const char * currentTokenText = llama_vocab_get_text(currentVocab, i);
const char * draftTokenText = llama_vocab_get_text(draftVocab, i);
if (std::strcmp(currentTokenText, draftTokenText) != 0) {
Napi::Error::New(
info.Env(),
std::string("Speculative draft model vocabulary must match the target model vocabulary, but token ") +
std::to_string(i) + std::string(" content differs. Target: \"") + std::string(currentTokenText) +
std::string("\", Draft: \"") + std::string(draftTokenText) + std::string("\"")
).ThrowAsJavaScriptException();
return info.Env().Undefined();
}
}
return info.Env().Undefined();
}
Napi::Value AddonContext::SetLora(const Napi::CallbackInfo& info) {
AddonModelLora* lora = Napi::ObjectWrap<AddonModelLora>::Unwrap(info[0].As<Napi::Object>());
float scale = info[1].As<Napi::Number>().FloatValue();
llama_set_adapter_lora(ctx, lora->lora_adapter, scale);
return info.Env().Undefined();
}
void AddonContext::init(Napi::Object exports) {
exports.Set(
"AddonContext",
DefineClass(
exports.Env(),
"AddonContext",
{
InstanceMethod("init", &AddonContext::Init),
InstanceMethod("getContextSize", &AddonContext::GetContextSize),
InstanceMethod("initBatch", &AddonContext::InitBatch),
InstanceMethod("addToBatch", &AddonContext::AddToBatch),
InstanceMethod("disposeSequence", &AddonContext::DisposeSequence),
InstanceMethod("removeTokenCellsFromSequence", &AddonContext::RemoveTokenCellsFromSequence),
InstanceMethod("shiftSequenceTokenCells", &AddonContext::ShiftSequenceTokenCells),
InstanceMethod("getSequenceKvCacheMinPosition", &AddonContext::GetSequenceKvCacheMinPosition),
InstanceMethod("getSequenceKvCacheMaxPosition", &AddonContext::GetSequenceKvCacheMaxPosition),
InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
InstanceMethod("sampleToken", &AddonContext::SampleToken),
InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
InstanceMethod("getStateSize", &AddonContext::GetStateSize),
InstanceMethod("getThreads", &AddonContext::GetThreads),
InstanceMethod("setThreads", &AddonContext::SetThreads),
InstanceMethod("printTimings", &AddonContext::PrintTimings),
InstanceMethod("ensureDraftContextIsCompatibleForSpeculative", &AddonContext::EnsureDraftContextIsCompatibleForSpeculative),
InstanceMethod("saveSequenceStateToFile", &AddonContext::SaveSequenceStateToFile),
InstanceMethod("loadSequenceStateFromFile", &AddonContext::LoadSequenceStateFromFile),
InstanceMethod("setLora", &AddonContext::SetLora),
InstanceMethod("dispose", &AddonContext::Dispose),
}
)
);
}

node_modules/node-llama-cpp/llama/addon/AddonContext.h

@@ -0,0 +1,58 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
#include "AddonSampler.h"
class AddonContext : public Napi::ObjectWrap<AddonContext> {
public:
AddonModel* model;
llama_context_params context_params;
llama_context* ctx;
llama_batch batch;
uint64_t batchMemorySize = 0;
bool has_batch = false;
int32_t batch_n_tokens = 0;
int n_cur = 0;
uint64_t loadedContextMemorySize = 0;
bool contextLoaded = false;
bool disposed = false;
AddonContext(const Napi::CallbackInfo& info);
~AddonContext();
void dispose();
void disposeBatch();
Napi::Value Init(const Napi::CallbackInfo& info);
Napi::Value Dispose(const Napi::CallbackInfo& info);
Napi::Value GetContextSize(const Napi::CallbackInfo& info);
Napi::Value InitBatch(const Napi::CallbackInfo& info);
Napi::Value DisposeBatch(const Napi::CallbackInfo& info);
Napi::Value AddToBatch(const Napi::CallbackInfo& info);
Napi::Value DisposeSequence(const Napi::CallbackInfo& info);
Napi::Value RemoveTokenCellsFromSequence(const Napi::CallbackInfo& info);
Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info);
Napi::Value GetSequenceKvCacheMinPosition(const Napi::CallbackInfo& info);
Napi::Value GetSequenceKvCacheMaxPosition(const Napi::CallbackInfo& info);
Napi::Value DecodeBatch(const Napi::CallbackInfo& info);
Napi::Value SampleToken(const Napi::CallbackInfo& info);
Napi::Value GetEmbedding(const Napi::CallbackInfo& info);
Napi::Value GetStateSize(const Napi::CallbackInfo& info);
Napi::Value GetThreads(const Napi::CallbackInfo& info);
Napi::Value SetThreads(const Napi::CallbackInfo& info);
Napi::Value SaveSequenceStateToFile(const Napi::CallbackInfo& info);
Napi::Value LoadSequenceStateFromFile(const Napi::CallbackInfo& info);
Napi::Value PrintTimings(const Napi::CallbackInfo& info);
Napi::Value EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info);
Napi::Value SetLora(const Napi::CallbackInfo& info);
static void init(Napi::Object exports);
};

node_modules/node-llama-cpp/llama/addon/AddonGrammar.cpp

@@ -0,0 +1,92 @@
#include "addonGlobals.h"
#include "AddonGrammar.h"
AddonGrammar::AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
grammarCode = info[0].As<Napi::String>().Utf8Value();
if (info.Length() > 1 && info[1].IsObject()) {
Napi::Object options = info[1].As<Napi::Object>();
if (options.Has("addonExports")) {
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
hasAddonExportsRef = true;
}
if (options.Has("rootRuleName")) {
rootRuleName = options.Get("rootRuleName").As<Napi::String>().Utf8Value();
}
}
auto parsed_grammar = llama_grammar_init_impl(nullptr, grammarCode.c_str(), rootRuleName.c_str(), false, nullptr, 0, nullptr, 0);
// will be nullptr if there are parse errors
if (parsed_grammar == nullptr) {
Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
return;
}
llama_grammar_free_impl(parsed_grammar);
}
AddonGrammar::~AddonGrammar() {
if (hasAddonExportsRef) {
addonExportsRef.Unref();
hasAddonExportsRef = false;
}
}
Napi::Value AddonGrammar::isTextCompatible(const Napi::CallbackInfo& info) {
const std::string testText = info[0].As<Napi::String>().Utf8Value();
auto parsed_grammar = llama_grammar_init_impl(nullptr, grammarCode.c_str(), rootRuleName.c_str(), false, nullptr, 0, nullptr, 0);
// will be nullptr if there are parse errors
if (parsed_grammar == nullptr) {
Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
return Napi::Boolean::New(info.Env(), false);
}
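// Feed the text into the grammar codepoint by codepoint; an empty stack set means the text can no longer match the grammar.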
const auto cpts = unicode_cpts_from_utf8(testText);
llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(parsed_grammar);
for (const auto & cpt : cpts) {
try {
llama_grammar_accept(parsed_grammar, cpt);
} catch (const std::exception & e) {
llama_grammar_free_impl(parsed_grammar);
return Napi::Boolean::New(info.Env(), false);
} catch (...) {
llama_grammar_free_impl(parsed_grammar);
return Napi::Boolean::New(info.Env(), false);
}
if (stacks_cur.empty()) {
// no stacks means that the grammar failed to match at this point
llama_grammar_free_impl(parsed_grammar);
return Napi::Boolean::New(info.Env(), false);
}
}
for (const auto & stack : stacks_cur) {
if (stack.empty()) {
// an empty stack means that the grammar has been completed
llama_grammar_free_impl(parsed_grammar);
return Napi::Boolean::New(info.Env(), true);
}
}
llama_grammar_free_impl(parsed_grammar);
return Napi::Boolean::New(info.Env(), false);
}
void AddonGrammar::init(Napi::Object exports) {
exports.Set(
"AddonGrammar",
DefineClass(
exports.Env(),
"AddonGrammar",
{
InstanceMethod("isTextCompatible", &AddonGrammar::isTextCompatible),
}
)
);
}

node_modules/node-llama-cpp/llama/addon/AddonGrammar.h

@@ -0,0 +1,22 @@
#pragma once
#include "llama.h"
#include "common/common.h"
#include "llama-grammar.h"
#include "src/unicode.h"
#include "napi.h"
#include "addonGlobals.h"
class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
public:
std::string grammarCode = "";
std::string rootRuleName = "root";
Napi::Reference<Napi::Object> addonExportsRef;
bool hasAddonExportsRef = false;
AddonGrammar(const Napi::CallbackInfo& info);
~AddonGrammar();
Napi::Value isTextCompatible(const Napi::CallbackInfo& info);
static void init(Napi::Object exports);
};

node_modules/node-llama-cpp/llama/addon/AddonGrammarEvaluationState.cpp

@@ -0,0 +1,36 @@
#include <sstream>
#include "addonGlobals.h"
#include "common/common.h"
#include "llama.h"
#include "AddonGrammarEvaluationState.h"
#include "AddonGrammar.h"
AddonGrammarEvaluationState::AddonGrammarEvaluationState(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammarEvaluationState>(info) {
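// With a single argument, clone an existing evaluation state; otherwise create a fresh grammar sampler for the given model and grammar.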
if (info.Length() == 1) {
AddonGrammarEvaluationState* existingState = Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
model = existingState->model;
model->Ref();
grammarDef = existingState->grammarDef;
grammarDef->Ref();
sampler = llama_sampler_clone(existingState->sampler);
} else {
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
model->Ref();
grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[1].As<Napi::Object>());
grammarDef->Ref();
sampler = llama_sampler_init_grammar(model->vocab, grammarDef->grammarCode.c_str(), grammarDef->rootRuleName.c_str());
}
}
AddonGrammarEvaluationState::~AddonGrammarEvaluationState() {
llama_sampler_free(sampler);
grammarDef->Unref();
model->Unref();
}
void AddonGrammarEvaluationState::init(Napi::Object exports) {
exports.Set("AddonGrammarEvaluationState", DefineClass(exports.Env(), "AddonGrammarEvaluationState", {}));
}

node_modules/node-llama-cpp/llama/addon/AddonGrammarEvaluationState.h

@@ -0,0 +1,17 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
#include "AddonModel.h"
class AddonGrammarEvaluationState : public Napi::ObjectWrap<AddonGrammarEvaluationState> {
public:
AddonModel* model;
AddonGrammar* grammarDef;
llama_sampler * sampler = nullptr;
AddonGrammarEvaluationState(const Napi::CallbackInfo& info);
~AddonGrammarEvaluationState();
static void init(Napi::Object exports);
};

node_modules/node-llama-cpp/llama/addon/AddonModel.cpp

@@ -0,0 +1,691 @@
#include <sstream>
#include "addonGlobals.h"
#include "globals/addonLog.h"
#include "globals/addonProgress.h"
#include "common/common.h"
#include "llama.h"
#include "AddonModel.h"
#include "AddonModelData.h"
#include "AddonModelLora.h"
static Napi::Value getNapiToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
if (token < 0 || token == LLAMA_TOKEN_NULL) {
return Napi::Number::From(info.Env(), -1);
}
auto tokenAttributes = llama_vocab_get_attr(vocab, token);
if (tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED || tokenAttributes & LLAMA_TOKEN_ATTR_UNKNOWN) {
return Napi::Number::From(info.Env(), -1);
}
return Napi::Number::From(info.Env(), token);
}
static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
if (token < 0) {
return Napi::Number::From(info.Env(), -1);
}
auto tokenAttributes = llama_vocab_get_attr(vocab, token);
if (!(tokenAttributes & LLAMA_TOKEN_ATTR_CONTROL) && !(tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED)) {
return Napi::Number::From(info.Env(), -1);
}
return Napi::Number::From(info.Env(), token);
}
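// Called by llama.cpp while the model file is loading: mirrors the upstream dot-progress logging,
// forwards progress events to the JS onLoadProgress callback (if set), and aborts the load
// (by returning false) once abortModelLoad is set.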
static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
AddonModel* addonModel = (AddonModel *) user_data;
unsigned percentage = (unsigned) (100 * progress);
if (percentage > addonModel->modelLoadPercentage) {
addonModel->modelLoadPercentage = percentage;
// original llama.cpp logs
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
if (percentage >= 100) {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
}
}
if (progress > addonModel->rawModelLoadPercentage) {
addonModel->rawModelLoadPercentage = progress;
if (addonModel->onLoadProgressEventCallbackSet) {
addon_progress_event* data = new addon_progress_event {
progress
};
auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);
if (status != napi_ok) {
delete data;
}
}
}
return !(addonModel->abortModelLoad);
}
class AddonModelLoadModelWorker : public Napi::AsyncWorker {
public:
AddonModel* model;
AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
: Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
model(model),
deferred(Napi::Promise::Deferred::New(env)) {
model->Ref();
}
~AddonModelLoadModelWorker() {
model->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
model->model = llama_model_load_from_file(model->modelPath.c_str(), model->model_params);
model->modelLoaded = model->model != nullptr;
if (model->modelLoaded) {
model->vocab = llama_model_get_vocab(model->model);
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_model_load_from_file\"");
}
}
void OnOK() {
if (model->modelLoaded) {
uint64_t modelSize = llama_model_size(model->model);
adjustNapiExternalMemoryAdd(Env(), modelSize);
model->loadedModelSize = modelSize;
}
deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
if (model->onLoadProgressEventCallbackSet) {
model->addonThreadSafeOnLoadProgressEventCallback.Release();
}
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
public:
AddonModel* model;
AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
: Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
model(model),
deferred(Napi::Promise::Deferred::New(env)) {
model->Ref();
}
~AddonModelUnloadModelWorker() {
model->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
llama_model_free(model->model);
model->modelLoaded = false;
model->dispose();
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_model_free\"");
}
}
void OnOK() {
adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
model->loadedModelSize = 0;
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
public:
AddonModelLora* modelLora;
AddonModelLoadLoraWorker(
const Napi::Env& env,
AddonModelLora* modelLora
)
: Napi::AsyncWorker(env, "AddonModelLoadLoraWorker"),
modelLora(modelLora),
deferred(Napi::Promise::Deferred::New(env)) {
modelLora->model->Ref();
modelLora->Ref();
}
~AddonModelLoadLoraWorker() {
modelLora->model->Unref();
modelLora->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
const auto loraAdapter = llama_adapter_lora_init(modelLora->model->model, modelLora->loraFilePath.c_str());
if (loraAdapter == nullptr) {
SetError(
std::string(
std::string("Failed to initialize LoRA adapter \"" + modelLora->loraFilePath + "\"")
)
);
return;
}
modelLora->lora_adapter = loraAdapter;
modelLora->model->Ref();
if (modelLora->model->data != nullptr) {
modelLora->model->data->loraAdapters.insert(modelLora);
} else {
modelLora->dispose(true);
SetError("Model data is not initialized");
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_adapter_lora_init\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
AddonModel::AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
data = new AddonModelData();
model_params = llama_model_default_params();
// Get the model path
modelPath = info[0].As<Napi::String>().Utf8Value();
if (info.Length() > 1 && info[1].IsObject()) {
Napi::Object options = info[1].As<Napi::Object>();
if (options.Has("addonExports")) {
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
hasAddonExportsRef = true;
}
if (options.Has("gpuLayers")) {
model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
}
if (options.Has("vocabOnly")) {
model_params.vocab_only = options.Get("vocabOnly").As<Napi::Boolean>().Value();
}
if (options.Has("useMmap")) {
model_params.use_mmap = options.Get("useMmap").As<Napi::Boolean>().Value();
}
if (options.Has("useDirectIo")) {
model_params.use_direct_io = options.Get("useDirectIo").As<Napi::Boolean>().Value();
}
if (options.Has("useMlock")) {
model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
}
if (options.Has("checkTensors")) {
model_params.check_tensors = options.Get("checkTensors").As<Napi::Boolean>().Value();
}
if (options.Has("onLoadProgress")) {
auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
if (onLoadProgressJSCallback.IsFunction()) {
AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
info.Env(),
onLoadProgressJSCallback,
"onLoadProgressCallback",
0,
1,
context,
[](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
addonModel->onLoadProgressEventCallbackSet = false;
delete ctx;
},
this
);
onLoadProgressEventCallbackSet = true;
}
}
if (options.Has("hasLoadAbortSignal")) {
hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
}
if (options.Has("overridesList")) {
Napi::Array overridesList = options.Get("overridesList").As<Napi::Array>();
kv_overrides.reserve(overridesList.Length());
for (uint32_t i = 0; i < overridesList.Length(); i++) {
Napi::Array overrideItem = overridesList.Get(i).As<Napi::Array>();
auto key = overrideItem.Get((uint32_t)0).As<Napi::String>().Utf8Value();
auto value = overrideItem.Get((uint32_t)1);
if (key.length() > 127) {
continue;
}
llama_model_kv_override kvo;
std::strncpy(kvo.key, key.c_str(), key.length());
kvo.key[key.length()] = 0;
if (value.IsString()) {
auto valueString = value.As<Napi::String>().Utf8Value();
if (valueString.length() > 127) {
continue;
}
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
std::strncpy(kvo.val_str, valueString.c_str(), valueString.length());
kvo.val_str[valueString.length()] = 0;
fputs(std::string("Override: " + key + " = " + valueString + "\n").c_str(), stdout);
fflush(stdout);
} else if (value.IsNumber() || value.IsBigInt()) {
auto numberType = overrideItem.Get((uint32_t)2).As<Napi::Number>().Int32Value();
if (numberType == 0) {
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
kvo.val_i64 = value.As<Napi::Number>().Int64Value();
} else {
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
kvo.val_f64 = value.As<Napi::Number>().DoubleValue();
}
} else if (value.IsBoolean()) {
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
kvo.val_bool = value.As<Napi::Boolean>().Value();
}
kv_overrides.emplace_back(std::move(kvo));
}
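// llama.cpp expects the overrides array to be terminated by an entry whose key is an empty string.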
if (!kv_overrides.empty()) {
kv_overrides.emplace_back();
kv_overrides.back().key[0] = 0;
}
model_params.kv_overrides = kv_overrides.data();
}
if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
model_params.progress_callback_user_data = this;
model_params.progress_callback = llamaModelParamsProgressCallback;
}
}
}
AddonModel::~AddonModel() {
dispose();
}
void AddonModel::dispose() {
if (disposed) {
return;
}
disposed = true;
if (data != nullptr) {
auto currentData = data;
data = nullptr;
delete currentData;
}
if (modelLoaded) {
modelLoaded = false;
llama_model_free(model);
adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
loadedModelSize = 0;
}
if (hasAddonExportsRef) {
addonExportsRef.Unref();
hasAddonExportsRef = false;
}
}
Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
AddonModelLoadModelWorker* worker = new AddonModelLoadModelWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonModel::LoadLora(const Napi::CallbackInfo& info) {
AddonModelLora* modelLora = Napi::ObjectWrap<AddonModelLora>::Unwrap(info[0].As<Napi::Object>());
AddonModelLoadLoraWorker* worker = new AddonModelLoadLoraWorker(this->Env(), modelLora);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonModel::AbortActiveModelLoad(const Napi::CallbackInfo& info) {
abortModelLoad = true;
return info.Env().Undefined();
}
Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
if (disposed) {
return info.Env().Undefined();
}
if (modelLoaded) {
modelLoaded = false;
AddonModelUnloadModelWorker* worker = new AddonModelUnloadModelWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
} else {
dispose();
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
deferred.Resolve(info.Env().Undefined());
return deferred.Promise();
}
}
Napi::Value AddonModel::Tokenize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
std::string text = info[0].As<Napi::String>().Utf8Value();
bool specialTokens = info[1].As<Napi::Boolean>().Value();
std::vector<llama_token> tokens = common_tokenize(vocab, text, false, specialTokens);
Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
for (size_t i = 0; i < tokens.size(); ++i) {
result[i] = static_cast<uint32_t>(tokens[i]);
}
return result;
}
Napi::Value AddonModel::Detokenize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
bool decodeSpecialTokens = info.Length() > 1
? info[1].As<Napi::Boolean>().Value()
: false;
std::string result;
result.resize(std::max(result.capacity(), tokens.ElementLength()));
int n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
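// A negative result means the buffer was too small; its absolute value is the required length, so resize and retry.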
if (n_chars < 0) {
result.resize(-n_chars);
n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
GGML_ASSERT(n_chars <= result.size()); // whitespace trimming is performed after per-token detokenization
}
result.resize(n_chars);
return Napi::String::New(info.Env(), result);
}
Napi::Value AddonModel::GetTrainContextSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_model_n_ctx_train(model));
}
Napi::Value AddonModel::GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_model_n_embd(model));
}
Napi::Value AddonModel::GetTotalSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_model_size(model));
}
Napi::Value AddonModel::GetTotalParameters(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return Napi::Number::From(info.Env(), llama_model_n_params(model));
}
Napi::Value AddonModel::GetModelDescription(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
char model_desc[128];
int actual_length = llama_model_desc(model, model_desc, sizeof(model_desc));
if (actual_length > (int)sizeof(model_desc) - 1) {
actual_length = (int)sizeof(model_desc) - 1; // llama_model_desc returns the untruncated length, like snprintf
}
return Napi::String::New(info.Env(), model_desc, actual_length);
}
Napi::Value AddonModel::TokenBos(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiControlToken(info, vocab, llama_vocab_bos(vocab));
}
Napi::Value AddonModel::TokenEos(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiControlToken(info, vocab, llama_vocab_eos(vocab));
}
Napi::Value AddonModel::TokenNl(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_nl(vocab));
}
Napi::Value AddonModel::PrefixToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_fim_pre(vocab));
}
Napi::Value AddonModel::MiddleToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_fim_mid(vocab));
}
Napi::Value AddonModel::SuffixToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_fim_suf(vocab));
}
Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_eot(vocab));
}
Napi::Value AddonModel::SepToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return getNapiToken(info, vocab, llama_vocab_sep(vocab));
}
Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
int token = info[0].As<Napi::Number>().Int32Value();
std::stringstream ss;
const char* str = llama_vocab_get_text(vocab, token);
if (str == nullptr) {
return info.Env().Undefined();
}
ss << str;
return Napi::String::New(info.Env(), ss.str());
}
Napi::Value AddonModel::GetTokenAttributes(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
if (info[0].IsNumber() == false) {
return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_ATTR_UNDEFINED));
}
int token = info[0].As<Napi::Number>().Int32Value();
auto tokenAttributes = llama_vocab_get_attr(vocab, token);
return Napi::Number::From(info.Env(), int32_t(tokenAttributes));
}
Napi::Value AddonModel::IsEogToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
if (info[0].IsNumber() == false) {
return Napi::Boolean::New(info.Env(), false);
}
int token = info[0].As<Napi::Number>().Int32Value();
return Napi::Boolean::New(info.Env(), llama_vocab_is_eog(vocab, token));
}
Napi::Value AddonModel::GetVocabularyType(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
auto vocabularyType = llama_vocab_type(vocab);
return Napi::Number::From(info.Env(), int32_t(vocabularyType));
}
Napi::Value AddonModel::ShouldPrependBosToken(const Napi::CallbackInfo& info) {
const bool addBos = llama_vocab_get_add_bos(vocab);
return Napi::Boolean::New(info.Env(), addBos);
}
Napi::Value AddonModel::ShouldAppendEosToken(const Napi::CallbackInfo& info) {
const bool addEos = llama_vocab_get_add_eos(vocab);
return Napi::Boolean::New(info.Env(), addEos);
}
Napi::Value AddonModel::GetModelSize(const Napi::CallbackInfo& info) {
return Napi::Number::From(info.Env(), llama_model_size(model));
}
void AddonModel::init(Napi::Object exports) {
exports.Set(
"AddonModel",
DefineClass(
exports.Env(),
"AddonModel",
{
InstanceMethod("init", &AddonModel::Init),
InstanceMethod("loadLora", &AddonModel::LoadLora),
InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
InstanceMethod("tokenize", &AddonModel::Tokenize),
InstanceMethod("detokenize", &AddonModel::Detokenize),
InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
InstanceMethod("getEmbeddingVectorSize", &AddonModel::GetEmbeddingVectorSize),
InstanceMethod("getTotalSize", &AddonModel::GetTotalSize),
InstanceMethod("getTotalParameters", &AddonModel::GetTotalParameters),
InstanceMethod("getModelDescription", &AddonModel::GetModelDescription),
InstanceMethod("tokenBos", &AddonModel::TokenBos),
InstanceMethod("tokenEos", &AddonModel::TokenEos),
InstanceMethod("tokenNl", &AddonModel::TokenNl),
InstanceMethod("prefixToken", &AddonModel::PrefixToken),
InstanceMethod("middleToken", &AddonModel::MiddleToken),
InstanceMethod("suffixToken", &AddonModel::SuffixToken),
InstanceMethod("eotToken", &AddonModel::EotToken),
InstanceMethod("sepToken", &AddonModel::SepToken),
InstanceMethod("getTokenString", &AddonModel::GetTokenString),
InstanceMethod("getTokenAttributes", &AddonModel::GetTokenAttributes),
InstanceMethod("isEogToken", &AddonModel::IsEogToken),
InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
InstanceMethod("shouldAppendEosToken", &AddonModel::ShouldAppendEosToken),
InstanceMethod("getModelSize", &AddonModel::GetModelSize),
InstanceMethod("dispose", &AddonModel::Dispose),
}
)
);
}

node_modules/node-llama-cpp/llama/addon/AddonModel.h

@@ -0,0 +1,64 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
#include "globals/addonProgress.h"
class AddonModel : public Napi::ObjectWrap<AddonModel> {
public:
llama_model_params model_params;
std::vector<llama_model_kv_override> kv_overrides;
llama_model* model;
const llama_vocab* vocab;
uint64_t loadedModelSize = 0;
Napi::Reference<Napi::Object> addonExportsRef;
bool hasAddonExportsRef = false;
AddonModelData* data;
std::string modelPath;
bool modelLoaded = false;
bool abortModelLoad = false;
bool model_load_stopped = false;
float rawModelLoadPercentage = 0;
unsigned modelLoadPercentage = 0;
AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
bool onLoadProgressEventCallbackSet = false;
bool hasLoadAbortSignal = false;
bool disposed = false;
AddonModel(const Napi::CallbackInfo& info);
~AddonModel();
void dispose();
Napi::Value Init(const Napi::CallbackInfo& info);
Napi::Value LoadLora(const Napi::CallbackInfo& info);
Napi::Value AbortActiveModelLoad(const Napi::CallbackInfo& info);
Napi::Value Dispose(const Napi::CallbackInfo& info);
Napi::Value Tokenize(const Napi::CallbackInfo& info);
Napi::Value Detokenize(const Napi::CallbackInfo& info);
Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info);
Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info);
Napi::Value GetTotalSize(const Napi::CallbackInfo& info);
Napi::Value GetTotalParameters(const Napi::CallbackInfo& info);
Napi::Value GetModelDescription(const Napi::CallbackInfo& info);
Napi::Value TokenBos(const Napi::CallbackInfo& info);
Napi::Value TokenEos(const Napi::CallbackInfo& info);
Napi::Value TokenNl(const Napi::CallbackInfo& info);
Napi::Value PrefixToken(const Napi::CallbackInfo& info);
Napi::Value MiddleToken(const Napi::CallbackInfo& info);
Napi::Value SuffixToken(const Napi::CallbackInfo& info);
Napi::Value EotToken(const Napi::CallbackInfo& info);
Napi::Value SepToken(const Napi::CallbackInfo& info);
Napi::Value GetTokenString(const Napi::CallbackInfo& info);
Napi::Value GetTokenAttributes(const Napi::CallbackInfo& info);
Napi::Value IsEogToken(const Napi::CallbackInfo& info);
Napi::Value GetVocabularyType(const Napi::CallbackInfo& info);
Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info);
Napi::Value ShouldAppendEosToken(const Napi::CallbackInfo& info);
Napi::Value GetModelSize(const Napi::CallbackInfo& info);
static void init(Napi::Object exports);
};

View File

@@ -0,0 +1,25 @@
#include <iostream>
#include "addonGlobals.h"
#include "AddonModelData.h"
#include "AddonModelLora.h"
AddonModelData::AddonModelData() {
}
AddonModelData::~AddonModelData() {
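// detach the adapter set before disposing; dispose(true) skips removeLora() so the set is not mutated mid-iteration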
std::set<AddonModelLora *> currentLoraAdapters;
currentLoraAdapters.swap(loraAdapters);
for (auto lora : currentLoraAdapters) {
lora->dispose(true);
}
currentLoraAdapters.clear();
}
void AddonModelData::removeLora(AddonModelLora* lora) {
auto pos = loraAdapters.find(lora);
if (pos != loraAdapters.end()) {
loraAdapters.erase(pos);
}
}

View File

@@ -0,0 +1,15 @@
#pragma once
#include <set>
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
class AddonModelData {
public:
std::set<AddonModelLora *> loraAdapters;
AddonModelData();
~AddonModelData();
void removeLora(AddonModelLora* lora);
};

View File

@@ -0,0 +1,103 @@
#include "addonGlobals.h"
#include "AddonModel.h"
#include "AddonModelData.h"
#include "AddonModelLora.h"
class AddonModelLoraUnloadLoraWorker : public Napi::AsyncWorker {
public:
AddonModelLora* addonLora;
AddonModelLoraUnloadLoraWorker(const Napi::Env& env, AddonModelLora* addonLora)
: Napi::AsyncWorker(env, "AddonModelLoraUnloadLoraWorker"),
addonLora(addonLora),
deferred(Napi::Promise::Deferred::New(env)) {
addonLora->Ref();
}
~AddonModelLoraUnloadLoraWorker() {
addonLora->Unref();
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
addonLora->dispose();
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_adapter_lora_free\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
AddonModelLora::AddonModelLora(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModelLora>(info) {
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
loraFilePath = info[1].As<Napi::String>().Utf8Value();
lora_adapter = nullptr;
}
AddonModelLora::~AddonModelLora() {
dispose();
}
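// Detaches the lora adapter pointer, removes this adapter from the owning model's registry (unless skipErase), and releases the model reference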
void AddonModelLora::dispose(bool skipErase) {
if (lora_adapter != nullptr) {
lora_adapter = nullptr;
if (!skipErase && model->data != nullptr) {
model->data->removeLora(this);
}
model->Unref();
}
}
Napi::Value AddonModelLora::GetFilePath(const Napi::CallbackInfo& info) {
return Napi::String::New(info.Env(), loraFilePath);
}
Napi::Value AddonModelLora::GetUsages(const Napi::CallbackInfo& info) {
return Napi::Number::From(info.Env(), usages);
}
void AddonModelLora::SetUsages(const Napi::CallbackInfo& info, const Napi::Value &value) {
usages = value.As<Napi::Number>().Uint32Value();
}
Napi::Value AddonModelLora::Dispose(const Napi::CallbackInfo& info) {
AddonModelLoraUnloadLoraWorker* worker = new AddonModelLoraUnloadLoraWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonModelLora::GetDisposed(const Napi::CallbackInfo& info) {
return Napi::Boolean::New(info.Env(), lora_adapter == nullptr);
}
void AddonModelLora::init(Napi::Object exports) {
exports.Set(
"AddonModelLora",
DefineClass(
exports.Env(),
"AddonModelLora",
{
InstanceAccessor("usages", &AddonModelLora::GetUsages, &AddonModelLora::SetUsages),
InstanceAccessor("filePath", &AddonModelLora::GetFilePath, nullptr),
InstanceAccessor("disposed", &AddonModelLora::GetDisposed, nullptr),
InstanceMethod("dispose", &AddonModelLora::Dispose),
}
)
);
}

View File

@@ -0,0 +1,28 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
class AddonModelLora : public Napi::ObjectWrap<AddonModelLora> {
public:
AddonModel* model;
llama_adapter_lora * lora_adapter;
std::string loraFilePath;
uint32_t usages = 0;
AddonModelLora(const Napi::CallbackInfo& info);
~AddonModelLora();
void dispose(bool skipErase = false);
Napi::Value GetFilePath(const Napi::CallbackInfo& info);
Napi::Value GetUsages(const Napi::CallbackInfo& info);
void SetUsages(const Napi::CallbackInfo& info, const Napi::Value &value);
Napi::Value GetDisposed(const Napi::CallbackInfo& info);
Napi::Value Dispose(const Napi::CallbackInfo& info);
static void init(Napi::Object exports);
};

View File

@@ -0,0 +1,511 @@
#include <cmath>
#include "common/common.h"
#include "globals/addonLog.h"
#include "ggml.h"
#include "llama.h"
#include "AddonGrammarEvaluationState.h"
#include "AddonSampler.h"
AddonSampler::AddonSampler(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonSampler>(info) {
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
model->Ref();
tokenCandidates.resize(llama_vocab_n_tokens(model->vocab));
tokenCandidates.reserve(llama_vocab_n_tokens(model->vocab));
}
AddonSampler::~AddonSampler() {
dispose();
}
void AddonSampler::dispose() {
if (disposed) {
return;
}
disposed = true;
model->Unref();
freeChain();
if (temperatureSampler != nullptr) {
llama_sampler_free(temperatureSampler);
temperatureSampler = nullptr;
}
if (greedySampler != nullptr) {
llama_sampler_free(greedySampler);
greedySampler = nullptr;
}
if (minPSampler != nullptr) {
llama_sampler_free(minPSampler);
minPSampler = nullptr;
}
if (topKSampler != nullptr) {
llama_sampler_free(topKSampler);
topKSampler = nullptr;
}
if (topPSampler != nullptr) {
llama_sampler_free(topPSampler);
topPSampler = nullptr;
}
if (seedSampler != nullptr) {
llama_sampler_free(seedSampler);
seedSampler = nullptr;
}
if (repeatPenaltySampler != nullptr) {
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;
}
if (tokenBiasSampler != nullptr) {
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}
if (grammarEvaluationState != nullptr) {
grammarEvaluationState->Unref();
grammarEvaluationState = nullptr;
}
}
void AddonSampler::freeChain() {
if (chain == nullptr) {
return;
}
// ensure existing state of samplers isn't cleared
while (llama_sampler_chain_n(chain) > 0) {
llama_sampler_chain_remove(chain, 0);
}
llama_sampler_free(chain);
chain = nullptr;
}
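// Lazily (re)builds the sampler chain; order matters: token bias -> repeat penalty -> grammar, then either greedy or top-k -> top-p -> min-p -> temperature -> seeded distribution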
void AddonSampler::rebuildChainIfNeeded() {
if (disposed) {
throw std::runtime_error("Sampler is disposed");
}
if (chain != nullptr) {
return;
}
auto sampler_params = llama_sampler_chain_default_params();
chain = llama_sampler_chain_init(sampler_params);
if (tokenBiasSampler != nullptr) {
llama_sampler_chain_add(chain, tokenBiasSampler);
}
if (repeatPenaltySampler != nullptr) {
llama_sampler_chain_add(chain, repeatPenaltySampler);
}
if (grammarEvaluationState != nullptr) {
llama_sampler_chain_add(chain, grammarEvaluationState->sampler);
}
if (greedySampler != nullptr) {
llama_sampler_chain_add(chain, greedySampler);
} else {
if (topKSampler != nullptr) {
llama_sampler_chain_add(chain, topKSampler);
}
if (topPSampler != nullptr) {
llama_sampler_chain_add(chain, topPSampler);
}
if (minPSampler != nullptr) {
llama_sampler_chain_add(chain, minPSampler);
}
if (temperatureSampler != nullptr) {
llama_sampler_chain_add(chain, temperatureSampler);
}
if (seedSampler != nullptr) {
llama_sampler_chain_add(chain, seedSampler);
}
}
}
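// Feeds an accepted token into the stateful samplers: the repeat-penalty history and, for non-EOG tokens, the grammar state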
void AddonSampler::acceptToken(llama_token token) {
if (repeatPenaltySampler != nullptr) {
llama_sampler_accept(repeatPenaltySampler, token);
repeatPenalty_lastTokens.push_back(token);
}
if (grammarEvaluationState != nullptr && grammarEvaluationState->sampler != nullptr && !llama_vocab_is_eog(model->vocab, token)) {
llama_sampler_accept(grammarEvaluationState->sampler, token);
}
}
Napi::Value AddonSampler::Dispose(const Napi::CallbackInfo& info) {
dispose();
return info.Env().Undefined();
}
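// ApplyConfig diffs the incoming config against the cached sampler parameters and only recreates the samplers whose parameters changed, calling freeChain() so the chain is rebuilt on the next use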
Napi::Value AddonSampler::ApplyConfig(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Sampler is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
const int32_t n_probs = 0; // Number of probabilities to keep - 0 = disabled
size_t min_keep = std::max(1, n_probs);
Napi::Object config = info[0].As<Napi::Object>();
if (config.Has("temperature")) {
auto temperature = config.Get("temperature").As<Napi::Number>().FloatValue();
if (temperature != temperatureSampler_temperature || !temperatureSampler_initialized) {
temperatureSampler_initialized = true;
temperatureSampler_temperature = temperature;
freeChain();
if (temperatureSampler != nullptr) {
llama_sampler_free(temperatureSampler);
temperatureSampler = nullptr;
}
if (temperatureSampler_temperature <= 0) {
greedySampler = llama_sampler_init_greedy();
} else {
temperatureSampler = llama_sampler_init_temp(temperatureSampler_temperature);
if (greedySampler != nullptr) {
llama_sampler_free(greedySampler);
greedySampler = nullptr;
}
}
}
} else {
if (temperatureSampler != nullptr) {
freeChain();
llama_sampler_free(temperatureSampler);
temperatureSampler = nullptr;
}
if (greedySampler == nullptr) {
greedySampler = llama_sampler_init_greedy();
}
}
if (config.Has("minP")) {
auto minP = config.Get("minP").As<Napi::Number>().FloatValue();
if (minP != minPSampler_minP) {
minPSampler_minP = minP;
freeChain();
if (minPSampler != nullptr) {
llama_sampler_free(minPSampler);
minPSampler = nullptr;
}
if (minPSampler_minP != 0) {
minPSampler = llama_sampler_init_min_p(minPSampler_minP, min_keep);
}
}
} else if (minPSampler != nullptr) {
freeChain();
llama_sampler_free(minPSampler);
minPSampler = nullptr;
}
if (config.Has("topK")) {
auto topK = config.Get("topK").As<Napi::Number>().Int32Value();
if (topK != topKSampler_topK || !topKSampler_initialized) {
topKSampler_initialized = true;
topKSampler_topK = topK;
freeChain();
if (topKSampler != nullptr) {
llama_sampler_free(topKSampler);
topKSampler = nullptr;
}
const int32_t resolved_top_k = topKSampler_topK <= 0
? llama_vocab_n_tokens(model->vocab)
: std::min(topKSampler_topK, llama_vocab_n_tokens(model->vocab));
topKSampler = llama_sampler_init_top_k(resolved_top_k);
}
} else if (topKSampler != nullptr) {
freeChain();
llama_sampler_free(topKSampler);
topKSampler = nullptr;
}
if (config.Has("topP")) {
auto topP = config.Get("topP").As<Napi::Number>().FloatValue();
if (topP != topPSampler_topP) {
topPSampler_topP = topP;
freeChain();
if (topPSampler != nullptr) {
llama_sampler_free(topPSampler);
topPSampler = nullptr;
}
if (topPSampler_topP < 1.0f) { // a top_p of >= 1.0 disables the sampler (see the comment in AddonSampler.h)
topPSampler = llama_sampler_init_top_p(topPSampler_topP, min_keep);
}
}
} else if (topPSampler != nullptr) {
freeChain();
llama_sampler_free(topPSampler);
topPSampler = nullptr;
}
if (config.Has("seed")) {
auto seed = config.Get("seed").As<Napi::Number>().Uint32Value();
if (seed != seedSampler_seed || seedSampler == nullptr) {
seedSampler_seed = seed;
freeChain();
if (seedSampler != nullptr) {
llama_sampler_free(seedSampler);
seedSampler = nullptr;
}
seedSampler = llama_sampler_init_dist(seedSampler_seed);
}
} else if (seedSampler == nullptr) {
freeChain();
seedSampler = llama_sampler_init_dist(time(NULL));
}
if (config.Has("repeatPenaltyTokens")) {
Napi::Uint32Array repeat_penalty_tokens_uint32_array = config.Get("repeatPenaltyTokens").As<Napi::Uint32Array>();
auto repeatPenalty = config.Has("repeatPenalty")
? config.Get("repeatPenalty").As<Napi::Number>().FloatValue()
: 1;
auto repeatPenaltyMaxTokens = config.Has("repeatPenaltyMaxTokens")
? config.Get("repeatPenaltyMaxTokens").As<Napi::Number>().Int32Value()
: 64;
auto repeatPenaltyPresencePenalty = config.Has("repeatPenaltyPresencePenalty")
? config.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue()
: 0;
auto repeatPenaltyFrequencyPenalty = config.Has("repeatPenaltyFrequencyPenalty")
? config.Get("repeatPenaltyFrequencyPenalty").As<Napi::Number>().FloatValue()
: 0;
auto repeatPenaltyEnabled = repeatPenalty != 1 && repeatPenaltyMaxTokens > 0;
bool shouldCreateSampler = false;
if (!repeatPenaltyEnabled) {
if (repeatPenaltySampler != nullptr) {
freeChain();
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;
}
} else if (repeatPenaltySampler == nullptr) {
freeChain();
shouldCreateSampler = true;
} else {
bool existingSamplerMatchesConfig = true;
existingSamplerMatchesConfig &= repeatPenalty_maxTokens == repeatPenaltyMaxTokens;
existingSamplerMatchesConfig &= repeatPenalty_penalty == repeatPenalty;
existingSamplerMatchesConfig &= repeatPenalty_presencePenalty == repeatPenaltyPresencePenalty;
existingSamplerMatchesConfig &= repeatPenalty_frequencyPenalty == repeatPenaltyFrequencyPenalty;
if (existingSamplerMatchesConfig) {
if (repeat_penalty_tokens_uint32_array.ElementLength() > 0) {
const auto firstToken = static_cast<llama_token>(repeat_penalty_tokens_uint32_array[0]);
if (repeatPenalty_lastTokens.rat(0) != firstToken &&
repeatPenalty_lastTokens.size() == repeatPenalty_maxTokens &&
repeat_penalty_tokens_uint32_array.ElementLength() == repeatPenalty_maxTokens
) {
const auto lastToken = static_cast<llama_token>(repeat_penalty_tokens_uint32_array[repeat_penalty_tokens_uint32_array.ElementLength() - 1]);
llama_sampler_accept(repeatPenaltySampler, lastToken);
repeatPenalty_lastTokens.push_back(lastToken);
}
}
for (size_t i = 0; i < repeat_penalty_tokens_uint32_array.ElementLength() && existingSamplerMatchesConfig; i++) {
auto token = static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]);
if (i < repeatPenalty_lastTokens.size()) {
existingSamplerMatchesConfig &= repeatPenalty_lastTokens.rat(i) == token;
} else {
llama_sampler_accept(repeatPenaltySampler, token);
repeatPenalty_lastTokens.push_back(token);
}
}
}
if (!existingSamplerMatchesConfig) {
freeChain();
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;
shouldCreateSampler = true;
}
}
if (shouldCreateSampler) {
repeatPenaltySampler = llama_sampler_init_penalties(
repeatPenaltyMaxTokens,
repeatPenalty,
repeatPenaltyFrequencyPenalty,
repeatPenaltyPresencePenalty
);
repeatPenalty_lastTokens = RingBuffer<llama_token>(repeatPenaltyMaxTokens);
for (size_t i = 0; i < repeat_penalty_tokens_uint32_array.ElementLength(); i++) {
llama_sampler_accept(repeatPenaltySampler, static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]));
repeatPenalty_lastTokens.push_back(static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]));
}
repeatPenalty_maxTokens = repeatPenaltyMaxTokens;
repeatPenalty_penalty = repeatPenalty;
repeatPenalty_presencePenalty = repeatPenaltyPresencePenalty;
repeatPenalty_frequencyPenalty = repeatPenaltyFrequencyPenalty;
}
} else if (repeatPenaltySampler != nullptr) {
freeChain();
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;
}
if (config.Has("tokenBiasKeys") && config.Has("tokenBiasValues")) {
Napi::Uint32Array tokenBiasKeys = config.Get("tokenBiasKeys").As<Napi::Uint32Array>();
Napi::Float32Array tokenBiasValues = config.Get("tokenBiasValues").As<Napi::Float32Array>();
if (tokenBiasKeys.ElementLength() == tokenBiasValues.ElementLength() && tokenBiasKeys.ElementLength() > 0) {
bool existingSamplerMatchesConfig = tokenBiasSampler != nullptr;
if (tokenBiasSampler != nullptr && tokenBiasSampler_biases.size() == tokenBiasKeys.ElementLength()) {
for (size_t i = 0; i < tokenBiasKeys.ElementLength() && existingSamplerMatchesConfig; i++) {
existingSamplerMatchesConfig &= tokenBiasSampler_biases[i].token == static_cast<llama_token>(tokenBiasKeys[i]);
existingSamplerMatchesConfig &= tokenBiasSampler_biases[i].bias == tokenBiasValues[i];
}
} else {
existingSamplerMatchesConfig = false;
}
if (!existingSamplerMatchesConfig) {
if (tokenBiasSampler != nullptr) {
freeChain();
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}
tokenBiasSampler_biases.clear();
tokenBiasSampler_biases.reserve(tokenBiasKeys.ElementLength());
for (size_t i = 0; i < tokenBiasKeys.ElementLength(); i++) {
tokenBiasSampler_biases.emplace_back(llama_logit_bias { static_cast<llama_token>(tokenBiasKeys[i]), tokenBiasValues[i] });
}
tokenBiasSampler = llama_sampler_init_logit_bias(
llama_vocab_n_tokens(model->vocab),
tokenBiasSampler_biases.size(),
tokenBiasSampler_biases.data()
);
}
} else if (tokenBiasSampler != nullptr) {
freeChain();
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}
} else if (tokenBiasSampler != nullptr) {
freeChain();
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}
if (config.Has("grammarEvaluationState")) {
const auto configGrammarEvaluationState =
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(config.Get("grammarEvaluationState").As<Napi::Object>());
if (grammarEvaluationState != configGrammarEvaluationState) {
freeChain();
if (grammarEvaluationState != nullptr) {
grammarEvaluationState->Unref();
grammarEvaluationState = nullptr;
}
grammarEvaluationState = configGrammarEvaluationState;
grammarEvaluationState->Ref();
}
} else if (grammarEvaluationState != nullptr) {
freeChain();
grammarEvaluationState->Unref();
grammarEvaluationState = nullptr;
}
return info.Env().Undefined();
}
Napi::Value AddonSampler::AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
AddonGrammarEvaluationState* grammar_evaluation_state =
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
if ((grammar_evaluation_state)->sampler != nullptr) {
try {
llama_sampler_accept((grammar_evaluation_state)->sampler, tokenId);
} catch (const std::exception & e) {
Napi::Error::New(info.Env(), std::string("Failed to accept token in grammar sampler: ") + e.what()).ThrowAsJavaScriptException();
return info.Env().Undefined();
} catch (...) {
Napi::Error::New(info.Env(), "Failed to accept token in grammar sampler").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
}
return info.Env().Undefined();
}
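// Probes whether a token is allowed by the grammar: applies the grammar sampler to a single-candidate array and checks whether the candidate's logit was masked to -INFINITY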
Napi::Value AddonSampler::CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info) {
AddonGrammarEvaluationState* grammar_evaluation_state =
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
if ((grammar_evaluation_state)->sampler != nullptr) {
std::vector<llama_token_data> candidates;
candidates.reserve(1);
candidates.emplace_back(llama_token_data { tokenId, 1, 0.0f });
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
try {
llama_sampler_apply((grammar_evaluation_state)->sampler, &candidates_p);
} catch (const std::exception & e) {
addonLog(GGML_LOG_LEVEL_DEBUG, std::string("Failed to apply grammar sampler: ") + e.what());
return Napi::Boolean::New(info.Env(), false);
} catch (...) {
return Napi::Boolean::New(info.Env(), false);
}
if (candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) {
return Napi::Boolean::New(info.Env(), false);
}
return Napi::Boolean::New(info.Env(), true);
}
return Napi::Boolean::New(info.Env(), false);
}
void AddonSampler::init(Napi::Object exports) {
exports.Set(
"AddonSampler",
DefineClass(
exports.Env(),
"AddonSampler",
{
InstanceMethod("dispose", &AddonSampler::Dispose),
InstanceMethod("applyConfig", &AddonSampler::ApplyConfig),
StaticMethod("acceptGrammarEvaluationStateToken", &AddonSampler::AcceptGrammarEvaluationStateToken),
StaticMethod("canBeNextTokenForGrammarEvaluationState", &AddonSampler::CanBeNextTokenForGrammarEvaluationState),
}
)
);
}

node_modules/node-llama-cpp/llama/addon/AddonSampler.h
View File

@@ -0,0 +1,63 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "RingBuffer.h"
#include "addonGlobals.h"
#include "AddonModel.h"
class AddonSampler : public Napi::ObjectWrap<AddonSampler> {
public:
AddonModel* model;
llama_sampler * chain = nullptr;
llama_sampler * temperatureSampler = nullptr;
bool temperatureSampler_initialized = false;
float temperatureSampler_temperature = 0.0f; // 0.0f = disabled
llama_sampler * greedySampler = nullptr;
llama_sampler * minPSampler = nullptr;
float minPSampler_minP = 0.0f; // Min p sampling <=0.0f = disabled
llama_sampler * topKSampler = nullptr;
bool topKSampler_initialized = false;
int topKSampler_topK = 0;
llama_sampler * topPSampler = nullptr;
float topPSampler_topP = 0.0f; // Top p sampling >=1.0 = disabled
llama_sampler * seedSampler = nullptr;
uint32_t seedSampler_seed = 0;
llama_sampler * repeatPenaltySampler = nullptr;
RingBuffer<llama_token> repeatPenalty_lastTokens = RingBuffer<llama_token>(64);
int32_t repeatPenalty_maxTokens = 64;
float repeatPenalty_penalty = 1.10f; // 1.0 = disabled
float repeatPenalty_presencePenalty = 0.00f; // 0.0 = disabled
float repeatPenalty_frequencyPenalty = 0.00f; // 0.0 = disabled
llama_sampler * tokenBiasSampler = nullptr;
std::vector<llama_logit_bias> tokenBiasSampler_biases;
AddonGrammarEvaluationState* grammarEvaluationState = nullptr;
std::vector<llama_token_data> tokenCandidates;
bool disposed = false;
AddonSampler(const Napi::CallbackInfo& info);
~AddonSampler();
void dispose();
void freeChain();
void rebuildChainIfNeeded();
void acceptToken(llama_token token);
Napi::Value Dispose(const Napi::CallbackInfo& info);
Napi::Value ApplyConfig(const Napi::CallbackInfo& info);
static Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info);
static Napi::Value CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info);
static void init(Napi::Object exports);
};

node_modules/node-llama-cpp/llama/addon/RingBuffer.h
View File

@@ -0,0 +1,109 @@
// copied from llama-impl.h
#pragma once
#include <cstddef>
#include <stdexcept>
#include <vector>
template<typename T>
struct RingBuffer {
RingBuffer(size_t cap) : capacity(cap), data(cap) {}
T & front() {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[first];
}
const T & front() const {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[first];
}
T & back() {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[pos];
}
const T & back() const {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[pos];
}
void push_back(const T & value) {
if (capacity == 0) {
throw std::runtime_error("ring buffer: capacity is zero");
}
if (sz == capacity) {
// advance the start when buffer is full
first = (first + 1) % capacity;
} else {
sz++;
}
data[pos] = value;
pos = (pos + 1) % capacity;
}
T pop_front() {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
T value = data[first];
first = (first + 1) % capacity;
sz--;
return value;
}
//T & operator[](size_t i) {
// if (i >= sz) {
// throw std::runtime_error("ring buffer: index out of bounds");
// }
// return data[(first + i) % capacity];
//}
//const T & at(size_t i) const {
// if (i >= sz) {
// throw std::runtime_error("ring buffer: index out of bounds");
// }
// return data[(first + i) % capacity];
//}
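// rat(i) indexes from the newest element backwards: rat(0) is the most recently pushed value, rat(size() - 1) is the oldest (front())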
const T & rat(size_t i) const {
if (i >= sz) {
throw std::runtime_error("ring buffer: index out of bounds");
}
return data[(first + sz - i - 1) % capacity];
}
std::vector<T> to_vector() const {
std::vector<T> result;
result.reserve(sz);
for (size_t i = 0; i < sz; i++) {
result.push_back(data[(first + i) % capacity]);
}
return result;
}
void clear() {
// here only reset the status of the buffer
sz = 0;
first = 0;
pos = 0;
}
bool empty() const {
return sz == 0;
}
size_t size() const {
return sz;
}
size_t capacity = 0;
size_t sz = 0;
size_t first = 0;
size_t pos = 0;
std::vector<T> data;
};

node_modules/node-llama-cpp/llama/addon/addon.cpp
View File

@@ -0,0 +1,314 @@
#include "addonGlobals.h"
#include "AddonModel.h"
#include "AddonModelLora.h"
#include "AddonGrammar.h"
#include "AddonGrammarEvaluationState.h"
#include "AddonSampler.h"
#include "AddonContext.h"
#include "globals/addonLog.h"
#include "globals/addonProgress.h"
#include "globals/getGpuInfo.h"
#include "globals/getSwapInfo.h"
#include "globals/getMemoryInfo.h"
#include <atomic>
bool backendInitialized = false;
bool backendDisposed = false;
Napi::Value systemInfo(const Napi::CallbackInfo& info) {
return Napi::String::From(info.Env(), llama_print_system_info());
}
Napi::Value addonGetSupportsGpuOffloading(const Napi::CallbackInfo& info) {
return Napi::Boolean::New(info.Env(), llama_supports_gpu_offload());
}
Napi::Value addonGetSupportsMmap(const Napi::CallbackInfo& info) {
return Napi::Boolean::New(info.Env(), llama_supports_mmap());
}
Napi::Value addonGetGpuSupportsMmap(const Napi::CallbackInfo& info) {
const auto llamaSupportsMmap = llama_supports_mmap();
const auto gpuDevice = getGpuDevice().first;
if (gpuDevice == nullptr) {
return Napi::Boolean::New(info.Env(), false);
}
ggml_backend_dev_props props;
ggml_backend_dev_get_props(gpuDevice, &props);
const bool gpuSupportsMmap = llamaSupportsMmap && props.caps.buffer_from_host_ptr;
return Napi::Boolean::New(info.Env(), gpuSupportsMmap);
}
Napi::Value addonGetSupportsMlock(const Napi::CallbackInfo& info) {
return Napi::Boolean::New(info.Env(), llama_supports_mlock());
}
Napi::Value addonGetMathCores(const Napi::CallbackInfo& info) {
return Napi::Number::New(info.Env(), cpu_get_num_math());
}
Napi::Value addonGetBlockSizeForGgmlType(const Napi::CallbackInfo& info) {
const int ggmlType = info[0].As<Napi::Number>().Int32Value();
if (ggmlType < 0 || ggmlType >= GGML_TYPE_COUNT) { // GGML_TYPE_COUNT itself is not a valid type
return info.Env().Undefined();
}
const auto blockSize = ggml_blck_size(static_cast<ggml_type>(ggmlType));
return Napi::Number::New(info.Env(), blockSize);
}
Napi::Value addonGetTypeSizeForGgmlType(const Napi::CallbackInfo& info) {
const int ggmlType = info[0].As<Napi::Number>().Int32Value();
if (ggmlType < 0 || ggmlType >= GGML_TYPE_COUNT) { // GGML_TYPE_COUNT itself is not a valid type
return info.Env().Undefined();
}
const auto typeSize = ggml_type_size(static_cast<ggml_type>(ggmlType));
return Napi::Number::New(info.Env(), typeSize);
}
Napi::Value addonGetGgmlGraphOverheadCustom(const Napi::CallbackInfo& info) {
if (info.Length() < 2 || !info[0].IsNumber() || !info[1].IsBoolean()) {
return Napi::Number::New(info.Env(), 0);
}
const size_t size = info[0].As<Napi::Number>().Uint32Value();
const bool grads = info[1].As<Napi::Boolean>().Value();
const auto graphOverhead = ggml_graph_overhead_custom(size, grads);
return Napi::Number::New(info.Env(), graphOverhead);
}
Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
Napi::Object consts = Napi::Object::New(info.Env());
consts.Set("ggmlMaxDims", Napi::Number::New(info.Env(), GGML_MAX_DIMS));
consts.Set("ggmlTypeF16Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F16)));
consts.Set("ggmlTypeF32Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F32)));
consts.Set("ggmlTensorOverhead", Napi::Number::New(info.Env(), ggml_tensor_overhead()));
consts.Set("llamaPosSize", Napi::Number::New(info.Env(), sizeof(llama_pos)));
consts.Set("llamaSeqIdSize", Napi::Number::New(info.Env(), sizeof(llama_seq_id)));
return consts;
}
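// Backend initialization and teardown run on async workers so llama_backend_init() / llama_backend_free() do not block the Node.js event loop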
class AddonBackendLoadWorker : public Napi::AsyncWorker {
public:
AddonBackendLoadWorker(const Napi::Env& env)
: Napi::AsyncWorker(env, "AddonBackendLoadWorker"),
deferred(Napi::Promise::Deferred::New(env)) {
}
~AddonBackendLoadWorker() {
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
llama_backend_init();
try {
if (backendDisposed) {
llama_backend_free();
} else {
backendInitialized = true;
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_backend_free\"");
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_backend_init\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
class AddonBackendUnloadWorker : public Napi::AsyncWorker {
public:
AddonBackendUnloadWorker(const Napi::Env& env)
: Napi::AsyncWorker(env, "AddonBackendUnloadWorker"),
deferred(Napi::Promise::Deferred::New(env)) {
}
~AddonBackendUnloadWorker() {
}
Napi::Promise GetPromise() {
return deferred.Promise();
}
protected:
Napi::Promise::Deferred deferred;
void Execute() {
try {
if (backendInitialized) {
backendInitialized = false;
llama_backend_free();
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_backend_free\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};
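// loadBackends: calling ggml_backend_reg_count() forces the ggml backend registry to initialize; when a search path is given, dynamic backend libraries are also loaded from it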
Napi::Value addonLoadBackends(const Napi::CallbackInfo& info) {
const std::string forceLoadLibrariesSearchPath = info.Length() == 0
? ""
: info[0].IsString()
? info[0].As<Napi::String>().Utf8Value()
: "";
ggml_backend_reg_count();
if (forceLoadLibrariesSearchPath.length() > 0) {
ggml_backend_load_all_from_path(forceLoadLibrariesSearchPath.c_str());
}
return info.Env().Undefined();
}
Napi::Value addonSetNuma(const Napi::CallbackInfo& info) {
const bool numaDisabled = info.Length() == 0
? true
: info[0].IsBoolean()
? !info[0].As<Napi::Boolean>().Value()
: false;
if (numaDisabled)
return info.Env().Undefined();
const auto numaType = info[0].IsString()
? info[0].As<Napi::String>().Utf8Value()
: "";
if (numaType == "distribute") {
llama_numa_init(GGML_NUMA_STRATEGY_DISTRIBUTE);
} else if (numaType == "isolate") {
llama_numa_init(GGML_NUMA_STRATEGY_ISOLATE);
} else if (numaType == "numactl") {
llama_numa_init(GGML_NUMA_STRATEGY_NUMACTL);
} else if (numaType == "mirror") {
llama_numa_init(GGML_NUMA_STRATEGY_MIRROR);
} else {
Napi::Error::New(info.Env(), std::string("Invalid NUMA strategy \"") + numaType + "\"").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
return info.Env().Undefined();
}
Napi::Value markLoaded(const Napi::CallbackInfo& info) {
static std::atomic_bool loaded = false;
return Napi::Boolean::New(info.Env(), loaded.exchange(true));
}
Napi::Value addonInit(const Napi::CallbackInfo& info) {
if (backendInitialized) {
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
deferred.Resolve(info.Env().Undefined());
return deferred.Promise();
}
AddonBackendLoadWorker* worker = new AddonBackendLoadWorker(info.Env());
worker->Queue();
return worker->GetPromise();
}
Napi::Value addonDispose(const Napi::CallbackInfo& info) {
if (backendDisposed) {
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
deferred.Resolve(info.Env().Undefined());
return deferred.Promise();
}
backendDisposed = true;
AddonBackendUnloadWorker* worker = new AddonBackendUnloadWorker(info.Env());
worker->Queue();
return worker->GetPromise();
}
static void addonFreeLlamaBackend(Napi::Env env, int* data) {
if (backendDisposed) {
return;
}
backendDisposed = true;
if (backendInitialized) {
backendInitialized = false;
llama_backend_free();
}
}
Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
exports.DefineProperties({
Napi::PropertyDescriptor::Function("markLoaded", markLoaded),
Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
Napi::PropertyDescriptor::Function("getSupportsGpuOffloading", addonGetSupportsGpuOffloading),
Napi::PropertyDescriptor::Function("getSupportsMmap", addonGetSupportsMmap),
Napi::PropertyDescriptor::Function("getGpuSupportsMmap", addonGetGpuSupportsMmap),
Napi::PropertyDescriptor::Function("getSupportsMlock", addonGetSupportsMlock),
Napi::PropertyDescriptor::Function("getMathCores", addonGetMathCores),
Napi::PropertyDescriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
Napi::PropertyDescriptor::Function("getTypeSizeForGgmlType", addonGetTypeSizeForGgmlType),
Napi::PropertyDescriptor::Function("getGgmlGraphOverheadCustom", addonGetGgmlGraphOverheadCustom),
Napi::PropertyDescriptor::Function("getConsts", addonGetConsts),
Napi::PropertyDescriptor::Function("setLogger", setLogger),
Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
Napi::PropertyDescriptor::Function("ensureGpuDeviceIsSupported", ensureGpuDeviceIsSupported),
Napi::PropertyDescriptor::Function("getSwapInfo", getSwapInfo),
Napi::PropertyDescriptor::Function("getMemoryInfo", getMemoryInfo),
Napi::PropertyDescriptor::Function("loadBackends", addonLoadBackends),
Napi::PropertyDescriptor::Function("setNuma", addonSetNuma),
Napi::PropertyDescriptor::Function("init", addonInit),
Napi::PropertyDescriptor::Function("dispose", addonDispose),
});
AddonModel::init(exports);
AddonModelLora::init(exports);
AddonGrammar::init(exports);
AddonGrammarEvaluationState::init(exports);
AddonContext::init(exports);
AddonSampler::init(exports);
llama_log_set(addonLlamaCppLogCallback, nullptr);
exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));
return exports;
}
NODE_API_MODULE(NODE_GYP_MODULE_NAME, registerCallback)

View File

@@ -0,0 +1,22 @@
#include <algorithm>
#include <limits>
#include <sstream>
#include <vector>
#include "addonGlobals.h"
#include "napi.h"
void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size) {
const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
while (size > 0) {
int64_t adjustSize = std::min(size, chunkSize);
Napi::MemoryManagement::AdjustExternalMemory(env, adjustSize);
size -= adjustSize;
}
}
void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
while (size > 0) {
int64_t adjustSize = std::min(size, chunkSize);
Napi::MemoryManagement::AdjustExternalMemory(env, -adjustSize);
size -= adjustSize;
}
}

node_modules/node-llama-cpp/llama/addon/addonGlobals.h
View File

@@ -0,0 +1,12 @@
#pragma once
#include "napi.h"
class AddonModel;
class AddonModelLora;
class AddonModelData;
class AddonContext;
class AddonGrammar;
class AddonGrammarEvaluationState;
void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size);
void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size);

View File

@@ -0,0 +1,143 @@
#include <sstream>
#include "addonLog.h"
AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
bool addonJsLoggerCallbackSet = false;
int addonLoggerLogLevel = 5;
int addonLastLoggerLogLevel = 6;
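// Maps ggml log levels to the addon's numeric levels (lower = more severe); GGML_LOG_LEVEL_CONT inherits the level of the previous message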
static int addonGetGgmlLogLevelNumber(ggml_log_level level) {
switch (level) {
case GGML_LOG_LEVEL_ERROR: return 2;
case GGML_LOG_LEVEL_WARN: return 3;
case GGML_LOG_LEVEL_INFO: return 4;
case GGML_LOG_LEVEL_NONE: return 5;
case GGML_LOG_LEVEL_DEBUG: return 6;
case GGML_LOG_LEVEL_CONT: return addonLastLoggerLogLevel;
}
return 1;
}
void addonCallJsLogCallback(
Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
) {
bool called = false;
if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
try {
callback.Call({
Napi::Number::New(env, data->logLevelNumber),
Napi::String::New(env, data->stringStream->str()),
});
called = true;
} catch (const Napi::Error& e) {
called = false;
}
}
if (!called && data != nullptr) {
if (data->logLevelNumber == 2) {
fputs(data->stringStream->str().c_str(), stderr);
fflush(stderr);
} else {
fputs(data->stringStream->str().c_str(), stdout);
fflush(stdout);
}
}
if (data != nullptr) {
delete data->stringStream;
delete data;
}
}
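// llama.cpp log callback: messages whose numeric level exceeds the configured threshold are dropped; the rest are forwarded to the JS logger via the thread-safe function when one is set, otherwise written directly to the process output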
void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data) {
int logLevelNumber = addonGetGgmlLogLevelNumber(level);
addonLastLoggerLogLevel = logLevelNumber;
if (logLevelNumber > addonLoggerLogLevel) {
return;
}
if (addonJsLoggerCallbackSet) {
std::stringstream* stringStream = new std::stringstream();
if (text != nullptr) {
*stringStream << text;
}
addon_logger_log* data = new addon_logger_log {
logLevelNumber,
stringStream,
};
auto status = addonThreadSafeLoggerCallback.NonBlockingCall(data);
if (status == napi_ok) {
return;
} else {
delete stringStream;
delete data;
}
}
if (text != nullptr) {
if (level == 2) {
fputs(text, stderr);
fflush(stderr);
} else {
fputs(text, stdout);
fflush(stdout);
}
}
}
Napi::Value setLogger(const Napi::CallbackInfo& info) {
if (addonJsLoggerCallbackSet) {
addonJsLoggerCallbackSet = false;
addonThreadSafeLoggerCallback.Release();
}
if (info.Length() < 1 || !info[0].IsFunction()) {
return info.Env().Undefined();
}
auto addonLoggerJSCallback = info[0].As<Napi::Function>();
AddonThreadSafeLogCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
addonThreadSafeLoggerCallback = AddonThreadSafeLogCallbackFunction::New(
info.Env(),
addonLoggerJSCallback,
"loggerCallback",
0,
1,
context,
[](Napi::Env, void*, AddonThreadSafeLogCallbackFunctionContext* ctx) {
addonJsLoggerCallbackSet = false;
delete ctx;
}
);
addonJsLoggerCallbackSet = true;
// prevent blocking the main node process from exiting due to active resources
addonThreadSafeLoggerCallback.Unref(info.Env());
return info.Env().Undefined();
}
Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
if (info.Length() < 1 || !info[0].IsNumber()) {
addonLoggerLogLevel = 5;
return info.Env().Undefined();
}
addonLoggerLogLevel = info[0].As<Napi::Number>().Int32Value();
return info.Env().Undefined();
}
void addonLog(ggml_log_level level, const std::string text) {
addonLlamaCppLogCallback(level, std::string("[addon] " + text + "\n").c_str(), nullptr);
}

View File

@@ -0,0 +1,24 @@
#pragma once
#include <sstream>
#include "llama.h"
#include "napi.h"
struct addon_logger_log {
public:
const int logLevelNumber;
const std::stringstream* stringStream;
};
void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);
using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
void addonCallJsLogCallback(
Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
);
using AddonThreadSafeLogCallbackFunction =
Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
Napi::Value setLogger(const Napi::CallbackInfo& info);
Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info);
void addonLog(ggml_log_level level, const std::string text);

View File

@@ -0,0 +1,15 @@
#include "addonProgress.h"
void addonCallJsProgressCallback(
Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
) {
if (env != nullptr && callback != nullptr) {
try {
callback.Call({Napi::Number::New(env, data->progress)});
} catch (const Napi::Error& e) {}
}
if (data != nullptr) {
delete data;
}
}

View File

@@ -0,0 +1,15 @@
#pragma once
#include "napi.h"
struct addon_progress_event {
public:
const float progress;
};
using AddonThreadSafeProgressCallbackFunctionContext = Napi::Reference<Napi::Value>;
void addonCallJsProgressCallback(
Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
);
using AddonThreadSafeProgressEventCallbackFunction =
Napi::TypedThreadSafeFunction<AddonThreadSafeProgressCallbackFunctionContext, addon_progress_event, addonCallJsProgressCallback>;

View File

@@ -0,0 +1,146 @@
#include "getGpuInfo.h"
#include "addonLog.h"
#ifdef __APPLE__
#include <TargetConditionals.h>
#endif
#ifdef GPU_INFO_USE_VULKAN
# include "../../gpuInfo/vulkan-gpu-info.h"
#endif
#ifdef GPU_INFO_USE_VULKAN
void logVulkanWarning(const char* message) {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr);
}
#endif
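// Sums VRAM across all GPU/iGPU backend devices; on Apple Silicon the Metal device's memory is also counted as unified memory, and Vulkan totals are folded in when the memory-budget extension is available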
Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
ggml_backend_dev_t device = NULL;
size_t deviceTotal = 0;
size_t deviceFree = 0;
uint64_t total = 0;
uint64_t used = 0;
uint64_t unifiedVramSize = 0;
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
device = ggml_backend_dev_get(i);
auto deviceType = ggml_backend_dev_type(device);
if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) {
deviceTotal = 0;
deviceFree = 0;
ggml_backend_dev_memory(device, &deviceFree, &deviceTotal);
total += deviceTotal;
used += deviceTotal - deviceFree;
#if defined(__arm64__) || defined(__aarch64__)
if (std::string(ggml_backend_dev_name(device)) == "Metal") {
unifiedVramSize += deviceTotal;
}
#endif
}
}
#ifdef GPU_INFO_USE_VULKAN
uint64_t vulkanDeviceTotal = 0;
uint64_t vulkanDeviceUsed = 0;
uint64_t vulkanDeviceUnifiedVramSize = 0;
const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, &vulkanDeviceUnifiedVramSize, logVulkanWarning);
if (vulkanDeviceSupportsMemoryBudgetExtension) {
if (vulkanDeviceUnifiedVramSize > total) {
// this means that we counted memory from devices that aren't used by llama.cpp
vulkanDeviceUnifiedVramSize = 0;
}
unifiedVramSize += vulkanDeviceUnifiedVramSize;
}
if (used == 0 && vulkanDeviceUsed != 0) {
used = vulkanDeviceUsed;
}
#endif
Napi::Object result = Napi::Object::New(info.Env());
result.Set("total", Napi::Number::From(info.Env(), total));
result.Set("used", Napi::Number::From(info.Env(), used));
result.Set("unifiedSize", Napi::Number::From(info.Env(), unifiedVramSize));
return result;
}
Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
std::vector<std::string> deviceNames;
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
ggml_backend_dev_t device = ggml_backend_dev_get(i);
auto deviceType = ggml_backend_dev_type(device);
if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) {
deviceNames.push_back(std::string(ggml_backend_dev_description(device)));
}
}
Napi::Object result = Napi::Object::New(info.Env());
Napi::Array deviceNamesNapiArray = Napi::Array::New(info.Env(), deviceNames.size());
for (size_t i = 0; i < deviceNames.size(); ++i) {
deviceNamesNapiArray[i] = Napi::String::New(info.Env(), deviceNames[i]);
}
result.Set("deviceNames", deviceNamesNapiArray);
return result;
}
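// Picks the primary compute device: Metal first, then Vulkan, then the CUDA family (CUDA/ROCm/MUSA), falling back to the CPU device when no GPU backend is present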
std::pair<ggml_backend_dev_t, std::string> getGpuDevice() {
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
ggml_backend_dev_t device = ggml_backend_dev_get(i);
const auto deviceName = std::string(ggml_backend_dev_name(device));
if (deviceName == "Metal") {
return std::pair<ggml_backend_dev_t, std::string>(device, "metal");
} else if (std::string(deviceName).find("Vulkan") == 0) {
return std::pair<ggml_backend_dev_t, std::string>(device, "vulkan");
} else if (std::string(deviceName).find("CUDA") == 0 || std::string(deviceName).find("ROCm") == 0 || std::string(deviceName).find("MUSA") == 0) {
return std::pair<ggml_backend_dev_t, std::string>(device, "cuda");
}
}
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
ggml_backend_dev_t device = ggml_backend_dev_get(i);
const auto deviceName = std::string(ggml_backend_dev_name(device));
if (deviceName == "CPU") {
return std::pair<ggml_backend_dev_t, std::string>(device, "cpu");
}
}
return std::pair<ggml_backend_dev_t, std::string>(nullptr, "");
}
Napi::Value getGpuType(const Napi::CallbackInfo& info) {
const auto gpuDeviceRes = getGpuDevice();
const auto device = gpuDeviceRes.first;
const auto deviceType = gpuDeviceRes.second;
if (deviceType == "cpu") {
return Napi::Boolean::New(info.Env(), false);
} else if (device != nullptr && deviceType != "") {
return Napi::String::New(info.Env(), deviceType);
}
return info.Env().Undefined();
}
Napi::Value ensureGpuDeviceIsSupported(const Napi::CallbackInfo& info) {
#ifdef GPU_INFO_USE_VULKAN
if (!checkIsVulkanEnvSupported(logVulkanWarning)) {
Napi::Error::New(info.Env(), "Vulkan device is not supported").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
#endif
return info.Env().Undefined();
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <utility>
#include <string>
#include "napi.h"
#include "llama.h"
Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info);
Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info);
std::pair<ggml_backend_dev_t, std::string> getGpuDevice();
Napi::Value getGpuType(const Napi::CallbackInfo& info);
Napi::Value ensureGpuDeviceIsSupported(const Napi::CallbackInfo& info);

View File

@@ -0,0 +1,63 @@
#include "getMemoryInfo.h"
#include "addonLog.h"
#ifdef __APPLE__
#include <iostream>
#include <mach/mach.h>
#include <sys/sysctl.h>
#elif __linux__
#include <fstream>
#include <sstream>
#include <string>
#elif _WIN32
#include <iostream>
#include <windows.h>
#include <psapi.h>
#endif
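// Returns the process's memory usage via the platform API: Mach task virtual size on macOS, VmSize from /proc/self/status on Linux, and PrivateUsage from the Win32 process memory counters on Windows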
Napi::Value getMemoryInfo(const Napi::CallbackInfo& info) {
uint64_t totalMemoryUsage = 0;
#ifdef __APPLE__
struct mach_task_basic_info taskInfo;
mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&taskInfo, &infoCount) == KERN_SUCCESS) {
totalMemoryUsage = taskInfo.virtual_size;
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get memory usage info").c_str(), nullptr);
}
#elif __linux__
std::ifstream procStatus("/proc/self/status");
std::string line;
bool foundMemoryUsage = false;
while (std::getline(procStatus, line)) {
if (line.rfind("VmSize:", 0) == 0) { // Resident Set Size (current memory usage)
std::istringstream iss(line);
std::string key, unit;
size_t value;
if (iss >> key >> value >> unit) {
totalMemoryUsage = value * 1024; // Convert from kB to bytes
foundMemoryUsage = true;
}
break;
}
}
if (!foundMemoryUsage) {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get memory usage info").c_str(), nullptr);
}
#elif _WIN32
PROCESS_MEMORY_COUNTERS_EX memCounters;
if (GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&memCounters, sizeof(memCounters))) {
totalMemoryUsage = memCounters.PrivateUsage;
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get memory usage info").c_str(), nullptr);
}
#endif
Napi::Object obj = Napi::Object::New(info.Env());
obj.Set("total", Napi::Number::New(info.Env(), totalMemoryUsage));
return obj;
}

View File

@@ -0,0 +1,4 @@
#pragma once
#include "napi.h"
Napi::Value getMemoryInfo(const Napi::CallbackInfo& info);

View File

@@ -0,0 +1,69 @@
#include "getSwapInfo.h"
#include "addonLog.h"
#ifdef __APPLE__
#include <iostream>
#include <mach/mach.h>
#include <sys/sysctl.h>
#elif __linux__
#include <iostream>
#include <sys/sysinfo.h>
#elif _WIN32
#include <iostream>
#include <windows.h>
#include <psapi.h>
#endif
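// Reports swap/pagefile usage via the platform API; maxSize is reported as -1 when the platform does not expose a fixed upper bound (macOS)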
Napi::Value getSwapInfo(const Napi::CallbackInfo& info) {
uint64_t totalSwap = 0;
uint64_t freeSwap = 0;
uint64_t maxSize = 0;
bool maxSizeSet = true;
#ifdef __APPLE__
struct xsw_usage swapInfo;
size_t size = sizeof(swapInfo);
if (sysctlbyname("vm.swapusage", &swapInfo, &size, NULL, 0) == 0) {
totalSwap = swapInfo.xsu_total;
freeSwap = swapInfo.xsu_avail;
maxSizeSet = false;
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get swap info").c_str(), nullptr);
}
#elif __linux__
struct sysinfo sysInfo;
if (sysinfo(&sysInfo) == 0) {
totalSwap = sysInfo.totalswap;
freeSwap = sysInfo.freeswap;
maxSize = sysInfo.totalswap;
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get swap info").c_str(), nullptr);
}
#elif _WIN32
MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
if (GlobalMemoryStatusEx(&memInfo)) {
PERFORMANCE_INFORMATION perfInfo;
perfInfo.cb = sizeof(PERFORMANCE_INFORMATION);
if (GetPerformanceInfo(&perfInfo, sizeof(perfInfo))) {
totalSwap = memInfo.ullTotalPageFile;
freeSwap = memInfo.ullAvailPageFile;
maxSize = perfInfo.CommitLimit * perfInfo.PageSize;
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get max pagefile size").c_str(), nullptr);
}
} else {
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get pagefile info").c_str(), nullptr);
}
#endif
Napi::Object obj = Napi::Object::New(info.Env());
obj.Set("total", Napi::Number::New(info.Env(), totalSwap));
obj.Set("free", Napi::Number::New(info.Env(), freeSwap));
obj.Set("maxSize", maxSizeSet ? Napi::Number::New(info.Env(), maxSize) : Napi::Number::New(info.Env(), -1));
return obj;
}

View File

@@ -0,0 +1,4 @@
#pragma once
#include "napi.h"
Napi::Value getSwapInfo(const Napi::CallbackInfo& info);