First upload version 0.0.1
This commit is contained in:
985
node_modules/node-llama-cpp/llama/addon/AddonContext.cpp
generated
vendored
Normal file
985
node_modules/node-llama-cpp/llama/addon/AddonContext.cpp
generated
vendored
Normal file
@@ -0,0 +1,985 @@
|
||||
#include <thread>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include "common/common.h"
|
||||
#include "llama-vocab.h"
|
||||
#include "llama.h"
|
||||
|
||||
#include "addonGlobals.h"
|
||||
#include "AddonModel.h"
|
||||
#include "AddonModelLora.h"
|
||||
#include "AddonGrammarEvaluationState.h"
|
||||
#include "AddonContext.h"
|
||||
|
||||
static uint64_t calculateBatchMemorySize(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
|
||||
uint64_t totalSize = 0;
|
||||
|
||||
if (embd) {
|
||||
totalSize += sizeof(float) * n_tokens_alloc * embd;
|
||||
} else {
|
||||
totalSize += sizeof(llama_token) * n_tokens_alloc;
|
||||
}
|
||||
|
||||
totalSize += sizeof(llama_pos) * n_tokens_alloc;
|
||||
totalSize += sizeof(int32_t) * n_tokens_alloc;
|
||||
totalSize += sizeof(llama_seq_id *) * (n_tokens_alloc + 1);
|
||||
|
||||
totalSize += sizeof(llama_seq_id) * n_seq_max * n_tokens_alloc;
|
||||
|
||||
totalSize += sizeof(int8_t) * n_tokens_alloc;
|
||||
|
||||
return totalSize;
|
||||
}
|
||||
|
||||
class AddonContextDecodeBatchWorker : public Napi::AsyncWorker {
|
||||
public:
|
||||
AddonContext* ctx;
|
||||
|
||||
AddonContextDecodeBatchWorker(const Napi::Env& env, AddonContext* ctx)
|
||||
: Napi::AsyncWorker(env, "AddonContextDecodeBatchWorker"),
|
||||
ctx(ctx),
|
||||
deferred(Napi::Promise::Deferred::New(env)) {
|
||||
ctx->Ref();
|
||||
}
|
||||
~AddonContextDecodeBatchWorker() {
|
||||
ctx->Unref();
|
||||
}
|
||||
|
||||
Napi::Promise GetPromise() {
|
||||
return deferred.Promise();
|
||||
}
|
||||
|
||||
protected:
|
||||
Napi::Promise::Deferred deferred;
|
||||
|
||||
void Execute() {
|
||||
try {
|
||||
// Perform the evaluation using llama_decode.
|
||||
int r = llama_decode(ctx->ctx, ctx->batch);
|
||||
|
||||
if (r != 0) {
|
||||
if (r == 1) {
|
||||
SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
|
||||
} else {
|
||||
SetError("Eval has failed");
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
llama_synchronize(ctx->ctx);
|
||||
} catch (const std::exception& e) {
|
||||
SetError(e.what());
|
||||
} catch(...) {
|
||||
SetError("Unknown error when calling \"llama_decode\"");
|
||||
}
|
||||
}
|
||||
void OnOK() {
|
||||
deferred.Resolve(Env().Undefined());
|
||||
}
|
||||
void OnError(const Napi::Error& err) {
|
||||
deferred.Reject(err.Value());
|
||||
}
|
||||
};
|
||||
|
||||
class AddonContextLoadContextWorker : public Napi::AsyncWorker {
|
||||
public:
|
||||
AddonContext* context;
|
||||
|
||||
AddonContextLoadContextWorker(const Napi::Env& env, AddonContext* context)
|
||||
: Napi::AsyncWorker(env, "AddonContextLoadContextWorker"),
|
||||
context(context),
|
||||
deferred(Napi::Promise::Deferred::New(env)) {
|
||||
context->Ref();
|
||||
}
|
||||
~AddonContextLoadContextWorker() {
|
||||
context->Unref();
|
||||
}
|
||||
|
||||
Napi::Promise GetPromise() {
|
||||
return deferred.Promise();
|
||||
}
|
||||
|
||||
protected:
|
||||
Napi::Promise::Deferred deferred;
|
||||
|
||||
void Execute() {
|
||||
try {
|
||||
context->ctx = llama_init_from_model(context->model->model, context->context_params);
|
||||
|
||||
context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
|
||||
} catch (const std::exception& e) {
|
||||
SetError(e.what());
|
||||
} catch(...) {
|
||||
SetError("Unknown error when calling \"llama_init_from_model\"");
|
||||
}
|
||||
}
|
||||
void OnOK() {
|
||||
if (context->contextLoaded) {
|
||||
uint64_t contextMemorySize = llama_state_get_size(context->ctx);
|
||||
adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
|
||||
context->loadedContextMemorySize = contextMemorySize;
|
||||
}
|
||||
|
||||
deferred.Resolve(Napi::Boolean::New(Env(), context->contextLoaded));
|
||||
}
|
||||
void OnError(const Napi::Error& err) {
|
||||
deferred.Reject(err.Value());
|
||||
}
|
||||
};
|
||||
class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
|
||||
public:
|
||||
AddonContext* context;
|
||||
|
||||
AddonContextUnloadContextWorker(const Napi::Env& env, AddonContext* context)
|
||||
: Napi::AsyncWorker(env, "AddonContextUnloadContextWorker"),
|
||||
context(context),
|
||||
deferred(Napi::Promise::Deferred::New(env)) {
|
||||
context->Ref();
|
||||
}
|
||||
~AddonContextUnloadContextWorker() {
|
||||
context->Unref();
|
||||
}
|
||||
|
||||
Napi::Promise GetPromise() {
|
||||
return deferred.Promise();
|
||||
}
|
||||
|
||||
protected:
|
||||
Napi::Promise::Deferred deferred;
|
||||
|
||||
void Execute() {
|
||||
try {
|
||||
llama_free(context->ctx);
|
||||
context->contextLoaded = false;
|
||||
|
||||
try {
|
||||
if (context->has_batch) {
|
||||
llama_batch_free(context->batch);
|
||||
context->has_batch = false;
|
||||
context->batch_n_tokens = 0;
|
||||
}
|
||||
|
||||
context->dispose();
|
||||
} catch (const std::exception& e) {
|
||||
SetError(e.what());
|
||||
} catch(...) {
|
||||
SetError("Unknown error when calling \"llama_batch_free\"");
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
SetError(e.what());
|
||||
} catch(...) {
|
||||
SetError("Unknown error when calling \"llama_free\"");
|
||||
}
|
||||
}
|
||||
void OnOK() {
|
||||
adjustNapiExternalMemorySubtract(Env(), context->loadedContextMemorySize);
|
||||
context->loadedContextMemorySize = 0;
|
||||
|
||||
adjustNapiExternalMemorySubtract(Env(), context->batchMemorySize);
|
||||
context->batchMemorySize = 0;
|
||||
|
||||
deferred.Resolve(Env().Undefined());
|
||||
}
|
||||
void OnError(const Napi::Error& err) {
|
||||
deferred.Reject(err.Value());
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
||||
public:
|
||||
AddonContext* ctx;
|
||||
AddonSampler* sampler;
|
||||
bool arrayResult = false;
|
||||
bool returnProbabilities = false;
|
||||
bool returnConfidence = false;
|
||||
float tokenConfidence = -1;
|
||||
bool has_probabilities = false;
|
||||
size_t probabilities_size;
|
||||
llama_token * probabilities_tokens;
|
||||
float * probabilities_probs;
|
||||
int32_t batchLogitIndex;
|
||||
llama_token result;
|
||||
bool no_output = false;
|
||||
|
||||
AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
|
||||
: Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
|
||||
ctx(ctx),
|
||||
deferred(Napi::Promise::Deferred::New(info.Env())) {
|
||||
ctx->Ref();
|
||||
|
||||
batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
|
||||
sampler = Napi::ObjectWrap<AddonSampler>::Unwrap(info[1].As<Napi::Object>());
|
||||
arrayResult = info.Length() > 2 && info[2].IsBoolean();
|
||||
returnProbabilities = arrayResult ? info[2].As<Napi::Boolean>().Value() : false;
|
||||
returnConfidence = arrayResult && info.Length() > 3 && info[3].IsBoolean() ? info[3].As<Napi::Boolean>().Value() : false;
|
||||
sampler->Ref();
|
||||
}
|
||||
~AddonContextSampleTokenWorker() {
|
||||
ctx->Unref();
|
||||
sampler->Unref();
|
||||
|
||||
if (has_probabilities) {
|
||||
delete[] probabilities_tokens;
|
||||
delete[] probabilities_probs;
|
||||
}
|
||||
}
|
||||
|
||||
Napi::Promise GetPromise() {
|
||||
return deferred.Promise();
|
||||
}
|
||||
|
||||
protected:
|
||||
Napi::Promise::Deferred deferred;
|
||||
|
||||
void Execute() {
|
||||
try {
|
||||
SampleToken();
|
||||
} catch (const std::exception& e) {
|
||||
SetError(e.what());
|
||||
} catch(...) {
|
||||
SetError("Unknown error when calling \"SampleToken\"");
|
||||
}
|
||||
}
|
||||
|
||||
void SampleToken() {
|
||||
if (llama_get_logits(ctx->ctx) == nullptr) {
|
||||
SetError("This model does not support token generation");
|
||||
return;
|
||||
}
|
||||
|
||||
sampler->rebuildChainIfNeeded();
|
||||
|
||||
const auto * logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
|
||||
const int n_vocab = llama_vocab_n_tokens(ctx->model->vocab);
|
||||
|
||||
auto & candidates = sampler->tokenCandidates;
|
||||
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
||||
candidates[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
|
||||
}
|
||||
|
||||
llama_token_data_array cur_p = {
|
||||
/* .data = */ candidates.data(),
|
||||
/* .size = */ candidates.size(),
|
||||
/* .selected = */ -1,
|
||||
/* .sorted = */ false,
|
||||
};
|
||||
|
||||
llama_sampler_apply(sampler->chain, &cur_p);
|
||||
|
||||
if (!(cur_p.selected >= 0 && cur_p.selected < (int32_t)cur_p.size)) {
|
||||
no_output = true;
|
||||
return;
|
||||
}
|
||||
|
||||
auto new_token_id = cur_p.data[cur_p.selected].id;
|
||||
|
||||
if (returnProbabilities || returnConfidence) {
|
||||
if (!cur_p.sorted) {
|
||||
std::sort(cur_p.data, cur_p.data + cur_p.size, [](const llama_token_data & a, const llama_token_data & b) {
|
||||
return a.logit > b.logit;
|
||||
});
|
||||
cur_p.sorted = true;
|
||||
|
||||
for (size_t i = 0; i < cur_p.size; i++) {
|
||||
if (cur_p.data[i].id == new_token_id) {
|
||||
cur_p.selected = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (returnProbabilities) {
|
||||
probabilities_size = cur_p.size;
|
||||
probabilities_tokens = new llama_token[probabilities_size];
|
||||
probabilities_probs = new float[probabilities_size];
|
||||
float maxLogit = cur_p.size > 0 ? cur_p.data[0].logit : -INFINITY;
|
||||
|
||||
for (size_t i = 0; i < cur_p.size; i++) {
|
||||
auto logit = cur_p.data[i].logit;
|
||||
|
||||
probabilities_tokens[i] = cur_p.data[i].id;
|
||||
probabilities_probs[i] = logit;
|
||||
|
||||
if (logit > maxLogit) {
|
||||
maxLogit = logit;
|
||||
}
|
||||
}
|
||||
|
||||
if (probabilities_size > 0 && maxLogit != -INFINITY) {
|
||||
float sum = 0.0f;
|
||||
for (size_t i = 0; i < probabilities_size; i++) {
|
||||
float prob = expf(probabilities_probs[i] - maxLogit);
|
||||
probabilities_probs[i] = prob;
|
||||
sum += prob;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < probabilities_size; i++) {
|
||||
probabilities_probs[i] /= sum;
|
||||
}
|
||||
}
|
||||
|
||||
has_probabilities = true;
|
||||
}
|
||||
|
||||
if (returnConfidence) {
|
||||
if (has_probabilities && cur_p.selected < probabilities_size) {
|
||||
tokenConfidence = probabilities_probs[cur_p.selected];
|
||||
} else {
|
||||
float maxLogit = cur_p.data[0].logit;
|
||||
float sum = 0.0f;
|
||||
for (size_t i = 0; i < cur_p.size; i++) {
|
||||
auto logit = cur_p.data[i].logit;
|
||||
|
||||
if (logit > maxLogit) {
|
||||
maxLogit = logit;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < cur_p.size; i++) {
|
||||
sum += expf(cur_p.data[i].logit - maxLogit);
|
||||
}
|
||||
|
||||
tokenConfidence = expf(cur_p.data[cur_p.selected].logit - maxLogit) / sum;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
sampler->acceptToken(new_token_id);
|
||||
result = new_token_id;
|
||||
} catch (const std::exception& e) {
|
||||
SetError(std::string("Failed to accept token in sampler: ") + e.what());
|
||||
} catch(...) {
|
||||
SetError("Unknown error when calling \"acceptToken\"");
|
||||
}
|
||||
}
|
||||
void OnOK() {
|
||||
Napi::Number resultToken;
|
||||
if (no_output) {
|
||||
resultToken = Napi::Number::New(Env(), -1);
|
||||
} else {
|
||||
resultToken = Napi::Number::New(Env(), static_cast<uint32_t>(result));
|
||||
}
|
||||
|
||||
if (!arrayResult) {
|
||||
deferred.Resolve(resultToken);
|
||||
return;
|
||||
}
|
||||
|
||||
Napi::Array resultArray = Napi::Array::New(Env(), 2);
|
||||
resultArray.Set(Napi::Number::New(Env(), 0), resultToken);
|
||||
|
||||
if (has_probabilities) {
|
||||
Napi::Array probabilities = Napi::Array::New(Env(), probabilities_size * 2);
|
||||
for (size_t i = 0; i < probabilities_size; i++) {
|
||||
probabilities.Set(i * 2, Napi::Number::New(Env(), probabilities_tokens[i]));
|
||||
probabilities.Set(i * 2 + 1, Napi::Number::New(Env(), probabilities_probs[i]));
|
||||
}
|
||||
resultArray.Set(1, probabilities);
|
||||
}
|
||||
|
||||
if (returnConfidence && tokenConfidence != -1) {
|
||||
resultArray.Set(2, Napi::Number::New(Env(), tokenConfidence));
|
||||
}
|
||||
|
||||
deferred.Resolve(resultArray);
|
||||
}
|
||||
void OnError(const Napi::Error& err) {
|
||||
deferred.Reject(err.Value());
|
||||
}
|
||||
};
|
||||
|
||||
AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
|
||||
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
|
||||
model->Ref();
|
||||
|
||||
context_params = llama_context_default_params();
|
||||
context_params.n_ctx = 4096;
|
||||
context_params.n_threads = std::max(cpu_get_num_math(), 1);
|
||||
context_params.n_threads_batch = context_params.n_threads;
|
||||
context_params.no_perf = true;
|
||||
context_params.swa_full = false;
|
||||
|
||||
if (info.Length() > 1 && info[1].IsObject()) {
|
||||
Napi::Object options = info[1].As<Napi::Object>();
|
||||
|
||||
if (options.Has("contextSize")) {
|
||||
context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().Uint32Value();
|
||||
}
|
||||
|
||||
if (options.Has("batchSize")) {
|
||||
context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
|
||||
context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
|
||||
}
|
||||
|
||||
if (options.Has("sequences")) {
|
||||
context_params.n_seq_max = options.Get("sequences").As<Napi::Number>().Uint32Value();
|
||||
}
|
||||
|
||||
if (options.Has("embeddings")) {
|
||||
context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
|
||||
}
|
||||
|
||||
if (options.Has("ranking") && options.Get("ranking").As<Napi::Boolean>().Value()) {
|
||||
context_params.pooling_type = LLAMA_POOLING_TYPE_RANK;
|
||||
}
|
||||
|
||||
if (options.Has("flashAttention")) {
|
||||
bool flashAttention = options.Get("flashAttention").As<Napi::Boolean>().Value();
|
||||
context_params.flash_attn_type = flashAttention ? LLAMA_FLASH_ATTN_TYPE_ENABLED : LLAMA_FLASH_ATTN_TYPE_DISABLED;
|
||||
}
|
||||
|
||||
if (options.Has("threads")) {
|
||||
const auto n_threads = options.Get("threads").As<Napi::Number>().Int32Value();
|
||||
const auto resolved_n_threads = n_threads == 0 ? std::max((int32_t)std::thread::hardware_concurrency(), context_params.n_threads) : n_threads;
|
||||
|
||||
context_params.n_threads = resolved_n_threads;
|
||||
context_params.n_threads_batch = resolved_n_threads;
|
||||
}
|
||||
|
||||
if (options.Has("performanceTracking")) {
|
||||
context_params.no_perf = !(options.Get("performanceTracking").As<Napi::Boolean>().Value());
|
||||
}
|
||||
|
||||
if (options.Has("swaFullCache")) {
|
||||
context_params.swa_full = options.Get("swaFullCache").As<Napi::Boolean>().Value();
|
||||
}
|
||||
}
|
||||
}
|
||||
AddonContext::~AddonContext() {
|
||||
dispose();
|
||||
}
|
||||
|
||||
void AddonContext::dispose() {
|
||||
if (disposed) {
|
||||
return;
|
||||
}
|
||||
|
||||
disposed = true;
|
||||
if (contextLoaded) {
|
||||
contextLoaded = false;
|
||||
llama_free(ctx);
|
||||
|
||||
adjustNapiExternalMemorySubtract(Env(), loadedContextMemorySize);
|
||||
loadedContextMemorySize = 0;
|
||||
}
|
||||
|
||||
model->Unref();
|
||||
|
||||
disposeBatch();
|
||||
}
|
||||
void AddonContext::disposeBatch() {
|
||||
if (!has_batch) {
|
||||
return;
|
||||
}
|
||||
|
||||
llama_batch_free(batch);
|
||||
has_batch = false;
|
||||
batch_n_tokens = 0;
|
||||
|
||||
adjustNapiExternalMemorySubtract(Env(), batchMemorySize);
|
||||
batchMemorySize = 0;
|
||||
}
|
||||
|
||||
Napi::Value AddonContext::Init(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
AddonContextLoadContextWorker* worker = new AddonContextLoadContextWorker(this->Env(), this);
|
||||
worker->Queue();
|
||||
return worker->GetPromise();
|
||||
}
|
||||
Napi::Value AddonContext::Dispose(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
if (contextLoaded) {
|
||||
contextLoaded = false;
|
||||
|
||||
AddonContextUnloadContextWorker* worker = new AddonContextUnloadContextWorker(this->Env(), this);
|
||||
worker->Queue();
|
||||
return worker->GetPromise();
|
||||
} else {
|
||||
dispose();
|
||||
|
||||
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
||||
deferred.Resolve(info.Env().Undefined());
|
||||
return deferred.Promise();
|
||||
}
|
||||
}
|
||||
|
||||
Napi::Value AddonContext::GetContextSize(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
return Napi::Number::From(info.Env(), llama_n_ctx(ctx));
|
||||
}
|
||||
Napi::Value AddonContext::InitBatch(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
if (has_batch) {
|
||||
llama_batch_free(batch);
|
||||
}
|
||||
|
||||
int32_t n_tokens = info[0].As<Napi::Number>().Int32Value();
|
||||
|
||||
batch = llama_batch_init(n_tokens, 0, 1);
|
||||
has_batch = true;
|
||||
batch_n_tokens = n_tokens;
|
||||
|
||||
uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_model_n_embd(model->model), context_params.n_batch);
|
||||
if (newBatchMemorySize > batchMemorySize) {
|
||||
adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
|
||||
batchMemorySize = newBatchMemorySize;
|
||||
} else if (newBatchMemorySize < batchMemorySize) {
|
||||
adjustNapiExternalMemorySubtract(Env(), batchMemorySize - newBatchMemorySize);
|
||||
batchMemorySize = newBatchMemorySize;
|
||||
}
|
||||
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
Napi::Value AddonContext::DisposeBatch(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
disposeBatch();
|
||||
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
Napi::Value AddonContext::AddToBatch(const Napi::CallbackInfo& info) {
|
||||
if (!has_batch) {
|
||||
Napi::Error::New(info.Env(), "No batch is initialized").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
|
||||
int32_t firstTokenContextIndex = info[1].As<Napi::Number>().Int32Value();
|
||||
Napi::Uint32Array tokens = info[2].As<Napi::Uint32Array>();
|
||||
Napi::Uint32Array tokenLogitIndexes = info[3].As<Napi::Uint32Array>();
|
||||
|
||||
auto tokensLength = tokens.ElementLength();
|
||||
auto tokenLogitIndexesLength = tokenLogitIndexes.ElementLength();
|
||||
GGML_ASSERT(batch.n_tokens + tokensLength <= batch_n_tokens);
|
||||
|
||||
Napi::Uint32Array resLogitIndexes = Napi::Uint32Array::New(info.Env(), tokenLogitIndexesLength);
|
||||
|
||||
for (size_t i = 0, l = 0; i < tokensLength; i++) {
|
||||
if (l < tokenLogitIndexesLength && l < tokenLogitIndexesLength && tokenLogitIndexes[l] == i) {
|
||||
common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, true);
|
||||
resLogitIndexes[l] = batch.n_tokens - 1;
|
||||
l++;
|
||||
} else {
|
||||
common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, false);
|
||||
}
|
||||
}
|
||||
|
||||
return resLogitIndexes;
|
||||
}
|
||||
Napi::Value AddonContext::DisposeSequence(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
|
||||
|
||||
bool result = llama_memory_seq_rm(llama_get_memory(ctx), sequenceId, -1, -1);
|
||||
|
||||
if (!result) {
|
||||
Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
Napi::Value AddonContext::RemoveTokenCellsFromSequence(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
|
||||
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
|
||||
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
|
||||
|
||||
bool result = llama_memory_seq_rm(llama_get_memory(ctx), sequenceId, startPos, endPos);
|
||||
|
||||
return Napi::Boolean::New(info.Env(), result);
|
||||
}
|
||||
Napi::Value AddonContext::ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
|
||||
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
|
||||
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
|
||||
int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
|
||||
|
||||
llama_memory_seq_add(llama_get_memory(ctx), sequenceId, startPos, endPos, shiftDelta);
|
||||
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
Napi::Value AddonContext::GetSequenceKvCacheMinPosition(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
|
||||
|
||||
|
||||
const auto minPosition = llama_memory_seq_pos_min(llama_get_memory(ctx), sequenceId);
|
||||
|
||||
return Napi::Number::New(info.Env(), minPosition);
|
||||
}
|
||||
Napi::Value AddonContext::GetSequenceKvCacheMaxPosition(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
|
||||
|
||||
|
||||
const auto maxPosition = llama_memory_seq_pos_max(llama_get_memory(ctx), sequenceId);
|
||||
|
||||
return Napi::Number::New(info.Env(), maxPosition);
|
||||
}
|
||||
Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
|
||||
AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
|
||||
worker->Queue();
|
||||
return worker->GetPromise();
|
||||
}
|
||||
Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
|
||||
AddonContextSampleTokenWorker* worker = new AddonContextSampleTokenWorker(info, this);
|
||||
worker->Queue();
|
||||
return worker->GetPromise();
|
||||
}
|
||||
|
||||
Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
|
||||
int32_t maxVectorSize = (info.Length() > 1 && info[1].IsNumber()) ? info[1].As<Napi::Number>().Int32Value() : 0;
|
||||
|
||||
if (inputTokensLength <= 0) {
|
||||
Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
const int n_embd = llama_model_n_embd(model->model);
|
||||
const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
|
||||
const auto* embeddings = pooling_type == LLAMA_POOLING_TYPE_NONE ? NULL : llama_get_embeddings_seq(ctx, 0);
|
||||
if (embeddings == NULL) {
|
||||
embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
|
||||
}
|
||||
|
||||
if (embeddings == NULL) {
|
||||
Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
size_t resultSize = maxVectorSize == 0 ? n_embd : std::min(n_embd, maxVectorSize);
|
||||
Napi::Float64Array result = Napi::Float64Array::New(info.Env(), resultSize);
|
||||
for (size_t i = 0; i < resultSize; i++) {
|
||||
result[i] = embeddings[i];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
Napi::Value AddonContext::GetStateSize(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
return Napi::Number::From(info.Env(), llama_state_get_size(ctx));
|
||||
}
|
||||
|
||||
Napi::Value AddonContext::GetThreads(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
return Napi::Number::From(info.Env(), llama_n_threads(ctx));
|
||||
}
|
||||
|
||||
Napi::Value AddonContext::SetThreads(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
const auto threads = info[0].As<Napi::Number>().Int32Value();
|
||||
const auto resolvedThreads = threads == 0
|
||||
? std::max((int32_t)std::thread::hardware_concurrency(), std::max(cpu_get_num_math(), 1))
|
||||
: threads;
|
||||
|
||||
if (llama_n_threads(ctx) != resolvedThreads) {
|
||||
llama_set_n_threads(ctx, resolvedThreads, resolvedThreads);
|
||||
}
|
||||
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
class AddonContextSaveSequenceStateToFileWorker : public Napi::AsyncWorker {
|
||||
public:
|
||||
AddonContext* context;
|
||||
std::string filepath;
|
||||
llama_seq_id sequenceId;
|
||||
std::vector<llama_token> tokens;
|
||||
size_t savedFileSize = 0;
|
||||
|
||||
AddonContextSaveSequenceStateToFileWorker(const Napi::CallbackInfo& info, AddonContext* context)
|
||||
: Napi::AsyncWorker(info.Env(), "AddonContextSaveSequenceStateToFileWorker"),
|
||||
context(context),
|
||||
deferred(Napi::Promise::Deferred::New(info.Env())) {
|
||||
context->Ref();
|
||||
|
||||
filepath = info[0].As<Napi::String>().Utf8Value();
|
||||
sequenceId = info[1].As<Napi::Number>().Int32Value();
|
||||
Napi::Uint32Array inputTokens = info[2].As<Napi::Uint32Array>();
|
||||
|
||||
tokens.resize(inputTokens.ElementLength());
|
||||
for (size_t i = 0; i < tokens.size(); i++) {
|
||||
tokens[i] = inputTokens[i];
|
||||
}
|
||||
}
|
||||
~AddonContextSaveSequenceStateToFileWorker() {
|
||||
context->Unref();
|
||||
}
|
||||
|
||||
Napi::Promise GetPromise() {
|
||||
return deferred.Promise();
|
||||
}
|
||||
|
||||
protected:
|
||||
Napi::Promise::Deferred deferred;
|
||||
|
||||
void Execute() {
|
||||
try {
|
||||
savedFileSize = llama_state_seq_save_file(context->ctx, filepath.c_str(), sequenceId, tokens.data(), tokens.size());
|
||||
if (savedFileSize == 0) {
|
||||
SetError("Failed to save state to file");
|
||||
return;
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
SetError(e.what());
|
||||
} catch(...) {
|
||||
SetError("Unknown error when calling \"llama_state_seq_save_file\"");
|
||||
}
|
||||
}
|
||||
void OnOK() {
|
||||
deferred.Resolve(Napi::Number::New(Env(), savedFileSize));
|
||||
}
|
||||
void OnError(const Napi::Error& err) {
|
||||
deferred.Reject(err.Value());
|
||||
}
|
||||
};
|
||||
Napi::Value AddonContext::SaveSequenceStateToFile(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
AddonContextSaveSequenceStateToFileWorker* worker = new AddonContextSaveSequenceStateToFileWorker(info, this);
|
||||
worker->Queue();
|
||||
return worker->GetPromise();
|
||||
}
|
||||
|
||||
class AddonContextLoadSequenceStateFromFileWorker : public Napi::AsyncWorker {
|
||||
public:
|
||||
AddonContext* context;
|
||||
std::string filepath;
|
||||
llama_seq_id sequenceId;
|
||||
size_t maxContextSize;
|
||||
std::vector<llama_token> tokens;
|
||||
|
||||
AddonContextLoadSequenceStateFromFileWorker(const Napi::CallbackInfo& info, AddonContext* context)
|
||||
: Napi::AsyncWorker(info.Env(), "AddonContextLoadSequenceStateFromFileWorker"),
|
||||
context(context),
|
||||
deferred(Napi::Promise::Deferred::New(info.Env())) {
|
||||
context->Ref();
|
||||
|
||||
filepath = info[0].As<Napi::String>().Utf8Value();
|
||||
sequenceId = info[1].As<Napi::Number>().Int32Value();
|
||||
maxContextSize = info[2].As<Napi::Number>().Uint32Value();
|
||||
|
||||
tokens.resize(maxContextSize);
|
||||
}
|
||||
~AddonContextLoadSequenceStateFromFileWorker() {
|
||||
context->Unref();
|
||||
}
|
||||
|
||||
Napi::Promise GetPromise() {
|
||||
return deferred.Promise();
|
||||
}
|
||||
|
||||
protected:
|
||||
Napi::Promise::Deferred deferred;
|
||||
|
||||
void Execute() {
|
||||
try {
|
||||
size_t tokenCount = 0;
|
||||
const size_t fileSize = llama_state_seq_load_file(context->ctx, filepath.c_str(), sequenceId, tokens.data(), tokens.size(), &tokenCount);
|
||||
if (fileSize == 0) {
|
||||
SetError("Failed to load state from file. Current context sequence size may be smaller that the state of the file");
|
||||
return;
|
||||
}
|
||||
|
||||
tokens.resize(tokenCount);
|
||||
} catch (const std::exception& e) {
|
||||
SetError(e.what());
|
||||
} catch(...) {
|
||||
SetError("Unknown error when calling \"llama_state_seq_load_file\"");
|
||||
}
|
||||
}
|
||||
void OnOK() {
|
||||
size_t tokenCount = tokens.size();
|
||||
Napi::Uint32Array result = Napi::Uint32Array::New(Env(), tokenCount);
|
||||
|
||||
for (size_t i = 0; i < tokenCount; i++) {
|
||||
result[i] = tokens[i];
|
||||
}
|
||||
|
||||
deferred.Resolve(result);
|
||||
}
|
||||
void OnError(const Napi::Error& err) {
|
||||
deferred.Reject(err.Value());
|
||||
}
|
||||
};
|
||||
Napi::Value AddonContext::LoadSequenceStateFromFile(const Napi::CallbackInfo& info) {
|
||||
if (disposed) {
|
||||
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
AddonContextLoadSequenceStateFromFileWorker* worker = new AddonContextLoadSequenceStateFromFileWorker(info, this);
|
||||
worker->Queue();
|
||||
return worker->GetPromise();
|
||||
}
|
||||
|
||||
Napi::Value AddonContext::PrintTimings(const Napi::CallbackInfo& info) {
|
||||
llama_perf_context_print(ctx);
|
||||
llama_perf_context_reset(ctx);
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
Napi::Value AddonContext::EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info) {
|
||||
constexpr auto vocabSizeMaxDifference = 128; // SPEC_VOCAB_MAX_SIZE_DIFFERENCE
|
||||
constexpr auto vocabCheckStartTokenId = 5; // SPEC_VOCAB_CHECK_START_TOKEN_ID
|
||||
|
||||
const AddonContext * draftContext = Napi::ObjectWrap<AddonContext>::Unwrap(info[0].As<Napi::Object>());
|
||||
const auto currentCtx = ctx;
|
||||
const auto draftCtx = draftContext->ctx;
|
||||
const auto currentModel = model->model;
|
||||
const auto draftModel = draftContext->model->model;
|
||||
const auto currentVocab = model->vocab;
|
||||
const auto draftVocab = draftContext->model->vocab;
|
||||
|
||||
if (llama_vocab_type(currentVocab) != llama_vocab_type(draftVocab)) {
|
||||
Napi::Error::New(info.Env(), "Speculative draft model vocabulary type must match the target model vocabulary type").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
if (llama_vocab_get_add_bos(currentVocab) != llama_vocab_get_add_bos(draftVocab) ||
|
||||
llama_vocab_get_add_eos(currentVocab) != llama_vocab_get_add_eos(draftVocab) ||
|
||||
llama_vocab_bos(currentVocab) != llama_vocab_bos(draftVocab) ||
|
||||
llama_vocab_eos(currentVocab) != llama_vocab_eos(draftVocab)
|
||||
) {
|
||||
Napi::Error::New(info.Env(), "Speculative draft model special tokens must match the target model special tokens").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
const int currentModelVocabSize = llama_vocab_n_tokens(currentVocab);
|
||||
const int draftModelVocabSize = llama_vocab_n_tokens(draftVocab);
|
||||
|
||||
const int vocabDiff = std::abs(currentModelVocabSize - draftModelVocabSize);
|
||||
|
||||
if (vocabDiff > vocabSizeMaxDifference) {
|
||||
Napi::Error::New(
|
||||
info.Env(),
|
||||
std::string("Speculative draft model vocabulary must closely match the target model vocabulary size (vocabulary size difference: ") +
|
||||
std::to_string(vocabDiff) + std::string(", max allowed: ") + std::to_string(vocabSizeMaxDifference) + std::string(")")
|
||||
).ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
const int minVocabSize = std::min(currentModelVocabSize, draftModelVocabSize);
|
||||
for (int i = vocabCheckStartTokenId; i < minVocabSize; ++i) {
|
||||
const char * currentTokenText = llama_vocab_get_text(currentVocab, i);
|
||||
const char * draftTokenText = llama_vocab_get_text(draftVocab, i);
|
||||
if (std::strcmp(currentTokenText, draftTokenText) != 0) {
|
||||
Napi::Error::New(
|
||||
info.Env(),
|
||||
std::string("Speculative draft model vocabulary must match the target model vocabulary, but token ") +
|
||||
std::to_string(i) + std::string(" content differs. Target: \"") + std::string(currentTokenText) +
|
||||
std::string("\", Draft: \"") + std::string(draftTokenText) + std::string("")
|
||||
).ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
}
|
||||
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
Napi::Value AddonContext::SetLora(const Napi::CallbackInfo& info) {
|
||||
AddonModelLora* lora = Napi::ObjectWrap<AddonModelLora>::Unwrap(info[0].As<Napi::Object>());
|
||||
float scale = info[1].As<Napi::Number>().FloatValue();
|
||||
|
||||
llama_set_adapter_lora(ctx, lora->lora_adapter, scale);
|
||||
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
void AddonContext::init(Napi::Object exports) {
|
||||
exports.Set(
|
||||
"AddonContext",
|
||||
DefineClass(
|
||||
exports.Env(),
|
||||
"AddonContext",
|
||||
{
|
||||
InstanceMethod("init", &AddonContext::Init),
|
||||
InstanceMethod("getContextSize", &AddonContext::GetContextSize),
|
||||
InstanceMethod("initBatch", &AddonContext::InitBatch),
|
||||
InstanceMethod("addToBatch", &AddonContext::AddToBatch),
|
||||
InstanceMethod("disposeSequence", &AddonContext::DisposeSequence),
|
||||
InstanceMethod("removeTokenCellsFromSequence", &AddonContext::RemoveTokenCellsFromSequence),
|
||||
InstanceMethod("shiftSequenceTokenCells", &AddonContext::ShiftSequenceTokenCells),
|
||||
InstanceMethod("getSequenceKvCacheMinPosition", &AddonContext::GetSequenceKvCacheMinPosition),
|
||||
InstanceMethod("getSequenceKvCacheMaxPosition", &AddonContext::GetSequenceKvCacheMaxPosition),
|
||||
InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
|
||||
InstanceMethod("sampleToken", &AddonContext::SampleToken),
|
||||
InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
|
||||
InstanceMethod("getStateSize", &AddonContext::GetStateSize),
|
||||
InstanceMethod("getThreads", &AddonContext::GetThreads),
|
||||
InstanceMethod("setThreads", &AddonContext::SetThreads),
|
||||
InstanceMethod("printTimings", &AddonContext::PrintTimings),
|
||||
InstanceMethod("ensureDraftContextIsCompatibleForSpeculative", &AddonContext::EnsureDraftContextIsCompatibleForSpeculative),
|
||||
InstanceMethod("saveSequenceStateToFile", &AddonContext::SaveSequenceStateToFile),
|
||||
InstanceMethod("loadSequenceStateFromFile", &AddonContext::LoadSequenceStateFromFile),
|
||||
InstanceMethod("setLora", &AddonContext::SetLora),
|
||||
InstanceMethod("dispose", &AddonContext::Dispose),
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
58
node_modules/node-llama-cpp/llama/addon/AddonContext.h
generated
vendored
Normal file
58
node_modules/node-llama-cpp/llama/addon/AddonContext.h
generated
vendored
Normal file
@@ -0,0 +1,58 @@
|
||||
#pragma once
|
||||
#include "llama.h"
|
||||
#include "napi.h"
|
||||
#include "addonGlobals.h"
|
||||
#include "AddonSampler.h"
|
||||
|
||||
class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
||||
public:
|
||||
AddonModel* model;
|
||||
llama_context_params context_params;
|
||||
llama_context* ctx;
|
||||
llama_batch batch;
|
||||
uint64_t batchMemorySize = 0;
|
||||
bool has_batch = false;
|
||||
int32_t batch_n_tokens = 0;
|
||||
int n_cur = 0;
|
||||
|
||||
uint64_t loadedContextMemorySize = 0;
|
||||
bool contextLoaded = false;
|
||||
|
||||
bool disposed = false;
|
||||
|
||||
AddonContext(const Napi::CallbackInfo& info);
|
||||
~AddonContext();
|
||||
|
||||
void dispose();
|
||||
void disposeBatch();
|
||||
|
||||
Napi::Value Init(const Napi::CallbackInfo& info);
|
||||
Napi::Value Dispose(const Napi::CallbackInfo& info);
|
||||
|
||||
Napi::Value GetContextSize(const Napi::CallbackInfo& info);
|
||||
Napi::Value InitBatch(const Napi::CallbackInfo& info);
|
||||
Napi::Value DisposeBatch(const Napi::CallbackInfo& info);
|
||||
Napi::Value AddToBatch(const Napi::CallbackInfo& info);
|
||||
Napi::Value DisposeSequence(const Napi::CallbackInfo& info);
|
||||
Napi::Value RemoveTokenCellsFromSequence(const Napi::CallbackInfo& info);
|
||||
Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info);
|
||||
Napi::Value GetSequenceKvCacheMinPosition(const Napi::CallbackInfo& info);
|
||||
Napi::Value GetSequenceKvCacheMaxPosition(const Napi::CallbackInfo& info);
|
||||
Napi::Value DecodeBatch(const Napi::CallbackInfo& info);
|
||||
Napi::Value SampleToken(const Napi::CallbackInfo& info);
|
||||
|
||||
Napi::Value GetEmbedding(const Napi::CallbackInfo& info);
|
||||
Napi::Value GetStateSize(const Napi::CallbackInfo& info);
|
||||
Napi::Value GetThreads(const Napi::CallbackInfo& info);
|
||||
Napi::Value SetThreads(const Napi::CallbackInfo& info);
|
||||
|
||||
Napi::Value SaveSequenceStateToFile(const Napi::CallbackInfo& info);
|
||||
Napi::Value LoadSequenceStateFromFile(const Napi::CallbackInfo& info);
|
||||
|
||||
Napi::Value PrintTimings(const Napi::CallbackInfo& info);
|
||||
Napi::Value EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info);
|
||||
|
||||
Napi::Value SetLora(const Napi::CallbackInfo& info);
|
||||
|
||||
static void init(Napi::Object exports);
|
||||
};
|
||||
92
node_modules/node-llama-cpp/llama/addon/AddonGrammar.cpp
generated
vendored
Normal file
92
node_modules/node-llama-cpp/llama/addon/AddonGrammar.cpp
generated
vendored
Normal file
@@ -0,0 +1,92 @@
|
||||
#include "addonGlobals.h"
|
||||
#include "AddonGrammar.h"
|
||||
|
||||
AddonGrammar::AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
|
||||
grammarCode = info[0].As<Napi::String>().Utf8Value();
|
||||
|
||||
if (info.Length() > 1 && info[1].IsObject()) {
|
||||
Napi::Object options = info[1].As<Napi::Object>();
|
||||
|
||||
if (options.Has("addonExports")) {
|
||||
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
|
||||
hasAddonExportsRef = true;
|
||||
}
|
||||
|
||||
if (options.Has("rootRuleName")) {
|
||||
rootRuleName = options.Get("rootRuleName").As<Napi::String>().Utf8Value();
|
||||
}
|
||||
}
|
||||
|
||||
auto parsed_grammar = llama_grammar_init_impl(nullptr, grammarCode.c_str(), rootRuleName.c_str(), false, nullptr, 0, nullptr, 0);
|
||||
|
||||
// will be empty if there are parse errors
|
||||
if (parsed_grammar == nullptr) {
|
||||
Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
|
||||
return;
|
||||
}
|
||||
|
||||
llama_grammar_free_impl(parsed_grammar);
|
||||
}
|
||||
AddonGrammar::~AddonGrammar() {
|
||||
if (hasAddonExportsRef) {
|
||||
addonExportsRef.Unref();
|
||||
hasAddonExportsRef = false;
|
||||
}
|
||||
}
|
||||
|
||||
Napi::Value AddonGrammar::isTextCompatible(const Napi::CallbackInfo& info) {
|
||||
const std::string testText = info[0].As<Napi::String>().Utf8Value();
|
||||
|
||||
auto parsed_grammar = llama_grammar_init_impl(nullptr, grammarCode.c_str(), rootRuleName.c_str(), false, nullptr, 0, nullptr, 0);
|
||||
|
||||
// will be empty if there are parse errors
|
||||
if (parsed_grammar == nullptr) {
|
||||
Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
|
||||
return Napi::Boolean::New(info.Env(), false);
|
||||
}
|
||||
|
||||
const auto cpts = unicode_cpts_from_utf8(testText);
|
||||
llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(parsed_grammar);
|
||||
|
||||
for (const auto & cpt : cpts) {
|
||||
try {
|
||||
llama_grammar_accept(parsed_grammar, cpt);
|
||||
} catch (const std::exception & e) {
|
||||
llama_grammar_free_impl(parsed_grammar);
|
||||
return Napi::Boolean::New(info.Env(), false);
|
||||
} catch (...) {
|
||||
llama_grammar_free_impl(parsed_grammar);
|
||||
return Napi::Boolean::New(info.Env(), false);
|
||||
}
|
||||
|
||||
if (stacks_cur.empty()) {
|
||||
// no stacks means that the grammar failed to match at this point
|
||||
llama_grammar_free_impl(parsed_grammar);
|
||||
return Napi::Boolean::New(info.Env(), false);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto & stack : stacks_cur) {
|
||||
if (stack.empty()) {
|
||||
// an empty stack means that the grammar has been completed
|
||||
llama_grammar_free_impl(parsed_grammar);
|
||||
return Napi::Boolean::New(info.Env(), true);
|
||||
}
|
||||
}
|
||||
|
||||
llama_grammar_free_impl(parsed_grammar);
|
||||
return Napi::Boolean::New(info.Env(), false);
|
||||
}
|
||||
|
||||
void AddonGrammar::init(Napi::Object exports) {
|
||||
exports.Set(
|
||||
"AddonGrammar",
|
||||
DefineClass(
|
||||
exports.Env(),
|
||||
"AddonGrammar",
|
||||
{
|
||||
InstanceMethod("isTextCompatible", &AddonGrammar::isTextCompatible),
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
22
node_modules/node-llama-cpp/llama/addon/AddonGrammar.h
generated
vendored
Normal file
22
node_modules/node-llama-cpp/llama/addon/AddonGrammar.h
generated
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
#pragma once
|
||||
#include "llama.h"
|
||||
#include "common/common.h"
|
||||
#include "llama-grammar.h"
|
||||
#include "src/unicode.h"
|
||||
#include "napi.h"
|
||||
#include "addonGlobals.h"
|
||||
|
||||
class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
||||
public:
|
||||
std::string grammarCode = "";
|
||||
std::string rootRuleName = "root";
|
||||
Napi::Reference<Napi::Object> addonExportsRef;
|
||||
bool hasAddonExportsRef = false;
|
||||
|
||||
AddonGrammar(const Napi::CallbackInfo& info);
|
||||
~AddonGrammar();
|
||||
|
||||
Napi::Value isTextCompatible(const Napi::CallbackInfo& info);
|
||||
|
||||
static void init(Napi::Object exports);
|
||||
};
|
||||
36
node_modules/node-llama-cpp/llama/addon/AddonGrammarEvaluationState.cpp
generated
vendored
Normal file
36
node_modules/node-llama-cpp/llama/addon/AddonGrammarEvaluationState.cpp
generated
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
#include <sstream>
|
||||
#include "addonGlobals.h"
|
||||
#include "common/common.h"
|
||||
#include "llama.h"
|
||||
#include "AddonGrammarEvaluationState.h"
|
||||
#include "AddonGrammar.h"
|
||||
|
||||
AddonGrammarEvaluationState::AddonGrammarEvaluationState(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammarEvaluationState>(info) {
|
||||
if (info.Length() == 1) {
|
||||
AddonGrammarEvaluationState* existingState = Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
|
||||
model = existingState->model;
|
||||
model->Ref();
|
||||
|
||||
grammarDef = existingState->grammarDef;
|
||||
grammarDef->Ref();
|
||||
|
||||
sampler = llama_sampler_clone(existingState->sampler);
|
||||
} else {
|
||||
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
|
||||
model->Ref();
|
||||
|
||||
grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[1].As<Napi::Object>());
|
||||
grammarDef->Ref();
|
||||
|
||||
sampler = llama_sampler_init_grammar(model->vocab, grammarDef->grammarCode.c_str(), grammarDef->rootRuleName.c_str());
|
||||
}
|
||||
}
|
||||
AddonGrammarEvaluationState::~AddonGrammarEvaluationState() {
|
||||
llama_sampler_free(sampler);
|
||||
grammarDef->Unref();
|
||||
model->Unref();
|
||||
}
|
||||
|
||||
void AddonGrammarEvaluationState::init(Napi::Object exports) {
|
||||
exports.Set("AddonGrammarEvaluationState", DefineClass(exports.Env(), "AddonGrammarEvaluationState", {}));
|
||||
}
|
||||
17
node_modules/node-llama-cpp/llama/addon/AddonGrammarEvaluationState.h
generated
vendored
Normal file
17
node_modules/node-llama-cpp/llama/addon/AddonGrammarEvaluationState.h
generated
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
#include "llama.h"
|
||||
#include "napi.h"
|
||||
#include "addonGlobals.h"
|
||||
#include "AddonModel.h"
|
||||
|
||||
class AddonGrammarEvaluationState : public Napi::ObjectWrap<AddonGrammarEvaluationState> {
|
||||
public:
|
||||
AddonModel* model;
|
||||
AddonGrammar* grammarDef;
|
||||
llama_sampler * sampler = nullptr;
|
||||
|
||||
AddonGrammarEvaluationState(const Napi::CallbackInfo& info);
|
||||
~AddonGrammarEvaluationState();
|
||||
|
||||
static void init(Napi::Object exports);
|
||||
};
|
||||
691
node_modules/node-llama-cpp/llama/addon/AddonModel.cpp
generated
vendored
Normal file
691
node_modules/node-llama-cpp/llama/addon/AddonModel.cpp
generated
vendored
Normal file
@@ -0,0 +1,691 @@
|
||||
#include <sstream>
|
||||
#include "addonGlobals.h"
|
||||
#include "globals/addonLog.h"
|
||||
#include "globals/addonProgress.h"
|
||||
#include "common/common.h"
|
||||
#include "llama.h"
|
||||
#include "AddonModel.h"
|
||||
#include "AddonModelData.h"
|
||||
#include "AddonModelLora.h"
|
||||
|
||||
static Napi::Value getNapiToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
|
||||
if (token < 0 || token == LLAMA_TOKEN_NULL) {
|
||||
return Napi::Number::From(info.Env(), -1);
|
||||
}
|
||||
|
||||
auto tokenAttributes = llama_vocab_get_attr(vocab, token);
|
||||
|
||||
if (tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED || tokenAttributes & LLAMA_TOKEN_ATTR_UNKNOWN) {
|
||||
return Napi::Number::From(info.Env(), -1);
|
||||
}
|
||||
|
||||
return Napi::Number::From(info.Env(), token);
|
||||
}
|
||||
|
||||
static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
|
||||
if (token < 0) {
|
||||
return Napi::Number::From(info.Env(), -1);
|
||||
}
|
||||
|
||||
auto tokenAttributes = llama_vocab_get_attr(vocab, token);
|
||||
|
||||
if (!(tokenAttributes & LLAMA_TOKEN_ATTR_CONTROL) && !(tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED)) {
|
||||
return Napi::Number::From(info.Env(), -1);
|
||||
}
|
||||
|
||||
return Napi::Number::From(info.Env(), token);
|
||||
}
|
||||
|
||||
static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
|
||||
AddonModel* addonModel = (AddonModel *) user_data;
|
||||
unsigned percentage = (unsigned) (100 * progress);
|
||||
|
||||
if (percentage > addonModel->modelLoadPercentage) {
|
||||
addonModel->modelLoadPercentage = percentage;
|
||||
|
||||
// original llama.cpp logs
|
||||
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
|
||||
if (percentage >= 100) {
|
||||
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
if (progress > addonModel->rawModelLoadPercentage) {
|
||||
addonModel->rawModelLoadPercentage = progress;
|
||||
|
||||
if (addonModel->onLoadProgressEventCallbackSet) {
|
||||
addon_progress_event* data = new addon_progress_event {
|
||||
progress
|
||||
};
|
||||
|
||||
auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);
|
||||
|
||||
if (status != napi_ok) {
|
||||
delete data;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return !(addonModel->abortModelLoad);
|
||||
}
|
||||
|
||||
class AddonModelLoadModelWorker : public Napi::AsyncWorker {
|
||||
public:
|
||||
AddonModel* model;
|
||||
|
||||
AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
|
||||
: Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
|
||||
model(model),
|
||||
deferred(Napi::Promise::Deferred::New(env)) {
|
||||
model->Ref();
|
||||
}
|
||||
~AddonModelLoadModelWorker() {
|
||||
model->Unref();
|
||||
}
|
||||
|
||||
Napi::Promise GetPromise() {
|
||||
return deferred.Promise();
|
||||
}
|
||||
|
||||
protected:
|
||||
Napi::Promise::Deferred deferred;
|
||||
|
||||
void Execute() {
|
||||
try {
|
||||
model->model = llama_model_load_from_file(model->modelPath.c_str(), model->model_params);
|
||||
model->vocab = llama_model_get_vocab(model->model);
|
||||
|
||||
model->modelLoaded = model->model != nullptr && model->model != NULL;
|
||||
} catch (const std::exception& e) {
|
||||
SetError(e.what());
|
||||
} catch(...) {
|
||||
SetError("Unknown error when calling \"llama_model_load_from_file\"");
|
||||
}
|
||||
}
|
||||
void OnOK() {
|
||||
if (model->modelLoaded) {
|
||||
uint64_t modelSize = llama_model_size(model->model);
|
||||
adjustNapiExternalMemoryAdd(Env(), modelSize);
|
||||
model->loadedModelSize = modelSize;
|
||||
}
|
||||
|
||||
deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
|
||||
if (model->onLoadProgressEventCallbackSet) {
|
||||
model->addonThreadSafeOnLoadProgressEventCallback.Release();
|
||||
}
|
||||
}
|
||||
void OnError(const Napi::Error& err) {
|
||||
deferred.Reject(err.Value());
|
||||
}
|
||||
};
|
||||
|
||||
class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
|
||||
public:
|
||||
AddonModel* model;
|
||||
|
||||
AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
|
||||
: Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
|
||||
model(model),
|
||||
deferred(Napi::Promise::Deferred::New(env)) {
|
||||
model->Ref();
|
||||
}
|
||||
~AddonModelUnloadModelWorker() {
|
||||
model->Unref();
|
||||
}
|
||||
|
||||
Napi::Promise GetPromise() {
|
||||
return deferred.Promise();
|
||||
}
|
||||
|
||||
protected:
|
||||
Napi::Promise::Deferred deferred;
|
||||
|
||||
void Execute() {
|
||||
try {
|
||||
llama_model_free(model->model);
|
||||
model->modelLoaded = false;
|
||||
|
||||
model->dispose();
|
||||
} catch (const std::exception& e) {
|
||||
SetError(e.what());
|
||||
} catch(...) {
|
||||
SetError("Unknown error when calling \"llama_model_free\"");
|
||||
}
|
||||
}
|
||||
void OnOK() {
|
||||
adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
|
||||
model->loadedModelSize = 0;
|
||||
|
||||
deferred.Resolve(Env().Undefined());
|
||||
}
|
||||
void OnError(const Napi::Error& err) {
|
||||
deferred.Reject(err.Value());
|
||||
}
|
||||
};
|
||||
|
||||
class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
|
||||
public:
|
||||
AddonModelLora* modelLora;
|
||||
|
||||
AddonModelLoadLoraWorker(
|
||||
const Napi::Env& env,
|
||||
AddonModelLora* modelLora
|
||||
)
|
||||
: Napi::AsyncWorker(env, "AddonModelLoadLoraWorker"),
|
||||
modelLora(modelLora),
|
||||
deferred(Napi::Promise::Deferred::New(env)) {
|
||||
modelLora->model->Ref();
|
||||
modelLora->Ref();
|
||||
}
|
||||
~AddonModelLoadLoraWorker() {
|
||||
modelLora->model->Unref();
|
||||
modelLora->Unref();
|
||||
}
|
||||
|
||||
Napi::Promise GetPromise() {
|
||||
return deferred.Promise();
|
||||
}
|
||||
|
||||
protected:
|
||||
Napi::Promise::Deferred deferred;
|
||||
|
||||
void Execute() {
|
||||
try {
|
||||
const auto loraAdapter = llama_adapter_lora_init(modelLora->model->model, modelLora->loraFilePath.c_str());
|
||||
|
||||
if (loraAdapter == nullptr) {
|
||||
SetError(
|
||||
std::string(
|
||||
std::string("Failed to initialize LoRA adapter \"" + modelLora->loraFilePath + "\"")
|
||||
)
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
modelLora->lora_adapter = loraAdapter;
|
||||
modelLora->model->Ref();
|
||||
|
||||
if (modelLora->model->data != nullptr) {
|
||||
modelLora->model->data->loraAdapters.insert(modelLora);
|
||||
} else {
|
||||
modelLora->dispose(true);
|
||||
SetError("Model data is not initialized");
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
SetError(e.what());
|
||||
} catch(...) {
|
||||
SetError("Unknown error when calling \"llama_adapter_lora_init\"");
|
||||
}
|
||||
}
|
||||
void OnOK() {
|
||||
deferred.Resolve(Env().Undefined());
|
||||
}
|
||||
void OnError(const Napi::Error& err) {
|
||||
deferred.Reject(err.Value());
|
||||
}
|
||||
};
|
||||
|
||||
AddonModel::AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
|
||||
data = new AddonModelData();
|
||||
model_params = llama_model_default_params();

// Get the model path
modelPath = info[0].As<Napi::String>().Utf8Value();

if (info.Length() > 1 && info[1].IsObject()) {
Napi::Object options = info[1].As<Napi::Object>();

if (options.Has("addonExports")) {
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
hasAddonExportsRef = true;
}

if (options.Has("gpuLayers")) {
model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
}

if (options.Has("vocabOnly")) {
model_params.vocab_only = options.Get("vocabOnly").As<Napi::Boolean>().Value();
}

if (options.Has("useMmap")) {
model_params.use_mmap = options.Get("useMmap").As<Napi::Boolean>().Value();
}

if (options.Has("useDirectIo")) {
model_params.use_direct_io = options.Get("useDirectIo").As<Napi::Boolean>().Value();
}

if (options.Has("useMlock")) {
model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
}

if (options.Has("checkTensors")) {
model_params.check_tensors = options.Get("checkTensors").As<Napi::Boolean>().Value();
}

if (options.Has("onLoadProgress")) {
auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
if (onLoadProgressJSCallback.IsFunction()) {
AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
info.Env(),
onLoadProgressJSCallback,
"onLoadProgressCallback",
0,
1,
context,
[](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
addonModel->onLoadProgressEventCallbackSet = false;

delete ctx;
},
this
);
onLoadProgressEventCallbackSet = true;
}
}

if (options.Has("hasLoadAbortSignal")) {
hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
}

if (options.Has("overridesList")) {
Napi::Array overridesList = options.Get("overridesList").As<Napi::Array>();
kv_overrides.reserve(overridesList.Length());

for (uint32_t i = 0; i < overridesList.Length(); i++) {
Napi::Array overrideItem = overridesList.Get(i).As<Napi::Array>();
auto key = overrideItem.Get((uint32_t)0).As<Napi::String>().Utf8Value();
auto value = overrideItem.Get((uint32_t)1);

if (key.length() > 127) {
continue;
}

llama_model_kv_override kvo;
std::strncpy(kvo.key, key.c_str(), key.length());
kvo.key[key.length()] = 0;

if (value.IsString()) {
auto valueString = value.As<Napi::String>().Utf8Value();
if (valueString.length() > 127) {
continue;
}

kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
std::strncpy(kvo.val_str, valueString.c_str(), valueString.length());
kvo.val_str[valueString.length()] = 0;

fputs(std::string("Override: " + key + " = " + valueString + "\n").c_str(), stdout);
fflush(stdout);
} else if (value.IsNumber() || value.IsBigInt()) {
auto numberType = overrideItem.Get((uint32_t)2).As<Napi::Number>().Int32Value();
if (numberType == 0) {
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
kvo.val_i64 = value.As<Napi::Number>().Int64Value();
} else {
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
kvo.val_f64 = value.As<Napi::Number>().DoubleValue();
}

continue;
} else if (value.IsBoolean()) {
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
kvo.val_bool = value.As<Napi::Boolean>().Value();
}

kv_overrides.emplace_back(std::move(kvo));
}

if (!kv_overrides.empty()) {
kv_overrides.emplace_back();
kv_overrides.back().key[0] = 0;
}

model_params.kv_overrides = kv_overrides.data();
}

if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
model_params.progress_callback_user_data = &(*this);
model_params.progress_callback = llamaModelParamsProgressCallback;
}
}
}

AddonModel::~AddonModel() {
dispose();
}
void AddonModel::dispose() {
if (disposed) {
return;
}

disposed = true;

if (data != nullptr) {
auto currentData = data;
data = nullptr;
delete currentData;
}

if (modelLoaded) {
modelLoaded = false;
llama_model_free(model);

adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
loadedModelSize = 0;
}

if (hasAddonExportsRef) {
addonExportsRef.Unref();
hasAddonExportsRef = false;
}
}

Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

AddonModelLoadModelWorker* worker = new AddonModelLoadModelWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonModel::LoadLora(const Napi::CallbackInfo& info) {
AddonModelLora* modelLora = Napi::ObjectWrap<AddonModelLora>::Unwrap(info[0].As<Napi::Object>());
AddonModelLoadLoraWorker* worker = new AddonModelLoadLoraWorker(this->Env(), modelLora);
worker->Queue();
return worker->GetPromise();
}
Napi::Value AddonModel::AbortActiveModelLoad(const Napi::CallbackInfo& info) {
abortModelLoad = true;
return info.Env().Undefined();
}
Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
if (disposed) {
return info.Env().Undefined();
}

if (modelLoaded) {
modelLoaded = false;

AddonModelUnloadModelWorker* worker = new AddonModelUnloadModelWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
} else {
dispose();

Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
deferred.Resolve(info.Env().Undefined());
return deferred.Promise();
}
}

Napi::Value AddonModel::Tokenize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

std::string text = info[0].As<Napi::String>().Utf8Value();
bool specialTokens = info[1].As<Napi::Boolean>().Value();

std::vector<llama_token> tokens = common_tokenize(vocab, text, false, specialTokens);

Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
for (size_t i = 0; i < tokens.size(); ++i) {
result[i] = static_cast<uint32_t>(tokens[i]);
}

return result;
}
Napi::Value AddonModel::Detokenize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
bool decodeSpecialTokens = info.Length() > 1
? info[1].As<Napi::Boolean>().Value()
: false;

std::string result;
result.resize(std::max(result.capacity(), tokens.ElementLength()));

int n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
if (n_chars < 0) {
result.resize(-n_chars);
n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
GGML_ASSERT(n_chars <= (int)result.size()); // whitespace trimming is performed after per-token detokenization
}

result.resize(n_chars);

return Napi::String::New(info.Env(), result);
}

Napi::Value AddonModel::GetTrainContextSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return Napi::Number::From(info.Env(), llama_model_n_ctx_train(model));
}

Napi::Value AddonModel::GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return Napi::Number::From(info.Env(), llama_model_n_embd(model));
}

Napi::Value AddonModel::GetTotalSize(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return Napi::Number::From(info.Env(), llama_model_size(model));
}

Napi::Value AddonModel::GetTotalParameters(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return Napi::Number::From(info.Env(), llama_model_n_params(model));
}

Napi::Value AddonModel::GetModelDescription(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

char model_desc[128];
int actual_length = llama_model_desc(model, model_desc, sizeof(model_desc));

return Napi::String::New(info.Env(), model_desc, actual_length);
}

Napi::Value AddonModel::TokenBos(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return getNapiControlToken(info, vocab, llama_vocab_bos(vocab));
}
Napi::Value AddonModel::TokenEos(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return getNapiControlToken(info, vocab, llama_vocab_eos(vocab));
}
Napi::Value AddonModel::TokenNl(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return getNapiToken(info, vocab, llama_vocab_nl(vocab));
}
Napi::Value AddonModel::PrefixToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return getNapiToken(info, vocab, llama_vocab_fim_pre(vocab));
}
Napi::Value AddonModel::MiddleToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return getNapiToken(info, vocab, llama_vocab_fim_mid(vocab));
}
Napi::Value AddonModel::SuffixToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return getNapiToken(info, vocab, llama_vocab_fim_suf(vocab));
}
Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return getNapiToken(info, vocab, llama_vocab_eot(vocab));
}
Napi::Value AddonModel::SepToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

return getNapiToken(info, vocab, llama_vocab_sep(vocab));
}
Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

int token = info[0].As<Napi::Number>().Int32Value();
std::stringstream ss;

const char* str = llama_vocab_get_text(vocab, token);
if (str == nullptr) {
return info.Env().Undefined();
}

ss << str;

return Napi::String::New(info.Env(), ss.str());
}

Napi::Value AddonModel::GetTokenAttributes(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

if (info[0].IsNumber() == false) {
return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_ATTR_UNDEFINED));
}

int token = info[0].As<Napi::Number>().Int32Value();
auto tokenAttributes = llama_vocab_get_attr(vocab, token);

return Napi::Number::From(info.Env(), int32_t(tokenAttributes));
}
Napi::Value AddonModel::IsEogToken(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

if (info[0].IsNumber() == false) {
return Napi::Boolean::New(info.Env(), false);
}

int token = info[0].As<Napi::Number>().Int32Value();

return Napi::Boolean::New(info.Env(), llama_vocab_is_eog(vocab, token));
}
Napi::Value AddonModel::GetVocabularyType(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

auto vocabularyType = llama_vocab_type(vocab);

return Napi::Number::From(info.Env(), int32_t(vocabularyType));
}
Napi::Value AddonModel::ShouldPrependBosToken(const Napi::CallbackInfo& info) {
const bool addBos = llama_vocab_get_add_bos(vocab);

return Napi::Boolean::New(info.Env(), addBos);
}
Napi::Value AddonModel::ShouldAppendEosToken(const Napi::CallbackInfo& info) {
const bool addEos = llama_vocab_get_add_eos(vocab);

return Napi::Boolean::New(info.Env(), addEos);
}

Napi::Value AddonModel::GetModelSize(const Napi::CallbackInfo& info) {
return Napi::Number::From(info.Env(), llama_model_size(model));
}

void AddonModel::init(Napi::Object exports) {
exports.Set(
"AddonModel",
DefineClass(
exports.Env(),
"AddonModel",
{
InstanceMethod("init", &AddonModel::Init),
InstanceMethod("loadLora", &AddonModel::LoadLora),
InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
InstanceMethod("tokenize", &AddonModel::Tokenize),
InstanceMethod("detokenize", &AddonModel::Detokenize),
InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
InstanceMethod("getEmbeddingVectorSize", &AddonModel::GetEmbeddingVectorSize),
InstanceMethod("getTotalSize", &AddonModel::GetTotalSize),
InstanceMethod("getTotalParameters", &AddonModel::GetTotalParameters),
InstanceMethod("getModelDescription", &AddonModel::GetModelDescription),
InstanceMethod("tokenBos", &AddonModel::TokenBos),
InstanceMethod("tokenEos", &AddonModel::TokenEos),
InstanceMethod("tokenNl", &AddonModel::TokenNl),
InstanceMethod("prefixToken", &AddonModel::PrefixToken),
InstanceMethod("middleToken", &AddonModel::MiddleToken),
InstanceMethod("suffixToken", &AddonModel::SuffixToken),
InstanceMethod("eotToken", &AddonModel::EotToken),
InstanceMethod("sepToken", &AddonModel::SepToken),
InstanceMethod("getTokenString", &AddonModel::GetTokenString),
InstanceMethod("getTokenAttributes", &AddonModel::GetTokenAttributes),
InstanceMethod("isEogToken", &AddonModel::IsEogToken),
InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
InstanceMethod("shouldAppendEosToken", &AddonModel::ShouldAppendEosToken),
InstanceMethod("getModelSize", &AddonModel::GetModelSize),
InstanceMethod("dispose", &AddonModel::Dispose),
}
)
);
}
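Editor's note — a minimal sketch (not part of the vendored file) of one metadata override entry in the shape the AddonModel constructor above consumes; the [key, value, numberType] tuple layout and the empty-key terminator are taken from the option-parsing code, and the key name here is purely illustrative:

// Hedged sketch: a boolean override, followed by the empty-key terminator the constructor appends.
llama_model_kv_override kvo;
std::strncpy(kvo.key, "example.key", sizeof(kvo.key) - 1); // hypothetical key, must stay <= 127 chars
kvo.key[sizeof(kvo.key) - 1] = 0;
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
kvo.val_bool = true;
kv_overrides.emplace_back(std::move(kvo));
kv_overrides.emplace_back();                               // terminator: entry whose key starts with '\0'
kv_overrides.back().key[0] = 0;
model_params.kv_overrides = kv_overrides.data();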
64
node_modules/node-llama-cpp/llama/addon/AddonModel.h
generated
vendored
Normal file
@@ -0,0 +1,64 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
#include "globals/addonProgress.h"

class AddonModel : public Napi::ObjectWrap<AddonModel> {
public:
llama_model_params model_params;
std::vector<llama_model_kv_override> kv_overrides;
llama_model* model;
const llama_vocab* vocab;
uint64_t loadedModelSize = 0;
Napi::Reference<Napi::Object> addonExportsRef;
bool hasAddonExportsRef = false;
AddonModelData* data;

std::string modelPath;
bool modelLoaded = false;
bool abortModelLoad = false;
bool model_load_stopped = false;
float rawModelLoadPercentage = 0;
unsigned modelLoadPercentage = 0;
AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
bool onLoadProgressEventCallbackSet = false;
bool hasLoadAbortSignal = false;

bool disposed = false;

AddonModel(const Napi::CallbackInfo& info);
~AddonModel();
void dispose();

Napi::Value Init(const Napi::CallbackInfo& info);
Napi::Value LoadLora(const Napi::CallbackInfo& info);
Napi::Value AbortActiveModelLoad(const Napi::CallbackInfo& info);
Napi::Value Dispose(const Napi::CallbackInfo& info);
Napi::Value Tokenize(const Napi::CallbackInfo& info);
Napi::Value Detokenize(const Napi::CallbackInfo& info);
Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info);
Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info);
Napi::Value GetTotalSize(const Napi::CallbackInfo& info);
Napi::Value GetTotalParameters(const Napi::CallbackInfo& info);
Napi::Value GetModelDescription(const Napi::CallbackInfo& info);

Napi::Value TokenBos(const Napi::CallbackInfo& info);
Napi::Value TokenEos(const Napi::CallbackInfo& info);
Napi::Value TokenNl(const Napi::CallbackInfo& info);
Napi::Value PrefixToken(const Napi::CallbackInfo& info);
Napi::Value MiddleToken(const Napi::CallbackInfo& info);
Napi::Value SuffixToken(const Napi::CallbackInfo& info);
Napi::Value EotToken(const Napi::CallbackInfo& info);
Napi::Value SepToken(const Napi::CallbackInfo& info);
Napi::Value GetTokenString(const Napi::CallbackInfo& info);

Napi::Value GetTokenAttributes(const Napi::CallbackInfo& info);
Napi::Value IsEogToken(const Napi::CallbackInfo& info);
Napi::Value GetVocabularyType(const Napi::CallbackInfo& info);
Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info);
Napi::Value ShouldAppendEosToken(const Napi::CallbackInfo& info);
Napi::Value GetModelSize(const Napi::CallbackInfo& info);

static void init(Napi::Object exports);
};
25
node_modules/node-llama-cpp/llama/addon/AddonModelData.cpp
generated
vendored
Normal file
@@ -0,0 +1,25 @@
#include <iostream>

#include "addonGlobals.h"
#include "AddonModelData.h"
#include "AddonModelLora.h"

AddonModelData::AddonModelData() {

}
AddonModelData::~AddonModelData() {
std::set<AddonModelLora *> currentLoraAdapters;
currentLoraAdapters.swap(loraAdapters);

for (auto lora : currentLoraAdapters) {
lora->dispose(true);
}
currentLoraAdapters.clear();
}

void AddonModelData::removeLora(AddonModelLora* lora) {
auto pos = loraAdapters.find(lora);
if (pos != loraAdapters.end()) {
loraAdapters.erase(pos);
}
}
15
node_modules/node-llama-cpp/llama/addon/AddonModelData.h
generated
vendored
Normal file
@@ -0,0 +1,15 @@
#pragma once
#include <set>
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"

class AddonModelData {
public:
std::set<AddonModelLora *> loraAdapters;

AddonModelData();
~AddonModelData();

void removeLora(AddonModelLora* lora);
};
103
node_modules/node-llama-cpp/llama/addon/AddonModelLora.cpp
generated
vendored
Normal file
@@ -0,0 +1,103 @@
#include "addonGlobals.h"
#include "AddonModel.h"
#include "AddonModelData.h"
#include "AddonModelLora.h"

class AddonModelLoraUnloadLoraWorker : public Napi::AsyncWorker {
public:
AddonModelLora* addonLora;

AddonModelLoraUnloadLoraWorker(const Napi::Env& env, AddonModelLora* addonLora)
: Napi::AsyncWorker(env, "AddonModelLoraUnloadLoraWorker"),
addonLora(addonLora),
deferred(Napi::Promise::Deferred::New(env)) {
addonLora->Ref();
}
~AddonModelLoraUnloadLoraWorker() {
addonLora->Unref();
}

Napi::Promise GetPromise() {
return deferred.Promise();
}

protected:
Napi::Promise::Deferred deferred;

void Execute() {
try {
addonLora->dispose();
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_adapter_lora_free\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};

AddonModelLora::AddonModelLora(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModelLora>(info) {
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
loraFilePath = info[1].As<Napi::String>().Utf8Value();
lora_adapter = nullptr;
}

AddonModelLora::~AddonModelLora() {
dispose();
}

void AddonModelLora::dispose(bool skipErase) {
if (lora_adapter != nullptr) {
lora_adapter = nullptr;

if (!skipErase && model->data != nullptr) {
model->data->removeLora(this);
}

model->Unref();
}
}

Napi::Value AddonModelLora::GetFilePath(const Napi::CallbackInfo& info) {
return Napi::String::New(info.Env(), loraFilePath);
}


Napi::Value AddonModelLora::GetUsages(const Napi::CallbackInfo& info) {
return Napi::Number::From(info.Env(), usages);
}

void AddonModelLora::SetUsages(const Napi::CallbackInfo& info, const Napi::Value &value) {
usages = value.As<Napi::Number>().Uint32Value();
}

Napi::Value AddonModelLora::Dispose(const Napi::CallbackInfo& info) {
AddonModelLoraUnloadLoraWorker* worker = new AddonModelLoraUnloadLoraWorker(this->Env(), this);
worker->Queue();
return worker->GetPromise();
}

Napi::Value AddonModelLora::GetDisposed(const Napi::CallbackInfo& info) {
return Napi::Boolean::New(info.Env(), lora_adapter == nullptr);
}

void AddonModelLora::init(Napi::Object exports) {
exports.Set(
"AddonModelLora",
DefineClass(
exports.Env(),
"AddonModelLora",
{
InstanceAccessor("usages", &AddonModelLora::GetUsages, &AddonModelLora::SetUsages),
InstanceAccessor("filePath", &AddonModelLora::GetFilePath, nullptr),
InstanceAccessor("disposed", &AddonModelLora::GetDisposed, nullptr),
InstanceMethod("dispose", &AddonModelLora::Dispose),
}
)
);
}
28
node_modules/node-llama-cpp/llama/addon/AddonModelLora.h
generated
vendored
Normal file
@@ -0,0 +1,28 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"

class AddonModelLora : public Napi::ObjectWrap<AddonModelLora> {
public:
AddonModel* model;
llama_adapter_lora * lora_adapter;
std::string loraFilePath;
uint32_t usages = 0;

AddonModelLora(const Napi::CallbackInfo& info);
~AddonModelLora();

void dispose(bool skipErase = false);

Napi::Value GetFilePath(const Napi::CallbackInfo& info);

Napi::Value GetUsages(const Napi::CallbackInfo& info);
void SetUsages(const Napi::CallbackInfo& info, const Napi::Value &value);

Napi::Value GetDisposed(const Napi::CallbackInfo& info);

Napi::Value Dispose(const Napi::CallbackInfo& info);

static void init(Napi::Object exports);
};
511
node_modules/node-llama-cpp/llama/addon/AddonSampler.cpp
generated
vendored
Normal file
@@ -0,0 +1,511 @@
#include <cmath>
#include "common/common.h"
#include "globals/addonLog.h"
#include "ggml.h"
#include "llama.h"

#include "AddonGrammarEvaluationState.h"
#include "AddonSampler.h"

AddonSampler::AddonSampler(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonSampler>(info) {
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
model->Ref();

tokenCandidates.resize(llama_vocab_n_tokens(model->vocab));
tokenCandidates.reserve(llama_vocab_n_tokens(model->vocab));
}
AddonSampler::~AddonSampler() {
dispose();
}

void AddonSampler::dispose() {
if (disposed) {
return;
}

disposed = true;

model->Unref();
freeChain();

if (temperatureSampler != nullptr) {
llama_sampler_free(temperatureSampler);
temperatureSampler = nullptr;
}

if (greedySampler != nullptr) {
llama_sampler_free(greedySampler);
greedySampler = nullptr;
}

if (minPSampler != nullptr) {
llama_sampler_free(minPSampler);
minPSampler = nullptr;
}

if (topKSampler != nullptr) {
llama_sampler_free(topKSampler);
topKSampler = nullptr;
}

if (topPSampler != nullptr) {
llama_sampler_free(topPSampler);
topPSampler = nullptr;
}

if (seedSampler != nullptr) {
llama_sampler_free(seedSampler);
seedSampler = nullptr;
}

if (repeatPenaltySampler != nullptr) {
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;
}

if (tokenBiasSampler != nullptr) {
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}

if (grammarEvaluationState != nullptr) {
grammarEvaluationState->Unref();
grammarEvaluationState = nullptr;
}
}

void AddonSampler::freeChain() {
if (chain == nullptr) {
return;
}

// ensure existing state of samplers isn't cleared
while (llama_sampler_chain_n(chain) > 0) {
llama_sampler_chain_remove(chain, 0);
}

llama_sampler_free(chain);
chain = nullptr;
}

void AddonSampler::rebuildChainIfNeeded() {
if (disposed) {
throw std::runtime_error("Sampler is disposed");
}

if (chain != nullptr) {
return;
}

auto sampler_params = llama_sampler_chain_default_params();
chain = llama_sampler_chain_init(sampler_params);

if (tokenBiasSampler != nullptr) {
llama_sampler_chain_add(chain, tokenBiasSampler);
}

if (repeatPenaltySampler != nullptr) {
llama_sampler_chain_add(chain, repeatPenaltySampler);
}

if (grammarEvaluationState != nullptr) {
llama_sampler_chain_add(chain, grammarEvaluationState->sampler);
}

if (greedySampler != nullptr) {
llama_sampler_chain_add(chain, greedySampler);
} else {
if (topKSampler != nullptr) {
llama_sampler_chain_add(chain, topKSampler);
}

if (topPSampler != nullptr) {
llama_sampler_chain_add(chain, topPSampler);
}

if (minPSampler != nullptr) {
llama_sampler_chain_add(chain, minPSampler);
}

if (temperatureSampler != nullptr) {
llama_sampler_chain_add(chain, temperatureSampler);
}

if (seedSampler != nullptr) {
llama_sampler_chain_add(chain, seedSampler);
}
}
}

void AddonSampler::acceptToken(llama_token token) {
if (repeatPenaltySampler != nullptr) {
llama_sampler_accept(repeatPenaltySampler, token);
repeatPenalty_lastTokens.push_back(token);
}

if (grammarEvaluationState != nullptr && grammarEvaluationState->sampler != nullptr && !llama_vocab_is_eog(model->vocab, token)) {
llama_sampler_accept(grammarEvaluationState->sampler, token);
}
}

Napi::Value AddonSampler::Dispose(const Napi::CallbackInfo& info) {
dispose();
return info.Env().Undefined();
}
Napi::Value AddonSampler::ApplyConfig(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Sampler is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

const int32_t n_probs = 0; // Number of probabilities to keep - 0 = disabled
size_t min_keep = std::max(1, n_probs);

Napi::Object config = info[0].As<Napi::Object>();

if (config.Has("temperature")) {
auto temperature = config.Get("temperature").As<Napi::Number>().FloatValue();
if (temperature != temperatureSampler_temperature || !temperatureSampler_initialized) {
temperatureSampler_initialized = true;
temperatureSampler_temperature = temperature;
freeChain();

if (temperatureSampler != nullptr) {
llama_sampler_free(temperatureSampler);
temperatureSampler = nullptr;
}

if (temperatureSampler_temperature <= 0) {
greedySampler = llama_sampler_init_greedy();
} else {
temperatureSampler = llama_sampler_init_temp(temperatureSampler_temperature);

if (greedySampler != nullptr) {
llama_sampler_free(greedySampler);
greedySampler = nullptr;
}
}
}
} else {
if (temperatureSampler != nullptr) {
freeChain();
llama_sampler_free(temperatureSampler);
temperatureSampler = nullptr;
}

if (greedySampler == nullptr) {
greedySampler = llama_sampler_init_greedy();
}
}

if (config.Has("minP")) {
auto minP = config.Get("minP").As<Napi::Number>().FloatValue();
if (minP != minPSampler_minP) {
minPSampler_minP = minP;
freeChain();

if (minPSampler != nullptr) {
llama_sampler_free(minPSampler);
minPSampler = nullptr;
}

if (minPSampler_minP != 0) {
minPSampler = llama_sampler_init_min_p(minPSampler_minP, min_keep);
}
}
} else if (minPSampler != nullptr) {
freeChain();
llama_sampler_free(minPSampler);
minPSampler = nullptr;
}

if (config.Has("topK")) {
auto topK = config.Get("topK").As<Napi::Number>().Int32Value();
if (topK != topKSampler_topK || !topKSampler_initialized) {
topKSampler_initialized = true;
topKSampler_topK = topK;
freeChain();

if (topKSampler != nullptr) {
llama_sampler_free(topKSampler);
topKSampler = nullptr;
}

const int32_t resolved_top_k = topKSampler_topK <= 0
? llama_vocab_n_tokens(model->vocab)
: std::min(topKSampler_topK, llama_vocab_n_tokens(model->vocab));

topKSampler = llama_sampler_init_top_k(resolved_top_k);
}
} else if (topKSampler != nullptr) {
freeChain();
llama_sampler_free(topKSampler);
topKSampler = nullptr;
}

if (config.Has("topP")) {
auto topP = config.Get("topP").As<Napi::Number>().FloatValue();
if (topP != topPSampler_topP) {
topPSampler_topP = topP;
freeChain();

if (topPSampler != nullptr) {
llama_sampler_free(topPSampler);
topPSampler = nullptr;
}

if (topPSampler_topP >= 1) {
topPSampler = llama_sampler_init_top_p(topPSampler_topP, min_keep);
}
}
} else if (topPSampler != nullptr) {
freeChain();
llama_sampler_free(topPSampler);
topPSampler = nullptr;
}

if (config.Has("seed")) {
auto seed = config.Get("seed").As<Napi::Number>().Uint32Value();
if (seed != seedSampler_seed || seedSampler == nullptr) {
seedSampler_seed = seed;
freeChain();

if (seedSampler != nullptr) {
llama_sampler_free(seedSampler);
seedSampler = nullptr;
}

seedSampler = llama_sampler_init_dist(seedSampler_seed);
}
} else if (seedSampler == nullptr) {
freeChain();
seedSampler = llama_sampler_init_dist(time(NULL));
}

if (config.Has("repeatPenaltyTokens")) {
Napi::Uint32Array repeat_penalty_tokens_uint32_array = config.Get("repeatPenaltyTokens").As<Napi::Uint32Array>();
auto repeatPenalty = config.Has("repeatPenalty")
? config.Get("repeatPenalty").As<Napi::Number>().FloatValue()
: 1;
auto repeatPenaltyMaxTokens = config.Has("repeatPenaltyMaxTokens")
? config.Get("repeatPenaltyMaxTokens").As<Napi::Number>().Int32Value()
: 64;
auto repeatPenaltyPresencePenalty = config.Has("repeatPenaltyPresencePenalty")
? config.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue()
: 0;
auto repeatPenaltyFrequencyPenalty = config.Has("repeatPenaltyFrequencyPenalty")
? config.Get("repeatPenaltyFrequencyPenalty").As<Napi::Number>().FloatValue()
: 0;

auto repeatPenaltyEnabled = repeatPenalty != 1 && repeatPenaltyMaxTokens > 0;
bool shouldCreateSampler = false;

if (!repeatPenaltyEnabled) {
if (repeatPenaltySampler != nullptr) {
freeChain();
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;
}
} else if (repeatPenaltySampler == nullptr) {
freeChain();
shouldCreateSampler = true;
} else {
bool existingSamplerMatchesConfig = true;
existingSamplerMatchesConfig &= repeatPenalty_maxTokens == repeatPenaltyMaxTokens;
existingSamplerMatchesConfig &= repeatPenalty_penalty == repeatPenalty;
existingSamplerMatchesConfig &= repeatPenalty_presencePenalty == repeatPenaltyPresencePenalty;
existingSamplerMatchesConfig &= repeatPenalty_frequencyPenalty == repeatPenaltyFrequencyPenalty;

if (existingSamplerMatchesConfig) {
if (repeat_penalty_tokens_uint32_array.ElementLength() > 0) {
const auto firstToken = static_cast<llama_token>(repeat_penalty_tokens_uint32_array[0]);
if (repeatPenalty_lastTokens.rat(0) != firstToken &&
repeatPenalty_lastTokens.size() == repeatPenalty_maxTokens &&
repeat_penalty_tokens_uint32_array.ElementLength() == repeatPenalty_maxTokens
) {
const auto lastToken = static_cast<llama_token>(repeat_penalty_tokens_uint32_array[repeat_penalty_tokens_uint32_array.ElementLength() - 1]);
llama_sampler_accept(repeatPenaltySampler, lastToken);
repeatPenalty_lastTokens.push_back(lastToken);
}
}
for (size_t i = 0; i < repeat_penalty_tokens_uint32_array.ElementLength() && existingSamplerMatchesConfig; i++) {
auto token = static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]);

if (i < repeatPenalty_lastTokens.size()) {
existingSamplerMatchesConfig &= repeatPenalty_lastTokens.rat(i) == token;
} else {
llama_sampler_accept(repeatPenaltySampler, token);
repeatPenalty_lastTokens.push_back(token);
}
}
}

if (!existingSamplerMatchesConfig) {
freeChain();
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;

shouldCreateSampler = true;
}
}

if (shouldCreateSampler) {
repeatPenaltySampler = llama_sampler_init_penalties(
repeatPenaltyMaxTokens,
repeatPenalty,
repeatPenaltyFrequencyPenalty,
repeatPenaltyPresencePenalty
);
repeatPenalty_lastTokens = RingBuffer<llama_token>(repeatPenaltyMaxTokens);

for (size_t i = 0; i < repeat_penalty_tokens_uint32_array.ElementLength(); i++) {
llama_sampler_accept(repeatPenaltySampler, static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]));
repeatPenalty_lastTokens.push_back(static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]));
}

repeatPenalty_maxTokens = repeatPenaltyMaxTokens;
repeatPenalty_penalty = repeatPenalty;
repeatPenalty_presencePenalty = repeatPenaltyPresencePenalty;
repeatPenalty_frequencyPenalty = repeatPenaltyFrequencyPenalty;
}
} else if (repeatPenaltySampler != nullptr) {
freeChain();
llama_sampler_free(repeatPenaltySampler);
repeatPenaltySampler = nullptr;
}

if (config.Has("tokenBiasKeys") && config.Has("tokenBiasValues")) {
Napi::Uint32Array tokenBiasKeys = config.Get("tokenBiasKeys").As<Napi::Uint32Array>();
Napi::Float32Array tokenBiasValues = config.Get("tokenBiasValues").As<Napi::Float32Array>();

if (tokenBiasKeys.ElementLength() == tokenBiasValues.ElementLength() && tokenBiasKeys.ElementLength() > 0) {
bool existingSamplerMatchesConfig = tokenBiasSampler != nullptr;

if (tokenBiasSampler != nullptr && tokenBiasSampler_biases.size() == tokenBiasKeys.ElementLength()) {
for (size_t i = 0; i < tokenBiasKeys.ElementLength() && existingSamplerMatchesConfig; i++) {
existingSamplerMatchesConfig &= tokenBiasSampler_biases[i].token == static_cast<llama_token>(tokenBiasKeys[i]);
existingSamplerMatchesConfig &= tokenBiasSampler_biases[i].bias == tokenBiasValues[i];
}
} else {
existingSamplerMatchesConfig = false;
}

if (!existingSamplerMatchesConfig) {
if (tokenBiasSampler != nullptr) {
freeChain();
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}

tokenBiasSampler_biases.clear();
tokenBiasSampler_biases.reserve(tokenBiasKeys.ElementLength());

for (size_t i = 0; i < tokenBiasKeys.ElementLength(); i++) {
tokenBiasSampler_biases.emplace_back(llama_logit_bias { static_cast<llama_token>(tokenBiasKeys[i]), tokenBiasValues[i] });
}

tokenBiasSampler = llama_sampler_init_logit_bias(
llama_vocab_n_tokens(model->vocab),
tokenBiasSampler_biases.size(),
tokenBiasSampler_biases.data()
);
}
} else if (tokenBiasSampler != nullptr) {
freeChain();
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}
} else if (tokenBiasSampler != nullptr) {
freeChain();
llama_sampler_free(tokenBiasSampler);
tokenBiasSampler = nullptr;
}

if (config.Has("grammarEvaluationState")) {
const auto configGrammarEvaluationState =
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(config.Get("grammarEvaluationState").As<Napi::Object>());

if (grammarEvaluationState != configGrammarEvaluationState) {
freeChain();

if (grammarEvaluationState != nullptr) {
grammarEvaluationState->Unref();
grammarEvaluationState = nullptr;
}

grammarEvaluationState = configGrammarEvaluationState;
grammarEvaluationState->Ref();
}
} else if (grammarEvaluationState != nullptr) {
freeChain();
grammarEvaluationState->Unref();
grammarEvaluationState = nullptr;
}

return info.Env().Undefined();
}

Napi::Value AddonSampler::AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
AddonGrammarEvaluationState* grammar_evaluation_state =
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
llama_token tokenId = info[1].As<Napi::Number>().Int32Value();

if ((grammar_evaluation_state)->sampler != nullptr) {
try {
llama_sampler_accept((grammar_evaluation_state)->sampler, tokenId);
} catch (const std::exception & e) {
Napi::Error::New(info.Env(), std::string("Failed to accept token in grammar sampler: ") + e.what()).ThrowAsJavaScriptException();
return info.Env().Undefined();
} catch (...) {
Napi::Error::New(info.Env(), "Failed to accept token in grammar sampler").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
}

return info.Env().Undefined();
}
Napi::Value AddonSampler::CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info) {
AddonGrammarEvaluationState* grammar_evaluation_state =
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
llama_token tokenId = info[1].As<Napi::Number>().Int32Value();

if ((grammar_evaluation_state)->sampler != nullptr) {
std::vector<llama_token_data> candidates;
candidates.reserve(1);
candidates.emplace_back(llama_token_data { tokenId, 1, 0.0f });

llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
try {
llama_sampler_apply((grammar_evaluation_state)->sampler, &candidates_p);
} catch (const std::exception & e) {
addonLog(GGML_LOG_LEVEL_DEBUG, std::string("Failed to apply grammar sampler: ") + e.what());
return Napi::Boolean::New(info.Env(), false);
} catch (...) {
return Napi::Boolean::New(info.Env(), false);
}

if (candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) {
return Napi::Boolean::New(info.Env(), false);
}

return Napi::Boolean::New(info.Env(), true);
}

return Napi::Boolean::New(info.Env(), false);
}

void AddonSampler::init(Napi::Object exports) {
exports.Set(
"AddonSampler",
DefineClass(
exports.Env(),
"AddonSampler",
{
InstanceMethod("dispose", &AddonSampler::Dispose),
InstanceMethod("applyConfig", &AddonSampler::ApplyConfig),
StaticMethod("acceptGrammarEvaluationStateToken", &AddonSampler::AcceptGrammarEvaluationStateToken),
StaticMethod("canBeNextTokenForGrammarEvaluationState", &AddonSampler::CanBeNextTokenForGrammarEvaluationState),
}
)
);
}
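Editor's note — a hedged sketch (not part of the vendored file) of the chain order that rebuildChainIfNeeded() above produces when no greedy sampler is active; it only uses calls that already appear in this file, and the concrete values are illustrative, since the actual set of samplers depends on the applied config:

// token bias -> repeat penalties -> grammar -> top-k -> top-p -> min-p -> temperature -> seeded dist
auto params = llama_sampler_chain_default_params();
llama_sampler * chain = llama_sampler_chain_init(params);
llama_sampler_chain_add(chain, llama_sampler_init_top_k(40));   // illustrative values only
llama_sampler_chain_add(chain, llama_sampler_init_temp(0.8f));
llama_sampler_chain_add(chain, llama_sampler_init_dist(1234));  // the seed sampler ends the chain
// freeChain() above detaches each entry with llama_sampler_chain_remove before freeing the chain,
// so the individual samplers keep their internal state between rebuilds.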
63
node_modules/node-llama-cpp/llama/addon/AddonSampler.h
generated
vendored
Normal file
@@ -0,0 +1,63 @@
#pragma once
#include "llama.h"
#include "napi.h"
#include "RingBuffer.h"
#include "addonGlobals.h"
#include "AddonModel.h"

class AddonSampler : public Napi::ObjectWrap<AddonSampler> {
public:
AddonModel* model;
llama_sampler * chain = nullptr;

llama_sampler * temperatureSampler = nullptr;
bool temperatureSampler_initialized = false;
float temperatureSampler_temperature = 0.0f; // 0.0f = disabled

llama_sampler * greedySampler = nullptr;

llama_sampler * minPSampler = nullptr;
float minPSampler_minP = 0.0f; // Min p sampling <=0.0f = disabled

llama_sampler * topKSampler = nullptr;
bool topKSampler_initialized = false;
int topKSampler_topK = 0;

llama_sampler * topPSampler = nullptr;
float topPSampler_topP = 0.0f; // Top p sampling >=1.0 = disabled

llama_sampler * seedSampler = nullptr;
uint32_t seedSampler_seed = 0;

llama_sampler * repeatPenaltySampler = nullptr;
RingBuffer<llama_token> repeatPenalty_lastTokens = RingBuffer<llama_token>(64);
int32_t repeatPenalty_maxTokens = 64;
float repeatPenalty_penalty = 1.10f; // 1.0 = disabled
float repeatPenalty_presencePenalty = 0.00f; // 0.0 = disabled
float repeatPenalty_frequencyPenalty = 0.00f; // 0.0 = disabled

llama_sampler * tokenBiasSampler = nullptr;
std::vector<llama_logit_bias> tokenBiasSampler_biases;

AddonGrammarEvaluationState* grammarEvaluationState = nullptr;

std::vector<llama_token_data> tokenCandidates;

bool disposed = false;

AddonSampler(const Napi::CallbackInfo& info);
~AddonSampler();

void dispose();
void freeChain();
void rebuildChainIfNeeded();
void acceptToken(llama_token token);

Napi::Value Dispose(const Napi::CallbackInfo& info);
Napi::Value ApplyConfig(const Napi::CallbackInfo& info);

static Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info);
static Napi::Value CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info);

static void init(Napi::Object exports);
};
109
node_modules/node-llama-cpp/llama/addon/RingBuffer.h
generated
vendored
Normal file
@@ -0,0 +1,109 @@
// copied from llama-impl.h
template<typename T>
struct RingBuffer {
RingBuffer(size_t cap) : capacity(cap), data(cap) {}

T & front() {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[first];
}

const T & front() const {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[first];
}

T & back() {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[pos];
}

const T & back() const {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
return data[pos];
}

void push_back(const T & value) {
if (capacity == 0) {
throw std::runtime_error("ring buffer: capacity is zero");
}

if (sz == capacity) {
// advance the start when buffer is full
first = (first + 1) % capacity;
} else {
sz++;
}
data[pos] = value;
pos = (pos + 1) % capacity;
}

T pop_front() {
if (sz == 0) {
throw std::runtime_error("ring buffer is empty");
}
T value = data[first];
first = (first + 1) % capacity;
sz--;
return value;
}

//T & operator[](size_t i) {
//    if (i >= sz) {
//        throw std::runtime_error("ring buffer: index out of bounds");
//    }
//    return data[(first + i) % capacity];
//}

//const T & at(size_t i) const {
//    if (i >= sz) {
//        throw std::runtime_error("ring buffer: index out of bounds");
//    }
//    return data[(first + i) % capacity];
//}

const T & rat(size_t i) const {
if (i >= sz) {
throw std::runtime_error("ring buffer: index out of bounds");
}
return data[(first + sz - i - 1) % capacity];
}

std::vector<T> to_vector() const {
std::vector<T> result;
result.reserve(sz);
for (size_t i = 0; i < sz; i++) {
result.push_back(data[(first + i) % capacity]);
}
return result;
}

void clear() {
// here only reset the status of the buffer
sz = 0;
first = 0;
pos = 0;
}

bool empty() const {
return sz == 0;
}

size_t size() const {
return sz;
}

size_t capacity = 0;
size_t sz = 0;
size_t first = 0;
size_t pos = 0;
std::vector<T> data;
};
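Editor's note — a brief usage sketch (not part of the vendored header) of the reverse indexing that AddonSampler relies on when it compares repeatPenalty_lastTokens against the token history passed in from the config: rat(i) counts backwards from the most recently pushed element.

RingBuffer<int> ring(3);
ring.push_back(1);
ring.push_back(2);
ring.push_back(3);
ring.push_back(4);              // capacity reached: the oldest value (1) is overwritten
// ring.to_vector() == {2, 3, 4} (oldest to newest)
// ring.rat(0) == 4 (newest), ring.rat(2) == 2 (oldest)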
314
node_modules/node-llama-cpp/llama/addon/addon.cpp
generated
vendored
Normal file
314
node_modules/node-llama-cpp/llama/addon/addon.cpp
generated
vendored
Normal file
@@ -0,0 +1,314 @@
|
||||
#include "addonGlobals.h"
|
||||
#include "AddonModel.h"
|
||||
#include "AddonModelLora.h"
|
||||
#include "AddonGrammar.h"
|
||||
#include "AddonGrammarEvaluationState.h"
|
||||
#include "AddonSampler.h"
|
||||
#include "AddonContext.h"
|
||||
#include "globals/addonLog.h"
|
||||
#include "globals/addonProgress.h"
|
||||
#include "globals/getGpuInfo.h"
|
||||
#include "globals/getSwapInfo.h"
|
||||
#include "globals/getMemoryInfo.h"
|
||||
|
||||
#include <atomic>
|
||||
|
||||
bool backendInitialized = false;
|
||||
bool backendDisposed = false;
|
||||
|
||||
Napi::Value systemInfo(const Napi::CallbackInfo& info) {
|
||||
return Napi::String::From(info.Env(), llama_print_system_info());
|
||||
}
|
||||
|
||||
Napi::Value addonGetSupportsGpuOffloading(const Napi::CallbackInfo& info) {
|
||||
return Napi::Boolean::New(info.Env(), llama_supports_gpu_offload());
|
||||
}
|
||||
|
||||
Napi::Value addonGetSupportsMmap(const Napi::CallbackInfo& info) {
|
||||
return Napi::Boolean::New(info.Env(), llama_supports_mmap());
|
||||
}
|
||||
|
||||
Napi::Value addonGetGpuSupportsMmap(const Napi::CallbackInfo& info) {
|
||||
const auto llamaSupportsMmap = llama_supports_mmap();
|
||||
const auto gpuDevice = getGpuDevice().first;
|
||||
|
||||
if (gpuDevice == nullptr) {
|
||||
return Napi::Boolean::New(info.Env(), false);
|
||||
}
|
||||
|
||||
ggml_backend_dev_props props;
|
||||
ggml_backend_dev_get_props(gpuDevice, &props);
|
||||
|
||||
const bool gpuSupportsMmap = llama_supports_mmap() && props.caps.buffer_from_host_ptr;
|
||||
return Napi::Boolean::New(info.Env(), gpuSupportsMmap);
|
||||
}
|
||||
|
||||
Napi::Value addonGetSupportsMlock(const Napi::CallbackInfo& info) {
|
||||
return Napi::Boolean::New(info.Env(), llama_supports_mlock());
|
||||
}
|
||||
|
||||
Napi::Value addonGetMathCores(const Napi::CallbackInfo& info) {
|
||||
return Napi::Number::New(info.Env(), cpu_get_num_math());
|
||||
}
|
||||
|
||||
Napi::Value addonGetBlockSizeForGgmlType(const Napi::CallbackInfo& info) {
|
||||
const int ggmlType = info[0].As<Napi::Number>().Int32Value();
|
||||
|
||||
if (ggmlType < 0 || ggmlType > GGML_TYPE_COUNT) {
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
const auto blockSize = ggml_blck_size(static_cast<ggml_type>(ggmlType));
|
||||
|
||||
return Napi::Number::New(info.Env(), blockSize);
|
||||
}
|
||||
|
||||
Napi::Value addonGetTypeSizeForGgmlType(const Napi::CallbackInfo& info) {
|
||||
const int ggmlType = info[0].As<Napi::Number>().Int32Value();
|
||||
|
||||
if (ggmlType < 0 || ggmlType > GGML_TYPE_COUNT) {
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
const auto typeSize = ggml_type_size(static_cast<ggml_type>(ggmlType));
|
||||
|
||||
return Napi::Number::New(info.Env(), typeSize);
|
||||
}
|
||||
|
||||
Napi::Value addonGetGgmlGraphOverheadCustom(const Napi::CallbackInfo& info) {
|
||||
if (info.Length() < 2 || !info[0].IsNumber() || !info[1].IsBoolean()) {
|
||||
return Napi::Number::New(info.Env(), 0);
|
||||
}
|
||||
|
||||
const size_t size = info[0].As<Napi::Number>().Uint32Value();
|
||||
const bool grads = info[1].As<Napi::Boolean>().Value();
|
||||
|
||||
const auto graphOverhead = ggml_graph_overhead_custom(size, grads);
|
||||
|
||||
return Napi::Number::New(info.Env(), graphOverhead);
|
||||
}
|
||||
|
||||
Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
|
||||
Napi::Object consts = Napi::Object::New(info.Env());
|
||||
consts.Set("ggmlMaxDims", Napi::Number::New(info.Env(), GGML_MAX_DIMS));
|
||||
consts.Set("ggmlTypeF16Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F16)));
|
||||
consts.Set("ggmlTypeF32Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F32)));
|
||||
consts.Set("ggmlTensorOverhead", Napi::Number::New(info.Env(), ggml_tensor_overhead()));
|
||||
consts.Set("llamaPosSize", Napi::Number::New(info.Env(), sizeof(llama_pos)));
|
||||
consts.Set("llamaSeqIdSize", Napi::Number::New(info.Env(), sizeof(llama_seq_id)));
|
||||
|
||||
return consts;
|
||||
}
|
||||
|
||||
class AddonBackendLoadWorker : public Napi::AsyncWorker {
|
||||
public:
|
||||
AddonBackendLoadWorker(const Napi::Env& env)
|
||||
: Napi::AsyncWorker(env, "AddonBackendLoadWorker"),
|
||||
deferred(Napi::Promise::Deferred::New(env)) {
|
||||
}
|
||||
~AddonBackendLoadWorker() {
|
||||
}
|
||||
|
||||
Napi::Promise GetPromise() {
|
||||
return deferred.Promise();
|
||||
}
|
||||
|
||||
protected:
|
||||
Napi::Promise::Deferred deferred;
|
||||
|
||||
void Execute() {
|
||||
try {
|
||||
llama_backend_init();
|
||||
|
||||
try {
|
||||
if (backendDisposed) {
|
||||
llama_backend_free();
|
||||
} else {
|
||||
backendInitialized = true;
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
SetError(e.what());
|
||||
} catch(...) {
|
||||
SetError("Unknown error when calling \"llama_backend_free\"");
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
SetError(e.what());
|
||||
} catch(...) {
|
||||
SetError("Unknown error when calling \"llama_backend_init\"");
|
||||
}
|
||||
}
|
||||
void OnOK() {
|
||||
deferred.Resolve(Env().Undefined());
|
||||
}
|
||||
void OnError(const Napi::Error& err) {
|
||||
deferred.Reject(err.Value());
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class AddonBackendUnloadWorker : public Napi::AsyncWorker {
|
||||
public:
|
||||
AddonBackendUnloadWorker(const Napi::Env& env)
|
||||
: Napi::AsyncWorker(env, "AddonBackendUnloadWorker"),
|
||||
deferred(Napi::Promise::Deferred::New(env)) {
|
||||
}
|
||||
~AddonBackendUnloadWorker() {
|
||||
}
|
||||
|
||||
Napi::Promise GetPromise() {
|
||||
return deferred.Promise();
|
||||
}
|
||||
|
||||
protected:
|
||||
Napi::Promise::Deferred deferred;
|
||||
|
||||
void Execute() {
|
||||
try {
|
||||
if (backendInitialized) {
|
||||
backendInitialized = false;
|
||||
llama_backend_free();
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
SetError(e.what());
|
||||
} catch(...) {
|
||||
SetError("Unknown error when calling \"llama_backend_free\"");
|
||||
}
|
||||
}
|
||||
void OnOK() {
|
||||
deferred.Resolve(Env().Undefined());
|
||||
}
|
||||
void OnError(const Napi::Error& err) {
|
||||
deferred.Reject(err.Value());
|
||||
}
|
||||
};
|
||||
|
||||
Napi::Value addonLoadBackends(const Napi::CallbackInfo& info) {
|
||||
const std::string forceLoadLibrariesSearchPath = info.Length() == 0
|
||||
? ""
|
||||
: info[0].IsString()
|
||||
? info[0].As<Napi::String>().Utf8Value()
|
||||
: "";
|
||||
|
||||
ggml_backend_reg_count();
|
||||
|
||||
if (forceLoadLibrariesSearchPath.length() > 0) {
|
||||
ggml_backend_load_all_from_path(forceLoadLibrariesSearchPath.c_str());
|
||||
}
|
||||
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
Napi::Value addonSetNuma(const Napi::CallbackInfo& info) {
|
||||
const bool numaDisabled = info.Length() == 0
|
||||
? true
|
||||
: info[0].IsBoolean()
|
||||
? !info[0].As<Napi::Boolean>().Value()
|
||||
: false;
|
||||
|
||||
if (numaDisabled)
|
||||
return info.Env().Undefined();
|
||||
|
||||
const auto numaType = info[0].IsString()
|
||||
? info[0].As<Napi::String>().Utf8Value()
|
||||
: "";
|
||||
|
||||
if (numaType == "distribute") {
|
||||
llama_numa_init(GGML_NUMA_STRATEGY_DISTRIBUTE);
|
||||
} else if (numaType == "isolate") {
|
||||
llama_numa_init(GGML_NUMA_STRATEGY_ISOLATE);
|
||||
} else if (numaType == "numactl") {
|
||||
llama_numa_init(GGML_NUMA_STRATEGY_NUMACTL);
|
||||
} else if (numaType == "mirror") {
|
||||
llama_numa_init(GGML_NUMA_STRATEGY_MIRROR);
|
||||
} else {
|
||||
Napi::Error::New(info.Env(), std::string("Invalid NUMA strategy \"") + numaType + "\"").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
Napi::Value markLoaded(const Napi::CallbackInfo& info) {
|
||||
static std::atomic_bool loaded = false;
|
||||
return Napi::Boolean::New(info.Env(), loaded.exchange(true));
|
||||
}
|
||||
|
||||
Napi::Value addonInit(const Napi::CallbackInfo& info) {
|
||||
if (backendInitialized) {
|
||||
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
||||
deferred.Resolve(info.Env().Undefined());
|
||||
return deferred.Promise();
|
||||
}
|
||||
|
||||
AddonBackendLoadWorker* worker = new AddonBackendLoadWorker(info.Env());
|
||||
worker->Queue();
|
||||
return worker->GetPromise();
|
||||
}
|
||||
|
||||
Napi::Value addonDispose(const Napi::CallbackInfo& info) {
|
||||
if (backendDisposed) {
|
||||
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
||||
deferred.Resolve(info.Env().Undefined());
|
||||
return deferred.Promise();
|
||||
}
|
||||
|
||||
backendDisposed = true;
|
||||
|
||||
AddonBackendUnloadWorker* worker = new AddonBackendUnloadWorker(info.Env());
|
||||
worker->Queue();
|
||||
return worker->GetPromise();
|
||||
}
|
||||
|
||||
static void addonFreeLlamaBackend(Napi::Env env, int* data) {
|
||||
if (backendDisposed) {
|
||||
return;
|
||||
}
|
||||
|
||||
backendDisposed = true;
|
||||
if (backendInitialized) {
|
||||
backendInitialized = false;
|
||||
llama_backend_free();
|
||||
}
|
||||
}
Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
    exports.DefineProperties({
        Napi::PropertyDescriptor::Function("markLoaded", markLoaded),
        Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
        Napi::PropertyDescriptor::Function("getSupportsGpuOffloading", addonGetSupportsGpuOffloading),
        Napi::PropertyDescriptor::Function("getSupportsMmap", addonGetSupportsMmap),
        Napi::PropertyDescriptor::Function("getGpuSupportsMmap", addonGetGpuSupportsMmap),
        Napi::PropertyDescriptor::Function("getSupportsMlock", addonGetSupportsMlock),
        Napi::PropertyDescriptor::Function("getMathCores", addonGetMathCores),
        Napi::PropertyDescriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
        Napi::PropertyDescriptor::Function("getTypeSizeForGgmlType", addonGetTypeSizeForGgmlType),
        Napi::PropertyDescriptor::Function("getGgmlGraphOverheadCustom", addonGetGgmlGraphOverheadCustom),
        Napi::PropertyDescriptor::Function("getConsts", addonGetConsts),
        Napi::PropertyDescriptor::Function("setLogger", setLogger),
        Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
        Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
        Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
        Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
        Napi::PropertyDescriptor::Function("ensureGpuDeviceIsSupported", ensureGpuDeviceIsSupported),
        Napi::PropertyDescriptor::Function("getSwapInfo", getSwapInfo),
        Napi::PropertyDescriptor::Function("getMemoryInfo", getMemoryInfo),
        Napi::PropertyDescriptor::Function("loadBackends", addonLoadBackends),
        Napi::PropertyDescriptor::Function("setNuma", addonSetNuma),
        Napi::PropertyDescriptor::Function("init", addonInit),
        Napi::PropertyDescriptor::Function("dispose", addonDispose),
    });
    AddonModel::init(exports);
    AddonModelLora::init(exports);
    AddonGrammar::init(exports);
    AddonGrammarEvaluationState::init(exports);
    AddonContext::init(exports);
    AddonSampler::init(exports);

    llama_log_set(addonLlamaCppLogCallback, nullptr);

    exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));

    return exports;
}

NODE_API_MODULE(NODE_GYP_MODULE_NAME, registerCallback)
22
node_modules/node-llama-cpp/llama/addon/addonGlobals.cpp
generated
vendored
Normal file
@@ -0,0 +1,22 @@
#include <sstream>
#include <vector>
#include <algorithm>
#include <limits>

#include "addonGlobals.h"
#include "napi.h"
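
// Napi::MemoryManagement::AdjustExternalMemory takes a signed 64-bit delta, while the tracked sizes
// are unsigned 64-bit values, so the adjustment is applied in chunks of at most INT64_MAX bytes.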
void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size) {
    const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
    while (size > 0) {
        int64_t adjustSize = std::min(size, chunkSize);
        Napi::MemoryManagement::AdjustExternalMemory(env, adjustSize);
        size -= adjustSize;
    }
}

void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
    const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
    while (size > 0) {
        int64_t adjustSize = std::min(size, chunkSize);
        Napi::MemoryManagement::AdjustExternalMemory(env, -adjustSize);
        size -= adjustSize;
    }
}
12
node_modules/node-llama-cpp/llama/addon/addonGlobals.h
generated
vendored
Normal file
@@ -0,0 +1,12 @@
#pragma once
#include "napi.h"

class AddonModel;
class AddonModelLora;
class AddonModelData;
class AddonContext;
class AddonGrammar;
class AddonGrammarEvaluationState;

void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size);
void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size);
143
node_modules/node-llama-cpp/llama/addon/globals/addonLog.cpp
generated
vendored
Normal file
@@ -0,0 +1,143 @@
#include <sstream>

#include "addonLog.h"

AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
bool addonJsLoggerCallbackSet = false;
int addonLoggerLogLevel = 5;
int addonLastLoggerLogLevel = 6;
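
// Maps ggml log levels to the numeric scale used by the JS logger:
// 2 = error, 3 = warn, 4 = info, 5 = none, 6 = debug. GGML_LOG_LEVEL_CONT continuation messages
// reuse the level of the previous message, and unknown levels fall back to 1.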
static int addonGetGgmlLogLevelNumber(ggml_log_level level) {
    switch (level) {
        case GGML_LOG_LEVEL_ERROR: return 2;
        case GGML_LOG_LEVEL_WARN: return 3;
        case GGML_LOG_LEVEL_INFO: return 4;
        case GGML_LOG_LEVEL_NONE: return 5;
        case GGML_LOG_LEVEL_DEBUG: return 6;
        case GGML_LOG_LEVEL_CONT: return addonLastLoggerLogLevel;
    }

    return 1;
}

void addonCallJsLogCallback(
    Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
) {
    bool called = false;

    if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
        try {
            callback.Call({
                Napi::Number::New(env, data->logLevelNumber),
                Napi::String::New(env, data->stringStream->str()),
            });
            called = true;
        } catch (const Napi::Error& e) {
            called = false;
        }
    }

    if (!called && data != nullptr) {
        if (data->logLevelNumber == 2) {
            fputs(data->stringStream->str().c_str(), stderr);
            fflush(stderr);
        } else {
            fputs(data->stringStream->str().c_str(), stdout);
            fflush(stdout);
        }
    }

    if (data != nullptr) {
        delete data->stringStream;
        delete data;
    }
}

void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data) {
    int logLevelNumber = addonGetGgmlLogLevelNumber(level);
    addonLastLoggerLogLevel = logLevelNumber;

    if (logLevelNumber > addonLoggerLogLevel) {
        return;
    }

    if (addonJsLoggerCallbackSet) {
        std::stringstream* stringStream = new std::stringstream();
        if (text != nullptr) {
            *stringStream << text;
        }

        addon_logger_log* data = new addon_logger_log {
            logLevelNumber,
            stringStream,
        };

        auto status = addonThreadSafeLoggerCallback.NonBlockingCall(data);

        if (status == napi_ok) {
            return;
        } else {
            delete stringStream;
            delete data;
        }
    }

    if (text != nullptr) {
        if (level == GGML_LOG_LEVEL_ERROR) {
            fputs(text, stderr);
            fflush(stderr);
        } else {
            fputs(text, stdout);
            fflush(stdout);
        }
    }
}

Napi::Value setLogger(const Napi::CallbackInfo& info) {
    if (addonJsLoggerCallbackSet) {
        addonJsLoggerCallbackSet = false;
        addonThreadSafeLoggerCallback.Release();
    }

    if (info.Length() < 1 || !info[0].IsFunction()) {
        return info.Env().Undefined();
    }

    auto addonLoggerJSCallback = info[0].As<Napi::Function>();
    AddonThreadSafeLogCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
    addonThreadSafeLoggerCallback = AddonThreadSafeLogCallbackFunction::New(
        info.Env(),
        addonLoggerJSCallback,
        "loggerCallback",
        0,
        1,
        context,
        [](Napi::Env, void*, AddonThreadSafeLogCallbackFunctionContext* ctx) {
            addonJsLoggerCallbackSet = false;

            delete ctx;
        }
    );
    addonJsLoggerCallbackSet = true;

    // prevent blocking the main node process from exiting due to active resources
    addonThreadSafeLoggerCallback.Unref(info.Env());

    return info.Env().Undefined();
}

Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
    if (info.Length() < 1 || !info[0].IsNumber()) {
        addonLoggerLogLevel = 5;

        return info.Env().Undefined();
    }

    addonLoggerLogLevel = info[0].As<Napi::Number>().Int32Value();

    return info.Env().Undefined();
}

void addonLog(ggml_log_level level, const std::string text) {
    addonLlamaCppLogCallback(level, std::string("[addon] " + text + "\n").c_str(), nullptr);
}
24
node_modules/node-llama-cpp/llama/addon/globals/addonLog.h
generated
vendored
Normal file
@@ -0,0 +1,24 @@
#pragma once
#include <sstream>

#include "llama.h"
#include "napi.h"

struct addon_logger_log {
    public:
        const int logLevelNumber;
        const std::stringstream* stringStream;
};

void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);

using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
void addonCallJsLogCallback(
    Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
);
using AddonThreadSafeLogCallbackFunction =
    Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;

Napi::Value setLogger(const Napi::CallbackInfo& info);
Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info);

void addonLog(ggml_log_level level, const std::string text);
15
node_modules/node-llama-cpp/llama/addon/globals/addonProgress.cpp
generated
vendored
Normal file
@@ -0,0 +1,15 @@
#include "addonProgress.h"

void addonCallJsProgressCallback(
    Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
) {
    if (env != nullptr && callback != nullptr) {
        try {
            callback.Call({Napi::Number::New(env, data->progress)});
        } catch (const Napi::Error& e) {}
    }

    if (data != nullptr) {
        delete data;
    }
}
15
node_modules/node-llama-cpp/llama/addon/globals/addonProgress.h
generated
vendored
Normal file
@@ -0,0 +1,15 @@
#pragma once
#include "napi.h"

struct addon_progress_event {
    public:
        const float progress;
};

using AddonThreadSafeProgressCallbackFunctionContext = Napi::Reference<Napi::Value>;
void addonCallJsProgressCallback(
    Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
);
using AddonThreadSafeProgressEventCallbackFunction =
    Napi::TypedThreadSafeFunction<AddonThreadSafeProgressCallbackFunctionContext, addon_progress_event, addonCallJsProgressCallback>;
146
node_modules/node-llama-cpp/llama/addon/globals/getGpuInfo.cpp
generated
vendored
Normal file
@@ -0,0 +1,146 @@
#include "getGpuInfo.h"
#include "addonLog.h"

#ifdef __APPLE__
#include <TargetConditionals.h>
#endif

#ifdef GPU_INFO_USE_VULKAN
#   include "../../gpuInfo/vulkan-gpu-info.h"
#endif

#ifdef GPU_INFO_USE_VULKAN
void logVulkanWarning(const char* message) {
    addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr);
}
#endif

Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
    ggml_backend_dev_t device = NULL;
    size_t deviceTotal = 0;
    size_t deviceFree = 0;

    uint64_t total = 0;
    uint64_t used = 0;
    uint64_t unifiedVramSize = 0;

    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        device = ggml_backend_dev_get(i);
        auto deviceType = ggml_backend_dev_type(device);
        if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) {
            deviceTotal = 0;
            deviceFree = 0;
            ggml_backend_dev_memory(device, &deviceFree, &deviceTotal);

            total += deviceTotal;
            used += deviceTotal - deviceFree;

#if defined(__arm64__) || defined(__aarch64__)
            if (std::string(ggml_backend_dev_name(device)) == "Metal") {
                unifiedVramSize += deviceTotal;
            }
#endif
        }
    }

#ifdef GPU_INFO_USE_VULKAN
    uint64_t vulkanDeviceTotal = 0;
    uint64_t vulkanDeviceUsed = 0;
    uint64_t vulkanDeviceUnifiedVramSize = 0;
    const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, &vulkanDeviceUnifiedVramSize, logVulkanWarning);

    if (vulkanDeviceSupportsMemoryBudgetExtension) {
        if (vulkanDeviceUnifiedVramSize > total) {
            // this means that we counted memory from devices that aren't used by llama.cpp
            vulkanDeviceUnifiedVramSize = 0;
        }

        unifiedVramSize += vulkanDeviceUnifiedVramSize;
    }

    if (used == 0 && vulkanDeviceUsed != 0) {
        used = vulkanDeviceUsed;
    }
#endif

    Napi::Object result = Napi::Object::New(info.Env());
    result.Set("total", Napi::Number::From(info.Env(), total));
    result.Set("used", Napi::Number::From(info.Env(), used));
    result.Set("unifiedSize", Napi::Number::From(info.Env(), unifiedVramSize));

    return result;
}

Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
    std::vector<std::string> deviceNames;

    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t device = ggml_backend_dev_get(i);
        auto deviceType = ggml_backend_dev_type(device);
        if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) {
            deviceNames.push_back(std::string(ggml_backend_dev_description(device)));
        }
    }

    Napi::Object result = Napi::Object::New(info.Env());

    Napi::Array deviceNamesNapiArray = Napi::Array::New(info.Env(), deviceNames.size());
    for (size_t i = 0; i < deviceNames.size(); ++i) {
        deviceNamesNapiArray[i] = Napi::String::New(info.Env(), deviceNames[i]);
    }
    result.Set("deviceNames", deviceNamesNapiArray);

    return result;
}
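
// Picks the primary compute device: prefers Metal, then Vulkan, then CUDA/ROCm/MUSA (all reported
// as "cuda"), and falls back to the CPU device when no GPU backend is available.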
std::pair<ggml_backend_dev_t, std::string> getGpuDevice() {
    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t device = ggml_backend_dev_get(i);
        const auto deviceName = std::string(ggml_backend_dev_name(device));

        if (deviceName == "Metal") {
            return std::pair<ggml_backend_dev_t, std::string>(device, "metal");
        } else if (deviceName.find("Vulkan") == 0) {
            return std::pair<ggml_backend_dev_t, std::string>(device, "vulkan");
        } else if (deviceName.find("CUDA") == 0 || deviceName.find("ROCm") == 0 || deviceName.find("MUSA") == 0) {
            return std::pair<ggml_backend_dev_t, std::string>(device, "cuda");
        }
    }

    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t device = ggml_backend_dev_get(i);
        const auto deviceName = std::string(ggml_backend_dev_name(device));

        if (deviceName == "CPU") {
            return std::pair<ggml_backend_dev_t, std::string>(device, "cpu");
        }
    }

    return std::pair<ggml_backend_dev_t, std::string>(nullptr, "");
}

Napi::Value getGpuType(const Napi::CallbackInfo& info) {
    const auto gpuDeviceRes = getGpuDevice();
    const auto device = gpuDeviceRes.first;
    const auto deviceType = gpuDeviceRes.second;

    if (deviceType == "cpu") {
        return Napi::Boolean::New(info.Env(), false);
    } else if (device != nullptr && deviceType != "") {
        return Napi::String::New(info.Env(), deviceType);
    }

    return info.Env().Undefined();
}

Napi::Value ensureGpuDeviceIsSupported(const Napi::CallbackInfo& info) {
#ifdef GPU_INFO_USE_VULKAN
    if (!checkIsVulkanEnvSupported(logVulkanWarning)) {
        Napi::Error::New(info.Env(), "Vulkan device is not supported").ThrowAsJavaScriptException();
        return info.Env().Undefined();
    }
#endif

    return info.Env().Undefined();
}
11
node_modules/node-llama-cpp/llama/addon/globals/getGpuInfo.h
generated
vendored
Normal file
@@ -0,0 +1,11 @@
#pragma once
#include <utility>
#include <string>
#include "napi.h"
#include "llama.h"

Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info);
Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info);
std::pair<ggml_backend_dev_t, std::string> getGpuDevice();
Napi::Value getGpuType(const Napi::CallbackInfo& info);
Napi::Value ensureGpuDeviceIsSupported(const Napi::CallbackInfo& info);
63
node_modules/node-llama-cpp/llama/addon/globals/getMemoryInfo.cpp
generated
vendored
Normal file
@@ -0,0 +1,63 @@
#include "getMemoryInfo.h"
#include "addonLog.h"

#ifdef __APPLE__
#include <iostream>
#include <mach/mach.h>
#include <sys/sysctl.h>
#elif __linux__
#include <fstream>
#include <sstream>
#include <string>
#elif _WIN32
#include <iostream>
#include <windows.h>
#include <psapi.h>
#endif
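
// Reports the current process memory footprint: task_info virtual_size on macOS, VmSize from
// /proc/self/status on Linux, and PrivateUsage from GetProcessMemoryInfo on Windows. On any other
// platform (or on failure) the reported total stays 0.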
Napi::Value getMemoryInfo(const Napi::CallbackInfo& info) {
    uint64_t totalMemoryUsage = 0;

#ifdef __APPLE__
    struct mach_task_basic_info taskInfo;
    mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
    if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&taskInfo, &infoCount) == KERN_SUCCESS) {
        totalMemoryUsage = taskInfo.virtual_size;
    } else {
        addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get memory usage info").c_str(), nullptr);
    }
#elif __linux__
    std::ifstream procStatus("/proc/self/status");
    std::string line;
    bool foundMemoryUsage = false;
    while (std::getline(procStatus, line)) {
        if (line.rfind("VmSize:", 0) == 0) { // VmSize: total virtual memory used by the process
            std::istringstream iss(line);
            std::string key, unit;
            size_t value;
            if (iss >> key >> value >> unit) {
                totalMemoryUsage = value * 1024; // Convert from kB to bytes
                foundMemoryUsage = true;
            }
            break;
        }
    }

    if (!foundMemoryUsage) {
        addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get memory usage info").c_str(), nullptr);
    }
#elif _WIN32
    PROCESS_MEMORY_COUNTERS_EX memCounters;

    if (GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&memCounters, sizeof(memCounters))) {
        totalMemoryUsage = memCounters.PrivateUsage;
    } else {
        addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get memory usage info").c_str(), nullptr);
    }
#endif

    Napi::Object obj = Napi::Object::New(info.Env());
    obj.Set("total", Napi::Number::New(info.Env(), totalMemoryUsage));
    return obj;
}
4
node_modules/node-llama-cpp/llama/addon/globals/getMemoryInfo.h
generated
vendored
Normal file
@@ -0,0 +1,4 @@
#pragma once
#include "napi.h"

Napi::Value getMemoryInfo(const Napi::CallbackInfo& info);
69
node_modules/node-llama-cpp/llama/addon/globals/getSwapInfo.cpp
generated
vendored
Normal file
@@ -0,0 +1,69 @@
#include "getSwapInfo.h"
#include "addonLog.h"

#ifdef __APPLE__
#include <iostream>
#include <mach/mach.h>
#include <sys/sysctl.h>
#elif __linux__
#include <iostream>
#include <sys/sysinfo.h>
#elif _WIN32
#include <iostream>
#include <windows.h>
#include <psapi.h>
#endif
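
// Reports swap/pagefile usage: vm.swapusage via sysctl on macOS, sysinfo() on Linux, and
// GlobalMemoryStatusEx/GetPerformanceInfo on Windows. "maxSize" is reported as -1 on macOS,
// where no fixed limit is queried since the swap file grows dynamically.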
Napi::Value getSwapInfo(const Napi::CallbackInfo& info) {
    uint64_t totalSwap = 0;
    uint64_t freeSwap = 0;
    uint64_t maxSize = 0;
    bool maxSizeSet = true;

#ifdef __APPLE__
    struct xsw_usage swapInfo;
    size_t size = sizeof(swapInfo);

    if (sysctlbyname("vm.swapusage", &swapInfo, &size, NULL, 0) == 0) {
        totalSwap = swapInfo.xsu_total;
        freeSwap = swapInfo.xsu_avail;
        maxSizeSet = false;
    } else {
        addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get swap info").c_str(), nullptr);
    }
#elif __linux__
    struct sysinfo sysInfo;

    if (sysinfo(&sysInfo) == 0) {
        totalSwap = sysInfo.totalswap;
        freeSwap = sysInfo.freeswap;
        maxSize = sysInfo.totalswap;
    } else {
        addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get swap info").c_str(), nullptr);
    }
#elif _WIN32
    MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);

    if (GlobalMemoryStatusEx(&memInfo)) {
        PERFORMANCE_INFORMATION perfInfo;
        perfInfo.cb = sizeof(PERFORMANCE_INFORMATION);
        if (GetPerformanceInfo(&perfInfo, sizeof(perfInfo))) {
            totalSwap = memInfo.ullTotalPageFile;
            freeSwap = memInfo.ullAvailPageFile;
            maxSize = perfInfo.CommitLimit * perfInfo.PageSize;
        } else {
            addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get max pagefile size").c_str(), nullptr);
        }
    } else {
        addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, std::string("Failed to get pagefile info").c_str(), nullptr);
    }
#endif

    Napi::Object obj = Napi::Object::New(info.Env());
    obj.Set("total", Napi::Number::New(info.Env(), totalSwap));
    obj.Set("free", Napi::Number::New(info.Env(), freeSwap));
    obj.Set("maxSize", maxSizeSet ? Napi::Number::New(info.Env(), maxSize) : Napi::Number::New(info.Env(), -1));
    return obj;
}
4
node_modules/node-llama-cpp/llama/addon/globals/getSwapInfo.h
generated
vendored
Normal file
@@ -0,0 +1,4 @@
#pragma once
#include "napi.h"

Napi::Value getSwapInfo(const Napi::CallbackInfo& info);