First upload version 0.0.1
This commit is contained in:
67
node_modules/node-llama-cpp/dist/gguf/insights/GgufInsights.d.ts
generated
vendored
Normal file
67
node_modules/node-llama-cpp/dist/gguf/insights/GgufInsights.d.ts
generated
vendored
Normal file
@@ -0,0 +1,67 @@
|
||||
import { Llama } from "../../bindings/Llama.js";
|
||||
import { GgufFileInfo } from "../types/GgufFileInfoTypes.js";
|
||||
import { GgufInsightsConfigurationResolver } from "./GgufInsightsConfigurationResolver.js";
|
||||
import { GgufInsightsTokens } from "./GgufInsightsTokens.js";
|
||||
/**
 * Resource requirement figures, as returned by `GgufInsights.estimateModelResourceRequirements`
 * and `GgufInsights.estimateContextResourceRequirements`.
 *
 * NOTE(review): the unit of both fields is not stated in this declaration (presumably bytes) - confirm.
 */
export type GgufInsightsResourceRequirements = {
|
||||
/** Estimated CPU RAM requirement */
cpuRam: number;
|
||||
/** Estimated GPU VRAM requirement */
gpuVram: number;
|
||||
};
|
||||
/**
 * Exposes information about a GGUF model file (described by a `GgufFileInfo`)
 * and estimations of the resources needed to use it.
 *
 * The constructor is private; create instances with the static `from()` method.
 */
export declare class GgufInsights {
|
||||
private constructor();
|
||||
/**
|
||||
* Get warnings about the model file that would affect its usage.
|
||||
*
|
||||
* Most of these warnings are also generated by `llama.cpp`
|
||||
*/
|
||||
getWarnings(modelFilePath?: string): string[];
|
||||
/**
 * The GGUF file info associated with this instance.
 * NOTE(review): presumably the value passed to `from()` - confirm.
 */
get ggufFileInfo(): GgufFileInfo;
|
||||
/** The `GgufInsightsConfigurationResolver` associated with this instance */
get configurationResolver(): GgufInsightsConfigurationResolver;
|
||||
/** The `GgufInsightsTokens` associated with this instance */
get tokens(): GgufInsightsTokens;
|
||||
/** The context size the model was trained on */
|
||||
get trainContextSize(): number | undefined;
|
||||
/** The size of an embedding vector the model can produce */
|
||||
get embeddingVectorSize(): number | undefined;
|
||||
/**
 * The total number of layers of the model.
 * NOTE(review): which layers are counted is not visible in this declaration - confirm.
 */
get totalLayers(): number;
|
||||
/**
 * The size of the model.
 * NOTE(review): the unit is not stated in this declaration (presumably bytes) - confirm.
 */
get modelSize(): number;
|
||||
/** Whether flash attention is supported for the model */
get flashAttentionSupported(): boolean;
|
||||
/** Whether the model has an encoder */
get hasEncoder(): boolean;
|
||||
/** Whether the model has a decoder */
get hasDecoder(): boolean;
|
||||
/** Whether the model is recurrent */
get isRecurrent(): boolean;
|
||||
/** Whether the model supports ranking */
get supportsRanking(): boolean;
|
||||
/**
|
||||
* The size of the SWA (Sliding Window Attention).
|
||||
*
|
||||
* When `undefined`, the model does not use sliding window attention.
|
||||
*/
|
||||
get swaSize(): number | undefined;
|
||||
/**
 * Estimates the resources required for the model itself, given how many layers are placed on the GPU.
 *
 * Only `gpuLayers` is required; `useMmap` and `gpuSupportsMmap` are optional.
 * NOTE(review): the defaults used for the optional flags are not visible in this declaration - confirm.
 * @returns the estimated CPU RAM and GPU VRAM requirements
 */
estimateModelResourceRequirements({ gpuLayers, useMmap, gpuSupportsMmap }: {
|
||||
gpuLayers: number;
|
||||
useMmap?: boolean;
|
||||
gpuSupportsMmap?: boolean;
|
||||
}): GgufInsightsResourceRequirements;
|
||||
/**
|
||||
* Estimates the memory required to create a context of the given parameters based on the implementation details of `llama.cpp`.
|
||||
* The calculation doesn't include a precise estimation of the graph overhead memory, so it uses a rough estimate for that.
|
||||
* The estimation for the graph overhead memory will be improved in the future to be more precise, but it's good enough for now.
|
||||
*/
|
||||
estimateContextResourceRequirements({ contextSize, modelGpuLayers, batchSize, sequences, isEmbeddingContext, includeGraphOverhead, flashAttention, swaFullCache }: {
|
||||
contextSize: number;
|
||||
modelGpuLayers: number;
|
||||
batchSize?: number;
|
||||
sequences?: number;
|
||||
isEmbeddingContext?: boolean;
|
||||
flashAttention?: boolean;
|
||||
includeGraphOverhead?: boolean;
|
||||
swaFullCache?: boolean;
|
||||
}): GgufInsightsResourceRequirements;
|
||||
/**
|
||||
* @param ggufFileInfo
|
||||
* @param llama - If you already have a `Llama` instance, pass it to reuse it for the `GgufInsights` instance.
|
||||
* If you don't pass a `Llama` instance, a basic `Llama` instance is created as a fallback - it's a slim instance that
|
||||
* doesn't instantiate a `llama.cpp` backend, so it won't utilize the GPU at all, and it will be shared with other `GgufInsights` instances
|
||||
* that need a fallback `Llama` instance.
|
||||
*/
|
||||
static from(ggufFileInfo: GgufFileInfo, llama?: Llama): Promise<GgufInsights>;
|
||||
}
|
||||
/**
 * Parses a ranking template.
 *
 * NOTE(review): the transformation applied, and the conditions under which `undefined` is returned,
 * are not visible in this declaration - confirm against the implementation.
 * @param template - The template to parse; `undefined` and `null` are accepted
 * @returns a string, or `undefined`
 */
export declare function parseRankingTemplate(template: string | undefined | null): string | undefined;
|
||||
/**
 * Checks whether a ranking template is valid.
 *
 * NOTE(review): the validity criteria are not visible in this declaration - confirm against the implementation.
 * @param template - The template to check; `undefined` and `null` are accepted
 * @returns `true` when the template is considered valid, `false` otherwise
 */
export declare function isRankingTemplateValid(template: string | undefined | null): boolean;
|
||||
Reference in New Issue
Block a user