First upload version 0.0.1
This commit is contained in:
85
node_modules/node-llama-cpp/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js
generated
vendored
Normal file
85
node_modules/node-llama-cpp/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js
generated
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
const maxSequentialUnhelpfulIterations = 100;
|
||||
export async function findCharacterRemovalCountToFitChatHistoryInContext({ compressChatHistory, chatHistory, tokensCountToFit, tokenizer, chatWrapper, initialCharactersRemovalCount = 0, estimatedCharactersPerToken = 5, maxDecompressionAttempts = 2, failedCompressionErrorMessage = "Failed to compress chat history. Consider increasing the context size." }) {
|
||||
let currentEstimatedCharactersPerToken = estimatedCharactersPerToken;
|
||||
function getTokensCountForChatHistory(chatHistory) {
|
||||
const { contextText } = chatWrapper.generateContextState({ chatHistory });
|
||||
return contextText.tokenize(tokenizer, "trimLeadingSpace").length;
|
||||
}
|
||||
async function getResultForCharacterRemovalCount(characterRemovalCount) {
|
||||
if (characterRemovalCount === 0)
|
||||
return {
|
||||
compressedHistory: chatHistory,
|
||||
tokensCount: getTokensCountForChatHistory(chatHistory),
|
||||
characterRemovalCount
|
||||
};
|
||||
const compressedHistory = await compressChatHistory({
|
||||
chatHistory,
|
||||
charactersToRemove: characterRemovalCount,
|
||||
estimatedCharactersPerToken: currentEstimatedCharactersPerToken
|
||||
});
|
||||
return {
|
||||
compressedHistory,
|
||||
tokensCount: getTokensCountForChatHistory(compressedHistory),
|
||||
characterRemovalCount
|
||||
};
|
||||
}
|
||||
let latestCompressionAttempt = await getResultForCharacterRemovalCount(initialCharactersRemovalCount);
|
||||
const firstCompressionAttempt = latestCompressionAttempt;
|
||||
let latestCompressionAttemptTokensCount = latestCompressionAttempt.tokensCount;
|
||||
let sameTokensCountRepetitions = 0;
|
||||
if (latestCompressionAttempt.tokensCount === tokensCountToFit ||
|
||||
(latestCompressionAttempt.tokensCount < tokensCountToFit && latestCompressionAttempt.characterRemovalCount === 0))
|
||||
return {
|
||||
removedCharactersCount: initialCharactersRemovalCount,
|
||||
compressedChatHistory: latestCompressionAttempt.compressedHistory
|
||||
};
|
||||
let bestCompressionAttempt = latestCompressionAttempt;
|
||||
for (let compressionAttempts = 0, decompressionAttempts = 0; bestCompressionAttempt.tokensCount !== tokensCountToFit;) {
|
||||
if (compressionAttempts > 0) {
|
||||
if (latestCompressionAttempt.tokensCount != firstCompressionAttempt.tokensCount &&
|
||||
latestCompressionAttempt.characterRemovalCount != firstCompressionAttempt.characterRemovalCount)
|
||||
currentEstimatedCharactersPerToken =
|
||||
Math.abs(latestCompressionAttempt.characterRemovalCount - firstCompressionAttempt.characterRemovalCount) /
|
||||
Math.abs(latestCompressionAttempt.tokensCount - firstCompressionAttempt.tokensCount);
|
||||
if (!Number.isFinite(currentEstimatedCharactersPerToken) || currentEstimatedCharactersPerToken === 0)
|
||||
currentEstimatedCharactersPerToken = estimatedCharactersPerToken;
|
||||
}
|
||||
const tokensLeftToRemove = latestCompressionAttempt.tokensCount - tokensCountToFit;
|
||||
let additionalCharactersToRemove = Math.round(tokensLeftToRemove * currentEstimatedCharactersPerToken);
|
||||
if (additionalCharactersToRemove === 0) {
|
||||
if (tokensLeftToRemove > 0)
|
||||
additionalCharactersToRemove = 1;
|
||||
else if (tokensLeftToRemove < 0)
|
||||
additionalCharactersToRemove = -1;
|
||||
}
|
||||
if (tokensLeftToRemove > 0)
|
||||
compressionAttempts++;
|
||||
else if (tokensLeftToRemove < 0)
|
||||
decompressionAttempts++;
|
||||
if (decompressionAttempts >= maxDecompressionAttempts)
|
||||
break;
|
||||
latestCompressionAttempt = await getResultForCharacterRemovalCount(latestCompressionAttempt.characterRemovalCount + additionalCharactersToRemove);
|
||||
if ((bestCompressionAttempt.tokensCount > tokensCountToFit &&
|
||||
latestCompressionAttempt.tokensCount <= bestCompressionAttempt.tokensCount) || (bestCompressionAttempt.tokensCount < tokensCountToFit &&
|
||||
latestCompressionAttempt.tokensCount < tokensCountToFit &&
|
||||
latestCompressionAttempt.tokensCount > bestCompressionAttempt.tokensCount) || (bestCompressionAttempt.tokensCount <= tokensCountToFit &&
|
||||
latestCompressionAttempt.tokensCount <= tokensCountToFit &&
|
||||
latestCompressionAttempt.characterRemovalCount < bestCompressionAttempt.characterRemovalCount))
|
||||
bestCompressionAttempt = latestCompressionAttempt;
|
||||
if (latestCompressionAttempt.tokensCount === latestCompressionAttemptTokensCount)
|
||||
sameTokensCountRepetitions++;
|
||||
else {
|
||||
latestCompressionAttemptTokensCount = latestCompressionAttempt.tokensCount;
|
||||
sameTokensCountRepetitions = 0;
|
||||
}
|
||||
if (decompressionAttempts === 0 &&
|
||||
compressionAttempts >= maxSequentialUnhelpfulIterations &&
|
||||
sameTokensCountRepetitions >= maxSequentialUnhelpfulIterations)
|
||||
throw new Error(failedCompressionErrorMessage);
|
||||
}
|
||||
return {
|
||||
removedCharactersCount: bestCompressionAttempt.characterRemovalCount,
|
||||
compressedChatHistory: bestCompressionAttempt.compressedHistory
|
||||
};
|
||||
}
|
||||
//# sourceMappingURL=findCharacterRemovalCountToFitChatHistoryInContext.js.map
|
||||
Reference in New Issue
Block a user