64 lines
3.0 KiB
JavaScript
64 lines
3.0 KiB
JavaScript
import { LlamaText, SpecialTokensText } from "../../../utils/LlamaText.js";
|
|
import { removeUndefinedFields } from "../../../utils/removeNullFields.js";
|
|
/**
 * Detects which known "thought" segment delimiters are used, looking first at the chat template
 * text and then at the tokenizer.
 *
 * @param {string | null | undefined} chatTemplate - Chat template source text to search for known
 * delimiter pairs. Skipped when `null`/`undefined`.
 * @param {((text: string, specialTokens: boolean, options: string) => ArrayLike<unknown>) | null | undefined} tokenizer -
 * Function invoked as `tokenizer(text, true, "trimLeadingSpace")`; a delimiter pair is accepted when
 * both of its texts tokenize to exactly one non-null token. Skipped when `null`/`undefined`.
 * NOTE(review): the exact meaning of the 2nd and 3rd tokenizer arguments is defined elsewhere - confirm.
 * @returns The result of `removeUndefinedFields({thought})`, where `thought` is either
 * `{prefix, suffix}` (both `LlamaText` values wrapping `SpecialTokensText`) or `undefined` when no
 * known pair matched. Presumably the `thought` key is dropped in the latter case - verify against
 * `removeUndefinedFields`.
 */
export function extractSegmentSettingsFromTokenizerAndChatTemplate(chatTemplate, tokenizer) {
    // Separators that may sit between a delimiter and the segment content, tried in this order.
    // Each entry is [escaped form as written inside a template string literal, actual newline text].
    const newlineSeparators = [
        ["\\n\\n", "\n\n"],
        ["\\n", "\n"]
    ];

    /** Builds the segment settings object from the raw prefix/suffix texts. */
    function createSegment(prefixText, suffixText) {
        return {
            prefix: LlamaText(new SpecialTokensText(prefixText)),
            suffix: LlamaText(new SpecialTokensText(suffixText))
        };
    }

    /**
     * Checks whether the template contains both `prefix + separator` and `separator + suffix`
     * once every directly adjacent `prefix + separator + suffix` (an empty segment) is removed.
     */
    function templateUsesSeparator(prefix, suffix, separator) {
        const templateWithoutEmptySegments = chatTemplate.replaceAll(`${prefix}${separator}${suffix}`, "");

        return hasAll(templateWithoutEmptySegments, [
            `${prefix}${separator}`,
            `${separator}${suffix}`
        ]);
    }

    /**
     * Returns segment settings for the first `[prefix, suffix]` pair that matches, or `undefined`.
     * The chat template is consulted for all pairs before the tokenizer is consulted for any.
     */
    function tryMatchPrefixSuffixPair(tryMatchGroups) {
        if (chatTemplate != null) {
            for (const [prefix, suffix] of tryMatchGroups) {
                for (const [escapedSeparator, literalSeparator] of newlineSeparators) {
                    // Whichever form was found in the template, the returned texts use real newlines
                    if (templateUsesSeparator(prefix, suffix, escapedSeparator) ||
                        templateUsesSeparator(prefix, suffix, literalSeparator)) {
                        return createSegment(`${prefix}${literalSeparator}`, `${literalSeparator}${suffix}`);
                    }
                }

                // Fall back to the bare delimiters when no newline-separated form was found
                if (chatTemplate.includes(prefix) && chatTemplate.includes(suffix)) {
                    return createSegment(prefix, suffix);
                }
            }
        }

        if (tokenizer != null) {
            for (const [prefix, suffix] of tryMatchGroups) {
                const prefixTokens = tokenizer(prefix, true, "trimLeadingSpace");
                const suffixTokens = tokenizer(suffix, true, "trimLeadingSpace");
                const [firstPrefixToken] = prefixTokens;
                const [firstSuffixToken] = suffixTokens;

                // Accept the pair only when each delimiter text maps to exactly one token
                if (prefixTokens.length === 1 && suffixTokens.length === 1 &&
                    firstPrefixToken != null && firstSuffixToken != null) {
                    return createSegment(prefix, suffix);
                }
            }
        }

        return undefined;
    }

    return removeUndefinedFields({
        thought: tryMatchPrefixSuffixPair([
            ["<think>", "</think>"], // DeepSeek, QwQ
            ["<thought>", "</thought>"], // EXAONE Deep
            ["<|START_THINKING|>", "<|END_THINKING|>"] // Command R7B
        ])
    });
}
|
|
/**
 * Checks whether `text` contains every one of the given substrings.
 *
 * @param {string} text - The text to search in.
 * @param {string[]} matches - Substrings that must all occur in `text`.
 * @returns {boolean} `true` when every entry of `matches` occurs in `text`
 * (also `true` for an empty `matches` array), `false` otherwise.
 */
function hasAll(text, matches) {
    return matches.every((match) => text.includes(match));
}
|
|
//# sourceMappingURL=extractSegmentSettingsFromTokenizerAndChatTemplate.js.map
|