Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tools/cli/cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ int main(int argc, char ** argv) {
ctx_cli.ctx_server.start_loop();
});

auto inf = ctx_cli.ctx_server.get_info();
auto inf = ctx_cli.ctx_server.get_meta();
std::string modalities = "text";
if (inf.has_inp_image) {
modalities += ", vision";
Expand Down
27 changes: 10 additions & 17 deletions tools/server/server-common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,26 +115,14 @@ bool lora_should_clear_cache(
!lora_all_alora(next));
}

std::vector<common_adapter_lora_info> parse_lora_request(
const std::vector<common_adapter_lora_info> & lora_base,
const json & data) {
std::vector<common_adapter_lora_info> lora(lora_base);
int max_idx = lora.size();

// clear existing value
for (auto & entry : lora) {
entry.scale = 0.0f;
}
std::map<int, float> parse_lora_request(const json & data) {
std::map<int, float> lora;

// set value
for (const auto & entry : data) {
int id = json_value(entry, "id", -1);
float scale = json_value(entry, "scale", 0.0f);
if (0 <= id && id < max_idx) {
lora[id].scale = scale;
} else {
throw std::runtime_error("invalid adapter id");
}
lora[id] = scale;
}

return lora;
Expand Down Expand Up @@ -1435,7 +1423,7 @@ std::string safe_json_to_str(const json & data) {

// TODO: reuse llama_detokenize
template <class Iter>
static std::string tokens_to_str(llama_context * ctx, Iter begin, Iter end) {
static std::string tokens_to_str(const llama_vocab * ctx, Iter begin, Iter end) {
std::string ret;
for (; begin != end; ++begin) {
ret += common_token_to_piece(ctx, *begin);
Expand All @@ -1445,7 +1433,12 @@ static std::string tokens_to_str(llama_context * ctx, Iter begin, Iter end) {
}

std::string tokens_to_str(llama_context * ctx, const llama_tokens & tokens) {
return tokens_to_str(ctx, tokens.begin(), tokens.end());
auto model = llama_get_model(ctx);
return tokens_to_str(llama_model_get_vocab(model), tokens.begin(), tokens.end());
}

std::string tokens_to_str(const llama_vocab * vocab, const llama_tokens & tokens) {
return tokens_to_str(vocab, tokens.begin(), tokens.end());
}

// format incomplete utf-8 multibyte character for output
Expand Down
5 changes: 2 additions & 3 deletions tools/server/server-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,7 @@ bool lora_should_clear_cache(
const std::vector<common_adapter_lora_info> & current,
const std::vector<common_adapter_lora_info> & next);

std::vector<common_adapter_lora_info> parse_lora_request(
const std::vector<common_adapter_lora_info> & lora_base,
const json & data);
std::map<int, float> parse_lora_request(const json & data);

bool are_lora_equal(
const std::vector<common_adapter_lora_info> & l1,
Expand Down Expand Up @@ -325,6 +323,7 @@ std::vector<llama_token_data> get_token_probabilities(llama_context * ctx, int i
std::string safe_json_to_str(const json & data);

std::string tokens_to_str(llama_context * ctx, const llama_tokens & tokens);
std::string tokens_to_str(const llama_vocab * vocab, const llama_tokens & tokens);

// format incomplete utf-8 multibyte character for output
std::string tokens_to_output_formatted_string(const llama_context * ctx, const llama_token token);
Expand Down
Loading
Loading