Started on the CL version

This commit is contained in:
Lukas Nöllemeyer 2025-02-04 06:27:11 +01:00
parent 17b7c3adb4
commit 0aac3aae10
11 changed files with 1181 additions and 3 deletions

131
server.lisp Normal file
View file

@ -0,0 +1,131 @@
;;; Bootstrap: guarantee that the RKLLM package exists before the rest of this
;;; file is read. :EXECUTE is part of the situation list so the guard also runs
;;; when the source file is LOADed directly instead of being compiled first.
(eval-when (:compile-toplevel :load-toplevel :execute)
  ;; Development convenience, disabled by the always-false feature expression
  ;; #+(or); remove the prefix to pull the dependencies in by hand.
  #+(or) (ql:quickload '(micros
                         woo clack ningle metabang-bind
                         ;cffi-libffi
                         cl-autowrap ;/libffi
                         ))
  ;; Only create the package when it is missing, so an already populated
  ;; RKLLM package is never redefined.
  (unless (find-package '#:rkllm)
    (defpackage #:rkllm)))
;;; Package holding the server code. It uses COMMON-LISP and the package
;;; reachable under the name BIND (presumably metabang-bind -- confirm).
(defpackage #:rkllm-server
  (:use #:cl #:bind))

(in-package #:rkllm-server)
;;; Native libraries: declare both, then load them in the same order as
;;; before (librkllm first, rkllm-wrapper second).
(cffi:define-foreign-library librkllm
  (t (:default "librkllmrt")))

(cffi:define-foreign-library rkllm-wrapper
  (t (:default "rkllm-wrapper")))

(dolist (library '(librkllm rkllm-wrapper))
  (cffi:load-foreign-library library))
;;; Generate the bindings from include/rkllm-wrapper.h (resolved relative to
;;; the RKLLM-SERVER system). Definitions are placed in the RKLLM package and
;;; every architecture listed under :EXCLUDE-ARCH is left out.
(autowrap:c-include (asdf:system-relative-pathname :rkllm-server "include/rkllm-wrapper.h")
  :spec-path '(rkllm-server lib)
  :exclude-arch ("i686-pc-linux-gnu" "x86_64-pc-linux-gnu" "i686-pc-windows-msvc"
                 "x86_64-pc-windows-msvc" "i686-apple-darwin9" "x86_64-apple-darwin9"
                 "i386-unknown-freebsd" "x86_64-unknown-freebsd" "i386-unknown-openbsd"
                 "x86_64-unknown-openbsd" "arm-pc-linux-gnu" "arm-unknown-linux-androideabi"
                 "powerpc64-pc-linux-gnu" "powerpc64le-pc-linux-gnu" "i686-unknown-linux-android"
                 "x86_64-unknown-linux-android")
  :definition-package :rkllm
  ;; Strip a leading "RKLLM"/"rkllm" and, when present, the single underscore
  ;; that follows it, so that e.g. a name starting with "rkllm_" loses the
  ;; whole "rkllm_" prefix.
  :symbol-regex (("^(RKLLM|rkllm)" ()
                  (lambda (string matches regex)
                    (declare (ignore matches regex))
                    (let* ((prefix-length 5) ; length of "RKLLM" / "rkllm"
                           (start (if (and (> (length string) prefix-length)
                                           (char= #\_ (char string prefix-length)))
                                      (1+ prefix-length)
                                      prefix-length)))
                      ;; A name made of nothing but the prefix is returned
                      ;; unchanged: indexing into the empty remainder used to
                      ;; signal an error, and an empty symbol name is useless.
                      (if (< start (length string))
                          (subseq string start)
                          string))))))
(defvar *last-state* 0
  "State value most recently passed to ACTUAL-CALLBACK.")

(defvar *output* nil
  "Text fragments collected by ACTUAL-CALLBACK, newest first.")
(defun actual-callback (result state)
  "Process one inference callback.

RESULT is a wrapped rkllm:result and STATE is the call state delivered by the
runtime. STATE is remembered in *LAST-STATE*. States 2, 3 and 4 print a
newline, print an error line and signal a warning respectively; every other
state prints the text carried by RESULT and pushes it onto *OUTPUT*.
Standard output is flushed before returning."
  (setf *last-state* state)
  (case state
    ;; NOTE(review): 2/3/4 look like finish / error / last-hidden-layer in the
    ;; runtime's call-state enum -- confirm against the header.
    (2 (format t "~%"))
    (3 (format t "run error!~%"))
    (4 (warn "Getting the last hidden layer is not implemented yet."))
    (t
     (let ((text (cffi:foreign-string-to-lisp (rkllm:result.text result))))
       ;; A NULL text pointer converts to NIL; skip it instead of printing the
       ;; literal "NIL" and recording it as generated output.
       (when text
         (format t "~a" text)
         (push text *output*)))))
  (finish-output))
;;; Foreign-callable entry point: wraps the raw result pointer as an
;;; rkllm:result and forwards it, with the call state, to ACTUAL-CALLBACK.
;;; The user-data pointer is not used.
(autowrap:defcallback get-data-cb :void
    ((result-ptr (:pointer rkllm:result))
     (user-data :pointer)
     (call-state rkllm:llm-call-state))
  (declare (ignore user-data))
  (let ((wrapped-result (autowrap:wrap-pointer result-ptr 'rkllm:result)))
    (actual-callback wrapped-result call-state)))
(defvar *empty-str* (autowrap:alloc-string "")
  "Foreign copy of the empty string; the default for the image-related
keyword arguments of UPDATE-PARAMS.")

(defvar *model-param* (autowrap:alloc 'rkllm:param)
  "Foreign rkllm:param structure that UPDATE-PARAMS fills in and INIT-MODEL
hands to rkllm:init.")

;; Executed on every load of this file.
;; NOTE(review): presumably fills the structure with the library defaults --
;; confirm against rkllm-wrapper.h.
(rkllm:get-packed-default *model-param*)
(defun update-params (&key (path "/srv/dev-disk-by-uuid-e704bc62-3f03-4c9f-a44a-7f7536ea97e1/public/compile/my_rkllm_server/models/Qwen2.5-Coder-3B-Instruct.rkllm")
                           (max-content-length 512)
                           (max-new-tokens -1)
                           (skip-special-tokens t)
                           (top-k 20)
                           (top-p 0.8)
                           (temperature 0.7)
                           (repeat-penalty 1.1)
                           (frequency-penalty 0.0)
                           (presence-penalty 0.0)
                           (mirostat 0)
                           (mirostat-tau 5.0)
                           (mirostat-eta 0.1)
                           (is-async nil)
                           (img-start *empty-str*)
                           (img-end *empty-str*)
                           (img-content *empty-str*)
                           (domain-base-id 0))
  "Copy the keyword arguments into the foreign structure *MODEL-PARAM*.

PATH is copied into freshly allocated foreign memory; a previous non-NULL
model path is freed first. MAX-CONTENT-LENGTH is stored in the max-context-len
field. SKIP-SPECIAL-TOKENS and IS-ASYNC are generalized booleans stored as 1
or 0. IMG-START, IMG-END and IMG-CONTENT are stored as given. Returns
DOMAIN-BASE-ID, the last value stored."
  (let* ((param *model-param*)
         (previous-path (rkllm:param.model-path param)))
    ;; Release the path string installed earlier before replacing it.
    (when (not (cffi:null-pointer-p previous-path))
      (autowrap:free previous-path))
    ;; Model location and length limits.
    (setf (rkllm:param.model-path param) (autowrap:alloc-string path))
    (setf (rkllm:param.max-context-len param) max-content-length)
    (setf (rkllm:param.max-new-tokens param) max-new-tokens)
    (setf (rkllm:param.skip-special-token param) (if skip-special-tokens 1 0))
    ;; Sampling controls.
    (setf (rkllm:param.top-k param) top-k)
    (setf (rkllm:param.top-p param) top-p)
    (setf (rkllm:param.temperature param) temperature)
    (setf (rkllm:param.repeat-penalty param) repeat-penalty)
    (setf (rkllm:param.frequency-penalty param) frequency-penalty)
    (setf (rkllm:param.presence-penalty param) presence-penalty)
    (setf (rkllm:param.mirostat param) mirostat)
    (setf (rkllm:param.mirostat-tau param) mirostat-tau)
    (setf (rkllm:param.mirostat-eta param) mirostat-eta)
    (setf (rkllm:param.is-async param) (if is-async 1 0))
    ;; Image markers and the extended parameter block.
    (setf (rkllm:param.img-start param) img-start)
    (setf (rkllm:param.img-end param) img-end)
    (setf (rkllm:param.img-content param) img-content)
    (setf (rkllm:param.extend-param.base-domain-id param) domain-base-id)))
;; Install the default parameter set at load time.
(update-params)

(defvar *model-handle* (autowrap:alloc-ptr :pointer)
  "Foreign pointer-sized cell passed to rkllm:init; INIT-MODEL reads the model
handle back out of it.")

;; Model handle read from *MODEL-HANDLE*; unbound until INIT-MODEL has run.
(defvar *model*)

(defvar *model-lock* (bt2:make-lock :name "model-lock")
  "Lock associated with the model.")
(defun init-model ()
  "Initialise the runtime from *MODEL-PARAM*, registering GET-DATA-CB as the
result callback.

On a zero status from rkllm:init the handle written into *MODEL-HANDLE* is
stored in *MODEL* and returned; any other status signals an error."
  (let ((status (rkllm:init *model-handle*
                            *model-param*
                            (autowrap:callback 'get-data-cb))))
    (if (zerop status)
        (setf *model* (cffi:mem-ref *model-handle* :pointer))
        (error "Failed to init!"))))
(defun prompt-model (prompt)
  "Run the model in *MODEL* on PROMPT in generate mode.

PROMPT is a Lisp string. It is copied into foreign memory for the duration of
the call, and that copy is released even if the call exits non-locally.
Returns whatever rkllm:run returns."
  (autowrap:with-many-alloc ((iparam 'rkllm:infer-param)
                             (input 'rkllm:input))
    ;; The foreign copy gets its own name so the FREE below releases the
    ;; foreign string and never sees the Lisp string argument.
    (let ((c-prompt (autowrap:alloc-string prompt)))
      (unwind-protect
           (progn
             (setf (rkllm:infer-param.mode iparam) rkllm:+infer-generate+
                   (rkllm:infer-param.lora-params iparam) (cffi:null-pointer)
                   (rkllm:infer-param.prompt-cache-params iparam) (cffi:null-pointer)
                   (rkllm:input.input-type input) rkllm:+input-prompt+
                   (rkllm:input.prompt-input input) c-prompt)
             (rkllm:run *model* input iparam nil))
        (autowrap:free c-prompt)))))
;(init (cffi:mem-aptr *model* :pointer) *model-param* (cffi:callback get-data-cb))
(defparameter *msg-start* "<|im_start|>"
  "Marker that opens every chat message.")

(defparameter *msg-end* "<|im_end|>"
  "Marker that closes a complete chat message.")

(defun message->prompt (role &optional message)
  "Render one chat message as a prompt fragment.

ROLE is printed in lower case after *MSG-START*, followed by a newline. When
MESSAGE is non-NIL it follows, terminated by *MSG-END* and a newline. When
MESSAGE is NIL only the header line is produced, leaving the turn open for the
model to complete."
  (let ((*print-case* :downcase))
    (if message
        (format nil "~a~a~%~a~a~%" *msg-start* role message *msg-end*)
        ;; An open turn ends right after the header's newline; a second
        ;; newline would already be part of the reply.
        (format nil "~a~a~%" *msg-start* role))))
(defun messages->prompt (messages)
  "Render MESSAGES as a single prompt string ending in an open assistant turn.

MESSAGES is a list of (ROLE TEXT) lists. Each entry is rendered with
MESSAGE->PROMPT in order, followed by the rendering of a text-less :ASSISTANT
message."
  ;; Writing to a string stream keeps the conversation length independent of
  ;; CALL-ARGUMENTS-LIMIT, which bounds (apply #'concatenate ...).
  (with-output-to-string (out)
    (dolist (entry messages)
      (write-string (message->prompt (first entry) (second entry)) out))
    (write-string (message->prompt :assistant) out)))
(defun start (&key (address "0.0.0.0") (port 5000))
  "Start the HTTP server on the Woo backend and return what CLACK:CLACKUP returns.

ADDRESS and PORT select the listening socket. The current handler is a
placeholder that answers every request with the printed request environment
as plain text."
  ;; NOTE(review): the default address listens on all interfaces while the
  ;; placeholder handler echoes the request environment back -- confirm this
  ;; exposure is intended.
  (clack:clackup (lambda (env)
                   ;; A Clack application answers with (STATUS HEADERS BODY);
                   ;; a bare string is not a valid response.
                   (list 200
                         '(:content-type "text/plain; charset=utf-8")
                         (list (format nil "~a" env))))
                 :server :woo
                 :address address
                 :port port))

(export '(start))