(eval-when (:compile-toplevel :load-toplevel) #+(or) (ql:quickload '(micros woo clack ningle metabang-bind ;cffi-libffi cl-autowrap ;/libffi )) (unless (find-package '#:rkllm) (defpackage #:rkllm))) (defpackage #:rkllm-server (:use :cl :bind)) (in-package :rkllm-server) (cffi:define-foreign-library librkllm (t (:default "librkllmrt"))) (cffi:load-foreign-library 'librkllm) (cffi:define-foreign-library rkllm-wrapper (t (:default "rkllm-wrapper"))) (cffi:load-foreign-library 'rkllm-wrapper) (autowrap:c-include (asdf:system-relative-pathname :rkllm-server "include/rkllm-wrapper.h") :spec-path '(rkllm-server lib) :exclude-arch ("i686-pc-linux-gnu" "x86_64-pc-linux-gnu" "i686-pc-windows-msvc" "x86_64-pc-windows-msvc" "i686-apple-darwin9" "x86_64-apple-darwin9" "i386-unknown-freebsd" "x86_64-unknown-freebsd" "i386-unknown-openbsd" "x86_64-unknown-openbsd" "arm-pc-linux-gnu" "arm-unknown-linux-androideabi" "powerpc64-pc-linux-gnu" "powerpc64le-pc-linux-gnu" "i686-unknown-linux-android" "x86_64-unknown-linux-android") :definition-package :rkllm :symbol-regex (("^(RKLLM|rkllm)" () (lambda (string matches regex) (let ((new (subseq string 5))) (if (char= #\_ (aref new 0)) (subseq new 1) new)))))) (defvar *last-state* 0) (defvar *output* nil) (defun actual-callback (result state) (setf *last-state* state) (case state (2 (format t "~%")) (3 (format t "run error!~%")) (4 (warn "Getting the last hidden layer is not implemented yet.")) (t (let ((text (cffi:foreign-string-to-lisp (rkllm:result.text result)))) (format t "~a" text) (push text *output*)))) (finish-output)) (autowrap:defcallback get-data-cb :void ((result (:pointer rkllm:result)) (userdata :pointer) (state rkllm:llm-call-state)) (declare (ignore userdata)) (actual-callback (autowrap:wrap-pointer result 'rkllm:result) state)) (defvar *empty-str* (autowrap:alloc-string "")) (defvar *model-param* (autowrap:alloc 'rkllm:param)) (rkllm:get-packed-default *model-param*) (defun update-params (&key (path "/srv/dev-disk-by-uuid-e704bc62-3f03-4c9f-a44a-7f7536ea97e1/public/compile/my_rkllm_server/models/Qwen2.5-Coder-3B-Instruct.rkllm") (max-content-length 512) (max-new-tokens -1) (skip-special-tokens t) (top-k 20) (top-p 0.8) (temperature 0.7) (repeat-penalty 1.1) (frequency-penalty 0.0) (presence-penalty 0.0) (mirostat 0) (mirostat-tau 5.0) (mirostat-eta 0.1) (is-async nil) (img-start *empty-str*) (img-end *empty-str*) (img-content *empty-str*) (domain-base-id 0)) (unless (cffi:null-pointer-p (rkllm:param.model-path *model-param*)) (autowrap:free (rkllm:param.model-path *model-param*))) (setf (rkllm:param.model-path *model-param*) (autowrap:alloc-string path) (rkllm:param.max-context-len *model-param*) max-content-length (rkllm:param.max-new-tokens *model-param*) max-new-tokens (rkllm:param.skip-special-token *model-param*) (if skip-special-tokens 1 0) (rkllm:param.top-k *model-param*) top-k (rkllm:param.top-p *model-param*) top-p (rkllm:param.temperature *model-param*) temperature (rkllm:param.repeat-penalty *model-param*) repeat-penalty (rkllm:param.frequency-penalty *model-param*) frequency-penalty (rkllm:param.presence-penalty *model-param*) presence-penalty (rkllm:param.mirostat *model-param*) mirostat (rkllm:param.mirostat-tau *model-param*) mirostat-tau (rkllm:param.mirostat-eta *model-param*) mirostat-eta (rkllm:param.is-async *model-param*) (if is-async 1 0) (rkllm:param.img-start *model-param*) img-start (rkllm:param.img-end *model-param*) img-end (rkllm:param.img-content *model-param*) img-content (rkllm:param.extend-param.base-domain-id *model-param*) domain-base-id)) (update-params) (defvar *model-handle* (autowrap:alloc-ptr :pointer)) (defvar *model*) (defvar *model-lock* (bt2:make-lock :name "model-lock")) (defun init-model () (unless (= 0 (rkllm:init *model-handle* *model-param* (autowrap:callback 'get-data-cb))) (error "Failed to init!")) (setf *model* (cffi:mem-ref *model-handle* :pointer))) (defun prompt-model (prompt) (autowrap:with-many-alloc ((iparam 'rkllm:infer-param) (input 'rkllm:input)) (let ((prompt (autowrap:alloc-string prompt))) (setf (rkllm:infer-param.mode iparam) rkllm:+infer-generate+ (rkllm:infer-param.lora-params iparam) (cffi:null-pointer) (rkllm:infer-param.prompt-cache-params iparam) (cffi:null-pointer) (rkllm:input.input-type input) rkllm:+input-prompt+ (rkllm:input.prompt-input input) prompt)) (rkllm:run *model* input iparam nil) (autowrap:free prompt))) ;(init (cffi:mem-aptr *model* :pointer) *model-param* (cffi:callback get-data-cb)) (defparameter *msg-start* "<|im_start|>") (defparameter *msg-end* "<|im_end|>") (defun message->prompt (role &optional message) (let ((*print-case* :downcase)) (format nil "~a~a~%~a~a~%" *msg-start* role (or message "") (if message *msg-end* "")))) (defun messages->prompt (messages) (apply #'concatenate 'string (mapcar (lambda (msg) (message->prompt (car msg) (cadr msg))) (append messages '((:assistant)))))) (defun start () (clack:clackup (lambda (env) (format nil "~a" env)) :server :woo :address "0.0.0.0")) (export '(start))