131 lines
5.9 KiB
Common Lisp
131 lines
5.9 KiB
Common Lisp
|
|
(eval-when (:compile-toplevel :load-toplevel)
|
|
#+(or) (ql:quickload '(micros
|
|
woo clack ningle metabang-bind
|
|
;cffi-libffi
|
|
cl-autowrap ;/libffi
|
|
))
|
|
(unless (find-package '#:rkllm)
|
|
(defpackage #:rkllm)))
|
|
|
|
(defpackage #:rkllm-server
|
|
(:use :cl :bind))
|
|
|
|
(in-package :rkllm-server)
|
|
|
|
(cffi:define-foreign-library librkllm
|
|
(t (:default "librkllmrt")))
|
|
(cffi:load-foreign-library 'librkllm)
|
|
|
|
(cffi:define-foreign-library rkllm-wrapper
|
|
(t (:default "rkllm-wrapper")))
|
|
|
|
(cffi:load-foreign-library 'rkllm-wrapper)
|
|
|
|
(autowrap:c-include (asdf:system-relative-pathname :rkllm-server "include/rkllm-wrapper.h")
|
|
:spec-path '(rkllm-server lib)
|
|
:exclude-arch ("i686-pc-linux-gnu" "x86_64-pc-linux-gnu" "i686-pc-windows-msvc"
|
|
"x86_64-pc-windows-msvc" "i686-apple-darwin9" "x86_64-apple-darwin9"
|
|
"i386-unknown-freebsd" "x86_64-unknown-freebsd" "i386-unknown-openbsd"
|
|
"x86_64-unknown-openbsd" "arm-pc-linux-gnu" "arm-unknown-linux-androideabi"
|
|
"powerpc64-pc-linux-gnu" "powerpc64le-pc-linux-gnu" "i686-unknown-linux-android"
|
|
"x86_64-unknown-linux-android")
|
|
:definition-package :rkllm
|
|
:symbol-regex (("^(RKLLM|rkllm)" ()
|
|
(lambda (string matches regex)
|
|
(let ((new (subseq string 5)))
|
|
(if (char= #\_ (aref new 0))
|
|
(subseq new 1)
|
|
new))))))
|
|
|
|
(defvar *last-state* 0)
|
|
|
|
(defvar *output* nil)
|
|
|
|
(defun actual-callback (result state)
|
|
(setf *last-state* state)
|
|
(case state
|
|
(2 (format t "~%"))
|
|
(3 (format t "run error!~%"))
|
|
(4 (warn "Getting the last hidden layer is not implemented yet."))
|
|
(t
|
|
(let ((text (cffi:foreign-string-to-lisp (rkllm:result.text result))))
|
|
(format t "~a" text)
|
|
(push text *output*))))
|
|
(finish-output))
|
|
|
|
(autowrap:defcallback get-data-cb :void ((result (:pointer rkllm:result)) (userdata :pointer) (state rkllm:llm-call-state))
|
|
(declare (ignore userdata))
|
|
(actual-callback (autowrap:wrap-pointer result 'rkllm:result) state))
|
|
|
|
|
|
(defvar *empty-str* (autowrap:alloc-string ""))
|
|
|
|
(defvar *model-param* (autowrap:alloc 'rkllm:param))
|
|
(rkllm:get-packed-default *model-param*)
|
|
|
|
(defun update-params (&key (path "/srv/dev-disk-by-uuid-e704bc62-3f03-4c9f-a44a-7f7536ea97e1/public/compile/my_rkllm_server/models/Qwen2.5-Coder-3B-Instruct.rkllm")
|
|
(max-content-length 512) (max-new-tokens -1) (skip-special-tokens t) (top-k 20) (top-p 0.8) (temperature 0.7) (repeat-penalty 1.1)
|
|
(frequency-penalty 0.0) (presence-penalty 0.0) (mirostat 0) (mirostat-tau 5.0) (mirostat-eta 0.1) (is-async nil) (img-start *empty-str*)
|
|
(img-end *empty-str*) (img-content *empty-str*) (domain-base-id 0))
|
|
(unless (cffi:null-pointer-p (rkllm:param.model-path *model-param*))
|
|
(autowrap:free (rkllm:param.model-path *model-param*)))
|
|
(setf (rkllm:param.model-path *model-param*) (autowrap:alloc-string path)
|
|
(rkllm:param.max-context-len *model-param*) max-content-length
|
|
(rkllm:param.max-new-tokens *model-param*) max-new-tokens
|
|
(rkllm:param.skip-special-token *model-param*) (if skip-special-tokens 1 0)
|
|
(rkllm:param.top-k *model-param*) top-k
|
|
(rkllm:param.top-p *model-param*) top-p
|
|
(rkllm:param.temperature *model-param*) temperature
|
|
(rkllm:param.repeat-penalty *model-param*) repeat-penalty
|
|
(rkllm:param.frequency-penalty *model-param*) frequency-penalty
|
|
(rkllm:param.presence-penalty *model-param*) presence-penalty
|
|
(rkllm:param.mirostat *model-param*) mirostat
|
|
(rkllm:param.mirostat-tau *model-param*) mirostat-tau
|
|
(rkllm:param.mirostat-eta *model-param*) mirostat-eta
|
|
(rkllm:param.is-async *model-param*) (if is-async 1 0)
|
|
(rkllm:param.img-start *model-param*) img-start
|
|
(rkllm:param.img-end *model-param*) img-end
|
|
(rkllm:param.img-content *model-param*) img-content
|
|
(rkllm:param.extend-param.base-domain-id *model-param*) domain-base-id))
|
|
(update-params)
|
|
(defvar *model-handle* (autowrap:alloc-ptr :pointer))
|
|
(defvar *model*)
|
|
|
|
(defvar *model-lock* (bt2:make-lock :name "model-lock"))
|
|
|
|
(defun init-model ()
|
|
(unless (= 0 (rkllm:init *model-handle* *model-param* (autowrap:callback 'get-data-cb)))
|
|
(error "Failed to init!"))
|
|
(setf *model* (cffi:mem-ref *model-handle* :pointer)))
|
|
|
|
|
|
(defun prompt-model (prompt)
|
|
(autowrap:with-many-alloc ((iparam 'rkllm:infer-param)
|
|
(input 'rkllm:input))
|
|
(let ((prompt (autowrap:alloc-string prompt)))
|
|
(setf (rkllm:infer-param.mode iparam) rkllm:+infer-generate+
|
|
(rkllm:infer-param.lora-params iparam) (cffi:null-pointer)
|
|
(rkllm:infer-param.prompt-cache-params iparam) (cffi:null-pointer)
|
|
(rkllm:input.input-type input) rkllm:+input-prompt+
|
|
(rkllm:input.prompt-input input) prompt))
|
|
(rkllm:run *model* input iparam nil)
|
|
(autowrap:free prompt)))
|
|
;(init (cffi:mem-aptr *model* :pointer) *model-param* (cffi:callback get-data-cb))
|
|
|
|
(defparameter *msg-start* "<|im_start|>")
|
|
(defparameter *msg-end* "<|im_end|>")
|
|
|
|
(defun message->prompt (role &optional message)
|
|
(let ((*print-case* :downcase)) (format nil "~a~a~%~a~a~%" *msg-start* role (or message "") (if message *msg-end* ""))))
|
|
(defun messages->prompt (messages)
|
|
(apply #'concatenate 'string
|
|
(mapcar (lambda (msg)
|
|
(message->prompt (car msg) (cadr msg)))
|
|
(append messages '((:assistant))))))
|
|
|
|
|
|
|
|
(defun start ()
|
|
(clack:clackup (lambda (env) (format nil "~a" env)) :server :woo :address "0.0.0.0"))
|
|
(export '(start))
|