rkllm-server/server.lisp

171 lines
7.8 KiB
Common Lisp

(eval-when (:compile-toplevel :load-toplevel)
#+(or) (ql:quickload '(micros
woo clack ningle metabang-bind
;cffi-libffi
cl-autowrap ;/libffi
))
(unless (find-package '#:rkllm)
(defpackage #:rkllm)))
(defpackage #:rkllm-server
(:use :cl :bind))
(in-package :rkllm-server)
(cffi:define-foreign-library librkllm
(t (:default "librkllmrt")))
(cffi:load-foreign-library 'librkllm)
(cffi:define-foreign-library rkllm-wrapper
(t (:default "rkllm-wrapper")))
(cffi:load-foreign-library 'rkllm-wrapper)
(autowrap:c-include (asdf:system-relative-pathname :rkllm-server "include/rkllm-wrapper.h")
:spec-path '(rkllm-server lib)
:exclude-arch ("i686-pc-linux-gnu" "x86_64-pc-linux-gnu" "i686-pc-windows-msvc"
"x86_64-pc-windows-msvc" "i686-apple-darwin9" "x86_64-apple-darwin9"
"i386-unknown-freebsd" "x86_64-unknown-freebsd" "i386-unknown-openbsd"
"x86_64-unknown-openbsd" "arm-pc-linux-gnu" "arm-unknown-linux-androideabi"
"powerpc64-pc-linux-gnu" "powerpc64le-pc-linux-gnu" "i686-unknown-linux-android"
"x86_64-unknown-linux-android")
:definition-package :rkllm
:symbol-regex (("^(RKLLM|rkllm)" ()
(lambda (string matches regex)
(let ((new (subseq string 5)))
(if (char= #\_ (aref new 0))
(subseq new 1)
new))))))
(defvar *last-state* 0)
(defvar *output* nil)
(defun actual-callback (result state)
(setf *last-state* state)
(case state
(2 (format t "~%"))
(3 (format t "run error!~%"))
(4 (warn "Getting the last hidden layer is not implemented yet."))
(t
(let ((text (cffi:foreign-string-to-lisp (rkllm:result.text result))))
(format t "~a" text)
(push text *output*))))
(finish-output))
(autowrap:defcallback get-data-cb :void ((result (:pointer rkllm:result)) (userdata :pointer) (state rkllm:llm-call-state))
(declare (ignore userdata))
(actual-callback (autowrap:wrap-pointer result 'rkllm:result) state))
(defvar *empty-str* (autowrap:alloc-string ""))
(defvar *model-param* (autowrap:alloc 'rkllm:param))
(rkllm:get-packed-default *model-param*)
(defun update-params (&key (path "/srv/dev-disk-by-uuid-e704bc62-3f03-4c9f-a44a-7f7536ea97e1/public/compile/my_rkllm_server/models/Qwen2.5-Coder-3B-Instruct.rkllm")
(max-content-length 512) (max-new-tokens -1) (skip-special-tokens t) (top-k 20) (top-p 0.8) (temperature 0.7) (repeat-penalty 1.1)
(frequency-penalty 0.0) (presence-penalty 0.0) (mirostat 0) (mirostat-tau 5.0) (mirostat-eta 0.1) (is-async nil) (img-start *empty-str*)
(img-end *empty-str*) (img-content *empty-str*) (domain-base-id 0))
(unless (cffi:null-pointer-p (rkllm:param.model-path *model-param*))
(autowrap:free (rkllm:param.model-path *model-param*)))
(setf (rkllm:param.model-path *model-param*) (autowrap:alloc-string path)
(rkllm:param.max-context-len *model-param*) max-content-length
(rkllm:param.max-new-tokens *model-param*) max-new-tokens
(rkllm:param.skip-special-token *model-param*) (if skip-special-tokens 1 0)
(rkllm:param.top-k *model-param*) top-k
(rkllm:param.top-p *model-param*) top-p
(rkllm:param.temperature *model-param*) temperature
(rkllm:param.repeat-penalty *model-param*) repeat-penalty
(rkllm:param.frequency-penalty *model-param*) frequency-penalty
(rkllm:param.presence-penalty *model-param*) presence-penalty
(rkllm:param.mirostat *model-param*) mirostat
(rkllm:param.mirostat-tau *model-param*) mirostat-tau
(rkllm:param.mirostat-eta *model-param*) mirostat-eta
(rkllm:param.is-async *model-param*) (if is-async 1 0)
(rkllm:param.img-start *model-param*) img-start
(rkllm:param.img-end *model-param*) img-end
(rkllm:param.img-content *model-param*) img-content
(rkllm:param.extend-param.base-domain-id *model-param*) domain-base-id))
(update-params)
(defvar *model-handle* (autowrap:alloc-ptr :pointer))
(defvar *model*)
(defvar *model-lock* (bt2:make-lock :name "model-lock"))
(defun init-model ()
(unless (= 0 (rkllm:init *model-handle* *model-param* (autowrap:callback 'get-data-cb)))
(error "Failed to init!"))
(setf *model* (cffi:mem-ref *model-handle* :pointer)))
(defun prompt-model (prompt)
(autowrap:with-many-alloc ((iparam 'rkllm:infer-param)
(input 'rkllm:input))
(let ((prompt (autowrap:alloc-string prompt)))
(setf (rkllm:infer-param.mode iparam) rkllm:+infer-generate+
(rkllm:infer-param.lora-params iparam) (cffi:null-pointer)
(rkllm:infer-param.prompt-cache-params iparam) (cffi:null-pointer)
(rkllm:input.input-type input) rkllm:+input-prompt+
(rkllm:input.prompt-input input) prompt))
(rkllm:run *model* input iparam nil)
(autowrap:free prompt)))
(defparameter *msg-start* "<|im_start|>")
(defparameter *msg-end* "<|im_end|>")
(defun message->prompt (role &optional message)
(let ((*print-case* :downcase))
(format nil "~a~a~%~a~a~%" *msg-start* role (or message "") (if message *msg-end* ""))))
(defun messages->prompt (messages)
(apply #'concatenate 'string
(mapcar (lambda (msg)
(message->prompt (car msg) (cadr msg)))
(append messages '((:assistant))))))
(defvar *clack-server* (if (boundp '*clack-server*) *clack-server* nil))
(defun chat-handler (env)
(bind (((&key request-method &allow-other-keys) env))
(if (eq :post request-method)
(bind (((&key raw-body &allow-other-keys) env)
(post-data (yason:parse
(flexi-streams:make-flexi-stream raw-body :external-format :utf-8)))
(resp nil))
(format t "~&; INFO recieved ~a~%" (alexandria:hash-table-plist post-data))
(lambda (responder)
(let ((writer (funcall responder '(200 (:content-type "application/json"))))
(yason:*symbol-encoder* 'yason:encode-symbol-as-lowercase))
(loop :for chunk :across "Hello!"
:for i :upfrom 0
:do (funcall writer
(with-output-to-string (str)
(yason:encode
`(:id "bla" :object "blub" :choices ,(reverse (push `(:index ,i :data ,(format nil "~a" chunk)) resp)))
str)))
(sleep 0.5)
:while chunk
:finally (funcall writer nil :close t)))))
'(405 nil ("Only POST")))))
(defun server-handler (env)
(bind (((&key path-info remote-addr &allow-other-keys) env)
(path-sym (intern (if (string= path-info "") "/" (string-upcase path-info)) :keyword)))
(case path-sym
(:/rkllm-chat (chat-handler env))
(:/rkllm-abort `(200 nil (,(format nil "Welcome ~a. You should not have come here (~a) " remote-addr path-sym))))
(t `(200 nil ,(asdf:system-relative-pathname :rkllm-server (format nil "static~a" (if (eq path-sym :/) "/index.html" path-info))))))))
(defun start ()
(when *clack-server* (clack:stop *clack-server*))
(setf *clack-server*
(clack:clackup
(labels ((caller (env)
(restart-case (funcall 'server-handler env)
(retry () (caller env))
(return-500 () '(500 nil nil)))))
(lambda (env)
(caller env)))
:server :woo :address "0.0.0.0")))
(export '(start))