171 lines
7.8 KiB
Common Lisp
171 lines
7.8 KiB
Common Lisp
|
|
(eval-when (:compile-toplevel :load-toplevel)
|
|
#+(or) (ql:quickload '(micros
|
|
woo clack ningle metabang-bind
|
|
;cffi-libffi
|
|
cl-autowrap ;/libffi
|
|
))
|
|
(unless (find-package '#:rkllm)
|
|
(defpackage #:rkllm)))
|
|
|
|
(defpackage #:rkllm-server
|
|
(:use :cl :bind))
|
|
|
|
(in-package :rkllm-server)
|
|
|
|
(cffi:define-foreign-library librkllm
|
|
(t (:default "librkllmrt")))
|
|
(cffi:load-foreign-library 'librkllm)
|
|
|
|
(cffi:define-foreign-library rkllm-wrapper
|
|
(t (:default "rkllm-wrapper")))
|
|
|
|
(cffi:load-foreign-library 'rkllm-wrapper)
|
|
|
|
(autowrap:c-include (asdf:system-relative-pathname :rkllm-server "include/rkllm-wrapper.h")
|
|
:spec-path '(rkllm-server lib)
|
|
:exclude-arch ("i686-pc-linux-gnu" "x86_64-pc-linux-gnu" "i686-pc-windows-msvc"
|
|
"x86_64-pc-windows-msvc" "i686-apple-darwin9" "x86_64-apple-darwin9"
|
|
"i386-unknown-freebsd" "x86_64-unknown-freebsd" "i386-unknown-openbsd"
|
|
"x86_64-unknown-openbsd" "arm-pc-linux-gnu" "arm-unknown-linux-androideabi"
|
|
"powerpc64-pc-linux-gnu" "powerpc64le-pc-linux-gnu" "i686-unknown-linux-android"
|
|
"x86_64-unknown-linux-android")
|
|
:definition-package :rkllm
|
|
:symbol-regex (("^(RKLLM|rkllm)" ()
|
|
(lambda (string matches regex)
|
|
(let ((new (subseq string 5)))
|
|
(if (char= #\_ (aref new 0))
|
|
(subseq new 1)
|
|
new))))))
|
|
|
|
(defvar *last-state* 0)
|
|
|
|
(defvar *output* nil)
|
|
|
|
(defun actual-callback (result state)
|
|
(setf *last-state* state)
|
|
(case state
|
|
(2 (format t "~%"))
|
|
(3 (format t "run error!~%"))
|
|
(4 (warn "Getting the last hidden layer is not implemented yet."))
|
|
(t
|
|
(let ((text (cffi:foreign-string-to-lisp (rkllm:result.text result))))
|
|
(format t "~a" text)
|
|
(push text *output*))))
|
|
(finish-output))
|
|
|
|
(autowrap:defcallback get-data-cb :void ((result (:pointer rkllm:result)) (userdata :pointer) (state rkllm:llm-call-state))
|
|
(declare (ignore userdata))
|
|
(actual-callback (autowrap:wrap-pointer result 'rkllm:result) state))
|
|
|
|
|
|
(defvar *empty-str* (autowrap:alloc-string ""))
|
|
|
|
(defvar *model-param* (autowrap:alloc 'rkllm:param))
|
|
(rkllm:get-packed-default *model-param*)
|
|
|
|
(defun update-params (&key (path "/srv/dev-disk-by-uuid-e704bc62-3f03-4c9f-a44a-7f7536ea97e1/public/compile/my_rkllm_server/models/Qwen2.5-Coder-3B-Instruct.rkllm")
|
|
(max-content-length 512) (max-new-tokens -1) (skip-special-tokens t) (top-k 20) (top-p 0.8) (temperature 0.7) (repeat-penalty 1.1)
|
|
(frequency-penalty 0.0) (presence-penalty 0.0) (mirostat 0) (mirostat-tau 5.0) (mirostat-eta 0.1) (is-async nil) (img-start *empty-str*)
|
|
(img-end *empty-str*) (img-content *empty-str*) (domain-base-id 0))
|
|
(unless (cffi:null-pointer-p (rkllm:param.model-path *model-param*))
|
|
(autowrap:free (rkllm:param.model-path *model-param*)))
|
|
(setf (rkllm:param.model-path *model-param*) (autowrap:alloc-string path)
|
|
(rkllm:param.max-context-len *model-param*) max-content-length
|
|
(rkllm:param.max-new-tokens *model-param*) max-new-tokens
|
|
(rkllm:param.skip-special-token *model-param*) (if skip-special-tokens 1 0)
|
|
(rkllm:param.top-k *model-param*) top-k
|
|
(rkllm:param.top-p *model-param*) top-p
|
|
(rkllm:param.temperature *model-param*) temperature
|
|
(rkllm:param.repeat-penalty *model-param*) repeat-penalty
|
|
(rkllm:param.frequency-penalty *model-param*) frequency-penalty
|
|
(rkllm:param.presence-penalty *model-param*) presence-penalty
|
|
(rkllm:param.mirostat *model-param*) mirostat
|
|
(rkllm:param.mirostat-tau *model-param*) mirostat-tau
|
|
(rkllm:param.mirostat-eta *model-param*) mirostat-eta
|
|
(rkllm:param.is-async *model-param*) (if is-async 1 0)
|
|
(rkllm:param.img-start *model-param*) img-start
|
|
(rkllm:param.img-end *model-param*) img-end
|
|
(rkllm:param.img-content *model-param*) img-content
|
|
(rkllm:param.extend-param.base-domain-id *model-param*) domain-base-id))
|
|
(update-params)
|
|
(defvar *model-handle* (autowrap:alloc-ptr :pointer))
|
|
(defvar *model*)
|
|
|
|
(defvar *model-lock* (bt2:make-lock :name "model-lock"))
|
|
|
|
(defun init-model ()
|
|
(unless (= 0 (rkllm:init *model-handle* *model-param* (autowrap:callback 'get-data-cb)))
|
|
(error "Failed to init!"))
|
|
(setf *model* (cffi:mem-ref *model-handle* :pointer)))
|
|
|
|
|
|
(defun prompt-model (prompt)
|
|
(autowrap:with-many-alloc ((iparam 'rkllm:infer-param)
|
|
(input 'rkllm:input))
|
|
(let ((prompt (autowrap:alloc-string prompt)))
|
|
(setf (rkllm:infer-param.mode iparam) rkllm:+infer-generate+
|
|
(rkllm:infer-param.lora-params iparam) (cffi:null-pointer)
|
|
(rkllm:infer-param.prompt-cache-params iparam) (cffi:null-pointer)
|
|
(rkllm:input.input-type input) rkllm:+input-prompt+
|
|
(rkllm:input.prompt-input input) prompt))
|
|
(rkllm:run *model* input iparam nil)
|
|
(autowrap:free prompt)))
|
|
|
|
(defparameter *msg-start* "<|im_start|>")
|
|
(defparameter *msg-end* "<|im_end|>")
|
|
|
|
(defun message->prompt (role &optional message)
|
|
(let ((*print-case* :downcase))
|
|
(format nil "~a~a~%~a~a~%" *msg-start* role (or message "") (if message *msg-end* ""))))
|
|
(defun messages->prompt (messages)
|
|
(apply #'concatenate 'string
|
|
(mapcar (lambda (msg)
|
|
(message->prompt (car msg) (cadr msg)))
|
|
(append messages '((:assistant))))))
|
|
|
|
(defvar *clack-server* (if (boundp '*clack-server*) *clack-server* nil))
|
|
|
|
(defun chat-handler (env)
|
|
(bind (((&key request-method &allow-other-keys) env))
|
|
(if (eq :post request-method)
|
|
(bind (((&key raw-body &allow-other-keys) env)
|
|
(post-data (yason:parse
|
|
(flexi-streams:make-flexi-stream raw-body :external-format :utf-8)))
|
|
(resp nil))
|
|
(format t "~&; INFO recieved ~a~%" (alexandria:hash-table-plist post-data))
|
|
(lambda (responder)
|
|
(let ((writer (funcall responder '(200 (:content-type "application/json"))))
|
|
(yason:*symbol-encoder* 'yason:encode-symbol-as-lowercase))
|
|
(loop :for chunk :across "Hello!"
|
|
:for i :upfrom 0
|
|
:do (funcall writer
|
|
(with-output-to-string (str)
|
|
(yason:encode
|
|
`(:id "bla" :object "blub" :choices ,(reverse (push `(:index ,i :data ,(format nil "~a" chunk)) resp)))
|
|
str)))
|
|
(sleep 0.5)
|
|
:while chunk
|
|
:finally (funcall writer nil :close t)))))
|
|
'(405 nil ("Only POST")))))
|
|
|
|
(defun server-handler (env)
|
|
(bind (((&key path-info remote-addr &allow-other-keys) env)
|
|
(path-sym (intern (if (string= path-info "") "/" (string-upcase path-info)) :keyword)))
|
|
(case path-sym
|
|
(:/rkllm-chat (chat-handler env))
|
|
(:/rkllm-abort `(200 nil (,(format nil "Welcome ~a. You should not have come here (~a) " remote-addr path-sym))))
|
|
(t `(200 nil ,(asdf:system-relative-pathname :rkllm-server (format nil "static~a" (if (eq path-sym :/) "/index.html" path-info))))))))
|
|
|
|
(defun start ()
|
|
(when *clack-server* (clack:stop *clack-server*))
|
|
(setf *clack-server*
|
|
(clack:clackup
|
|
(labels ((caller (env)
|
|
(restart-case (funcall 'server-handler env)
|
|
(retry () (caller env))
|
|
(return-500 () '(500 nil nil)))))
|
|
(lambda (env)
|
|
(caller env)))
|
|
:server :woo :address "0.0.0.0")))
|
|
(export '(start))
|