;============================================================
;   iron_ai.hsp — OpenAI 互換 Chat API クライアント
;
;   OpenAI / Anthropic / Ollama / LM Studio / llama.cpp server /
;   GitHub Models / Groq / DeepSeek / xAI など、OpenAI 互換の
;   /chat/completions エンドポイントを提供する任意のサーバを
;   1 行で叩ける薄いラッパ。
;
;   依存: iron_http.hsp + iron_json.hsp (このファイルで auto include)
;
;   API:
;     iron_ai_set_endpoint "https://api.openai.com/v1"
;     iron_ai_set_key      "sk-..."
;     iron_ai_set_model    "gpt-4o-mini"
;     iron_ai_set_system   "あなたは親切な AI です"     ; 任意
;     iron_ai_chat         "こんにちは", reply           ; 同期問い合わせ
;     mes reply
;
;   会話履歴 (multi-turn):
;     iron_ai_history_clear
;     iron_ai_history_add "user", "1+1は?"
;     iron_ai_chat        "じゃあ 2+2 は?", reply
;     ; (chat 後、自動で user/assistant が history に追加される)
;
;   対応サーバ例:
;     OpenAI:    endpoint="https://api.openai.com/v1"  model="gpt-4o-mini"
;     Anthropic: endpoint="https://api.anthropic.com/v1" model="claude-3-5-sonnet-20241022"
;                (※ Anthropic は messages API シンプル版で動作)
;     Ollama:    endpoint="http://localhost:11434/v1"  model="llama3.2"  key=""
;     LM Studio: endpoint="http://localhost:1234/v1"   model="..."       key=""
;     llama.cpp: endpoint="http://localhost:8080/v1"   model="..."       key=""
;     Groq:      endpoint="https://api.groq.com/openai/v1"
;     xAI:       endpoint="https://api.x.ai/v1"
;
;   注意:
;     - HTTPS 通信は WinHTTP のシステム証明書ストアを使うので OS が新しければ問題なし
;     - レスポンスの JSON パースは hspjson.dll が必要 (iron_json.hsp 経由)
;     - ストリーミング (SSE) は iron_ai_chat_stream_start / _poll / _end で利用可能 (下記参照)
;============================================================

#ifndef __iron_ai_hsp__
#define __iron_ai_hsp__

#include "iron_http.hsp"
#include "iron_json.hsp"

; hspllama.as は LOCAL_LLAMA backend 用。hspllama.dll がリンクされて
; いなくても #uselib は遅延解決なので HTTP backend のみで使う場合は
; 実体不要 (ただし #func の shim はロードされる)。
#include "hspllama.as"

;------------------------------------------------------------
; Backend selector constants (the values compared against in iron_ai_chat)
;   IRON_AI_BACKEND_HTTP        - OpenAI-compatible /chat/completions over HTTP (default)
;   IRON_AI_BACKEND_LOCAL_LLAMA - local .gguf model inference through hspllama.dll
;
; Planned:
;   IRON_AI_BACKEND_ONNX_LLM    - LLM through hsponnx.dll (not implemented)
;------------------------------------------------------------
#define global IRON_AI_BACKEND_HTTP         0
#define global IRON_AI_BACKEND_LOCAL_LLAMA  1

#module iron_ai

;------------------------------------------------------------
; Internal: escape text so it can be placed inside a JSON string literal.
;   src : text to escape
;   dst : receives the escaped text. It is (re)allocated here with sdim,
;         so the caller does not have to size it beforehand.
;
;   \ and " get a backslash prefix; LF, CR and TAB become \n, \r and \t;
;   every other byte below 0x20 is written as \u00XX (JSON does not allow
;   raw control characters in strings). All other bytes are copied as-is.
;
;   NOTE(review): the scan is byte-wise, so it assumes an encoding in which
;   0x5C / 0x22 never occur inside a multi-byte character (true for UTF-8,
;   not for Shift_JIS) - confirm which runtime this module is used with.
;------------------------------------------------------------
#deffunc _ai_json_escape str src, var dst,  local _c, local _e, local _len, local _src, local _w, local _hex
    _src = src
    _len = strlen(_src)
    _hex = "0123456789abcdef"
    ; Worst case is 6 output bytes per input byte (\u00XX) plus the terminator.
    ; sdim zero-fills, so the result is always NUL-terminated.
    sdim dst, _len * 6 + 16
    ; _w is the next write offset. Tracking it avoids a strlen(dst) call for
    ; every output byte, which made the loop quadratic in the input length.
    _w = 0
    repeat _len
        _c = peek(_src, cnt)

        ; _e = second character of a two-character escape, or 0 if none applies
        _e = 0
        if _c = '\\' : _e = '\\'
        if _c = '"'  : _e = '"'
        if _c = 0x0a : _e = 'n'
        if _c = 0x0d : _e = 'r'
        if _c = 0x09 : _e = 't'

        if _e {
            poke dst, _w, '\\'
            poke dst, _w + 1, _e
            _w += 2
        } else : if _c < 0x20 {
            ; remaining control bytes: \u00XX with two lowercase hex digits
            poke dst, _w, '\\'
            poke dst, _w + 1, 'u'
            poke dst, _w + 2, '0'
            poke dst, _w + 3, '0'
            poke dst, _w + 4, peek(_hex, _c >> 4)
            poke dst, _w + 5, peek(_hex, _c & 15)
            _w += 6
        } else {
            poke dst, _w, _c
            _w++
        }
    loop
    return

;------------------------------------------------------------
; 設定系
;------------------------------------------------------------
; iron_ai_set_endpoint "https://host/v1"
;   Store the API base URL. The request commands append a path such as
;   "/chat/completions" to it, so trailing '/' characters are removed here
;   to avoid producing "...//chat/completions".
#deffunc iron_ai_set_endpoint str url,  local _n
    _ai_endpoint = url
    _n = strlen(_ai_endpoint)
    ; walk back over trailing slashes (runs at most _n times; skipped when empty)
    repeat _n
        if peek(_ai_endpoint, _n - 1) != '/' : break
        _n--
    loop
    _ai_endpoint = strmid(_ai_endpoint, 0, _n)
    return

; iron_ai_set_key "sk-..."
;   Store the API key. When it is non-empty the request commands send it
;   as "Authorization: Bearer <key>"; an empty key sends no such header.
#deffunc iron_ai_set_key str key
    _ai_key = key : return

; iron_ai_set_model "gpt-4o-mini"
;   Store the model name placed in the "model" field of chat requests.
#deffunc iron_ai_set_model str model
    _ai_model = model : return

; iron_ai_set_system "prompt"
;   Store the system prompt. When non-empty it is sent as the first
;   message (role "system") of every chat request; "" disables it.
#deffunc iron_ai_set_system str sys
    _ai_system = sys : return

; iron_ai_set_max_tokens n
;   Store the max_tokens limit. HTTP requests include "max_tokens" only
;   when n > 0; the LOCAL_LLAMA path substitutes 512 when n <= 0.
#deffunc iron_ai_set_max_tokens int n
    _ai_max_tokens = n : return

; iron_ai_set_temperature t_x100
;   t_x100 is the sampling temperature multiplied by 100:
;     0 = 0.0, 70 = 0.7, 100 = 1.0
;   (an int is taken because HSP handles int arguments more reliably than
;   double ones). iron_ai_chat adds "temperature" to the request only
;   when the stored value is >= 0.
#deffunc iron_ai_set_temperature int t_x100
    _ai_temperature_x100 = t_x100 : return

;------------------------------------------------------------
; Backend selection
;   IRON_AI_BACKEND_HTTP        : talk to a server over HTTP (default)
;   IRON_AI_BACKEND_LOCAL_LLAMA : run a local model through hspllama.dll
;
; A model must have been loaded with iron_ai_local_llama_load before
; LOCAL_LLAMA can answer; otherwise iron_ai_chat returns -1001.
;------------------------------------------------------------
#deffunc iron_ai_set_backend int b
    _ai_backend = b : return

;   iron_ai_local_llama_load "model.gguf" [, n_ctx] [, n_gpu_layers]
;     path   : model file handed to llama_load
;     n_ctx_ : context size; values <= 0 fall back to 2048
;     n_gpu_ : forwarded to llama_load unchanged
;   llama_init is called once, on the first load. A model that is already
;   held is closed before the new one is loaded, so after a failed load
;   no model is held.
;   stat : 0 on success, otherwise the negative value llama_load stored
;          in its handle output
#deffunc iron_ai_local_llama_load str path, int n_ctx_, int n_gpu_,  local _handle, local _ctx, local _gpu
    _gpu = n_gpu_
    _ctx = n_ctx_
    if _ctx < 1 : _ctx = 2048

    ; one-time library initialisation
    if _ai_llama_inited = 0 {
        llama_init
        _ai_llama_inited = 1
    }

    ; drop the previously loaded model, if any
    if _ai_llama_h >= 0 {
        llama_close _ai_llama_h
        _ai_llama_h = -1
    }

    llama_load path, _ctx, _gpu, _handle
    if _handle >= 0 {
        _ai_llama_h = _handle
        return 0
    }
    return _handle

; iron_ai_local_llama_close
;   Close the model held by the LOCAL_LLAMA backend and mark the handle
;   as unused (-1). Does nothing when no model is loaded.
#deffunc iron_ai_local_llama_close
    if _ai_llama_h < 0 : return
    llama_close _ai_llama_h
    _ai_llama_h = -1
    return

;------------------------------------------------------------
; 会話履歴管理
;------------------------------------------------------------
; iron_ai_history_clear
;   Forget the accumulated conversation (the stored JSON message list).
#deffunc iron_ai_history_clear
    _ai_history_json = "" : return

; iron_ai_history_add role, content
;   Append one message to the conversation history.
;     role    : "user" / "assistant" / "system" / "tool"
;               (inserted into the JSON as-is, without escaping)
;     content : message text; JSON-escaped before it is stored
;   The history is kept as a comma-separated run of JSON objects, ready
;   to be dropped into a "messages":[ ... ] array.
#deffunc iron_ai_history_add str role, str content,  local _quoted, local _item
    _ai_json_escape content, _quoted
    sdim _item, strlen(_quoted) + 256
    _item = "{\"role\":\"" + role + "\",\"content\":\"" + _quoted + "\"}"
    if _ai_history_json = "" {
        ; first entry: no separator needed
        _ai_history_json = _item
    } else {
        _ai_history_json += "," + _item
    }
    return

;------------------------------------------------------------
; iron_ai_chat msg, reply
;   Send one user message and store the assistant's answer in reply.
;   On success both the user message and the answer are appended to the
;   history so that the next call carries the conversation (multi-turn).
;
;     msg   : user message text
;     reply : receives the answer (reset to "" at entry)
;
;   stat:
;     200    success
;     -1001  LOCAL_LLAMA backend selected but no model is loaded
;     -1     HTTP 200 received but the body could not be loaded as JSON
;     other  the non-200 status reported by http_post
;------------------------------------------------------------
#deffunc iron_ai_chat str msg, var reply,  local _esc_msg, local _sys_esc, local _model_esc, local _body, local _full_url, local _resp, local _hid, local _msgs, local _hdr_buf, local _mt
    sdim reply, 4096
    reply = ""

    ; ---- LOCAL_LLAMA backend ----
    if _ai_backend = IRON_AI_BACKEND_LOCAL_LLAMA {
        if _ai_llama_h < 0 {
            ; no model loaded
            return -1001
        }
        _mt = _ai_max_tokens
        if _mt <= 0 : _mt = 512
        llama_chat _ai_llama_h, _ai_system, msg, _mt, reply
        ; record the exchange, same as the HTTP path
        iron_ai_history_add "user", msg
        iron_ai_history_add "assistant", reply
        return 200
    }

    ; ---- HTTP backend ----
    _ai_json_escape msg, _esc_msg

    ; Build the messages array: [system,] history..., current user message
    sdim _msgs, strlen(_ai_history_json) + strlen(_esc_msg) + strlen(_ai_system) * 2 + 1024
    _msgs = ""
    if strlen(_ai_system) > 0 {
        _ai_json_escape _ai_system, _sys_esc
        _msgs = "{\"role\":\"system\",\"content\":\"" + _sys_esc + "\"}"
    }
    if strlen(_ai_history_json) > 0 {
        if strlen(_msgs) > 0 : _msgs = _msgs + ","
        _msgs = _msgs + _ai_history_json
    }
    if strlen(_msgs) > 0 : _msgs = _msgs + ","
    _msgs = _msgs + "{\"role\":\"user\",\"content\":\"" + _esc_msg + "\"}"

    ; Whole request body. The model name is escaped too so that a quote or
    ; backslash in it cannot break the JSON.
    _ai_json_escape _ai_model, _model_esc
    sdim _body, strlen(_msgs) + 1024
    _body = "{\"model\":\"" + _model_esc + "\",\"messages\":[" + _msgs + "]"
    if _ai_max_tokens > 0 {
        _body = _body + ",\"max_tokens\":" + _ai_max_tokens
    }
    if _ai_temperature_x100 >= 0 {
        ; The stored value is temperature * 100. Split it into integer part
        ; and a zero-padded two-digit fraction: 7 -> 0.07, 70 -> 0.70,
        ; 100 -> 1.00, 150 -> 1.50.
        _body = _body + ",\"temperature\":" + strf("%d.%02d", _ai_temperature_x100 / 100, _ai_temperature_x100 \ 100)
    }
    _body = _body + "}"

    ; Authorization header (an empty string is passed when no key is set)
    sdim _hdr_buf, 2048
    if strlen(_ai_key) > 0 {
        _hdr_buf = "Authorization: Bearer " + _ai_key + "\r\n"
    } else {
        _hdr_buf = ""
    }
    http_set_header _hdr_buf

    ; URL
    sdim _full_url, 1024
    _full_url = _ai_endpoint + "/chat/completions"

    ; POST
    sdim _resp, 1
    http_post _full_url, _body, _resp, "application/json"
    if stat ! 200 {
        return stat
    }

    ; Parse the response JSON
    _hid = json_load(_resp)
    if _hid < 0 : return -1

    ; Extract choices[0].message.content
    reply = json_str(_hid, "choices[0].message.content")
    json_release _hid

    ; Record the exchange for the next call's context
    iron_ai_history_add "user", msg
    iron_ai_history_add "assistant", reply

    return 200

;------------------------------------------------------------
; iron_ai_transcribe "audio.wav", out_text [, "model"]
;
;   POST an audio file as multipart/form-data to the OpenAI-compatible
;   /audio/transcriptions endpoint and store the transcription in out_text.
;
;     audio_path : file to upload (sent as form field "file")
;     out_text   : receives the "text" field of the response ("" at entry)
;     model_name : model to request; "" selects "whisper-1"
;
;   Example servers:
;     OpenAI (api.openai.com)        -> model="whisper-1"
;     Groq (api.groq.com/openai/v1)  -> model="whisper-large-v3"
;     LM Studio / llama.cpp / Ollama -> depends on the server implementation
;
;   Audio formats: wav / mp3 / m4a / mp4 / mpeg / mpga / ogg / webm / flac
;   The MIME type is derived from the lower-cased file extension; anything
;   not listed below is sent as audio/wav.
;
;   stat: 200 on success, -1 when the body could not be loaded as JSON,
;         otherwise the non-200 status reported by http_post_file
;------------------------------------------------------------
#deffunc iron_ai_transcribe str audio_path, var out_text, str model_name,  local _full_url, local _resp, local _hid, local _model, local _kv, local _hdr_buf, local _ext, local _mime
    sdim out_text, 4096
    out_text = ""

    _model = model_name
    if strlen(_model) = 0 : _model = "whisper-1"

    ; extension -> MIME type (getpath 18 = extension only, lower-cased)
    _ext = getpath(audio_path, 18)
    _mime = "audio/wav"
    if _ext = ".mp3"  : _mime = "audio/mpeg"
    if _ext = ".mpeg" : _mime = "audio/mpeg"
    if _ext = ".mpga" : _mime = "audio/mpeg"
    if _ext = ".m4a"  : _mime = "audio/mp4"
    if _ext = ".mp4"  : _mime = "audio/mp4"
    if _ext = ".ogg"  : _mime = "audio/ogg"
    if _ext = ".webm" : _mime = "audio/webm"
    if _ext = ".flac" : _mime = "audio/flac"

    ; Authorization header (an empty string is passed when no key is set)
    sdim _hdr_buf, 2048
    if strlen(_ai_key) > 0 {
        _hdr_buf = "Authorization: Bearer " + _ai_key + "\r\n"
    } else {
        _hdr_buf = ""
    }
    http_set_header _hdr_buf

    ; extra multipart field
    sdim _kv, 256
    _kv = "model=" + _model

    ; URL
    sdim _full_url, 1024
    _full_url = _ai_endpoint + "/audio/transcriptions"

    ; multipart POST
    sdim _resp, 65536
    http_post_file _full_url, audio_path, "file", _mime, _kv, _resp
    if stat ! 200 {
        return stat
    }

    ; response JSON: {"text":"..."}
    _hid = json_load(_resp)
    if _hid < 0 : return -1
    out_text = json_str(_hid, "text")
    json_release _hid
    return 200

;------------------------------------------------------------
; ストリーミング (SSE) API
;
; iron_ai_chat_stream_start msg
;   開始。サーバに stream:true で POST、SSE 読み取り状態に入る。
;
; iron_ai_chat_stream_poll delta_var, done_var
;   次の delta テキストを取得。
;     delta_var : 新しく届いた部分テキスト (空文字の場合あり)
;     done_var  : 1 = ストリーム終了、0 = 続行
;   HSP 側は loop で回して delta を逐次表示する。
;
; iron_ai_chat_stream_end
;   接続を閉じる。
;
; 例:
;   iron_ai_chat_stream_start "日本の首都は?"
;   sdim full, 16384
;   repeat
;       iron_ai_chat_stream_poll delta, done
;       if strlen(delta) > 0 {
;           full = full + delta
;           ; 画面に delta を追記描画
;       }
;       if done : break
;       await 16
;   loop
;   iron_ai_chat_stream_end
;------------------------------------------------------------

; iron_ai_chat_stream_start msg
;   Build the same request as iron_ai_chat (system prompt, history, user
;   message, max_tokens, temperature) with "stream":true added, reset the
;   SSE parser state and open the stream with http_stream_open.
;
;   Unlike iron_ai_chat this command does not append anything to the
;   history and does not look at the selected backend; it always uses HTTP.
;
;   stat: whatever http_stream_open reported
#deffunc iron_ai_chat_stream_start str msg,  local _esc_msg, local _sys_esc, local _model_esc, local _body, local _msgs, local _full_url, local _hdr_buf
    _ai_json_escape msg, _esc_msg

    ; messages array: [system,] history..., current user message
    sdim _msgs, strlen(_esc_msg) + strlen(_ai_system) * 2 + strlen(_ai_history_json) + 1024
    _msgs = ""
    if strlen(_ai_system) > 0 {
        _ai_json_escape _ai_system, _sys_esc
        _msgs = "{\"role\":\"system\",\"content\":\"" + _sys_esc + "\"}"
    }
    if strlen(_ai_history_json) > 0 {
        if strlen(_msgs) > 0 : _msgs = _msgs + ","
        _msgs = _msgs + _ai_history_json
    }
    if strlen(_msgs) > 0 : _msgs = _msgs + ","
    _msgs = _msgs + "{\"role\":\"user\",\"content\":\"" + _esc_msg + "\"}"

    ; Request body. The model name is escaped so that a quote or backslash
    ; in it cannot break the JSON.
    _ai_json_escape _ai_model, _model_esc
    sdim _body, strlen(_msgs) + 1024
    _body = "{\"model\":\"" + _model_esc + "\",\"messages\":[" + _msgs + "],\"stream\":true"
    if _ai_max_tokens > 0 : _body = _body + ",\"max_tokens\":" + _ai_max_tokens
    if _ai_temperature_x100 >= 0 {
        ; stored value is temperature * 100: 7 -> 0.07, 70 -> 0.70, 100 -> 1.00
        _body = _body + ",\"temperature\":" + strf("%d.%02d", _ai_temperature_x100 / 100, _ai_temperature_x100 \ 100)
    }
    _body = _body + "}"

    ; Authorization header (an empty string is passed when no key is set)
    sdim _hdr_buf, 2048
    if strlen(_ai_key) > 0 {
        _hdr_buf = "Authorization: Bearer " + _ai_key + "\r\n"
    } else {
        _hdr_buf = ""
    }
    http_set_header _hdr_buf

    sdim _full_url, 1024
    _full_url = _ai_endpoint + "/chat/completions"

    ; fresh parser state for iron_ai_chat_stream_poll
    _ai_stream_pending = ""
    _ai_stream_done = 0
    http_stream_open _full_url, _body, "application/json"
    return stat

; iron_ai_chat_stream_poll out_delta, out_done
;   Consume at most one SSE event ("data: {...}" followed by a blank line)
;   from the open stream.
;     out_delta : text of choices[0].delta.content for this event, or ""
;     out_done  : 1 once the "[DONE]" event has been seen, else 0
;   stat: 1 when an event was consumed (or the stream is already done),
;         0 when no complete event is available yet
;
;   NOTE(review): http_stream_read returning <= 0 is treated as "nothing to
;   read right now". If it also reports a closed connection that way, a
;   stream that ends without "[DONE]" never sets out_done - confirm against
;   iron_http.hsp.
#deffunc iron_ai_chat_stream_poll var out_delta, var out_done,  local _buf, local _nl2, local _event, local _payload, local _hid
    sdim out_delta, 4096
    out_delta = ""
    out_done = 0
    if _ai_stream_done {
        out_done = 1
        return 1
    }

    ; Read up to 8 chunks until the pending buffer holds a complete event.
    repeat 8
        _nl2 = instr(_ai_stream_pending, 0, "\n\n")
        if _nl2 >= 0 : break

        sdim _buf, 8192
        http_stream_read _buf, 8191
        if stat <= 0 : break
        ; Servers may terminate lines with CRLF. Dropping raw CR bytes lets
        ; the "\n\n" search below match both styles; a CR inside a JSON
        ; payload is always escaped, so no data is lost.
        strrep _buf, "\r", ""
        _ai_stream_pending = _ai_stream_pending + _buf
    loop

    _nl2 = instr(_ai_stream_pending, 0, "\n\n")
    if _nl2 < 0 : return 0

    ; Split off the first event and keep the remainder for the next call.
    _event = strmid(_ai_stream_pending, 0, _nl2)
    _ai_stream_pending = strmid(_ai_stream_pending, _nl2 + 2, strlen(_ai_stream_pending) - _nl2 - 2)

    ; Events that do not start with a data field carry no delta.
    if strmid(_event, 0, 5) != "data:" : return 1

    ; The space after "data:" is optional in the SSE format.
    _payload = strmid(_event, 5, strlen(_event) - 5)
    if peek(_payload, 0) = ' ' : _payload = strmid(_payload, 1, strlen(_payload) - 1)

    if _payload = "[DONE]" {
        _ai_stream_done = 1
        out_done = 1
        return 1
    }
    _hid = json_load(_payload)
    if _hid >= 0 {
        out_delta = json_str(_hid, "choices[0].delta.content")
        json_release _hid
    }
    return 1

; iron_ai_chat_stream_end
;   Close the stream opened by iron_ai_chat_stream_start and reset the
;   SSE parser state (pending buffer and done flag).
#deffunc iron_ai_chat_stream_end
    http_stream_close
    _ai_stream_done = 0 : _ai_stream_pending = ""
    return

#global

	; ---- Module state defaults (runs where this file is included) ----

	; connection settings
	sdim _ai_endpoint@iron_ai, 1024
	_ai_endpoint@iron_ai = "https://api.openai.com/v1"
	sdim _ai_key@iron_ai, 1024
	_ai_key@iron_ai = ""
	sdim _ai_model@iron_ai, 256
	_ai_model@iron_ai = "gpt-4o-mini"

	; prompt and conversation state
	sdim _ai_system@iron_ai, 4096
	_ai_system@iron_ai = ""
	sdim _ai_history_json@iron_ai, 65536
	_ai_history_json@iron_ai = ""

	; sampling options: 0 / -1 mean "leave the field out of the request"
	_ai_max_tokens@iron_ai = 0
	_ai_temperature_x100@iron_ai = -1

	; SSE streaming parser state
	sdim _ai_stream_pending@iron_ai, 65536
	_ai_stream_pending@iron_ai = ""
	_ai_stream_done@iron_ai = 0

	; LOCAL_LLAMA backend state (handle -1 = no model loaded)
	_ai_backend@iron_ai       = IRON_AI_BACKEND_HTTP
	_ai_llama_inited@iron_ai  = 0
	_ai_llama_h@iron_ai       = -1

#endif