;============================================================
; iron_naive_bayes.hsp — Gaussian / Multinomial Naive Bayes
;
; Gaussian NB: approximates each feature's class-conditional
;   distribution with a normal distribution.
; Multinomial NB: discrete non-negative count features
;   (bag-of-words style), with Laplace smoothing.
; Pure-HSP lightweight implementation (same class-posterior
; formulas as sklearn's GaussianNB / MultinomialNB).
;
; API:
;   nb_fit_gaussian     X (double), y_int, n, n_feat, n_classes
;   nb_fit_multinomial  X (double, non-negative counts), y_int, n,
;                       n_feat, n_classes, alpha
;                       (Laplace smoothing; omitted or 0 -> 1.0)
;   nb_predict          X, n, array v_out (int)
;   nb_predict_proba    X, n, n_classes, array v_proba (double)
;   nb_score            X, y, n -> accuracy (double)
;   nb_release
;============================================================
#ifndef __iron_naive_bayes_hsp__
#define __iron_naive_bayes_hsp__

#module iron_naive_bayes

; ---- module-local model state ----
sdim _nb_kind, 16           ; "gaussian" | "multinomial" | "" (unfitted)
_nb_kind = ""
dim  _nb_nfeat, 1           ; number of features
dim  _nb_nclass, 1          ; number of classes
ddim _nb_prior, 1           ; log prior per class
ddim _nb_mean, 1            ; gaussian: mean,     layout [class * n_feat + feat]
ddim _nb_var, 1             ; gaussian: variance, same layout
ddim _nb_logp, 1            ; multinomial: log feature probability, same layout

;---------------------------------------------------------
; Fit a Gaussian NB model.
; X     : n * n_feat doubles, row-major
; y_int : n class labels in [0, n_classes)
; Stores log priors, per-class means and (smoothed) variances.
;---------------------------------------------------------
#deffunc nb_fit_gaussian array X, array y_int, int n, int n_feat, int n_classes, \
	local _c, local _i, local _v, local _eps
	_nb_kind = "gaussian"
	_nb_nfeat = n_feat
	_nb_nclass = n_classes
	; ddim zero-fills, so no explicit clearing loop is needed.
	; (The old clearing loop indexed _nb_mean(cnt*n_feat+cnt): in HSP,
	; cnt always refers to the innermost repeat, so it only touched a
	; diagonal — removed as both redundant and wrong.)
	ddim _nb_prior, n_classes
	ddim _nb_mean, n_classes * n_feat
	ddim _nb_var, n_classes * n_feat

	; class counts and log priors
	dim _counts, n_classes                  ; dim zero-fills
	repeat n
		_counts(y_int(cnt)) = _counts(y_int(cnt)) + 1
	loop
	repeat n_classes
		if _counts(cnt) = 0 {
			_nb_prior(cnt) = -1e308         ; empty class: effectively -inf (avoids log(0) error)
		} else {
			_nb_prior(cnt) = log(1.0 * _counts(cnt) / n)
		}
	loop

	; per-class feature means
	repeat n
		_c = y_int(cnt)
		_i = cnt                            ; save sample index; inner cnt = feature index
		repeat n_feat
			_nb_mean(_c * n_feat + cnt) = _nb_mean(_c * n_feat + cnt) + X(_i * n_feat + cnt)
		loop
	loop
	repeat n_classes
		_c = cnt
		if _counts(_c) = 0 : continue
		repeat n_feat
			_nb_mean(_c * n_feat + cnt) = _nb_mean(_c * n_feat + cnt) / _counts(_c)
		loop
	loop

	; per-class feature variances, smoothed by _eps so log(var) and
	; the division in the likelihood never hit zero
	_eps = 1e-9
	repeat n
		_c = y_int(cnt)
		_i = cnt
		repeat n_feat
			_v = X(_i * n_feat + cnt) - _nb_mean(_c * n_feat + cnt)
			_nb_var(_c * n_feat + cnt) = _nb_var(_c * n_feat + cnt) + _v * _v
		loop
	loop
	repeat n_classes
		_c = cnt
		if _counts(_c) = 0 : continue
		repeat n_feat
			_nb_var(_c * n_feat + cnt) = _nb_var(_c * n_feat + cnt) / _counts(_c) + _eps
		loop
	loop
	return 0

;---------------------------------------------------------
; Fit a Multinomial NB model.
; X     : n * n_feat non-negative count features, row-major
; alpha : Laplace smoothing; omitted or <= 0 is treated as 1.0
;         (an omitted HSP double parameter arrives as 0.0, and the
;          documented default is 1.0)
;---------------------------------------------------------
#deffunc nb_fit_multinomial array X, array y_int, int n, int n_feat, int n_classes, double alpha, \
	local _c, local _i, local _a, local _sum
	_nb_kind = "multinomial"
	_nb_nfeat = n_feat
	_nb_nclass = n_classes
	_a = alpha
	if _a <= 0.0 : _a = 1.0                 ; apply documented default
	ddim _nb_prior, n_classes
	ddim _nb_logp, n_classes * n_feat

	; class counts and log priors
	dim _counts, n_classes
	repeat n
		_counts(y_int(cnt)) = _counts(y_int(cnt)) + 1
	loop
	repeat n_classes
		if _counts(cnt) = 0 {
			_nb_prior(cnt) = -1e308         ; empty class: effectively -inf (avoids log(0) error)
		} else {
			_nb_prior(cnt) = log(1.0 * _counts(cnt) / n)
		}
	loop

	; total feature counts per class
	ddim _fc, n_classes * n_feat            ; zero-filled
	repeat n
		_c = y_int(cnt)
		_i = cnt
		repeat n_feat
			_fc(_c * n_feat + cnt) = _fc(_c * n_feat + cnt) + X(_i * n_feat + cnt)
		loop
	loop

	; Laplace smoothing + log probabilities:
	; P(f|c) = (count(f,c) + alpha) / (sum_f count(f,c) + alpha * n_feat)
	repeat n_classes
		_c = cnt
		_sum = 0.0
		repeat n_feat
			_sum = _sum + _fc(_c * n_feat + cnt) + _a
		loop
		if _sum < 1e-12 : _sum = 1.0        ; degenerate guard; unreachable when _a > 0
		repeat n_feat
			_nb_logp(_c * n_feat + cnt) = log((_fc(_c * n_feat + cnt) + _a) / _sum)
		loop
	loop
	return 0

;---------------------------------------------------------
; Internal: log posterior (up to a constant) of sample i for
; every class, written into v_logpost (re-dimensioned here).
;---------------------------------------------------------
#deffunc _nb_logpost array X, int i, int n_feat, var v_logpost, \
	local _c, local _f, local _lp, local _v, local _dd
	ddim v_logpost, _nb_nclass
	repeat _nb_nclass
		_c = cnt
		_lp = _nb_prior(_c)
		if _nb_kind = "gaussian" {
			; sum of log N(x_f; mean, var) per feature
			repeat n_feat
				_f = cnt
				_dd = X(i * n_feat + _f) - _nb_mean(_c * n_feat + _f)
				_v = _nb_var(_c * n_feat + _f)
				_lp = _lp - 0.5 * log(2.0 * 3.141592653589793 * _v) - (_dd * _dd) / (2.0 * _v)
			loop
		}
		if _nb_kind = "multinomial" {
			; sum of count_f * log P(f|c)
			repeat n_feat
				_f = cnt
				_lp = _lp + X(i * n_feat + _f) * _nb_logp(_c * n_feat + _f)
			loop
		}
		v_logpost(_c) = _lp
	loop
	return

;---------------------------------------------------------
; Predict class labels for n samples into v_out (int array).
;---------------------------------------------------------
#deffunc nb_predict array X, int n, array v_out, \
	local _i, local _logp, local _c, local _best, local _bv
	dim v_out, n
	repeat n
		_i = cnt
		_nb_logpost X, _i, _nb_nfeat, _logp
		; argmax over classes
		_best = 0 : _bv = _logp(0)
		repeat _nb_nclass
			if _logp(cnt) > _bv : _bv = _logp(cnt) : _best = cnt
		loop
		v_out(_i) = _best
	loop
	return 0

;---------------------------------------------------------
; Class probabilities for n samples into v_proba
; (double array, layout [sample * n_classes + class]).
; Uses the log-sum-exp trick for numerical stability.
;---------------------------------------------------------
#deffunc nb_predict_proba array X, int n, int n_classes, array v_proba, \
	local _i, local _logp, local _c, local _maxlp, local _sum
	ddim v_proba, n * n_classes
	repeat n
		_i = cnt
		_nb_logpost X, _i, _nb_nfeat, _logp
		; subtract max log-posterior before exponentiating
		_maxlp = _logp(0)
		repeat _nb_nclass
			if _logp(cnt) > _maxlp : _maxlp = _logp(cnt)
		loop
		_sum = 0.0
		repeat _nb_nclass
			v_proba(_i * n_classes + cnt) = exp(_logp(cnt) - _maxlp)
			_sum = _sum + v_proba(_i * n_classes + cnt)
		loop
		if _sum > 1e-12 {
			repeat _nb_nclass
				v_proba(_i * n_classes + cnt) = v_proba(_i * n_classes + cnt) / _sum
			loop
		}
	loop
	return 0

;---------------------------------------------------------
; Accuracy of the fitted model on (X, y_int): fraction of
; samples whose argmax class matches the label.
;---------------------------------------------------------
#defcfunc nb_score array X, array y_int, int n, \
	local _i, local _logp, local _best, local _bv, local _ok
	_ok = 0
	repeat n
		_i = cnt
		_nb_logpost X, _i, _nb_nfeat, _logp
		_best = 0 : _bv = _logp(0)
		repeat _nb_nclass
			if _logp(cnt) > _bv : _bv = _logp(cnt) : _best = cnt
		loop
		if _best = y_int(_i) : _ok++
	loop
	return 1.0 * _ok / n

;---------------------------------------------------------
; Reset all model state and shrink the model arrays.
;---------------------------------------------------------
#deffunc nb_release
	_nb_kind = ""
	_nb_nfeat = 0 : _nb_nclass = 0
	ddim _nb_prior, 1
	ddim _nb_mean, 1
	ddim _nb_var, 1
	ddim _nb_logp, 1
	return 0

#global
#endif