go-trustlog/api/persistence/cursor_worker.go

package persistence

import (
	"context"
	"database/sql"
	"fmt"
	"time"

	"go.yandata.net/iod/iod/go-trustlog/api/logger"
	"go.yandata.net/iod/iod/go-trustlog/api/model"
)

// OperationRecord 操作记录（包含数据库扩展字段）
type OperationRecord struct {
	OpID             string
	OpActor          string
	DOID             string
	ProducerID       string
	RequestBodyHash  string
	ResponseBodyHash string
	OpHash           string
	Sign             string
	OpSource         string
	OpType           string
	DOPrefix         string
	DORepository     string
	ClientIP         *string
	ServerIP         *string
	TrustlogStatus   string
	CreatedAt        time.Time
}

// ToModel 转换为 model.Operation
func (r *OperationRecord) ToModel() *model.Operation {
	return &model.Operation{
		OpID:             r.OpID,
		OpActor:          r.OpActor,
		Doid:             r.DOID,
		ProducerID:       r.ProducerID,
		RequestBodyHash:  &r.RequestBodyHash,
		ResponseBodyHash: &r.ResponseBodyHash,
		OpSource:         model.Source(r.OpSource),
		OpType:           model.Type(r.OpType),
		DoPrefix:         r.DOPrefix,
		DoRepository:     r.DORepository,
		ClientIP:         r.ClientIP,
		ServerIP:         r.ServerIP,
	}
}

// CursorWorkerConfig Cursor工作器配置
type CursorWorkerConfig struct {
	// ScanInterval 扫描间隔（默认10秒，快速发现新记录）
	ScanInterval time.Duration
	// BatchSize 批量处理大小（默认100）
	BatchSize int
	// CursorKey Cursor键（默认 "operation_scan"）
	CursorKey string
	// MaxRetryAttempt Cursor阶段最大重试次数（默认1，快速失败转入Retry）
	MaxRetryAttempt int
	// Enabled 是否启用Cursor工作器（默认启用）
	Enabled bool
}

// DefaultCursorWorkerConfig 默认Cursor工作器配置
func DefaultCursorWorkerConfig() CursorWorkerConfig {
	return CursorWorkerConfig{
		ScanInterval:    10 * time.Second,
		BatchSize:       100,
		CursorKey:       "operation_scan",
		MaxRetryAttempt: 1,
		Enabled:         true,
	}
}

// CursorWorker Cursor工作器（任务发现）
// 职责：扫描operation表，发现新的待存证记录，尝试存证
// 成功则更新状态，失败则加入重试表
type CursorWorker struct {
	config  CursorWorkerConfig
	manager *PersistenceManager
	logger  logger.Logger
	stopCh  chan struct{}
}

// NewCursorWorker 创建Cursor工作器
func NewCursorWorker(config CursorWorkerConfig, manager *PersistenceManager) *CursorWorker {
	if config.ScanInterval == 0 {
		config.ScanInterval = 10 * time.Second
	}
	if config.BatchSize == 0 {
		config.BatchSize = 100
	}
	if config.CursorKey == "" {
		config.CursorKey = "operation_scan"
	}
	if config.MaxRetryAttempt == 0 {
		config.MaxRetryAttempt = 1
	}

	return &CursorWorker{
		config:  config,
		manager: manager,
		logger:  manager.logger,
		stopCh:  make(chan struct{}),
	}
}

// Start 启动Cursor工作器
func (w *CursorWorker) Start(ctx context.Context) error {
	if !w.config.Enabled {
		w.logger.InfoContext(ctx, "cursor worker disabled, skipping start")
		return nil
	}

	w.logger.InfoContext(ctx, "starting cursor worker",
		"scanInterval", w.config.ScanInterval,
		"batchSize", w.config.BatchSize,
		"cursorKey", w.config.CursorKey,
	)

	// 初始化cursor（如果不存在）
	if err := w.initCursor(ctx); err != nil {
		return fmt.Errorf("failed to init cursor: %w", err)
	}

	// 启动定期扫描
	go w.run(ctx)

	return nil
}

// Stop 停止Cursor工作器
func (w *CursorWorker) Stop(ctx context.Context) error {
	w.logger.InfoContext(ctx, "stopping cursor worker")
	close(w.stopCh)
	return nil
}

// run 运行循环
func (w *CursorWorker) run(ctx context.Context) {
	ticker := time.NewTicker(w.config.ScanInterval)
	defer ticker.Stop()

	for {
		select {
		case <-w.stopCh:
			w.logger.InfoContext(ctx, "cursor worker stopped")
			return
		case <-ticker.C:
			w.scan(ctx)
		}
	}
}

// scan 扫描并处理未存证记录
func (w *CursorWorker) scan(ctx context.Context) {
	w.logger.DebugContext(ctx, "cursor worker scanning",
		"cursorKey", w.config.CursorKey,
	)

	// 1. 读取cursor
	cursor, err := w.getCursor(ctx)
	if err != nil {
		w.logger.ErrorContext(ctx, "failed to get cursor",
			"error", err,
		)
		return
	}

	w.logger.DebugContext(ctx, "cursor position",
		"cursor", cursor,
	)

	// 2. 扫描新记录
	operations, err := w.findNewOperations(ctx, cursor)
	if err != nil {
		w.logger.ErrorContext(ctx, "failed to find new operations",
			"error", err,
		)
		return
	}

	if len(operations) == 0 {
		w.logger.DebugContext(ctx, "no new operations found")
		return
	}

	w.logger.InfoContext(ctx, "found new operations",
		"count", len(operations),
	)

	// 3. 处理每条记录
	for _, op := range operations {
		w.processOperation(ctx, op)
	}
}

// initCursor 初始化cursor
func (w *CursorWorker) initCursor(ctx context.Context) error {
	cursorRepo := w.manager.GetCursorRepo()

	// 创建初始cursor（使用当前时间）
	now := time.Now().Format(time.RFC3339Nano)
	err := cursorRepo.InitCursor(ctx, w.config.CursorKey, now)
	if err != nil {
		return fmt.Errorf("failed to init cursor: %w", err)
	}

	w.logger.InfoContext(ctx, "cursor initialized",
		"cursorKey", w.config.CursorKey,
		"initialValue", now,
	)

	return nil
}

// getCursor 获取cursor值
func (w *CursorWorker) getCursor(ctx context.Context) (string, error) {
	cursorRepo := w.manager.GetCursorRepo()

	cursor, err := cursorRepo.GetCursor(ctx, w.config.CursorKey)
	if err != nil {
		return "", fmt.Errorf("failed to get cursor: %w", err)
	}

	// 如果cursor为空，使用一个很早的时间
	if cursor == "" {
		cursor = time.Time{}.Format(time.RFC3339Nano)
	}

	return cursor, nil
}

// updateCursor 更新cursor值
func (w *CursorWorker) updateCursor(ctx context.Context, value string) error {
	cursorRepo := w.manager.GetCursorRepo()

	err := cursorRepo.UpdateCursor(ctx, w.config.CursorKey, value)
	if err != nil {
		return fmt.Errorf("failed to update cursor: %w", err)
	}

	w.logger.DebugContext(ctx, "cursor updated",
		"cursorKey", w.config.CursorKey,
		"newValue", value,
	)

	return nil
}

// findNewOperations 查找新的待存证记录
func (w *CursorWorker) findNewOperations(ctx context.Context, cursor string) ([]*OperationRecord, error) {
	db := w.manager.db

	// 查询未存证的记录（created_at > cursor）
	rows, err := db.QueryContext(ctx, `
		SELECT op_id, op_actor, doid, producer_id,
		       request_body_hash, response_body_hash, op_hash, sign,
		       op_source, op_type, do_prefix, do_repository,
		       client_ip, server_ip, trustlog_status, created_at
		FROM operation
		WHERE trustlog_status = $1
		  AND created_at > $2
		ORDER BY created_at ASC
		LIMIT $3
	`, StatusNotTrustlogged, cursor, w.config.BatchSize)
	if err != nil {
		return nil, fmt.Errorf("failed to query operations: %w", err)
	}
	defer rows.Close()

	var operations []*OperationRecord
	for rows.Next() {
		op := &OperationRecord{}
		var clientIP, serverIP sql.NullString
		var createdAt time.Time

		err := rows.Scan(
			&op.OpID, &op.OpActor, &op.DOID, &op.ProducerID,
			&op.RequestBodyHash, &op.ResponseBodyHash, &op.OpHash, &op.Sign,
			&op.OpSource, &op.OpType, &op.DOPrefix, &op.DORepository,
			&clientIP, &serverIP, &op.TrustlogStatus, &createdAt,
		)
		if err != nil {
			return nil, fmt.Errorf("failed to scan operation: %w", err)
		}

		// 处理可空字段
		if clientIP.Valid {
			op.ClientIP = &clientIP.String
		}
		if serverIP.Valid {
			op.ServerIP = &serverIP.String
		}
		op.CreatedAt = createdAt

		operations = append(operations, op)
	}

	return operations, nil
}

// processOperation 处理单条记录
func (w *CursorWorker) processOperation(ctx context.Context, op *OperationRecord) {
	w.logger.DebugContext(ctx, "processing operation",
		"opID", op.OpID,
	)

	// 尝试存证（最多重试 MaxRetryAttempt 次）
	var lastErr error
	for attempt := 0; attempt <= w.config.MaxRetryAttempt; attempt++ {
		if attempt > 0 {
			w.logger.DebugContext(ctx, "retrying trustlog",
				"opID", op.OpID,
				"attempt", attempt,
			)
		}

		err := w.tryTrustlog(ctx, op)
		if err == nil {
			// 成功：更新状态
			if err := w.updateOperationStatus(ctx, op.OpID, StatusTrustlogged); err != nil {
				w.logger.ErrorContext(ctx, "failed to update operation status",
					"opID", op.OpID,
					"error", err,
				)
			} else {
				w.logger.InfoContext(ctx, "operation trustlogged successfully",
					"opID", op.OpID,
				)
			}

			// 更新cursor
			w.updateCursor(ctx, op.CreatedAt.Format(time.RFC3339Nano))
			return
		}

		lastErr = err
		if attempt < w.config.MaxRetryAttempt {
			time.Sleep(time.Second) // 简单的重试延迟
		}
	}

	// 失败：加入重试表
	w.logger.WarnContext(ctx, "failed to trustlog in cursor worker, adding to retry queue",
		"opID", op.OpID,
		"error", lastErr,
	)

	retryRepo := w.manager.GetRetryRepo()
	nextRetryAt := time.Now().Add(1 * time.Minute) // 1分钟后重试
	if err := retryRepo.AddRetry(ctx, op.OpID, lastErr.Error(), nextRetryAt); err != nil {
		w.logger.ErrorContext(ctx, "failed to add to retry queue",
			"opID", op.OpID,
			"error", err,
		)
	}

	// 即使失败也更新cursor（避免卡在同一条记录）
	w.updateCursor(ctx, op.CreatedAt.Format(time.RFC3339Nano))
}

// tryTrustlog 尝试存证（调用存证系统）
func (w *CursorWorker) tryTrustlog(ctx context.Context, op *OperationRecord) error {
	publisher := w.manager.GetPublisher()
	if publisher == nil {
		return fmt.Errorf("publisher not available")
	}

	// 转换为 Operation 模型
	modelOp := op.ToModel()

	// 调用存证
	if err := publisher.Publish(ctx, modelOp); err != nil {
		return fmt.Errorf("failed to publish to trustlog: %w", err)
	}

	return nil
}

// updateOperationStatus 更新操作状态
func (w *CursorWorker) updateOperationStatus(ctx context.Context, opID string, status TrustlogStatus) error {
	opRepo := w.manager.GetOperationRepo()
	return opRepo.UpdateStatus(ctx, opID, status)
}