package persistence import ( "context" "database/sql" "fmt" "time" "go.yandata.net/iod/iod/go-trustlog/api/logger" "go.yandata.net/iod/iod/go-trustlog/api/model" ) // OperationRecord 操作记录(包含数据库扩展字段) type OperationRecord struct { OpID string OpActor string DOID string ProducerID string RequestBodyHash string ResponseBodyHash string OpHash string Sign string OpSource string OpType string DOPrefix string DORepository string ClientIP *string ServerIP *string TrustlogStatus string CreatedAt time.Time } // ToModel 转换为 model.Operation func (r *OperationRecord) ToModel() *model.Operation { return &model.Operation{ OpID: r.OpID, OpActor: r.OpActor, Doid: r.DOID, ProducerID: r.ProducerID, RequestBodyHash: &r.RequestBodyHash, ResponseBodyHash: &r.ResponseBodyHash, OpSource: model.Source(r.OpSource), OpType: model.Type(r.OpType), DoPrefix: r.DOPrefix, DoRepository: r.DORepository, ClientIP: r.ClientIP, ServerIP: r.ServerIP, } } // CursorWorkerConfig Cursor工作器配置 type CursorWorkerConfig struct { // ScanInterval 扫描间隔(默认10秒,快速发现新记录) ScanInterval time.Duration // BatchSize 批量处理大小(默认100) BatchSize int // CursorKey Cursor键(默认 "operation_scan") CursorKey string // MaxRetryAttempt Cursor阶段最大重试次数(默认1,快速失败转入Retry) MaxRetryAttempt int // Enabled 是否启用Cursor工作器(默认启用) Enabled bool } // DefaultCursorWorkerConfig 默认Cursor工作器配置 func DefaultCursorWorkerConfig() CursorWorkerConfig { return CursorWorkerConfig{ ScanInterval: 10 * time.Second, BatchSize: 100, CursorKey: "operation_scan", MaxRetryAttempt: 1, Enabled: true, } } // CursorWorker Cursor工作器(任务发现) // 职责:扫描operation表,发现新的待存证记录,尝试存证 // 成功则更新状态,失败则加入重试表 type CursorWorker struct { config CursorWorkerConfig manager *PersistenceManager logger logger.Logger stopCh chan struct{} } // NewCursorWorker 创建Cursor工作器 func NewCursorWorker(config CursorWorkerConfig, manager *PersistenceManager) *CursorWorker { if config.ScanInterval == 0 { config.ScanInterval = 10 * time.Second } if config.BatchSize == 0 { config.BatchSize = 100 } if config.CursorKey == "" { config.CursorKey = "operation_scan" } if config.MaxRetryAttempt == 0 { config.MaxRetryAttempt = 1 } return &CursorWorker{ config: config, manager: manager, logger: manager.logger, stopCh: make(chan struct{}), } } // Start 启动Cursor工作器 func (w *CursorWorker) Start(ctx context.Context) error { if !w.config.Enabled { w.logger.InfoContext(ctx, "cursor worker disabled, skipping start") return nil } w.logger.InfoContext(ctx, "starting cursor worker", "scanInterval", w.config.ScanInterval, "batchSize", w.config.BatchSize, "cursorKey", w.config.CursorKey, ) // 初始化cursor(如果不存在) if err := w.initCursor(ctx); err != nil { return fmt.Errorf("failed to init cursor: %w", err) } // 启动定期扫描 go w.run(ctx) return nil } // Stop 停止Cursor工作器 func (w *CursorWorker) Stop(ctx context.Context) error { w.logger.InfoContext(ctx, "stopping cursor worker") close(w.stopCh) return nil } // run 运行循环 func (w *CursorWorker) run(ctx context.Context) { ticker := time.NewTicker(w.config.ScanInterval) defer ticker.Stop() for { select { case <-w.stopCh: w.logger.InfoContext(ctx, "cursor worker stopped") return case <-ticker.C: w.scan(ctx) } } } // scan 扫描并处理未存证记录 func (w *CursorWorker) scan(ctx context.Context) { w.logger.DebugContext(ctx, "cursor worker scanning", "cursorKey", w.config.CursorKey, ) // 1. 读取cursor cursor, err := w.getCursor(ctx) if err != nil { w.logger.ErrorContext(ctx, "failed to get cursor", "error", err, ) return } w.logger.DebugContext(ctx, "cursor position", "cursor", cursor, ) // 2. 扫描新记录 operations, err := w.findNewOperations(ctx, cursor) if err != nil { w.logger.ErrorContext(ctx, "failed to find new operations", "error", err, ) return } if len(operations) == 0 { w.logger.DebugContext(ctx, "no new operations found") return } w.logger.InfoContext(ctx, "found new operations", "count", len(operations), ) // 3. 处理每条记录 for _, op := range operations { w.processOperation(ctx, op) } } // initCursor 初始化cursor func (w *CursorWorker) initCursor(ctx context.Context) error { cursorRepo := w.manager.GetCursorRepo() // 创建初始cursor(使用当前时间) now := time.Now().Format(time.RFC3339Nano) err := cursorRepo.InitCursor(ctx, w.config.CursorKey, now) if err != nil { return fmt.Errorf("failed to init cursor: %w", err) } w.logger.InfoContext(ctx, "cursor initialized", "cursorKey", w.config.CursorKey, "initialValue", now, ) return nil } // getCursor 获取cursor值 func (w *CursorWorker) getCursor(ctx context.Context) (string, error) { cursorRepo := w.manager.GetCursorRepo() cursor, err := cursorRepo.GetCursor(ctx, w.config.CursorKey) if err != nil { return "", fmt.Errorf("failed to get cursor: %w", err) } // 如果cursor为空,使用一个很早的时间 if cursor == "" { cursor = time.Time{}.Format(time.RFC3339Nano) } return cursor, nil } // updateCursor 更新cursor值 func (w *CursorWorker) updateCursor(ctx context.Context, value string) error { cursorRepo := w.manager.GetCursorRepo() err := cursorRepo.UpdateCursor(ctx, w.config.CursorKey, value) if err != nil { return fmt.Errorf("failed to update cursor: %w", err) } w.logger.DebugContext(ctx, "cursor updated", "cursorKey", w.config.CursorKey, "newValue", value, ) return nil } // findNewOperations 查找新的待存证记录 func (w *CursorWorker) findNewOperations(ctx context.Context, cursor string) ([]*OperationRecord, error) { db := w.manager.db // 查询未存证的记录(created_at > cursor) rows, err := db.QueryContext(ctx, ` SELECT op_id, op_actor, doid, producer_id, request_body_hash, response_body_hash, op_hash, sign, op_source, op_type, do_prefix, do_repository, client_ip, server_ip, trustlog_status, created_at FROM operation WHERE trustlog_status = $1 AND created_at > $2 ORDER BY created_at ASC LIMIT $3 `, StatusNotTrustlogged, cursor, w.config.BatchSize) if err != nil { return nil, fmt.Errorf("failed to query operations: %w", err) } defer rows.Close() var operations []*OperationRecord for rows.Next() { op := &OperationRecord{} var clientIP, serverIP sql.NullString var createdAt time.Time err := rows.Scan( &op.OpID, &op.OpActor, &op.DOID, &op.ProducerID, &op.RequestBodyHash, &op.ResponseBodyHash, &op.OpHash, &op.Sign, &op.OpSource, &op.OpType, &op.DOPrefix, &op.DORepository, &clientIP, &serverIP, &op.TrustlogStatus, &createdAt, ) if err != nil { return nil, fmt.Errorf("failed to scan operation: %w", err) } // 处理可空字段 if clientIP.Valid { op.ClientIP = &clientIP.String } if serverIP.Valid { op.ServerIP = &serverIP.String } op.CreatedAt = createdAt operations = append(operations, op) } return operations, nil } // processOperation 处理单条记录 func (w *CursorWorker) processOperation(ctx context.Context, op *OperationRecord) { w.logger.DebugContext(ctx, "processing operation", "opID", op.OpID, ) // 尝试存证(最多重试 MaxRetryAttempt 次) var lastErr error for attempt := 0; attempt <= w.config.MaxRetryAttempt; attempt++ { if attempt > 0 { w.logger.DebugContext(ctx, "retrying trustlog", "opID", op.OpID, "attempt", attempt, ) } err := w.tryTrustlog(ctx, op) if err == nil { // 成功:更新状态 if err := w.updateOperationStatus(ctx, op.OpID, StatusTrustlogged); err != nil { w.logger.ErrorContext(ctx, "failed to update operation status", "opID", op.OpID, "error", err, ) } else { w.logger.InfoContext(ctx, "operation trustlogged successfully", "opID", op.OpID, ) } // 更新cursor w.updateCursor(ctx, op.CreatedAt.Format(time.RFC3339Nano)) return } lastErr = err if attempt < w.config.MaxRetryAttempt { time.Sleep(time.Second) // 简单的重试延迟 } } // 失败:加入重试表 w.logger.WarnContext(ctx, "failed to trustlog in cursor worker, adding to retry queue", "opID", op.OpID, "error", lastErr, ) retryRepo := w.manager.GetRetryRepo() nextRetryAt := time.Now().Add(1 * time.Minute) // 1分钟后重试 if err := retryRepo.AddRetry(ctx, op.OpID, lastErr.Error(), nextRetryAt); err != nil { w.logger.ErrorContext(ctx, "failed to add to retry queue", "opID", op.OpID, "error", err, ) } // 即使失败也更新cursor(避免卡在同一条记录) w.updateCursor(ctx, op.CreatedAt.Format(time.RFC3339Nano)) } // tryTrustlog 尝试存证(调用存证系统) func (w *CursorWorker) tryTrustlog(ctx context.Context, op *OperationRecord) error { publisher := w.manager.GetPublisher() if publisher == nil { return fmt.Errorf("publisher not available") } // 转换为 Operation 模型 modelOp := op.ToModel() // 调用存证 if err := publisher.Publish(ctx, modelOp); err != nil { return fmt.Errorf("failed to publish to trustlog: %w", err) } return nil } // updateOperationStatus 更新操作状态 func (w *CursorWorker) updateOperationStatus(ctx context.Context, opID string, status TrustlogStatus) error { opRepo := w.manager.GetOperationRepo() return opRepo.UpdateStatus(ctx, opID, status) }