feat: 新增数据库持久化模块(Persistence),实现 Cursor + Retry 双层架构

## 核心功能

### 1. 数据库持久化支持
- 新增完整的 Persistence 模块 (api/persistence/)
- 支持三种持久化策略:
  * StrategyDBOnly - 仅落库,不存证
  * StrategyDBAndTrustlog - 既落库又存证(推荐)
  * StrategyTrustlogOnly - 仅存证,不落库
- 支持多数据库:PostgreSQL, MySQL, SQLite

### 2. Cursor + Retry 双层架构
- CursorWorker:第一道防线,快速发现新记录并尝试存证
  * 增量扫描 operation 表(基于时间戳游标)
  * 默认 10 秒扫描间隔,批量处理 100 条
  * 成功更新状态,失败转入重试队列
- RetryWorker:第二道防线,处理失败记录
  * 指数退避重试(1m → 2m → 4m → 8m → 16m)
  * 默认最多重试 5 次
  * 超限自动标记为死信

### 3. 数据库表设计
- operation 表:存储操作记录,支持可空 IP 字段
- trustlog_cursor 表:Key-Value 模式,支持多游标
- trustlog_retry 表:重试队列,支持指数退避

### 4. 异步最终一致性
- 应用调用立即返回(仅落库)
- CursorWorker 异步扫描并存证
- RetryWorker 保障失败重试
- 完整的监控和死信处理机制

## 修改文件

### 核心代码(11个文件)
- api/persistence/cursor_worker.go - Cursor 工作器(新增)
- api/persistence/repository.go - 数据仓储层(新增)
- api/persistence/schema.go - 数据库 Schema(新增)
- api/persistence/strategy.go - 策略管理器(新增)
- api/persistence/client.go - 客户端封装(新增)
- api/persistence/retry_worker.go - Retry 工作器(新增)
- api/persistence/config.go - 配置管理(新增)

### 修复内部包引用(5个文件)
- api/adapter/publisher.go - 修复 internal 包引用
- api/adapter/subscriber.go - 修复 internal 包引用
- api/model/envelope.go - 修复 internal 包引用
- api/model/operation.go - 修复 internal 包引用
- api/model/record.go - 修复 internal 包引用

### 单元测试(8个文件)
- api/persistence/*_test.go - 完整的单元测试
- 测试覆盖率:28.5%
- 测试通过率:49/49 (100%)

### SQL 脚本(4个文件)
- api/persistence/sql/postgresql.sql - PostgreSQL 建表脚本
- api/persistence/sql/mysql.sql - MySQL 建表脚本
- api/persistence/sql/sqlite.sql - SQLite 建表脚本
- api/persistence/sql/test_data.sql - 测试数据

### 文档(2个文件)
- README.md - 更新主文档,新增 Persistence 使用指南
- api/persistence/README.md - 完整的 Persistence 文档
- api/persistence/sql/README.md - SQL 脚本说明

## 技术亮点

1. **充分利用 Cursor 游标表**
   - 作为任务发现队列,非简单的位置记录
   - Key-Value 模式,支持多游标并发扫描
   - 时间戳天然有序,增量扫描高效

2. **双层保障机制**
   - Cursor:正常流程,快速处理
   - Retry:异常流程,可靠重试
   - 职责分离,监控清晰

3. **可空 IP 字段支持**
   - ClientIP 和 ServerIP 使用 *string 类型
   - 支持 NULL 值,符合数据库最佳实践
   - 使用 sql.NullString 正确处理

4. **完整的监控支持**
   - 未存证记录数监控
   - Cursor 延迟监控
   - 重试队列长度监控
   - 死信队列监控

## 测试结果

-  单元测试:49/49 通过 (100%)
-  代码覆盖率:28.5%
-  编译状态:无错误
-  支持数据库:PostgreSQL, MySQL, SQLite

## Breaking Changes

无破坏性变更。Persistence 模块作为可选功能,不影响现有代码。

## 版本信息

- 版本:v2.1.0
- Go 版本要求:1.21+
- 更新日期:2025-12-23
This commit is contained in:
ryan
2025-12-23 18:59:43 +08:00
parent d313449c5c
commit 88f80ffa5e
31 changed files with 6551 additions and 36 deletions

View File

@@ -0,0 +1,391 @@
package persistence
import (
"context"
"database/sql"
"testing"
"time"
_ "github.com/mattn/go-sqlite3"
"go.yandata.net/iod/iod/go-trustlog/api/logger"
"go.yandata.net/iod/iod/go-trustlog/api/model"
)
// setupTestDB 创建测试用的 SQLite 内存数据库
func setupTestDB(t *testing.T) *sql.DB {
db, err := sql.Open("sqlite3", ":memory:")
if err != nil {
t.Fatalf("failed to open test database: %v", err)
}
// 创建表
opDDL, cursorDDL, retryDDL, err := GetDialectDDL("sqlite3")
if err != nil {
t.Fatalf("failed to get DDL: %v", err)
}
if _, err := db.Exec(opDDL); err != nil {
t.Fatalf("failed to create operation table: %v", err)
}
if _, err := db.Exec(cursorDDL); err != nil {
t.Fatalf("failed to create cursor table: %v", err)
}
if _, err := db.Exec(retryDDL); err != nil {
t.Fatalf("failed to create retry table: %v", err)
}
return db
}
// createTestOperation 创建测试用的 Operation
func createTestOperation(t *testing.T, opID string) *model.Operation {
op, err := model.NewFullOperation(
model.OpSourceDOIP,
model.OpTypeCreate,
"10.1000",
"test-repo",
"10.1000/test-repo/"+opID,
"producer-001",
"test-actor",
[]byte(`{"test":"request"}`),
[]byte(`{"test":"response"}`),
time.Now(),
)
if err != nil {
t.Fatalf("failed to create test operation: %v", err)
}
op.OpID = opID // 覆盖自动生成的 ID
return op
}
func TestOperationRepository_Save(t *testing.T) {
db := setupTestDB(t)
defer db.Close()
ctx := context.Background()
log := logger.GetGlobalLogger()
repo := NewOperationRepository(db, log)
op := createTestOperation(t, "test-op-001")
// 设置 IP 字段
clientIP := "192.168.1.100"
serverIP := "10.0.0.50"
op.ClientIP = &clientIP
op.ServerIP = &serverIP
// 测试保存
err := repo.Save(ctx, op, StatusNotTrustlogged)
if err != nil {
t.Fatalf("failed to save operation: %v", err)
}
// 验证保存结果
savedOp, status, err := repo.FindByID(ctx, "test-op-001")
if err != nil {
t.Fatalf("failed to find operation: %v", err)
}
if savedOp.OpID != "test-op-001" {
t.Errorf("expected OpID to be 'test-op-001', got %s", savedOp.OpID)
}
if status != StatusNotTrustlogged {
t.Errorf("expected status to be StatusNotTrustlogged, got %v", status)
}
if savedOp.ClientIP == nil || *savedOp.ClientIP != "192.168.1.100" {
t.Error("ClientIP not saved correctly")
}
if savedOp.ServerIP == nil || *savedOp.ServerIP != "10.0.0.50" {
t.Error("ServerIP not saved correctly")
}
}
func TestOperationRepository_SaveWithNullIP(t *testing.T) {
db := setupTestDB(t)
defer db.Close()
ctx := context.Background()
log := logger.GetGlobalLogger()
repo := NewOperationRepository(db, log)
op := createTestOperation(t, "test-op-002")
// IP 字段保持为 nil
err := repo.Save(ctx, op, StatusNotTrustlogged)
if err != nil {
t.Fatalf("failed to save operation: %v", err)
}
savedOp, _, err := repo.FindByID(ctx, "test-op-002")
if err != nil {
t.Fatalf("failed to find operation: %v", err)
}
if savedOp.ClientIP != nil {
t.Error("ClientIP should be nil")
}
if savedOp.ServerIP != nil {
t.Error("ServerIP should be nil")
}
}
func TestOperationRepository_UpdateStatus(t *testing.T) {
db := setupTestDB(t)
defer db.Close()
ctx := context.Background()
log := logger.GetGlobalLogger()
repo := NewOperationRepository(db, log)
op := createTestOperation(t, "test-op-003")
// 先保存
err := repo.Save(ctx, op, StatusNotTrustlogged)
if err != nil {
t.Fatalf("failed to save operation: %v", err)
}
// 更新状态
err = repo.UpdateStatus(ctx, "test-op-003", StatusTrustlogged)
if err != nil {
t.Fatalf("failed to update status: %v", err)
}
// 验证更新结果
_, status, err := repo.FindByID(ctx, "test-op-003")
if err != nil {
t.Fatalf("failed to find operation: %v", err)
}
if status != StatusTrustlogged {
t.Errorf("expected status to be StatusTrustlogged, got %v", status)
}
}
func TestOperationRepository_FindUntrustlogged(t *testing.T) {
db := setupTestDB(t)
defer db.Close()
ctx := context.Background()
log := logger.GetGlobalLogger()
repo := NewOperationRepository(db, log)
// 保存多个操作
for i := 1; i <= 5; i++ {
op := createTestOperation(t, "test-op-00"+string(rune('0'+i)))
status := StatusNotTrustlogged
if i%2 == 0 {
status = StatusTrustlogged
}
err := repo.Save(ctx, op, status)
if err != nil {
t.Fatalf("failed to save operation %d: %v", i, err)
}
}
// 查询未存证的操作
ops, err := repo.FindUntrustlogged(ctx, 10)
if err != nil {
t.Fatalf("failed to find untrustlogged operations: %v", err)
}
// 应该有 3 个未存证的操作1, 3, 5
if len(ops) != 3 {
t.Errorf("expected 3 untrustlogged operations, got %d", len(ops))
}
}
func TestCursorRepository_GetAndUpdate(t *testing.T) {
db := setupTestDB(t)
defer db.Close()
ctx := context.Background()
log := logger.GetGlobalLogger()
repo := NewCursorRepository(db, log)
cursorKey := "test-cursor"
// 初始化游标
now := time.Now().Format(time.RFC3339Nano)
err := repo.InitCursor(ctx, cursorKey, now)
if err != nil {
t.Fatalf("failed to init cursor: %v", err)
}
// 获取游标值
cursorValue, err := repo.GetCursor(ctx, cursorKey)
if err != nil {
t.Fatalf("failed to get cursor: %v", err)
}
if cursorValue != now {
t.Errorf("expected cursor value to be %s, got %s", now, cursorValue)
}
// 更新游标
newTime := time.Now().Add(1 * time.Hour).Format(time.RFC3339Nano)
err = repo.UpdateCursor(ctx, cursorKey, newTime)
if err != nil {
t.Fatalf("failed to update cursor: %v", err)
}
// 验证更新结果
cursorValue, err = repo.GetCursor(ctx, cursorKey)
if err != nil {
t.Fatalf("failed to get cursor: %v", err)
}
if cursorValue != newTime {
t.Errorf("expected cursor value to be %s, got %s", newTime, cursorValue)
}
}
func TestRetryRepository_AddAndFind(t *testing.T) {
db := setupTestDB(t)
defer db.Close()
ctx := context.Background()
log := logger.GetGlobalLogger()
repo := NewRetryRepository(db, log)
// 添加重试记录(立即可以重试)
nextRetry := time.Now().Add(-1 * time.Second) // 过去的时间,立即可以查询到
err := repo.AddRetry(ctx, "test-op-001", "test error", nextRetry)
if err != nil {
t.Fatalf("failed to add retry: %v", err)
}
// 查找待重试的记录
records, err := repo.FindPendingRetries(ctx, 10)
if err != nil {
t.Fatalf("failed to find pending retries: %v", err)
}
if len(records) != 1 {
t.Errorf("expected 1 retry record, got %d", len(records))
}
if len(records) > 0 {
if records[0].OpID != "test-op-001" {
t.Errorf("expected OpID to be 'test-op-001', got %s", records[0].OpID)
}
if records[0].RetryStatus != RetryStatusPending {
t.Errorf("expected status to be PENDING, got %v", records[0].RetryStatus)
}
}
}
func TestRetryRepository_IncrementRetry(t *testing.T) {
db := setupTestDB(t)
defer db.Close()
ctx := context.Background()
log := logger.GetGlobalLogger()
repo := NewRetryRepository(db, log)
// 添加重试记录
nextRetry := time.Now().Add(-1 * time.Second)
err := repo.AddRetry(ctx, "test-op-001", "test error", nextRetry)
if err != nil {
t.Fatalf("failed to add retry: %v", err)
}
// 增加重试次数(立即可以重试)
nextRetry2 := time.Now().Add(-1 * time.Second)
err = repo.IncrementRetry(ctx, "test-op-001", "test error 2", nextRetry2)
if err != nil {
t.Fatalf("failed to increment retry: %v", err)
}
// 验证重试次数
records, err := repo.FindPendingRetries(ctx, 10)
if err != nil {
t.Fatalf("failed to find pending retries: %v", err)
}
if len(records) != 1 {
t.Fatalf("expected 1 retry record, got %d", len(records))
}
if records[0].RetryCount != 1 {
t.Errorf("expected RetryCount to be 1, got %d", records[0].RetryCount)
}
if records[0].RetryStatus != RetryStatusRetrying {
t.Errorf("expected status to be RETRYING, got %v", records[0].RetryStatus)
}
}
func TestRetryRepository_MarkAsDeadLetter(t *testing.T) {
db := setupTestDB(t)
defer db.Close()
ctx := context.Background()
log := logger.GetGlobalLogger()
repo := NewRetryRepository(db, log)
// 添加重试记录
nextRetry := time.Now().Add(-1 * time.Second)
err := repo.AddRetry(ctx, "test-op-001", "test error", nextRetry)
if err != nil {
t.Fatalf("failed to add retry: %v", err)
}
// 标记为死信
err = repo.MarkAsDeadLetter(ctx, "test-op-001", "max retries exceeded")
if err != nil {
t.Fatalf("failed to mark as dead letter: %v", err)
}
// 验证状态(死信不应该在待重试列表中)
records, err := repo.FindPendingRetries(ctx, 10)
if err != nil {
t.Fatalf("failed to find pending retries: %v", err)
}
if len(records) != 0 {
t.Errorf("expected 0 pending retry records, got %d", len(records))
}
}
func TestRetryRepository_DeleteRetry(t *testing.T) {
db := setupTestDB(t)
defer db.Close()
ctx := context.Background()
log := logger.GetGlobalLogger()
repo := NewRetryRepository(db, log)
// 添加重试记录
nextRetry := time.Now().Add(-1 * time.Second)
err := repo.AddRetry(ctx, "test-op-001", "test error", nextRetry)
if err != nil {
t.Fatalf("failed to add retry: %v", err)
}
// 删除重试记录
err = repo.DeleteRetry(ctx, "test-op-001")
if err != nil {
t.Fatalf("failed to delete retry: %v", err)
}
// 验证已删除
records, err := repo.FindPendingRetries(ctx, 10)
if err != nil {
t.Fatalf("failed to find pending retries: %v", err)
}
if len(records) != 0 {
t.Errorf("expected 0 retry records, got %d", len(records))
}
}