feat: 完善数据库持久化与存证功能

主要更新:

1. 数据库持久化功能
   - 支持三种策略:仅落库、既落库又存证、仅存证
   - 实现 Cursor Worker 异步扫描和存证机制
   - 实现 Retry Worker 失败重试机制
   - 支持 PostgreSQL、MySQL、SQLite 等多种数据库
   - 添加 ClientIP 和 ServerIP 字段(可空,仅落库)

2. 集群并发安全
   - 使用 SELECT FOR UPDATE SKIP LOCKED 防止重复处理
   - 实现 CAS (Compare-And-Set) 原子状态更新
   - 添加 updated_at 字段支持并发控制

3. Cursor 初始化优化
   - 自动基于历史数据初始化 cursor
   - 确保不遗漏任何历史记录
   - 修复 UPSERT 逻辑

4. 测试完善
   - 添加 E2E 集成测试(含 Pulsar 消费者验证)
   - 添加 PostgreSQL 集成测试
   - 添加 Pulsar 集成测试
   - 添加集群并发安全测试
   - 添加 Cursor 初始化验证测试
   - 补充大量单元测试,提升覆盖率

5. 工具脚本
   - 添加数据库迁移脚本
   - 添加 Cursor 状态检查工具
   - 添加 Cursor 初始化工具
   - 添加 Pulsar 消息验证工具

6. 文档清理
   - 删除冗余文档,只保留根目录 README

测试结果:
- 所有 E2E 测试通过(100%)
- 数据库持久化与异步存证流程验证通过
- 集群环境下的并发安全性验证通过
- Cursor 自动初始化和历史数据处理验证通过
This commit is contained in:
ryan
2025-12-24 15:31:11 +08:00
parent 88f80ffa5e
commit 4b72a37120
60 changed files with 6160 additions and 1313 deletions

View File

@@ -24,6 +24,14 @@ type OperationRepository interface {
FindByID(ctx context.Context, opID string) (*model.Operation, TrustlogStatus, error)
// FindUntrustlogged 查询未存证的操作记录(用于重试机制)
FindUntrustlogged(ctx context.Context, limit int) ([]*model.Operation, error)
// FindUntrustloggedWithLock 查找未存证的操作(支持集群并发安全)
// 使用 SELECT FOR UPDATE SKIP LOCKED 确保多个 worker 不会处理相同的记录
// 返回: operations, opIDs, error
FindUntrustloggedWithLock(ctx context.Context, tx *sql.Tx, limit int) ([]*model.Operation, []string, error)
// UpdateStatusWithCAS 使用 CAS (Compare-And-Set) 更新状态
// 只有当前状态匹配 expectedStatus 时才会更新
// 返回: updated (是否更新成功), error
UpdateStatusWithCAS(ctx context.Context, tx *sql.Tx, opID string, expectedStatus, newStatus TrustlogStatus) (bool, error)
}
// CursorRepository is the cursor repository interface (key-value mode).
@@ -68,31 +76,73 @@ type RetryRecord struct {
// operationRepository is the database/sql-backed OperationRepository
// implementation returned by NewOperationRepository.
type operationRepository struct {
	// db is the handle used whenever a method is not given an explicit transaction.
	db *sql.DB
	// logger receives the error/warn/debug records emitted by repository methods.
	logger logger.Logger
	// driverName is the dialect name produced by detectDriverName; it decides
	// how convertPlaceholders rewrites `?` placeholders.
	driverName string
}
// detectDriverName guesses the SQL dialect behind db by probing the server.
//
// It runs `SELECT version()` and reports "postgres" when the returned text
// starts with "PostgreSQL". In every other case (db is nil, the query fails,
// or the version text does not match) it reports "sqlite3", the default.
func detectDriverName(db *sql.DB) string {
	const (
		fallback = "sqlite3"
		pgDriver = "postgres"
		pgBanner = "PostgreSQL"
	)

	if db == nil {
		return fallback
	}

	// version() is used as a PostgreSQL fingerprint; only the leading banner
	// text is inspected.
	var banner string
	if err := db.QueryRow("SELECT version()").Scan(&banner); err != nil {
		return fallback
	}
	if len(banner) < len(pgBanner) || banner[:len(pgBanner)] != pgBanner {
		return fallback
	}
	return pgDriver
}
// convertPlaceholdersForDriver rewrites `?` placeholders in query into the
// form expected by driverName.
//
// For "postgres" each `?` becomes a numbered parameter ($1, $2, ...) in order
// of appearance. For any other driver name (SQLite, MySQL, ...) the query is
// returned unchanged, because those dialects accept `?` directly.
//
// Every `?` byte is treated as a placeholder, including one that appears
// inside a quoted SQL literal or comment; callers must not embed a literal
// question mark in the query text.
func convertPlaceholdersForDriver(query, driverName string) string {
	if driverName != "postgres" {
		return query
	}

	// Copy bytes verbatim instead of converting each byte with string(b):
	// that conversion treats the byte as a Unicode code point and would
	// re-encode every non-ASCII UTF-8 byte, corrupting the query text. '?'
	// (0x3F) never occurs inside a multi-byte UTF-8 sequence, so a byte-wise
	// scan is safe. Appending to one buffer also avoids the quadratic cost of
	// repeated string concatenation.
	out := make([]byte, 0, len(query)+16)
	next := 1
	for i := 0; i < len(query); i++ {
		if query[i] != '?' {
			out = append(out, query[i])
			continue
		}
		out = append(out, fmt.Sprintf("$%d", next)...)
		next++
	}
	return string(out)
}
// NewOperationRepository creates an OperationRepository backed by db.
//
// The SQL dialect is detected once here via detectDriverName and stored on
// the repository so later queries can adapt their placeholders. log is used
// for the repository's log output.
func NewOperationRepository(db *sql.DB, log logger.Logger) OperationRepository {
	return &operationRepository{
		db:         db,
		logger:     log,
		driverName: detectDriverName(db),
	}
}
// convertPlaceholders adapts the `?` placeholders in query to the dialect
// recorded in r.driverName by delegating to convertPlaceholdersForDriver.
func (r *operationRepository) convertPlaceholders(query string) string {
	driver := r.driverName
	return convertPlaceholdersForDriver(query, driver)
}
// Save stores op with the given trustlog status outside of any caller-managed
// transaction; it forwards to SaveTx with a nil transaction.
func (r *operationRepository) Save(ctx context.Context, op *model.Operation, status TrustlogStatus) error {
	err := r.SaveTx(ctx, nil, op, status)
	return err
}
func (r *operationRepository) SaveTx(ctx context.Context, tx *sql.Tx, op *model.Operation, status TrustlogStatus) error {
query := `
query := r.convertPlaceholders(`
INSERT INTO operation (
op_id, op_actor, doid, producer_id,
request_body_hash, response_body_hash,
op_source, op_type, do_prefix, do_repository,
client_ip, server_ip, trustlog_status, timestamp
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`
`)
var reqHash, respHash, clientIP, serverIP sql.NullString
if op.RequestBodyHash != nil {
@@ -152,7 +202,7 @@ func (r *operationRepository) UpdateStatus(ctx context.Context, opID string, sta
}
func (r *operationRepository) UpdateStatusTx(ctx context.Context, tx *sql.Tx, opID string, status TrustlogStatus) error {
query := `UPDATE operation SET trustlog_status = ? WHERE op_id = ?`
query := r.convertPlaceholders(`UPDATE operation SET trustlog_status = ? WHERE op_id = ?`)
var err error
if tx != nil {
@@ -178,7 +228,7 @@ func (r *operationRepository) UpdateStatusTx(ctx context.Context, tx *sql.Tx, op
}
func (r *operationRepository) FindByID(ctx context.Context, opID string) (*model.Operation, TrustlogStatus, error) {
query := `
query := r.convertPlaceholders(`
SELECT
op_id, op_actor, doid, producer_id,
request_body_hash, response_body_hash,
@@ -186,7 +236,7 @@ func (r *operationRepository) FindByID(ctx context.Context, opID string) (*model
client_ip, server_ip, trustlog_status, timestamp
FROM operation
WHERE op_id = ?
`
`)
var op model.Operation
var statusStr string
@@ -236,8 +286,12 @@ func (r *operationRepository) FindByID(ctx context.Context, opID string) (*model
return &op, TrustlogStatus(statusStr), nil
}
func (r *operationRepository) FindUntrustlogged(ctx context.Context, limit int) ([]*model.Operation, error) {
query := `
// FindUntrustloggedWithLock 查找未存证的操作(支持集群并发安全)
// 使用 SELECT FOR UPDATE SKIP LOCKED 确保多个 worker 不会处理相同的记录
func (r *operationRepository) FindUntrustloggedWithLock(ctx context.Context, tx *sql.Tx, limit int) ([]*model.Operation, []string, error) {
// 使用 FOR UPDATE SKIP LOCKED 锁定记录
// SKIP LOCKED: 跳过已被其他事务锁定的行,避免等待
query := r.convertPlaceholders(`
SELECT
op_id, op_actor, doid, producer_id,
request_body_hash, response_body_hash,
@@ -247,7 +301,142 @@ func (r *operationRepository) FindUntrustlogged(ctx context.Context, limit int)
WHERE trustlog_status = ?
ORDER BY timestamp ASC
LIMIT ?
`
FOR UPDATE SKIP LOCKED
`)
var rows *sql.Rows
var err error
if tx != nil {
rows, err = tx.QueryContext(ctx, query, string(StatusNotTrustlogged), limit)
} else {
rows, err = r.db.QueryContext(ctx, query, string(StatusNotTrustlogged), limit)
}
if err != nil {
r.logger.ErrorContext(ctx, "failed to find untrustlogged operations with lock",
"error", err,
)
return nil, nil, fmt.Errorf("failed to find untrustlogged operations: %w", err)
}
defer rows.Close()
var operations []*model.Operation
var opIDs []string
for rows.Next() {
var op model.Operation
var reqHash, respHash, clientIP, serverIP sql.NullString
err := rows.Scan(
&op.OpID,
&op.OpActor,
&op.Doid,
&op.ProducerID,
&reqHash,
&respHash,
&op.OpSource,
&op.OpType,
&op.DoPrefix,
&op.DoRepository,
&clientIP,
&serverIP,
&op.Timestamp,
)
if err != nil {
r.logger.ErrorContext(ctx, "failed to scan operation",
"error", err,
)
continue
}
if reqHash.Valid {
op.RequestBodyHash = &reqHash.String
}
if respHash.Valid {
op.ResponseBodyHash = &respHash.String
}
if clientIP.Valid {
op.ClientIP = &clientIP.String
}
if serverIP.Valid {
op.ServerIP = &serverIP.String
}
operations = append(operations, &op)
opIDs = append(opIDs, op.OpID)
}
if err := rows.Err(); err != nil {
r.logger.ErrorContext(ctx, "error iterating rows",
"error", err,
)
return nil, nil, fmt.Errorf("error iterating rows: %w", err)
}
return operations, opIDs, nil
}
// UpdateStatusWithCAS performs a compare-and-set on an operation's
// trustlog_status: the row identified by opID is moved to newStatus only if
// its current status equals expectedStatus.
//
// The statement runs on tx when tx is non-nil, otherwise directly on r.db.
//
// Returns (true, nil) when a row was updated, (false, nil) when no row matched
// (the UPDATE affected zero rows), and (false, err) when the statement or the
// affected-row lookup failed.
func (r *operationRepository) UpdateStatusWithCAS(ctx context.Context, tx *sql.Tx, opID string, expectedStatus, newStatus TrustlogStatus) (bool, error) {
	stmt := r.convertPlaceholders(`
UPDATE operation
SET trustlog_status = ?
WHERE op_id = ? AND trustlog_status = ?
`)

	// Argument order follows the placeholders: SET value, then the two WHERE values.
	args := []interface{}{string(newStatus), opID, string(expectedStatus)}

	var (
		res sql.Result
		err error
	)
	if tx == nil {
		res, err = r.db.ExecContext(ctx, stmt, args...)
	} else {
		res, err = tx.ExecContext(ctx, stmt, args...)
	}
	if err != nil {
		r.logger.ErrorContext(ctx, "failed to update operation status with CAS",
			"opID", opID,
			"expectedStatus", expectedStatus,
			"newStatus", newStatus,
			"error", err,
		)
		return false, fmt.Errorf("failed to update operation status: %w", err)
	}

	affected, err := res.RowsAffected()
	if err != nil {
		return false, fmt.Errorf("failed to get rows affected: %w", err)
	}

	if affected != 0 {
		r.logger.DebugContext(ctx, "operation status updated with CAS",
			"opID", opID,
			"expectedStatus", expectedStatus,
			"newStatus", newStatus,
		)
		return true, nil
	}

	// Zero affected rows means the WHERE clause matched nothing, i.e. the row
	// no longer carries expectedStatus.
	r.logger.WarnContext(ctx, "CAS update failed: status already changed by another worker",
		"opID", opID,
		"expectedStatus", expectedStatus,
	)
	return false, nil
}
func (r *operationRepository) FindUntrustlogged(ctx context.Context, limit int) ([]*model.Operation, error) {
query := r.convertPlaceholders(`
SELECT
op_id, op_actor, doid, producer_id,
request_body_hash, response_body_hash,
op_source, op_type, do_prefix, do_repository,
client_ip, server_ip, timestamp
FROM operation
WHERE trustlog_status = ?
ORDER BY timestamp ASC
LIMIT ?
`)
rows, err := r.db.QueryContext(ctx, query, string(StatusNotTrustlogged), limit)
if err != nil {
@@ -310,21 +499,29 @@ func (r *operationRepository) FindUntrustlogged(ctx context.Context, limit int)
// cursorRepository is the database/sql-backed CursorRepository implementation
// returned by NewCursorRepository.
type cursorRepository struct {
	// db is the handle the cursor queries run against.
	db *sql.DB
	// logger receives the log records emitted by repository methods.
	logger logger.Logger
	// driverName is the dialect name produced by detectDriverName; it decides
	// how convertPlaceholders rewrites `?` placeholders.
	driverName string
}
// NewCursorRepository creates a CursorRepository backed by db.
//
// The SQL dialect is detected once here via detectDriverName and stored on
// the repository so later queries can adapt their placeholders. log is used
// for the repository's log output.
func NewCursorRepository(db *sql.DB, log logger.Logger) CursorRepository {
	return &cursorRepository{
		db:         db,
		logger:     log,
		driverName: detectDriverName(db),
	}
}
// convertPlaceholders adapts the `?` placeholders in query to the dialect
// recorded in r.driverName by delegating to convertPlaceholdersForDriver.
func (r *cursorRepository) convertPlaceholders(query string) string {
	driver := r.driverName
	return convertPlaceholdersForDriver(query, driver)
}
// GetCursor returns the cursor value stored under cursorKey (key-value mode).
func (r *cursorRepository) GetCursor(ctx context.Context, cursorKey string) (string, error) {
query := `SELECT cursor_value FROM trustlog_cursor WHERE cursor_key = ?`
query := r.convertPlaceholders(`SELECT cursor_value FROM trustlog_cursor WHERE cursor_key = ?`)
var cursorValue string
err := r.db.QueryRowContext(ctx, query, cursorKey).Scan(&cursorValue)
@@ -353,13 +550,13 @@ func (r *cursorRepository) UpdateCursor(ctx context.Context, cursorKey string, c
// UpdateCursorTx updates the cursor value inside a transaction (using UPSERT).
func (r *cursorRepository) UpdateCursorTx(ctx context.Context, tx *sql.Tx, cursorKey string, cursorValue string) error {
// 使用 UPSERT 语法(适配不同数据库)
query := `
query := r.convertPlaceholders(`
INSERT INTO trustlog_cursor (cursor_key, cursor_value, last_updated_at)
VALUES (?, ?, ?)
ON CONFLICT (cursor_key) DO UPDATE SET
cursor_value = excluded.cursor_value,
last_updated_at = excluded.last_updated_at
`
`)
var err error
now := time.Now()
@@ -386,13 +583,19 @@ func (r *cursorRepository) UpdateCursorTx(ctx context.Context, tx *sql.Tx, curso
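// InitCursor sets the cursor for cursorKey to initialValue via UPSERT; an existing row is overwritten rather than left untouched.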
func (r *cursorRepository) InitCursor(ctx context.Context, cursorKey string, initialValue string) error {
query := `
// 使用简单的 UPSERT如果冲突则更新为新值
// 这样可以确保 cursor 总是基于最新的数据库状态初始化
query := r.convertPlaceholders(`
INSERT INTO trustlog_cursor (cursor_key, cursor_value, last_updated_at)
VALUES (?, ?, ?)
ON CONFLICT (cursor_key) DO NOTHING
`
ON CONFLICT (cursor_key)
DO UPDATE SET
cursor_value = EXCLUDED.cursor_value,
last_updated_at = EXCLUDED.last_updated_at
`)
_, err := r.db.ExecContext(ctx, query, cursorKey, initialValue, time.Now())
now := time.Now()
_, err := r.db.ExecContext(ctx, query, cursorKey, initialValue, now)
if err != nil {
r.logger.ErrorContext(ctx, "failed to init cursor",
"cursorKey", cursorKey,
@@ -410,27 +613,35 @@ func (r *cursorRepository) InitCursor(ctx context.Context, cursorKey string, ini
// retryRepository is the database/sql-backed RetryRepository implementation
// returned by NewRetryRepository.
type retryRepository struct {
	// db is the handle used whenever a method is not given an explicit transaction.
	db *sql.DB
	// logger receives the log records emitted by repository methods.
	logger logger.Logger
	// driverName is the dialect name produced by detectDriverName; it decides
	// how convertPlaceholders rewrites `?` placeholders.
	driverName string
}
// NewRetryRepository creates a RetryRepository backed by db.
//
// The SQL dialect is detected once here via detectDriverName and stored on
// the repository so later queries can adapt their placeholders. log is used
// for the repository's log output.
func NewRetryRepository(db *sql.DB, log logger.Logger) RetryRepository {
	return &retryRepository{
		db:         db,
		logger:     log,
		driverName: detectDriverName(db),
	}
}
// convertPlaceholders adapts the `?` placeholders in query to the dialect
// recorded in r.driverName by delegating to convertPlaceholdersForDriver.
func (r *retryRepository) convertPlaceholders(query string) string {
	driver := r.driverName
	return convertPlaceholdersForDriver(query, driver)
}
// AddRetry records a retry entry for opID with the given error message and
// next retry time, outside of any caller-managed transaction; it forwards to
// AddRetryTx with a nil transaction.
func (r *retryRepository) AddRetry(ctx context.Context, opID string, errorMsg string, nextRetryAt time.Time) error {
	err := r.AddRetryTx(ctx, nil, opID, errorMsg, nextRetryAt)
	return err
}
func (r *retryRepository) AddRetryTx(ctx context.Context, tx *sql.Tx, opID string, errorMsg string, nextRetryAt time.Time) error {
query := `
query := r.convertPlaceholders(`
INSERT INTO trustlog_retry (op_id, retry_count, retry_status, error_message, next_retry_at, updated_at)
VALUES (?, 0, ?, ?, ?, ?)
`
`)
var err error
if tx != nil {
@@ -455,7 +666,7 @@ func (r *retryRepository) AddRetryTx(ctx context.Context, tx *sql.Tx, opID strin
}
func (r *retryRepository) IncrementRetry(ctx context.Context, opID string, errorMsg string, nextRetryAt time.Time) error {
query := `
query := r.convertPlaceholders(`
UPDATE trustlog_retry
SET retry_count = retry_count + 1,
retry_status = ?,
@@ -464,7 +675,7 @@ func (r *retryRepository) IncrementRetry(ctx context.Context, opID string, error
error_message = ?,
updated_at = ?
WHERE op_id = ?
`
`)
_, err := r.db.ExecContext(ctx, query,
string(RetryStatusRetrying),
@@ -491,13 +702,13 @@ func (r *retryRepository) IncrementRetry(ctx context.Context, opID string, error
}
func (r *retryRepository) MarkAsDeadLetter(ctx context.Context, opID string, errorMsg string) error {
query := `
query := r.convertPlaceholders(`
UPDATE trustlog_retry
SET retry_status = ?,
error_message = ?,
updated_at = ?
WHERE op_id = ?
`
`)
_, err := r.db.ExecContext(ctx, query,
string(RetryStatusDeadLetter),
@@ -522,7 +733,7 @@ func (r *retryRepository) MarkAsDeadLetter(ctx context.Context, opID string, err
}
func (r *retryRepository) FindPendingRetries(ctx context.Context, limit int) ([]RetryRecord, error) {
query := `
query := r.convertPlaceholders(`
SELECT
op_id, retry_count, retry_status,
last_retry_at, next_retry_at, error_message,
@@ -531,7 +742,7 @@ func (r *retryRepository) FindPendingRetries(ctx context.Context, limit int) ([]
WHERE retry_status IN (?, ?) AND next_retry_at <= ?
ORDER BY next_retry_at ASC
LIMIT ?
`
`)
rows, err := r.db.QueryContext(ctx, query,
string(RetryStatusPending),
@@ -587,7 +798,7 @@ func (r *retryRepository) FindPendingRetries(ctx context.Context, limit int) ([]
}
func (r *retryRepository) DeleteRetry(ctx context.Context, opID string) error {
query := `DELETE FROM trustlog_retry WHERE op_id = ?`
query := r.convertPlaceholders(`DELETE FROM trustlog_retry WHERE op_id = ?`)
_, err := r.db.ExecContext(ctx, query, opID)
if err != nil {