feat: 完善数据库持久化与存证功能

主要更新:

1. 数据库持久化功能
   - 支持三种策略:仅落库、既落库又存证、仅存证
   - 实现 Cursor Worker 异步扫描和存证机制
   - 实现 Retry Worker 失败重试机制
   - 支持 PostgreSQL、MySQL、SQLite 等多种数据库
   - 添加 ClientIP 和 ServerIP 字段(可空,仅落库)

2. 集群并发安全
   - 使用 SELECT FOR UPDATE SKIP LOCKED 防止重复处理
   - 实现 CAS (Compare-And-Set) 原子状态更新
   - 添加 updated_at 字段支持并发控制

3. Cursor 初始化优化
   - 自动基于历史数据初始化 cursor
   - 确保不遗漏任何历史记录
   - 修复 UPSERT 逻辑

4. 测试完善
   - 添加 E2E 集成测试(含 Pulsar 消费者验证)
   - 添加 PostgreSQL 集成测试
   - 添加 Pulsar 集成测试
   - 添加集群并发安全测试
   - 添加 Cursor 初始化验证测试
   - 补充大量单元测试,提升覆盖率

5. 工具脚本
   - 添加数据库迁移脚本
   - 添加 Cursor 状态检查工具
   - 添加 Cursor 初始化工具
   - 添加 Pulsar 消息验证工具

6. 文档清理
   - 删除冗余文档,只保留根目录 README

测试结果:
- 所有 E2E 测试通过(100%)
- 数据库持久化与异步存证流程验证通过
- 集群环境下的并发安全性验证通过
- Cursor 自动初始化和历史数据处理验证通过
This commit is contained in:
ryan
2025-12-24 15:31:11 +08:00
parent 88f80ffa5e
commit 4b72a37120
60 changed files with 6160 additions and 1313 deletions

144
scripts/check_cursor.go Normal file
View File

@@ -0,0 +1,144 @@
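// Script that checks the cursor table and prints suggested fixes; it only reads from the database and does not modify any data.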
package main
import (
"context"
"database/sql"
"fmt"
"log"
"strings"
"time"
_ "github.com/lib/pq"
)
// Hard-coded connection settings for the PostgreSQL instance this script
// reads from. main combines them into the DSN passed to sql.Open.
const (
pgHost = "localhost"
pgPort = 5432
pgUser = "postgres"
pgPassword = "postgres"
pgDatabase = "trustlog"
)
// main prints a read-only diagnostic report about the trustlog cursor:
//
//  1. every row of trustlog_cursor,
//  2. row counts of the operation table grouped by trustlog_status, and
//  3. whether the 'operation_scan' cursor sits after the earliest
//     NOT_TRUSTLOGGED record (in which case a suggested fix is printed).
//
// A failure to list the cursor table is logged and the report continues.
// A failure to read the operation table or the cursor value aborts the
// program, because every later conclusion would be derived from zero values.
func main() {
	fmt.Println("🔍 Cursor Table Check Tool")
	fmt.Println(strings.Repeat("=", 60))

	// Connect to the database.
	dsn := fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s sslmode=disable",
		pgHost, pgPort, pgUser, pgPassword, pgDatabase)
	db, err := sql.Open("postgres", dsn)
	if err != nil {
		log.Fatalf("Failed to connect: %v", err)
	}
	defer db.Close()
	if err := db.Ping(); err != nil {
		log.Fatalf("Failed to ping: %v", err)
	}
	fmt.Println("✅ Connected to PostgreSQL")
	fmt.Println()

	ctx := context.Background()

	// 1. Dump the cursor table.
	fmt.Println("📊 Current Cursor Table:")
	rows, err := db.QueryContext(ctx, "SELECT cursor_key, cursor_value, last_updated_at FROM trustlog_cursor ORDER BY last_updated_at DESC")
	if err != nil {
		log.Printf("Failed to query cursor table: %v", err)
	} else {
		defer rows.Close()
		count := 0
		for rows.Next() {
			// Count the row even if it cannot be decoded, so a broken row is
			// not reported as "no cursor records".
			count++
			var key, value string
			var updatedAt time.Time
			if scanErr := rows.Scan(&key, &value, &updatedAt); scanErr != nil {
				log.Printf("Failed to scan cursor row: %v", scanErr)
				continue
			}
			fmt.Printf(" Key: %s\n", key)
			fmt.Printf(" Value: %s\n", value)
			fmt.Printf(" Updated: %v\n", updatedAt)
			fmt.Println()
		}
		if iterErr := rows.Err(); iterErr != nil {
			// Iteration stopped early; an empty result is not trustworthy.
			log.Printf("Failed to iterate cursor table: %v", iterErr)
		} else if count == 0 {
			fmt.Println(" ❌ No cursor records found!")
			fmt.Println()
			fmt.Println(" 问题原因:")
			fmt.Println(" - Cursor Worker 可能没有启动")
			fmt.Println(" - 或者初始化失败")
			fmt.Println()
		}
	}

	// 2. Summarize the operation table.
	// mustCount runs a single-value COUNT query and aborts on failure so a
	// broken query is never reported as "0 rows".
	mustCount := func(query string) int {
		var n int
		if countErr := db.QueryRowContext(ctx, query).Scan(&n); countErr != nil {
			log.Fatalf("Failed to query operation table: %v", countErr)
		}
		return n
	}

	fmt.Println("📊 Operation Table Status:")
	totalCount := mustCount("SELECT COUNT(*) FROM operation")
	fmt.Printf(" Total operations: %d\n", totalCount)
	trustloggedCount := mustCount("SELECT COUNT(*) FROM operation WHERE trustlog_status = 'TRUSTLOGGED'")
	fmt.Printf(" Trustlogged: %d\n", trustloggedCount)
	notTrustloggedCount := mustCount("SELECT COUNT(*) FROM operation WHERE trustlog_status = 'NOT_TRUSTLOGGED'")
	fmt.Printf(" Not trustlogged: %d\n", notTrustloggedCount)

	// Earliest pending record; MIN() yields NULL when there is none.
	var earliestTime sql.NullTime
	if minErr := db.QueryRowContext(ctx, "SELECT MIN(created_at) FROM operation WHERE trustlog_status = 'NOT_TRUSTLOGGED'").Scan(&earliestTime); minErr != nil {
		log.Fatalf("Failed to query earliest record: %v", minErr)
	}
	if earliestTime.Valid {
		fmt.Printf(" Earliest NOT_TRUSTLOGGED record: %v\n", earliestTime.Time)
	}
	fmt.Println()

	// 3. Relate the cursor position to the pending records.
	if notTrustloggedCount > 0 {
		fmt.Println("⚠️ Problem Detected:")
		fmt.Printf(" 有 %d 条记录未存证\n", notTrustloggedCount)

		var cursorValue sql.NullString
		lookupErr := db.QueryRowContext(ctx, "SELECT cursor_value FROM trustlog_cursor WHERE cursor_key = 'operation_scan'").Scan(&cursorValue)
		// A missing row is an expected state (handled below as "cursor is
		// empty"); any other error means the lookup itself failed.
		if lookupErr != nil && lookupErr != sql.ErrNoRows {
			log.Fatalf("Failed to query cursor value: %v", lookupErr)
		}

		if !cursorValue.Valid {
			fmt.Println(" Cursor 表为空!")
			fmt.Println()
			fmt.Println(" 可能的原因:")
			fmt.Println(" 1. Cursor Worker 从未启动")
			fmt.Println(" 2. PersistenceClient 没有启用 Cursor Worker")
			fmt.Println()
			fmt.Println(" 解决方案:")
			fmt.Println(" 1. 确保 PersistenceClient 配置了 EnableCursorWorker: true")
			fmt.Println(" 2. 手动初始化 cursor:")
			fmt.Println(" go run scripts/init_cursor.go")
		} else if cursorTime, parseErr := time.Parse(time.RFC3339Nano, cursorValue.String); parseErr != nil {
			// Without a valid timestamp the comparison below would run against
			// the zero time and never flag a problem.
			log.Printf("Failed to parse cursor value %q: %v", cursorValue.String, parseErr)
		} else {
			fmt.Printf(" Cursor 时间: %v\n", cursorTime)
			if earliestTime.Valid && earliestTime.Time.Before(cursorTime) {
				fmt.Println()
				fmt.Println(" ❌ 问题Cursor 时间晚于最早的未存证记录!")
				fmt.Println(" 这些记录不会被处理。")
				fmt.Println()
				fmt.Println(" 解决方案:")
				fmt.Println(" 1. 重置 cursor 到更早的时间:")
				fmt.Printf(" UPDATE trustlog_cursor SET cursor_value = '%s' WHERE cursor_key = 'operation_scan';\n",
					earliestTime.Time.Add(-1*time.Second).Format(time.RFC3339Nano))
				fmt.Println()
				fmt.Println(" 2. 或者使用脚本重置:")
				fmt.Println(" go run scripts/reset_cursor.go")
			}
		}
	} else {
		fmt.Println("✅ All operations are trustlogged!")
	}
	fmt.Println()
	fmt.Println(strings.Repeat("=", 60))
}

View File

@@ -0,0 +1,44 @@
package main
import (
"database/sql"
"fmt"
"log"
_ "github.com/lib/pq"
)
// main empties the trustlog_retry, operation and trustlog_cursor tables of
// the local PostgreSQL "trustlog" database.
//
// Each DELETE is best-effort: a failure is logged as a warning and the
// remaining tables are still processed. The final success line is printed
// only when every DELETE succeeded; otherwise a summary of the number of
// failed steps is printed instead.
func main() {
	dsn := "host=localhost port=5432 user=postgres password=postgres dbname=trustlog sslmode=disable"
	db, err := sql.Open("postgres", dsn)
	if err != nil {
		log.Fatalf("Failed to connect: %v", err)
	}
	defer db.Close()
	if err := db.Ping(); err != nil {
		log.Fatalf("Failed to ping: %v", err)
	}

	fmt.Println("🧹 Cleaning test data...")

	// Tables are cleaned in the same order as before: retry, operation, cursor.
	targets := []struct {
		table string // table name used in the DELETE statement
		label string // short name used in the warning message
	}{
		{"trustlog_retry", "retry"},
		{"operation", "operation"},
		{"trustlog_cursor", "cursor"},
	}

	failed := 0
	for _, t := range targets {
		if _, err := db.Exec("DELETE FROM " + t.table); err != nil {
			log.Printf("Warning: Failed to clean %s table: %v", t.label, err)
			failed++
		}
	}

	if failed > 0 {
		// Do not claim success when some tables still hold data.
		fmt.Printf("⚠️ Cleanup finished with %d failed step(s); see warnings above\n", failed)
		return
	}
	fmt.Println("✅ All test data cleaned!")
}

112
scripts/init_cursor.go Normal file
View File

@@ -0,0 +1,112 @@
// 初始化或重置 cursor 的脚本
package main
import (
"context"
"database/sql"
"fmt"
"log"
"strings"
"time"
_ "github.com/lib/pq"
)
// Hard-coded connection settings for the PostgreSQL instance whose cursor
// this script initializes. main combines them into the DSN passed to sql.Open.
const (
pgHost = "localhost"
pgPort = 5432
pgUser = "postgres"
pgPassword = "postgres"
pgDatabase = "trustlog"
)
// main creates or resets the 'operation_scan' row of trustlog_cursor.
//
// The cursor value is set to one second before the earliest operation whose
// trustlog_status is 'NOT_TRUSTLOGGED'. When no such operation exists it
// falls back to 2020-01-01T00:00:00Z. The row is written with an upsert, read
// back for verification, and the number of pending records is printed.
//
// Failures while computing, writing or verifying the cursor abort the
// program. A failure of the final informational count is only logged,
// because the cursor has already been written at that point.
func main() {
	fmt.Println("🔧 Cursor Initialization Tool")
	fmt.Println(strings.Repeat("=", 60))

	// Connect to the database.
	dsn := fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s sslmode=disable",
		pgHost, pgPort, pgUser, pgPassword, pgDatabase)
	db, err := sql.Open("postgres", dsn)
	if err != nil {
		log.Fatalf("Failed to connect: %v", err)
	}
	defer db.Close()
	if err := db.Ping(); err != nil {
		log.Fatalf("Failed to ping: %v", err)
	}
	fmt.Println("✅ Connected to PostgreSQL")
	fmt.Println()

	ctx := context.Background()

	// Find the earliest NOT_TRUSTLOGGED record; MIN() yields NULL when there
	// is none, hence sql.NullTime.
	var earliestTime sql.NullTime
	err = db.QueryRowContext(ctx,
		"SELECT MIN(created_at) FROM operation WHERE trustlog_status = 'NOT_TRUSTLOGGED'",
	).Scan(&earliestTime)
	if err != nil {
		log.Fatalf("Failed to query earliest record: %v", err)
	}

	var cursorValue string
	if earliestTime.Valid {
		// Place the cursor one second before the earliest pending record.
		cursorValue = earliestTime.Time.Add(-1 * time.Second).Format(time.RFC3339Nano)
		fmt.Printf("📊 Earliest NOT_TRUSTLOGGED record: %v\n", earliestTime.Time)
		fmt.Printf("📍 Setting cursor to: %s\n", cursorValue)
	} else {
		// Nothing is pending: fall back to a fixed early timestamp.
		cursorValue = time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC).Format(time.RFC3339Nano)
		fmt.Println("📊 No NOT_TRUSTLOGGED records found")
		fmt.Printf("📍 Setting cursor to default: %s\n", cursorValue)
	}
	fmt.Println()

	// Insert the cursor row, or overwrite it when the key already exists.
	_, err = db.ExecContext(ctx, `
INSERT INTO trustlog_cursor (cursor_key, cursor_value, last_updated_at)
VALUES ($1, $2, $3)
ON CONFLICT (cursor_key)
DO UPDATE SET cursor_value = EXCLUDED.cursor_value, last_updated_at = EXCLUDED.last_updated_at
`, "operation_scan", cursorValue, time.Now())
	if err != nil {
		log.Fatalf("Failed to init cursor: %v", err)
	}
	fmt.Println("✅ Cursor initialized successfully!")
	fmt.Println()

	// Read the row back to confirm what was stored.
	var savedValue string
	var updatedAt time.Time
	err = db.QueryRowContext(ctx,
		"SELECT cursor_value, last_updated_at FROM trustlog_cursor WHERE cursor_key = 'operation_scan'",
	).Scan(&savedValue, &updatedAt)
	if err != nil {
		log.Fatalf("Failed to verify cursor: %v", err)
	}
	fmt.Println("📊 Cursor Status:")
	fmt.Printf(" Key: operation_scan\n")
	fmt.Printf(" Value: %s\n", savedValue)
	fmt.Printf(" Updated: %v\n", updatedAt)
	fmt.Println()

	// Report how many records are still pending. This is informational only,
	// so a failure is logged instead of being printed as a misleading "0".
	var notTrustloggedCount int
	countErr := db.QueryRowContext(ctx, "SELECT COUNT(*) FROM operation WHERE trustlog_status = 'NOT_TRUSTLOGGED'").Scan(&notTrustloggedCount)
	if countErr != nil {
		log.Printf("Failed to count pending records: %v", countErr)
	} else {
		fmt.Printf("📝 Records to process: %d\n", notTrustloggedCount)
	}
	fmt.Println()
	fmt.Println("✅ Cursor Worker 现在会处理这些记录")
	fmt.Println(strings.Repeat("=", 60))
}

View File

@@ -0,0 +1,128 @@
package main
import (
"database/sql"
"fmt"
"log"
_ "github.com/lib/pq"
)
// main rebuilds the trustlog PostgreSQL schema: it drops the trustlog_retry,
// operation and trustlog_cursor tables and then recreates the three tables
// together with their indexes.
//
// Failing to drop a table or to create an index is logged as a warning and
// the migration continues. Failing to create a table aborts the program.
//
// The SQL raw-string literals are kept flush-left so the statement text sent
// to the server stays exactly as it was.
func main() {
	const connStr = "host=localhost port=5432 user=postgres password=postgres dbname=trustlog sslmode=disable"
	conn, openErr := sql.Open("postgres", connStr)
	if openErr != nil {
		log.Fatalf("Failed to connect: %v", openErr)
	}
	defer conn.Close()
	if pingErr := conn.Ping(); pingErr != nil {
		log.Fatalf("Failed to ping: %v", pingErr)
	}

	// dropTable removes a table if present; a failure is only a warning.
	dropTable := func(table, label string) {
		if _, execErr := conn.Exec("DROP TABLE IF EXISTS " + table); execErr != nil {
			log.Printf("Warning: Failed to drop %s table: %v", label, execErr)
		}
	}
	// createTable runs a CREATE TABLE statement; a failure is fatal.
	createTable := func(stmt, label string) {
		if _, execErr := conn.Exec(stmt); execErr != nil {
			log.Fatalf("Failed to create %s table: %v", label, execErr)
		}
	}
	// createIndex runs a CREATE INDEX statement; a failure is only a warning.
	createIndex := func(stmt, label string) {
		if _, execErr := conn.Exec(stmt); execErr != nil {
			log.Printf("Warning: Failed to create %s index: %v", label, execErr)
		}
	}

	fmt.Println("🔄 Migrating PostgreSQL schema...")

	// Drop the old tables.
	fmt.Println(" Dropping old tables...")
	dropTable("trustlog_retry", "retry")
	dropTable("operation", "operation")
	dropTable("trustlog_cursor", "cursor")

	// Recreate the tables and their indexes.
	fmt.Println(" Creating new tables...")
	createTable(`
CREATE TABLE IF NOT EXISTS operation (
op_id VARCHAR(32) NOT NULL PRIMARY KEY,
op_actor VARCHAR(64),
doid VARCHAR(512),
producer_id VARCHAR(32),
request_body_hash VARCHAR(128),
response_body_hash VARCHAR(128),
op_hash VARCHAR(128),
sign VARCHAR(512),
op_source VARCHAR(10),
op_type VARCHAR(30),
do_prefix VARCHAR(128),
do_repository VARCHAR(64),
client_ip VARCHAR(32),
server_ip VARCHAR(32),
trustlog_status VARCHAR(32),
timestamp TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)`, "operation")
	createIndex(`
CREATE INDEX IF NOT EXISTS idx_operation_timestamp ON operation(timestamp)`, "timestamp")
	createIndex(`
CREATE INDEX IF NOT EXISTS idx_operation_trustlog_status ON operation(trustlog_status)`, "status")
	createIndex(`
CREATE INDEX IF NOT EXISTS idx_operation_created_at ON operation(created_at)`, "created_at")

	createTable(`
CREATE TABLE IF NOT EXISTS trustlog_cursor (
cursor_key VARCHAR(64) NOT NULL PRIMARY KEY,
cursor_value TEXT NOT NULL,
last_updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)`, "cursor")

	createTable(`
CREATE TABLE IF NOT EXISTS trustlog_retry (
op_id VARCHAR(32) NOT NULL PRIMARY KEY,
retry_count INTEGER DEFAULT 0,
retry_status VARCHAR(32) DEFAULT 'PENDING',
last_retry_at TIMESTAMP,
next_retry_at TIMESTAMP,
error_message TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)`, "retry")
	createIndex(`
CREATE INDEX IF NOT EXISTS idx_retry_next_retry_at ON trustlog_retry(next_retry_at)`, "retry time")
	createIndex(`
CREATE INDEX IF NOT EXISTS idx_retry_retry_status ON trustlog_retry(retry_status)`, "retry status")

	fmt.Println("✅ Schema migration completed!")
}

View File

@@ -0,0 +1,103 @@
// 验证 Pulsar 消息的简单脚本
// 使用方法: go run scripts/verify_pulsar_messages.go
package main
import (
"context"
"fmt"
"log"
"time"
"github.com/apache/pulsar-client-go/pulsar"
)
// Hard-coded settings for the verification run.
const (
// pulsarURL is the broker address passed to pulsar.NewClient.
pulsarURL = "pulsar://localhost:6650"
// topic is the topic the verification consumer subscribes to.
topic = "persistent://public/default/operation"
// timeout bounds the total time main spends receiving messages.
timeout = 10 * time.Second
)
// main connects to Pulsar, subscribes to topic with a throw-away
// subscription starting at the earliest position, and prints metadata for up
// to 10 messages received within timeout. Each printed message is
// acknowledged on that subscription.
//
// The subscription name is unique per run, so it is removed again on exit;
// otherwise every run would leave another subscription behind on the broker.
func main() {
	fmt.Println("🔍 Pulsar Message Verification Tool")
	fmt.Println("=====================================")
	fmt.Printf("Pulsar URL: %s\n", pulsarURL)
	fmt.Printf("Topic: %s\n", topic)
	fmt.Println()

	// Create the Pulsar client.
	client, err := pulsar.NewClient(pulsar.ClientOptions{
		URL: pulsarURL,
	})
	if err != nil {
		log.Fatalf("❌ Failed to create Pulsar client: %v", err)
	}
	defer client.Close()
	fmt.Println("✅ Connected to Pulsar")

	// Create a consumer on a subscription that is unique to this run.
	subName := fmt.Sprintf("verify-sub-%d", time.Now().Unix())
	consumer, err := client.Subscribe(pulsar.ConsumerOptions{
		Topic:            topic,
		SubscriptionName: subName,
		Type:             pulsar.Shared,
		// Start reading from the earliest available message.
		SubscriptionInitialPosition: pulsar.SubscriptionPositionEarliest,
	})
	if err != nil {
		log.Fatalf("❌ Failed to create consumer: %v", err)
	}
	defer consumer.Close()
	// Registered after Close, so it runs first: drop the temporary
	// subscription while the consumer is still open.
	defer func() {
		if unsubErr := consumer.Unsubscribe(); unsubErr != nil {
			log.Printf("⚠️ Failed to remove temporary subscription %s: %v", subName, unsubErr)
		}
	}()
	fmt.Printf("✅ Consumer created: %s\n\n", subName)

	// Receive messages until the deadline or the display limit is reached.
	fmt.Printf("📩 Listening for messages (timeout: %v)...\n", timeout)
	fmt.Println("----------------------------------------")
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	messageCount := 0
	for {
		msg, err := consumer.Receive(ctx)
		if err != nil {
			// The deadline expiring is the normal way out of the loop.
			if ctx.Err() == context.DeadlineExceeded {
				break
			}
			log.Printf("⚠️ Error receiving message: %v", err)
			continue
		}
		messageCount++
		fmt.Printf("\n📨 Message #%d:\n", messageCount)
		fmt.Printf(" Key: %s\n", msg.Key())
		fmt.Printf(" Payload Size: %d bytes\n", len(msg.Payload()))
		fmt.Printf(" Publish Time: %v\n", msg.PublishTime())
		fmt.Printf(" Topic: %s\n", msg.Topic())
		fmt.Printf(" Message ID: %v\n", msg.ID())

		// Acknowledge the message on this run's subscription.
		consumer.Ack(msg)

		// Show at most 10 messages.
		if messageCount >= 10 {
			fmt.Println("\n⚠ Reached 10 messages limit, stopping...")
			break
		}
	}

	fmt.Println("\n========================================")
	if messageCount == 0 {
		fmt.Println("❌ No messages found in Pulsar")
		fmt.Println("\nPossible reasons:")
		fmt.Println(" 1. No operations have been published yet")
		fmt.Println(" 2. All messages have been consumed by other consumers")
		fmt.Println(" 3. Wrong topic name")
		fmt.Println("\nTo test, run the E2E test:")
		fmt.Println(" go test ./api/persistence -v -run TestE2E_DBAndTrustlog_WithPulsarConsumer")
	} else {
		fmt.Printf("✅ Found %d messages in Pulsar\n", messageCount)
	}
	fmt.Println("========================================")
}