从零搭建 Agent Harness 系列（四）四大原语的简单实现

发表于 2026-05-31 更新于 2026-07-23 分类于 Agent Waline：

上一篇博客我们介绍了Harness推崇的极简工具集的原则，现在就让我们简单实现一下4个Agent系统的原子能力

read_file

// internal/tools/read_file.go
package tools

import (
    "context"
    "encoding/json"
    "fmt"
    "io"
    "os"
    "path/filepath"

    "github.com/yourname/go-tiny-claw/internal/schema"
)

// ReadFileTool is the tool that reads the content of a local file.
type ReadFileTool struct {
    // workDir is the engine's WorkDir injected into the tool; it is the base
    // directory that caller-supplied relative paths are joined onto.
    workDir string 
}

// NewReadFileTool builds a ReadFileTool whose paths are resolved against workDir.
func NewReadFileTool(workDir string) *ReadFileTool {
    tool := new(ReadFileTool)
    tool.workDir = workDir
    return tool
}

// Name returns the identifier under which this tool is exposed.
func (t *ReadFileTool) Name() string {
    const toolName = "read_file"
    return toolName
}

// Definition describes the tool to the model: its name, a human-readable
// description, and a JSON-Schema object declaring the single required
// string parameter "path".
func (t *ReadFileTool) Definition() schema.ToolDefinition {
    pathParam := map[string]interface{}{
        "type":        "string",
        "description": "要读取的文件路径，如 cmd/claw/main.go",
    }
    inputSchema := map[string]interface{}{
        "type":       "object",
        "properties": map[string]interface{}{"path": pathParam},
        "required":   []string{"path"},
    }

    return schema.ToolDefinition{
        Name:        t.Name(),
        Description: "读取指定路径的文件内容。请提供相对工作区的路径。",
        InputSchema: inputSchema,
    }
}

// readFileArgs is the typed form of the JSON arguments accepted by
// ReadFileTool.Execute.
type readFileArgs struct {
    // Path is decoded from the "path" JSON field.
    Path string `json:"path"`
}

// Execute reads the file named by the "path" argument, resolved against the
// tool's working directory, and returns at most maxLen bytes of its content.
//
// args is the raw JSON argument object; it is decoded lazily into
// readFileArgs. A non-nil error is returned when the arguments cannot be
// decoded, when the path resolves outside the working directory, or when the
// file cannot be opened or read. When the file is longer than maxLen bytes the
// returned string is the leading part of the file followed by a truncation
// notice.
func (t *ReadFileTool) Execute(ctx context.Context, args json.RawMessage) (string, error) {
    // 1. Decode the JSON arguments into a typed struct.
    var input readFileArgs
    if err := json.Unmarshal(args, &input); err != nil {
        return "", fmt.Errorf("参数解析失败: %w", err)
    }

    // 2. Resolve the path and refuse anything that escapes the working
    // directory (e.g. "../../etc/passwd"). filepath.Join cleans the path but
    // does not confine it, so the relative form is checked explicitly.
    // NOTE: this is a lexical check only; symlinks inside the working
    // directory are not resolved.
    fullPath := filepath.Join(t.workDir, input.Path)
    rel, err := filepath.Rel(t.workDir, fullPath)
    if err != nil || rel == ".." || (len(rel) > 2 && rel[:2] == ".." && os.IsPathSeparator(rel[2])) {
        return "", fmt.Errorf("路径 %q 超出了工作区范围，拒绝访问", input.Path)
    }

    // 3. Open the file.
    file, err := os.Open(fullPath)
    if err != nil {
        return "", fmt.Errorf("打开文件失败: %w", err)
    }
    defer file.Close()

    // 4. Read at most maxLen+1 bytes. Bounding the read itself (instead of
    // reading everything and slicing afterwards) keeps memory use constant no
    // matter how large the file is; the one extra byte only tells us whether
    // the file was longer than the limit.
    const maxLen = 8000
    content, err := io.ReadAll(io.LimitReader(file, maxLen+1))
    if err != nil {
        return "", fmt.Errorf("读取文件内容失败: %w", err)
    }

    if len(content) > maxLen {
        // Back off by up to three bytes so the cut never lands in the middle
        // of a multi-byte UTF-8 sequence (continuation bytes are 10xxxxxx).
        cut := maxLen
        for i := 0; i < 3 && cut > 0 && content[cut]&0xC0 == 0x80; i++ {
            cut--
        }
        return fmt.Sprintf("%s\n\n...[由于内容过长，已被系统截断至前 %d 字节]...", string(content[:cut]), cut), nil
    }

    return string(content), nil
}

请仔细体会这 4 步中的第 4 步（长度截断保护）。

在大模型的 API 调用中，Token 就是金钱，Context 就是生命线。如果你放任大模型读取超大文件，不仅会引发高昂的账单，还会导致上下文爆炸，甚至导致 API 拒绝服务。驾驭工程的真谛就是：绝不把系统的安全性寄希望于大模型的理智，而是在底层的工具实现中强制兜底。

反思：关于文件读取截断的思考

在本讲的 read_file 实现中，我们采用了极其“粗暴”的 8000 字节硬截断（Hard Truncation）。作为单工具的兜底防御，这确实能防止单次读取把大模型撑爆。但在真实的实践中，比如代码库探索场景中，如果大模型需要分析一个 20000 行的核心业务类，这种粗暴截断会让模型永远看不到文件的后半部分，导致任务必然失败。

更成熟的解决方案是什么？

工具输出卸载（Tool Call Offloading）：工业级 Harness 的主流做法是在工具执行层实现输出卸载策略——当文件或命令输出超过阈值（通常为数千至数万字符）时，Harness 自动将完整内容写入磁盘临时目录，并向模型返回一段“头部预览 + 尾部预览 + 文件路径引用”的摘要消息，例如：“文件过长（共 5000 行，已卸载至 {临时文件路径}）。以下为首尾预览，如需完整内容请调用 read_file('{临时文件路径}')。” 通过这种方式，既保留了模型的决策依据，又倒逼其按需局部读取。
结合全局 Context Compaction：即使我们在单工具内通过卸载策略放宽了读取限制，在引擎的全局层面，工业级 Harness 依然在 Main Loop 中设有上下文窗口监控机制。当 Token 使用量接近模型上下文窗口的预设阈值（通常为 75%~98%）时，Harness 会触发 Compaction——对历史会话进行压缩（策略有多种，比如智能摘要等），保留架构决策、未解决的 Bug 等高价值信息，裁剪冗余工具输出，使 Agent 得以在不丢失关键上下文的前提下继续长时运行。

write_file

// internal/tools/write_file.go
package tools

import (
    "context"
    "encoding/json"
    "fmt"
    "os"
    "path/filepath"

    "github.com/yourname/go-tiny-claw/internal/schema"
)

// WriteFileTool is the tool that creates or overwrites a file.
type WriteFileTool struct {
    workDir string // base directory that caller-supplied paths are joined onto
}

// NewWriteFileTool builds a WriteFileTool whose paths are resolved against workDir.
func NewWriteFileTool(workDir string) *WriteFileTool {
    tool := new(WriteFileTool)
    tool.workDir = workDir
    return tool
}

// Name returns the identifier under which this tool is exposed.
func (t *WriteFileTool) Name() string {
    const toolName = "write_file"
    return toolName
}

// Definition describes the tool to the model: its name, a human-readable
// description, and a JSON-Schema object declaring the two required string
// parameters "path" and "content".
func (t *WriteFileTool) Definition() schema.ToolDefinition {
    properties := map[string]interface{}{
        "path": map[string]interface{}{
            "type":        "string",
            "description": "要写入的文件路径，如 src/main.go",
        },
        "content": map[string]interface{}{
            "type":        "string",
            "description": "要写入的完整文件内容",
        },
    }
    inputSchema := map[string]interface{}{
        "type":       "object",
        "properties": properties,
        "required":   []string{"path", "content"},
    }

    return schema.ToolDefinition{
        Name:        t.Name(),
        Description: "创建或覆盖写入一个文件。如果目录不存在会自动创建。请提供相对于工作区的相对路径。",
        InputSchema: inputSchema,
    }
}

// writeFileArgs is the typed form of the JSON arguments accepted by
// WriteFileTool.Execute.
type writeFileArgs struct {
    // Path is decoded from the "path" JSON field.
    Path    string `json:"path"`
    // Content is decoded from the "content" JSON field.
    Content string `json:"content"`
}

// Execute writes the "content" argument to the file named by the "path"
// argument, resolved against the tool's working directory. Missing parent
// directories are created (mode 0755) and the file is created or overwritten
// (mode 0644 on creation).
//
// A non-nil error is returned when the arguments cannot be decoded, when the
// path resolves outside the working directory, or when creating the
// directories or writing the file fails. On success a confirmation message
// naming the requested path is returned.
func (t *WriteFileTool) Execute(ctx context.Context, args json.RawMessage) (string, error) {
    var input writeFileArgs
    if err := json.Unmarshal(args, &input); err != nil {
        return "", fmt.Errorf("参数解析失败: %w", err)
    }

    // Security barrier: filepath.Join alone does NOT confine the result to
    // workDir ("../../etc/passwd" cleans to a path outside it), so verify that
    // the resolved path is still lexically inside the working directory.
    // NOTE: symlinks inside the working directory are not resolved.
    fullPath := filepath.Join(t.workDir, input.Path)
    rel, err := filepath.Rel(t.workDir, fullPath)
    if err != nil || rel == ".." || (len(rel) > 2 && rel[:2] == ".." && os.IsPathSeparator(rel[2])) {
        return "", fmt.Errorf("路径 %q 超出了工作区范围，拒绝写入", input.Path)
    }

    // Create any missing parent directories.
    if err := os.MkdirAll(filepath.Dir(fullPath), 0755); err != nil {
        return "", fmt.Errorf("创建父目录失败: %w", err)
    }

    // Write the file content; 0644 applies only when the file is created.
    if err := os.WriteFile(fullPath, []byte(input.Content), 0644); err != nil {
        return "", fmt.Errorf("写入文件失败: %w", err)
    }

    return fmt.Sprintf("成功将内容写入到文件: %s", input.Path), nil
}

bash

// internal/tools/bash.go
package tools

import (
    "context"
    "encoding/json"
    "fmt"
    "os/exec"
    "time"

    "github.com/yourname/go-tiny-claw/internal/schema"
)

// BashTool is the tool that runs a shell command through bash.
type BashTool struct {
    workDir string // directory the command is started in
}

// NewBashTool builds a BashTool whose commands are started in workDir.
func NewBashTool(workDir string) *BashTool {
    tool := new(BashTool)
    tool.workDir = workDir
    return tool
}

// Name returns the identifier under which this tool is exposed.
func (t *BashTool) Name() string {
    const toolName = "bash"
    return toolName
}

// Definition describes the tool to the model: its name, a human-readable
// description, and a JSON-Schema object declaring the single required
// string parameter "command".
func (t *BashTool) Definition() schema.ToolDefinition {
    commandParam := map[string]interface{}{
        "type":        "string",
        "description": "要执行的 bash 命令，例如: ls -la 或 go test ./...",
    }
    inputSchema := map[string]interface{}{
        "type":       "object",
        "properties": map[string]interface{}{"command": commandParam},
        "required":   []string{"command"},
    }

    return schema.ToolDefinition{
        Name:        t.Name(),
        Description: "在当前工作区执行任意的 bash 命令。支持链式命令(如 &&)。返回标准输出(stdout)和标准错误(stderr)。",
        InputSchema: inputSchema,
    }
}

// bashArgs is the typed form of the JSON arguments accepted by
// BashTool.Execute.
type bashArgs struct {
    // Command is decoded from the "command" JSON field.
    Command string `json:"command"`
}

// Execute runs the "command" argument via `bash -c` in the tool's working
// directory and returns its combined stdout/stderr.
//
// Only an argument-decoding failure is reported as a Go error. A failing or
// timed-out command is reported inside the returned string instead, so the
// model can read the failure and correct itself. The captured output is capped
// at maxLen bytes on every return path (success, failure and timeout).
func (t *BashTool) Execute(ctx context.Context, args json.RawMessage) (string, error) {
    var input bashArgs
    if err := json.Unmarshal(args, &input); err != nil {
        return "", fmt.Errorf("参数解析失败: %w", err)
    }

    // Boundary 1: time budget. The command is killed once the deadline passes,
    // so a long-running process (top, a web server, ...) cannot hang the engine.
    // NOTE(review): CombinedOutput also waits for the output pipe to be closed,
    // so a background child that inherits stdout/stderr may keep this call
    // blocked past the deadline; cmd.WaitDelay (Go 1.20+) bounds that wait —
    // confirm the project's minimum Go version before adopting it.
    timeoutCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
    defer cancel()

    // Wrapping the command in `bash -c` enables environment variables, pipes
    // and operators such as && on macOS/Linux.
    cmd := exec.CommandContext(timeoutCtx, "bash", "-c", input.Command)

    // Boundary 2: start the command in the configured working directory
    // rather than wherever the engine itself was launched.
    cmd.Dir = t.workDir

    out, err := cmd.CombinedOutput()
    outputStr := string(out)

    // Boundary 3: length cap. Applied before any branch below so that the
    // timeout and error paths cannot push unbounded output into the model's
    // context either.
    const maxLen = 8000
    if len(outputStr) > maxLen {
        // Back off by up to three bytes so the cut never lands in the middle
        // of a multi-byte UTF-8 sequence (continuation bytes are 10xxxxxx).
        cut := maxLen
        for i := 0; i < 3 && cut > 0 && outputStr[cut]&0xC0 == 0x80; i++ {
            cut--
        }
        outputStr = fmt.Sprintf("%s\n\n...[终端输出过长，已截断至前 %d 字节]...", outputStr[:cut], cut)
    }

    // Tell the model explicitly when the command was killed by the deadline.
    if timeoutCtx.Err() == context.DeadlineExceeded {
        return outputStr + "\n[警告: 命令执行超时(30s)，已被系统强制终止。如果是启动常驻服务，请尝试将其转入后台。]", nil
    }

    // Boundary 4: self-correction. A failing command is NOT a Go error; the
    // failure and the output are handed back as text for the model to analyse.
    if err != nil {
        return fmt.Sprintf("执行报错: %v\n输出:\n%s", err, outputStr), nil
    }

    // Give explicit feedback when a command succeeds silently (e.g. mkdir).
    if outputStr == "" {
        return "命令执行成功，无终端输出。", nil
    }

    return outputStr, nil
}

在这段不起眼的代码中，我们埋下了 4 个极其重要的 Harness 驾驭逻辑边界：工作区约束、超时控制、自纠错回传、长度截断。这就是驾驭工程的真谛：对大模型的业务意图给予最高自由度的 YOLO 信任，但在底层资源分配和运行边界上施加最冷酷的物理拦截。

edit

对于一个理想的 edit 工具，它的 JSON Schema 应该非常简单：提供 path（文件路径）、old_text（你要替换的旧代码）和 new_text（新代码）。

如果用 Go 语言的思路，底层实现无非就是一句 strings.Replace(fileContent, oldText, newText, 1)。

但在 AI Agent 的世界里，绝对不能这么写。大模型在输出 old_text 时，经常会犯一种极其顽固的错误——格式幻觉。

假设原始代码是这样的（前面带有 8 个空格的缩进）

1
2
3

        if user == nil {
            return err
        }

大模型在返回的 JSON 工具参数中，为了节省字数或者受限于其内部的注意力机制，它吐出的 old_text 很可能是去掉了缩进的：

1
2
3

if user == nil {
    return err
}

如果你使用精确匹配，strings.Replace 会因为找不到要替换的字符串而原样返回文件内容，替换实际上悄无声息地落空了。在没有容错机制的 Harness 中，Agent 会收到 Error: old_text not found。接着，Agent 会在下一个 Turn 拼命重试，依然不带缩进，最终陷入死循环，任务宣告失败。

降级策略：多级模糊匹配链（Chain of Responsibility）

多级模糊匹配链

// internal/tools/edit_file.go
package tools

import (
    "context"
    "encoding/json"
    "fmt"
    "os"
    "path/filepath"
    "strings"

    "github.com/yourname/go-tiny-claw/internal/schema"
)

// EditFileTool is the tool that performs a local text replacement inside an
// existing file.
type EditFileTool struct {
    // workDir is the base directory that caller-supplied paths are joined onto.
    workDir string
}

// NewEditFileTool builds an EditFileTool whose paths are resolved against workDir.
func NewEditFileTool(workDir string) *EditFileTool {
    tool := new(EditFileTool)
    tool.workDir = workDir
    return tool
}

// Name returns the identifier under which this tool is exposed.
func (t *EditFileTool) Name() string {
    const toolName = "edit_file"
    return toolName
}

// Definition describes the tool to the model: its name, a human-readable
// description, and a JSON-Schema object declaring the three required string
// parameters "path", "old_text" and "new_text".
func (t *EditFileTool) Definition() schema.ToolDefinition {
    properties := map[string]interface{}{
        "path": map[string]interface{}{
            "type":        "string",
            "description": "要修改的文件路径",
        },
        "old_text": map[string]interface{}{
            "type":        "string",
            "description": "文件中原有的文本。必须包含足够的上下文（建议上下各多包含几行），以确保在文件中的唯一性。",
        },
        "new_text": map[string]interface{}{
            "type":        "string",
            "description": "要替换成的新文本",
        },
    }
    inputSchema := map[string]interface{}{
        "type":       "object",
        "properties": properties,
        "required":   []string{"path", "old_text", "new_text"},
    }

    return schema.ToolDefinition{
        Name:        t.Name(),
        Description: "对现有文件进行局部的字符串替换。这比重写整个文件更安全、更快速。请提供足够的 old_text 上下文以确保匹配的唯一性。",
        InputSchema: inputSchema,
    }
}

// editFileArgs is the typed form of the JSON arguments accepted by
// EditFileTool.Execute.
type editFileArgs struct {
    // Path is decoded from the "path" JSON field.
    Path    string `json:"path"`
    // OldText is decoded from the "old_text" JSON field.
    OldText string `json:"old_text"`
    // NewText is decoded from the "new_text" JSON field.
    NewText string `json:"new_text"`
}

// internal/tools/edit_file.go (续)

// fuzzyReplace replaces exactly one occurrence of oldText in originalContent
// with newText, falling back through four increasingly tolerant matchers:
//
//   - L1: exact substring match. More than one exact match is an error.
//   - L2: match after normalizing CRLF line endings to LF on both sides.
//   - L3: L2 plus trimming leading/trailing whitespace from oldText.
//   - L4: line-by-line match ignoring each line's surrounding whitespace
//     (see lineByLineReplace).
//
// L2 and L3 only accept a unique match; otherwise matching falls through to
// the next level. An oldText that is empty or whitespace-only is rejected up
// front, because it cannot identify a location (at L4 it would otherwise match
// any blank line). When L2–L4 are used on a file that contains CRLF line
// endings, the result is converted back to CRLF so an edit does not silently
// rewrite the line endings of the whole file.
//
// At L3/L4 newText is inserted verbatim; if it lacks the surrounding
// indentation the result may be mis-indented, which is accepted as the lesser
// evil compared with failing the edit.
func fuzzyReplace(originalContent, oldText, newText string) (string, error) {
    if strings.TrimSpace(oldText) == "" {
        return "", fmt.Errorf("old_text 不能为空或仅包含空白字符，请提供要替换的原始代码片段")
    }

    // L1: exact match.
    switch count := strings.Count(originalContent, oldText); {
    case count == 1:
        return strings.Replace(originalContent, oldText, newText, 1), nil
    case count > 1:
        return "", fmt.Errorf("old_text 匹配到了 %d 处，请提供更多的上下文代码以确保唯一性", count)
    }

    // From here on matching happens on LF-normalized text; restore puts CRLF
    // back afterwards when the original file used it.
    usesCRLF := strings.Contains(originalContent, "\r\n")
    restore := func(s string) string {
        if !usesCRLF {
            return s
        }
        // Normalize first so a newText that already carries CRLF is not
        // turned into "\r\r\n".
        return strings.ReplaceAll(strings.ReplaceAll(s, "\r\n", "\n"), "\n", "\r\n")
    }

    // L2: line-ending normalization.
    normalizedContent := strings.ReplaceAll(originalContent, "\r\n", "\n")
    normalizedOld := strings.ReplaceAll(oldText, "\r\n", "\n")
    if strings.Count(normalizedContent, normalizedOld) == 1 {
        return restore(strings.Replace(normalizedContent, normalizedOld, newText, 1)), nil
    }

    // L3: ignore leading/trailing blank lines and spaces of oldText.
    // trimmedOld is non-empty thanks to the guard at the top.
    trimmedOld := strings.TrimSpace(normalizedOld)
    if strings.Count(normalizedContent, trimmedOld) == 1 {
        return restore(strings.Replace(normalizedContent, trimmedOld, newText, 1)), nil
    }

    // L4: per-line whitespace-insensitive match, the strongest fallback for
    // models that drop indentation.
    replaced, err := lineByLineReplace(normalizedContent, normalizedOld, newText)
    if err != nil {
        return "", err
    }
    return restore(replaced), nil
}

// lineByLineReplace splits content and oldText into lines, compares them with
// leading/trailing whitespace removed from every line, and slides a window of
// len(oldText lines) over content looking for a block that matches.
//
// Exactly one matching block must exist; it is replaced as whole lines by
// newText (inserted verbatim as a single element). An error is returned when
// oldText is empty or whitespace-only, when no block matches, or when more
// than one block matches.
func lineByLineReplace(content, oldText, newText string) (string, error) {
    trimmedOld := strings.TrimSpace(oldText)
    if trimmedOld == "" {
        // strings.Split never returns an empty slice, so without this guard a
        // blank oldText would become the single line "" and match blank lines.
        return "", fmt.Errorf("old_text 不能为空或仅包含空白字符，请提供要替换的原始代码片段")
    }

    contentLines := strings.Split(content, "\n")
    oldLines := strings.Split(trimmedOld, "\n")
    if len(contentLines) < len(oldLines) {
        return "", fmt.Errorf("找不到该代码片段")
    }

    // Strip surrounding whitespace from every line of the needle once.
    for i := range oldLines {
        oldLines[i] = strings.TrimSpace(oldLines[i])
    }

    matchCount := 0
    matchStartIndex := -1

    // Slide the window over the file and count every matching position.
    for i := 0; i+len(oldLines) <= len(contentLines); i++ {
        isMatch := true
        for j, want := range oldLines {
            if strings.TrimSpace(contentLines[i+j]) != want {
                isMatch = false
                break
            }
        }
        if isMatch {
            matchCount++
            matchStartIndex = i
        }
    }

    if matchCount == 0 {
        return "", fmt.Errorf("在文件中未找到 old_text，请大模型先调用 read_file 仔细确认文件内容和缩进")
    }
    if matchCount > 1 {
        return "", fmt.Errorf("模糊匹配到了 %d 处相似代码，请提供更多上下行代码以精确定位", matchCount)
    }

    // Rebuild the file: lines before the match, newText, lines after it.
    matchEndIndex := matchStartIndex + len(oldLines)
    newContentLines := make([]string, 0, len(contentLines)-len(oldLines)+1)
    newContentLines = append(newContentLines, contentLines[:matchStartIndex]...)
    newContentLines = append(newContentLines, newText)
    newContentLines = append(newContentLines, contentLines[matchEndIndex:]...)

    return strings.Join(newContentLines, "\n"), nil
}

// internal/tools/edit_file.go (续)

// Execute replaces the "old_text" argument with "new_text" inside the file
// named by the "path" argument, resolved against the tool's working directory.
//
// The file is read in full, passed through fuzzyReplace, and written back.
// A non-nil error is returned when the arguments cannot be decoded, when the
// path resolves outside the working directory, when the file cannot be read
// or written, or when fuzzyReplace cannot find a unique match (its error is
// returned unchanged so the model sees the precise reason). On success a
// confirmation message naming the requested path is returned.
func (t *EditFileTool) Execute(ctx context.Context, args json.RawMessage) (string, error) {
    var input editFileArgs
    if err := json.Unmarshal(args, &input); err != nil {
        return "", fmt.Errorf("参数解析失败: %w", err)
    }

    // filepath.Join cleans the path but does not confine it, so reject any
    // path that lexically escapes the working directory (e.g. "../x").
    // NOTE: symlinks inside the working directory are not resolved.
    fullPath := filepath.Join(t.workDir, input.Path)
    rel, err := filepath.Rel(t.workDir, fullPath)
    if err != nil || rel == ".." || (len(rel) > 2 && rel[:2] == ".." && os.IsPathSeparator(rel[2])) {
        return "", fmt.Errorf("路径 %q 超出了工作区范围，拒绝修改", input.Path)
    }

    // 1. Read the current file content.
    contentBytes, err := os.ReadFile(fullPath)
    if err != nil {
        return "", fmt.Errorf("读取文件失败，请确认路径是否正确: %w", err)
    }

    // 2. Run the multi-level fuzzy replacement.
    newContent, err := fuzzyReplace(string(contentBytes), input.OldText, input.NewText)
    if err != nil {
        // Pass the specific reason (e.g. multiple matches) straight back so
        // the model can correct its own arguments.
        return "", err
    }

    // 3. Write the result back; 0644 applies only if the file is created.
    if err := os.WriteFile(fullPath, []byte(newContent), 0644); err != nil {
        return "", fmt.Errorf("写回文件失败: %w", err)
    }

    return fmt.Sprintf("✅ 成功修改文件: %s", input.Path), nil
}

我们通过手写一个看似普通的 edit 工具，深入洞察了驾驭工程的另一重境界：容错艺术。

正视大模型缺陷：大模型本质上是一个概率预测引擎，要求它 100% 精确输出多行代码的缩进和特殊符号是不现实的。硬抗只会导致死循环。
降级管线（Degradation Pipeline）：我们在底层设计了 L1 到 L4 四个级别的匹配算法，从精确匹配一路降级到“逐行去空格匹配”。这就像是给 Agent 戴上了一副“宽容的眼镜”，自动矫正了它的幻觉。
唯一性安全底线：在容错的同时，我们坚守了“如果匹配到多处，绝不替换”的安全底线。把 count > 1 的报错原样丢回给大模型，让大模型自己提供更多上下文。这完美利用了 LLM 强大的 Self-Correction（自我纠错）能力。