larksuite · dingding0418 · Apr 8, 2026 · greptile-apps · Apr 8, 2026 · greptile-apps
diff --git a/shortcuts/register.go b/shortcuts/register.go
@@ -22,6 +22,7 @@ import (
 	"github.com/larksuite/cli/shortcuts/task"
 	"github.com/larksuite/cli/shortcuts/vc"
 	"github.com/larksuite/cli/shortcuts/whiteboard"
+	"github.com/larksuite/cli/shortcuts/wiki"
 )
 
 // allShortcuts aggregates shortcuts from all domain packages.
@@ -41,6 +42,7 @@ func init() {
 	allShortcuts = append(allShortcuts, task.Shortcuts()...)
 	allShortcuts = append(allShortcuts, vc.Shortcuts()...)
 	allShortcuts = append(allShortcuts, whiteboard.Shortcuts()...)
+	allShortcuts = append(allShortcuts, wiki.Shortcuts()...)
 }
 
 // AllShortcuts returns a copy of all registered shortcuts (for dump-shortcuts).

diff --git a/shortcuts/wiki/shortcuts.go b/shortcuts/wiki/shortcuts.go
@@ -0,0 +1,13 @@
+// Copyright (c) 2026 Lark Technologies Pte. Ltd.
+// SPDX-License-Identifier: MIT
+
+package wiki
+
+import "github.com/larksuite/cli/shortcuts/common"
+
+// Shortcuts lists every shortcut the wiki package contributes to the CLI;
+// each call builds a fresh slice.
+func Shortcuts() []common.Shortcut {
+	registered := []common.Shortcut{WikiResolveNode}
+	return registered
+}
diff --git a/shortcuts/wiki/wiki_resolve_node.go b/shortcuts/wiki/wiki_resolve_node.go
@@ -0,0 +1,135 @@
+// Copyright (c) 2026 Lark Technologies Pte. Ltd.
+// SPDX-License-Identifier: MIT
+
+package wiki
+
+import (
+	"context"
+	"io"
+	"regexp"
+	"strings"
+
+	"github.com/larksuite/cli/internal/output"
+	"github.com/larksuite/cli/shortcuts/common"
+)
+
+// wikiURLPattern captures, in group 1, the alphanumeric node token following
+// "/wiki/"; it is unanchored, so scheme and query string are optional, e.g.:
+//
+//	https://bytedance.larkoffice.com/wiki/EzY8wvj5RiLtfIkw4UPcTdKinRe
+//	https://example.feishu.cn/wiki/EzY8wvj5RiLtfIkw4UPcTdKinRe?from=xxx
+//	bytedance.larkoffice.com/wiki/EzY8wvj5RiLtfIkw4UPcTdKinRe
+var wikiURLPattern = regexp.MustCompile(`/wiki/([A-Za-z0-9]+)`)
+
+// extractWikiToken normalizes user input into a bare wiki node token.
+// A wiki URL yields its /wiki/<token> segment; any other non-empty input is
+// returned as-is after dropping whatever follows the first '?' or '#'.
+func extractWikiToken(input string) string {
+	trimmed := strings.TrimSpace(input)
+	if trimmed == "" {
+		return ""
+	}
+	if m := wikiURLPattern.FindStringSubmatch(trimmed); len(m) >= 2 {
+		return m[1]
+	}
+	if cut := strings.IndexAny(trimmed, "?#"); cut != -1 {
+		return trimmed[:cut]
+	}
+	return trimmed
+}
+
+// WikiResolveNode resolves a wiki node URL or token to the metadata of the object
+// it wraps (obj_token, obj_type, title, ...). Content APIs need that obj_token,
+// which differs from the node token shown in a /wiki/ URL.
+//
+// Without this shortcut, agents had to manually call the raw API:
+//
+//	lark-cli api GET /open-apis/wiki/v2/spaces/get_node \
+//	  --params '{"token":"...","obj_type":"wiki"}'
+//
+// This shortcut wraps that with friendlier ergonomics: accepts URLs or tokens,
+// and returns the node's most useful fields flattened to the top level.
+var WikiResolveNode = common.Shortcut{
+	Service:     "wiki",
+	Command:     "+resolve-node",
+	Description: "Resolve a wiki node URL/token to its underlying object (obj_token, obj_type, title); essential bridge before fetching wiki-wrapped content with docs/sheets/base APIs",
+	Risk:        "read",
+	UserScopes:  []string{"wiki:wiki:readonly"},
+	BotScopes:   []string{"wiki:wiki:readonly"},
+	AuthTypes:   []string{"user", "bot"},
+	HasFormat:   true,
+	Flags: []common.Flag{
+		{Name: "token", Required: true, Desc: "wiki node URL (e.g. https://x.larkoffice.com/wiki/wikXXX) or bare token"},
+	},
+	Tips: []string{
+		"output fields: node_token, obj_token, obj_type (docx/bitable/sheet/...), title, space_id",
+		"feed the returned obj_token + obj_type into the matching content API: docs +fetch / base / sheets",
+	},
+	Validate: func(ctx context.Context, runtime *common.RuntimeContext) error {
+		if runtime.Str("token") == "" {
+			return common.FlagErrorf("--token is required")
+		}
+		if extractWikiToken(runtime.Str("token")) == "" {
+			return common.FlagErrorf("could not extract a wiki token from --token")
+		}
+		return nil
+	},
+	DryRun: func(ctx context.Context, runtime *common.RuntimeContext) *common.DryRunAPI {
+		token := extractWikiToken(runtime.Str("token"))
+		return common.NewDryRunAPI().
+			GET("/open-apis/wiki/v2/spaces/get_node").
+			Desc("Resolve wiki node → obj_token + obj_type + title").
+			Params(map[string]interface{}{
+				"token":    token,
+				"obj_type": "wiki",
+			}).
+			Set("input_token", runtime.Str("token")).
+			Set("normalized_token", token)
+	},
+	Execute: func(ctx context.Context, runtime *common.RuntimeContext) error {
+		rawInput := runtime.Str("token")
+		token := extractWikiToken(rawInput)
+
+		data, err := runtime.CallAPI(
+			"GET",
+			"/open-apis/wiki/v2/spaces/get_node",
+			map[string]interface{}{
+				"token":    token,
+				"obj_type": "wiki",
+			},
+			nil,
+		)
+		if err != nil {
+			return err
+		}
+
+		// A response without a "node" object cannot be flattened; report it as an API error.
+		node, _ := data["node"].(map[string]interface{})
+		if node == nil {
+			return output.Errorf(output.ExitAPI, "api_error", "wiki node not found or not accessible (input=%s, normalized=%s)", rawInput, token)
+		}
+
+		// Flatten the most useful fields to top-level for easy consumption
+		out := map[string]interface{}{
+			"node_token": node["node_token"],
+			"obj_token":  node["obj_token"],
+			"obj_type":   node["obj_type"],
+			"title":      node["title"],
+			"space_id":   node["space_id"],
+			"node_type":  node["node_type"],
+			"creator":    node["creator"],
+			"has_child":  node["has_child"],
+		}
+
+		runtime.OutFormat(out, nil, func(w io.Writer) {
+			output.PrintTable(w, []map[string]interface{}{{
+				"node_token": out["node_token"],
+				"obj_token":  out["obj_token"],
+				"obj_type":   out["obj_type"],
+				"title":      out["title"],
+				"space_id":   out["space_id"],
+			}})
+		})
+		return nil
+	},
+}
diff --git a/skills/lark-doc/SKILL.md b/skills/lark-doc/SKILL.md
@@ -123,6 +123,49 @@ Drive Folder (云空间文件夹)
 - `docs +search` 不是只搜文档 / Wiki；结果里会直接返回 `SHEET` 等云空间对象。
 - 拿到 spreadsheet URL / token 后，再切到 `lark-sheets` 做对象内部读取、筛选、写入等操作。
 
+## AI Usage Guidance：企业知识搜索方法论 ⭐
+
+> **强制阅读**：搜索（`docs +search`）类任务，下面这套方法论是默认动作，不能跳过。详见 [`references/lark-doc-search-recipes.md`](references/lark-doc-search-recipes.md)。
+
+### 1. 多轮关键词改写是默认动作
+
+**单次搜索的召回率非常低**。开放问题或有明确目标的搜索任务，**至少跑 2-3 轮不同关键词**才算 baseline。每一轮换一个角度：
+
+| 轮次 | 策略 | 例子（query: "飞书Office SaaS直销政策"） |
+|---|---|---|
+| 1 | 原始关键词 | `--query "飞书Office SaaS 直销 政策"` |
+| 2 | 去掉修饰词，保留核心词 | `--query "SaaS 直销 售卖政策"` |
+| 3 | 换同义词或具体术语 | `--query "飞书 售卖 折扣 政策"` |
+| 4（如需） | 加业务术语限定 | `--query "Office 套件 价格 直销"` |
+
+**反模式**：第一轮搜了一个看似贴近的候选就一头扎进去深挖。正确做法是先比较多轮的 top 结果，挑相关度最高的再深挖。
+
+### 2. 广撒网 → 深挖，而不是一头扎进去
+
+每一轮搜索看 top 5 候选（不是 top 1），按以下顺序判断哪个最相关：
+
+1. **标题包含 query 核心词** > 标题不含
+2. **标题与用户场景对应** > 标题是评测集 / 周报 / 通用文档
+3. **doc_types 匹配预期**（找权威文档优先 docx/wiki，找数据优先 sheet/bitable）
+4. **owner / update_time 信号**（owner 是相关业务方、update_time 较近）
+
+### 3. 搜索无命中时不要轻易 abstain
+
+如果搜了 2-3 轮都没明确命中，**不要直接说"找不到"**：
+
+- **开放性问题**（用户问"为什么 X"、"怎么写 Y"）：可以基于通用知识 + 找到的弱相关材料 给出 best-effort 答案，但要明确标注"未找到权威文档，以下是基于通用知识 + 部分相关材料的推断"
+- **事实性问题**（用户问具体数字、具体人）：才适合直接说"找不到"
+- **聚合性问题**（用户问"列出所有 X"）：列出搜到的部分，并说明这是不完全列表
+
+### 4. 大文档处理：先看摘要，必要时分段
+
+`docs +fetch` 在体积特别大的文档上可能 504 timeout。处理策略：
+
+1. 先看 search 结果里的 `summary_highlighted` 字段（已含关键句）
+2. 若必须 fetch 全文，用 `--limit 50 --offset 0` 分段
+3. 还失败时退到 raw API：`lark-cli api GET /open-apis/docx/v1/documents/<token>/blocks --params '{"page_size":20}'` 拉 block 列表，再针对相关 block 单独取内容
+4. 详见 [`references/lark-doc-fetch.md`](references/lark-doc-fetch.md) 的"大文档处理"段
+
 ## 补充说明 
 `docs +search` 除了搜索文档 / Wiki，也承担“先定位云空间对象，再切回对应业务 skill 操作”的资源发现入口角色；当用户口头说“表格 / 报表”时，也优先从这里开始。
 

diff --git a/skills/lark-doc/references/lark-doc-fetch.md b/skills/lark-doc/references/lark-doc-fetch.md
@@ -31,6 +31,29 @@ lark-cli docs +fetch --doc Z1FjxxxxxxxxxxxxxxxxxxxtnAc --format pretty
 | `--limit` | 否 | 分页大小 |
 | `--format` | 否 | 输出格式：json（默认，含 title、markdown、has_more 等字段） \| pretty |
 
+## 大文档处理 ⚠️
+
+**已知问题**：对体积特别大的文档（万行级别、内嵌大量表格 / 媒体），`docs +fetch` 可能直接返回 `MCP HTTP 504 Gateway Timeout`。日志类文档（每日更新型）尤其常见。
+
+### 处理策略（按优先级）
+
+1. **先看 search 结果的 summary**：`docs +search --query "..."` 返回结果的 `summary_highlighted` 字段已经包含 query 命中的关键句，很多时候这就够答题了，根本不需要 fetch 全文
+2. **分段 fetch**：`lark-cli docs +fetch --doc <token> --offset 0 --limit 50`，先拿前 50 个 block 看看结构和命中位置
+3. **退到 raw blocks API**：
+   ```bash
+   # 列 block_id（很快，不会 timeout）
+   lark-cli api GET /open-apis/docx/v1/documents/<token>/blocks --params '{"page_size":50}'
+
+   # 拿到 block_id 列表后，针对相关 block 单独拉取其子块内容（children 接口返回的是该 block 的子块）
+   lark-cli api GET /open-apis/docx/v1/documents/<token>/blocks/<block_id>/children --params '{"page_size":50}'
+   ```
+4. **wiki 包装的大文档**：先用 `lark-cli wiki +resolve-node --token <wiki_token>` 拿到真正的 `obj_token`，再用上面的策略
+
+### 反模式
+
+- **不要在 fetch 失败后直接说"找不到"** —— 大文档 fetch 失败 ≠ 内容找不到，源文档已经定位了，只是工具能力暂时拿不到全文
+- **不要在 fetch 失败后无限重试** —— 504 通常是稳定失败，重试 1 次仍然失败就该切策略
+
 ## 重要：图片、文件、画板的处理
 
 **文档中的图片、文件、画板需要通过 `lark-doc-media-download`（docs +media-download）单独获取！**