import { agent } from "@llamaindex/workflow";
import { OpenAI } from "@llamaindex/openai";
import { createMemory, tool, } from "llamaindex";
import { z } from "zod";
import * as undici from "undici";
class LLM extends OpenAI {
constructor(config: ConstructorParameters<typeof OpenAI>[0]) {
super(config);
}
get metadata() {
const metadata = super.metadata;
// Must be set manually in llamaindex for custom models; otherwise the
// context window falls back to 1024 tokens, which causes memory to drop messages.
metadata.contextWindow = 64 * 1024;
return metadata;
}
}
const createCommonLLMTools = () => {
const llm = new LLM({
model: process.env.OPENAI_MODEL || "gpt-4o-mini",
apiKey: process.env.OPENAI_API_KEY,
baseURL: process.env.OPENAI_BASE_URL
});
return {
llm
}
}
export function createWebReadingMultiAgent() {
const { llm } = createCommonLLMTools();
const crawlTool = tool({
name: 'crawl',
description: 'Fetch the content of a web page using crawl4ai',
parameters: z.object({
url: z.url()
}),
execute: async ({ url }) => {
const base = process.env.CRAWL4AI_BASE_URL;
if (!base) throw new Error('CRAWL4AI_BASE_URL is not set');
const resp = await undici.fetch(`${base}/md`, {
method: 'POST',
headers: { 'content-type': 'application/json' },
body: JSON.stringify({ url, f: 'fit', c: '0', q: null })
});
if (!resp.ok) {
const txt = await resp.text();
throw new Error(`crawl4ai error ${resp.status}: ${txt}`);
}
const text = await resp.text();
return text;
}
})
const translatorAgent = agent({
llm,
name: 'TranslatorAgent',
description: 'Translate fetched content to the target language while preserving structure',
systemPrompt: `You are TranslatorAgent. Translate the provided content. Preserve headings, lists, tables, code blocks, quotes, and links. If already in target language, return unchanged. Output markdown only.`,
});
const summarizerAgent = agent({
llm,
name: 'SummarizerAgent',
description: 'Extract entities and facts; produce structured content and a mind-map graph',
systemPrompt: 'You are SummarizerAgent. From the translated article, extract key entities and facts with citations, and build a topical outline.',
memory: createMemory({
tokenLimit: 100 * 1024
})
});
const evaluatorAgent = agent({
llm,
name: 'EvaluatorAgent',
description: 'Evaluate the summarizer agent output and provide a score and a suggestion for improvement',
systemPrompt: 'You are EvaluatorAgent. Evaluate the summarizer agent output and return a score plus concrete suggestions for improvement, covering the mind map, formatting, structure, etc.',
});
const orchestrator = agent({
llm,
name: 'Orchestrator',
description: 'Coordinate: crawl -> translate -> summarize -> evaluate',
systemPrompt: `You are the orchestrator. Understand the user's intent and use the right tools to get the job done. You will be given a URL; read the content of the page and generate a detailed report. Write the report in sufficient detail and depth. The report should be in the same language as the user query and should have a clear, structured, and logical organization. Use formatting (headings, bullets) to make it skimmable and readable. The report should also contain a mind_map (mermaid format) of the content for better understanding. After generating the report, evaluate it and make improvements if needed.`,
tools: [
tool({
name: 'translate',
description: `Translate content while preserving structure`,
parameters: z.object({
content: z.string(),
targetLanguage: z.string(),
}),
execute: async ({ content, targetLanguage }) => {
const result = await translatorAgent.run(`Translate the following content to ${targetLanguage}. Preserve all formatting and structure:\n\n${content}`);
return result.data.result;
}
}),
crawlTool,
tool({
name: 'summarize',
description: 'Extract entities and facts, produce structured content and mind-map',
parameters: z.object({
content: z.string(),
}),
execute: async ({ content }) => {
const result = await summarizerAgent.run(`Extract key entities and facts, build a topical outline from:\n\n${content}`);
return result.data.result;
}
}),
tool({
name: 'evaluate',
description: 'Evaluate the summarizer output and provide feedback',
parameters: z.object({
summary_content: z.string(),
original_content: z.string(),
}),
execute: async ({ summary_content, original_content }) => {
const result = await evaluatorAgent.run(`Evaluate this summary and provide feedback:\n\nSummary content:\n\n${summary_content}\n\nOriginal content:\n\n${original_content}\n\nGive practical suggestions for improving the summary.`);
return result.data.result;
}
})
]
});
return orchestrator;
}
async function main() {
if (process.env.http_proxy) {
undici.setGlobalDispatcher(new undici.ProxyAgent(process.env.http_proxy));
}
const agents = createWebReadingMultiAgent();
// const prompt = 'Read this page and generate a report for me. https://code.visualstudio.com/blogs/2021/09/29/bracket-pair-colorization';
const q = { "content": "May 2, 2025 | Updated May 12, 2025 \\n * Share: \\n\\n\\nAt GitHub, we’re committed to making our tools truly accessible for every developer, regardless of ability or toolset. The command line interface (CLI) is a vital part of the developer experience, and the [GitHub CLI](https://cli.github.com/) is our product that brings the power of GitHub to your terminal.\\nWhen it comes to accessibility, the terminal is fundamentally different from a web browser or a graphical user interface, with a lineage that predates the web itself. While standards like the Web Content Accessibility Guidelines (WCAG) provide a clear path for making web and graphical applications accessible, there is no equivalent, comprehensive standard for the terminal and CLIs. The W3C offers [some high-level guidance for non-web software](https://www.w3.org/TR/wcag2ict-22/#applying-wcag-2-to-text-applications), but it stops short of prescribing concrete techniques, leaving much open to interpretation and innovation.\\nThis gap has challenged us to think creatively and purposefully about what accessibility should look like in the terminal. Our recent [Public Preview](https://github.blog/changelog/2025-05-01-improved-accessibility-features-in-github-cli/) is focused on addressing the needs of three key groups: users who rely on screen readers, users who need high contrast between background and text, and users who require customizable color options. Our work aims to make the GitHub CLI more inclusive for all, regardless of how you interact with your terminal. Run `gh a11y` in the latest version of the GitHub CLI to enable these features, or read on to learn about our path to designing and implementing them.\\n## Understanding the terminal landscape\\nText-based and command-line applications differ fundamentally from graphical or web applications. On a web page, assistive technologies like screen readers make use of the document object model (DOM) to infer structure and context of the page. Web pages can be designed such that the DOM’s structure is friendly to these technologies without impacting the visual design of the page. By contrast, CLI’s primary output is plain text, without hidden markup. A terminal emulator acts as the “user agent” for text apps, rendering characters as directed by the server application. Assistive technologies access this matrix of characters, analyze its layout, and try to infer structure. As the [WCAG2ICT guidance](https://www.w3.org/TR/wcag2ict-22/) notes, accessibility in this space means ensuring that all text output is available to assistive technologies, and that structural information is conveyed in a way that’s programmatically determinable—even if no explicit markup is present.\\nIn our quest to improve the GitHub CLI’s usability for blind, low-vision, and colorblind users, we found ourselves navigating a landscape with lots of guidance, but few concrete techniques for implementing accessible experiences. We studied how assistive technology interacts with terminals: how screen readers review output, how color and contrast can be customized, and how structural cues can be inferred from plain text. Our recent Public Preview contains explorations into various use cases in these spaces. \\n## Rethinking prompts and progress for screen readers\\nOne of the GitHub CLI’s strengths as a command-line application is its rich prompting experience, which gives our users an interactive interface to enter command options. 
However, this rich interactive experience poses a hurdle for speech synthesis screen readers: Non-alphanumeric visual cues and uses of constant screen redraws for visual or other effects can be tricky to correctly interpret as speech. \\nA demo video with sound of screen reader reading legacy prompter.\\nTo reduce confusion and make it easier for blind and low vision users to confidently answer questions and navigate choices, we’re introducing a prompting experience that allows speech synthesis screen readers to accurately convey prompts to users. Our new prompter is built using Charm’s open source `charmbracelet/huh` [prompting library](https://github.com/charmbracelet/huh).\\nA demo of a screenreader correctly reading a prompt.\\nAnother use case where the terminal is redrawn for visual effect is when showing progress bars. Our existing implementation uses a “spinner” made by redrawing the screen to display different braille characters (yes, we appreciate the irony) to give the user the indication that their command is executing. Speech synthesis screen readers do not handle this well:\\nA demo of a screenreader and an old spinner.\\nThis has been replaced with a static text progress indicator (with a relevant message to the action being taken where possible, falling back to a general \"Working…\" message). We’re working on identifying other areas we can further improve the contextual text.\\nA demo video of the new progress indicator experience.\\n## Color, contrast, and customization\\nColor is more than decoration in the terminal: It’s a vital tool for highlighting information, signaling errors, and guiding workflows. But color can also be a barrier—if contrast between the color of the terminal background and the text displayed on it is too low, some users will have difficulty discerning the displayed information. Unlike in a web browser, a terminal’s background color is not set by the application. That task is handled by the user’s terminal emulator. In order to maintain contrast, it is important that a command line application takes into account this variable. Our legacy color palette used for rendering Markdown did not take the terminal’s background color into account, leading to low contrast in some cases.\\nThe colors themselves also matter. Different terminal environments have varied color capabilities (some support 4-bit, some 8-bit, some 24-bit, etc). No matter the capability, terminals enable users to customize their color preferences, choosing how different hues are displayed. However, most terminals only support changing a limited subset of colors: namely, the sixteen colors in the ANSI 4-bit color table. The GitHub CLI has made extensive efforts to align our color palettes to 4-bit colors so our users can completely customize their experience using their terminal preferences. We built on top of [the accessibility foundations pioneered by Primer](https://primer.style/accessibility/color-considerations/) when deciding which 4-bit colors to use.\\n## Building for the CLI community\\nOur improvements aim to support a wide range of developer needs, from blind users who need screen readers, to low vision users who need high contrast, to colorblind users who require customizable color options. But this Public Preview does not mark the end of our team’s commitment to enabling all developers to use the GitHub CLI. We intend to make it easier for our extension authors to implement the same accessibility improvements that we’ve made to the core CLI. 
This will allow users to have a cohesive experience across all GitHub CLI commands, official or community-maintained, and so that more workflows can be made accessible by default. We’re also looking into experiences to customize the formatting of tables output by commands to be more easily read/interpreted by screen readers. We’re excited to continue our accessibility journey.We couldn’t have come this far without collaboration with our friends at [Charm](https://github.com/charmbracelet) and our colleagues on the GitHub Accessibility team. \\n## A call for feedback\\nWe invite you to help us in our goal to make the GitHub CLI an experience for all developers:\\n * **Try it out** : Update the GitHub CLI to [v2.72.0](https://github.com/cli/cli/releases/tag/v2.72.0) and run `gh a11y` in your terminal to learn more about enabling these new accessible features.\\n * **Share your experience** : Join our [GitHub CLI accessibility discussion](https://github.com/orgs/community/discussions/158037) to provide feedback or suggestions.\\n * **Connect with us** : If you have a lived experience relevant to our accessibility personas, reach out to the accessibility team or get involved in our [discussion panel](https://github.com/orgs/community/discussions/128172).\\n\\n\\n## Looking forward\\nAdapting accessibility standards for the command line is a challenge—and an opportunity. We’re committed to sharing our approach, learning from the community, and helping set a new standard for accessible CLI tools.\\nThank you for building a more accessible GitHub with us.\\n**Want to help us make GitHub the home for all developers?** [Learn more](https://accessibility.github.com/feedback) about GitHub’s accessibility efforts." };
const prompt = q.content;
const result = await agents.run(`summarize this: ${prompt}`);
console.log(result.data.result);
}
if (require.main === module) {
main().catch(console.error);
}
🤖 Multi-Agent Web Reading - LlamaIndex
What this demo shows
This demo shows AG-Kit's multi-agent orchestration with LlamaIndex:
- Multi-agent workflow: agents that coordinate crawling, translation, and summarization
- LlamaIndex integration: uses LlamaIndex workflow and agent primitives
- Web crawling: integrates with crawl4ai for reliable content extraction
- Structured output: generates summaries with a mind map and citations
- Progress streaming: real-time updates as agents complete tasks
How to interact
Try these suggestions or provide your own URL:
- "Read this page for me: https://github.blog/engineering/user-experience/building-a-more-accessible-github-cli/"
- Provide any other web page URL to analyze
Technical implementation
Backend (LlamaIndex):
- Multiple specialized agents:
  - CrawlerAgent: web content retrieval
  - TranslatorAgent: content translation
  - SummarizerAgent: information extraction
  - Orchestrator: workflow coordination
- Tool integration for crawling and processing
- Structured output with a mind map
- Progress streaming to the frontend (see the sketch after this list)
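
The progress-streaming step is not shown in the backend code above. Below is a minimal sketch of one way it could be wired, assuming the runStream / agentStreamEvent / agentToolCallEvent helpers exported by @llamaindex/workflow (exact names can vary between versions); the "./agents" module path and the send callback are placeholders for the actual file layout and the SSE/WebSocket channel used by the frontend.

```ts
import { agentStreamEvent, agentToolCallEvent } from "@llamaindex/workflow";
import { createWebReadingMultiAgent } from "./agents"; // hypothetical module path

// Forward orchestrator progress to the client through a caller-supplied channel.
// `send` stands in for an SSE `res.write` or a WebSocket `socket.send`.
export async function streamReport(url: string, send: (chunk: string) => void) {
  const orchestrator = createWebReadingMultiAgent();
  const events = orchestrator.runStream(
    `Read this page and generate a report for me. ${url}`
  );
  for await (const event of events) {
    if (agentToolCallEvent.include(event)) {
      // Surface which tool (crawl / translate / summarize / evaluate) is running.
      send(`[status] calling tool: ${event.data.toolName}\n`);
    } else if (agentStreamEvent.include(event)) {
      // Stream LLM output tokens to the client as they are produced.
      send(event.data.delta);
    }
  }
}
```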
Frontend:
- useChat hook for conversation management
- AgKitChat component with suggestions
- Real-time status updates
- Agent selection via URL parameter
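
For illustration only, here is a sketch of how these frontend pieces might fit together. The useChat signature is assumed to follow the Vercel AI SDK style, and the AgKitChat import path and props (chat, suggestions) are hypothetical; the real AG-Kit component API may differ.

```tsx
import { useChat } from "@ai-sdk/react"; // assumption: Vercel AI SDK-style chat hook
import { AgKitChat } from "./AgKitChat"; // hypothetical component path and props

export function WebReadingDemo() {
  // Agent selection via URL parameter, e.g. ?agent=llamaindex-web-reading
  const agentId =
    new URLSearchParams(window.location.search).get("agent") ?? "llamaindex-web-reading";

  // useChat keeps conversation state and streams assistant updates from the
  // backend route that wraps the LlamaIndex orchestrator (assumed route shape).
  const chat = useChat({ api: `/api/agents/${agentId}/chat` });

  return (
    <AgKitChat
      chat={chat} // hypothetical prop: hands the whole chat state to the component
      suggestions={[
        "Read this page for me: https://github.blog/engineering/user-experience/building-a-more-accessible-github-cli/",
      ]}
    />
  );
}
```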