nkkj/hongben-server @ bc1a372e9e07a8f1cddc428b37238df2dcd94949

先说结论：用 Web Scraper API（Pinterest 套件）可以比较接近你的 3 步工作流，但它是“预置爬虫”，不能像自己写代码那样一步步“点击页面元素”。支持的能力大致是：

- 通过关键词发现博主（Profiles by keyword）
- 通过博主 URL 拉取该博主的帖子列表（Posts by profile URL）
- 通过帖子 URL 拉取该帖的详情和评论（含一个层级的 `comments` 评论数组，目前文档里没有“继续展开更多回复”的二次交互能力）

也就是说：

“平铺式评论列表”可以拿到（`comments` 字段）；“点击查看更多回复再抓子回复”这种交互，Web Scraper API 目前不支持你自定义脚本去点按钮，只能拿到爬虫内置能抓到的评论层级。

下面我按你的 3 步工作流，用 Node.js + Web Scraper API 给你完整示例（包括请求 & 典型 JSON 结构），你只需要换成你自己的 API key 和具体 dataset_id 即可运行。

通用前提

安装依赖（bash）：

npm install axios

认证方式在账号设置里生成 API key（我们叫 API key，不是 token）：文档：How to get a API Key

const axios = require('axios');

const API_KEY = 'YOUR_API_KEY'; // 换成你的
const CUSTOMER_ID = 'hl_3202e698'; // 你的 account id

所有 Web Scraper API 请求都走这个触发端点（异步）：

触发采集：POST https://api.brightdata.com/datasets/v3/trigger?dataset_id=DATASET_ID 查询 / 下载结果：GET https://api.brightdata.com/datasets/v3/snapshot/{snapshot_id} 文档（英文）：

Web Scraper API 快速开始：Web Scraper API 快速入门 Pinterest 抓取器说明：Pinterest API 抓取器步骤 1：按关键词 + 地区发现 Pinterest 博主使用：Pinterest Profiles API – 通过关键词发现

在界面里路径大致是： Data → Web Scraper API → Scraper Library → 搜索 “Pinterest” → 选择 “Profiles API” → 选择 “Discover by Keywords”。在这个页面右上角可以看到对应的 `dataset_id`（形如 `gd_lk0zv93c2m9qdph46z/keyword`，真正调用时只用主 ID 部分，比如 `gd_lk0zv93c2m9qdph46z`，地区通常作为输入字段）。

Node.js 调用示例（发现博主） JS

const axios = require('axios');

// Credentials and target dataset for the profile-discovery example; replace both placeholders with your own values.
const API_KEY = 'YOUR_API_KEY'; const DATASET_ID_PROFILES_KEYWORD = 'YOUR_PROFILES_DATASET_ID'; // e.g. gd_lk0zv93c2m9qdph46z

/**
 * Triggers the "Profiles – discover by keyword" collection, waits for the
 * job to finish, then downloads and returns the collected creator records.
 *
 * Reads API_KEY and DATASET_ID_PROFILES_KEYWORD from the enclosing scope.
 *
 * @returns {Promise<Array<object>>} The downloaded snapshot payload.
 * @throws {Error} When the trigger response has no snapshot_id, the job
 *   reports "failed", the job does not become ready within the poll budget,
 *   or any HTTP request fails (the original axios error is rethrown).
 */
async function discoverCreatorsByKeyword() {
  const API_BASE = 'https://api.brightdata.com/datasets/v3';
  const POLL_INTERVAL_MS = 5000;
  const MAX_POLLS = 360; // about 30 minutes at 5 s per poll

  const authHeaders = { Authorization: `Bearer ${API_KEY}` };
  const url = `${API_BASE}/trigger?dataset_id=${DATASET_ID_PROFILES_KEYWORD}`;

  // Field names must match the "Inputs" panel of the scraper in the Scraper Library.
  const body = [
    {
      keyword: 'fashion', // your search keyword
      country: 'US', // region / country code; depending on the scraper the field may be country or country_code
    },
  ];

  try {
    const triggerRes = await axios.post(url, body, {
      headers: { ...authHeaders, 'Content-Type': 'application/json' },
    });

    const snapshotId = triggerRes.data.snapshot_id;
    if (!snapshotId) {
      throw new Error('Trigger response did not include a snapshot_id');
    }
    console.log('Profiles job triggered, snapshot_id:', snapshotId);

    // Poll the progress endpoint: the snapshot endpoint returns the data
    // itself once the job is done, so it never exposes status === 'ready'.
    let isReady = false;
    for (let attempt = 0; attempt < MAX_POLLS; attempt += 1) {
      const statusRes = await axios.get(`${API_BASE}/progress/${snapshotId}`, {
        headers: authHeaders,
      });
      const { status } = statusRes.data;
      console.log('Status:', status);

      if (status === 'ready') {
        isReady = true;
        break;
      }
      if (status === 'failed') {
        throw new Error('Profiles collection failed');
      }
      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
    }
    if (!isReady) {
      throw new Error('Profiles collection timed out');
    }

    // Download the finished snapshot as JSON.
    const downloadRes = await axios.get(
      `${API_BASE}/snapshot/${snapshotId}?format=json`,
      { headers: authHeaders }
    );

    const creators = downloadRes.data;
    console.log('Creators count:', creators.length);
    console.dir(creators[0], { depth: null });

    return creators;
  } catch (err) {
    console.error('Error discovering creators:', err.response?.data || err.message);
    throw err;
  }
}

discoverCreatorsByKeyword(); 典型返回 JSON（单条博主数据）字段以文档为准（Profiles – 通过关键词发现）： Profiles API 文档

大致结构类似：

JSON

{ "url": "https://www.pinterest.com/creator_username/", "profile_picture": "https://i.pinimg.com/...", "name": "Creator Name", "nickname": "creator_username", "website": "https://creator-site.com", "bio": "Short bio text", "country_code": "US", "profile_id": "1234567890", "following_count": 120, "follower_count": 54000, "boards_num": 12, "saved": 3400, "posts_page_url": "https://www.pinterest.com/creator_username/_created/", "last_updated": "2026-02-14T03:00:00Z", "discovery_input": {

"keyword": "fashion",
"country": "US"

} }

你在前端“点击博主”时，只需要把 `url` 或 `posts_page_url` 传给下一步即可。

步骤 2：点击博主 → 拉取该博主的视频 / 帖子列表使用：Pinterest Posts API – 通过个人资料 URL 发现

在 Scraper Library 里选择 Pinterest → Posts API →“Discover by Profile URL”。

Node.js 调用示例（按博主 URL 拉取帖子） JS

const axios = require('axios');

// Credentials and target dataset for the posts-by-profile example; replace both placeholders with your own values.
const API_KEY = 'YOUR_API_KEY'; const DATASET_ID_POSTS_BY_PROFILE = 'YOUR_POSTS_PROFILE_DATASET_ID'; // e.g. gd_lk0sjs4d21kdr7cnlv

/**
 * Triggers the "Posts – discover by profile URL" collection for one creator,
 * waits for the job to finish, then downloads and returns the posts.
 *
 * Reads API_KEY and DATASET_ID_POSTS_BY_PROFILE from the enclosing scope.
 *
 * @param {string} profileUrl - Creator profile URL, e.g. https://www.pinterest.com/creator_username/
 * @returns {Promise<Array<object>>} The downloaded snapshot payload.
 * @throws {Error} When the trigger response has no snapshot_id, the job
 *   reports "failed", the job does not become ready within the poll budget,
 *   or any HTTP request fails (the original axios error is rethrown).
 */
async function getPostsByCreatorProfile(profileUrl) {
  const API_BASE = 'https://api.brightdata.com/datasets/v3';
  const POLL_INTERVAL_MS = 5000;
  const MAX_POLLS = 360; // about 30 minutes at 5 s per poll

  const authHeaders = { Authorization: `Bearer ${API_KEY}` };
  const url = `${API_BASE}/trigger?dataset_id=${DATASET_ID_POSTS_BY_PROFILE}`;

  const body = [
    {
      URL: profileUrl, // field-name casing matters; follow the scraper's "Inputs" panel (documented as URL)
      num_of_posts: 50, // optional: cap the number of posts
      // start_date: '01-01-2025',
      // end_date: '12-31-2025',
      // posts_to_not_include: ['1234567890']
    },
  ];

  try {
    const triggerRes = await axios.post(url, body, {
      headers: { ...authHeaders, 'Content-Type': 'application/json' },
    });

    const snapshotId = triggerRes.data.snapshot_id;
    if (!snapshotId) {
      throw new Error('Trigger response did not include a snapshot_id');
    }
    console.log('Posts job triggered, snapshot_id:', snapshotId);

    // Poll the progress endpoint: the snapshot endpoint returns the data
    // itself once the job is done, so it never exposes status === 'ready'.
    let isReady = false;
    for (let attempt = 0; attempt < MAX_POLLS; attempt += 1) {
      const statusRes = await axios.get(`${API_BASE}/progress/${snapshotId}`, {
        headers: authHeaders,
      });
      const { status } = statusRes.data;
      console.log('Status:', status);

      if (status === 'ready') {
        isReady = true;
        break;
      }
      if (status === 'failed') {
        throw new Error('Posts collection failed');
      }
      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
    }
    if (!isReady) {
      throw new Error('Posts collection timed out');
    }

    const downloadRes = await axios.get(
      `${API_BASE}/snapshot/${snapshotId}?format=json`,
      { headers: authHeaders }
    );

    const posts = downloadRes.data;
    console.log('Posts count:', posts.length);
    console.dir(posts[0], { depth: null });

    return posts;
  } catch (err) {
    console.error('Error getting posts:', err.response?.data || err.message);
    throw err;
  }
}

// 示例：从上一步返回的 creators[0].url 传入 // getPostsByCreatorProfile('https://www.pinterest.com/creator_username/'); 典型返回 JSON（单条帖子 / 视频数据）参考文档中 Posts – 通过配置文件发现 URL 的输出结构： Posts API 文档

大致类似：

JSON

{ "url": "https://www.pinterest.com/pin/1234567890/", "post_id": "1234567890", "title": "Outfit ideas for spring", "content": "Some description text...", "date_posted": "2026-02-10T12:34:56Z", "post_type": "video", "user_name": "Creator Name", "user_url": "https://www.pinterest.com/creator_username/", "user_id": "987654321", "followers": 54000, "likes": 1200, "comments_num": 85, "comments": [

{
  "comment_id": "c1",
  "author_name": "User A",
  "author_url": "https://www.pinterest.com/user_a/",
  "text": "Love this!",
  "date": "2026-02-11T08:00:00Z",
  "likes": 10
  // 有的版本里可能还有 replies_count 等字段，具体以实际 schema 为准
}

], "categories": ["Fashion", "Outfits"], "image_video_url": "https://v.pinimg.com/videos/...", "video_length": 23, "attached_files": [], "hashtags": ["#fashion", "#springoutfit"], "source": "pinterest", "discovery_input": {

"URL": "https://www.pinterest.com/creator_username/",
"num_of_posts": 50

} }

你在前端“点击视频”时，只需要把 `url` 或 `post_id` 传给下一步。

步骤 3：点击视频 → 获取评论列表（及“查看更多回复”）这里有一个关键限制：

Web Scraper API 的 Pinterest Posts API 在“Collect by URL”模式下，会返回该帖的详情和 `comments` 数组；但它不会像浏览器自动化那样帮你“点击查看更多回复”多次去展开所有子回复；文档里只明确了 `comments` 和 `comments_num` 等字段，没有单独的“replies”数组说明，是否包含子回复、包含到什么层级，取决于当前爬虫实现，不能通过 API 参数控制。

如果你只需要“平铺式评论列表”（不强制区分主评论 / 子回复），可以直接用 `comments` 字段；如果你必须精确控制“点击查看更多回复”的行为，那就需要用 Scraper Studio 自定义脚本或 Scraping Browser，而不是 Web Scraper API 现成 Pinterest 套件。

Node.js 调用示例（按帖子 URL 拉取评论）使用：Pinterest Posts API – Collect by URL（和上一步是同一个 Posts API，只是换成 “Collect by URL” 这个端点，对应的 dataset_id 可能是同一个，也可能是另一个，取决于库里配置;以 Scraper Library 页面为准）。

const axios = require('axios');

// Credentials and target dataset for the collect-by-URL example; replace both placeholders with your own values.
const API_KEY = 'YOUR_API_KEY'; const DATASET_ID_POST_BY_URL = 'YOUR_POST_BY_URL_DATASET_ID'; // e.g. gd_lk0sjs4d21kdr7cnlv

/**
 * Triggers the "Posts – collect by URL" collection for a single post, waits
 * for the job to finish, and returns the post together with a flattened
 * copy of its comments.
 *
 * Reads API_KEY and DATASET_ID_POST_BY_URL from the enclosing scope.
 *
 * @param {string} postUrl - Post URL, e.g. https://www.pinterest.com/pin/1234567890/
 * @returns {Promise<{post: object, flatComments: Array<object>}>} The first
 *   record of the snapshot and one flat entry per element of post.comments.
 * @throws {Error} When the trigger response has no snapshot_id, the job
 *   reports "failed", the job does not become ready within the poll budget,
 *   the snapshot contains no post, or any HTTP request fails.
 */
async function getPostWithComments(postUrl) {
  const API_BASE = 'https://api.brightdata.com/datasets/v3';
  const POLL_INTERVAL_MS = 5000;
  const MAX_POLLS = 360; // about 30 minutes at 5 s per poll

  const authHeaders = { Authorization: `Bearer ${API_KEY}` };
  const url = `${API_BASE}/trigger?dataset_id=${DATASET_ID_POST_BY_URL}`;

  const body = [
    {
      URL: postUrl, // post URL, e.g. https://www.pinterest.com/pin/1234567890/
    },
  ];

  try {
    const triggerRes = await axios.post(url, body, {
      headers: { ...authHeaders, 'Content-Type': 'application/json' },
    });

    const snapshotId = triggerRes.data.snapshot_id;
    if (!snapshotId) {
      throw new Error('Trigger response did not include a snapshot_id');
    }
    console.log('Post-by-URL job triggered, snapshot_id:', snapshotId);

    // Poll the progress endpoint: the snapshot endpoint returns the data
    // itself once the job is done, so it never exposes status === 'ready'.
    let isReady = false;
    for (let attempt = 0; attempt < MAX_POLLS; attempt += 1) {
      const statusRes = await axios.get(`${API_BASE}/progress/${snapshotId}`, {
        headers: authHeaders,
      });
      const { status } = statusRes.data;
      console.log('Status:', status);

      if (status === 'ready') {
        isReady = true;
        break;
      }
      if (status === 'failed') {
        throw new Error('Post-by-URL collection failed');
      }
      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
    }
    if (!isReady) {
      throw new Error('Post-by-URL collection timed out');
    }

    const downloadRes = await axios.get(
      `${API_BASE}/snapshot/${snapshotId}?format=json`,
      { headers: authHeaders }
    );

    const [post] = downloadRes.data;
    if (!post) {
      // An empty snapshot would otherwise surface as a TypeError on post.comments.
      throw new Error('Post-by-URL collection returned no post');
    }
    console.dir(post, { depth: null });

    // Flatten the comments into the shape the caller needs.
    const flatComments = (post.comments || []).map((c) => ({
      comment_id: c.comment_id,
      author_name: c.author_name,
      text: c.text,
      date: c.date,
      likes: c.likes,
      // If the schema exposes replies or replies_count, expand or tag them here.
    }));

    console.log('Flat comments count:', flatComments.length);
    return { post, flatComments };
  } catch (err) {
    console.error('Error getting post comments:', err.response?.data || err.message);
    throw err;
  }
}

// 示例：从上一步 posts[0].url 传入 // getPostWithComments('https://www.pinterest.com/pin/1234567890/'); 典型返回 JSON（含评论）和上一步类似，只是你通常只传一个 URL，返回一个对象：

JSON

{ "url": "https://www.pinterest.com/pin/1234567890/", "post_id": "1234567890", "title": "Outfit ideas for spring", "comments_num": 85, "comments": [

{
  "comment_id": "c1",
  "author_name": "User A",
  "author_url": "https://www.pinterest.com/user_a/",
  "text": "Love this!",
  "date": "2026-02-11T08:00:00Z",
  "likes": 10
  // 这里是否有 replies / replies_count，要以实际 schema 为准
},
{
  "comment_id": "c2",
  "author_name": "User B",
  "text": "Where did you buy this?",
  "date": "2026-02-11T09:00:00Z",
  "likes": 3
}

], "image_video_url": "https://v.pinimg.com/videos/...", "video_length": 23, "hashtags": ["#fashion", "#springoutfit"], "discovery_input": {

"URL": "https://www.pinterest.com/pin/1234567890/"

} }

关于“查看更多回复”的现实情况

Web Scraper API（Pinterest 套件）是“无代码预置爬虫”，你只能通过输入参数控制采集范围，不能写脚本去点“查看更多回复”按钮；如果你业务上必须精确抓到每条评论的所有子回复，有两种更灵活的方案：

- Scraper Studio 自定义 Pinterest 爬虫：自己写（或用 AI 生成）脚本，显式执行点击、滚动、等待等操作；
- Scraping Browser + 你自己的 Node.js 逻辑：用 CDP / Selenium 控制浏览器，完全模拟人工操作。

这两种都可以做到你说的“点击评论下方查看更多回复再抓子评论”，但就不再是 Web Scraper API 现成 Pinterest 套件了。

帮你把 3 步串成一个 Node.js 工作流（简化版）下面是一个“从关键词 → 选博主 → 选帖子 → 拉评论”的串联示例，方便你对接现有系统：

const axios = require('axios');

// Credentials plus one dataset ID per workflow step (profile discovery, posts by profile, post by URL); replace every placeholder.
const API_KEY = 'YOUR_API_KEY'; const DATASET_ID_PROFILES_KEYWORD = 'YOUR_PROFILES_DATASET_ID'; const DATASET_ID_POSTS_BY_PROFILE = 'YOUR_POSTS_PROFILE_DATASET_ID'; const DATASET_ID_POST_BY_URL = 'YOUR_POST_BY_URL_DATASET_ID';

/**
 * Triggers a collection job for the given dataset, waits until the job is
 * reported ready, then downloads and returns the snapshot as JSON.
 *
 * Reads API_KEY from the enclosing scope.
 *
 * @param {string} datasetId - Dataset (scraper) ID to trigger.
 * @param {Array<object>} body - Input records posted as the JSON request body.
 * @param {object} [options] - Optional polling settings.
 * @param {number} [options.pollIntervalMs=5000] - Delay between status checks.
 * @param {number} [options.maxPolls=360] - Maximum number of status checks
 *   before giving up (about 30 minutes with the default interval).
 * @returns {Promise<*>} The downloaded snapshot payload.
 * @throws {Error} When the trigger response has no snapshot_id, the job
 *   reports "failed", or the job is not ready within the poll budget. HTTP
 *   errors raised by axios propagate unchanged.
 */
async function triggerAndWait(datasetId, body, { pollIntervalMs = 5000, maxPolls = 360 } = {}) {
  const API_BASE = 'https://api.brightdata.com/datasets/v3';
  const authHeaders = { Authorization: `Bearer ${API_KEY}` };

  const triggerRes = await axios.post(
    `${API_BASE}/trigger?dataset_id=${datasetId}`,
    body,
    { headers: { ...authHeaders, 'Content-Type': 'application/json' } }
  );
  const snapshotId = triggerRes.data.snapshot_id;
  if (!snapshotId) {
    throw new Error('Trigger response did not include a snapshot_id');
  }

  // Poll the progress endpoint: the snapshot endpoint returns the data
  // itself once the job is done, so it never exposes status === 'ready'.
  let isReady = false;
  for (let attempt = 0; attempt < maxPolls; attempt += 1) {
    const statusRes = await axios.get(`${API_BASE}/progress/${snapshotId}`, {
      headers: authHeaders,
    });
    const { status } = statusRes.data;
    if (status === 'ready') {
      isReady = true;
      break;
    }
    if (status === 'failed') {
      throw new Error('Job failed');
    }
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
  }
  if (!isReady) {
    throw new Error('Job timed out');
  }

  const downloadRes = await axios.get(
    `${API_BASE}/snapshot/${snapshotId}?format=json`,
    { headers: authHeaders }
  );
  return downloadRes.data;
}

/**
 * Runs the three collection steps in sequence — discover creators by
 * keyword, list the first creator's posts, fetch the first post with its
 * comments — and logs the comment count and the first comment.
 *
 * Relies on triggerAndWait and the DATASET_ID_* constants from the
 * enclosing scope.
 *
 * @returns {Promise<void>}
 * @throws {Error} When any step returns no records, or when triggerAndWait
 *   rejects.
 */
async function mainWorkflow() {
  // 1) Discover creators by keyword + country.
  const creators = await triggerAndWait(DATASET_ID_PROFILES_KEYWORD, [
    { keyword: 'fashion', country: 'US' },
  ]);
  const creator = creators?.[0];
  if (!creator) {
    throw new Error('No creators returned for the given keyword and country');
  }
  console.log('Picked creator:', creator.url);

  // 2) List the creator's posts from the profile URL.
  const posts = await triggerAndWait(DATASET_ID_POSTS_BY_PROFILE, [
    { URL: creator.url, num_of_posts: 20 },
  ]);
  const post = posts?.[0];
  if (!post) {
    throw new Error('No posts returned for the selected creator');
  }
  console.log('Picked post:', post.url);

  // 3) Fetch the post (including its comments) from the post URL.
  const result = await triggerAndWait(DATASET_ID_POST_BY_URL, [
    { URL: post.url },
  ]);
  const postWithComments = result?.[0];
  if (!postWithComments) {
    throw new Error('No post details returned for the selected post');
  }

  console.log('Comments_num:', postWithComments.comments_num);
  console.log('First comment:', postWithComments.comments?.[0]);
}

mainWorkflow().catch(console.error);

接下来可以怎么做？你可以先在 Scraper Library 里找到这 3 个 Pinterest endpoint，对照界面确认：

- 输入字段名（`keyword` / `country` / `URL` / `num_of_posts` 等）
- 输出字段（特别是 `comments` 里是否有 `replies` 或 `replies_count`）

把上面的 Node.js 示例里 `YOUR_API_KEY` 和各自的 `DATASET_ID` 换成你自己的，跑一遍看看真实返回结构。如果你愿意，可以把你在 Scraper Library 里看到的：

- 具体的 dataset_id（或截图里的 endpoint 名称）
- 你期望的“平铺评论 + 回复”的最终 JSON 结构

发给我，我可以帮你把 Node.js 代码改成完全贴合你目标 JSON 的版本（包括如何把 `comments` 转成你要的扁平结构）。

如果你确实需要“强制展开所有回复”，我也可以帮你评估是用 Scraper Studio 还是 Scraping Browser 更合适。

最后，如果这次说明对你有帮助，欢迎用下方的拇指按钮给个反馈。

不能直接走get/post请求的方式吗

可以的，你现在看到的所有示例，本质上就是“纯 HTTP POST/GET 请求”，不需要任何 SDK，只是我用 Node.js 的 axios 来发请求而已。 Web Scraper API 的调用方式就是标准的 REST API：

- 触发采集：`POST https://api.brightdata.com/datasets/v3/trigger?dataset_id=...`
- 查询 / 下载结果：`GET https://api.brightdata.com/datasets/v3/snapshot/{snapshot_id}`

下面我给你一个完全最小化的 Node.js 示例，只用原生 `https.request`，你可以看到它就是标准的 POST / GET。

1）POST 触发采集（发现博主 / 拉帖子 / 拉评论都是同一个模式）以“按关键词发现博主”为例（Profiles by keyword）：

const https = require('https');

// Credentials and target dataset for the raw-https example; replace both placeholders with your own values.
const API_KEY = 'YOUR_API_KEY'; const DATASET_ID = 'YOUR_PROFILES_DATASET_ID'; // e.g. gd_lk0zv93c2m9qdph46z

/**
 * Sends the POST request that triggers a "profiles by keyword" collection
 * using only the built-in https module, then hands the returned
 * snapshot_id to pollSnapshot.
 *
 * Reads https, API_KEY, DATASET_ID and pollSnapshot from the enclosing
 * scope. Failures are logged to stderr rather than thrown, because they
 * surface inside asynchronous response callbacks.
 */
function triggerJob() {
  const postData = JSON.stringify([
    {
      keyword: 'fashion',
      country: 'US',
    },
  ]);

  const options = {
    hostname: 'api.brightdata.com',
    path: `/datasets/v3/trigger?dataset_id=${DATASET_ID}`,
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${API_KEY}`,
      'Content-Type': 'application/json',
      'Content-Length': Buffer.byteLength(postData),
    },
  };

  const req = https.request(options, (res) => {
    // Decode as a stream so multi-byte characters split across chunks stay intact.
    res.setEncoding('utf8');

    let data = '';
    res.on('data', (chunk) => {
      data += chunk;
    });
    res.on('end', () => {
      console.log('Trigger response:', data);

      if (res.statusCode < 200 || res.statusCode >= 300) {
        console.error(`Trigger failed with HTTP ${res.statusCode}`);
        return;
      }

      // Error bodies are not guaranteed to be JSON; an unguarded parse here
      // would crash the process with an uncaught exception.
      let json;
      try {
        json = JSON.parse(data);
      } catch (err) {
        console.error('Trigger response is not valid JSON:', err.message);
        return;
      }

      const snapshotId = json.snapshot_id;
      if (!snapshotId) {
        console.error('Trigger response did not include a snapshot_id');
        return;
      }
      console.log('snapshot_id:', snapshotId);

      // Once the job is triggered, fetch the result with a GET request.
      pollSnapshot(snapshotId);
    });
  });

  req.on('error', (e) => {
    console.error('Request error:', e);
  });

  req.write(postData);
  req.end();
}

/**
 * Issues a single GET for the snapshot (format=json) and logs whatever the
 * API returns. Despite the name, this does not retry: if the job is not
 * finished yet, the response may be status information instead of the
 * final data.
 *
 * Reads https and API_KEY from the enclosing scope. Failures are logged to
 * stderr rather than thrown.
 *
 * @param {string} snapshotId - The snapshot_id returned by the trigger call.
 */
function pollSnapshot(snapshotId) {
  const options = {
    hostname: 'api.brightdata.com',
    path: `/datasets/v3/snapshot/${snapshotId}?format=json`,
    method: 'GET',
    headers: {
      'Authorization': `Bearer ${API_KEY}`,
    },
  };

  const req = https.request(options, (res) => {
    // Decode as a stream so multi-byte characters split across chunks stay intact.
    res.setEncoding('utf8');

    let data = '';
    res.on('data', (chunk) => {
      data += chunk;
    });
    res.on('end', () => {
      if (res.statusCode !== 200) {
        // NOTE(review): the API is expected to answer 202 while the job is
        // still running — confirm against the API documentation.
        console.error(`Snapshot request returned HTTP ${res.statusCode}`);
      }

      // The body is not guaranteed to be JSON (e.g. plain-text errors).
      let json;
      try {
        json = JSON.parse(data);
      } catch (err) {
        console.error('Snapshot response is not valid JSON:', err.message);
        console.error('Snapshot raw body:', data);
        return;
      }

      // A single GET uses whatever comes back; if the job is not ready yet
      // this may be status information rather than the final data.
      console.log('Snapshot raw response:', JSON.stringify(json, null, 2));
    });
  });

  req.on('error', (e) => {
    console.error('Snapshot error:', e);
  });

  req.end();
}

triggerJob(); 你可以看到：

- 第一步是一个标准的 `POST` 请求，body 是 JSON 数组（输入参数）；
- 第二步是一个标准的 `GET` 请求，带 `Authorization` 头；
- 没有用任何 SDK，只是 Node.js 自带的 `https` 模块。

你如果愿意，也可以用 `curl`、`fetch` 或任何 HTTP 客户端，本质都是一样的 GET/POST。

2）为什么“不能只用一个 GET 一步到位” 你现在的需求是：

关键词 + 地区 → 找博主点博主 → 拉帖子列表点帖子 → 拉评论（+ 尽量多的回复） Web Scraper API 的设计是异步任务模式：

- 你必须先 `POST /trigger` 创建一个采集任务（这一步告诉系统：我要抓什么、抓多少）；
- 系统在后台跑浏览器、过反爬、翻页等；
- 任务完成后，你再用 `GET /snapshot/{id}` 去拿结果。

所以从协议层面讲：

不能只用一个 GET URL 就完成所有事情，因为你需要先告诉系统“要抓什么”（这一步必须是 POST，带 JSON body）; 但你完全可以只用“原生 HTTP GET/POST”，不需要任何 SDK 或额外轮询逻辑库——轮询就是你自己写一个 setTimeout 或 while 循环发 GET 而已。 3）按你的 3 步工作流，对应的“纯 HTTP”调用方式步骤 1：关键词 + 地区 → 博主列表 POST /datasets/v3/trigger?dataset_id=PROFILES_KEYWORD_ID body（JSON）示例： JSON

[ {

"keyword": "fashion",
"country": "US"

} ]

然后用 `GET /datasets/v3/snapshot/{snapshot_id}?format=json` 拿结果。返回 JSON（单条示例）：

JSON

{ "url": "https://www.pinterest.com/creator_username/", "name": "Creator Name", "nickname": "creator_username", "country_code": "US", "profile_id": "1234567890", "follower_count": 54000, "posts_page_url": "https://www.pinterest.com/creator_username/_created/", "discovery_input": {

"keyword": "fashion",
"country": "US"

} } 步骤 2：点击博主 → 帖子 / 视频列表 POST /datasets/v3/trigger?dataset_id=POSTS_BY_PROFILE_ID body 示例： JSON

[ {

"URL": "https://www.pinterest.com/creator_username/",
"num_of_posts": 20

} ]

然后 `GET /datasets/v3/snapshot/{snapshot_id}?format=json`。返回 JSON（单条帖子）：

JSON

{ "url": "https://www.pinterest.com/pin/1234567890/", "post_id": "1234567890", "title": "Outfit ideas for spring", "post_type": "video", "image_video_url": "https://v.pinimg.com/videos/...", "video_length": 23, "likes": 1200, "comments_num": 85, "comments": [ /* 可能有一部分评论 */ ], "user_name": "Creator Name", "user_url": "https://www.pinterest.com/creator_username/" } 步骤 3：点击视频 → 评论列表（+ 尽量多的回复） POST /datasets/v3/trigger?dataset_id=POST_BY_URL_ID body 示例： JSON

[ {

"URL": "https://www.pinterest.com/pin/1234567890/"

} ]

然后 `GET /datasets/v3/snapshot/{snapshot_id}?format=json`。返回 JSON（含评论）：

JSON

{ "url": "https://www.pinterest.com/pin/1234567890/", "post_id": "1234567890", "comments_num": 85, "comments": [

{
  "comment_id": "c1",
  "author_name": "User A",
  "author_url": "https://www.pinterest.com/user_a/",
  "text": "Love this!",
  "date": "2026-02-11T08:00:00Z",
  "likes": 10
  // 是否有 replies / replies_count，要看当前 schema
}

] } “查看更多回复”这一层，目前 Web Scraper API 不能让你自己写脚本去点按钮，只能拿到爬虫内部能抓到的评论层级。

4）如果你坚持“只发一次 GET，不想轮询” 技术上可以这样做，但有风险：

- 先 `POST /trigger` 得到 `snapshot_id`；
- 立刻 `GET /snapshot/{id}?format=json` 一次；
- 不管 status 是不是 ready，都直接用返回内容。

问题是：

如果任务还没完成，你拿到的可能只是状态信息，而不是完整数据；所以从业务可靠性角度，不建议这么做。

总结

- 是的：整个 Web Scraper API 完全是标准 HTTP GET/POST，你可以只用 Node.js 原生 `https` 或 curl 来调用；
- 但：必须先 POST 再 GET，因为采集是异步任务，不可能只靠一个 GET URL 完成所有逻辑；
- 你要的 3 步 Pinterest 工作流，都可以用上面这种“纯 HTTP”方式实现，只是“查看更多回复”这种交互目前不能自定义点击。

api接口.md 23 KB Түүх Анхны өгөгдөл

api接口.md 23 KB

Түүх Анхны өгөгдөл