diff --git a/sites/yunpan1/v1/yunpan1_search.py b/sites/yunpan1/v1/yunpan1_search.py
index ddccb87..fbc0b12 100644
--- a/sites/yunpan1/v1/yunpan1_search.py
+++ b/sites/yunpan1/v1/yunpan1_search.py
@@ -8,8 +8,8 @@ yunpan1 搜索工具 — 搜索云盘资源分享社区并提取夸克链接
py -X utf8 yunpan1_search.py 遮天
说明:
- - Discuz! 首次搜索需建索引(可能等 60-120 秒),同一关键词后续秒回
- - Cookie 文件: tmp/yunpan1_cookies.txt(需先通过 Playwright 登录获取)
+ - 优先搜索,30 秒超时后自动降级到浏览动漫板块最新帖子
+ - Cookie 文件: tmp/yunpan1_cookies.txt(格式:key=value 一行一个 或 Netscape 格式均可)
- 依赖: Python 标准库(无需额外安装)
"""
@@ -25,57 +25,91 @@ BASE_URL = 'https://yunpan1.cc'
# Directory holding this script; the data files live in `tmp/` three levels up.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
COOKIE_FILE = os.path.abspath(os.path.join(SCRIPT_DIR, '..', '..', '..', 'tmp', 'yunpan1_cookies.txt'))
OUTPUT_FILE = os.path.abspath(os.path.join(SCRIPT_DIR, '..', '..', '..', 'tmp', 'quark_links.txt'))

# Search endpoint; `{keyword}` is replaced with the URL-quoted search term.
SEARCH_URL = BASE_URL + '/search.php?mod=forum&srchtxt={keyword}&searchsubmit=yes'
# Board listing (fid=3) ordered by dateline, used when searching is unavailable.
FORUM_URL = BASE_URL + '/forum.php?mod=forumdisplay&fid=3&orderby=dateline'

SEARCH_TIMEOUT = 30  # seconds allowed for a search before falling back to the board listing
FORUM_TIMEOUT = 15   # seconds allowed for the board listing page

# Shared opener for all HTTP requests.
# NOTE: urllib only honours a timeout passed as `open(..., timeout=...)`;
# assigning `_opener.timeout` does not affect any request, so no such
# attribute is set here.
_opener = urllib.request.build_opener()
-# ── 工具函数 ──────────────────────────────────────────────────────
+# ── Cookie 加载(兼容两种格式) ──────────────────────────────────
def _parse_cookie_text(raw):
    """Turn cookie-file text into a single `Cookie` header value.

    Two layouts are understood:

    * Netscape format: tab-separated rows with at least seven columns, where
      columns 6 and 7 are the cookie name and value. Rows prefixed with
      `#HttpOnly_` are real cookies (not comments) and are kept.
    * One `key=value` pair per line.

    Blank lines and `#` comment lines are ignored. If any Netscape row is
    found, only Netscape rows are used; otherwise the remaining lines are
    treated as `key=value` pairs.
    """
    netscape_pairs = []
    plain_pairs = []
    for line in raw.splitlines():
        # Strip spaces only: a trailing tab marks an empty cookie value and
        # must survive so the row still has seven columns.
        line = line.strip(' ')
        if not line.strip():
            continue
        if line.startswith('#HttpOnly_'):
            line = line[len('#HttpOnly_'):]
        elif line.startswith('#'):
            continue
        cols = line.split('\t')
        if len(cols) >= 7:
            netscape_pairs.append(f'{cols[5]}={cols[6]}')
        else:
            plain_pairs.append(line.strip())
    return '; '.join(netscape_pairs or plain_pairs)


def load_cookie():
    """Load the cookie file and return its content as a `Cookie` header value.

    Reads `COOKIE_FILE`, accepting either one `key=value` per line or the
    Netscape cookie-file format. Prints an error and exits with status 1 when
    the file does not exist.

    Returns:
        The cookies joined as `name=value; name=value`.
    """
    path = COOKIE_FILE
    if not os.path.exists(path):
        print(f'❌ Cookie 文件不存在: {path}')
        print(f'请通过 Playwright 登录 yunpan1.cc 后,将 Cookie 保存到该文件')
        sys.exit(1)

    # utf-8-sig transparently drops a BOM, which would otherwise become part
    # of the first cookie name.
    with open(path, 'r', encoding='utf-8-sig') as f:
        raw = f.read()

    return _parse_cookie_text(raw)
-def request(url, cookie):
+# ── HTTP 请求 ─────────────────────────────────────────────────────
+
def request(url, cookie, timeout=SEARCH_TIMEOUT):
    """Fetch `url` with the given cookie and report the outcome without raising.

    Args:
        url: Absolute URL to fetch.
        cookie: Value sent in the `Cookie` request header.
        timeout: Socket timeout in seconds for this request.

    Returns:
        A 5-tuple `(status, html, final_url, elapsed, error)`. On success
        `error` is None and `html` is the body decoded as UTF-8 (undecodable
        bytes replaced). On failure `status` is None, `html` and `final_url`
        are empty strings and `error` is a message; timeout failures are
        prefixed with `timeout:` so callers can recognise them.
    """
    import socket  # local import: needed only to classify timeout errors

    req = urllib.request.Request(url, headers={
        'Cookie': cookie,
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    })
    t0 = time.time()
    try:
        # The timeout must be passed to open(); an attribute on the opener is
        # ignored by urllib and leaves the request without any time limit.
        with _opener.open(req, timeout=timeout) as resp:
            html = resp.read().decode('utf-8', errors='replace')
            return resp.status, html, resp.url, time.time() - t0, None
    except Exception as e:
        # Deliberately broad: callers rely on an error tuple, never an exception.
        timeout_types = (TimeoutError, socket.timeout)
        # URLError wraps the underlying socket error in `.reason`.
        reason = getattr(e, 'reason', None)
        if isinstance(e, timeout_types) or isinstance(reason, timeout_types):
            return None, '', '', time.time() - t0, f'timeout: {e}'
        return None, '', '', time.time() - t0, str(e)
# ── 提取函数 ──────────────────────────────────────────────────────
def extract_quark_links(html):
    """Return the sorted, de-duplicated full Quark share links found in `html`.

    A full link has an alphanumeric ID of at least 12 characters. When pages
    concatenate links with no separator (`.../s/<id>https://pan.quark.cn/...`),
    the ID stops before the next `http://` / `https://` so both links are
    recovered instead of the first one swallowing the second's scheme.
    Links whose ID still ends in `https` are discarded as mismatches.
    """
    # Each ID character must not begin a new URL scheme; this keeps the match
    # from running into a directly appended link.
    pattern = r'https?://pan\.quark\.cn/s/(?:(?!https?://)[a-zA-Z0-9]){12,}'
    found = re.findall(pattern, html)
    return sorted({link for link in found if not link.endswith('https')})
def extract_truncated_links(html):
    """Return the sorted, de-duplicated truncated Quark links found in `html`.

    A truncated link has an alphanumeric ID of 1 to 11 characters. The ID
    must not be followed by another alphanumeric character, so the first 11
    characters of a complete (12+ character) link are not reported as a
    truncated link.
    """
    pattern = r'https?://pan\.quark\.cn/s/[a-zA-Z0-9]{1,11}(?![a-zA-Z0-9])'
    return sorted(set(re.findall(pattern, html)))
def extract_threads(html):
- """提取帖子列表"""
- titles = re.findall(r']+href="forum\.php\?mod=viewthread[^>]+>([^<]+)', html)
+ """提取帖子标题,兼容 viewthread 和 thread 格式"""
+ patterns = [
+ r']+href="forum\.php\?mod=viewthread[^>]+>([^<]+)',
+ r']+href="thread\.php\?tid=\d+[^>]*>([^<]+)',
+ r'class="s xst"[^>]*>([^<]+)',
+ r']+class="xst"[^>]*>([^<]+)',
+ ]
+ all_titles = []
+ for p in patterns:
+ all_titles.extend(re.findall(p, html))
+
seen = set()
result = []
- for t in titles:
+ for t in all_titles:
t = t.strip()
if t and len(t) > 5 and t not in seen:
seen.add(t)
@@ -85,30 +119,23 @@ def extract_threads(html):
# ── 主流程 ────────────────────────────────────────────────────────
-def main():
- if len(sys.argv) < 2:
- print('用法: py -X utf8 yunpan1_search.py <关键词>')
- print('示例: py -X utf8 yunpan1_search.py 遮天')
- sys.exit(1)
-
- keyword = sys.argv[1]
- cookie = load_cookie()
-
- print(f'🔍 搜索 "{keyword}" ...')
- print(f'⏱ 首次搜索需建索引,可能等待 1-2 分钟,请耐心等待...')
def search(keyword, cookie):
    """Query the forum search page for `keyword`.

    Builds the search URL from the URL-quoted keyword, announces the search
    on stdout, and returns whatever `request` returns for that URL using
    `SEARCH_TIMEOUT` as the timeout.
    """
    quoted_keyword = urllib.parse.quote(keyword)
    target = SEARCH_URL.format(keyword=quoted_keyword)
    # Flush right away so the progress line is visible while the request runs.
    print(f'🔍 搜索 "{keyword}"({SEARCH_TIMEOUT}s 超时)...', flush=True)
    return request(target, cookie, timeout=SEARCH_TIMEOUT)
- status, html, final_url, elapsed = request(
- SEARCH_URL.format(keyword=urllib.parse.quote(keyword)),
- cookie
- )
- links = extract_quark_links(html)
- truncated = extract_truncated_links(html)
- threads = extract_threads(html)
def browse_forum(cookie):
    """Fetch the board listing page used as the fallback when search fails.

    Announces the fallback on stdout and returns whatever `request` returns
    for `FORUM_URL` using `FORUM_TIMEOUT` as the timeout.
    """
    # Flush right away so the notice is visible while the request runs.
    print('⏩ 降级到浏览动漫板块最新帖子...', flush=True)
    return request(FORUM_URL, cookie, timeout=FORUM_TIMEOUT)
- print(f'\n✅ 完成(耗时 {elapsed:.0f}s,状态码 {status})')
- print(f' HTML: {len(html)} 字符 / {len(threads)} 条帖子')
+
+def print_results(links, truncated, threads, source, elapsed):
+ print(f'\n✅ 完成(来源: {source},耗时 {elapsed:.0f}s)')
if links:
print(f'\n{"=" * 50}')
@@ -129,9 +156,44 @@ def main():
for l in truncated[:5]:
print(f' {l}')
- print(f'\n📌 帖子预览(前 10 条):')
- for t in threads[:10]:
- print(f' • {t[:60]}')
+ if threads:
+ print(f'\n📌 帖子预览(前 10 条):')
+ for t in threads[:10]:
+ print(f' • {t[:60]}')
+
+
def main():
    """Command-line entry point: search for a keyword and print found links.

    Takes the keyword from `sys.argv[1]`. Tries the forum search first; if
    that fails for any reason, falls back to the board listing. Exits with
    status 1 when no keyword is given or when the fallback also fails.
    """
    if len(sys.argv) < 2:
        print('用法: py -X utf8 yunpan1_search.py <关键词>')
        print('示例: py -X utf8 yunpan1_search.py 遮天')
        sys.exit(1)

    keyword = sys.argv[1]
    cookie = load_cookie()

    _, html, _, elapsed, err = search(keyword, cookie)
    source = '搜索'

    if err:
        # urllib reports timeouts as "timed out", never "timeout", so both
        # spellings are checked.
        lowered = err.lower()
        if 'timeout' in lowered or 'timed out' in lowered:
            print(f'⏱ 搜索超时({elapsed:.0f}s),机器配置较低时首次建索引可能卡死')
            source = '板块浏览(搜索超时降级)'
        else:
            print(f'❌ 搜索失败: {err}')
            source = '板块浏览(搜索失败降级)'

        _, html, _, elapsed, fallback_err = browse_forum(cookie)
        # Stop here on any fallback failure; otherwise an empty page would be
        # reported as a successful run.
        if fallback_err:
            print(f'❌ 降级也失败了: {fallback_err}')
            sys.exit(1)

    links = extract_quark_links(html)
    truncated = extract_truncated_links(html)
    threads = extract_threads(html)
    print_results(links, truncated, threads, source, elapsed)
if __name__ == '__main__':