xcz/deepseek-iptv.py
2025-03-25 00:23:06 +08:00

133 lines
4.9 KiB
Python

import requests
import pandas as pd
import re
import os
from collections import defaultdict
# Classification rules: category name -> keywords looked for in channel names.
# classify_program() walks this mapping in insertion order and returns the
# first category with a matching keyword, so earlier entries take precedence.
# The last entry has no keywords and is the catch-all bucket.
# Extend freely.
CATEGORY_RULES = {
    "中央频道": ["CCTV", "中央", "CGTN", "央视"],
    "广东频道": ["广州", "广东", "GD", "珠江", "南方卫视", "大湾区"],
    "港澳台": ["香港", "澳门", "台湾", "翡翠", "明珠", "凤凰卫视", "澳视"],
    "卫视频道": ["卫视", "STV"],
    "体育": ["体育", "足球", "篮球", "奥运"],
    "少儿动漫": ["少儿", "卡通", "动漫", "动画"],
    "其他": [],
}
# Playlist sources to download, in fetch order. Judging by the file names the
# list mixes M3U playlists with plain "name,url" text lists.
urls = [
    "https://gh.tryxd.cn/https://raw.githubusercontent.com/alantang1977/JunTV/refs/heads/main/output/result.m3u",
    "https://gh.tryxd.cn/https://raw.githubusercontent.com/zwc456baby/iptv_alive/master/live.txt",
    "http://rihou.cc:55/lib/kx2024.txt",
    "http://aktv.space/live.m3u",
    "https://gh.tryxd.cn/https://raw.githubusercontent.com/tianya7981/jiekou/refs/heads/main/野火959",
    "https://codeberg.org/alfredisme/mytvsources/raw/branch/main/mylist-ipv6.m3u",
    "https://codeberg.org/lxxcp/live/raw/branch/main/gsdx.txt",
    "https://live.zbds.top/tv/iptv6.txt",
    "https://live.zbds.top/tv/iptv4.txt",
]
# URL classifiers by host literal. Both only recognise the plain "http://"
# scheme. NOTE(review): neither pattern is referenced in the visible part of
# this file.

# Host is a dotted quad of 1-3 digit groups (octet range is not validated).
ipv4_pattern = re.compile(
    r'^http://(\d{1,3}\.){3}\d{1,3}'
)

# Host is a bracketed IPv6 literal; group 1 captures the address text.
ipv6_pattern = re.compile(
    r'^http://\[([a-fA-F0-9:]+)\]'
)
def classify_program(program_name, rules=None):
    """Return the first category whose keywords match the channel name.

    Matching is a case-insensitive substring test. Categories are tried in
    the mapping's iteration order, so an earlier category wins when several
    would match.

    Args:
        program_name: Channel name to classify.
        rules: Optional mapping of category name -> iterable of keywords.
            Defaults to the module-level ``CATEGORY_RULES``.

    Returns:
        The name of the first matching category, or ``"其他"`` when no
        keyword matches.
    """
    if rules is None:
        rules = CATEGORY_RULES
    name = program_name.lower()
    for category, keywords in rules.items():
        # Empty keywords are skipped: "" is a substring of every name and
        # would make its category swallow everything.
        if any(kw.lower() in name for kw in keywords if kw):
            return category
    return "其他"
def fetch_streams_from_url(url):
    """Download one playlist source and return its text.

    Args:
        url: Address of the playlist to download.

    Returns:
        The response body decoded as UTF-8 when the server answers with
        HTTP 200; ``None`` for any other status or when the request raises.
    """
    print(f"正在爬取网站源: {url}")
    try:
        response = requests.get(url, timeout=20)
        if response.status_code != 200:
            # Say why this source was skipped instead of dropping it silently.
            print(f"请求失败: HTTP {response.status_code}")
            return None
        # Decode as UTF-8 regardless of the charset the response declares.
        response.encoding = 'utf-8'
        return response.text
    except Exception as e:
        # Deliberately broad: a single bad source must not abort the run.
        # The message is truncated to keep the console output compact.
        print(f"请求异常: {str(e)[:50]}")
        return None
def fetch_all_streams():
    """Fetch every source listed in ``urls`` and merge the results.

    Returns:
        The non-empty downloads joined with newlines, in ``urls`` order.
        Sources that returned ``None`` or an empty body are left out, so the
        result is an empty string when nothing could be fetched.
    """
    bodies = []
    for source in urls:
        text = fetch_streams_from_url(source)
        if text:
            bodies.append(text)
    return "\n".join(bodies)
def _extinf_channel_name(extinf_line):
    """Return the channel name carried by one ``#EXTINF`` line, or ``None``."""
    tagged = re.search(r'tvg-name="([^"]+)"', extinf_line)
    if tagged and tagged.group(1).strip():
        return tagged.group(1).strip()
    # No usable tvg-name: fall back to the display title after the comma.
    # Quoted attributes are removed first so commas inside them are ignored.
    without_attrs = re.sub(r'[\w-]+="[^"]*"', "", extinf_line)
    title = without_attrs.partition(",")[2].strip()
    return title or None


def parse_m3u(content):
    """Parse M3U playlist text into a list of stream records.

    A record is produced for each ``#EXTINF`` line that is followed (not
    necessarily immediately) by a line starting with ``http``. The channel
    name comes from the ``tvg-name`` attribute, or from the display title
    after the comma when that attribute is missing or blank. Leading and
    trailing whitespace on each line is ignored.

    Args:
        content: Playlist text; other lines are skipped.

    Returns:
        A list of dicts with keys ``program_name``, ``stream_url`` and
        ``category`` (as assigned by ``classify_program``).
    """
    streams = []
    current_program = None
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if line.startswith("#EXTINF"):
            # A new header always replaces any pending, unconsumed name.
            current_program = _extinf_channel_name(line)
        elif line.startswith("http") and current_program:
            streams.append({
                "program_name": current_program,
                "stream_url": line,
                "category": classify_program(current_program),
            })
            # One URL per header: later URLs need their own #EXTINF line.
            current_program = None
    return streams
def parse_txt(content):
    """Parse ``name,url`` playlist text into a list of stream records.

    A line is accepted when, from its start, it reads as a non-empty name,
    a comma, optional whitespace and then text beginning with ``http``. The
    name is the shortest such prefix; everything from ``http`` to the end of
    the line is the URL. Lines that do not fit are skipped.

    Args:
        content: Playlist text, one candidate entry per line.

    Returns:
        A list of dicts with keys ``program_name``, ``stream_url`` and
        ``category`` (as assigned by ``classify_program``).
    """
    entry_re = re.compile(r"(.+?),\s*(http.+)")
    streams = []
    for line in content.splitlines():
        found = entry_re.match(line)
        if found is None:
            continue
        program = found.group(1).strip()
        record = {
            "program_name": program,
            "stream_url": found.group(2).strip(),
            "category": classify_program(program),
        }
        streams.append(record)
    return streams
def organize_streams(content):
    """Parse playlist text into a de-duplicated DataFrame of streams.

    ``content`` may be several downloads joined together, so M3U and
    ``name,url`` text entries can both be present. Choosing one parser from
    the first bytes would discard every entry of the other format, so both
    parsers are run and their results combined.

    Args:
        content: Raw playlist text.

    Returns:
        A DataFrame with columns ``program_name``, ``stream_url`` and
        ``category``; M3U-derived rows come first, then text-derived rows.
        Rows repeating an earlier (program_name, stream_url) pair are
        dropped. The columns are present even when nothing was parsed, so
        lookups such as ``df['category']`` still work on an empty result.
    """
    # Keep M3U material away from the text parser: directive lines (whose
    # attributes may contain ",http") and the bare URL lines that belong to
    # a preceding #EXTINF header.
    txt_content = "\n".join(
        line for line in content.splitlines()
        if not line.lstrip().startswith(("#EXT", "http"))
    )
    records = parse_m3u(content) + parse_txt(txt_content)
    df = pd.DataFrame(
        records, columns=["program_name", "stream_url", "category"]
    )
    return df.drop_duplicates(subset=['program_name', 'stream_url'])
def save_to_txt(df, filename="mytv.txt"):
    """Write the streams to a text file grouped by category.

    Each non-empty category gets a ``# <category> (<n>个频道)`` header line
    preceded by a blank line, followed by its ``name,url`` entries in sorted
    order. Categories appear in ``CATEGORY_RULES`` order.

    Args:
        df: DataFrame with ``program_name``, ``stream_url`` and ``category``
            columns.
        filename: Output path; the file is overwritten.
    """
    categorized = defaultdict(list)
    for _, row in df.iterrows():
        entry = f"{row['program_name']},{row['stream_url']}"
        categorized[row['category']].append(entry)

    # "其他" is appended as the fallback bucket, but CATEGORY_RULES may already
    # contain it; dict.fromkeys drops the repeat while keeping order, so the
    # section is written once instead of twice.
    category_order = dict.fromkeys([*CATEGORY_RULES, "其他"])

    with open(filename, 'w', encoding='utf-8') as f:
        for category in category_order:
            entries = categorized.get(category)
            if not entries:
                continue
            f.write(f"\n# {category} ({len(entries)}个频道)\n")
            f.write("\n".join(sorted(entries)))
            f.write("\n")
    print(f"分类文本已保存: {os.path.abspath(filename)}")
def save_to_m3u(df, filename="mytv.m3u"):
    """Write the streams to an M3U playlist grouped by category.

    The file starts with ``#EXTM3U``. For every category in
    ``CATEGORY_RULES`` that has rows, a comment line naming the category and
    its channel count is written, followed by one ``#EXTINF`` line and one
    URL line per stream, in DataFrame order.

    Args:
        df: DataFrame with ``program_name``, ``stream_url`` and ``category``
            columns.
        filename: Output path; the file is overwritten.
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write("#EXTM3U\n")
        for category in CATEGORY_RULES:
            in_category = df['category'] == category
            subset = df[in_category]
            if subset.empty:
                continue
            f.write(f"\n# 分类: {category} ({len(subset)}个频道)\n")
            # The channel name is used both as tvg-name and as display title.
            f.writelines(
                f'#EXTINF:-1 tvg-name="{name}",{name}\n{link}\n'
                for name, link in zip(subset['program_name'], subset['stream_url'])
            )
    print(f"分类M3U已保存: {os.path.abspath(filename)}")
def print_statistics(df):
    """Print the number of channels per category and the overall total.

    Categories are listed in the order produced by ``value_counts()`` on the
    ``category`` column, each name left-justified to eight characters.

    Args:
        df: DataFrame with a ``category`` column.
    """
    per_category = df['category'].value_counts()
    report = ["\n频道分类统计:"]
    for name, total in per_category.items():
        report.append(f"{name.ljust(8)}: {total}个频道")
    report.append(f"总频道数: {len(df)}")
    print("\n".join(report))
# Script entry point: download all sources, classify the streams, print a
# summary and write both output files. Nothing is written when no source
# returned any text.
if __name__ == "__main__":
    print("开始抓取IPTV源...")
    raw_playlists = fetch_all_streams()
    if not raw_playlists:
        print("未能获取有效数据")
    else:
        channels = organize_streams(raw_playlists)
        print_statistics(channels)
        save_to_txt(channels)
        save_to_m3u(channels)