# dengdai/PY/51吸瓜.py
#
# 220 lines
# 7.5 KiB
# Python

# -*- coding: utf-8 -*-
# by @嗷呜
import json
import random
import re
import sys
import threading
import time
from base64 import b64decode
import requests
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
from pyquery import PyQuery as pq
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
    """Scraper plugin for the "51吸瓜" site, layered on the project's base ``Spider``.

    The class discovers a working mirror domain at ``init`` time, then serves
    category, list, detail, search and play requests by parsing the site's
    HTML with PyQuery. Cover images are fetched through ``localProxy`` and
    AES-decrypted before being returned.

    NOTE(review): ``self.fetch`` and ``self.getProxyUrl`` are not defined in
    this class; they are presumably inherited from ``base.spider.Spider`` --
    confirm against that module. The dictionary shapes returned by the
    ``*Content`` methods are whatever the host application expects.
    """

    def init(self, extend=""):
        """Pick the fastest mirror from ``get_domains()`` and store it in ``self.host``."""
        self.host = self.host_late(self.get_domains())

    def getName(self):
        """Unused hook; returns ``None``."""
        pass

    def isVideoFormat(self, url):
        """Unused hook; returns ``None``."""
        pass

    def manualVideoCheck(self):
        """Unused hook; returns ``None``."""
        pass

    def destroy(self):
        """Unused hook; returns ``None``."""
        pass

    # Browser-like headers sent with every page and image request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="134", "Google Chrome";v="134"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"macOS"',
        'dnt': '1',
        'upgrade-insecure-requests': '1',
        'sec-fetch-site': 'cross-site',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-user': '?1',
        'sec-fetch-dest': 'document',
        'accept-language': 'zh-CN,zh;q=0.9',
        'priority': 'u=0, i',
    }

    def homeContent(self, filter):
        """Fetch the home page and return its categories and article list.

        Returns:
            dict with ``'class'`` (list of ``{'type_name', 'type_id'}`` built
            from ``.category-list ul li`` links) and ``'list'`` (videos parsed
            from ``#index article a``).
        """
        data = self.getpq(self.fetch(self.host, headers=self.headers).text)
        classes = [
            {'type_name': item('a').text(), 'type_id': item('a').attr('href')}
            for item in data('.category-list ul li').items()
        ]
        return {
            'class': classes,
            'list': self.getlist(data('#index article a')),
        }

    def homeVideoContent(self):
        """Unused hook; returns ``None``."""
        pass

    def categoryContent(self, tid, pg, filter, extend):
        """Fetch one page of a category.

        Args:
            tid: category path as produced by ``homeContent`` (``type_id``);
                it is concatenated directly between the host and ``pg``.
            pg: page number, appended verbatim to the URL.
            filter, extend: accepted for interface compatibility; not used.

        Returns:
            dict with the parsed ``'list'`` plus fixed paging placeholders
            (the real page count is not read from the page).
        """
        data = self.getpq(self.fetch(f"{self.host}{tid}{pg}", headers=self.headers).text)
        return {
            'list': self.getlist(data('#archive article a')),
            'page': pg,
            'pagecount': 9999,
            'limit': 90,
            'total': 999999,
        }

    def detailContent(self, ids):
        """Fetch a detail page and build its single ``vod`` entry.

        ``vod_content`` is a space-joined list of tag links; when no tag can be
        extracted (none present, or parsing failed) the post title is used.
        ``vod_play_url`` is a ``#``-joined list of ``视频N$url`` entries read
        from each ``.dplayer`` element's ``data-config`` JSON; when none can be
        extracted a placeholder entry pointing at the page URL is used.

        Args:
            ids: sequence whose first element is the page path (``vod_id``).

        Returns:
            ``{'list': [vod]}``.
        """
        url = f"{self.host}{ids[0]}"
        data = self.getpq(self.fetch(url, headers=self.headers).text)
        vod = {'vod_play_from': '51吸瓜'}

        try:
            clist = []
            for k in data('.tags .keywords a').items():
                title = k.text()
                href = k.attr('href')
                clist.append('[a=cr:' + json.dumps({'id': href, 'name': title}) + '/]' + title + '[/a]')
        except Exception:
            clist = []
        # Fall back to the title both on failure and when the page has no tags.
        vod['vod_content'] = ' '.join(clist) or data('.post-title').text()

        try:
            plist = []
            for c, k in enumerate(data('.dplayer').items(), start=1):
                config = json.loads(k.attr('data-config'))
                plist.append(f"视频{c}${config['video']['url']}")
        except Exception:
            # Any malformed player config invalidates the whole list.
            plist = []
        # Same placeholder whether parsing failed or no player was found.
        vod['vod_play_url'] = '#'.join(plist) or f"请停止活塞运动,可能没有视频${url}"

        return {'list': [vod]}

    def searchContent(self, key, quick, pg="1"):
        """Fetch a search results page for ``key`` and return the parsed list.

        ``quick`` is accepted for interface compatibility and not used.
        """
        data = self.getpq(self.fetch(f"{self.host}/search/{key}/{pg}", headers=self.headers).text)
        return {'list': self.getlist(data('#archive article a')), 'page': pg}

    def playerContent(self, flag, id, vipFlags):
        """Return the play descriptor for ``id`` together with request headers.

        ``flag`` and ``vipFlags`` are not used. ``'parse': 1`` is passed
        through unchanged; its meaning is defined by the host application.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
            'sec-ch-ua-platform': '"macOS"',
            'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="134", "Google Chrome";v="134"',
            'DNT': '1',
            'sec-ch-ua-mobile': '?0',
            'Origin': self.host,
            'Sec-Fetch-Site': 'cross-site',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Dest': 'empty',
            'Accept-Language': 'zh-CN,zh;q=0.9',
        }
        return {'parse': 1, 'url': id, 'header': headers}

    def localProxy(self, param):
        """Fetch ``param['url']`` and return it AES-decrypted.

        If the payload cannot be decrypted (length not a multiple of the block
        size, or invalid padding) the raw bytes are returned instead, so an
        unencrypted image is still served rather than raising.

        Returns:
            ``[200, content_type, body_bytes]``.
        """
        res = self.fetch(param['url'], headers=self.headers, timeout=10)
        try:
            body = self.aesimg(res.content)
        except ValueError:
            body = res.content
        return [200, res.headers.get('Content-Type'), body]

    def get_domains(self):
        """Scrape the landing page for the current mirror domains.

        The last ``<script>`` on the landing page carries a Base64-encoded
        HTML/JS blob. From it this method reads the comma-separated ``words``
        list and the domain suffix that follows ``words.random()`` in the
        ``lineAry`` assignment, then builds up to three distinct candidate
        URLs of the form ``https://<word>.<suffix>``.

        Raises:
            Exception: if the Base64 blob, the ``words`` list or the domain
                suffix cannot be found.
        """
        page = self.getpq(self.fetch("https://51cg.fun", headers=self.headers).text)
        html_pattern = r"Base64\.decode\('([^']+)'\)"
        html_match = re.search(html_pattern, page('script').eq(-1).text(), re.DOTALL)
        if not html_match:
            raise Exception("未找到html")
        html = b64decode(html_match.group(1)).decode()

        words_pattern = r"words\s*=\s*'([^']+)'"
        words_match = re.search(words_pattern, html, re.DOTALL)
        if not words_match:
            raise Exception("未找到words")
        words = words_match.group(1).split(',')

        main_pattern = r"lineAry\s*=.*?words\.random\(\)\s*\+\s*'\.([^']+)'"
        domain_match = re.search(main_pattern, html, re.DOTALL)
        if not domain_match:
            raise Exception("未找到主域名")
        domain_suffix = domain_match.group(1)

        # Sample without replacement so the latency probe is not wasted on
        # the same mirror several times.
        picked = random.sample(words, min(3, len(words)))
        return [f"https://{word}.{domain_suffix}" for word in picked]

    def host_late(self, url_list):
        """Return the URL from ``url_list`` that answers a HEAD request fastest.

        Args:
            url_list: a list of URLs, or a single comma-separated string.

        Returns:
            The lowest-latency URL. With zero candidates returns ``''``; with
            one candidate returns it without probing. Hosts that fail or time
            out (1 second) are ranked last; if every host fails, one of them
            is still returned.
        """
        if isinstance(url_list, str):
            urls = [u.strip() for u in url_list.split(',')]
        else:
            urls = url_list

        if len(urls) <= 1:
            return urls[0] if urls else ''

        results = {}

        def probe(url):
            # Record the round-trip time in milliseconds, or infinity on error.
            try:
                started = time.monotonic()
                requests.head(url, timeout=1.0, allow_redirects=False)
                results[url] = (time.monotonic() - started) * 1000
            except Exception:
                results[url] = float('inf')

        threads = [threading.Thread(target=probe, args=(url,)) for url in urls]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        return min(results, key=results.get)

    def getlist(self, data):
        """Convert a PyQuery selection of article links into video dicts.

        Links missing an href, an ``<h2>`` title or a ``datePublished`` span
        are skipped.
        """
        videos = []
        for k in data.items():
            href = k.attr('href')
            title = k('h2').text()
            published = k('span[itemprop="datePublished"]').text()
            if not (href and title and published):
                continue
            videos.append({
                'vod_id': href,
                'vod_name': title.replace('\n', ' '),
                'vod_pic': self.getimg(k('script').text()),
                'vod_remarks': published,
                'style': {"type": "rect", "ratio": 1.33},
            })
        return videos

    def getimg(self, text):
        """Extract the cover URL from a ``loadBannerDirect('...')`` call.

        Returns:
            A local-proxy URL wrapping the cover URL (so it is routed through
            ``localProxy``), or ``''`` when ``text`` has no such call.
        """
        match = re.search(r"loadBannerDirect\('([^']+)'", text)
        if not match:
            return ''
        url = match.group(1)
        return f"{self.getProxyUrl()}&url={url}&type=img"

    def aesimg(self, word):
        """Decrypt ``word`` with AES-CBC (fixed key/IV) and strip the padding.

        Raises:
            ValueError: if ``word`` is not a whole number of blocks or its
                padding is invalid.
        """
        key = b'f5d965df75336270'
        iv = b'97b60394abc2fbe1'
        cipher = AES.new(key, AES.MODE_CBC, iv)
        return unpad(cipher.decrypt(word), AES.block_size)

    def getpq(self, data):
        """Parse ``data`` with PyQuery, retrying with UTF-8 bytes on failure.

        The first attempt's error is printed before the retry.
        """
        try:
            return pq(data)
        except Exception as e:
            print(f"{str(e)}")
            return pq(data.encode('utf-8'))