Create 优.py
This commit is contained in:
parent
ec8a366469
commit
a7ffca5638
300
py/优.py
Normal file
300
py/优.py
Normal file
@ -0,0 +1,300 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# by @嗷呜
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from urllib.parse import quote
|
||||
from Crypto.Hash import MD5
|
||||
import requests
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
|
||||
|
||||
class Spider(Spider):
|
||||
|
||||
def init(self, extend=""):
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update(self.headers)
|
||||
self.session.cookies.update(self.cookie)
|
||||
self.get_ctoken()
|
||||
pass
|
||||
|
||||
def getName(self):
|
||||
pass
|
||||
|
||||
def isVideoFormat(self, url):
|
||||
pass
|
||||
|
||||
def manualVideoCheck(self):
|
||||
pass
|
||||
|
||||
def destroy(self):
|
||||
pass
|
||||
|
||||
host='https://www.youku.com'
|
||||
|
||||
shost='https://search.youku.com'
|
||||
|
||||
h5host='https://acs.youku.com'
|
||||
|
||||
ihost='https://v.youku.com'
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (; Windows 10.0.26100.3194_64 ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Electron/14.2.0 Safari/537.36 Node/14.17.0 YoukuDesktop/9.2.60 UOSYouku (2.0.1)-Electron(UTDID ZYmGMAAAACkDAMU8hbiMmYdd;CHANNEL official;ZREAL 0;BTYPE TM2013;BRAND TIMI;BUILDVER 9.2.60.1001)',
|
||||
'Referer': f'{host}/'
|
||||
}
|
||||
|
||||
cookie={
|
||||
"__ysuid": "17416134165380iB",
|
||||
"__aysid": "1741613416541WbD",
|
||||
"xlly_s": "1",
|
||||
"isI18n": "false",
|
||||
"cna": "bNdVIKmmsHgCAXW9W6yrQ1/s",
|
||||
"__ayft": "1741672162330",
|
||||
"__arpvid": "1741672162331FBKgrn-1741672162342",
|
||||
"__ayscnt": "1",
|
||||
"__aypstp": "1",
|
||||
"__ayspstp": "3",
|
||||
"tfstk": "gZbiib4JpG-6DqW-B98_2rwPuFrd1fTXQt3vHEp4YpJIBA3OgrWcwOi90RTOo9XVQ5tAM5NcK_CP6Ep97K2ce1XDc59v3KXAgGFLyzC11ET2n8U8yoyib67M3xL25e8gS8pbyzC1_ET4e8URWTsSnHv2uh8VTeJBgEuN3d-ELQAWuKWV36PHGpJ2uEWVTxvicLX1ewyUXYSekxMf-CxMEqpnoqVvshvP_pABOwvXjL5wKqeulm52np_zpkfCDGW9Ot4uKFIRwZtP7vP9_gfAr3KEpDWXSIfWRay-DHIc_Z-hAzkD1i5Ooi5LZ0O5YO_1mUc476YMI3R6xzucUnRlNe_zemKdm172xMwr2L7CTgIkbvndhFAVh3_YFV9Ng__52U4SQKIdZZjc4diE4EUxlFrfKmiXbBOHeP72v7sAahuTtWm78hRB1yV3tmg9bBOEhWVnq5KwOBL5."
|
||||
}
|
||||
|
||||
def homeContent(self, filter):
|
||||
result = {}
|
||||
categories = ["电视剧", "电影", "综艺", "动漫", "少儿", "纪录片", "文化", "亲子", "教育", "搞笑", "生活",
|
||||
"体育", "音乐", "游戏"]
|
||||
classes = [{'type_name': category, 'type_id': category} for category in categories]
|
||||
filters = {}
|
||||
self.typeid = {}
|
||||
with ThreadPoolExecutor(max_workers=len(categories)) as executor:
|
||||
tasks = {
|
||||
executor.submit(self.cf, {'type': category}, True): category
|
||||
for category in categories
|
||||
}
|
||||
|
||||
for future in as_completed(tasks):
|
||||
try:
|
||||
category = tasks[future]
|
||||
session, ft = future.result()
|
||||
filters[category] = ft
|
||||
self.typeid[category] = session
|
||||
except Exception as e:
|
||||
print(f"处理分类 {tasks[future]} 时出错: {str(e)}")
|
||||
|
||||
result['class'] = classes
|
||||
result['filters'] = filters
|
||||
return result
|
||||
|
||||
def homeVideoContent(self):
|
||||
try:
|
||||
vlist = []
|
||||
params={"ms_codes":"2019061000","params":"{\"debug\":0,\"gray\":0,\"pageNo\":1,\"utdid\":\"ZYmGMAAAACkDAMU8hbiMmYdd\",\"userId\":\"\",\"bizKey\":\"YOUKU_WEB\",\"appPackageKey\":\"com.youku.YouKu\",\"showNodeList\":0,\"reqSubNode\":0,\"nodeKey\":\"WEBHOME\",\"bizContext\":\"{\\\"spmA\\\":\\\"a2hja\\\"}\"}","system_info":"{\"device\":\"pcweb\",\"os\":\"pcweb\",\"ver\":\"1.0.0.0\",\"userAgent\":\"Mozilla/5.0 (; Windows 10.0.26100.3194_64 ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Electron/14.2.0 Safari/537.36 Node/14.17.0 YoukuDesktop/9.2.60 UOSYouku (2.0.1)-Electron(UTDID ZYmGMAAAACkDAMU8hbiMmYdd;CHANNEL official;ZREAL 0;BTYPE TM2013;BRAND TIMI;BUILDVER 9.2.60.1001)\",\"guid\":\"1590141704165YXe\",\"appPackageKey\":\"com.youku.pcweb\",\"young\":0,\"brand\":\"\",\"network\":\"\",\"ouid\":\"\",\"idfa\":\"\",\"scale\":\"\",\"operator\":\"\",\"resolution\":\"\",\"pid\":\"\",\"childGender\":0,\"zx\":0}"}
|
||||
data=self.getdata(f'{self.h5host}/h5/mtop.youku.columbus.home.query/1.0/',params)
|
||||
okey=list(data['data'].keys())[0]
|
||||
for i in data['data'][okey]['data']['nodes'][0]['nodes'][-1]['nodes'][0]['nodes']:
|
||||
if i.get('nodes') and i['nodes'][0].get('data'):
|
||||
i=i['nodes'][0]['data']
|
||||
if i.get('assignId'):
|
||||
vlist.append({
|
||||
'vod_id': i['assignId'],
|
||||
'vod_name': i.get('title'),
|
||||
'vod_pic': i.get('vImg') or i.get('img'),
|
||||
'vod_year': i.get('mark',{}).get('data',{}).get('text'),
|
||||
'vod_remarks': i.get('summary')
|
||||
})
|
||||
return {'list': vlist}
|
||||
except Exception as e:
|
||||
print(f"处理主页视频数据时出错: {str(e)}")
|
||||
return {'list': []}
|
||||
|
||||
def categoryContent(self, tid, pg, filter, extend):
|
||||
result = {}
|
||||
vlist = []
|
||||
result['page'] = pg
|
||||
result['limit'] = 90
|
||||
result['total'] = 999999
|
||||
pagecount = 9999
|
||||
params = {'type': tid}
|
||||
id = self.typeid[tid]
|
||||
params.update(extend)
|
||||
if pg == '1':
|
||||
id=self.cf(params)
|
||||
data=self.session.get(f'{self.host}/category/data?session={id}¶ms={quote(json.dumps(params))}&pageNo={pg}').json()
|
||||
try:
|
||||
data=data['data']['filterData']
|
||||
for i in data['listData']:
|
||||
if i.get('videoLink') and 's=' in i['videoLink']:
|
||||
vlist.append({
|
||||
'vod_id': i.get('videoLink').split('s=')[-1],
|
||||
'vod_name': i.get('title'),
|
||||
'vod_pic': i.get('img'),
|
||||
'vod_year': i.get('rightTagText'),
|
||||
'vod_remarks': i.get('summary')
|
||||
})
|
||||
self.typeid[tid]=quote(json.dumps(data['session']))
|
||||
except:
|
||||
pagecount=pg
|
||||
result['list'] = vlist
|
||||
result['pagecount'] = pagecount
|
||||
return result
|
||||
|
||||
def detailContent(self, ids):
|
||||
try:
|
||||
data=self.session.get(f'{self.ihost}/v_getvideo_info/?showId={ids[0]}').json()
|
||||
v=data['data']
|
||||
vod = {
|
||||
'type_name': v.get('showVideotype'),
|
||||
'vod_year': v.get('lastUpdate'),
|
||||
'vod_remarks': v.get('rc_title'),
|
||||
'vod_actor': v.get('_personNameStr'),
|
||||
'vod_content': v.get('showdesc'),
|
||||
'vod_play_from': '优酷',
|
||||
'vod_play_url': ''
|
||||
}
|
||||
params={"biz":"new_detail_web2","videoId":v.get('vid'),"scene":"web_page","componentVersion":"3","ip":data.get('ip'),"debug":0,"utdid":"ZYmGMAAAACkDAMU8hbiMmYdd","userId":0,"platform":"pc","nextSession":"","gray":0,"source":"pcNoPrev","showId":ids[0]}
|
||||
sdata,index=self.getinfo(params)
|
||||
pdata=sdata['nodes']
|
||||
if index > len(pdata):
|
||||
batch_size = len(pdata)
|
||||
total_batches = ((index + batch_size - 1) // batch_size) - 1
|
||||
ssj = json.loads(sdata['data']['session'])
|
||||
with ThreadPoolExecutor(max_workers=total_batches) as executor:
|
||||
futures = []
|
||||
for batch in range(total_batches):
|
||||
start = batch_size + 1 + (batch * batch_size)
|
||||
end = start + batch_size - 1
|
||||
next_session = ssj.copy()
|
||||
next_session.update({
|
||||
"itemStartStage": start,
|
||||
"itemEndStage": min(end, index)
|
||||
})
|
||||
current_params = params.copy()
|
||||
current_params['nextSession'] = json.dumps(next_session)
|
||||
futures.append((start, executor.submit(self.getvinfo, current_params)))
|
||||
futures.sort(key=lambda x: x[0])
|
||||
|
||||
for _, future in futures:
|
||||
try:
|
||||
result = future.result()
|
||||
pdata.extend(result['nodes'])
|
||||
except Exception as e:
|
||||
print(f"Error fetching data: {str(e)}")
|
||||
vod['vod_play_url'] = '#'.join([f"{i['data'].get('title')}${i['data']['action'].get('value')}" for i in pdata])
|
||||
return {'list': [vod]}
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return {'list': [{'vod_play_from': '哎呀翻车啦', 'vod_play_url': f'呜呜呜${self.host}'}]}
|
||||
|
||||
def searchContent(self, key, quick, pg="1"):
|
||||
data=self.session.get(f'{self.shost}/api/search?pg={pg}&keyword={key}').json()
|
||||
vlist = []
|
||||
for i in data['pageComponentList']:
|
||||
if i.get('commonData') and (i['commonData'].get('showId') or i['commonData'].get('realShowId')):
|
||||
i=i['commonData']
|
||||
vlist.append({
|
||||
'vod_id': i.get('showId') or i.get('realShowId'),
|
||||
'vod_name': i['titleDTO'].get('displayName'),
|
||||
'vod_pic': i['posterDTO'].get('vThumbUrl'),
|
||||
'vod_year': i.get('feature'),
|
||||
'vod_remarks': i.get('updateNotice')
|
||||
})
|
||||
return {'list': vlist, 'page': pg}
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
|
||||
return {'jx':1,'parse': 1, 'url': f"{self.ihost}/video?vid={id}", 'header': ''}
|
||||
|
||||
def localProxy(self, param):
|
||||
pass
|
||||
|
||||
def cf(self,params,b=False):
|
||||
response = self.session.get(f'{self.host}/category/data?params={quote(json.dumps(params))}&optionRefresh=1&pageNo=1').json()
|
||||
data=response['data']['filterData']
|
||||
session=quote(json.dumps(data['session']))
|
||||
if b:
|
||||
return session,self.get_filter_data(data['filter']['filterData'][1:])
|
||||
return session
|
||||
|
||||
def process_key(self, key):
|
||||
if '_' not in key:
|
||||
return key
|
||||
parts = key.split('_')
|
||||
result = parts[0]
|
||||
for part in parts[1:]:
|
||||
if part:
|
||||
result += part[0].upper() + part[1:]
|
||||
return result
|
||||
|
||||
def get_filter_data(self, data):
|
||||
result = []
|
||||
try:
|
||||
for item in data:
|
||||
if not item.get('subFilter'):
|
||||
continue
|
||||
first_sub = item['subFilter'][0]
|
||||
if not first_sub.get('filterType'):
|
||||
continue
|
||||
filter_item = {
|
||||
'key': self.process_key(first_sub['filterType']),
|
||||
'name': first_sub['title'],
|
||||
'value': []
|
||||
}
|
||||
for sub in item['subFilter']:
|
||||
if 'value' in sub:
|
||||
filter_item['value'].append({
|
||||
'n': sub['title'],
|
||||
'v': sub['value']
|
||||
})
|
||||
if filter_item['value']:
|
||||
result.append(filter_item)
|
||||
|
||||
except Exception as e:
|
||||
print(f"处理筛选数据时出错: {str(e)}")
|
||||
|
||||
return result
|
||||
|
||||
def get_ctoken(self):
|
||||
data=self.session.get(f'{self.h5host}/h5/mtop.ykrec.recommendservice.recommend/1.0/?jsv=2.6.1&appKey=24679788')
|
||||
|
||||
def md5(self,t,text):
|
||||
h = MD5.new()
|
||||
token=self.session.cookies.get('_m_h5_tk').split('_')[0]
|
||||
data=f"{token}&{t}&24679788&{text}"
|
||||
h.update(data.encode('utf-8'))
|
||||
return h.hexdigest()
|
||||
|
||||
def getdata(self, url, params, recursion_count=0, max_recursion=3):
|
||||
data = json.dumps(params)
|
||||
t = int(time.time() * 1000)
|
||||
jsdata = {
|
||||
'appKey': '24679788',
|
||||
't': t,
|
||||
'sign': self.md5(t, data),
|
||||
'data': data
|
||||
}
|
||||
response = self.session.get(url, params=jsdata)
|
||||
if '令牌过期' in response.text:
|
||||
if recursion_count >= max_recursion:
|
||||
raise Exception("达到最大递归次数,无法继续请求")
|
||||
self.get_ctoken()
|
||||
return self.getdata(url, params, recursion_count + 1, max_recursion)
|
||||
else:
|
||||
return response.json()
|
||||
|
||||
def getvinfo(self,params):
|
||||
body = {
|
||||
"ms_codes": "2019030100",
|
||||
"params": json.dumps(params),
|
||||
"system_info": "{\"os\":\"iku\",\"device\":\"iku\",\"ver\":\"9.2.9\",\"appPackageKey\":\"com.youku.iku\",\"appPackageId\":\"pcweb\"}"
|
||||
}
|
||||
data = self.getdata(f'{self.h5host}/h5/mtop.youku.columbus.gateway.new.execute/1.0/', body)
|
||||
okey = list(data['data'].keys())[0]
|
||||
i = data['data'][okey]['data']
|
||||
return i
|
||||
|
||||
def getinfo(self,params):
|
||||
i = self.getvinfo(params)
|
||||
jdata=i['nodes'][0]['nodes'][3]
|
||||
info=i['data']['extra']['episodeTotal']
|
||||
if i['data']['extra']['showCategory'] in ['电影','游戏']:
|
||||
jdata = i['nodes'][0]['nodes'][4]
|
||||
return jdata,info
|
Loading…
Reference in New Issue
Block a user