# -*- coding: utf-8 -*-
# by @嗷呜
# NOTE(review): reconstructed from a whitespace-mangled diff chunk
# (original patch: PY/official/优.py, new file). Newlines were lost in the
# paste; line structure restored, runtime strings preserved byte-for-byte.
# Youku (youku.com) spider: category browsing, search and episode listing.
import json
import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import quote
from Crypto.Hash import MD5
import requests
sys.path.append('..')
from base.spider import Spider


class Spider(Spider):
    """Youku site spider implementing the base.spider (TVBox-style) interface."""

    def init(self, extend=""):
        # One shared HTTP session carrying the desktop-client UA and cookies;
        # get_ctoken() primes the _m_h5_tk token cookie used by md5() signing.
        self.session = requests.Session()
        self.session.headers.update(self.headers)
        self.session.cookies.update(self.cookie)
        self.get_ctoken()

    def getName(self):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def destroy(self):
        pass

    host = 'https://www.youku.com'

    shost = 'https://search.youku.com'

    h5host = 'https://acs.youku.com'

    ihost = 'https://v.youku.com'

    headers = {
        'User-Agent': 'Mozilla/5.0 (; Windows 10.0.26100.3194_64 ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Electron/14.2.0 Safari/537.36 Node/14.17.0 YoukuDesktop/9.2.60 UOSYouku (2.0.1)-Electron(UTDID ZYmGMAAAACkDAMU8hbiMmYdd;CHANNEL official;ZREAL 0;BTYPE TM2013;BRAND TIMI;BUILDVER 9.2.60.1001)',
        'Referer': f'{host}/'
    }

    cookie = {
        "__ysuid": "17416134165380iB",
        "__aysid": "1741613416541WbD",
        "xlly_s": "1",
        "isI18n": "false",
        "cna": "bNdVIKmmsHgCAXW9W6yrQ1/s",
        "__ayft": "1741672162330",
        "__arpvid": "1741672162331FBKgrn-1741672162342",
        "__ayscnt": "1",
        "__aypstp": "1",
        "__ayspstp": "3",
        "tfstk": "gZbiib4JpG-6DqW-B98_2rwPuFrd1fTXQt3vHEp4YpJIBA3OgrWcwOi90RTOo9XVQ5tAM5NcK_CP6Ep97K2ce1XDc59v3KXAgGFLyzC11ET2n8U8yoyib67M3xL25e8gS8pbyzC1_ET4e8URWTsSnHv2uh8VTeJBgEuN3d-ELQAWuKWV36PHGpJ2uEWVTxvicLX1ewyUXYSekxMf-CxMEqpnoqVvshvP_pABOwvXjL5wKqeulm52np_zpkfCDGW9Ot4uKFIRwZtP7vP9_gfAr3KEpDWXSIfWRay-DHIc_Z-hAzkD1i5Ooi5LZ0O5YO_1mUc476YMI3R6xzucUnRlNe_zemKdm172xMwr2L7CTgIkbvndhFAVh3_YFV9Ng__52U4SQKIdZZjc4diE4EUxlFrfKmiXbBOHeP72v7sAahuTtWm78hRB1yV3tmg9bBOEhWVnq5KwOBL5."
    }

    def homeContent(self, filter):
        """Return category classes and, fetched concurrently, per-category filters.

        Also caches each category's pagination session token in self.typeid.
        """
        result = {}
        categories = ["电视剧", "电影", "综艺", "动漫", "少儿", "纪录片", "文化", "亲子", "教育", "搞笑", "生活",
                      "体育", "音乐", "游戏"]
        classes = [{'type_name': category, 'type_id': category} for category in categories]
        filters = {}
        self.typeid = {}
        with ThreadPoolExecutor(max_workers=len(categories)) as executor:
            tasks = {
                executor.submit(self.cf, {'type': category}, True): category
                for category in categories
            }
            for future in as_completed(tasks):
                try:
                    category = tasks[future]
                    session, ft = future.result()
                    filters[category] = ft
                    self.typeid[category] = session
                except Exception as e:
                    print(f"处理分类 {tasks[future]} 时出错: {str(e)}")
        result['class'] = classes
        result['filters'] = filters
        return result

    def homeVideoContent(self):
        """Fetch the PC-web home feed and flatten it into a vod list."""
        try:
            vlist = []
            params = {"ms_codes": "2019061000", "params": "{\"debug\":0,\"gray\":0,\"pageNo\":1,\"utdid\":\"ZYmGMAAAACkDAMU8hbiMmYdd\",\"userId\":\"\",\"bizKey\":\"YOUKU_WEB\",\"appPackageKey\":\"com.youku.YouKu\",\"showNodeList\":0,\"reqSubNode\":0,\"nodeKey\":\"WEBHOME\",\"bizContext\":\"{\\\"spmA\\\":\\\"a2hja\\\"}\"}", "system_info": "{\"device\":\"pcweb\",\"os\":\"pcweb\",\"ver\":\"1.0.0.0\",\"userAgent\":\"Mozilla/5.0 (; Windows 10.0.26100.3194_64 ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Electron/14.2.0 Safari/537.36 Node/14.17.0 YoukuDesktop/9.2.60 UOSYouku (2.0.1)-Electron(UTDID ZYmGMAAAACkDAMU8hbiMmYdd;CHANNEL official;ZREAL 0;BTYPE TM2013;BRAND TIMI;BUILDVER 9.2.60.1001)\",\"guid\":\"1590141704165YXe\",\"appPackageKey\":\"com.youku.pcweb\",\"young\":0,\"brand\":\"\",\"network\":\"\",\"ouid\":\"\",\"idfa\":\"\",\"scale\":\"\",\"operator\":\"\",\"resolution\":\"\",\"pid\":\"\",\"childGender\":0,\"zx\":0}"}
            data = self.getdata(f'{self.h5host}/h5/mtop.youku.columbus.home.query/1.0/', params)
            okey = list(data['data'].keys())[0]
            for i in data['data'][okey]['data']['nodes'][0]['nodes'][-1]['nodes'][0]['nodes']:
                # Unwrap one nesting level when present, then keep entries that
                # carry an assignId (the show id used everywhere else).
                if i.get('nodes') and i['nodes'][0].get('data'):
                    i = i['nodes'][0]['data']
                    if i.get('assignId'):
                        vlist.append({
                            'vod_id': i['assignId'],
                            'vod_name': i.get('title'),
                            'vod_pic': i.get('vImg') or i.get('img'),
                            'vod_year': i.get('mark', {}).get('data', {}).get('text'),
                            'vod_remarks': i.get('summary')
                        })
            return {'list': vlist}
        except Exception as e:
            print(f"处理主页视频数据时出错: {str(e)}")
            return {'list': []}

    def categoryContent(self, tid, pg, filter, extend):
        """List one category page; refreshes the session token on page 1."""
        result = {}
        vlist = []
        result['page'] = pg
        result['limit'] = 90
        result['total'] = 999999
        pagecount = 9999
        params = {'type': tid}
        id = self.typeid[tid]
        params.update(extend)
        if pg == '1':
            id = self.cf(params)
        # FIX(review): the pasted source had the mojibake '¶ms=' here —
        # an HTML-entity collapse of '&params=' (cf() spells it correctly).
        data = self.session.get(f'{self.host}/category/data?session={id}&params={quote(json.dumps(params))}&pageNo={pg}').json()
        try:
            data = data['data']['filterData']
            for i in data['listData']:
                if i.get('videoLink') and 's=' in i['videoLink']:
                    vlist.append({
                        'vod_id': i.get('videoLink').split('s=')[-1],
                        'vod_name': i.get('title'),
                        'vod_pic': i.get('img'),
                        'vod_year': i.get('rightTagText'),
                        'vod_remarks': i.get('summary')
                    })
            self.typeid[tid] = quote(json.dumps(data['session']))
        except:
            # No further data: report the current page as the last one.
            pagecount = pg
        result['list'] = vlist
        result['pagecount'] = pagecount
        return result

    def detailContent(self, ids):
        """Fetch show info plus the full episode list (batched concurrently)."""
        try:
            data = self.session.get(f'{self.ihost}/v_getvideo_info/?showId={ids[0]}').json()
            v = data['data']
            vod = {
                'type_name': v.get('showVideotype'),
                'vod_year': v.get('lastUpdate'),
                'vod_remarks': v.get('rc_title'),
                'vod_actor': v.get('_personNameStr'),
                'vod_content': v.get('showdesc'),
                'vod_play_from': '优酷',
                'vod_play_url': ''
            }
            params = {"biz": "new_detail_web2", "videoId": v.get('vid'), "scene": "web_page", "componentVersion": "3", "ip": data.get('ip'), "debug": 0, "utdid": "ZYmGMAAAACkDAMU8hbiMmYdd", "userId": 0, "platform": "pc", "nextSession": "", "gray": 0, "source": "pcNoPrev", "showId": ids[0]}
            sdata, index = self.getinfo(params)
            pdata = sdata['nodes']
            if index > len(pdata):
                # First response holds one batch; fetch the remaining episode
                # ranges in parallel, then splice them back in order.
                batch_size = len(pdata)
                total_batches = ((index + batch_size - 1) // batch_size) - 1
                ssj = json.loads(sdata['data']['session'])
                with ThreadPoolExecutor(max_workers=total_batches) as executor:
                    futures = []
                    for batch in range(total_batches):
                        start = batch_size + 1 + (batch * batch_size)
                        end = start + batch_size - 1
                        next_session = ssj.copy()
                        next_session.update({
                            "itemStartStage": start,
                            "itemEndStage": min(end, index)
                        })
                        current_params = params.copy()
                        current_params['nextSession'] = json.dumps(next_session)
                        futures.append((start, executor.submit(self.getvinfo, current_params)))
                    futures.sort(key=lambda x: x[0])
                    for _, future in futures:
                        try:
                            result = future.result()
                            pdata.extend(result['nodes'])
                        except Exception as e:
                            print(f"Error fetching data: {str(e)}")
            vod['vod_play_url'] = '#'.join([f"{i['data'].get('title')}${i['data']['action'].get('value')}" for i in pdata])
            return {'list': [vod]}
        except Exception as e:
            print(e)
            return {'list': [{'vod_play_from': '哎呀翻车啦', 'vod_play_url': f'呜呜呜${self.host}'}]}

    def searchContent(self, key, quick, pg="1"):
        """Search Youku; only entries that resolve to a show id are kept."""
        data = self.session.get(f'{self.shost}/api/search?pg={pg}&keyword={key}').json()
        vlist = []
        for i in data['pageComponentList']:
            if i.get('commonData') and (i['commonData'].get('showId') or i['commonData'].get('realShowId')):
                i = i['commonData']
                vlist.append({
                    'vod_id': i.get('showId') or i.get('realShowId'),
                    'vod_name': i['titleDTO'].get('displayName'),
                    'vod_pic': i['posterDTO'].get('vThumbUrl'),
                    'vod_year': i.get('feature'),
                    'vod_remarks': i.get('updateNotice')
                })
        return {'list': vlist, 'page': pg}

    def playerContent(self, flag, id, vipFlags):
        # Playback is delegated to an external parser (jx/parse flags).
        return {'jx': 1, 'parse': 1, 'url': f"{self.ihost}/video?vid={id}", 'header': ''}

    def localProxy(self, param):
        pass

    def cf(self, params, b=False):
        """Open a category browsing session; optionally also build its filters."""
        response = self.session.get(f'{self.host}/category/data?params={quote(json.dumps(params))}&optionRefresh=1&pageNo=1').json()
        data = response['data']['filterData']
        session = quote(json.dumps(data['session']))
        if b:
            return session, self.get_filter_data(data['filter']['filterData'][1:])
        return session

    def process_key(self, key):
        """Convert snake_case filter keys to camelCase (API expectation)."""
        if '_' not in key:
            return key
        parts = key.split('_')
        result = parts[0]
        for part in parts[1:]:
            if part:
                result += part[0].upper() + part[1:]
        return result

    def get_filter_data(self, data):
        """Translate the site's filter description into the UI filter schema."""
        result = []
        try:
            for item in data:
                if not item.get('subFilter'):
                    continue
                first_sub = item['subFilter'][0]
                if not first_sub.get('filterType'):
                    continue
                filter_item = {
                    'key': self.process_key(first_sub['filterType']),
                    'name': first_sub['title'],
                    'value': []
                }
                for sub in item['subFilter']:
                    if 'value' in sub:
                        filter_item['value'].append({
                            'n': sub['title'],
                            'v': sub['value']
                        })
                if filter_item['value']:
                    result.append(filter_item)
        except Exception as e:
            print(f"处理筛选数据时出错: {str(e)}")
        return result

    def get_ctoken(self):
        # Fired for its side effect only: the response sets the _m_h5_tk
        # token cookie consumed by md5().
        data = self.session.get(f'{self.h5host}/h5/mtop.ykrec.recommendservice.recommend/1.0/?jsv=2.6.1&appKey=24679788')

    def md5(self, t, text):
        """mtop request signature: MD5(token&timestamp&appKey&payload)."""
        h = MD5.new()
        token = self.session.cookies.get('_m_h5_tk').split('_')[0]
        data = f"{token}&{t}&24679788&{text}"
        h.update(data.encode('utf-8'))
        return h.hexdigest()

    def getdata(self, url, params, recursion_count=0, max_recursion=3):
        """Signed mtop GET; on token expiry, refresh and retry (bounded)."""
        data = json.dumps(params)
        t = int(time.time() * 1000)
        jsdata = {
            'appKey': '24679788',
            't': t,
            'sign': self.md5(t, data),
            'data': data
        }
        response = self.session.get(url, params=jsdata)
        if '令牌过期' in response.text:
            if recursion_count >= max_recursion:
                raise Exception("达到最大递归次数，无法继续请求")
            self.get_ctoken()
            return self.getdata(url, params, recursion_count + 1, max_recursion)
        else:
            return response.json()

    def getvinfo(self, params):
        """Call the detail gateway and return its single data payload."""
        body = {
            "ms_codes": "2019030100",
            "params": json.dumps(params),
            "system_info": "{\"os\":\"iku\",\"device\":\"iku\",\"ver\":\"9.2.9\",\"appPackageKey\":\"com.youku.iku\",\"appPackageId\":\"pcweb\"}"
        }
        data = self.getdata(f'{self.h5host}/h5/mtop.youku.columbus.gateway.new.execute/1.0/', body)
        okey = list(data['data'].keys())[0]
        i = data['data'][okey]['data']
        return i

    def getinfo(self, params):
        """Return (episode node list, episode total) for a show.

        Movies/games keep the list at node index 4 instead of 3.
        """
        i = self.getvinfo(params)
        jdata = i['nodes'][0]['nodes'][3]
        info = i['data']['extra']['episodeTotal']
        if i['data']['extra']['showCategory'] in ['电影', '游戏']:
            jdata = i['nodes'][0]['nodes'][4]
        return jdata, info
# -*- coding: utf-8 -*-
# by @嗷呜
# NOTE(review): reconstructed from a whitespace-mangled diff chunk
# (original patch: PY/official/爱.py, new file). Newlines were lost in the
# paste; line structure restored, runtime strings preserved byte-for-byte.
# iQIYI (iqiyi.com) spider: channel browsing, search and episode listing.
import json
import random
import sys
from base64 import b64encode, b64decode
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import quote
sys.path.append('..')
from base.spider import Spider


class Spider(Spider):
    """iQIYI site spider implementing the base.spider (TVBox-style) interface."""

    def init(self, extend=""):
        # Fixed device id required by the mesh.if.iqiyi.com endpoints.
        self.did = 'f8da348e186e6ee574d647918f5a7114'
        # FIX(review): initialize the videolib pagination session here too,
        # so categoryContent cannot hit an AttributeError if its first call
        # ever arrives with pg != "1".
        self.sid = ''

    def getName(self):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def destroy(self):
        pass

    rhost = 'https://www.iqiyi.com'

    hhost = 'https://mesh.if.iqiyi.com'

    dhost = 'https://miniapp.iqiyi.com'

    headers = {
        'Origin': rhost,
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
        'Referer': f'{rhost}/',
    }

    def homeContent(self, filter):
        """Return channel classes and (fetched concurrently) their filters."""
        result = {}
        cateManual = {
            "全部": "1009",
            "电影": "1",
            "剧集": "2",
            "综艺": "6",
            "动漫": "4",
            "儿童": "15",
            "微剧": "35",
            "纪录片": "3"
        }
        classes = []
        filters = {}
        for k in cateManual:
            classes.append({
                'type_name': k,
                'type_id': cateManual[k]
            })
        with ThreadPoolExecutor(max_workers=len(classes)) as executor:
            results = executor.map(self.getf, classes)
            for id, ft in results:
                if len(ft): filters[id] = ft
        result['class'] = classes
        result['filters'] = filters
        return result

    def homeVideoContent(self):
        """Fetch the recommend feed; vod_id embeds the base64 page URL."""
        data = self.fetch(f'{self.hhost}/portal/lw/v5/channel/recommend?v=13.014.21150', headers=self.headers).json()
        vlist = []
        for i in data['items'][1:]:
            for j in i['video'][0]['data']:
                id = j.get('firstId')
                pic = j.get('prevue', {}).get('image_url') or j.get('album_image_url_hover')
                if id and pic:
                    pu = j.get('prevue', {}).get('page_url') or j.get('page_url').split('?')[0]
                    id = f'{id}@{self.e64(pu)}'
                    vlist.append({
                        'vod_id': id,
                        'vod_name': j.get('display_name'),
                        'vod_pic': pic,
                        'vod_year': j.get('sns_score'),
                        'vod_remarks': j.get('dq_updatestatus') or j.get('rank_prefix')
                    })
        return {'list': vlist}

    def categoryContent(self, tid, pg, filter, extend):
        """Page through the video library; extend values are base64 'k=v' lists."""
        if pg == "1":
            self.sid = ''
        new_data = {'mode': '24'}
        for key, value in extend.items():
            if value:
                key_value_pairs = self.d64(value).split(',')
                for pair in key_value_pairs:
                    k, v = pair.split('=')
                    if k in new_data:
                        new_data[k] += "," + v
                    else:
                        new_data[k] = v
        path = f"/portal/lw/videolib/data?uid=&passport_id=&ret_num=60&version=13.034.21571&device_id={self.did}&channel_id={tid}&page_id={pg}&session={self.sid}&os=&conduit_id=&vip=0&auth=&recent_selected_tag=&ad=%5B%7B%22lm%22%3A%225%22%2C%22ai%22%3A%225%22%2C%22fp%22%3A%226%22%2C%22sei%22%3A%22S78ff51b694677e17af4b19368dadb7bd%22%2C%22position%22%3A%22library%22%7D%5D&adExt=%7B%22r%22%3A%221.2.1-ares6-pure%22%7D&dfp=a00b3c577e541c41149be7cde9320500b0a11307e61a8445448f7f4a9e895ced0f&filter={quote(json.dumps(new_data))}"
        data = self.fetch(f'{self.hhost}{path}', headers=self.headers).json()
        self.sid = data['session']
        videos = []
        for i in data['data']:
            id = i.get('firstId') or i.get('tv_id')
            if not id:
                id = i.get('play_url').split(';')[0].split('=')[-1]
            if id and not i.get('h'):
                id = f'{id}@{self.e64(i.get("page_url"))}'
                videos.append({
                    'vod_id': id,
                    'vod_name': i.get('display_name'),
                    'vod_pic': i.get('album_image_url_hover'),
                    'vod_year': i.get('sns_score'),
                    'vod_remarks': i.get('dq_updatestatus') or i.get('pay_mark')
                })
        result = {}
        result['list'] = videos
        result['page'] = pg
        result['pagecount'] = 9999
        result['limit'] = 90
        result['total'] = 999999
        return result

    def detailContent(self, ids):
        """Fetch album info and all episode pages (remaining pages in parallel)."""
        ids = ids[0].split('@')
        ids[-1] = self.d64(ids[-1])
        data = self.fetch(f'{self.dhost}/h5/mina/baidu/play/body/v1/{ids[0]}/', headers=self.headers).json()
        v = data['data']['playInfo']
        vod = {
            'vod_name': v.get('albumName'),
            'type_name': v.get('tags'),
            'vod_year': v.get('albumYear'),
            'vod_remarks': v.get('updateStrategy'),
            'vod_actor': v.get('mainActors'),
            'vod_director': v.get('directors'),
            'vod_content': v.get('albumDesc'),
            'vod_play_from': '爱奇艺',
            'vod_play_url': ''
        }
        if data.get('data') and data['data'].get('videoList') and data['data']['videoList'].get('videos'):
            purl = [f'{i["shortTitle"]}${i["pageUrl"]}' for i in data['data']['videoList']['videos']]
            pg = data['data']['videoList'].get('totalPages')
            if pg and pg > 1:
                id = v['albumId']
                pages = list(range(2, pg + 1))
                page_results = {}
                with ThreadPoolExecutor(max_workers=10) as executor:
                    future_to_page = {
                        executor.submit(self.fetch_page_data, page, id): page
                        for page in pages
                    }
                    for future in as_completed(future_to_page):
                        page = future_to_page[future]
                        try:
                            result = future.result()
                            page_results[page] = result
                        except Exception as e:
                            print(f"Error fetching page {page}: {e}")
                # Keep episodes in page order regardless of completion order.
                for page in sorted(page_results.keys()):
                    purl.extend(page_results[page])
            vod['vod_play_url'] = '#'.join(purl)
        else:
            # Single-video fallback: use the play-head endpoint instead.
            vdata = self.fetch(f'{self.dhost}/h5/mina/baidu/play/head/v1/{ids[0]}/', headers=self.headers).json()
            v = vdata['data']['playInfo']
            vod = {
                'vod_name': v.get('shortTitle'),
                'type_name': v.get('channelName'),
                'vod_year': v.get('year'),
                'vod_remarks': v.get('focus'),
                'vod_actor': v.get('mainActors'),
                'vod_director': v.get('directors'),
                'vod_content': v.get('desc'),
                'vod_play_from': '爱奇艺',
                'vod_play_url': f'{v.get("shortTitle")}${ids[-1]}'
            }
        return {'list': [vod]}

    def searchContent(self, key, quick, pg="1"):
        """Search; intent albums are promoted to the front of the results."""
        # FIX(review): the pasted source had the mojibake '¤t_page=' here —
        # an HTML-entity collapse of '&current_page='.
        data = self.fetch(f'{self.hhost}/portal/lw/search/homePageV3?key={key}&current_page={pg}&mode=1&source=input&suggest=&version=13.014.21150&pageNum={pg}&pageSize=25&pu=&u={self.did}&scale=150&token=&userVip=0&conduit=&vipType=-1&os=&osShortName=win10&dataType=&appMode=', headers=self.headers).json()
        videos = []
        vdata = data['data']['templates']
        for i in data['data']['templates']:
            if i.get('intentAlbumInfos'):
                vdata = [{'albumInfo': c} for c in i['intentAlbumInfos']] + vdata
        for i in vdata:
            if i.get('albumInfo') and (i['albumInfo'].get('playQipuId', '') or i['albumInfo'].get('qipuId')) and i['albumInfo'].get('pageUrl'):
                b = i['albumInfo']
                id = f"{(b.get('playQipuId', '') or b.get('qipuId'))}@{self.e64(b.get('pageUrl'))}"
                videos.append({
                    'vod_id': id,
                    'vod_name': b.get('title'),
                    'vod_pic': b.get('img'),
                    'vod_year': (b.get('year', {}) or {}).get('value'),
                    'vod_remarks': b.get('subscriptContent') or b.get('channel') or b.get('vipTips')
                })
        return {'list': videos, 'page': pg}

    def playerContent(self, flag, id, vipFlags):
        # Normalize mobile links; playback is delegated to an external parser.
        id = id.replace('http://m.', 'https://www.')
        return {'jx': 1, 'parse': 1, 'url': id, 'header': ''}

    def localProxy(self, param):
        pass

    def fetch_page_data(self, page, id):
        """Return 'title$pageUrl' entries for one episode-list page ([] on error)."""
        try:
            url = f'{self.dhost}/h5/mina/avlist/{page}/{id}/'
            data = self.fetch(url, headers=self.headers).json()
            return [f'{i["shortTitle"]}${i["pageUrl"]}' for i in data['data']['videoList']['videos']]
        except:
            return []

    def getf(self, body):
        """Fetch one channel's filter tags; values are base64-encoded tag params."""
        data = self.fetch(f'{self.hhost}/portal/lw/videolib/tag?channel_id={body["type_id"]}&tagAdd=&selected_tag_name=&version=13.014.21150&device={self.did}&uid=', headers=self.headers).json()
        ft = []
        for i in data:
            try:
                value_array = [{"n": value['text'], "v": self.e64(value['tag_param'])} for value in i['tags'] if
                               value.get('tag_param')]
                ft.append({"key": i['group'], "name": i['group'], "value": value_array})
            except:
                print(i)
        return (body['type_id'], ft)

    def e64(self, text):
        """UTF-8 base64 encode; returns '' on failure."""
        try:
            text_bytes = text.encode('utf-8')
            encoded_bytes = b64encode(text_bytes)
            return encoded_bytes.decode('utf-8')
        except Exception as e:
            print(f"Base64编码错误: {str(e)}")
            return ""

    def d64(self, encoded_text: str):
        """UTF-8 base64 decode; returns '' on failure."""
        try:
            encoded_bytes = encoded_text.encode('utf-8')
            decoded_bytes = b64decode(encoded_bytes)
            return decoded_bytes.decode('utf-8')
        except Exception as e:
            print(f"Base64解码错误: {str(e)}")
            return ""

    def random_str(self, length=16):
        """Random lowercase-hex string of the given length."""
        hex_chars = '0123456789abcdef'
        return ''.join(random.choice(hex_chars) for _ in range(length))
# -*- coding: utf-8 -*-
# by @嗷呜
# NOTE(review): reconstructed from a whitespace-mangled diff chunk
# (original patch: PY/official/腾.py, new file). Newlines were lost in the
# paste; line structure restored, runtime strings preserved byte-for-byte.
# Tencent Video (v.qq.com) spider.
import json
import sys
import uuid
import copy
sys.path.append('..')
from base.spider import Spider
from concurrent.futures import ThreadPoolExecutor, as_completed


class Spider(Spider):
    """Tencent Video spider implementing the base.spider (TVBox-style) interface."""

    def init(self, extend=""):
        # Request-body template for the channel page service.
        self.dbody = {
            "page_params": {
                "channel_id": "",
                "filter_params": "sort=75",
                "page_type": "channel_operation",
                "page_id": "channel_list_second_page"
            }
        }
        # FIX(review): deep copy — a plain reference (or dict.copy()) shares
        # the nested page_params dict, so later per-request mutations would
        # corrupt the template (and race across getf threads).
        self.body = copy.deepcopy(self.dbody)

    def getName(self):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def destroy(self):
        pass

    host = 'https://v.qq.com'

    apihost = 'https://pbaccess.video.qq.com'

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5410.0 Safari/537.36',
        'origin': host,
        'referer': f'{host}/'
    }

    def homeContent(self, filter):
        """Return channel classes plus filters extracted from each channel page."""
        cdata = {
            "电视剧": "100113",
            "电影": "100173",
            "综艺": "100109",
            "纪录片": "100105",
            "动漫": "100119",
            "少儿": "100150",
            "短剧": "110755"
        }
        result = {}
        classes = []
        filters = {}
        for k in cdata:
            classes.append({
                'type_name': k,
                'type_id': cdata[k]
            })
        with ThreadPoolExecutor(max_workers=len(classes)) as executor:
            futures = [executor.submit(self.get_filter_data, item['type_id']) for item in classes]
            for future in futures:
                cid, data = future.result()
                if not data.get('data', {}).get('module_list_datas'):
                    continue
                filter_dict = {}
                try:
                    items = data['data']['module_list_datas'][-1]['module_datas'][-1]['item_data_lists']['item_datas']
                    for item in items:
                        if not item.get('item_params', {}).get('index_item_key'):
                            continue
                        params = item['item_params']
                        filter_key = params['index_item_key']
                        # Group option values under their filter key.
                        if filter_key not in filter_dict:
                            filter_dict[filter_key] = {
                                'key': filter_key,
                                'name': params['index_name'],
                                'value': []
                            }
                        filter_dict[filter_key]['value'].append({
                            'n': params['option_name'],
                            'v': params['option_value']
                        })
                except (IndexError, KeyError):
                    continue
                filters[cid] = list(filter_dict.values())
        result['class'] = classes
        result['filters'] = filters
        return result

    def homeVideoContent(self):
        """Fetch the home channel page and flatten its card list."""
        json_data = {'page_context': None, 'page_params': {'page_id': '100101', 'page_type': 'channel', 'skip_privacy_types': '0', 'support_click_scan': '1', 'new_mark_label_enabled': '1', 'ams_cookies': '', }, 'page_bypass_params': {'params': {'caller_id': '', 'data_mode': 'default', 'page_id': '', 'page_type': 'channel', 'platform_id': '2', 'user_mode': 'default', }, 'scene': 'channel', 'abtest_bypass_id': '', }}
        data = self.post(f'{self.apihost}/trpc.vector_layout.page_view.PageService/getPage', headers=self.headers, json=json_data).json()
        vlist = []
        for it in data['data']['CardList'][0]['children_list']['list']['cards']:
            if it.get('params'):
                p = it['params']
                tag = json.loads(p.get('uni_imgtag', '{}') or p.get('imgtag', '{}') or '{}')
                id = it.get('id') or p.get('cid')
                name = p.get('mz_title') or p.get('title')
                if name and 'http' not in id:
                    vlist.append({
                        'vod_id': id,
                        'vod_name': name,
                        'vod_pic': p.get('image_url'),
                        'vod_year': tag.get('tag_2', {}).get('text'),
                        'vod_remarks': tag.get('tag_4', {}).get('text')
                    })
        return {'list': vlist}

    def categoryContent(self, tid, pg, filter, extend):
        """Page one channel; pagination state is carried in next_page_context."""
        result = {}
        params = {
            "sort": extend.get('sort', '75'),
            "attr": extend.get('attr', '-1'),
            "itype": extend.get('itype', '-1'),
            "ipay": extend.get('ipay', '-1'),
            "iarea": extend.get('iarea', '-1'),
            "iyear": extend.get('iyear', '-1'),
            "theater": extend.get('theater', '-1'),
            "award": extend.get('award', '-1'),
            "recommend": extend.get('recommend', '-1')
        }
        if pg == '1':
            # FIX(review): deepcopy instead of .copy() — the shallow copy
            # shared page_params with the dbody template.
            self.body = copy.deepcopy(self.dbody)
        self.body['page_params']['channel_id'] = tid
        self.body['page_params']['filter_params'] = self.josn_to_params(params)
        data = self.post(
            f'{self.apihost}/trpc.universal_backend_service.page_server_rpc.PageServer/GetPageData?video_appid=1000005&vplatform=2&vversion_name=8.9.10&new_mark_label_enabled=1',
            json=self.body, headers=self.headers).json()
        ndata = data['data']
        if ndata['has_next_page']:
            result['pagecount'] = 9999
            self.body['page_context'] = ndata['next_page_context']
        else:
            result['pagecount'] = int(pg)
        vlist = []
        for its in ndata['module_list_datas'][-1]['module_datas'][-1]['item_data_lists']['item_datas']:
            id = its.get('item_params', {}).get('cid')
            if id:
                p = its['item_params']
                tag = json.loads(p.get('uni_imgtag', '{}') or p.get('imgtag', '{}') or '{}')
                name = p.get('mz_title') or p.get('title')
                pic = p.get('new_pic_hz') or p.get('new_pic_vt')
                vlist.append({
                    'vod_id': id,
                    'vod_name': name,
                    'vod_pic': pic,
                    'vod_year': tag.get('tag_2', {}).get('text'),
                    'vod_remarks': tag.get('tag_4', {}).get('text')
                })
        result['list'] = vlist
        result['page'] = pg
        result['limit'] = 90
        result['total'] = 999999
        return result

    def detailContent(self, ids):
        """Fetch intro + episode tabs concurrently, then assemble the vod entry."""
        vbody = {"page_params": {"req_from": "web", "cid": ids[0], "vid": "", "lid": "", "page_type": "detail_operation", "page_id": "detail_page_introduction"}, "has_cache": 1}
        body = {"page_params": {"req_from": "web_vsite", "page_id": "vsite_episode_list", "page_type": "detail_operation", "id_type": "1", "page_size": "", "cid": ids[0], "vid": "", "lid": "", "page_num": "", "page_context": "", "detail_page_type": "1"}, "has_cache": 1}
        with ThreadPoolExecutor(max_workers=2) as executor:
            future_detail = executor.submit(self.get_vdata, vbody)
            future_episodes = executor.submit(self.get_vdata, body)
            vdata = future_detail.result()
            data = future_episodes.result()
        pdata = self.process_tabs(data, body, ids)
        if not pdata:
            return self.handle_exception(None, "No pdata available")
        try:
            star_list = vdata['data']['module_list_datas'][0]['module_datas'][0]['item_data_lists']['item_datas'][
                0].get('sub_items', {}).get('star_list', {}).get('item_datas', [])
            actors = [star['item_params']['name'] for star in star_list]
            names = ['腾讯视频', '预告片']
            plist, ylist = self.process_pdata(pdata, ids)
            # Drop a source label when its episode list came back empty.
            if not plist:
                del names[0]
            if not ylist:
                del names[1]
            vod = self.build_vod(vdata, actors, plist, ylist, names)
            return {'list': [vod]}
        except Exception as e:
            return self.handle_exception(e, "Error processing detail")

    def searchContent(self, key, quick, pg="1"):
        """Multi-terminal search, filtered to on-site results in known channels."""
        headers = self.headers.copy()
        headers.update({'Content-Type': 'application/json'})
        body = {'version': '25021101', 'clientType': 1, 'filterValue': '', 'uuid': str(uuid.uuid4()), 'retry': 0, 'query': key, 'pagenum': int(pg) - 1, 'pagesize': 30, 'queryFrom': 0, 'searchDatakey': '', 'transInfo': '', 'isneedQc': True, 'preQid': '', 'adClientInfo': '', 'extraInfo': {'isNewMarkLabel': '1', 'multi_terminal_pc': '1', 'themeType': '1', }, }
        data = self.post(f'{self.apihost}/trpc.videosearch.mobile_search.MultiTerminalSearch/MbSearch?vplatform=2',
                         json=body, headers=headers).json()
        vlist = []
        vname = ["电视剧", "电影", "综艺", "纪录片", "动漫", "少儿", "短剧"]
        v = data['data']['normalList']['itemList']
        d = data['data']['areaBoxList'][0]['itemList']
        q = v + d
        if v[0].get('doc') and v[0]['doc'].get('id') == 'MainNeed': q = d + v
        for k in q:
            if k.get('doc') and k.get('videoInfo') and k['doc'].get('id') and '外站' not in k['videoInfo'].get('subTitle') and k['videoInfo'].get('title') and k['videoInfo'].get('typeName') in vname:
                img_tag = k.get('videoInfo', {}).get('imgTag')
                if img_tag is not None and isinstance(img_tag, str):
                    try:
                        tag = json.loads(img_tag)
                    except json.JSONDecodeError as e:
                        tag = {}
                else:
                    tag = {}
                pic = k.get('videoInfo', {}).get('imgUrl')
                vlist.append({
                    'vod_id': k['doc']['id'],
                    'vod_name': self.removeHtmlTags(k['videoInfo']['title']),
                    'vod_pic': pic,
                    'vod_year': k['videoInfo'].get('typeName') + ' ' + tag.get('tag_2', {}).get('text', ''),
                    'vod_remarks': tag.get('tag_4', {}).get('text', '')
                })
        return {'list': vlist, 'page': pg}

    def playerContent(self, flag, id, vipFlags):
        # id is 'cid@vid'; playback is delegated to an external parser.
        ids = id.split('@')
        url = f"{self.host}/x/cover/{ids[0]}/{ids[1]}.html"
        return {'jx': 1, 'parse': 1, 'url': url, 'header': ''}

    def localProxy(self, param):
        pass

    def get_filter_data(self, cid):
        """Fetch one channel's page data (used to derive its filter options)."""
        # deepcopy: runs on several threads at once; must not share/mutate
        # the nested template dict (original used a shallow .copy()).
        hbody = copy.deepcopy(self.dbody)
        hbody['page_params']['channel_id'] = cid
        data = self.post(
            f'{self.apihost}/trpc.universal_backend_service.page_server_rpc.PageServer/GetPageData?video_appid=1000005&vplatform=2&vversion_name=8.9.10&new_mark_label_enabled=1',
            json=hbody, headers=self.headers).json()
        return cid, data

    def get_vdata(self, body):
        """POST a detail-page request; empty structure on failure."""
        try:
            vdata = self.post(
                f'{self.apihost}/trpc.universal_backend_service.page_server_rpc.PageServer/GetPageData?video_appid=3000010&vplatform=2&vversion_name=8.2.96',
                json=body, headers=self.headers
            ).json()
            return vdata
        except Exception as e:
            print(f"Error in get_vdata: {str(e)}")
            return {'data': {'module_list_datas': []}}

    def process_pdata(self, pdata, ids):
        """Split episode items into main episodes and trailers ('预告')."""
        plist = []
        ylist = []
        for k in pdata:
            if k.get('item_id'):
                pid = f"{k['item_params']['union_title']}${ids[0]}@{k['item_id']}"
                if '预告' in k['item_params']['union_title']:
                    ylist.append(pid)
                else:
                    plist.append(pid)
        return plist, ylist

    def build_vod(self, vdata, actors, plist, ylist, names):
        """Assemble the final vod dict from detail data and episode lists."""
        d = vdata['data']['module_list_datas'][0]['module_datas'][0]['item_data_lists']['item_datas'][0]['item_params']
        urls = []
        if plist:
            urls.append('#'.join(plist))
        if ylist:
            urls.append('#'.join(ylist))
        vod = {
            'type_name': d.get('sub_genre', ''),
            'vod_name': d.get('title', ''),
            'vod_year': d.get('year', ''),
            'vod_area': d.get('area_name', ''),
            'vod_remarks': d.get('holly_online_time', '') or d.get('hotval', ''),
            'vod_actor': ','.join(actors),
            'vod_content': d.get('cover_description', ''),
            'vod_play_from': '$$$'.join(names),
            'vod_play_url': '$$$'.join(urls)
        }
        return vod

    def handle_exception(self, e, message):
        """Log and return the standard 'it broke' placeholder entry."""
        print(f"{message}: {str(e)}")
        return {'list': [{'vod_play_from': '哎呀翻车啦', 'vod_play_url': '翻车啦#555'}]}

    def process_tabs(self, data, body, ids):
        """Collect episode items across all tabs (extra tabs fetched in parallel)."""
        try:
            pdata = data['data']['module_list_datas'][-1]['module_datas'][-1]['item_data_lists']['item_datas']
            tabs = data['data']['module_list_datas'][-1]['module_datas'][-1]['module_params'].get('tabs')
            if tabs and len(json.loads(tabs)):
                tabs = json.loads(tabs)
                remaining_tabs = tabs[1:]
                task_queue = []
                for tab in remaining_tabs:
                    nbody = copy.deepcopy(body)
                    nbody['page_params']['page_context'] = tab['page_context']
                    task_queue.append(nbody)
                with ThreadPoolExecutor(max_workers=10) as executor:
                    future_map = {executor.submit(self.get_vdata, task): idx for idx, task in enumerate(task_queue)}
                    results = [None] * len(task_queue)
                    # Preserve tab order even though futures complete unordered.
                    for future in as_completed(future_map.keys()):
                        idx = future_map[future]
                        results[idx] = future.result()
                for result in results:
                    if result:
                        page_data = result['data']['module_list_datas'][-1]['module_datas'][-1]['item_data_lists'][
                            'item_datas']
                        pdata.extend(page_data)
            return pdata
        except Exception as e:
            print(f"Error processing episodes: {str(e)}")
            return []

    def josn_to_params(self, params, skip_empty=False):
        """Serialize a dict as 'k=v&k=v' (name kept for call-site compatibility)."""
        query = []
        for k, v in params.items():
            if skip_empty and not v:
                continue
            query.append(f"{k}={v}")
        return "&".join(query)
# -*- coding: utf-8 -*-
# by @嗷呜
# NOTE(review): reconstructed from a whitespace-mangled diff chunk
# (original patch: PY/official/芒.py, new file). Newlines were lost in the
# paste; line structure restored, runtime strings preserved byte-for-byte.
# MGTV (mgtv.com) spider.
import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
sys.path.append('..')
from base.spider import Spider


class Spider(Spider):
    """MGTV site spider implementing the base.spider (TVBox-style) interface."""

    def init(self, extend=""):
        pass

    def getName(self):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def destroy(self):
        pass

    rhost = 'https://www.mgtv.com'

    host = 'https://pianku.api.mgtv.com'

    vhost = 'https://pcweb.api.mgtv.com'

    mhost = 'https://dc.bz.mgtv.com'

    shost = 'https://mobileso.bz.mgtv.com'

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.61 Chrome/126.0.6478.61 Not/A)Brand/8 Safari/537.36',
        'origin': rhost,
        'referer': f'{rhost}/'
    }

    def homeContent(self, filter):
        """Return channel classes and (fetched concurrently) their filters."""
        result = {}
        cateManual = {
            "电影": "3",
            "电视剧": "2",
            "综艺": "1",
            "动画": "50",
            "少儿": "10",
            "纪录片": "51",
            "教育": "115"
        }
        classes = []
        filters = {}
        for k in cateManual:
            classes.append({
                'type_name': k,
                'type_id': cateManual[k]
            })
        with ThreadPoolExecutor(max_workers=len(classes)) as executor:
            results = executor.map(self.getf, classes)
            for id, ft in results:
                if len(ft): filters[id] = ft
        result['class'] = classes
        result['filters'] = filters
        return result

    def homeVideoContent(self):
        """Flatten the dynamic home channel modules into a vod list."""
        data = self.fetch(f'{self.mhost}/dynamic/v1/channel/index/0/0/0/1000000/0/0/17/1354?type=17&version=5.0&t={str(int(time.time() * 1000))}&_support=10000000', headers=self.headers).json()
        videoList = []
        for i in data['data']:
            if i.get('DSLList') and len(i['DSLList']):
                for j in i['DSLList']:
                    if j.get('data') and j['data'].get('items') and len(j['data']['items']):
                        for k in j['data']['items']:
                            videoList.append({
                                'vod_id': k["videoId"],
                                'vod_name': k['videoName'],
                                'vod_pic': k['img'],
                                'vod_year': k.get('cornerTitle'),
                                'vod_remarks': k.get('time') or k.get('desc'),
                            })
        return {'list': videoList}

    def categoryContent(self, tid, pg, filter, extend):
        """Page one channel of the film library ('rider/list')."""
        body = {
            'allowedRC': '1',
            'platform': 'pcweb',
            'channelId': tid,
            'pn': pg,
            'pc': '80',
            'hudong': '1',
            '_support': '10000000'
        }
        body.update(extend)
        data = self.fetch(f'{self.host}/rider/list/pcweb/v3', params=body, headers=self.headers).json()
        videoList = []
        for i in data['data']['hitDocs']:
            videoList.append({
                'vod_id': i["playPartId"],
                'vod_name': i['title'],
                'vod_pic': i['img'],
                'vod_year': (i.get('rightCorner', {}) or {}).get('text') or i.get('year'),
                'vod_remarks': i['updateInfo']
            })
        result = {}
        result['list'] = videoList
        result['page'] = pg
        result['pagecount'] = 9999
        result['limit'] = 90
        result['total'] = 999999
        return result

    def detailContent(self, ids):
        """Fetch video info plus the full episode list (extra pages in parallel)."""
        vbody = {'allowedRC': '1', 'vid': ids[0], 'type': 'b', '_support': '10000000'}
        vdata = self.fetch(f'{self.vhost}/video/info', params=vbody, headers=self.headers).json()
        d = vdata['data']['info']['detail']
        vod = {
            'vod_name': vdata['data']['info']['title'],
            'type_name': d.get('kind'),
            'vod_year': d.get('releaseTime'),
            'vod_area': d.get('area'),
            'vod_lang': d.get('language'),
            'vod_remarks': d.get('updateInfo'),
            'vod_actor': d.get('leader'),
            'vod_director': d.get('director'),
            'vod_content': d.get('story'),
            'vod_play_from': '芒果TV',
            'vod_play_url': ''
        }
        data, pdata = self.fetch_page_data('1', ids[0], True)
        # FIX(review): cast once up front — total_page may arrive as a string,
        # and the original passed the uncast value into range(), which would
        # raise TypeError.
        pagecount = int(data['data'].get('total_page') or 1)
        if pagecount > 1:
            pages = list(range(2, pagecount + 1))
            page_results = {}
            with ThreadPoolExecutor(max_workers=10) as executor:
                future_to_page = {
                    executor.submit(self.fetch_page_data, page, ids[0]): page
                    for page in pages
                }
                for future in as_completed(future_to_page):
                    page = future_to_page[future]
                    try:
                        result = future.result()
                        page_results[page] = result
                    except Exception as e:
                        print(f"Error fetching page {page}: {e}")
            # Keep episodes in page order regardless of completion order.
            for page in sorted(page_results.keys()):
                pdata.extend(page_results[page])
        vod['vod_play_url'] = '#'.join(pdata)
        return {'list': [vod]}

    def searchContent(self, key, quick, pg="1"):
        """Search via the mobile applet endpoint; keep entries with vid + img."""
        data = self.fetch(f'{self.shost}/applet/search/v1?channelCode=mobile-wxap&q={key}&pn={pg}&pc=10&_support=10000000', headers=self.headers).json()
        videoList = []
        for i in data['data']['contents']:
            if i.get('data') and len(i['data']):
                k = i['data'][0]
                if k.get('vid') and k.get('img'):
                    try:
                        videoList.append({
                            'vod_id': k['vid'],
                            'vod_name': k['title'],
                            'vod_pic': k['img'],
                            'vod_year': (i.get('rightTopCorner', {}) or {}).get('text') or i.get('year'),
                            'vod_remarks': '/'.join(i.get('desc', [])),
                        })
                    except:
                        print(k)
        return {'list': videoList, 'page': pg}

    def playerContent(self, flag, id, vipFlags):
        # id is a site-relative page path; delegate to the external parser.
        id = f'{self.rhost}{id}'
        return {'jx': 1, 'parse': 1, 'url': id, 'header': ''}

    def localProxy(self, param):
        pass

    def getf(self, body):
        """Fetch one channel's filter tag groups."""
        params = {
            'allowedRC': '1',
            'channelId': body['type_id'],
            'platform': 'pcweb',
            '_support': '10000000',
        }
        data = self.fetch(f'{self.host}/rider/config/channel/v1', params=params, headers=self.headers).json()
        ft = []
        for i in data['data']['listItems']:
            try:
                value_array = [{"n": value['tagName'], "v": value['tagId']} for value in i['items'] if
                               value.get('tagName')]
                ft.append({"key": i['eName'], "name": i['typeName'], "value": value_array})
            except:
                print(i)
        return body['type_id'], ft

    def fetch_page_data(self, page, id, b=False):
        """Fetch one episode-list page as 'title$url' strings.

        With b=True also return the raw response (used for total_page).
        """
        body = {'version': '5.5.35', 'video_id': id, 'page': page, 'size': '30',
                'platform': '4', 'src': 'mgtv', 'allowedRC': '1', '_support': '10000000'}
        data = self.fetch(f'{self.vhost}/episode/list', params=body, headers=self.headers).json()
        ldata = [f'{i["t3"]}${i["url"]}' for i in data['data']['list']]
        if b:
            return data, ldata
        else:
            return ldata