Hero League Python crawler

Time:2022-1-7

Hero League Python crawler

Hero main interface (QQ): https://lol.qq.com/data/info-heros.shtml

1. Hero crawling

https://lol.qq.com/data/info-heros.shtml

Use a GET request to fetch the specified hero's information.

https://lol.qq.com/data/info-heros.shtml?id=xxx

id=xxx

2. JS obtains all hero information

import json

import requests
from faker import Factory
from bs4 import BeautifulSoup

f = Factory.create()


def get_all_heros():
    """Fetch the full hero list from the Tencent static JS endpoint and
    print id / name / alias for the first 50 heroes."""
    url = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
    headers = {
        'user-agent': f.user_agent()
    }
    r = requests.get(url, headers=headers)
    r.encoding = r.apparent_encoding
    c = r.text
    l = json.loads(c)['hero']
    for i in l[:50]:
        # 'heroId' is the camelCase key used by this payload (see the
        # skin-crawler section, which reads hero["heroId"] from the same URL).
        print('id: {0} name: {1} alias: {2}'.format(i['heroId'], i['name'], i['alias']))


# Script entry point: fetch and print the hero list.
if __name__ == '__main__':
    get_all_heros()

Output:

Hero League Python crawler

3. Crawl the game data

First lol page crawl

http://www.wanplus.com/lol/playerstats

Because the site uses a CSRF token, the POST request must carry a token derived from the cookies the server sets.

import json
import time

import requests
from faker import Factory
from urllib import parse

f = Factory.create()


def get_token():
    """Obtain the wanplus.com CSRF token together with the session cookies.

    The server sets a ``wanplus_csrf`` cookie; the token the AJAX endpoints
    expect (in ``X-CSRF-Token`` / ``_gtk``) is the numeric part of that cookie
    (everything past the first 9 characters) plus 16777216 (2**24).

    Returns:
        tuple: (token as str, cookies as a plain dict).
    """
    url = 'http://www.wanplus.com/lol/playerstats'
    headers = {
        'user-agent': f.user_agent(),
        'Referer': 'http://www.wanplus.com/lol/teamstats',
        'Host': 'www.wanplus.com',
    }
    # allow_redirects=False: only the Set-Cookie headers of the first
    # response are needed, not the redirected page body.
    r = requests.get(url, headers=headers, allow_redirects=False)
    r.encoding = r.apparent_encoding
    c = r.cookies
    r.close()
    my_cookies = c.get_dict()
    # print(my_cookies)
    # Skip the 9-character prefix of the cookie value, then offset by 2**24.
    return str(int(c.get('wanplus_csrf')[9:]) + 16777216), my_cookies


def get_competition():
    """POST the DataTables-style query to wanplus.com and print the first
    page of player statistics.

    Returns:
        bool: True on success, False when the response looks empty/blocked
        (caller retries with a fresh token).
    """
    url = 'http://www.wanplus.com/ajax/stats/list'
    token, myCookies = get_token()
    headers = {
        'user-agent': f.user_agent(),
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'www.wanplus.com',
        'Origin': 'http://www.wanplus.com',
        'Referer': 'http://www.wanplus.com/lol/playerstats',
        'X-CSRF-Token': token,
        'X-Requested-With': 'XMLHttpRequest',
    }
    # DataTables column definitions, replayed exactly as the browser sends them.
    formdata = {
        '_gtk': token,
        'draw': '1',
        'columns[0][data]': 'order',
        'columns[0][name]': '',
        'columns[0][searchable]': 'true',
        'columns[0][orderable]': 'false',
        'columns[0][search][value]': '',
        'columns[0][search][regex]': 'false',
        'columns[1][data]': 'playername',
        'columns[1][name]': '',
        'columns[1][searchable]': 'true',
        'columns[1][orderable]': 'false',
        'columns[1][search][value]': '',
        'columns[1][search][regex]': 'false',
        'columns[2][data]': 'teamname',
        'columns[2][name]': '',
        'columns[2][searchable]': 'true',
        'columns[2][orderable]': 'false',
        'columns[2][search][value]': '',
        'columns[2][search][regex]': 'false',
        'columns[3][data]': 'meta',
        'columns[3][name]': '',
        'columns[3][searchable]': 'true',
        'columns[3][orderable]': 'false',
        'columns[3][search][value]': '',
        'columns[3][search][regex]': 'false',
        'columns[4][data]': 'appearedTimes',
        'columns[4][name]': '',
        'columns[4][searchable]': 'true',
        'columns[4][orderable]': 'true',
        'columns[4][search][value]': '',
        'columns[4][search][regex]': 'false',
        'columns[5][data]': 'kda',
        'columns[5][name]': '',
        'columns[5][searchable]': 'true',
        'columns[5][orderable]': 'true',
        'columns[5][search][value]': '',
        'columns[5][search][regex]': 'false',
        'columns[6][data]': 'attendrate',
        'columns[6][name]': '',
        'columns[6][searchable]': 'true',
        'columns[6][orderable]': 'true',
        'columns[6][search][value]': '',
        'columns[6][search][regex]': 'false',
        'columns[7][data]': 'killsPergame',
        'columns[7][name]': '',
        'columns[7][searchable]': 'true',
        'columns[7][orderable]': 'true',
        'columns[7][search][value]': '',
        'columns[7][search][regex]': 'false',
        'columns[8][data]': 'mostkills',
        'columns[8][name]': '',
        'columns[8][searchable]': 'true',
        'columns[8][orderable]': 'true',
        'columns[8][search][value]': '',
        'columns[8][search][regex]': 'false',
        'columns[9][data]': 'deathsPergame',
        'columns[9][name]': '',
        'columns[9][searchable]': 'true',
        'columns[9][orderable]': 'true',
        'columns[9][search][value]': '',
        'columns[9][search][regex]': 'false',
        'columns[10][data]': 'mostdeaths',
        'columns[10][name]': '',
        'columns[10][searchable]': 'true',
        'columns[10][orderable]': 'true',
        'columns[10][search][value]': '',
        'columns[10][search][regex]': 'false',
        'columns[11][data]': 'assistsPergame',
        'columns[11][name]': '',
        'columns[11][searchable]': 'true',
        'columns[11][orderable]': 'true',
        'columns[11][search][value]': '',
        'columns[11][search][regex]': 'false',
        'columns[12][data]': 'mostassists',
        'columns[12][name]': '',
        'columns[12][searchable]': 'true',
        'columns[12][orderable]': 'true',
        'columns[12][search][value]': '',
        'columns[12][search][regex]': 'false',
        'columns[13][data]': 'goldsPermin',
        'columns[13][name]': '',
        'columns[13][searchable]': 'true',
        'columns[13][orderable]': 'true',
        'columns[13][search][value]': '',
        'columns[13][search][regex]': 'false',
        'columns[14][data]': 'lasthitPermin',
        'columns[14][name]': '',
        'columns[14][searchable]': 'true',
        'columns[14][orderable]': 'true',
        'columns[14][search][value]': '',
        'columns[14][search][regex]': 'false',
        'columns[15][data]': 'damagetoheroPermin',
        'columns[15][name]': '',
        'columns[15][searchable]': 'true',
        'columns[15][orderable]': 'true',
        'columns[15][search][value]': '',
        'columns[15][search][regex]': 'false',
        'columns[16][data]': 'damagetoheroPercent',
        'columns[16][name]': '',
        'columns[16][searchable]': 'true',
        'columns[16][orderable]': 'true',
        'columns[16][search][value]': '',
        'columns[16][search][regex]': 'false',
        'columns[17][data]': 'damagetakenPermin',
        'columns[17][name]': '',
        'columns[17][searchable]': 'true',
        'columns[17][orderable]': 'true',
        'columns[17][search][value]': '',
        'columns[17][search][regex]': 'false',
        'columns[18][data]': 'damagetakenPercent',
        'columns[18][name]': '',
        'columns[18][searchable]': 'true',
        'columns[18][orderable]': 'true',
        'columns[18][search][value]': '',
        'columns[18][search][regex]': 'false',
        'columns[19][data]': 'wardsplacedPermin',
        'columns[19][name]': '',
        'columns[19][searchable]': 'true',
        'columns[19][orderable]': 'true',
        'columns[19][search][value]': '',
        'columns[19][search][regex]': 'false',
        'columns[20][data]': 'wardskilledPermin',
        'columns[20][name]': '',
        'columns[20][searchable]': 'true',
        'columns[20][orderable]': 'true',
        'columns[20][search][value]': '',
        'columns[20][search][regex]': 'false',
        'order[0][column]': '4',
        'order[0][dir]': 'desc',
        'start': '0',
        'length': '20',
        'search[value]': '',
        'search[regex]': 'false',
        'area': '',
        'eid': '1065',
        'type': 'player',
        'gametype': '2',
        'filter': '{"team":{},"player":{},"meta":{}}',
    }
    # Encode the dictionary as k1=v1&k2=v2 form data.
    data = parse.urlencode(formdata)
    # print(data)
    r = requests.post(url, cookies=myCookies, data=data, headers=headers, allow_redirects=False)
    r.encoding = r.apparent_encoding
    c = r.text
    # A very short body means the token was rejected / no data came back.
    if len(c) < 100:
        print('Get failed, get again!')
        return False
    print('Get success!')
    l = json.loads(c)['data']
    for i in l[:20]:
        # NOTE(review): the first field was garbled in the source; 'teamid'
        # is a guess from context — confirm against the live response.
        print('team number: {0} team name: {1} player name: {2}'.format(
            i.get('teamid'), i['teamname'], i['playername']))
    return True


def cookie_to_dic(mycookie):
    """Parse a raw ``Cookie`` header string ("k1=v1; k2=v2") into a dict.

    Splits each pair on the FIRST '=' only, so values that themselves
    contain '=' (common for base64/signed cookie values) stay intact —
    the original ``split('=')[1]`` silently truncated them.

    Args:
        mycookie: cookie string with pairs separated by '; '.

    Returns:
        dict mapping cookie names to values.
    """
    dic = {}
    for pair in mycookie.split('; '):
        key, _, value = pair.partition('=')
        dic[key] = value
    return dic


# Retry until the AJAX endpoint returns a full payload
# (the CSRF token occasionally needs to be re-fetched).
if __name__ == '__main__':
    while 1:
        ok = get_competition()
        if ok is True:
            break
#    test()

Hero League Python crawler


The second lol web page data crawling

http://lol.admin.pentaq.com/

No anti crawling and CSRF token authentication:

from faker import Factory
import requests
import json

f = Factory.create()


def fun():
    """Fetch team statistics from the pentaq tournament API (tour id 29)
    and print name / id / wins for the first 20 teams."""
    url = 'http://lol.admin.pentaq.com/api/tournament_team_data?tour=29&patch='
    headers = {
        'user-agent': f.user_agent()
    }
    r = requests.get(url, headers=headers)
    r.encoding = r.apparent_encoding
    c = r.text
    r.close()
    l = json.loads(c)['data']['teams_data']
    for i in l[:20]:
        print('team name: {0} team id: {1} win: {2}'.format(
            i['team_full_name'], i['team_id'], i['win']))


# Script entry point: print the tournament team stats once.
if __name__ == '__main__':
    fun()

Hero League Python crawler

The third lol web page data crawling

http://www.op.gg/champion/statistics

Hero League Python crawler

Beautiful soup is enough.

from faker import Factory
import requests
from bs4 import BeautifulSoup

f = Factory.create()


def fun():
    """Scrape the op.gg champion-statistics page and print the top 5
    TOP-lane champions (rank, name, position, win rate, pick rate).

    Returns:
        bool: True on success; False when the page looks blocked or
        truncated (caller retries).
    """
    url = 'http://www.op.gg/champion/statistics'
    headers = {
        'user-agent': f.user_agent(),
        'Accept-Language': "zh-CN,zh;q=0.9,en;q=0.8'"
    }
    r = requests.get(url, headers=headers)
    r.encoding = r.apparent_encoding
    if r.status_code != 200:
        return False
    c = r.text
    r.close()
    # print(c)
    # A suspiciously small body means we got an error/anti-bot page.
    if len(c) < 10000:
        return False
    html = BeautifulSoup(c, 'html.parser')
    l = html.find('tbody', class_='tabItem champion-trend-tier-TOP').find_all('tr')
    for x in l[:5]:
        a = x.find_all('td')
        tmp = a[3]
        b = tmp.find_all('div')
        name = b[0].text
        pos = b[1].text.replace('\t', '').replace('\n', '')
        print('rank: {0} name: {1} pos: {2} winning rate: {3} appearance rate: {4}'.format(
            a[0].text, name, pos, a[4].text, a[5].text))
    return True
# for c in l[:20]:
#     a = c.find_all('td')
#     tmp  = a[3]
#     b = tmp.find_all('div')
#     name  = b[0].text
#     pos =  b[1].text
#Print ('rank: {0] name: {1} pos: {2} winning rate: {3} appearance rate: {4} '. Format (a [0]. Text, name, POS, a [4]. Text, a [5]. Text))


# Retry until a full statistics page is fetched and parsed successfully.
if __name__ == '__main__':
    while True:
        ok = fun()
        if ok:
            break

4. Multi thread crawling lol hero skin image

1. get_url_list() fetches the list of hero ids used to build the per-hero URLs

2. download() downloads each picture and saves it to a folder

3. main() runs the crawling tasks with multiple threads

import requests
import json
import os
from faker import Factory
from multiprocessing.dummy import Pool as ThreadPool
import time

f = Factory.create()
# Shared request headers: a randomized user-agent generated once per run.
headers = {
    'user-agent': f.user_agent()
}


def get_url_list():
    """Fetch the hero list and return the hero ids.

    Returns:
        list: the ``heroId`` of every hero in the payload (the original
        built this list but never returned it).
    """
    url = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
    r = requests.get(url, headers=headers)
    r.encoding = r.apparent_encoding
    c = r.text
    heros = json.loads(c)["hero"]  # list of hero records (~156 entries)
    id_list = []
    for hero in heros:
        id_list.append(hero["heroId"])
    # print(id_list)
    return id_list


def spider(url):
    """Download every skin image referenced by one hero JSON page.

    Args:
        url: a ``.../hero/<n>.js`` endpoint whose JSON carries a ``skins``
            array with heroName / name / mainImg per skin.
    """
    r = requests.get(url, headers=headers)
    r.encoding = r.apparent_encoding
    c = r.text
    r.close()
    res_dict = json.loads(c)
    skins = res_dict["skins"]  # one record per skin
    # enumerate supplies a stable index used to name the saved files
    for index, hero in enumerate(skins):
        item = {}
        item['name'] = hero["heroName"]
        item['skin_name'] = hero["name"]
        # Some entries have no standalone image; skip them.
        if hero["mainImg"] == '':
            continue
        item['imgLink'] = hero["mainImg"]
        # print(item)
        download(index + 1, item)


def download(index, contdict):
    """Save one skin image as ``skin/<heroName>/<skinName><index>.jpg``.

    Args:
        index: 1-based position of the skin in the hero's skin list,
            appended to the file name to keep it unique.
        contdict: dict with 'name' (hero), 'skin_name' and 'imgLink'.
    """
    name = contdict['name']
    path = os.path.join('skin', name)
    if not os.path.exists(path):
        os.makedirs(path)

    content = requests.get(contdict['imgLink'], headers=headers).content
    # NOTE(review): the garbled original mixed "skin/" and "../skin/"
    # prefixes; saving under 'skin/' matches the makedirs call above.
    # 'fp' instead of 'f' avoids shadowing the module-level faker Factory.
    file_path = os.path.join(path, contdict['skin_name'] + str(index) + '.jpg')
    with open(file_path, 'wb') as fp:
        fp.write(content)


def main():
    """Crawl the first 10 hero skin pages concurrently with 6 worker threads
    and report the elapsed wall-clock time."""
    start = time.time()
    pool = ThreadPool(6)
    pages = []
    for i in range(1, 11):
        newpage = 'https://game.gtimg.cn/images/lol/act/img/js/hero/{}.js'.format(i)
        print(newpage)
        pages.append(newpage)
    # spider() works by side effect (saves files), so the map result is unused.
    pool.map(spider, pages)
    pool.close()
    pool.join()
    end = time.time()
    print('time:', end - start)


# Script entry point: run the multithreaded skin crawl.
if __name__ == '__main__':
    main()

Hero League Python crawler
Hero League Python crawler

Hero League Python crawler

Recommended Today

Proper memory alignment in go language

problem type Part1 struct { a bool b int32 c int8 d int64 e byte } Before we start, I want you to calculatePart1What is the total occupancy size? func main() { fmt.Printf(“bool size: %d\n”, unsafe.Sizeof(bool(true))) fmt.Printf(“int32 size: %d\n”, unsafe.Sizeof(int32(0))) fmt.Printf(“int8 size: %d\n”, unsafe.Sizeof(int8(0))) fmt.Printf(“int64 size: %d\n”, unsafe.Sizeof(int64(0))) fmt.Printf(“byte size: %d\n”, unsafe.Sizeof(byte(0))) fmt.Printf(“string size: %d\n”, […]