-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgetData.py
More file actions
76 lines (67 loc) · 2.68 KB
/
getData.py
File metadata and controls
76 lines (67 loc) · 2.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import requests
from bs4 import BeautifulSoup
import pandas as pd
def get_data(algsets):
    """Scrape 3x3 algorithm data for the given algsets from speedcubedb.com.

    Parameters
    ----------
    algsets : iterable of str
        Algorithm-set names as they appear in speedcubedb URLs
        (e.g. "F2L", "OLL", "PLL").

    Returns
    -------
    list of dict
        One record per case (and, for F2L, per orientation) with keys:
        name, algset, caseid, catalog, alg1..alg4, video, videoimg,
        color, orientation.
    """
    all_algs = []
    for algset in algsets:
        url = "https://www.speedcubedb.com/a/3x3/" + algset
        # Timeout so a dead/slow server can't hang the scrape forever.
        res = requests.get(url, timeout=30)
        soup = BeautifulSoup(res.content, 'html.parser')
        datas = soup.select('div.row.mt-2.pt-3.mb-2.pb-3.singlealg.border-bottom')
        for index, data in enumerate(datas):
            caseid = data.find('h3').text
            catalog = data.find('h5').text
            # Video link and preview image: taken from the last matching
            # list item when present, otherwise left empty.
            video_items = data.select('li.list-group-item.text-center')
            if video_items:
                video_raw = video_items[-1]
                vlink = video_raw.find('a')['href']
                vimg = video_raw.find('img')['src']
            else:
                vlink = ""
                vimg = ""
            # Face-color string for plain top-down views (OLL/PLL):
            # U-layer sticker colors from data attributes around a solid
            # white 3x3 U face ('wwwwwwwww').
            jcubes = data.select('div.jcube')
            if jcubes:
                img_raw = jcubes[0]
                fc = (img_raw['data-us'] + img_raw['data-ur'] + img_raw['data-uf']
                      + 'wwwwwwwww' + img_raw['data-ul'] + img_raw['data-ub'])
            else:
                fc = ""
            # F2L cases come in 4 orientations (0-3); all other sets only 0.
            ori_range = range(4) if algset == "F2L" else range(1)
            for ori in ori_range:
                alg = [i.text for i in data.select('div[data-ori="' + str(ori) + '"]')[0].select('span')]
                # Pad to exactly 4 entries so alg1..alg4 always exist.
                alg.extend([""] * (4 - len(alg)))
                if ori == 0:
                    oriname = ""
                else:
                    oriname = "-" + str(ori)
                    # NOTE(review): only the base orientation keeps the video
                    # fields; rotated variants are cleared. Source indentation
                    # was lost in transit — confirm this matches the original.
                    vlink = ""
                    vimg = ""
                all_algs.append({
                    "name": algset + "%02d" % (index + 1) + oriname,
                    "algset": algset,
                    "caseid": caseid,
                    "catalog": catalog,
                    "alg1": alg[0],
                    "alg2": alg[1],
                    "alg3": alg[2],
                    "alg4": alg[3],
                    "video": vlink,
                    "videoimg": vimg,
                    "color": fc,
                    "orientation": ori
                })
    return all_algs
def main():
    """Scrape every configured algorithm set and write asset/allAlgs.csv."""
    # Sorted lexicographically to match the ordering in the Notion database.
    algsets = sorted(["F2L", "OLL", "PLL"])
    all_algs = get_data(algsets)
    # Column order follows the key order of the first scraped record.
    columns = all_algs[0].keys()
    frame = pd.DataFrame(all_algs, columns=columns)
    frame.to_csv('asset/allAlgs.csv', encoding='utf-8', index=False)


if __name__ == "__main__":
    main()