-
Notifications
You must be signed in to change notification settings - Fork 0
/
pb.py
153 lines (124 loc) · 4.43 KB
/
pb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import cmd2
import re
import requests
from itertools import count
from pathlib import Path
from bs4 import BeautifulSoup
from clint.textui import progress
import sys
# Default number of episode-list pages scanned by get_list_by_like().
SCAN_PAGES = 2
NUM_DOWNLOADS = 17
# Characters stripped from episode titles before they are used as file names.
# The backslash is listed exactly once (as "\\"); the original literal also
# contained "\." which is an invalid escape sequence that merely repeated the
# backslash, so the set of removed characters is unchanged.
CHARS_TO_REMOVE_FROM_FILENAME = "|\\?*<\":>+[]/'·.,()’"
# Module-level episode accumulator referenced by get_list_by_like().
episode_list = []
# Podcast display name -> numeric podcast id, as printed by the `names` command.
podcast_dict = {
    '법륜스님': 1805,
    '신과함께': 15781,
    '매불쇼': 16898,
}
class CmdLineApp(cmd2.Cmd):
    '''Interactive shell for listing, filtering and downloading podcast episodes.

    Every command works on ``elist``, the current list of episode dicts
    (keys 'pid', 'eid', 'title' and 'like') that ``get_list`` fills in.
    '''

    # Current working list. Commands always rebind this attribute instead of
    # mutating it in place, so the class-level default is never shared.
    elist = []

    def do_names(self, args):
        '''Print the known podcast names with their podcast ids.'''
        print(podcast_dict)

    def do_get_list(self, args):
        '''get_list PID [NUM_PAGES]
        Fetch the episodes of podcast PID; NUM_PAGES defaults to SCAN_PAGES.
        '''
        items = args.split()
        try:
            pid = int(items[0])
            pages = int(items[1]) if len(items) > 1 else SCAN_PAGES
        except (IndexError, ValueError):
            # Missing or non-numeric arguments: report and keep the shell alive.
            print('usage: get_list PID [NUM_PAGES]')
            return
        print(pid, pages)
        fetched = get_list_by_like(pid, pages)
        # Copy the result and guard against a None result so that later
        # commands can always iterate elist and never share a list object
        # with the scraper.
        self.elist = list(fetched) if fetched else []

    def do_sort_list_by_like(self, args):
        '''Sort the current list by like count, most liked first.'''
        self.elist = sorted(self.elist, key=lambda k: k['like'], reverse=True)

    def do_print_list(self, args):
        '''Print every episode of the current list.'''
        for ep in self.elist:
            print(ep)

    def do_download(self, args):
        '''Download every episode of the current list as an mp3 file.'''
        for ep in self.elist:
            mp3_download(ep['pid'], ep['eid'], ep['title'])

    def do_search_keyword(self, args):
        '''Get episodes including keyword
        '''
        keyword = args
        self.elist = [ep for ep in self.elist if keyword in ep['title']]

    def do_exclude_keyword(self, args):
        ''' Get episodes not including keyword
        '''
        keyword = args
        self.elist = [ep for ep in self.elist if keyword not in ep['title']]

    def do_head(self, args):
        '''head MAX_NUMBER
        Keep only the first MAX_NUMBER episodes of the current list.
        '''
        items = args.split()
        try:
            max_number = int(items[0])
        except (IndexError, ValueError):
            # Missing or non-numeric argument: report and leave the list as is.
            print('usage: head MAX_NUMBER')
            return
        self.elist = self.elist[:max_number]

    def do_clear_list(self, args):
        '''Empty the current list.'''
        self.elist = []
def get_list_by_like(pid, pages=SCAN_PAGES):
    '''Scrape a podcast's episode-list pages and return its non-video episodes.

    Args:
        pid: Podcast id placed in the episode-list URL.
        pages: Number of list pages to fetch, starting at page 1.

    Returns:
        A new list of dicts with the keys 'pid', 'eid', 'title' (the title
        after get_android_filename) and 'like' (int). When an entry lacks
        the 'title' or 'onclick' attribute the listing is treated as ended
        and the episodes collected so far are returned.

    Raises:
        AssertionError: if an entry is neither video nor audio content.
    '''
    # Collected per call: a shared module-level list would return duplicates
    # on every repeated invocation.
    episodes = []
    onclick_pattern = re.compile(r"'(\d+)',\s*'(\w+/\w+)'")
    for page in range(1, pages + 1):
        url = 'http://www.podbbang.com/podbbangchnew/episode_list?id={pid}&page={page}'.format(pid=pid, page=page)
        print(url)
        response = requests.get(url)
        response.encoding = 'utf8'
        soup = BeautifulSoup(response.text, 'lxml')
        for dl_tag in soup.select('li > dl'):
            try:
                title = dl_tag.find('dt')['title']
                like = int(dl_tag.find('dd', class_='dd_heart').text.replace(',', ''))
                js = dl_tag['onclick']
            except KeyError:
                # A tag without the expected attributes marks the end of the
                # real entries; keep what has been gathered instead of
                # throwing it away.
                print('Ended')
                return episodes
            matched = onclick_pattern.search(js)
            if not matched:
                continue
            eid, content_type = matched.groups()
            if 'video' in content_type:
                continue
            print(content_type)
            # Sanity check kept from the original: whatever is not video is
            # expected to be audio.
            assert 'audio' in content_type
            episodes.append({
                'pid': pid,
                'eid': eid,
                'title': get_android_filename(title),
                'like': like,
            })
    return episodes
def mp3_download(pid, eid, title):
    '''Download one episode to '<title>.mp3' in the current directory.

    The download is skipped when a file of that name already exists with
    exactly the size announced by the server. A non-200 response is only
    reported on stdout.

    Args:
        pid: Podcast id, used in the download URL and the Referer header.
        eid: Episode id, used in the download URL.
        title: File name stem for the saved mp3.
    '''
    url = 'http://www.podbbang.com/download?pid={pid}&eid={eid}'.format(pid=pid, eid=eid)
    headers = {
        'Referer': 'http://www.podbbang.com/ch/{pid}'.format(pid=pid),
    }
    # The context manager releases the streamed connection even when the body
    # is never read (file already present, or a failed request).
    with requests.get(url, headers=headers, stream=True) as r:
        if r.status_code != 200:
            print('download failed. status code = {}'.format(r.status_code))
            return
        filepath = Path('{}.mp3'.format(title))
        # The header may be absent; then the size is simply unknown.
        content_length = r.headers.get('content-length')
        total_length = int(content_length) if content_length is not None else None
        if (total_length is not None and filepath.exists()
                and filepath.stat().st_size == total_length):
            print('{} - File exists.'.format(title))
            return
        print('{} - Downloading'.format(title))
        chunk_size = 1024
        chunks = r.iter_content(chunk_size=chunk_size)
        if total_length is not None:
            # The progress bar needs the number of chunks up front, so it is
            # only shown when the size is known.
            expected_size = (total_length // chunk_size) + 1
            chunks = progress.bar(chunks, expected_size=expected_size)
        with filepath.open('wb') as f:
            for chunk in chunks:
                f.write(chunk)
def get_android_filename(filename):
    '''Return filename with every character listed in
    CHARS_TO_REMOVE_FROM_FILENAME deleted.
    '''
    # A translation table with only a "delete" set drops all listed
    # characters in a single pass.
    deletion_table = str.maketrans('', '', CHARS_TO_REMOVE_FROM_FILENAME)
    return filename.translate(deletion_table)
if __name__ == '__main__':
    # Run the interactive shell and hand whatever cmdloop() returns to
    # sys.exit() as the process exit status.
    sys.exit(CmdLineApp().cmdloop())