背景
前段时间在喜马拉雅上偶然听到牛大宝说的有声小说《神通板砖》,说得很是幽默,听得正起劲的时候开始收费了,于是我就在网上找了一下,看看有没有免费版,一搜果然有,但该网站上广告太多了,于是我就写了个可以批量下载的小脚本,供大家学习。如果您将该脚本用于任何非法用途,均与本站无关。
思路
- 找到可以免费下载资源的网站
- 针对该网站html进行分析并编写下载脚本
- linux命令行自动顺序播放
实施
找到可以免费下载资源的网站
网址: www.ting56.com/mp3/6475.html#liebiao
编写脚本 stbz.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import re
import subprocess
from html.parser import HTMLParser

import urllib3
# Shared urllib3 connection pool; every HTTP request in this script goes
# through it.
http = urllib3.PoolManager()
# Site root that is prepended to the href values collected from the page.
baseUrl='http://www.ting56.com'
class Mp3HTMLParser(HTMLParser):
    """HTML parser that reports ``<div id="bofangqi">`` start tags.

    The tag name is printed once for every ``id="bofangqi"`` attribute found
    on a ``<div>`` start tag; all other markup is ignored.
    """

    def __init__(self):
        super().__init__()

    def handle_starttag(self, tag, attrs):
        """Print *tag* for each ``id="bofangqi"`` attribute of a ``<div>``."""
        if tag != 'div':
            return
        hits = [1 for (key, value) in attrs if key == 'id' and value == 'bofangqi']
        for _ in hits:
            print(tag)
class MyHTMLParser(HTMLParser):
    """Parser for the episode list page that downloads every listed episode.

    The parser keeps a single flag, ``a_t``.  It is switched on by a
    ``<div>`` start tag that carries ``id="vlink_1"`` and switched off by any
    later ``<div>`` that has attributes but not that id.  While the flag is
    on, every ``<a>`` start tag is passed to ``download()`` with its
    ``title`` and its ``href`` prefixed with ``baseUrl``.
    """

    def __init__(self):
        super().__init__()
        # True while the parser is past the <div id="vlink_1"> start tag.
        self.a_t = False

    def handle_starttag(self, tag, attrs):
        """Track the ``vlink_1`` container and download the links inside it.

        :param tag: lower-cased tag name supplied by ``HTMLParser``
        :param attrs: list of ``(name, value)`` attribute pairs
        :return: None
        """
        # Decide from the whole attribute list, so the result does not depend
        # on whether ``id`` happens to be the last attribute of the tag.
        # A <div> without any attributes leaves the flag untouched.
        if tag == 'div' and attrs:
            self.a_t = any(k == 'id' and v == 'vlink_1' for (k, v) in attrs)

        if not self.a_t or tag != 'a':
            return

        attr_map = dict(attrs)
        href = attr_map.get('href')
        # A missing or valueless href yields an empty URL instead of raising.
        url = baseUrl + href if href is not None else ''
        title = attr_map.get('title') or ''
        download(title, url)

    def handle_data(self, data):
        """Ignore text content; only start tags matter to this parser.

        :param data: text found between tags
        :return: None
        """
def fromCharCode(n):
    """Return the character whose code point is the low 8 bits of *n*."""
    low_byte = n & 0xFF
    return chr(low_byte)
# Matches the argument of a FonHen_JieMa(...) call: digits, quotes,
# backslashes and '*' separators.
_JIEMA_CALL = re.compile(r"FonHen_JieMa\(([0-9\\'*]*)\)")


def _append_line(path, line):
    """Append *line* plus a newline to the text file at *path*."""
    with open(path, 'a', encoding='utf-8') as fh:
        fh.write(line + '\n')


def _decode_media_info(page):
    """Decode the argument of the last ``FonHen_JieMa(...)`` call in *page*.

    The argument is a quoted, '*'-separated list of integers; each integer is
    turned into one character with ``fromCharCode``.

    :raises ValueError: if the page contains no such call or a code is not
        an integer
    """
    calls = _JIEMA_CALL.findall(page)
    if not calls:
        raise ValueError('FonHen_JieMa(...) call not found in page')
    codes = calls[-1].strip("'\\").split('*')
    return ''.join(fromCharCode(int(code)) for code in codes if code)


def download(name, url, skip_through=137, out_dir='/tmp/stbz'):
    """Fetch one episode page, decode its media URL and download the file.

    Every call is recorded in ``downloadList.txt`` inside *out_dir*; every
    failure is recorded in ``err.txt`` there.  Nothing is raised to the
    caller, so one bad episode does not stop a batch run.

    :param name: episode number as text; also used as the output file name
    :param url: address of the episode page
    :param skip_through: episodes numbered up to and including this value
        are logged but not fetched
    :param out_dir: directory for the logs and the downloaded files; it is
        created when missing
    :return: None
    """
    print('%s %s' % (name, url))
    os.makedirs(out_dir, exist_ok=True)
    # Written directly rather than through a shell, so text scraped from the
    # page can never be interpreted as a command.
    _append_line(os.path.join(out_dir, 'downloadList.txt'),
                 '%s %s' % (name, url))
    try:
        if int(name) <= skip_through:
            return
        r = http.request('GET', url)
        decoded = _decode_media_info(r.data.decode('gbk'))
        print(decoded)
        # Fields are '&'-separated: [0] is the media URL, [2] the extension.
        datas = decoded.split('&')
        print(datas)
        target = os.path.join(out_dir, '%s.%s' % (name, datas[2]))
        # Argument list, no shell: the decoded URL is passed to curl verbatim.
        result = subprocess.run(['curl', datas[0], '--output', target])
        if result.returncode != 0:
            raise Exception("download err")
    except Exception as exc:
        print('[err] %s %s ' % (name, url))
        print('      %r' % (exc,))
        _append_line(os.path.join(out_dir, 'err.txt'), '%s %s' % (name, url))
if __name__ == "__main__":
    # Create the output directory first so that the very first run does not
    # fail when /tmp/stbz is missing, then reset the download log to a
    # single blank line.
    os.makedirs('/tmp/stbz', exist_ok=True)
    with open('/tmp/stbz/downloadList.txt', 'w') as log:
        log.write('\n')
    # Fetch the episode list page (served as GBK) and let the parser trigger
    # one download per episode link it finds.
    r = http.request('GET','http://www.ting56.com/mp3/6475.html#liebiao')
    dataStr = r.data.decode('gbk')
    parser = MyHTMLParser()
    parser.feed(dataStr)
自动播放脚本
下载好的音频文件都是以数字编号的,所以可以播放完一个自动进行下一个,并且记录当前播放位置,方便下次继续播放。脚本会按 stbz.lst 中列出的文件名顺序播放(可以用类似 `ls *.m4a | sort -n > stbz.lst` 的命令生成该列表)。将下面的脚本保存为 play.sh:
#!/usr/bin/env bash
# Play the audio files listed in ./stbz.lst one after another with mplayer,
# resuming from the entry recorded in stbz.pos.
#
# Files, all relative to the directory that contains this script:
#   stbz.lst  whitespace-separated file names, in playback order
#   stbz.pos  name of the entry to resume from; created as "1.m4a" if missing
#   err.txt   mplayer's stderr is appended here
#
# The script exits with status 1 as soon as one mplayer invocation fails.

basepath=$(cd "$(dirname "$0")" && pwd) || exit 1
cd "$basepath" || exit 1
echo "workspace [$basepath]"

posfile="stbz.pos"
if [ ! -f "$posfile" ]; then
    echo '1.m4a' > "$posfile"
fi
pos=$(cat "$posfile")

# "start": still looking for the saved entry; "play": it has been found and
# every following entry is played.
state="start"
for name in $(cat ./stbz.lst); do
    if [ "$state" = "start" ]; then
        # Quoted operands keep the test valid even if stbz.pos is empty.
        if [ "$pos" = "$name" ]; then
            echo "begin play ${pos}"
            mplayer "$name" 1>> /dev/null 2>>err.txt || exit 1
            state="play"
            echo "${name}" > "$posfile"
        fi
    else
        echo "begin play ${name}"
        # Save the position before playing so an interrupted run resumes here.
        echo "${name}" > "$posfile"
        mplayer "$name" 1>> /dev/null 2>>err.txt || exit 1
    fi
done

# Tell the user why nothing happened instead of finishing silently.
if [ "$state" = "start" ]; then
    echo "saved position [$pos] not found in stbz.lst; nothing played" >&2
fi