Python采集biaozhun政府文件

# utf-8
import requests
from bs4 import BeautifulSoup
import json
import base64
import array
#解码到pdf_reader
def base64_to_uint8_array(base64_string):
    """Decode a base64 string into an array of unsigned bytes.

    Args:
        base64_string: Base64-encoded text (``str``) or bytes.

    Returns:
        An ``array.array`` with typecode ``'B'`` holding one item per
        decoded byte.

    Raises:
        binascii.Error: If the input is not correctly padded base64.
    """
    raw_bytes = base64.b64decode(base64_string)
    # Typecode 'B' is an unsigned char, so every item is in range 0..255.
    return array.array('B', raw_bytes)

# Fetch one standard's detail record and save its payload as a local PDF.
# The endpoint was found by watching the site's Vue front end in the
# browser's network panel.
profession_code = 671
payload = {
    'code': profession_code,
}
res = requests.post(
    'http://biaozhun.osta.org.cn/api/v1/profession/detail',
    data=payload,
    timeout=30,  # requests never times out unless told to
)
# Fail loudly on an HTTP error instead of trying to parse an error page.
res.raise_for_status()
detail = json.loads(res.text)
# The 'data' field is base64 text; its decoded bytes are written out as
# "<code>.pdf" in the current working directory.
with open(str(profession_code) + ".pdf", "wb") as pdf_file:
    pdf_file.write(base64.b64decode(detail['data']))
# For debugging, base64_to_uint8_array(detail['data']) gives the same bytes
# as an unsigned-byte array.

# Intentional early stop: nothing below this line (the listing crawl) runs
# unless this statement is removed. SystemExit is raised directly because
# the exit() helper comes from the site module and is not meant for programs.
raise SystemExit

# Crawl the paginated standards listing: print each row's metadata and
# request its attachment.
# NOTE(review): pages are requested as 0..29, as in the original script;
# confirm whether the API numbers pages from 0 or from 1.
for page in range(30):
    # Replace the URL with the site you want to collect data from.
    response = requests.get(
        'http://biaozhun.osta.org.cn/api/v1/profession/list',
        params={'pageNum': page, 'pageSize': 20},
        timeout=30,  # requests never times out unless told to
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    listing = json.loads(response.text)
    for item in listing['rows']:
        item_id = item['id']  # renamed so the builtin id() is not shadowed
        name = item['name']
        code = item['code']
        issueNum = item['issueNum']
        issueDate = item['issueDate']
        attachment = item['attachment']
        print(item_id, name, code, issueNum, issueDate, attachment)
        # NOTE(review): the attachment is requested but the response is
        # discarded, exactly as in the original; save response content here
        # if the file itself is wanted.
        requests.get('http://biaozhun.osta.org.cn/' + attachment, timeout=30)

One thought on “Python采集biaozhun政府文件”

发表评论