# crawl.py — account-page crawler: logs in through a SOCKS5 proxy,
# refreshes session cookies, and scrapes usage stats / subscription links
# into data.json.
  1. import time
  2. import requests
  3. from bs4 import BeautifulSoup
  4. import re
  5. import json
  6. import datetime
  7. import socket
  8. import socks
  9. cookies = {}
  10. with open('data.json', 'r') as f:
  11. jsonData = json.load(f)
  12. cookies = jsonData['cookies']
  13. # 设置代理服务器的 IP 和端口号
  14. socks.set_default_proxy(socks.SOCKS5, "you_ip", you_port)
  15. # 将所有的 TCP 连接都通过代理服务器进行处理
  16. socket.socket = socks.socksocket
  17. def flushCookie():
  18. # 请求体
  19. reqData = {
  20. }
  21. # 请求头
  22. headers = {
  23. 'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Mobile Safari/537.36',
  24. 'Referer': 'you_url',
  25. 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
  26. }
  27. try:
  28. # 发送登录请求
  29. response = requests.post('url', data=reqData, headers=headers, verify=False)
  30. # 判断是否登录成功
  31. if response.status_code == 200 and '"ret":1' in response.text:
  32. global cookies
  33. global jsonData
  34. # 读取cookie
  35. cookies = response.cookies.get_dict()
  36. # 将cookie写入jsonData
  37. jsonData['cookies'] = cookies
  38. flushJson("cookie已刷新!")
  39. else:
  40. # 失败就重新登陆
  41. flushCookie();
  42. except requests.exceptions.RequestException as e:
  43. # 出现异常,打印异常 重新登陆
  44. print(e)
  45. flushCookie()
  46. def getData():
  47. url = '' # 将此链接替换为您要抓取的实际链接
  48. global cookies
  49. try:
  50. response = requests.get(url, cookies=cookies, verify=False)
  51. # 如果发生了重定向 就刷新cookie
  52. if response.history:
  53. flushCookie();
  54. getData();
  55. # 如果发生了异常 就递归
  56. except requests.exceptions.RequestException as e:
  57. print(e)
  58. getData();
  59. else:
  60. # 分析网页 获取数据
  61. html = response.content
  62. soup = BeautifulSoup(html, 'html.parser')
  63. item = soup.findAll(class_='nodemain')
  64. item1 = soup.findAll('code')
  65. item2 = soup.findAll('input')
  66. # 在线设备数
  67. zxsb = item[2].find('dd').text.strip()
  68. # 上次使用时间
  69. sysj = item[2].findAll('div')[4].text[6:].strip()
  70. # 剩余流量
  71. syll = item1[2].text.strip()
  72. # 到期时间
  73. dqsj = item[0].findAll('div')[4].text[7:].strip()
  74. # ssr订阅链接
  75. ssr = item2[0]['value'].strip()
  76. ssr_bak = item2[1]['value'].strip()
  77. # clash订阅链接
  78. clash = item2[2]['value'].strip()
  79. # v2ray订阅链接
  80. v2ray = item2[4]['value'].strip()
  81. v2ray_bak = item2[5]['value'].strip()
  82. global jsonData
  83. jsonData['data'] = {
  84. 'code': 1,
  85. 'message': 'success!',
  86. 'data': {
  87. 'zxsb': zxsb,
  88. 'sysj': sysj,
  89. 'syll': syll,
  90. 'dqsj': dqsj,
  91. 'v2rayurl': v2ray,
  92. 'v2rayBakurl': v2ray_bak,
  93. 'SSRurl': ssr,
  94. 'SSRBakurl': ssr_bak,
  95. 'Clash': clash
  96. }
  97. }
  98. jsonData['saveTime'] = time.time();
  99. flushJson("数据已刷新");
  100. def flushJson(msg):
  101. global jsonData;
  102. with open('data.json', 'w') as f:
  103. json.dump(jsonData, f)
  104. # 获取当前的日期和时间
  105. now = datetime.datetime.now()
  106. # 格式化输出日期和时间
  107. formatted_time = now.strftime('%Y-%m-%d %H:%M:%S')
  108. print('{} '.format(formatted_time) + msg)