Format
Plain text
Post date
2019-05-21 19:25
Zeitraum der Veröffentlichung
Unbegrenzt
  1. import requests
  2. import re
  3. import io
  4. from bs4 import BeautifulSoup
  5. url1 = "http://www.ytu.edu.cn/xxyw/index.jhtml"
  6. url2 = "http://www.ldu.edu.cn/index/zhyw.htm"
  7. fytu = open("ytu_news.csv","w")
  8. fldu = open("ldu_news.csv","w")
  9. def getHtmlText(url):
  10. try :
  11. r= requests.get(url,timeout=30)
  12. r.raise_for_status()
  13. r.encoding = r.apparent_encoding
  14. return r.text
  15. except:
  16. return "出现灾难性的错误"
  17. r = getHtmlText(url2)
  18. soup = BeautifulSoup(r,"lxml")
  19. news = soup.find_all("a",{"class":"c49418"})
  20. for line in news:
  21. link=line["title"]+","+line["href"]
  22. fytu.writelines(link)
  23. fytu.write('\n')
  24. fytu.close()
  25. print("-------------------------------------------------------------------------")
  26. r = getHtmlText(url1)
  27. pattern = re.compile(r'<a href="(.*)" title="(.*)">.*</a>')
  28. items = pattern.findall(r)
  29. for item in items:
  30. link=item[1]+","+item[0]
  31. fldu.writelines(link)
  32. fldu.write('\n')
  33. fldu.close()
Download Printable view

URL of this paste

Embed with JavaScript

Embed with iframe

Raw text