from urllib.request import urlopen
import re
import sys
import os
# Input file: one URL (or bare host[:port]) per line.
filepath = 'urls.txt'

# Applied to decoded text. IGNORECASE and DOTALL let it match <TITLE ...> tags
# and titles that span several lines.
TITLE_RE = re.compile(r'<title\b[^>]*>(.*?)</title>', re.IGNORECASE | re.DOTALL)


def normalize_url(line):
    """Turn one line of the input file into a URL that urlopen accepts.

    Surrounding whitespace (including the trailing newline left by file
    iteration) is removed. Entries that do not start with 'http' get a
    scheme prepended: 'https://' when the text contains ':443', otherwise
    'http://'.

    Returns '' for a blank line so the caller can skip it.
    """
    url = line.strip()
    if not url:
        return ''
    if not url.startswith('http'):
        scheme = 'https://' if ':443' in url else 'http://'
        url = scheme + url
    return url


def extract_title(html):
    """Return the text of the first <title> element in *html*.

    Runs of whitespace (including newlines) inside the title are collapsed
    to single spaces so the result fits on one output row.

    Returns None when the document has no <title> element, and '' when the
    element is present but empty.
    """
    match = TITLE_RE.search(html)
    if match is None:
        return None
    return ' '.join(match.group(1).split())


def fetch_title(url, timeout=3):
    """Download *url* and return its page title, or None if it has none.

    Exceptions raised by urlopen/read (network errors, timeouts, malformed
    URLs) propagate to the caller.
    """
    # The context manager closes the connection even if read() fails.
    with urlopen(url, timeout=timeout) as response:
        charset = response.headers.get_content_charset() or 'utf-8'
        body = response.read()
    try:
        text = body.decode(charset, errors='replace')
    except LookupError:
        # The server declared a charset Python does not know; fall back.
        text = body.decode('utf-8', errors='replace')
    return extract_title(text)


def main():
    """Print one 'title,url' row per URL listed in the file named by filepath.

    URLs that cannot be fetched are reported as 'ERROR,url'. Pages that were
    fetched but contain no <title> element produce no output. Blank lines in
    the input are skipped.
    """
    with open(filepath) as fp:
        for line in fp:
            url = normalize_url(line)
            if not url:
                continue
            try:
                title = fetch_title(url)
            except Exception:
                # Best-effort scan: any fetch failure is reported and the run
                # continues. Exception (not a bare except) keeps Ctrl-C and
                # SystemExit working.
                print("ERROR," + url)
                continue
            if title is not None:
                print(title + "," + url)


if __name__ == "__main__":
    main()
----
Input is a text file with one URL per line.
----
sample results
No comments:
Post a Comment