# -*- coding: cp949 -*-
import urllib
def getpage(url):
    """Fetch *url* and return the raw response body.

    Returns whatever the response object's ``read()`` yields (``str`` on
    Python 2, ``bytes`` on Python 3), or ``None`` when the URL cannot be
    opened or read.  Callers rely on the ``None`` sentinel, so failures
    are reported that way instead of being raised.
    """
    # Resolve urlopen locally so the function works on both Python 2
    # (urllib.urlopen) and Python 3 (urllib.request.urlopen).
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    # "except Exception" rather than a bare "except": Ctrl-C
    # (KeyboardInterrupt) and SystemExit must still propagate.
    try:
        response = urlopen(url)
    except Exception:
        return None
    try:
        return response.read()
    except Exception:
        return None
    finally:
        # Always release the underlying file/socket handle.
        response.close()
# Fetch http://yonsei.ac.kr/ and print the page body; if getpage() returned
# None (or an empty body), print the "wrong address" message instead.
# Parenthesised single-argument print behaves the same as the Python 2
# print statement.
text = getpage('http://yonsei.ac.kr/')
if text:
    print(text)
else:
    print("잘못된 주소입니다. 바르게 입력하세요")
# Prints only the links (href values) found in the page.
import urllib
import re
def getpage(url):
    """Download *url* and return the response body, or ``None`` on failure.

    The body is returned exactly as the response's ``read()`` produces it
    (``str`` on Python 2, ``bytes`` on Python 3).  Any error while opening
    or reading the URL yields ``None`` so callers can test the result
    instead of handling exceptions.
    """
    # urlopen lives in different modules on Python 2 and Python 3.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    # Exception handling: catch Exception (not a bare except) so that
    # KeyboardInterrupt and SystemExit are not silently swallowed.
    try:
        response = urlopen(url)
    except Exception:
        return None
    try:
        return response.read()
    except Exception:
        return None
    finally:
        # Close the handle even when read() fails.
        response.close()
# Fetch the page and print the target of every href attribute found in it.
text = getpage('http://naver.com')

# Three alternatives, one capture group each:
#   1. unquoted value      href=foo   (terminated by whitespace or '>')
#   2. double-quoted value href="foo"
#   3. single-quoted value href='foo'
# Written as a raw string so \S and \s reach the regex engine untouched;
# the resulting pattern is identical to the original one.
p = re.compile(
    r"""href=([^'"]\S+?)[\s>]|href="([^'"]*?)"|href='([^'"]*?)'""",
    re.I,
)

# getpage() returns None when the fetch fails; there is nothing to scan
# in that case (searching None would raise TypeError).
if text:
    for match in p.finditer(text):
        # Exactly one group takes part in each match; lastindex names it.
        link = match.group(match.lastindex)
        # An empty attribute (href="") carries no URL, so skip it instead
        # of failing on it.
        if link:
            print(link)
# -*- coding: cp949 -*-
import urllib
import re
import htmlparser
def getpage(url):
    """Return the body served at *url*, or ``None`` if it cannot be fetched.

    The value is the unmodified result of the response's ``read()``
    (``str`` on Python 2, ``bytes`` on Python 3).  Errors raised while
    opening or reading are converted to a ``None`` return value.
    """
    # Pick the urlopen that exists on the running interpreter
    # (urllib.request on Python 3, urllib on Python 2).
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    # Exception handling: only Exception subclasses are converted to None;
    # KeyboardInterrupt / SystemExit keep propagating.
    try:
        response = urlopen(url)
    except Exception:
        return None
    try:
        return response.read()
    except Exception:
        return None
    finally:
        # Do not leak the open file/socket handle.
        response.close()
# Fetch the page, run it through htmlparser.SimpleTextHTMLParser, then print
# each entry of listURLs followed by the bodyText and titleText attributes.
# NOTE(review): getpage() may return None; it is passed to parse() as-is --
# confirm how htmlparser handles that.
url = 'http://www.naver.com'
text = getpage(url)

parser = htmlparser.SimpleTextHTMLParser()
parser.parse(text, url)

# presumably listURLs holds the links collected by parse() -- verify
# against the htmlparser module.
for link in parser.listURLs:
    print(link)

print(parser.bodyText)
print(parser.titleText)




최근 덧글