url library 프로그래밍

# -*- coding: cp949 -*-
import urllib

def getpage(url):
    """Download ``url`` and return the raw response body.

    Best-effort helper: any ordinary failure while opening or reading the
    URL yields ``None`` instead of raising.

    Args:
        url: Address to fetch, passed unchanged to ``urllib.urlopen``.

    Returns:
        Whatever ``read()`` returns for the opened resource, or ``None`` if
        opening or reading failed.
    """
    try:
        f = urllib.urlopen(url)
    except Exception:
        # ``Exception`` (not a bare ``except``) keeps the best-effort
        # contract while letting KeyboardInterrupt/SystemExit propagate.
        return None
    try:
        return f.read()
    except Exception:
        return None
    finally:
        # Release the underlying connection even when read() fails.
        f.close()

# Fetch the page; getpage() hands back None when the download fails.
text = getpage('http://yonsei.ac.kr/')

if text:
    print(text)
else:
    # Korean message: "Invalid address. Please enter it correctly."
    print("잘못된 주소입니다. 바르게 입력하세요")


* 페이지에서 링크(href)만 추출하여 출력해 준다

import urllib
import re

def getpage(url):
    """Download ``url`` and return the raw response body.

    Best-effort helper: any ordinary failure while opening or reading the
    URL yields ``None`` instead of raising.

    Args:
        url: Address to fetch, passed unchanged to ``urllib.urlopen``.

    Returns:
        Whatever ``read()`` returns for the opened resource, or ``None`` if
        opening or reading failed.
    """
    # Exception handling: failures are reported to the caller as None.
    try:
        f = urllib.urlopen(url)
    except Exception:
        # ``Exception`` (not a bare ``except``) keeps the best-effort
        # contract while letting KeyboardInterrupt/SystemExit propagate.
        return None
    try:
        return f.read()
    except Exception:
        return None
    finally:
        # Release the underlying connection even when read() fails.
        f.close()

# Fetch the page; getpage() returns None when the download fails.
text = getpage('http://naver.com')

# Case-insensitive pattern for an href value in three spellings: unquoted,
# double-quoted, or single-quoted. Each alternative has its own capture
# group, so only one group is set per match. A raw string keeps the regex
# escapes (\S, \s) from being read as string escapes.
p = re.compile(
    r'''href=([^'"]\S+?)[\s>]|href="([^'"]*?)"|href='([^'"]*?)\'''',
    re.I)

# ``text or ''`` avoids a TypeError from the regex engine when the download
# failed and text is None; nothing is printed in that case.
for match in p.finditer(text or ''):
    # Keep only the group that captured something. An empty href (href="")
    # leaves no non-empty group and is skipped instead of raising IndexError.
    url = [group for group in match.groups() if group]
    if url:
        print(url[0])

# -*- coding: cp949 -*-
import urllib
import re
import htmlparser

def getpage(url):
    """Download ``url`` and return the raw response body.

    Best-effort helper: any ordinary failure while opening or reading the
    URL yields ``None`` instead of raising.

    Args:
        url: Address to fetch, passed unchanged to ``urllib.urlopen``.

    Returns:
        Whatever ``read()`` returns for the opened resource, or ``None`` if
        opening or reading failed.
    """
    # Exception handling: failures are reported to the caller as None.
    try:
        f = urllib.urlopen(url)
    except Exception:
        # ``Exception`` (not a bare ``except``) keeps the best-effort
        # contract while letting KeyboardInterrupt/SystemExit propagate.
        return None
    try:
        return f.read()
    except Exception:
        return None
    finally:
        # Release the underlying connection even when read() fails.
        f.close()

# Download the page; getpage() returns None when the fetch fails.
url = 'http://www.naver.com'
text = getpage(url)

# NOTE(review): SimpleTextHTMLParser comes from the local ``htmlparser``
# module, which is not visible here. Presumably parse() fills listURLs,
# bodyText and titleText from the HTML; confirm against that module.
p = htmlparser.SimpleTextHTMLParser()
p.parse(text, url)

# Print every entry of listURLs, one per line.
for link in p.listURLs:
    print(link)

# Then print the bodyText and titleText attributes, in that order.
print(p.bodyText)
print(p.titleText)


트랙백

이 글과 관련된 글 쓰기 (트랙백 보내기)
TrackbackURL : http://browndoor.egloos.com/tb/2470855 [도움말]

덧글

덧글 입력 영역