66from urlparse import urljoin
77
88
def get_next_url(current_url, next_url):
    """
    Resolve a link found on a page into an absolute URL.

    URL layout: protocol://hostname[:port]/path[;parameters][?query][#fragment]

    Resolution is delegated to ``urljoin``, which handles absolute URLs,
    root-relative paths ('/a/b?x=1'), page-relative paths ('b.htm') and
    scheme-relative references ('//host/a'). The earlier hand-written
    branches treated every link starting with 'http' as absolute (so a
    relative file name such as 'httpd.htm' was returned unresolved) and
    rebuilt scheme-relative links on the current host, dropping the host
    named in the link itself.

    :param current_url: absolute URL of the page the link was found on
    :param next_url: link to resolve; may be absolute or relative
    :return: the absolute URL, or '' when next_url is None or empty
    """
    # urljoin(base, '') would hand back the base URL; this function reports
    # "no next URL" as an empty string instead.
    if not next_url:
        return ''
    return urljoin(current_url, next_url)
2426
2527
2628def test ():
2729 print get_next_url ('http://www.163.com/mail/index.htm' , 'http://www.163.com/about.htm' )
30+ print urljoin ('http://www.163.com/mail/index.htm' , 'http://www.163.com/about.htm' )
31+ print '\n ' ,
2832 print get_next_url ('http://www.163.com/mail/index.htm' , '/about.htm' )
33+ print urljoin ('http://www.163.com/mail/index.htm' , '/about.htm' )
34+ print '\n ' ,
2935 print get_next_url ('http://www.163.com/mail/index.htm' , 'about.htm' )
36+ print urljoin ('http://www.163.com/mail/index.htm' , 'about.htm' )
37+ print '\n ' ,
38+ print get_next_url ('http://sh.58.com/banjia/?sort=pingfen' , '/banjia/pn2/?sort=pingfen' )
39+ print urljoin ('http://sh.58.com/banjia/?sort=pingfen' , '/banjia/pn2/?sort=pingfen' )
3040
3141
3242if __name__ == '__main__' :
3343 test ()
3444 test_url = 'http://suining.58.com/zhongdiangong/?sort=pingfen'
3545 print urlparse (test_url ).hostname .rstrip ('.58.com' )
36- print urlparse (test_url ).path .strip ('/' )
46+ print urlparse (test_url ).path .strip ('/' )
47+
48+
"""
The test results above show that
a single urljoin call is enough to do the job.
"""
0 commit comments