#-------------------------------------------------------------------------------
#
#  === getting stations ===
#
#  Author: MAO
#          http://mao.nyanta.jp/
#
#  (C) 2010 MAO
#
#-------------------------------------------------------------------------------
"""Scrape the Yahoo! Japan transit station list into ``stations.csv``.

The script walks three levels of pages (prefectures -> lines -> stations)
and writes one CSV row per station next to this file.
"""

import csv
import os

from pyquery import PyQuery

# Entry page that lists every prefecture, grouped by area.
PREF_LIST_URL = 'http://transit.map.yahoo.co.jp/station/list'


#-------------------------------------------------------------------------------
def _get_links(url, selector, heading_key):
    """Collect the links found inside each ``<dl>`` matched by *selector*.

    Args:
        url: Page to fetch.
        selector: CSS selector matching the ``<dl>`` elements to scan.
        heading_key: Dict key under which the text of the ``<dl>``'s
            ``<dt>`` is stored in every returned entry.

    Returns:
        A list of dicts with the keys ``'url'`` (the anchor's ``href``),
        ``'name'`` (the anchor's text) and *heading_key*, in document order.
    """
    links = []
    html = PyQuery(url=url)
    for dl in html(selector):
        dl = PyQuery(dl)
        # The heading is the same for every anchor of this <dl>, so it is
        # looked up once per list instead of once per link.
        heading = dl.find('dt').text()
        for a in dl.find('a'):
            a = PyQuery(a)
            links.append({
                # NOTE(review): the href is stored as-is and later passed to
                # PyQuery(url=...); this presumably relies on the site
                # emitting absolute URLs -- confirm.
                'url': a.attr('href'),
                'name': a.text(),
                heading_key: heading,
            })
    return links


#-------------------------------------------------------------------------------
def get_prefs():
    """Return the prefectures listed on the station index page.

    Returns:
        A list of dicts with the keys ``'url'``, ``'name'`` and ``'area'``.
    """
    return _get_links(PREF_LIST_URL, '#area dl', 'area')


#-------------------------------------------------------------------------------
def get_lines(url):
    """Return the railway lines listed on the prefecture page at *url*.

    Returns:
        A list of dicts with the keys ``'url'``, ``'name'`` and ``'group'``.
    """
    return _get_links(url, '#rail dl', 'group')


#-------------------------------------------------------------------------------
def get_stations(url):
    """Return the station names listed on the line page at *url*."""
    html = PyQuery(url=url)
    return [PyQuery(a).text() for a in html('#station li a')]


#-------------------------------------------------------------------------------
def main():
    """Crawl every prefecture and line and write ``stations.csv``.

    Each row is ``area, prefecture, line group, line, station``. The file is
    created in the directory that contains this script.
    """
    out_path = os.path.join(os.path.dirname(__file__), 'stations.csv')

    # ``with`` closes the file even when a page fetch raises part-way through.
    # UTF-8 is set explicitly so the output does not depend on the locale;
    # newline='' lets the csv module control line endings itself.
    with open(out_path, 'w', encoding='utf-8', newline='') as out:
        # lineterminator='\n' keeps the line endings of the previous
        # hand-joined output; csv.writer only adds quoting where a field
        # contains a comma, quote or newline.
        writer = csv.writer(out, lineterminator='\n')

        print('Getting prefectures ...')
        for pref in get_prefs():
            print('Getting %s lines ...' % pref['name'])
            for line in get_lines(pref['url']):
                print('Getting %s - %s stations ...'
                      % (pref['name'], line['name']))
                for station in get_stations(line['url']):
                    writer.writerow([
                        pref['area'],
                        pref['name'],
                        line['group'],
                        line['name'],
                        station,
                    ])


#-------------------------------------------------------------------------------
if __name__ == '__main__':
    main()