import webapp2
from google.appengine.api import urlfetch # default is 5 second timeout
from xml.dom.minidom import parse, parseString
# import xml.etree.ElementTree as ET
# Address of the Pandora recent-activity feed that the request handler fetches.
# NOTE(review): USERNAME looks like a placeholder for a real account name - confirm.
URL = 'http://feeds.pandora.com/feeds/people/USERNAME/recentactivity.xml'
# Feed schema: channel -> item -> (title, link, description, pubDate, ...).
# The handler reads link, description and pubDate from within each item.
class Example(webapp2.RequestHandler):
    """Request handler that lists the items of the feed at ``URL`` as HTML links."""

    def get(self):
        """Fetch the feed and write one line per ``<item>`` to the response.

        Each line has the form ``<br /><a href="LINK">DESCRIPTION</a> PUBDATE``.
        When the feed request does not return HTTP 200, or its body is not
        well-formed XML, the response status is set to 502 and a short
        message is written instead of the item list.
        """
        # Function-scope import so the class does not rely on additional
        # module-level imports.
        from xml.parsers.expat import ExpatError

        result = urlfetch.fetch(url=URL, method=urlfetch.GET)
        if result.status_code != 200:
            self._write_error(
                u'Feed request returned HTTP status {}.'.format(
                    result.status_code))
            return

        try:
            dom = parseString(result.content)
        except ExpatError as error:
            self._write_error(
                u'Feed is not well-formed XML: {}'.format(error))
            return

        self.response.write(u'<html><body>')
        for item in dom.getElementsByTagName('item'):
            self.response.write(self._render_item(item))
        self.response.write(u'</body></html>')

    def _write_error(self, message):
        """Write ``message`` as a minimal HTML page with status 502."""
        self.response.status = 502
        self.response.write(
            u'<html><body>{}</body></html>'.format(
                self._escape_html(message)))

    def _render_item(self, item):
        """Return the HTML line for one ``<item>`` element.

        Values are read from the first ``link``, ``description`` and
        ``pubDate`` elements inside ``item``; a missing element is treated
        as empty text. Unicode templates are used because formatting
        non-ASCII text into a byte-string template fails on Python 2.
        """
        link = self._escape_html(self._element_text(item, 'link').strip())
        description = self._escape_html(
            self._element_text(item, 'description'))
        timestamp = self._escape_html(self._element_text(item, 'pubDate'))
        if not link:
            # No target to point at: emit the text without an anchor.
            return u'<br />{} {}\n'.format(description, timestamp)
        return u'<br /><a href="{}">{}</a> {}\n'.format(
            link, description, timestamp)

    @staticmethod
    def _element_text(parent, tag_name):
        """Return the text of the first ``tag_name`` element under ``parent``.

        Text and CDATA children of that element are concatenated; an empty
        string is returned when no such element exists.
        """
        elements = parent.getElementsByTagName(tag_name)
        if not elements:
            return u''
        return u''.join(
            node.data
            for node in elements[0].childNodes
            if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE)
        )

    @staticmethod
    def _escape_html(text):
        """Escape ``&``, ``<``, ``>`` and ``"`` so ``text`` is safe in HTML."""
        from xml.sax.saxutils import escape

        return escape(text, {'"': '&quot;'})

    def get_angle_bracket_data(self, angle_bracket_string, start_keyword,
                               end_keyword):
        """Return the text between ``start_keyword`` and ``end_keyword``.

        The end marker is searched for only after the start marker. If
        either marker is missing, an empty string of the same type as
        ``angle_bracket_string`` is returned. ``get`` no longer calls this
        method (it reads values from the DOM); it is kept so existing
        callers continue to work.
        """
        start = angle_bracket_string.find(start_keyword)
        if start == -1:
            return angle_bracket_string[:0]
        start += len(start_keyword)
        end = angle_bracket_string.find(end_keyword, start)
        if end == -1:
            return angle_bracket_string[:0]
        return angle_bracket_string[start:end]