john pfeiffer
  • Home
  • Categories
  • Tags
  • Archives

google app engine urlfetch xml pandora activity feed

import webapp2
from google.appengine.api import urlfetch   # default is 5 second timeout
from xml.dom.minidom import parse, parseString

# import xml.etree.ElementTree as ET

# Feed fetched by the Example handler below.
# NOTE(review): 'USERNAME' looks like a placeholder for a real Pandora account name - confirm before deploying.
URL = 'http://feeds.pandora.com/feeds/people/USERNAME/recentactivity.xml'
# schema: channel -> item (title,link,description -> pubDate?,...)


class Example( webapp2.RequestHandler ):
    """Request handler that renders the XML feed at URL as a list of HTML links.

    For every <item> in the feed one line is written to the response:
    the <description> text as a link to <link>, followed by <pubDate>.
    """

    def get( self ):
        """Fetch the feed, parse it with minidom and write one link per <item>.

        Fetch failures, non-200 answers and unparseable bodies produce a short
        error page with status 502 instead of an uncaught exception.
        """
        # Local import: ExpatError is what minidom.parseString raises on malformed XML.
        from xml.parsers.expat import ExpatError

        self.response.write( '<html><body>' )

        try:
            result = urlfetch.fetch( url=URL, method=urlfetch.GET )
        except urlfetch.Error:
            self._write_failure( 'could not fetch the feed' )
            return

        if result.status_code != 200:
            self._write_failure( 'feed returned HTTP status {}'.format( result.status_code ) )
            return

        try:
            dom = parseString( result.content )     # dom_from_file = parse( "example.xml" )
        except ExpatError:
            self._write_failure( 'feed is not well-formed xml' )
            return

        itemlist = dom.getElementsByTagName( 'item' )       # NodeList

        # Exploration notes (ElementTree alternative):
        #   root = ET.fromstring( result.content )    # generates an Element, otherwise tree = ET.parse( file_object ), root = tree.getroot()
        #   str( root.tag )       # channel
        #   str( root.attrib )    # {}
        #   element = root[0] ; type( element )
        #
        # Exploration notes (minidom):
        #   itemElement = itemlist[0]
        #   titleList = itemElement.getElementsByTagName( 'title' )     # NodeList of size 1
        #   titleList[0].attributes           # xml.dom.minidom.NamedNodeMap
        #   titleElement = titleList[0]
        #   titleElement.tagName              # title
        #   titleElement.toxml()              # <title>some title</title>
        #   '{}'.format( titleElement )       # <DOM Element: title at 0x4de3d48>
        #   titleElement.attributes.keys()    # []  , nope, no keys
        #   len( itemlist ) ; itemlist.item(0).toxml() ; dom.toxml()

        for item in itemlist:
            item_xml_string = item.toxml()

            # An item without a <link> child yields an empty href rather than an IndexError.
            linkList = item.getElementsByTagName( 'link' )      # NodeList, normally of size 1
            link = ''
            if linkList:
                link = self.get_angle_bracket_data( linkList[0].toxml(), '<link>', '</link>' )

            description = self.get_angle_bracket_data( item_xml_string, '<description>', '</description>' )
            timestamp = self.get_angle_bracket_data( item_xml_string, '<pubDate>', '</pubDate>' )

            # NOTE(review): the values are raw slices of toxml() output and are written
            # into the page without further HTML escaping - confirm the feed is trusted.
            self.response.write(  '<br /><a href="{}">{}</a> {}\n'.format( link, description, timestamp ) )

        self.response.write( '</body></html>' )


    def _write_failure( self, message ):
        """Mark the response as 502 and close the page with a fixed error message."""
        self.response.status_int = 502
        self.response.write( '{}</body></html>'.format( message ) )


    def get_angle_bracket_data( self, angle_bracket_string, start_keyword, end_keyword ):
        """Return the text between start_keyword and the next end_keyword.

        angle_bracket_string -- serialized xml, e.g. '<title>some title</title>'
        start_keyword        -- opening marker, e.g. '<title>'
        end_keyword          -- closing marker, e.g. '</title>'

        Returns '' when either marker is missing (for example an absent element
        or an empty '<link/>'), instead of slicing with find()'s -1 result.
        """
        start = angle_bracket_string.find( start_keyword )
        if start == -1:
            return ''
        start = start + len( start_keyword )

        # Search for the closing marker only after the opening one.
        end = angle_bracket_string.find( end_keyword, start )
        if end == -1:
            return ''

        return angle_bracket_string[ start : end ]

  • « google app engine datastore like query workaround
  • google app engine cron job securing a url admin »

Published

Jul 11, 2013

Category

python-appengine

~205 words

Tags

  • activity 1
  • appengine 18
  • engine 12
  • feed 1
  • google 18
  • pandora 1
  • python 180
  • urlfetch 2
  • xml 22