john pfeiffer
  • Home
  • Categories
  • Tags
  • Archives

xml minidom

# https://docs.python.org/2/library/xml.dom.minidom.html
# https://wiki.python.org/moin/MiniDom

from xml.dom import minidom
# Parse an XML file from disk into a DOM Document and dump it unformatted.
dom1 = minidom.parse('/path/filename.xml')
print(dom1.toxml())  # single-argument print(...) works on Python 2 and 3

# Write an indented copy of the document. Passing an encoding makes
# toprettyxml() return encoded bytes (and puts the encoding in the XML
# declaration), so the file is opened in binary mode. Writing the unencoded
# unicode result to a text-mode file fails on Python 2 as soon as the
# document contains a non-ASCII character.
with open('/tmp/pretty.xml', 'wb') as f:
    f.write(dom1.toprettyxml(encoding='utf-8'))

# Sample document used by the examples below: four <property> elements, each
# with a "name" attribute and a single text child.
document = """\
<?xml version="1.0" ?><application-configuration>
<properties>
<property name="server.id">1234-65WH-FJWH-GM0X</property>
<property name="hibernate.connection.driver_class">com.mysql.jdbc.Driver</property>
<property name="hibernate.connection.password">defaultpassword</property>
<property name="hibernate.connection.username">root</property>
</properties></application-configuration>
"""

# Parse from an in-memory string instead of a file.
dom1 = minidom.parseString(document)

# getElementsByTagName searches the whole subtree and returns a NodeList.
props = dom1.getElementsByTagName('property')
print(len(props))
# 4

# Serialise a single element back to XML.
print(props[0].toxml())
# <property name="server.id">1234-65WH-FJWH-GM0X</property>

# The element's text lives in its first child (a Text node); .data is the string.
print(props[0].firstChild.data)
# 1234-65WH-FJWH-GM0X


for prop in props:
    # Alternative the original note calls "undocumented":
    #   print(prop.attributes['name'].value)
    # Preferred, per https://docs.python.org/2/library/xml.dom.html#dom-attr-objects
    if prop.hasAttribute('name'):
        print(prop.getAttribute('name'))


- - -
import webapp2
from google.appengine.api import urlfetch   # default is 5 second timeout
from xml.dom.minidom import parseString

from lib.utility import jinja_environment, Utility


class PandoraHandler( webapp2.RequestHandler ):
    """Request handler that renders a Pandora recent-activity feed as HTML.

    A GET request fetches the XML feed at ``URL``, pulls the link,
    description and pubDate text out of every ``<item>`` element and renders
    them with the ``templates/pandora.html`` Jinja template.
    """

    # schema: channel -> item (title,link,description -> pubDate?,...)
    URL = 'http://feeds.pandora.com/feeds/people/USER/recentactivity.xml'

    def get( self ):
        """Fetch the feed, extract one row per <item>, and write the rendered page."""
        result = urlfetch.fetch( url=self.URL, method=urlfetch.GET )
        # To read from a local file instead: xml.dom.minidom.parse( "example.xml" )
        dom = parseString( result.content )
        itemlist = dom.getElementsByTagName( 'item' )       # NodeList
        # DEBUG aids: self.response.write( result.content ) or dom.toxml()

        results = []
        for item in itemlist:
            link_nodes = item.getElementsByTagName( 'link' )
            # The original note says this NodeList is always of size 1; guard
            # anyway so a single item without a <link> cannot raise IndexError
            # and fail the whole page.
            link = ''
            if link_nodes:
                # toxml() serialises the node to a string such as <link>...</link>
                link = Utility.get_angle_bracket_value( link_nodes[0].toxml(), '<link>', '</link>' )

            item_xml_string = item.toxml()
            description = Utility.get_angle_bracket_value( item_xml_string, '<description>', '</description>' )
            # The raw pubDate text is used as the timestamp. An alternative
            # noted in the original: Utility.datetime_string_to_seconds( pub_date )
            timestamp = Utility.get_angle_bracket_value( item_xml_string, '<pubDate>', '</pubDate>' )
            results.append( dict( link=link, description=description, timestamp=timestamp ) )

        table_headers = [ 'Song', 'Listened' ]

        template = jinja_environment.get_template( 'templates/pandora.html' )

        template_values = { 'title': 'Recent Music',
                            'table_headers': table_headers,
                            'items': results,
                          }
        self.response.content_type = 'text/html'
        self.response.write( template.render( template_values ) )

    def get_angle_bracket_data( self, angle_bracket_string, start_keyword, end_keyword ):
        """Return the text between start_keyword and the next end_keyword.

        angle_bracket_string -- the string to search, e.g. serialised XML
        start_keyword        -- opening marker, e.g. '<link>'
        end_keyword          -- closing marker, e.g. '</link>'

        Returns an empty string when either marker is missing. The closing
        marker is searched for only after the opening marker.
        """
        start = angle_bracket_string.find( start_keyword )
        if start == -1:
            # Without this check, -1 + len( start_keyword ) would point at an
            # arbitrary offset and return unrelated text.
            return ''
        start += len( start_keyword )

        # Search from `start` so an earlier occurrence of end_keyword is ignored.
        end = angle_bracket_string.find( end_keyword, start )
        if end == -1:
            # Without this check, slicing to -1 would silently drop the last character.
            return ''
        return angle_bracket_string[ start : end ]

  • « Mysql mysqldump csv
  • ec2 boto sort by launch time »

Published

Nov 4, 2015

Category

python

~218 words

Tags

  • minidom 1
  • python 180
  • xml 22