# # PyMeld is released under the terms of the following MIT-style license: # # Copyright (c) Richie Hindle 2002 # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to # deal in the Software without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or # sell copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. # r"""Manipulates HTML (and XML, informally) using an object model based on `id` attributes. `Container` objects are created from HTML, and provide access to all the tags with `id="name"` attributes like this: `container.name`. Individual tags with a container are represented as `Tag` objects, which provide access to their HTML attributes like this: `tag.name`. You can also create `Tag`s directly: `tag = Tag( html="

" )`. `Tag`s have a magic attribute `content` which represents the text between the opening and closing tags. Here's an example that takes some HTML, changes some of the content and tag attributes, and inserts a modified clone of a tag into another part of the HTML: >>> import PyMeld >>> html = ''' ... ... ... Duplicate of 'pushme' goes here. ... ''' >>> page = PyMeld.Container( html ) # Create a Container from the HTML. >>> print page.message # Access a tag within the container. >>> print page.message.rows 2 >>> page.message.content = "New message." # Change the content of a tag. >>> print page.message.content New message. >>> print page.message >>> pushme2 = page.pushme.clone() # Clone a tag... >>> pushme2.id = 'pushme2' # ...rename the clone... >>> print pushme2 >>> page.duplicate.content = pushme2 # ...and add it to the container. >>> page.pushme.value = "Don't push me" # Change the original tag. >>> page.pushme2.value = "Nor me" # Change the clone in the container. >>> print page # Print the resulting page. Advantages: o No special requirements for the HTML (or just one: attribute values must be quoted) - so you can use any HTML/XML editor. o No logic embedded in HTML - separation of visual design from code and data. o You can include placeholders in the HTML and replace them with new content or modified clones at runtime (see the data-driven example below). o Works by string substitution, rather than by decomposing and rebuilding the HTML, hence has no impact on the parts of the page you don't manipulate. o Does nothing but maniplating HTML, hence fits in with any other Web toolkits you're using. o Tracebacks always point to the right place - many Python-HTML mixing systems use exec or eval, making bugs hard to track down. Here's a data-driven example populating a table from a data source, basing the table on sample data put in by the page designer. Note that in the real world the HTML would normally be a larger page read from an external file, keeping the data and presentation separate, and the data would come from an external source like an RDBMS: >>> html = ''' ... ... ...

Name	Age
Example name	21

''' >>> table = PyMeld.Container( html ) >>> templateRow = PyMeld.Container( table.row ) >>> outputLines = [ str( table.header ) ] >>> for name, age in [ ('Richie', 30), ('Dave', 39), ('John', 78) ]: ... newRow = templateRow.clone() ... newRow.name.content = name ... newRow.age.content = age ... outputLines.append( str( newRow ) ) >>> table.people.content = '\n'+string.join( outputLines, '\n' )+'\n' >>> print table

Name	Age
Richie	30
Dave	39
John	78

Note that if you were going to subsequently manipulate the table, using PyMeld or JavaScript for instance, you'd need to rename each `row`, `name` and `age` tag to have a unique name - you can do that by assigning to the `id` attribute but I've skipped that to make the example simpler. Here's an example that mixes single and double quotes, uses the wrong case to access a tag and an attribute, sets a numeric attribute value using a number rather than a string, manipulates an unclosed tag (``), manipulates nested tags, adds content to previously-empty tag, adds a new attribute to a tag, and introduces a double-quote character into a double-quoted attribute value, just to prove it all works: >>> html = '''

... Hello. ... This is doubly nested. ... ... ''' >>> doc = Container( html ) >>> doc.Pog.Height = 10 >>> doc.one.content = "New text." >>> doc.more.content = "More." >>> doc.more.title = 'She cried, "More, more, more!"' >>> print doc

New text. More. """ import re, string # Regular expressions for tags and attributes. openTagRE = r"""(?ix) <(?P\w+) # Tag opens; capture its name (?:\s+\w+=(?P["']).*?(?P=quote1))* # Attributes preceding 'id' \s+id=(?P["'])%s(?P=quote2) # The 'id' tag (?:\s+\w+=(?P["']).*?(?P=quote3))* # Attributes following 'id' \s*/?> # Tag closes """ attributeRE = r"""(?ix) (?P\s+) (?P%s)=(?P["'])(?P.*?)(?P=quote) """ idRE = r"""(?i)\s+id=(?P["'])(?P.*?)(?P=quote)""" class Container: """Represents an HTML document, or a fragment of one. Pass your HTML (or a `Container` or `Tag`) to the constructor. You can then access all the tags with `id="name"` attributes as `container.name`. You can set the content of such a tag using `container.name.content = content`, where `content` can be a string or another `Container` or `Tag`.""" def __init__( self, html ): """Creates a `Container` from HTML or another `Container` or `Tag`.""" # Coerce the 'html' to be a string, and take a copy (in case it's already a string). self.html = str( html )[:] def clone( self ): """Creates a clone of a Container.""" return Container( self.html ) def __getattr__( self, name ): """Returns the contained `Tag` that has the given name as its `id` attribute.""" if not re.search( openTagRE % name, self.html ): raise AttributeError, name else: return Tag( self, name ) def __str__( self ): """Returns the HTML that this `Container` represents.""" return self.html class Tag: """Represents a named tag in a `Container`, named by its `id` attribute. You can access all the HTML attributes of the `Tag` as `tag.name`. Create `Tag`s either via a `Container` using `container.name`, or directly from a snippet of HTML: `tag = Tag( "

" )`.""" def __init__( self, parent=None, name=None, html=None ): """Pass either `parent` and `name` to access a tag within a `Container`, or 'html' to create a standalone `Tag` from a snippet of HTML.""" if parent and name and not html: self._parent = parent self._name = name elif not parent and not name and html: self._parent = Container( html ) idMatch = re.search( idRE, html ) if not idMatch: raise AttributeError, "Tag HTML must have an 'id' attribute" else: self._name = idMatch.group( 'id' ) else: raise ValueError, "Tag() takes either 'parent' and 'name', or 'html'" def _updateParentRange( self ): # Updates the object's idea of where it lives within its Container's HTML. # self._openTagStart and self._openTagEnd mark the opening tag, while # self._closingTagStart and self._closingTagEnd mark the closing tag. openTagMatch = re.search( openTagRE % self._name, self._parent.html ) self._tagName = openTagMatch.group( 'tag' ) self._openTagStart, self._openTagEnd = openTagMatch.span() # Now find the closing tag in the remainder of the HTML. Most of this # code deals with nested tags - counting up nested opening tags and # counting down the closing tags until it gets to zero. remainder = self._parent.html[ self._openTagEnd: ] depth = 1 pos = 0 while 1: nextOpenMatch = re.search( '(?i)<%s(>|\s)' % self._tagName, remainder[ pos: ] ) nextCloseMatch = re.search( '(?i)' % self._tagName, remainder[ pos: ] ) if not nextCloseMatch: # There's no matching closing tag. self._closingTagStart = self._openTagEnd self._closingTagEnd = self._openTagEnd break elif not nextOpenMatch: if depth == 1: # We've found the matching closing tag. self._closingTagStart = self._openTagEnd + pos + nextCloseMatch.span()[ 0 ] self._closingTagEnd = self._openTagEnd + pos + nextCloseMatch.span()[ 1 ] break else: # We've found a closing tag, but it's for a nested opening tag. depth = depth - 1 pos = pos + nextCloseMatch.span()[ 1 ] elif nextOpenMatch.span()[ 0 ] < nextCloseMatch.span()[ 0 ]: # We've found a nested opening tag. depth = depth + 1 pos = pos + nextOpenMatch.span()[ 1 ] else: # nextCloseMatch.span()[ 0 ] < nextOpenMatch.span()[ 0 ] depth = depth - 1 if depth == 0: # We've found the matching closing tag. self._closingTagStart = self._openTagEnd + pos + nextCloseMatch.span()[ 0 ] self._closingTagEnd = self._openTagEnd + pos + nextCloseMatch.span()[ 1 ] break else: # We've found a closing tag but it's for a nested opening tag. pos = pos + nextCloseMatch.span()[ 1 ] def __getattr__( self, name ): """Returns the `Tag`s HTML content for the special attribute `content`, or returns the value of the given attribute.""" if name[ 0 ] == '_': return self.__dict__[ name ] elif name == 'content': self._updateParentRange() return self._parent.html[ self._openTagEnd:self._closingTagStart ] else: self._updateParentRange() openTag = self._parent.html[ self._openTagStart:self._openTagEnd ] attributeMatch = re.search( attributeRE % name, openTag ) return string.replace( attributeMatch.group( 'value' ), '"', '"' ) def __setattr__( self, name, value ): """Sets the `Tag`s HTML content for the special attribute `content`, or sets the value of the given attribute.""" if name[ 0 ] == '_': self.__dict__[ name ] = value elif name == 'content': self._updateParentRange() self._parent.html = self._parent.html[ :self._openTagEnd ] + \ str( value ) + \ self._parent.html[ self._closingTagStart: ] else: self._updateParentRange() openTag = self._parent.html[ self._openTagStart:self._openTagEnd ] attributeMatch = re.search( attributeRE % name, openTag ) escapedValue = string.replace( str( value ), '"', '"' ) if attributeMatch: # This is a change to an existing attribute. attributeStart, attributeEnd = attributeMatch.span() quote = attributeMatch.group( 'quote' ) newOpenTag = openTag[ :attributeStart ] + \ '%s%s=%s%s%s' % ( attributeMatch.group( 'space' ), attributeMatch.group( 'name' ), quote, escapedValue, quote ) + \ openTag[ attributeEnd: ] self._parent.html = self._parent.html[ :self._openTagStart ] + \ newOpenTag + \ self._parent.html[ self._openTagEnd: ] else: # This is introducing a new attribute. newAttributePos = self._openTagStart + 1 + len( self._tagName ) newAttribute = ' %s="%s"' % ( name, escapedValue ) self._parent.html = self._parent.html[ :newAttributePos ] + \ newAttribute + \ self._parent.html[ newAttributePos: ] if string.lower( name ) == 'id': self._name = str( value ) def __str__( self ): """Gets the whole tag's HTML.""" self._updateParentRange() return self._parent.html[ self._openTagStart:self._closingTagEnd ] def clone( self ): """Creates a clone of a `Tag`. Useful for copying tags between `Containers` or for populating template HTML tags with real data (see the data-driven example in the main module documentation).""" return Tag( html=str( self ) ) def test(): """Runs doctest on the `PyMeld` module.""" import doctest, PyMeld return doctest.testmod( PyMeld ) if __name__ == '__main__': failed, total = test() print "%d of %d doctests successful." % ( total - failed, total )