#
# PyMeld is released under the terms of the following MIT-style license:
#
# Copyright (c) Richie Hindle 2002
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#

r"""Manipulates HTML (and XML, informally) using an object model based on `id`
attributes.  `Container` objects are created from HTML, and provide access to
all the tags with `id="name"` attributes like this: `container.name`.
Individual tags within a container are represented as `Tag` objects, which
provide access to their HTML attributes like this: `tag.name`.  You can also
create `Tag`s directly: `tag = Tag( html="<p id='intro'>Hello</p>" )`.  `Tag`s
have a magic attribute `content` which represents the text between the opening
and closing tags.

Here's an example that takes some HTML, changes some of the content and tag
attributes, and inserts a modified clone of a tag into another part of the
HTML:

>>> import PyMeld
>>> html = '''<html><body>
... <textarea id="message" rows="2" wrap="off">Type your message.</textarea>
... <input id="pushme" type="button" value="Push me">
... <span id="duplicate">Duplicate of 'pushme' goes here.</span>
... </body></html>'''
>>> page = PyMeld.Container( html )    # Create a Container from the HTML.
>>> print( page.message )              # Access a tag within the container.
<textarea id="message" rows="2" wrap="off">Type your message.</textarea>
>>> print( page.message.rows )
2
>>> page.message.content = "New message."   # Change the content of a tag.
>>> print( page.message.content )
New message.
>>> print( page.message )
<textarea id="message" rows="2" wrap="off">New message.</textarea>
>>> pushme2 = page.pushme.clone()      # Clone a tag...
>>> pushme2.id = 'pushme2'             # ...rename the clone...
>>> print( pushme2 )
<input id="pushme2" type="button" value="Push me">
>>> page.duplicate.content = pushme2   # ...and add it to the container.
>>> page.pushme.value = "Don't push me"   # Change the original tag.
>>> page.pushme2.value = "Nor me"      # Change the clone in the container.
>>> print( page )                      # Print the resulting page.
<html><body>
<textarea id="message" rows="2" wrap="off">New message.</textarea>
<input id="pushme" type="button" value="Don't push me">
<span id="duplicate"><input id="pushme2" type="button" value="Nor me"></span>
</body></html>

Advantages:

 o No special requirements for the HTML (or just one: attribute values must
   be quoted) - so you can use any HTML/XML editor.

 o No logic embedded in HTML - separation of visual design from code and
   data.

 o You can include placeholders in the HTML and replace them with new
   content or modified clones at runtime (see the data-driven example below).

 o Works by string substitution, rather than by decomposing and rebuilding
   the HTML, hence has no impact on the parts of the page you don't
   manipulate.

 o Does nothing but manipulating HTML, hence fits in with any other Web
   toolkits you're using.

 o Tracebacks always point to the right place - many Python-HTML mixing
   systems use exec or eval, making bugs hard to track down.

Here's a data-driven example populating a table from a data source, basing the
table on sample data put in by the page designer.  Note that in the real world
the HTML would normally be a larger page read from an external file, keeping
the data and presentation separate, and the data would come from an external
source like an RDBMS:

>>> html = '''<table id="people">
... <tr id="header"><th>Name</th><th>Age</th></tr>
... <tr id="row"><td id="name">Example name</td><td id="age">21</td></tr>
... </table>'''
>>> table = PyMeld.Container( html )
>>> templateRow = PyMeld.Container( table.row )
>>> outputLines = [ str( table.header ) ]
>>> for name, age in [ ('Richie', 30), ('Dave', 39), ('John', 78) ]:
...     newRow = templateRow.clone()
...     newRow.name.content = name
...     newRow.age.content = age
...     outputLines.append( str( newRow ) )
>>> table.people.content = '\n' + '\n'.join( outputLines ) + '\n'
>>> print( table )
<table id="people">
<tr id="header"><th>Name</th><th>Age</th></tr>
<tr id="row"><td id="name">Richie</td><td id="age">30</td></tr>
<tr id="row"><td id="name">Dave</td><td id="age">39</td></tr>
<tr id="row"><td id="name">John</td><td id="age">78</td></tr>
</table>

Note that if you were going to subsequently manipulate the table, using PyMeld
or JavaScript for instance, you'd need to rename each `row`, `name` and `age`
tag to have a unique name - you can do that by assigning to the `id` attribute
but I've skipped that to make the example simpler.

Here's an example that mixes single and double quotes, uses the wrong case to
access a tag and an attribute, sets a numeric attribute value using a number
rather than a string, manipulates an unclosed tag (`<img>`), manipulates nested
tags, adds content to previously-empty tag, adds a new attribute to a tag, and
introduces a double-quote character into a double-quoted attribute value, just
to prove it all works:

>>> html = '''<html><head><title>Pog</title></head><body>
... <img id='pog' src="pog.png" height='5'>
... <span id="one">Hello. <span>This is <span>doubly</span> nested.</span></span>
... <span id="more"></span>
... </body></html>'''
>>> doc = Container( html )
>>> doc.Pog.Height = 10
>>> doc.one.content = "New text."
>>> doc.more.content = "More."
>>> doc.more.title = 'She cried, "More, more, more!"'
>>> print( doc )
<html><head><title>Pog</title></head><body>
<img id='pog' src="pog.png" height='10'>
<span id="one">New text.</span>
<span title="She cried, &quot;More, more, more!&quot;" id="more">More.</span>
</body></html>
"""

import re
import string


# Regular expressions for tags and attributes.  `openTagRE` and `attributeRE`
# are templates: interpolate the (regex-escaped) id / attribute name with `%`
# before use.  All three are case-insensitive.
openTagRE = r"""(?ix)
    <(?P<tag>\w+)                               # Tag opens; capture its name
    (?:\s+\w+=(?P<quote1>["']).*?(?P=quote1))*  # Attributes preceding 'id'
    \s+id=(?P<quote2>["'])%s(?P=quote2)         # The 'id' tag
    (?:\s+\w+=(?P<quote3>["']).*?(?P=quote3))*  # Attributes following 'id'
    \s*/?>                                      # Tag closes
"""

attributeRE = r"""(?ix)
    (?P<space>\s+)
    (?P<name>%s)=(?P<quote>["'])(?P<value>.*?)(?P=quote)
"""

idRE = r"""(?i)\s+id=(?P<quote>["'])(?P<id>.*?)(?P=quote)"""


class Container:
    """Represents an HTML document, or a fragment of one.  Pass your HTML (or
    a `Container` or `Tag`) to the constructor.  You can then access all the
    tags with `id="name"` attributes as `container.name`.  You can set the
    content of such a tag using `container.name.content = content`, where
    `content` can be a string or another `Container` or `Tag`."""

    def __init__(self, html):
        """Creates a `Container` from HTML or another `Container` or `Tag`."""
        # Coerce the 'html' to be a string; `str()` of a `Container` or `Tag`
        # is the HTML it represents.
        self.html = str(html)

    def clone(self):
        """Creates a clone of a Container."""
        return Container(self.html)

    def __getattr__(self, name):
        """Returns the contained `Tag` that has the given name as its `id`
        attribute.  Raises `AttributeError` if there is no such tag."""
        # `html` only gets here when it has not been set yet (for instance on
        # an instance built without calling `__init__`).  Fail cleanly rather
        # than recursing through the `self.html` lookup below.
        if name == 'html':
            raise AttributeError(name)
        # The name is escaped so that it is matched literally.
        if not re.search(openTagRE % re.escape(name), self.html):
            raise AttributeError(name)
        return Tag(self, name)

    def __str__(self):
        """Returns the HTML that this `Container` represents."""
        return self.html


class Tag:
    """Represents a named tag in a `Container`, named by its `id` attribute.
    You can access all the HTML attributes of the `Tag` as `tag.name`.  Create
    `Tag`s either via a `Container` using `container.name`, or directly from a
    snippet of HTML: `tag = Tag( html="<p id='intro'>Hello</p>" )`."""

    def __init__(self, parent=None, name=None, html=None):
        """Pass either `parent` and `name` to access a tag within a
        `Container`, or 'html' to create a standalone `Tag` from a snippet of
        HTML.

        Raises `ValueError` for any other combination of arguments, and
        `AttributeError` if `html` contains no `id` attribute."""
        if parent and name and not html:
            self._parent = parent
            self._name = name
        elif not parent and not name and html:
            # A standalone tag lives in a private Container of its own.
            self._parent = Container(html)
            idMatch = re.search(idRE, self._parent.html)
            if not idMatch:
                raise AttributeError("Tag HTML must have an 'id' attribute")
            self._name = idMatch.group('id')
        else:
            raise ValueError(
                "Tag() takes either 'parent' and 'name', or 'html'")

    def _updateParentRange(self):
        """Updates the object's idea of where it lives within its Container's
        HTML.

        Sets `self._tagName`, plus `self._openTagStart` / `self._openTagEnd`
        (the span of the opening tag) and `self._closingTagStart` /
        `self._closingTagEnd` (the span of the closing tag).  For a tag with
        no closing tag, both closing offsets equal `self._openTagEnd`, so the
        tag has empty content.

        Raises `AttributeError` if the tag can no longer be found in the
        parent's HTML."""
        html = self._parent.html
        openTagMatch = re.search(openTagRE % re.escape(self._name), html)
        if not openTagMatch:
            raise AttributeError(self._name)
        self._tagName = openTagMatch.group('tag')
        self._openTagStart, self._openTagEnd = openTagMatch.span()

        # Now find the closing tag in the remainder of the HTML.  Most of this
        # code deals with nested tags - counting up nested opening tags and
        # counting down the closing tags until it gets to zero.
        nestedOpenRE = re.compile(r'(?i)<%s(>|\s)' % self._tagName)
        closeRE = re.compile(r'(?i)</%s>' % self._tagName)
        depth = 1
        pos = self._openTagEnd
        while True:
            nextClose = closeRE.search(html, pos)
            if not nextClose:
                # There's no matching closing tag.
                self._closingTagStart = self._openTagEnd
                self._closingTagEnd = self._openTagEnd
                return
            nextOpen = nestedOpenRE.search(html, pos)
            if nextOpen and nextOpen.start() < nextClose.start():
                # We've found a nested opening tag.
                depth += 1
                pos = nextOpen.end()
            else:
                depth -= 1
                if depth == 0:
                    # We've found the matching closing tag.
                    self._closingTagStart, self._closingTagEnd = \
                        nextClose.span()
                    return
                # We've found a closing tag, but it's for a nested opening
                # tag.
                pos = nextClose.end()

    def __getattr__(self, name):
        """Returns the `Tag`s HTML content for the special attribute
        `content`, or returns the value of the given attribute.

        Raises `AttributeError` if the tag has no such attribute."""
        if name.startswith('_'):
            # Private names are plain instance attributes.  This method only
            # runs when normal lookup has already failed, so the name is
            # simply missing.
            raise AttributeError(name)
        self._updateParentRange()
        html = self._parent.html
        if name == 'content':
            return html[self._openTagEnd:self._closingTagStart]
        openTag = html[self._openTagStart:self._openTagEnd]
        attributeMatch = re.search(attributeRE % re.escape(name), openTag)
        if not attributeMatch:
            raise AttributeError(name)
        # Undo the escaping applied by `__setattr__`.
        return attributeMatch.group('value').replace('&quot;', '"')

    def __setattr__(self, name, value):
        """Sets the `Tag`s HTML content for the special attribute `content`,
        or sets the value of the given attribute.  `value` is converted with
        `str()`.  Assigning to `id` also renames the `Tag`."""
        if name.startswith('_'):
            # Private bookkeeping goes straight into the instance dictionary.
            self.__dict__[name] = value
            return
        self._updateParentRange()
        html = self._parent.html
        if name == 'content':
            self._parent.html = (html[:self._openTagEnd] +
                                 str(value) +
                                 html[self._closingTagStart:])
            return

        openTag = html[self._openTagStart:self._openTagEnd]
        attributeMatch = re.search(attributeRE % re.escape(name), openTag)
        # Double quotes are escaped so the value cannot end a double-quoted
        # attribute early.
        # NOTE(review): single quotes are not escaped, so a value containing
        # "'" written into an existing single-quoted attribute will break it.
        escapedValue = str(value).replace('"', '&quot;')
        if attributeMatch:
            # This is a change to an existing attribute.  Keep its original
            # leading whitespace, spelling and quote character.
            quote = attributeMatch.group('quote')
            newAttribute = '%s%s=%s%s%s' % (attributeMatch.group('space'),
                                            attributeMatch.group('name'),
                                            quote, escapedValue, quote)
            newOpenTag = (openTag[:attributeMatch.start()] +
                          newAttribute +
                          openTag[attributeMatch.end():])
            self._parent.html = (html[:self._openTagStart] +
                                 newOpenTag +
                                 html[self._openTagEnd:])
        else:
            # This is introducing a new attribute, placed directly after the
            # tag name.
            newAttributePos = self._openTagStart + 1 + len(self._tagName)
            newAttribute = ' %s="%s"' % (name, escapedValue)
            self._parent.html = (html[:newAttributePos] +
                                 newAttribute +
                                 html[newAttributePos:])
        if name.lower() == 'id':
            # The tag is located by its id, so follow the rename.
            self._name = str(value)

    def __str__(self):
        """Gets the whole tag's HTML."""
        self._updateParentRange()
        return self._parent.html[self._openTagStart:self._closingTagEnd]

    def clone(self):
        """Creates a clone of a `Tag`.  Useful for copying tags between
        `Containers` or for populating template HTML tags with real data (see
        the data-driven example in the main module documentation)."""
        return Tag(html=str(self))


def test():
    """Runs doctest on the `PyMeld` module and returns `doctest.testmod`'s
    result, which the script entry point unpacks as `(failed, total)`."""
    import doctest
    import PyMeld
    return doctest.testmod(PyMeld)


# `test` is a doctest runner, not a unit test.  Without this flag, pytest
# collects it by name whenever this module's names are star-imported into a
# test module.
test.__test__ = False


if __name__ == '__main__':
    failed, total = test()
    print("%d of %d doctests successful." % (total - failed, total))