The SAX Parser locator Facility
|
This is yet another working sample of maintaining location in a SAX parsing handler—that is, getting accurate line/column numbers out of it. Scraping this as a starting point should give you a leg up on your SAX parser, whatever its purpose.
What's missing? Well, pretty quickly—depending on what you need a SAX parser for—you'll need some kind of stack storage and management. I have some examples of that elsewhere.
package com.windofkeltia.processor;

import java.io.IOException;
import java.io.InputStream;
import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.SAXException;

import com.windofkeltia.sax.Position;

/**
 * Drives a SAX parse of an XML stream through an {@link AnalyzerHandler} and exposes the
 * start and end positions that handler records.
 */
public class Analyzer
{
  /** Shared factory, configured once so that every parser it creates ignores external entities. */
  private static final SAXParserFactory factory = createFactory();

  private final InputStream     inputStream;
  private final SAXParser       parser;
  private final AnalyzerHandler handler;

  /**
   * Look for an element, begin displaying output (which is just parsing status)
   * until that element's close is found, then stop displaying output again.
   *
   * @param flowfile       the XML to parse; it is read by {@link #parse()} and is not closed here.
   * @param contentElement name of the element whose content is to be reported.
   * @throws ParserConfigurationException if a parser cannot be created.
   * @throws SAXException                 if the parser factory reports a SAX error.
   */
  public Analyzer( InputStream flowfile, final String contentElement )
      throws ParserConfigurationException, SAXException
  {
    parser      = factory.newSAXParser();
    inputStream = flowfile;
    handler     = new AnalyzerHandler( contentElement );
  }

  /**
   * Parses the stream handed to the constructor, feeding every SAX event to the handler.
   *
   * @throws IOException  if the stream cannot be read.
   * @throws SAXException if the XML cannot be parsed.
   */
  public void parse() throws IOException, SAXException
  {
    parser.parse( inputStream, handler );
  }

  /** @return the start position recorded by the handler (whatever {@code handler.getStart()} returns). */
  public Position getStart() { return handler.getStart(); }

  /** @return the end position recorded by the handler (whatever {@code handler.getEnd()} returns). */
  public Position getEnd()   { return handler.getEnd(); }

  /**
   * Builds the parser factory with external general and parameter entities switched off.
   * The XML comes from the caller, so the parser must not be allowed to fetch files or URLs
   * named inside the document (XXE). Internal entities are unaffected.
   *
   * @throws IllegalStateException if the underlying parser rejects either feature; this
   *         surfaces when the class is initialized.
   */
  private static SAXParserFactory createFactory()
  {
    final SAXParserFactory hardened = SAXParserFactory.newInstance();

    try
    {
      hardened.setFeature( "http://xml.org/sax/features/external-general-entities", false );
      hardened.setFeature( "http://xml.org/sax/features/external-parameter-entities", false );
    }
    catch( ParserConfigurationException | SAXException e )
    {
      throw new IllegalStateException( "XML parser cannot disable external entities", e );
    }

    return hardened;
  }
}
Each entry point (handler method) prints out status including line- and column number and the element name (and attributes if relevant).
package com.windofkeltia.processor; import java.util.HashMap; import java.util.Map; import static java.util.Objects.isNull; import org.xml.sax.Attributes; import org.xml.sax.Locator; import org.xml.sax.helpers.DefaultHandler; import com.windofkeltia.sax.Position; import com.windofkeltia.utilities.StringUtilities; public class AnalyzerHandler extends DefaultHandler { private Locator locator; private Position position = new Position(); // starting element position we maintain private Position start = null; private Position end = null; private boolean outputOn = false; // allows us to start/stop some (output) action private final String contentElement; public AnalyzerHandler( final String contentElement ) { super(); this.contentElement = contentElement; System.out.println( "AnalyzerHandler():" ); } public void startElement( String uri, String localName, String elementName, Attributes attributes ) { if( elementName.equals( contentElement ) ) { outputOn = true; return; } if( outputOn ) { if( isNull( start ) ) start = position; StringBuilder sb = new StringBuilder(); sb.append( position.line ).append( ',' ).append( position.column ).append( ' ' ); Map< String, String > javaAttributes = getAttributesAsJavaMap( attributes ); sb.append( elementName ); System.out.println( sb ); } } public void endElement( String uri, String localName, String elementName ) { Position start = position; Position end = new Position( locator.getLineNumber(), locator.getColumnNumber() ); if( elementName.equals( contentElement ) ) outputOn = false; if( !outputOn ) return; if( isNull( end ) ) end = position; StringBuilder sb = new StringBuilder(); sb.append( position.line ).append( ',' ).append( position.column ).append( ' ' ); sb.append( elementName ); System.out.println( sb ); // update the starting point for the next element updateElementPoint( locator ); } public void characters( char[] ch, int start, int length ) { updateElementPoint( locator ); // now update the starting point if( !outputOn ) 
return; StringBuilder sb = new StringBuilder(); String characters = new String( ch, start, length ).trim(); if( !StringUtilities.isEmpty( characters ) ) { sb.append( position.line ).append( ',' ).append( position.column ).append( ' ' ); sb.append( '\"' ).append( characters.trim() ).append( '\"' ); System.out.println( sb ); } } public void comment( char[] ch, int start, int length ) { if( !outputOn ) return; String comment = new String( ch, start, length ); System.out.println( " comment(): \"" + comment.trim() + "\"" ); } public void startDocument() { if( outputOn ) System.out.println( " startDocument():" ); } public void endDocument() { if( outputOn ) System.out.println( " endDocument():" ); } public void setDocumentLocator( Locator location ) { locator = location; } public Position getStart() { return start; } public Position getEnd() { return end; } private void updateElementPoint( Locator locator ) { Position location = new Position( locator.getLineNumber(), locator.getColumnNumber() ); if( position.compareTo( location ) < 0 ) position = location; } /** * Here's how to make SAX' attributes "Java-useful." If we had uri (namespaces) defined, * we'd have to get a lot more serious about how to use uri, localName and qName. 
*/ private Map< String, String > getAttributesAsJavaMap( Attributes saxAttributes ) { int attrLength = saxAttributes.getLength(); Map< String, String > javaAttributes = new HashMap<>( attrLength ); for( int attr = 0; attr < attrLength; attr++ ) { String attribute = saxAttributes.getQName( attr ); String value = saxAttributes.getValue( attr ); javaAttributes.put( attribute, value ); } return javaAttributes; } private String javaAttributesAsString( Map< String, String > javaAttributes ) { if( javaAttributes.size() == 0 ) return ""; StringBuilder sb = new StringBuilder(); for( Map.Entry< String, String > attribute : javaAttributes.entrySet() ) sb.append( attribute.getKey() ) .append( "=\"" ) .append( StringUtilities.smash( attribute.getValue() ) ) .append( "\", " ); sb.setLength( sb.length()-2 ); return sb.toString(); } }
package com.windofkeltia.sax; public class Position { public int line; public int column; public Position() { this.line = 1; this.column = 1; } public Position( int line, int column ) { this.line = line; this.column = column; } public void setLine( int line ) { this.line = line; } public void setColumn( int column ) { this.column = column; } public int getLine() { return line; } public int getColumn() { return column; } public int compareTo( Position position ) { // if our location is past recorded line... if( position.getLine() > getLine() ) return -1; // if on recorded line, but past recorded column... else if( position.getLine() == getLine() && position.getColumn() > getColumn() ) return -1; // if on recorded line and also at recorded column... else if( position.getLine() == getLine() && position.getColumn() == getColumn() ) return 0; // we're before current line and/or current column... else return 1; } }
/**
 * Tells whether a string holds no characters at all.
 *
 * @param string the string to test; may be null.
 * @return true if {@code string} is null or has length zero.
 */
public static boolean isEmpty( String string )
{
  return string == null || string.isEmpty();
}

/**
 * Remove all newlines, multiple spaces, tabs, etc. to neutralize this string. This is helpful
 * when comparing test results where white space doesn't count. This does not remove all
 * spaces--only multiple, adjacent ones.
 *
 * <p>Line feeds and carriage returns are dropped outright (no space is put in their place),
 * each tab counts as a space, and every run of spaces is reduced to a single space. Leading
 * and trailing spaces are collapsed but not stripped.</p>
 *
 * @param string the string to neutralize; may be null.
 * @return the neutralized string, or {@code string} itself when it is null or empty.
 */
public static String smash( String string )
{
  if( isEmpty( string ) )
  {
    return string;
  }

  int           length        = string.length();
  StringBuilder result        = new StringBuilder( length );
  boolean       lastWasSpace  = false;

  for( int index = 0; index < length; index++ )
  {
    char c = string.charAt( index );

    if( c == '\n' || c == '\r' )
    {
      // line breaks vanish; spaces on either side of one still count as adjacent
      continue;
    }

    if( c == ' ' || c == '\t' )
    {
      if( !lastWasSpace )
      {
        result.append( ' ' );
      }
      lastWasSpace = true;
    }
    else
    {
      result.append( c );
      lastWasSpace = false;
    }
  }

  return result.toString();
}
Much transformation (to simplify) went into this, so I don't guarantee it works as-is. Anyway, the output is supposed to be (simply) what's in CCD_BODY.
package com.windofkeltia.processor; import java.util.List; import javax.xml.parsers.ParserConfigurationException; import org.xml.sax.SAXException; import org.junit.After; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import static org.junit.Assert.assertEquals; public class AnalyzerTest { @After public void tearDown() { } @Before public void setUp() { TestUtilities.setUp( name ); } private static final boolean VERBOSE = true; private static final String CCD_BODY = "" + "<ClinicalDocument xmlns=\"urn:hl7-org:v3\" xmlns:sdtc=\"urn:hl7-org:sdtc\"\n" + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" + " xsi:schemaLocation=\"urn:hl7-org:v3\n" + " http://xreg2.nist.gov:8080/hitspValidation/schema/cdar2c32/infrastructure/cda/C32_CDA.xsd\">\n" + " <realmCode code=\"US\"/>\n" + " <typeId root=\"2.16.840.1.113883.1.3\" extension=\"POCD_HD000040\"/>\n" + " <component>\n" + " <stuff>\n" + " ...\n" + " </stuff>\n" + " </component>\n" + "</ClinicalDocument>\n"; private static final String FROM_JDBCTOXML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<record>\n" + " <justforfun>\n" + " ...\n" + " </justforfun>\n" + " <ccdmessage>\n" + CCD_BODY + " </ccdmessage>\n" + "</record>\n"; @Test public void testAnalyzer() throws ParserConfigurationException, SAXException, IOException { String[] content = FROM_JDBCTOXML.split( "\n" ); System.out.println( "Input:" ); int lineNumber = 1; for( String line : content ) System.out.println( lineNumber++ + " " + line ); System.out.println(); Analyzer analyzer = new Analyzer( new ByteArrayInputStream( FROM_JDBCTOXML.getBytes() ), "ccdmessage" ); analyzer.parse(); } }