Skip to content

Commit

Permalink
Keep track of the current xml:base value
Browse files Browse the repository at this point in the history
Tracks xml:base attributes in a stack of *url.URL values. Consumers of the
parser can read the base URL currently in effect (the top of the stack)
through `XMLPullParser.BaseStack.Top()`.

This is useful for applications that need to resolve URLs in XML
documents relative to the xml:base attributes. To that end, a helper
method is provided which will resolve a relative string to an absolute
URL according to the current base:
`func (p *XMLPullParser) XmlBaseResolveUrl(u string) (*url.URL, error)`

Includes a single test. It is not comprehensive, but it checks xml:base
handling two levels deep, resolving a string against the current base,
and resolving relative xml:base values against the enclosing base.
  • Loading branch information
cristoper authored and mmcdole committed Feb 28, 2023
1 parent 1ba3125 commit 1430f15
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 1 deletion.
86 changes: 86 additions & 0 deletions xpp.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,15 @@ import (
"errors"
"fmt"
"io"
"net/url"
"strings"
)

// XMLEventType enumerates the kinds of parser events (StartDocument,
// EndDocument, ...) declared in the const block below.
type XMLEventType int
// CharsetReader is a function that takes a charset name and an input reader
// and returns a reader (or an error).
// NOTE(review): presumably used to transcode non-UTF-8 input — confirm at the
// call site where it is handed to the decoder.
type CharsetReader func(charset string, input io.Reader) (io.Reader, error)

// xmlNSURI is the namespace URI bound to the reserved "xml" prefix; pushBase
// compares attribute namespaces against it to recognize xml:base.
const xmlNSURI = "http://www.w3.org/XML/1998/namespace"

const (
StartDocument XMLEventType = iota
EndDocument
Expand All @@ -24,10 +27,33 @@ const (
// TODO: CDSECT ?
)

// urlStack is a last-in, first-out stack of URLs. The most recently pushed
// entry is stored at index 0.
type urlStack []*url.URL

// push places u on top of the stack.
func (s *urlStack) push(u *url.URL) {
	grown := make([]*url.URL, 0, len(*s)+1)
	grown = append(grown, u)
	*s = append(grown, *s...)
}

// pop removes the top entry and returns it. It returns nil when the receiver
// is nil or the stack is empty.
func (s *urlStack) pop() *url.URL {
	if s == nil {
		return nil
	}
	entries := *s
	if len(entries) == 0 {
		return nil
	}
	*s = entries[1:]
	return entries[0]
}

// Top returns the top entry without removing it. It returns nil when the
// receiver is nil or the stack is empty.
func (s *urlStack) Top() *url.URL {
	if s == nil {
		return nil
	}
	if entries := *s; len(entries) > 0 {
		return entries[0]
	}
	return nil
}

type XMLPullParser struct {
// Document State
Spaces map[string]string
SpacesStack []map[string]string
BaseStack urlStack

// Token State
Depth int
Expand Down Expand Up @@ -214,6 +240,7 @@ func (p *XMLPullParser) DecodeElement(v interface{}) error {
p.Depth--
p.Name = name
p.token = nil
p.popBase()
return nil
}

Expand Down Expand Up @@ -263,6 +290,26 @@ func (p *XMLPullParser) EventType(t xml.Token) (event XMLEventType) {
return
}

// XmlBaseResolveUrl resolves u as a URL reference against the xml:base
// currently in effect (the top of p.BaseStack).
//
// It returns (nil, nil) when no base is in effect, so callers must check the
// returned URL for nil before using it. If u cannot be parsed, the url.Parse
// error is returned together with a nil URL.
//
// When u is non-empty and the base has a non-empty path that does not end in
// "/", the base path is treated as a directory: "/" is appended before
// resolving. The URL stored in p.BaseStack is never modified.
func (p *XMLPullParser) XmlBaseResolveUrl(u string) (*url.URL, error) {
	curr := p.BaseStack.Top()
	if curr == nil {
		return nil, nil
	}

	relURL, err := url.Parse(u)
	if err != nil {
		return nil, err
	}

	// Work on a copy: curr is the very pointer held by the stack, so changing
	// its Path in place would permanently alter the recorded base and every
	// later Top() or nested xml:base resolution that relies on it.
	base := *curr
	if base.Path != "" && u != "" && !strings.HasSuffix(base.Path, "/") {
		// There's no reason someone would use a path in xml:base if they
		// didn't mean for it to be a directory
		base.Path += "/"
	}
	return base.ResolveReference(relURL), nil
}

func (p *XMLPullParser) processToken(t xml.Token) {
switch tt := t.(type) {
case xml.StartElement:
Expand All @@ -286,6 +333,7 @@ func (p *XMLPullParser) processStartToken(t xml.StartElement) {
p.Name = t.Name.Local
p.Space = t.Name.Space
p.trackNamespaces(t)
p.pushBase()
}

func (p *XMLPullParser) processEndToken(t xml.EndElement) {
Expand All @@ -297,6 +345,7 @@ func (p *XMLPullParser) processEndToken(t xml.EndElement) {
p.Spaces = p.SpacesStack[len(p.SpacesStack)-1]
}
p.Name = t.Name.Local
p.popBase()
}

func (p *XMLPullParser) processCharDataToken(t xml.CharData) {
Expand Down Expand Up @@ -340,3 +389,40 @@ func (p *XMLPullParser) trackNamespaces(t xml.StartElement) {
p.Spaces = newSpace
p.SpacesStack = append(p.SpacesStack, newSpace)
}

// popBase pops the top entry off p.BaseStack and returns its string form.
// It returns "" when the pop yields nil.
func (p *XMLPullParser) popBase() string {
	popped := p.BaseStack.pop()
	if popped == nil {
		return ""
	}
	return popped.String()
}

// pushBase records the base URL in effect for the element whose attributes
// are currently in p.Attrs, pushing exactly one entry onto p.BaseStack.
//
// One entry per start element is required because the end-of-element paths
// call popBase unconditionally; pushing only when xml:base is present would
// let the end tag of an element without xml:base discard an ancestor's base.
//
//   - With a non-empty xml:base attribute, its value is parsed, resolved
//     against the enclosing base (if any) and pushed.
//   - Without one, the enclosing base is pushed again so the element inherits
//     it (this is a nil entry when no base is in effect at all).
//   - If the attribute value cannot be parsed, the enclosing base is pushed
//     again and the url.Parse error is returned.
func (p *XMLPullParser) pushBase() error {
	inherited := p.BaseStack.Top()

	var base string
	// search list of attrs for "xml:base"
	for _, attr := range p.Attrs {
		if attr.Name.Local == "base" && attr.Name.Space == xmlNSURI {
			base = attr.Value
			break
		}
	}
	if base == "" {
		// no base attribute found: inherit from the enclosing element
		p.BaseStack.push(inherited)
		return nil
	}

	newURL, err := url.Parse(base)
	if err != nil {
		// Keep the stack balanced even though this value is unusable.
		p.BaseStack.push(inherited)
		return err
	}

	if inherited != nil {
		newURL = inherited.ResolveReference(newURL)
	}
	p.BaseStack.push(newURL)
	return nil
}
33 changes: 32 additions & 1 deletion xpp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"io"
"testing"

"github.com/mmcdole/goxpp"
xpp "github.com/mmcdole/goxpp"
"github.com/stretchr/testify/assert"
)

Expand Down Expand Up @@ -85,6 +85,37 @@ func TestDecodeElementDepth(t *testing.T) {
p.DecodeElement(&v{})
}

// TestXMLBase walks a small document and checks the base URL reported at each
// start tag: an absolute xml:base on <root>, a relative xml:base on the first
// <d2> (also used to resolve a relative reference), and the inherited root
// base on the second <d2>.
func TestXMLBase(t *testing.T) {
	const doc = `<root xml:base="https://example.org/"><d2 xml:base="relative">foo</d2><d2>bar</d2></root>`

	// Charset reader that hands the input back unchanged.
	passthrough := func(charset string, input io.Reader) (io.Reader, error) {
		return input, nil
	}
	parser := xpp.NewXMLPullParser(bytes.NewBufferString(doc), false, passthrough)

	type empty struct{}

	// <root>: absolute base.
	parser.NextTag()
	assert.Equal(t, "root", parser.Name)
	assert.Equal(t, "https://example.org/", parser.BaseStack.Top().String())

	// First <d2>: relative base resolved against the root base.
	parser.NextTag()
	assert.Equal(t, "d2", parser.Name)
	assert.Equal(t, "https://example.org/relative", parser.BaseStack.Top().String())

	abs, resolveErr := parser.XmlBaseResolveUrl("test")
	assert.NoError(t, resolveErr)
	assert.Equal(t, "https://example.org/relative/test", abs.String())
	parser.DecodeElement(&empty{})

	// Second <d2>: no xml:base of its own, so the root base applies.
	parser.NextTag()
	assert.Equal(t, "d2", parser.Name)
	assert.Equal(t, "https://example.org/", parser.BaseStack.Top().String())
	parser.DecodeElement(&empty{})
}

func toNextStart(t *testing.T, p *xpp.XMLPullParser) {
for {
tok, err := p.NextToken()
Expand Down

0 comments on commit 1430f15

Please sign in to comment.