Skip to content

Commit

Permalink
mmcdole#151: Allow additional parsers for feed formats. Currently onl…
Browse files Browse the repository at this point in the history
…y atom is allowed as part of RSS
  • Loading branch information
Necoro committed Aug 22, 2020
1 parent dc4b4fe commit fac99d8
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 100 deletions.
206 changes: 113 additions & 93 deletions atom/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ var (
"src": true,
"uri": true,
}

// No known explicit extension parsers for Atom, currently
emptyExtParsers = make(shared.ExtParsers)
)

// Parser is an Atom Parser
Expand All @@ -50,6 +53,15 @@ func (ap *Parser) Parse(feed io.Reader) (*Feed, error) {
return ap.parseRoot(p)
}

// ParseAsExtension parses the element the pull parser is currently positioned
// on as Atom entry content. It lets the Atom parser be plugged into another
// feed parser as an extension parser.
//
// The parser's XML base is reset to a fresh one configured with the Atom URI
// attributes on every call. The element is parsed into a new Entry, which is
// returned as the interface{} result; on failure the result is nil.
func (ap *Parser) ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error) {
	// TODO: do we need the surrounding base for the urlstack?
	ap.base = &shared.XMLBase{URIAttrs: atomURIAttrs}

	parsed := new(Entry)
	err := ap.parseEntryContent(p, parsed)
	if err != nil {
		return nil, err
	}
	return parsed, nil
}

func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
if err := p.Expect(xpp.StartTag, "feed"); err != nil {
return nil, err
Expand Down Expand Up @@ -81,7 +93,7 @@ func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
e, err := shared.ParseExtension(extensions, p)
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -227,103 +239,14 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
}

if tok == xpp.StartTag {

name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
e, err := shared.ParseExtension(extensions, p)
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
if err != nil {
return nil, err
}
extensions = e
} else if name == "title" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Title = result
} else if name == "id" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.ID = result
} else if name == "rights" ||
name == "copyright" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Rights = result
} else if name == "summary" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Summary = result
} else if name == "source" {
result, err := ap.parseSource(p)
if err != nil {
return nil, err
}
entry.Source = result
} else if name == "updated" ||
name == "modified" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Updated = result
date, err := shared.ParseDate(result)
if err == nil {
utcDate := date.UTC()
entry.UpdatedParsed = &utcDate
}
} else if name == "contributor" {
result, err := ap.parsePerson("contributor", p)
if err != nil {
return nil, err
}
entry.Contributors = append(entry.Contributors, result)
} else if name == "author" {
result, err := ap.parsePerson("author", p)
if err != nil {
return nil, err
}
entry.Authors = append(entry.Authors, result)
} else if name == "category" {
result, err := ap.parseCategory(p)
if err != nil {
return nil, err
}
entry.Categories = append(entry.Categories, result)
} else if name == "link" {
result, err := ap.parseLink(p)
if err != nil {
return nil, err
}
entry.Links = append(entry.Links, result)
} else if name == "published" ||
name == "issued" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Published = result
date, err := shared.ParseDate(result)
if err == nil {
utcDate := date.UTC()
entry.PublishedParsed = &utcDate
}
} else if name == "content" {
result, err := ap.parseContent(p)
if err != nil {
return nil, err
}
entry.Content = result
} else {
err := p.Skip()
if err != nil {
if err := ap.parseEntryContent(p, entry); err != nil {
return nil, err
}
}
Expand All @@ -341,6 +264,103 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
return entry, nil
}

// parseEntryContent parses the single element the pull parser is currently
// positioned on and stores the result in the matching field of entry.
//
// The element name is compared case-insensitively. Besides the Atom names,
// the aliases "copyright" (rights), "modified" (updated) and "issued"
// (published) are accepted. Elements with any other name are skipped.
//
// For the two date elements the raw text is always stored; the parsed,
// UTC-normalised time is only set when the text can be parsed as a date.
// A date parse failure is not reported as an error.
func (ap *Parser) parseEntryContent(p *xpp.XMLPullParser, entry *Entry) error {
	switch strings.ToLower(p.Name) {
	case "title":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Title = text

	case "id":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.ID = text

	case "rights", "copyright":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Rights = text

	case "summary":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Summary = text

	case "source":
		src, err := ap.parseSource(p)
		if err != nil {
			return err
		}
		entry.Source = src

	case "updated", "modified":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Updated = text
		// An unparsable date is tolerated: only the raw text is kept.
		if when, perr := shared.ParseDate(text); perr == nil {
			utc := when.UTC()
			entry.UpdatedParsed = &utc
		}

	case "contributor":
		person, err := ap.parsePerson("contributor", p)
		if err != nil {
			return err
		}
		entry.Contributors = append(entry.Contributors, person)

	case "author":
		person, err := ap.parsePerson("author", p)
		if err != nil {
			return err
		}
		entry.Authors = append(entry.Authors, person)

	case "category":
		category, err := ap.parseCategory(p)
		if err != nil {
			return err
		}
		entry.Categories = append(entry.Categories, category)

	case "link":
		link, err := ap.parseLink(p)
		if err != nil {
			return err
		}
		entry.Links = append(entry.Links, link)

	case "published", "issued":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Published = text
		// An unparsable date is tolerated: only the raw text is kept.
		if when, perr := shared.ParseDate(text); perr == nil {
			utc := when.UTC()
			entry.PublishedParsed = &utc
		}

	case "content":
		content, err := ap.parseContent(p)
		if err != nil {
			return err
		}
		entry.Content = content

	default:
		// Unknown element: consume it so the caller can continue.
		if err := p.Skip(); err != nil {
			return err
		}
	}

	return nil
}

func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {

if err := p.Expect(xpp.StartTag, "source"); err != nil {
Expand Down Expand Up @@ -370,7 +390,7 @@ func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {
name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
e, err := shared.ParseExtension(extensions, p)
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
if err != nil {
return nil, err
}
Expand Down
1 change: 1 addition & 0 deletions extensions/extensions.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ type Extension struct {
Value string `json:"value"`
Attrs map[string]string `json:"attrs"`
Children map[string][]Extension `json:"children"`
Parsed interface{} `json:"parsed,omitempty"`
}

func parseTextExtension(name string, extensions map[string][]Extension) (value string) {
Expand Down
33 changes: 31 additions & 2 deletions internal/shared/extparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ import (
"github.com/mmcdole/goxpp"
)

// ExtParser is implemented by parsers that can parse an extension element
// themselves. ParseExtension uses a registered ExtParser in place of the
// generic extension element parsing, and the value it returns is stored in
// the Parsed field of the resulting extension.
type ExtParser interface {
// ParseAsExtension parses the element the pull parser is currently
// positioned on and returns the parsed value, or an error.
ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error)
}

// ExtParsers maps a namespace prefix (as resolved by prefixForNamespace) to
// the ExtParser responsible for elements carrying that prefix.
type ExtParsers map[string]ExtParser

// IsExtension returns whether or not the current
// XML element is an extension element (if it has a
// non empty prefix)
Expand All @@ -22,10 +28,16 @@ func IsExtension(p *xpp.XMLPullParser) bool {
// ParseExtension parses the current element of the
// XMLPullParser as an extension element and updates
// the extension map
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) {
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser, extParsers ExtParsers) (ext.Extensions, error) {
prefix := prefixForNamespace(p.Space, p)

result, err := parseExtensionElement(p)
var result ext.Extension
var err error
if extParser, ok := extParsers[prefix]; ok {
result, err = parseExtensionFromParser(p, extParser)
} else {
result, err = parseExtensionElement(p)
}
if err != nil {
return nil, err
}
Expand All @@ -43,6 +55,23 @@ func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, er
return fe, nil
}

// parseExtensionFromParser builds an extension for the current element by
// delegating the parsing of the element to extParser.
//
// The pull parser must be positioned on a start tag. The extension's Name is
// taken from that tag and its Parsed field holds whatever extParser returns.
// After delegation the parser is expected to be on the end tag with the same
// name. On error the extension built so far is returned along with the error.
func parseExtensionFromParser(p *xpp.XMLPullParser, extParser ExtParser) (e ext.Extension, err error) {
	err = p.Expect(xpp.StartTag, "*")
	if err != nil {
		return e, err
	}

	e.Name = p.Name
	e.Parsed, err = extParser.ParseAsExtension(p)
	if err != nil {
		return e, err
	}

	// err is nil here exactly when the closing tag matched.
	err = p.Expect(xpp.EndTag, e.Name)
	return e, err
}

func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) {
if err = p.Expect(xpp.StartTag, "*"); err != nil {
return e, err
Expand Down
14 changes: 13 additions & 1 deletion parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"strings"

"github.com/mmcdole/gofeed/atom"
"github.com/mmcdole/gofeed/internal/shared"
"github.com/mmcdole/gofeed/rss"
)

Expand Down Expand Up @@ -131,8 +132,19 @@ func (f *Parser) parseAtomFeed(feed io.Reader) (*Feed, error) {
return f.atomTrans().Translate(af)
}

// BuildRSSExtParsers returns the extension parsers to hand to the RSS parser.
// Each known Atom namespace prefix ("atom", "atom10", "atom03") is mapped to
// this parser's Atom parser. A new map is built on every call.
func (f *Parser) BuildRSSExtParsers() shared.ExtParsers {
	// all possible atom variants
	atomPrefixes := [...]string{"atom", "atom10", "atom03"}

	parsers := make(shared.ExtParsers, len(atomPrefixes))
	for _, prefix := range atomPrefixes {
		parsers[prefix] = f.ap
	}
	return parsers
}

func (f *Parser) parseRSSFeed(feed io.Reader) (*Feed, error) {
rf, err := f.rp.Parse(feed)
rf, err := f.rp.Parse(feed, f.BuildRSSExtParsers())
if err != nil {
return nil, err
}
Expand Down
9 changes: 6 additions & 3 deletions rss/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ import (
// Parser is an RSS Parser
type Parser struct {
// base is replaced with a fresh shared.XMLBase at the start of every Parse call.
base *shared.XMLBase
// extParsers holds the extension parsers passed to Parse; they are
// forwarded to shared.ParseExtension when extension elements are parsed.
extParsers shared.ExtParsers
}

// Parse parses an xml feed into an rss.Feed
func (rp *Parser) Parse(feed io.Reader) (*Feed, error) {
func (rp *Parser) Parse(feed io.Reader, extParsers shared.ExtParsers) (*Feed, error) {
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)
rp.base = &shared.XMLBase{}
rp.extParsers = extParsers

_, err := rp.base.FindRoot(p)
if err != nil {
Expand Down Expand Up @@ -144,7 +146,8 @@ func (rp *Parser) parseChannel(p *xpp.XMLPullParser) (rss *Feed, err error) {
name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
ext, err := shared.ParseExtension(extensions, p)

ext, err := shared.ParseExtension(extensions, p, rp.extParsers)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -335,7 +338,7 @@ func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) {
name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
ext, err := shared.ParseExtension(extensions, p)
ext, err := shared.ParseExtension(extensions, p, rp.extParsers)
if err != nil {
return nil, err
}
Expand Down
3 changes: 2 additions & 1 deletion rss/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"strings"
"testing"

"github.com/mmcdole/gofeed"
"github.com/mmcdole/gofeed/rss"
"github.com/stretchr/testify/assert"
)
Expand All @@ -27,7 +28,7 @@ func TestParser_Parse(t *testing.T) {

// Parse actual feed
fp := &rss.Parser{}
actual, _ := fp.Parse(bytes.NewReader(f))
actual, _ := fp.Parse(bytes.NewReader(f), gofeed.NewParser().BuildRSSExtParsers())

// Get json encoded expected feed result
ef := fmt.Sprintf("../testdata/parser/rss/%s.json", name)
Expand Down

0 comments on commit fac99d8

Please sign in to comment.