Skip to content

Commit

Permalink
mmcdole#151: Allow additional parsers for feed formats. Currently onl…
Browse files Browse the repository at this point in the history
…y atom is allowed as part of RSS
  • Loading branch information
Necoro committed Jul 10, 2024
1 parent 6a2266c commit 716142d
Show file tree
Hide file tree
Showing 8 changed files with 214 additions and 108 deletions.
211 changes: 115 additions & 96 deletions atom/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ var (
"uri": true,
"url": true, // atom 0.3
}

// No known explicit extension parsers for Atom, currently
emptyExtParsers = make(shared.ExtParsers)
)

// Parser is an Atom Parser
Expand All @@ -38,6 +41,14 @@ func (ap *Parser) Parse(feed io.Reader) (*Feed, error) {
return ap.parseRoot(p)
}

// ParseAsExtension parses the element the pull parser is currently positioned
// on as a piece of Atom entry content. The value is collected into a fresh
// Entry, which is returned as *Entry wrapped in an empty interface. If parsing
// the element fails, the result is nil and the error is passed on unchanged.
func (ap *Parser) ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error) {
	var parsed Entry
	err := ap.parseEntryContent(p, &parsed)
	if err != nil {
		return nil, err
	}
	return &parsed, nil
}

func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
if err := p.Expect(xpp.StartTag, "feed"); err != nil {
return nil, err
Expand Down Expand Up @@ -68,8 +79,8 @@ func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {

name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
e, err := shared.ParseExtension(extensions, p)
if shared.IsExtension(p, emptyExtParsers) {
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -215,103 +226,14 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
}

if tok == xpp.StartTag {

name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
e, err := shared.ParseExtension(extensions, p)
if shared.IsExtension(p, emptyExtParsers) {
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
if err != nil {
return nil, err
}
extensions = e
} else if name == "title" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Title = result
} else if name == "id" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.ID = result
} else if name == "rights" ||
name == "copyright" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Rights = result
} else if name == "summary" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Summary = result
} else if name == "source" {
result, err := ap.parseSource(p)
if err != nil {
return nil, err
}
entry.Source = result
} else if name == "updated" ||
name == "modified" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Updated = result
date, err := shared.ParseDate(result)
if err == nil {
utcDate := date.UTC()
entry.UpdatedParsed = &utcDate
}
} else if name == "contributor" {
result, err := ap.parsePerson("contributor", p)
if err != nil {
return nil, err
}
entry.Contributors = append(entry.Contributors, result)
} else if name == "author" {
result, err := ap.parsePerson("author", p)
if err != nil {
return nil, err
}
entry.Authors = append(entry.Authors, result)
} else if name == "category" {
result, err := ap.parseCategory(p)
if err != nil {
return nil, err
}
entry.Categories = append(entry.Categories, result)
} else if name == "link" {
result, err := ap.parseLink(p)
if err != nil {
return nil, err
}
entry.Links = append(entry.Links, result)
} else if name == "published" ||
name == "issued" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Published = result
date, err := shared.ParseDate(result)
if err == nil {
utcDate := date.UTC()
entry.PublishedParsed = &utcDate
}
} else if name == "content" {
result, err := ap.parseContent(p)
if err != nil {
return nil, err
}
entry.Content = result
} else {
err := p.Skip()
if err != nil {
if err := ap.parseEntryContent(p, entry); err != nil {
return nil, err
}
}
Expand All @@ -329,6 +251,103 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
return entry, nil
}

// parseEntryContent handles a single child element of an Atom entry. It
// dispatches on the lower-cased name of the element the pull parser is
// currently positioned on and stores the parsed value in the matching field
// of entry (repeatable elements are appended to their slice). Elements with
// an unrecognised name are skipped. The first error reported by a sub-parser
// or by Skip is returned unchanged.
func (ap *Parser) parseEntryContent(p *xpp.XMLPullParser, entry *Entry) error {
	switch strings.ToLower(p.Name) {
	case "title":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Title = text

	case "id":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.ID = text

	case "rights", "copyright":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Rights = text

	case "summary":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Summary = text

	case "source":
		src, err := ap.parseSource(p)
		if err != nil {
			return err
		}
		entry.Source = src

	case "updated", "modified":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Updated = text
		// An unparseable date is not an error: the raw text is kept and
		// UpdatedParsed is left as it was.
		if when, dateErr := shared.ParseDate(text); dateErr == nil {
			utc := when.UTC()
			entry.UpdatedParsed = &utc
		}

	case "contributor":
		person, err := ap.parsePerson("contributor", p)
		if err != nil {
			return err
		}
		entry.Contributors = append(entry.Contributors, person)

	case "author":
		person, err := ap.parsePerson("author", p)
		if err != nil {
			return err
		}
		entry.Authors = append(entry.Authors, person)

	case "category":
		cat, err := ap.parseCategory(p)
		if err != nil {
			return err
		}
		entry.Categories = append(entry.Categories, cat)

	case "link":
		link, err := ap.parseLink(p)
		if err != nil {
			return err
		}
		entry.Links = append(entry.Links, link)

	case "published", "issued":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Published = text
		// Same leniency as for "updated": keep the raw text even when the
		// date cannot be parsed.
		if when, dateErr := shared.ParseDate(text); dateErr == nil {
			utc := when.UTC()
			entry.PublishedParsed = &utc
		}

	case "content":
		body, err := ap.parseContent(p)
		if err != nil {
			return err
		}
		entry.Content = body

	default:
		// Unknown element: skip over it.
		return p.Skip()
	}

	return nil
}

func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {

if err := p.Expect(xpp.StartTag, "source"); err != nil {
Expand Down Expand Up @@ -357,8 +376,8 @@ func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {

name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
e, err := shared.ParseExtension(extensions, p)
if shared.IsExtension(p, emptyExtParsers) {
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
if err != nil {
return nil, err
}
Expand Down
1 change: 1 addition & 0 deletions extensions/extensions.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ type Extension struct {
Value string `json:"value"`
Attrs map[string]string `json:"attrs"`
Children map[string][]Extension `json:"children"`
Parsed interface{} `json:"parsed,omitempty"`
}

func parseTextExtension(name string, extensions map[string][]Extension) (value string) {
Expand Down
40 changes: 37 additions & 3 deletions internal/shared/extparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,44 @@ var knownPrefixes = []string{
"atom03",
}

// ExtParser is implemented by parsers that can take over the parsing of an
// extension element in place of the generic extension handling.
type ExtParser interface {
// ParseAsExtension parses the element p is currently positioned on and
// returns the parsed value; the caller stores it in the Parsed field of
// the resulting extension.
ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error)
}

// ExtParsers maps a namespace prefix (as returned by PrefixForNamespace) to
// the ExtParser responsible for elements carrying that prefix.
type ExtParsers map[string]ExtParser

// IsExtension returns whether or not the current
// XML element is an extension element (if it has a
// non empty prefix)
func IsExtension(p *xpp.XMLPullParser) bool {
func IsExtension(p *xpp.XMLPullParser, extParsers ExtParsers) bool {
	prefix := PrefixForNamespace(strings.TrimSpace(p.Space), p)
	if prefix == "" {
		// No prefix could be resolved for the element's namespace, so it
		// is not an extension element.
		return false
	}

	// A registered extension parser always claims its prefix, even when that
	// prefix is listed in knownPrefixes. Without one, only prefixes that are
	// not in knownPrefixes count as extensions.
	_, hasParser := extParsers[prefix]
	return hasParser || !slices.Contains(knownPrefixes, prefix)
}

// ParseExtension parses the current element of the
// XMLPullParser as an extension element and updates
// the extension map
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) {
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser, extParsers ExtParsers) (ext.Extensions, error) {
prefix := PrefixForNamespace(p.Space, p)

result, err := parseExtensionElement(p)
var result ext.Extension
var err error
if extParser, ok := extParsers[prefix]; ok {
result, err = parseExtensionFromParser(p, extParser)
} else {
result, err = parseExtensionElement(p)
}
if err != nil {
return nil, err
}
Expand All @@ -54,6 +71,23 @@ func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, er
return fe, nil
}

// parseExtensionFromParser builds an extension for the current element by
// delegating the parsing to extParser instead of the generic element parser.
// It expects p to be on a start tag, records the element name, stores the
// value returned by extParser.ParseAsExtension in Parsed, and finally expects
// p to be on the end tag with the same name. On failure the extension built
// so far is returned together with the error.
func parseExtensionFromParser(p *xpp.XMLPullParser, extParser ExtParser) (e ext.Extension, err error) {
	err = p.Expect(xpp.StartTag, "*")
	if err != nil {
		return e, err
	}

	e.Name = p.Name

	e.Parsed, err = extParser.ParseAsExtension(p)
	if err != nil {
		return e, err
	}

	// The delegate must leave the parser on the closing tag of the element
	// it was given; err is nil here when that holds.
	err = p.Expect(xpp.EndTag, e.Name)
	return e, err
}

func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) {
if err = p.Expect(xpp.StartTag, "*"); err != nil {
return e, err
Expand Down
14 changes: 13 additions & 1 deletion parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/mmcdole/gofeed/atom"
"github.com/mmcdole/gofeed/json"
"github.com/mmcdole/gofeed/internal/shared"
"github.com/mmcdole/gofeed/rss"
)

Expand Down Expand Up @@ -155,8 +156,19 @@ func (f *Parser) parseAtomFeed(feed io.Reader) (*Feed, error) {
return f.atomTrans().Translate(af)
}

// BuildRSSExtParsers returns the extension parsers that are handed to the RSS
// parser. Each Atom namespace prefix ("atom", "atom10" and "atom03") is mapped
// to f.ap, so RSS elements carrying one of those prefixes are parsed by it.
// A new map is built on every call.
func (f *Parser) BuildRSSExtParsers() shared.ExtParsers {
	return shared.ExtParsers{
		"atom":   f.ap,
		"atom10": f.ap,
		"atom03": f.ap,
	}
}

func (f *Parser) parseRSSFeed(feed io.Reader) (*Feed, error) {
rf, err := f.rp.Parse(feed)
rf, err := f.rp.Parse(feed, f.BuildRSSExtParsers())
if err != nil {
return nil, err
}
Expand Down
Loading

0 comments on commit 716142d

Please sign in to comment.