diff options
Diffstat (limited to 'gemini/gemtext/atomconv')
-rw-r--r-- | gemini/gemtext/atomconv/convert.go | 181 | ||||
-rw-r--r-- | gemini/gemtext/atomconv/convert_test.go | 139 |
2 files changed, 320 insertions, 0 deletions
diff --git a/gemini/gemtext/atomconv/convert.go b/gemini/gemtext/atomconv/convert.go new file mode 100644 index 0000000..30228f3 --- /dev/null +++ b/gemini/gemtext/atomconv/convert.go @@ -0,0 +1,181 @@ +package atomconv + +import ( + "bytes" + "context" + "html/template" + "io" + "mime" + "net/url" + "regexp" + "strconv" + "strings" + "time" + + "tildegit.org/tjp/sliderule/gemini" + "tildegit.org/tjp/sliderule/gemini/gemtext" + "tildegit.org/tjp/sliderule/internal/types" +) + +// Convert turns a gemini document to Atom format. +// +// It identifies feed fields and entries according to the specification at +// gemini://gemini.circumlunar.space/docs/companion/subscription.gmi +func Convert(wr io.Writer, doc gemtext.Document, location *url.URL) error { + if location == nil { + panic("atomconv.Convert: provided location was nil") + } + + if _, err := wr.Write([]byte(`<?xml version="1.0" encoding="utf-8"?>`)); err != nil { + return err + } + if _, err := wr.Write([]byte{'\n'}); err != nil { + return err + } + return atomTmpl.Execute(wr, parseGemSub(doc, location)) +} + +// Auto is a middleware which builds atom feeds for any gemtext pages. +// +// It looks for requests ending with the '.atom' extension, passes through the request +// with the extension clipped off, then if the response is in gemtext it converts it to +// an Atom feed according to the gmisub spec at +// gemini://gemini.circumlunar.space/docs/companion/subscription.gmi +var Auto = types.Middleware(func(h types.Handler) types.Handler { + return types.HandlerFunc(func(ctx context.Context, request *types.Request) *types.Response { + if request.Scheme != "gemini" || !strings.HasSuffix(request.Path, ".atom") { + return h.Handle(ctx, request) + } + + r := *request + u := *request.URL + u.Path = u.Path[:len(u.Path)-5] + r.URL = &u + + response := h.Handle(ctx, &r) + if response.Status != gemini.StatusSuccess { + return response + } + + mtype, _, err := mime.ParseMediaType(response.Meta.(string)) + if err != nil || mtype != "text/gemini" { + return response + } + + defer func() { + _ = response.Close() + }() + + doc, err := gemtext.Parse(response.Body) + if err != nil { + return gemini.Failure(err) + } + + buf := &bytes.Buffer{} + if err := Convert(buf, doc, request.URL); err != nil { + return gemini.Failure(err) + } + return gemini.Success("application/atom+xml; charset=utf-8", buf) + }) +}) + +type gmiSub struct { + ID template.URL + Title string + Subtitle string + Updated string + + Entries []gmiSubEntry +} + +type gmiSubEntry struct { + ID template.URL + Updated string + Title string +} + +var linkElemRE = regexp.MustCompile(`(\d{4})-([0-1]\d)-([0-3]\d)`) + +func parseGemSub(doc gemtext.Document, location *url.URL) *gmiSub { + sub := &gmiSub{ID: template.URL(location.String())} + updated := time.Time{} + + for i, line := range doc { + switch line.Type() { + case gemtext.LineTypeHeading1: + if sub.Title != "" { + continue + } + + sub.Title = line.(gemtext.HeadingLine).Body() + + for { // skip any empty lines + i += 1 + if i >= len(doc) || strings.TrimPrefix(doc[i].String(), "\r") != "\n" { + break + } + } + if i < len(doc) && doc[i].Type() == gemtext.LineTypeHeading2 { + sub.Subtitle = doc[i].(gemtext.HeadingLine).Body() + } + case gemtext.LineTypeLink: + label := line.(gemtext.LinkLine).Label() + if len(label) < 10 { + continue + } + match := linkElemRE.FindStringSubmatch(label[:10]) + if match == nil { + continue + } + + year, err := strconv.Atoi(match[1]) + if err != nil { + continue + } + month, err := strconv.Atoi(match[2]) + if err != nil || month > 12 { + continue + } + day, err := strconv.Atoi(match[3]) + if err != nil || day > 31 { + continue + } + + entryUpdated := time.Date(year, time.Month(month), day, 12, 0, 0, 0, time.UTC) + entryTitle := strings.TrimLeft(strings.TrimPrefix(strings.TrimLeft(label[10:], " \t"), "-"), " \t") + + sub.Entries = append(sub.Entries, gmiSubEntry{ + ID: template.URL(line.(gemtext.LinkLine).URL()), + Updated: entryUpdated.Format(time.RFC3339), + Title: entryTitle, + }) + + if entryUpdated.After(updated) { + updated = entryUpdated + sub.Updated = updated.Format(time.RFC3339) + } + } + } + + return sub +} + +var atomTmpl = template.Must(template.New("atom").Parse(` +<feed xmlns="http://www.w3.org/2005/Atom"> + <id>{{.ID}}</id> + <link href="{{.ID}}"/> + <title>{{.Title}}</title> + {{- if .Subtitle }} + <subtitle>{{.Subtitle}}</subtitle> + {{- end }} + <updated>{{.Updated}}</updated> +{{- range .Entries }} + <entry> + <id>{{.ID}}</id> + <link rel="alternate" href="{{.ID}}"/> + <title>{{.Title}}</title> + <updated>{{.Updated}}</updated> + </entry> +{{- end }} +</feed> +`[1:])) diff --git a/gemini/gemtext/atomconv/convert_test.go b/gemini/gemtext/atomconv/convert_test.go new file mode 100644 index 0000000..8adaa2e --- /dev/null +++ b/gemini/gemtext/atomconv/convert_test.go @@ -0,0 +1,139 @@ +package atomconv + +import ( + "bytes" + "context" + "fmt" + "io" + "net/url" + "testing" + + "tildegit.org/tjp/sliderule" + "tildegit.org/tjp/sliderule/gemini" + "tildegit.org/tjp/sliderule/gemini/gemtext" + "tildegit.org/tjp/sliderule/internal/types" +) + +func TestConvert(t *testing.T) { + tests := []struct { + url string + input string + output string + }{ + { + url: "gemini://sombodys.site/a/page", + input: ` +# This is a gemlog page + + +## with a subtitle after empty lines + +=> ./first-post.gmi 2023-08-25 - This is my first post +`[1:], + output: ` +<?xml version="1.0" encoding="utf-8"?> +<feed xmlns="http://www.w3.org/2005/Atom"> + <id>gemini://sombodys.site/a/page</id> + <link href="gemini://sombodys.site/a/page"/> + <title>This is a gemlog page</title> + <subtitle>with a subtitle after empty lines</subtitle> + <updated>2023-08-25T12:00:00Z</updated> + <entry> + <id>./first-post.gmi</id> + <link rel="alternate" href="./first-post.gmi"/> + <title>This is my first post</title> + <updated>2023-08-25T12:00:00Z</updated> + </entry> +</feed> +`[1:], + }, + } + + for _, test := range tests { + t.Run(test.url, func(t *testing.T) { + doc, err := gemtext.Parse(bytes.NewBufferString(test.input)) + if err != nil { + t.Fatal(err) + } + loc, err := url.Parse(test.url) + if err != nil { + t.Fatal(err) + } + out := &bytes.Buffer{} + if err := Convert(out, doc, loc); err != nil { + t.Fatal(err) + } + if out.String() != test.output { + t.Fatal("mismatched output") + } + }) + } +} + +func TestAuto(t *testing.T) { + rout := &sliderule.Router{} + + rout.Route("/foo.gmi", types.HandlerFunc(func(ctx context.Context, request *types.Request) *types.Response { + return gemini.Success("text/gemini", bytes.NewBufferString(` +# This is my gemini page + +## a subtitle + +=> ./first-post.gmi 2023-05-17 - My first post +=> ./second-post.gmi 2023-06-02 second-ever post +`[1:])) + })) + + rout.Route("/bar.gmi", types.HandlerFunc(func(ctx context.Context, request *types.Request) *types.Response { + return gemini.Success("text/gemini", bytes.NewBufferString(` +# Another homepage + +=> ./first-post.gmi 2023-05-17 - first post +=> ./second-post.gmi 2023-06-02 second post +`[1:])) + })) + + h := Auto(rout.Handler()) + + response := h.Handle(context.Background(), &types.Request{URL: &url.URL{ + Scheme: "gemini", + Host: "127.0.0.1", + Path: "/foo.gmi.atom", + }}) + if response.Status != gemini.StatusSuccess { + t.Fatal("bad response code") + } + + result, err := io.ReadAll(response.Body) + if err != nil { + t.Fatal(err) + } + + target := ` +<?xml version="1.0" encoding="utf-8"?> +<feed xmlns="http://www.w3.org/2005/Atom"> + <id>gemini://127.0.0.1/foo.gmi.atom</id> + <link href="gemini://127.0.0.1/foo.gmi.atom"/> + <title>This is my gemini page</title> + <subtitle>a subtitle</subtitle> + <updated>2023-06-02T12:00:00Z</updated> + <entry> + <id>./first-post.gmi</id> + <link rel="alternate" href="./first-post.gmi"/> + <title>My first post</title> + <updated>2023-05-17T12:00:00Z</updated> + </entry> + <entry> + <id>./second-post.gmi</id> + <link rel="alternate" href="./second-post.gmi"/> + <title>second-ever post</title> + <updated>2023-06-02T12:00:00Z</updated> + </entry> +</feed> +`[1:] + if string(result) != target { + fmt.Println(target) + fmt.Println(string(result)) + t.Fatal("response body") + } +} |