summaryrefslogtreecommitdiff
path: root/gemini/gemtext
diff options
context:
space:
mode:
Diffstat (limited to 'gemini/gemtext')
-rw-r--r--gemini/gemtext/fuzz_test.go16
-rw-r--r--gemini/gemtext/htmlconv/convert.go86
-rw-r--r--gemini/gemtext/htmlconv/convert_test.go46
-rw-r--r--gemini/gemtext/internal/templates.go150
-rw-r--r--gemini/gemtext/mdconv/convert.go78
-rw-r--r--gemini/gemtext/mdconv/convert_test.go103
-rw-r--r--gemini/gemtext/parse.go49
-rw-r--r--gemini/gemtext/parse_line.go107
-rw-r--r--gemini/gemtext/parse_line_test.go271
-rw-r--r--gemini/gemtext/parse_test.go104
-rw-r--r--gemini/gemtext/types.go184
11 files changed, 1194 insertions, 0 deletions
diff --git a/gemini/gemtext/fuzz_test.go b/gemini/gemtext/fuzz_test.go
new file mode 100644
index 0000000..f5435c1
--- /dev/null
+++ b/gemini/gemtext/fuzz_test.go
@@ -0,0 +1,16 @@
+package gemtext_test
+
+import (
+ "bytes"
+ "testing"
+
+ "tildegit.org/tjp/gus/gemini/gemtext"
+)
+
+// FuzzParse feeds arbitrary bytes to Parse and fails if it reports an error.
+//
+// The seed corpus contains one sample of every gemtext line form so that a
+// plain "go test" run (without -fuzz) still exercises the parser instead of
+// executing zero inputs.
+func FuzzParse(f *testing.F) {
+	seeds := []string{
+		"",
+		"plain text\n",
+		"no trailing newline",
+		"# h1\n## h2\n### h3\n#### h4\n",
+		"* item\n*not an item\n",
+		"> quote\r\n",
+		"=> gemini://example.org/ label\n=>\n",
+		"```alt\npreformatted\n```\n",
+		"```unterminated\nstill preformatted",
+	}
+	for _, seed := range seeds {
+		f.Add([]byte(seed))
+	}
+
+	f.Fuzz(func(t *testing.T, input []byte) {
+		if _, err := gemtext.Parse(bytes.NewReader(input)); err != nil {
+			t.Errorf("Parse error: %s", err.Error())
+		}
+	})
+}
diff --git a/gemini/gemtext/htmlconv/convert.go b/gemini/gemtext/htmlconv/convert.go
new file mode 100644
index 0000000..5028766
--- /dev/null
+++ b/gemini/gemtext/htmlconv/convert.go
@@ -0,0 +1,86 @@
+package htmlconv
+
+import (
+ "html/template"
+ "io"
+
+ "tildegit.org/tjp/gus/gemini/gemtext"
+ "tildegit.org/tjp/gus/gemini/gemtext/internal"
+)
+
+// Convert writes HTML to a writer from the provided gemtext document.
+//
+// Templates can be provided to override the output for different line types.
+// The templates supported are:
+//   - "header" is called before any lines and is passed the full Document.
+//   - "footer" is called after the lines and is passed the full Document.
+//   - "textline" is called once per line of text and is passed a gemtext.TextLine.
+//   - "linkline" is called once per link line and is passed an object which wraps
+//     a gemtext.LinkLine but also supports a ValidatedURL() method returning a
+//     string which html/template will always allow as href attributes.
+//   - "preformattedtextlines" is called once for a block of preformatted text and is
+//     passed a slice of gemtext.PreformattedTextLines.
+//   - "heading1line" is called once per h1 line and is passed a gemtext.Heading1Line.
+//   - "heading2line" is called once per h2 line and is passed a gemtext.Heading2Line.
+//   - "heading3line" is called once per h3 line and is passed a gemtext.Heading3Line.
+//   - "listitemlines" is called once for a block of contiguous list item lines and
+//     is passed a slice of gemtext.ListItemLines.
+//   - "quoteline" is called once per blockquote line and is passed a gemtext.QuoteLine.
+//
+// There exist default implementations of each of these templates, so the "overrides"
+// argument can be nil.
+//
+// An error is returned when link validation fails, when the default templates
+// cannot be cloned or merged with the overrides, or when executing a template
+// fails. Templates are executed one after another directly against wr, so
+// output produced before a failing template has already been written.
+func Convert(wr io.Writer, doc gemtext.Document, overrides *template.Template) error {
+	if err := internal.ValidateLinks(doc); err != nil {
+		return err
+	}
+
+	// Work on a clone so the package-level defaults are never modified.
+	defaults, err := baseTmpl.Clone()
+	if err != nil {
+		return err
+	}
+
+	merged, err := internal.AddHTMLTemplates(defaults, overrides)
+	if err != nil {
+		return err
+	}
+
+	for _, item := range internal.RenderItems(doc) {
+		if err := merged.ExecuteTemplate(wr, item.Template, item.Object); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// baseTmpl holds the default HTML implementation of every template name that
+// Convert executes. Convert clones it on each call instead of executing it
+// directly.
+var baseTmpl = template.Must(template.New("htmlconv").Parse(`
+{{ define "header" }}<html><body>{{ end }}
+{{ define "textline" }}{{ if ne .String "\n" }}<p>{{ . }}</p>{{ end }}{{ end }}
+{{ define "linkline" -}}
+ <p>=> <a href="{{ .ValidatedURL }}">{{ if eq .Label "" -}}
+ {{ .URL }}
+ {{- else -}}
+ {{ .Label }}
+ {{- end -}}
+ </a></p>
+{{- end }}
+{{ define "preformattedtextlines" -}}
+ <pre>
+ {{- range . -}}
+ {{ . }}
+ {{- end -}}
+ </pre>
+{{- end }}
+{{ define "heading1line" }}<h1>{{ .Body }}</h1>{{ end }}
+{{ define "heading2line" }}<h2>{{ .Body }}</h2>{{ end }}
+{{ define "heading3line" }}<h3>{{ .Body }}</h3>{{ end }}
+{{ define "listitemlines" -}}
+ <ul>
+ {{- range . -}}
+ <li>{{ .Body }}</li>
+ {{- end -}}
+ </ul>
+{{- end }}
+{{ define "quoteline" }}<blockquote>{{ .Body }}</blockquote>{{ end }}
+{{ define "footer" }}</body></html>{{ end }}
+`))
diff --git a/gemini/gemtext/htmlconv/convert_test.go b/gemini/gemtext/htmlconv/convert_test.go
new file mode 100644
index 0000000..641ffb8
--- /dev/null
+++ b/gemini/gemtext/htmlconv/convert_test.go
@@ -0,0 +1,46 @@
+package htmlconv_test
+
+import (
+ "bytes"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ "tildegit.org/tjp/gus/gemini/gemtext"
+ "tildegit.org/tjp/gus/gemini/gemtext/htmlconv"
+)
+
+// gmiDoc is the gemtext fixture that TestConvert parses and converts. The
+// leading newline of the raw string is sliced off, and the preformatted block
+// is appended as an interpreted string because it contains backticks.
+var gmiDoc = `
+# top-level header line
+
+## subtitle
+
+This is some non-blank regular text.
+
+* an
+* unordered
+* list
+
+=> gemini://google.com/ as if
+=> https://google.com/
+
+> this is a quote
+> -tjp
+
+`[1:] + "```pre-formatted code\ndoc := gemtext.Parse(req.Body)\n```ignored closing alt-text\n"
+
+// TestConvert converts gmiDoc with the default templates (nil overrides) and
+// compares the output with the expected HTML.
+func TestConvert(t *testing.T) {
+	parsed, parseErr := gemtext.Parse(bytes.NewBufferString(gmiDoc))
+	require.Nil(t, parseErr)
+
+	var rendered bytes.Buffer
+	require.Nil(t, htmlconv.Convert(&rendered, parsed, nil))
+
+	want := `
+<html><body><h1>top-level header line</h1><h2>subtitle</h2><p>This is some non-blank regular text.
+</p><ul><li>an</li><li>unordered</li><li>list</li></ul><p>=> <a href="gemini://google.com/">as if</a></p><p>=> <a href="https://google.com/">https://google.com/</a></p><blockquote> this is a quote</blockquote><blockquote> -tjp</blockquote><pre>doc := gemtext.Parse(req.Body)
+</pre></body></html>`[1:]
+
+	assert.Equal(t, want, rendered.String())
+}
diff --git a/gemini/gemtext/internal/templates.go b/gemini/gemtext/internal/templates.go
new file mode 100644
index 0000000..08bc66c
--- /dev/null
+++ b/gemini/gemtext/internal/templates.go
@@ -0,0 +1,150 @@
+package internal
+
+import (
+ htemplate "html/template"
+ "net/url"
+ "text/template"
+
+ "tildegit.org/tjp/gus/gemini/gemtext"
+)
+
+// Renderers maps the line types that are rendered one line at a time to the
+// name of the template used for them. List item and preformatted lines have
+// no entry here; RenderItems groups those into blocks under its own template
+// names.
+var Renderers = map[gemtext.LineType]string{
+ gemtext.LineTypeText: "textline",
+ gemtext.LineTypeLink: "linkline",
+ gemtext.LineTypeHeading1: "heading1line",
+ gemtext.LineTypeHeading2: "heading2line",
+ gemtext.LineTypeHeading3: "heading3line",
+ gemtext.LineTypeQuote: "quoteline",
+}
+
+// AddAllTemplates registers every template associated with additions on base
+// under the same name, replacing any template of that name base already has.
+//
+// A nil additions is a no-op that returns base unchanged. On failure the
+// returned template is nil and the error from AddParseTree is passed through.
+func AddAllTemplates(base *template.Template, additions *template.Template) (*template.Template, error) {
+	if additions == nil {
+		return base, nil
+	}
+
+	merged := base
+	for _, extra := range additions.Templates() {
+		next, err := merged.AddParseTree(extra.Name(), extra.Tree)
+		if err != nil {
+			return nil, err
+		}
+		merged = next
+	}
+
+	return merged, nil
+}
+
+// AddHTMLTemplates is the html/template counterpart of AddAllTemplates: it
+// registers every template associated with additions on base under the same
+// name.
+//
+// A nil additions is a no-op that returns base unchanged. On failure the
+// returned template is nil and the error from AddParseTree is passed through.
+func AddHTMLTemplates(base *htemplate.Template, additions *htemplate.Template) (*htemplate.Template, error) {
+	if additions == nil {
+		return base, nil
+	}
+
+	merged := base
+	for _, extra := range additions.Templates() {
+		next, err := merged.AddParseTree(extra.Name(), extra.Tree)
+		if err != nil {
+			return nil, err
+		}
+		merged = next
+	}
+
+	return merged, nil
+}
+
+// ValidateLinks checks that the URL of every link line in doc can be parsed
+// by url.Parse and returns the first parse error encountered, or nil when all
+// link URLs parse. Lines of other types are ignored.
+func ValidateLinks(doc gemtext.Document) error {
+	for i := range doc {
+		link, isLink := doc[i].(gemtext.LinkLine)
+		if !isLink {
+			continue
+		}
+		if _, err := url.Parse(link.URL()); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// RenderItem is one template execution produced by RenderItems.
+type RenderItem struct {
+ // Template is the name of the template to execute.
+ Template string
+ // Object is the value handed to that template: the whole document, a
+ // sub-slice of it, or a single line, depending on the template.
+ Object any
+}
+
+// RenderItems flattens a document into the ordered list of template
+// executions needed to render it.
+//
+// The result always starts with a "header" item and ends with a "footer"
+// item, both carrying the whole document. In between:
+//   - each run of list item lines becomes a single "listitemlines" item
+//     holding that sub-slice of the document;
+//   - the lines between an opening and a closing preformat toggle become a
+//     single "preformattedtextlines" item (the toggle lines are left out);
+//   - every other line becomes its own item whose template name is looked up
+//     in Renderers, with link lines wrapped so templates can call
+//     ValidatedURL.
+//
+// A preformatted block that is still open when the document ends is emitted
+// too, rather than being silently dropped from the output.
+func RenderItems(doc gemtext.Document) []RenderItem {
+	// Reserve room for the header and footer on top of the per-line items.
+	out := make([]RenderItem, 0, len(doc)+2)
+	out = append(out, RenderItem{Template: "header", Object: doc})
+
+	inUL, ulStart := false, 0
+	inPF, pfStart := false, 0
+
+	// closeList emits the currently open list block, if any, covering the
+	// lines from ulStart up to (not including) end.
+	closeList := func(end int) {
+		if !inUL {
+			return
+		}
+		inUL = false
+		out = append(out, RenderItem{
+			Template: "listitemlines",
+			Object:   doc[ulStart:end],
+		})
+	}
+
+	for i, line := range doc {
+		switch line.Type() {
+		case gemtext.LineTypeListItem:
+			if !inUL {
+				inUL = true
+				ulStart = i
+			}
+		case gemtext.LineTypePreformatToggle:
+			closeList(i)
+			if inPF {
+				inPF = false
+				out = append(out, RenderItem{
+					Template: "preformattedtextlines",
+					Object:   doc[pfStart+1 : i],
+				})
+			} else {
+				inPF = true
+				pfStart = i
+			}
+		case gemtext.LineTypePreformattedText:
+			// Emitted as part of the enclosing block once it is closed.
+		default:
+			closeList(i)
+
+			if linkLine, ok := line.(gemtext.LinkLine); ok {
+				line = validatedLinkLine{linkLine}
+			}
+
+			out = append(out, RenderItem{
+				Template: Renderers[line.Type()],
+				Object:   line,
+			})
+		}
+	}
+
+	// Flush whatever block is still open at the end of the document.
+	if inPF {
+		out = append(out, RenderItem{
+			Template: "preformattedtextlines",
+			Object:   doc[pfStart+1:],
+		})
+	}
+	closeList(len(doc))
+
+	out = append(out, RenderItem{Template: "footer", Object: doc})
+
+	return out
+}
+
+// validatedLinkLine wraps a gemtext.LinkLine for the link-line template,
+// adding ValidatedURL next to the embedded line's own methods.
+type validatedLinkLine struct {
+	gemtext.LinkLine
+}
+
+// ValidatedURL returns the link target as an htemplate.URL so that
+// html/template emits it in href attributes without applying its own scheme
+// filter, which would otherwise reject schemes such as gemini://.
+//
+// Because this bypasses html/template's protection, targets that a browser
+// would run as script (javascript:, vbscript:) and targets that do not parse
+// as URLs are replaced with "#ZgotmplZ", the same placeholder html/template
+// substitutes for URLs it considers unsafe.
+func (vll validatedLinkLine) ValidatedURL() htemplate.URL {
+	const blocked = htemplate.URL("#ZgotmplZ")
+
+	raw := vll.URL()
+	parsed, err := url.Parse(raw)
+	if err != nil {
+		return blocked
+	}
+
+	// url.Parse lower-cases the scheme, so this comparison is case-insensitive.
+	switch parsed.Scheme {
+	case "javascript", "vbscript":
+		return blocked
+	}
+
+	return htemplate.URL(raw)
+}
diff --git a/gemini/gemtext/mdconv/convert.go b/gemini/gemtext/mdconv/convert.go
new file mode 100644
index 0000000..c2f434d
--- /dev/null
+++ b/gemini/gemtext/mdconv/convert.go
@@ -0,0 +1,78 @@
+package mdconv
+
+import (
+ "io"
+ "text/template"
+
+ "tildegit.org/tjp/gus/gemini/gemtext"
+ "tildegit.org/tjp/gus/gemini/gemtext/internal"
+)
+
+// Convert renders the provided gemtext document as markdown into wr.
+//
+// Output for each kind of line comes from a named template. Callers may pass
+// their own definitions in "overrides"; every name has a default, so nil is
+// accepted. The names and the values they receive are:
+//   - "header": executed first, receives the full Document.
+//   - "footer": executed last, receives the full Document.
+//   - "textline": executed per text line, receives a gemtext.TextLine.
+//   - "linkline": executed per link line, receives a gemtext.LinkLine.
+//   - "preformattedtextlines": executed once per preformatted block, receives
+//     a slice of gemtext.PreformattedTextLines.
+//   - "heading1line": executed per h1 line, receives a gemtext.Heading1Line.
+//   - "heading2line": executed per h2 line, receives a gemtext.Heading2Line.
+//   - "heading3line": executed per h3 line, receives a gemtext.Heading3Line.
+//   - "listitemlines": executed once per block of contiguous list items,
+//     receives a slice of gemtext.ListItemLines.
+//   - "quoteline": executed per blockquote line, receives a gemtext.QuoteLine.
+//
+// The first error from link validation, template cloning/merging, or template
+// execution is returned as-is.
+func Convert(wr io.Writer, doc gemtext.Document, overrides *template.Template) error {
+	if err := internal.ValidateLinks(doc); err != nil {
+		return err
+	}
+
+	// Clone first so overrides never leak into the package-level defaults.
+	defaults, err := baseTmpl.Clone()
+	if err != nil {
+		return err
+	}
+
+	merged, err := internal.AddAllTemplates(defaults, overrides)
+	if err != nil {
+		return err
+	}
+
+	items := internal.RenderItems(doc)
+	for i := range items {
+		if err := merged.ExecuteTemplate(wr, items[i].Template, items[i].Object); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// baseTmpl holds the default markdown implementation of every template name
+// that Convert executes. Convert clones it on each call instead of executing
+// it directly. Backtick fences are spliced in as interpreted strings because
+// they cannot appear inside a raw string literal.
+var baseTmpl = template.Must(template.New("mdconv").Parse(`
+{{ define "header" }}{{ end }}
+{{ define "textline" }}{{ if ne .String "\n" }}
+{{ . }}{{ end }}{{ end }}
+{{ define "linkline" }}
+=> [{{ if eq .Label "" }}{{ .URL }}{{ else }}{{ .Label }}{{ end }}]({{ .URL }})
+{{ end }}
+{{ define "preformattedtextlines" }}` + "\n```\n" + `{{ range . }}{{ . }}{{ end }}` + "```\n" + `{{ end }}
+{{ define "heading1line" }}
+# {{ .Body }}
+{{ end }}
+{{ define "heading2line" }}
+## {{ .Body }}
+{{ end }}
+{{ define "heading3line" }}
+### {{ .Body }}
+{{ end }}
+{{ define "listitemlines" }}
+{{ range . }}* {{ .Body }}
+{{ end }}{{ end }}
+{{ define "quoteline" }}
+> {{ .Body }}
+{{ end }}
+{{ define "footer" }}{{ end }}
+`))
diff --git a/gemini/gemtext/mdconv/convert_test.go b/gemini/gemtext/mdconv/convert_test.go
new file mode 100644
index 0000000..c8fd53c
--- /dev/null
+++ b/gemini/gemtext/mdconv/convert_test.go
@@ -0,0 +1,103 @@
+package mdconv_test
+
+import (
+ "bytes"
+ "testing"
+ "text/template"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ "tildegit.org/tjp/gus/gemini/gemtext"
+ "tildegit.org/tjp/gus/gemini/gemtext/mdconv"
+)
+
+// gmiDoc is the gemtext fixture shared by the conversion tests below. The
+// leading newline of the raw string is sliced off, and the preformatted block
+// is appended as an interpreted string because it contains backticks.
+var gmiDoc = `
+# top-level header line
+
+## subtitle
+
+This is some non-blank regular text.
+
+* an
+* unordered
+* list
+
+=> gemini://google.com/ as if
+=> https://google.com/
+
+> this is a quote
+> -tjp
+
+`[1:] + "```pre-formatted code\ndoc := gemtext.Parse(req.Body)\n```ignored closing alt-text\n"
+
+// TestConvert converts gmiDoc with the default templates (nil overrides) and
+// compares the output with the expected markdown.
+func TestConvert(t *testing.T) {
+	parsed, parseErr := gemtext.Parse(bytes.NewBufferString(gmiDoc))
+	require.Nil(t, parseErr)
+
+	var rendered bytes.Buffer
+	require.Nil(t, mdconv.Convert(&rendered, parsed, nil))
+
+	want := `
+# top-level header line
+
+## subtitle
+
+This is some non-blank regular text.
+
+* an
+* unordered
+* list
+
+=> [as if](gemini://google.com/)
+
+=> [https://google.com/](https://google.com/)
+
+> this is a quote
+
+> -tjp
+
+` + "```\ndoc := gemtext.Parse(req.Body)\n```\n"
+
+	assert.Equal(t, want, rendered.String())
+}
+
+// TestConvertWithOverrides replaces every per-line template with a variant
+// that tags its output, then checks that Convert used the replacements.
+func TestConvertWithOverrides(t *testing.T) {
+	custom := template.Must(template.New("overrides").Parse((`
+ {{define "textline"}}text: {{.}}{{end}}
+ {{define "linkline"}}=> link: [{{if eq .Label ""}}{{.URL}}{{else}}{{.Label}}{{end}}]({{.URL}})` + "\n" + `{{end}}
+ {{define "preformattedtextlines"}}` + "```\n" + `{{range . }}pf: {{.}}{{end}}` + "```\n" + `{{end}}
+ {{define "heading1line"}}# h1: {{.Body}}` + "\n" + `{{end}}
+ {{define "heading2line"}}## h2: {{.Body}}` + "\n" + `{{end}}
+ {{define "heading3line"}}### h3: {{.Body}}` + "\n" + `{{end}}
+ {{define "listitemlines"}}{{range .}}* li: {{.Body}}` + "\n" + `{{end}}{{end}}
+ {{define "quoteline"}}> quote: {{.Body}}` + "\n" + `{{end}}
+ `)[1:]))
+
+	parsed, parseErr := gemtext.Parse(bytes.NewBufferString(gmiDoc))
+	require.Nil(t, parseErr)
+
+	var rendered bytes.Buffer
+	require.Nil(t, mdconv.Convert(&rendered, parsed, custom))
+
+	want := `
+# h1: top-level header line
+text:
+## h2: subtitle
+text:
+text: This is some non-blank regular text.
+text:
+* li: an
+* li: unordered
+* li: list
+text:
+=> link: [as if](gemini://google.com/)
+=> link: [https://google.com/](https://google.com/)
+text:
+> quote: this is a quote
+> quote: -tjp
+text:
+`[1:] + "```\npf: doc := gemtext.Parse(req.Body)\n```\n"
+
+	assert.Equal(t, want, rendered.String())
+}
diff --git a/gemini/gemtext/parse.go b/gemini/gemtext/parse.go
new file mode 100644
index 0000000..7041fde
--- /dev/null
+++ b/gemini/gemtext/parse.go
@@ -0,0 +1,49 @@
+package gemtext
+
+import (
+ "bufio"
+ "io"
+)
+
+// Parse parses the full contents of an io.Reader into a gemtext.Document.
+//
+// Input is consumed one line at a time. Between an opening and a closing
+// "```" toggle every line becomes a PreformattedTextLine; everywhere else the
+// line is classified by ParseLine. Alt-text found on a closing toggle is
+// cleared. A final line without a trailing newline is still included.
+//
+// Any read error other than io.EOF is returned together with a nil Document.
+func Parse(input io.Reader) (Document, error) {
+	rdr := bufio.NewReader(input)
+
+	var lines []Line
+	inPFT := false
+
+	for {
+		raw, err := rdr.ReadBytes('\n')
+		if err != nil && err != io.EOF {
+			return nil, err
+		}
+
+		// ReadBytes returns zero bytes together with io.EOF when the input
+		// ends right after a newline. Skip that read so it is not recorded
+		// as an empty preformatted line when the document ends inside an
+		// unclosed preformatted block.
+		if len(raw) > 0 {
+			isToggle := len(raw) >= 3 && raw[0] == '`' && raw[1] == '`' && raw[2] == '`'
+
+			var line Line
+			if inPFT && !isToggle {
+				line = PreformattedTextLine{raw: raw}
+			} else {
+				line = ParseLine(raw)
+			}
+
+			if toggle, ok := line.(PreformatToggleLine); ok {
+				if inPFT {
+					// Alt-text is only meaningful on the opening toggle.
+					toggle.clearAlt()
+					line = toggle
+				}
+				inPFT = !inPFT
+			}
+
+			lines = append(lines, line)
+		}
+
+		if err == io.EOF {
+			break
+		}
+	}
+
+	return Document(lines), nil
+}
diff --git a/gemini/gemtext/parse_line.go b/gemini/gemtext/parse_line.go
new file mode 100644
index 0000000..39187a8
--- /dev/null
+++ b/gemini/gemtext/parse_line.go
@@ -0,0 +1,107 @@
+package gemtext
+
+import "bytes"
+
+// ParseLine classifies a single line (trailing \n included) and returns the
+// matching gemtext.Line implementation.
+//
+// An empty input yields nil. Anything that does not match one of the special
+// line prefixes is returned as a TextLine. At most three leading '#'
+// characters count towards the heading level.
+func ParseLine(line []byte) Line {
+	if len(line) == 0 {
+		return nil
+	}
+
+	switch {
+	case bytes.HasPrefix(line, []byte("=>")):
+		return parseLinkLine(line)
+	case bytes.HasPrefix(line, []byte("```")):
+		return parsePreformatToggleLine(line)
+	case line[0] == '#':
+		level := 1
+		for level < 3 && level < len(line) && line[level] == '#' {
+			level++
+		}
+		return parseHeadingLine(level, line)
+	case bytes.HasPrefix(line, []byte("* ")):
+		return parseListItemLine(line)
+	case line[0] == '>':
+		return parseQuoteLine(line)
+	}
+
+	return TextLine{raw: line}
+}
+
+// parseLinkLine splits a link line into its URL and optional label.
+//
+// The first two bytes of raw are skipped unconditionally as the "=>" marker,
+// so raw must be at least two bytes long.
+func parseLinkLine(raw []byte) LinkLine {
+	parsed := LinkLine{raw: raw}
+
+	// Drop the marker and any whitespace directly after it.
+	rest := bytes.TrimLeft(raw[2:], " \t")
+
+	// The URL ends at the first space or tab; without one, the remainder of
+	// the line (minus the line ending) is the URL and there is no label.
+	sep := bytes.IndexAny(rest, " \t")
+	if sep < 0 {
+		parsed.url = bytes.TrimRight(rest, "\r\n")
+		return parsed
+	}
+
+	parsed.url = rest[:sep]
+
+	text := bytes.TrimLeft(rest[sep+1:], " \t")
+	text = bytes.TrimRight(text, "\r\n")
+	if len(text) > 0 {
+		parsed.label = text
+	}
+
+	return parsed
+}
+
+// parsePreformatToggleLine builds a PreformatToggleLine from raw. Everything
+// after the first three bytes, minus trailing \r and \n, is kept as alt-text;
+// the alt-text is left unset when nothing remains.
+func parsePreformatToggleLine(raw []byte) PreformatToggleLine {
+	toggle := PreformatToggleLine{raw: raw}
+
+	if alt := bytes.TrimRight(raw[3:], "\r\n"); len(alt) > 0 {
+		toggle.altText = alt
+	}
+
+	return toggle
+}
+
+// parseHeadingLine builds a HeadingLine of the given level. The body is what
+// follows the first level bytes of raw, with leading spaces/tabs and the
+// trailing \r and \n removed.
+func parseHeadingLine(level int, raw []byte) HeadingLine {
+	text := bytes.TrimLeft(raw[level:], " \t")
+	text = bytes.TrimRight(text, "\r\n")
+
+	// The heading line types are consecutive, so the level is an offset from
+	// LineTypeHeading1.
+	kind := LineTypeHeading1 - 1 + LineType(level)
+
+	return HeadingLine{raw: raw, lineType: kind, body: text}
+}
+
+// parseListItemLine builds a ListItemLine whose body is raw without its
+// first two bytes and without the trailing \r and \n.
+func parseListItemLine(raw []byte) ListItemLine {
+	item := ListItemLine{raw: raw}
+	item.body = bytes.TrimRight(raw[2:], "\r\n")
+	return item
+}
+
+// parseQuoteLine builds a QuoteLine whose body is raw without its first byte
+// and without the trailing \r and \n. Leading whitespace in the body is kept.
+func parseQuoteLine(raw []byte) QuoteLine {
+	quote := QuoteLine{raw: raw}
+	quote.body = bytes.TrimRight(raw[1:], "\r\n")
+	return quote
+}
diff --git a/gemini/gemtext/parse_line_test.go b/gemini/gemtext/parse_line_test.go
new file mode 100644
index 0000000..a07fa3b
--- /dev/null
+++ b/gemini/gemtext/parse_line_test.go
@@ -0,0 +1,271 @@
+package gemtext_test
+
+import (
+ "testing"
+
+ "tildegit.org/tjp/gus/gemini/gemtext"
+)
+
+// TestParseLinkLine checks that link lines are recognised by ParseLine and
+// split into the expected URL and label.
+func TestParseLinkLine(t *testing.T) {
+	type linkCase struct {
+		input string
+		url   string
+		label string
+	}
+
+	cases := []linkCase{
+		{
+			input: "=> gemini.ctrl-c.club/~tjp/ home page\r\n",
+			url:   "gemini.ctrl-c.club/~tjp/",
+			label: "home page",
+		},
+		{
+			input: "=> gemi.dev/\n",
+			url:   "gemi.dev/",
+		},
+		{
+			input: "=> /gemlog/foobar 2023-01-13 - Foo Bar\n",
+			url:   "/gemlog/foobar",
+			label: "2023-01-13 - Foo Bar",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.input, func(t *testing.T) {
+			parsed := gemtext.ParseLine([]byte(tc.input))
+			if parsed == nil {
+				t.Fatal("ParseLine() returned nil line")
+			}
+			if string(parsed.Raw()) != tc.input {
+				t.Error("Raw() does not match input")
+			}
+
+			if kind := parsed.Type(); kind != gemtext.LineTypeLink {
+				t.Errorf("expected LineTypeLink, got %d", kind)
+			}
+			link, ok := parsed.(gemtext.LinkLine)
+			if !ok {
+				t.Fatalf("expected a LinkLine, got %T", parsed)
+			}
+
+			if got := link.URL(); got != tc.url {
+				t.Errorf("expected url %q, got %q", tc.url, got)
+			}
+
+			if got := link.Label(); got != tc.label {
+				t.Errorf("expected label %q, got %q", tc.label, got)
+			}
+		})
+	}
+}
+
+// TestParsePreformatToggleLine checks that toggle lines are recognised by
+// ParseLine and that their alt-text is extracted.
+func TestParsePreformatToggleLine(t *testing.T) {
+	type toggleCase struct {
+		input   string
+		altText string
+	}
+
+	cases := []toggleCase{
+		{
+			input: "```\n",
+		},
+		{
+			input:   "```some alt-text\r\n",
+			altText: "some alt-text",
+		},
+		{
+			input:   "``` leading space preserved\n",
+			altText: " leading space preserved",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.input, func(t *testing.T) {
+			parsed := gemtext.ParseLine([]byte(tc.input))
+			if parsed == nil {
+				t.Fatal("ParseLine() returned nil line")
+			}
+			if string(parsed.Raw()) != tc.input {
+				t.Error("Raw() does not match input")
+			}
+
+			if kind := parsed.Type(); kind != gemtext.LineTypePreformatToggle {
+				t.Errorf("expected LineTypePreformatToggle, got %d", kind)
+			}
+			toggle, ok := parsed.(gemtext.PreformatToggleLine)
+			if !ok {
+				t.Fatalf("expected a PreformatToggleLine, got %T", parsed)
+			}
+
+			if got := toggle.AltText(); got != tc.altText {
+				t.Errorf("expected alt-text %q, got %q", tc.altText, got)
+			}
+		})
+	}
+}
+
+// TestParseHeadingLine checks the heading level and body that ParseLine
+// produces for lines starting with '#'.
+func TestParseHeadingLine(t *testing.T) {
+	type headingCase struct {
+		input    string
+		lineType gemtext.LineType
+		body     string
+	}
+
+	cases := []headingCase{
+		{
+			input:    "# this is an H1\n",
+			lineType: gemtext.LineTypeHeading1,
+			body:     "this is an H1",
+		},
+		{
+			input:    "## extra leading spaces\r\n",
+			lineType: gemtext.LineTypeHeading2,
+			body:     "extra leading spaces",
+		},
+		{
+			input:    "##no leading space\n",
+			lineType: gemtext.LineTypeHeading2,
+			body:     "no leading space",
+		},
+		{
+			input:    "#### there is no h4\n",
+			lineType: gemtext.LineTypeHeading3,
+			body:     "# there is no h4",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.input, func(t *testing.T) {
+			parsed := gemtext.ParseLine([]byte(tc.input))
+			if parsed == nil {
+				t.Fatal("ParseLine() returned nil")
+			}
+
+			if kind := parsed.Type(); kind != tc.lineType {
+				t.Errorf("expected line type %d, got %d", tc.lineType, kind)
+			}
+			if string(parsed.Raw()) != tc.input {
+				t.Error("line.Raw() does not match input")
+			}
+
+			heading, ok := parsed.(gemtext.HeadingLine)
+			if !ok {
+				t.Fatalf("expected HeadingLine, got a %T", parsed)
+			}
+
+			if got := heading.Body(); got != tc.body {
+				t.Errorf("expected body %q, got %q", tc.body, got)
+			}
+		})
+	}
+}
+
+// TestParseListItemLine checks that list item lines are recognised by
+// ParseLine and that the body is extracted.
+func TestParseListItemLine(t *testing.T) {
+	type itemCase struct {
+		input string
+		body  string
+	}
+
+	cases := []itemCase{
+		{
+			input: "* this is a list item\r\n",
+			body:  "this is a list item",
+		},
+		{
+			input: "* more leading spaces\n",
+			body:  " more leading spaces",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.input, func(t *testing.T) {
+			parsed := gemtext.ParseLine([]byte(tc.input))
+			if parsed == nil {
+				t.Fatal("ParseLine() returned nil")
+			}
+
+			if kind := parsed.Type(); kind != gemtext.LineTypeListItem {
+				t.Errorf("expected LineTypeListItem, got %d", kind)
+			}
+			if string(parsed.Raw()) != tc.input {
+				t.Error("line.Raw() does not match input")
+			}
+
+			item, ok := parsed.(gemtext.ListItemLine)
+			if !ok {
+				t.Fatalf("expected ListItemLine, got a %T", parsed)
+			}
+
+			if got := item.Body(); got != tc.body {
+				t.Errorf("expected body %q, got %q", tc.body, got)
+			}
+		})
+	}
+}
+
+// TestParseQuoteLine checks that quote lines are recognised by ParseLine and
+// that the body keeps its leading whitespace.
+func TestParseQuoteLine(t *testing.T) {
+	type quoteCase struct {
+		input string
+		body  string
+	}
+
+	cases := []quoteCase{
+		{
+			input: ">a quote line\r\n",
+			body:  "a quote line",
+		},
+		{
+			input: "> with a leading space\n",
+			body:  " with a leading space",
+		},
+		{
+			input: "> more leading spaces\n",
+			body:  " more leading spaces",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.input, func(t *testing.T) {
+			parsed := gemtext.ParseLine([]byte(tc.input))
+			if parsed == nil {
+				t.Fatal("ParseLine() returned nil")
+			}
+
+			if kind := parsed.Type(); kind != gemtext.LineTypeQuote {
+				t.Errorf("expected LineTypeQuote, got %d", kind)
+			}
+			if string(parsed.Raw()) != tc.input {
+				t.Error("line.Raw() does not match input")
+			}
+
+			quote, ok := parsed.(gemtext.QuoteLine)
+			if !ok {
+				t.Fatalf("expected QuoteLine , got a %T", parsed)
+			}
+
+			if got := quote.Body(); got != tc.body {
+				t.Errorf("expected body %q, got %q", tc.body, got)
+			}
+		})
+	}
+}
+
+// TestParseTextLine checks that lines matching no special prefix come back
+// from ParseLine as TextLine values.
+func TestParseTextLine(t *testing.T) {
+	inputs := []string{
+		"\n",
+		"simple text line\r\n",
+		" * an invalid list item\n",
+		"*another invalid list item\r\n",
+	}
+
+	for _, input := range inputs {
+		t.Run(input, func(t *testing.T) {
+			parsed := gemtext.ParseLine([]byte(input))
+			if parsed == nil {
+				t.Fatal("ParseLine() returned nil")
+			}
+
+			if kind := parsed.Type(); kind != gemtext.LineTypeText {
+				t.Errorf("expected LineTypeText, got %d", kind)
+			}
+			if string(parsed.Raw()) != input {
+				t.Error("line.Raw() does not match input")
+			}
+
+			if _, ok := parsed.(gemtext.TextLine); !ok {
+				t.Fatalf("expected TextLine , got a %T", parsed)
+			}
+		})
+	}
+}
diff --git a/gemini/gemtext/parse_test.go b/gemini/gemtext/parse_test.go
new file mode 100644
index 0000000..d2860ff
--- /dev/null
+++ b/gemini/gemtext/parse_test.go
@@ -0,0 +1,104 @@
+package gemtext_test
+
+import (
+ "bytes"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ "tildegit.org/tjp/gus/gemini/gemtext"
+)
+
+// TestParse parses a document containing every line type, checks each parsed
+// line, and verifies the input can be rebuilt from the lines' Raw() bytes.
+func TestParse(t *testing.T) {
+	source := []byte(`
+# top-level header line
+
+## subtitle
+
+This is some non-blank regular text.
+
+* an
+* unordered
+* list
+
+=> gemini://google.com/ as if
+
+> this is a quote
+> -tjp
+
+`[1:] + "```pre-formatted code\ndoc := gemtext.Parse(req.Body)\n```ignored closing alt-text\n")
+
+	doc, err := gemtext.Parse(bytes.NewBuffer(source))
+	require.Nil(t, err)
+
+	require.Equal(t, 18, len(doc))
+
+	// Blank lines separate the sections of the fixture.
+	for _, idx := range []int{1, 3, 5, 9, 11, 14} {
+		assert.Equal(t, gemtext.LineTypeText, doc[idx].Type())
+		assert.Equal(t, "\n", string(doc[idx].Raw()))
+	}
+
+	h1 := doc[0]
+	assert.Equal(t, gemtext.LineTypeHeading1, h1.Type())
+	assert.Equal(t, "# top-level header line\n", string(h1.Raw()))
+	assert.Equal(t, "top-level header line", h1.(gemtext.HeadingLine).Body())
+
+	h2 := doc[2]
+	assert.Equal(t, gemtext.LineTypeHeading2, h2.Type())
+	assert.Equal(t, "## subtitle\n", string(h2.Raw()))
+	assert.Equal(t, "subtitle", h2.(gemtext.HeadingLine).Body())
+
+	text := doc[4]
+	assert.Equal(t, gemtext.LineTypeText, text.Type())
+	assert.Equal(t, "This is some non-blank regular text.\n", string(text.Raw()))
+
+	for offset, body := range []string{"an", "unordered", "list"} {
+		item := doc[6+offset]
+		assert.Equal(t, gemtext.LineTypeListItem, item.Type())
+		assert.Equal(t, body, item.(gemtext.ListItemLine).Body())
+	}
+
+	link := doc[10]
+	assert.Equal(t, gemtext.LineTypeLink, link.Type())
+	assert.Equal(t, "=> gemini://google.com/ as if\n", string(link.Raw()))
+	assert.Equal(t, "gemini://google.com/", link.(gemtext.LinkLine).URL())
+	assert.Equal(t, "as if", link.(gemtext.LinkLine).Label())
+
+	quote := doc[12]
+	assert.Equal(t, gemtext.LineTypeQuote, quote.Type())
+	assert.Equal(t, "> this is a quote\n", string(quote.Raw()))
+	assert.Equal(t, " this is a quote", quote.(gemtext.QuoteLine).Body())
+
+	attribution := doc[13]
+	assert.Equal(t, gemtext.LineTypeQuote, attribution.Type())
+	assert.Equal(t, "> -tjp\n", string(attribution.Raw()))
+	assert.Equal(t, " -tjp", attribution.(gemtext.QuoteLine).Body())
+
+	opening := doc[15]
+	assert.Equal(t, gemtext.LineTypePreformatToggle, opening.Type())
+	assert.Equal(t, "```pre-formatted code\n", string(opening.Raw()))
+	assert.Equal(t, "pre-formatted code", opening.(gemtext.PreformatToggleLine).AltText())
+
+	preformatted := doc[16]
+	assert.Equal(t, gemtext.LineTypePreformattedText, preformatted.Type())
+	assert.Equal(t, "doc := gemtext.Parse(req.Body)\n", string(preformatted.Raw()))
+
+	closing := doc[17]
+	assert.Equal(t, gemtext.LineTypePreformatToggle, closing.Type())
+	assert.Equal(t, "```ignored closing alt-text\n", string(closing.Raw()))
+	assert.Equal(t, "", closing.(gemtext.PreformatToggleLine).AltText())
+
+	// ensure we can rebuild the original doc from all the line.Raw()s
+	var rebuilt []byte
+	for _, line := range doc {
+		rebuilt = append(rebuilt, line.Raw()...)
+	}
+	assert.Equal(t, string(source), string(rebuilt))
+}
diff --git a/gemini/gemtext/types.go b/gemini/gemtext/types.go
new file mode 100644
index 0000000..440fed4
--- /dev/null
+++ b/gemini/gemtext/types.go
@@ -0,0 +1,184 @@
+package gemtext
+
+// LineType represents the different types of lines in a gemtext document.
+//
+// The valid values start at 1, so the zero value is not a valid line type.
+type LineType int
+
+const (
+ // LineTypeText is the default case when nothing else matches.
+ //
+ // It indicates that the line object is a TextLine.
+ LineTypeText LineType = iota + 1
+
+ // LineTypeLink is a link line.
+ //
+ // =>[<ws>]<url>[<ws><label>][\r]\n
+ //
+ // The line object is a LinkLine.
+ LineTypeLink
+
+ // LineTypePreformatToggle switches the document between pre-formatted text or not.
+ //
+ // ```[<alt-text>][\r]\n
+ //
+ // The line object is a PreformatToggleLine.
+ LineTypePreformatToggle
+
+ // LineTypePreformattedText is any line between two PreformatToggles.
+ //
+ // The line object is a PreformattedTextLine.
+ LineTypePreformattedText
+
+ // LineTypeHeading1 is a top-level heading.
+ //
+ // #[<ws>]<body>[\r]\n
+ //
+ // The line object is a HeadingLine.
+ LineTypeHeading1
+
+ // LineTypeHeading2 is a second-level heading.
+ //
+ // ##[<ws>]<body>[\r]\n
+ //
+ // The line object is a HeadingLine.
+ LineTypeHeading2
+
+ // LineTypeHeading3 is a third-level heading.
+ //
+ // ###[<ws>]<body>[\r]\n
+ //
+ // The line object is a HeadingLine.
+ LineTypeHeading3
+
+ // LineTypeListItem is an unordered list item.
+ //
+ // * <body>[\r]\n
+ //
+ // The line object is a ListItemLine.
+ LineTypeListItem
+
+ // LineTypeQuote is a quote line.
+ //
+ // ><body>[\r]\n
+ //
+ // The line object is a QuoteLine.
+ LineTypeQuote
+)
+
+// Line is the interface implemented by all specific line types.
+//
+// Many of those concrete implementation types have additional useful methods,
+// so it can be a good idea to type-assert a Line to its concrete type based
+// on the return value of the Type() method.
+type Line interface {
+ // Type returns the specific type of the gemtext line.
+ Type() LineType
+
+ // Raw reproduces the original bytes from the source reader.
+ Raw() []byte
+
+ // String represents the original bytes from the source reader as a string.
+ String() string
+}
+
+// Document is the list of lines that make up a full text/gemini resource,
+// in the order they appeared in the source.
+type Document []Line
+
+// TextLine is a line of LineTypeText.
+type TextLine struct {
+	raw []byte
+}
+
+// Type always reports LineTypeText.
+func (t TextLine) Type() LineType {
+	return LineTypeText
+}
+
+// Raw returns the bytes the line was created from.
+func (t TextLine) Raw() []byte {
+	return t.raw
+}
+
+// String returns the same content as Raw, as a string.
+func (t TextLine) String() string {
+	return string(t.raw)
+}
+
+// LinkLine is a line of LineTypeLink.
+type LinkLine struct {
+	raw   []byte
+	url   []byte
+	label []byte
+}
+
+// Type always reports LineTypeLink.
+func (l LinkLine) Type() LineType {
+	return LineTypeLink
+}
+
+// Raw returns the bytes the line was created from.
+func (l LinkLine) Raw() []byte {
+	return l.raw
+}
+
+// String returns the same content as Raw, as a string.
+func (l LinkLine) String() string {
+	return string(l.raw)
+}
+
+// URL returns the original url portion of the line.
+//
+// It is not guaranteed to be a valid URL.
+func (l LinkLine) URL() string {
+	return string(l.url)
+}
+
+// Label returns the label portion of the line, which is empty when the line
+// carried no label.
+func (l LinkLine) Label() string {
+	return string(l.label)
+}
+
+// PreformatToggleLine is a preformatted text toggle line.
+type PreformatToggleLine struct {
+	raw     []byte
+	altText []byte
+}
+
+// Type always reports LineTypePreformatToggle.
+func (p PreformatToggleLine) Type() LineType {
+	return LineTypePreformatToggle
+}
+
+// Raw returns the bytes the line was created from.
+func (p PreformatToggleLine) Raw() []byte {
+	return p.raw
+}
+
+// String returns the same content as Raw, as a string.
+func (p PreformatToggleLine) String() string {
+	return string(p.raw)
+}
+
+// AltText returns the alt-text portion of the line.
+//
+// If the line was parsed as part of a full document by Parse(),
+// and this is a *closing* toggle, any alt-text present will be
+// stripped and this will be empty. If the line was parsed by
+// ParseLine() no such correction is performed.
+func (p PreformatToggleLine) AltText() string {
+	return string(p.altText)
+}
+
+// clearAlt discards the alt-text; Raw and String are unaffected.
+func (p *PreformatToggleLine) clearAlt() {
+	p.altText = nil
+}
+
+// PreformattedTextLine represents a line between two toggles.
+//
+// It is never returned by ParseLine but can be part of a
+// document parsed by Parse().
+type PreformattedTextLine struct {
+	raw []byte
+}
+
+// Type always reports LineTypePreformattedText.
+func (p PreformattedTextLine) Type() LineType {
+	return LineTypePreformattedText
+}
+
+// Raw returns the bytes the line was created from.
+func (p PreformattedTextLine) Raw() []byte {
+	return p.raw
+}
+
+// String returns the same content as Raw, as a string.
+func (p PreformattedTextLine) String() string {
+	return string(p.raw)
+}
+
+// HeadingLine is a line of LineTypeHeading[1,2,3].
+type HeadingLine struct {
+	raw      []byte
+	lineType LineType
+	body     []byte
+}
+
+// Type reports the heading line type stored when the line was created.
+func (h HeadingLine) Type() LineType {
+	return h.lineType
+}
+
+// Raw returns the bytes the line was created from.
+func (h HeadingLine) Raw() []byte {
+	return h.raw
+}
+
+// String returns the same content as Raw, as a string.
+func (h HeadingLine) String() string {
+	return string(h.raw)
+}
+
+// Body returns the portion of the line with the header text.
+func (h HeadingLine) Body() string {
+	return string(h.body)
+}
+
+// ListItemLine is a line of LineTypeListItem.
+type ListItemLine struct {
+	raw  []byte
+	body []byte
+}
+
+// Type always reports LineTypeListItem.
+func (l ListItemLine) Type() LineType {
+	return LineTypeListItem
+}
+
+// Raw returns the bytes the line was created from.
+func (l ListItemLine) Raw() []byte {
+	return l.raw
+}
+
+// String returns the same content as Raw, as a string.
+func (l ListItemLine) String() string {
+	return string(l.raw)
+}
+
+// Body returns the text of the list item.
+func (l ListItemLine) Body() string {
+	return string(l.body)
+}
+
+// QuoteLine is a line of LineTypeQuote.
+type QuoteLine struct {
+	raw  []byte
+	body []byte
+}
+
+// Type always reports LineTypeQuote.
+func (q QuoteLine) Type() LineType {
+	return LineTypeQuote
+}
+
+// Raw returns the bytes the line was created from.
+func (q QuoteLine) Raw() []byte {
+	return q.raw
+}
+
+// String returns the same content as Raw, as a string.
+func (q QuoteLine) String() string {
+	return string(q.raw)
+}
+
+// Body returns the text of the quote.
+func (q QuoteLine) Body() string {
+	return string(q.body)
+}