diff options
author | tjpcc <tjp@ctrl-c.club> | 2023-01-17 16:41:04 -0700 |
---|---|---|
committer | tjpcc <tjp@ctrl-c.club> | 2023-01-17 16:41:04 -0700 |
commit | 6586db782ea6dcb5f2eb191a690ec7e7df51161f (patch) | |
tree | 36158a53a6d8aad9f5a873c6c43d598ce5647b97 /gemini/gemtext | |
parent | 2ef530daa47b301a40c1ee93cd43b8f36fc68c0b (diff) |
Updates
* update README
* move "gemtext" to within "gemini"
Diffstat (limited to 'gemini/gemtext')
-rw-r--r-- | gemini/gemtext/fuzz_test.go | 16 | ||||
-rw-r--r-- | gemini/gemtext/htmlconv/convert.go | 86 | ||||
-rw-r--r-- | gemini/gemtext/htmlconv/convert_test.go | 46 | ||||
-rw-r--r-- | gemini/gemtext/internal/templates.go | 150 | ||||
-rw-r--r-- | gemini/gemtext/mdconv/convert.go | 78 | ||||
-rw-r--r-- | gemini/gemtext/mdconv/convert_test.go | 103 | ||||
-rw-r--r-- | gemini/gemtext/parse.go | 49 | ||||
-rw-r--r-- | gemini/gemtext/parse_line.go | 107 | ||||
-rw-r--r-- | gemini/gemtext/parse_line_test.go | 271 | ||||
-rw-r--r-- | gemini/gemtext/parse_test.go | 104 | ||||
-rw-r--r-- | gemini/gemtext/types.go | 184 |
11 files changed, 1194 insertions, 0 deletions
diff --git a/gemini/gemtext/fuzz_test.go b/gemini/gemtext/fuzz_test.go new file mode 100644 index 0000000..f5435c1 --- /dev/null +++ b/gemini/gemtext/fuzz_test.go @@ -0,0 +1,16 @@ +package gemtext_test + +import ( + "bytes" + "testing" + + "tildegit.org/tjp/gus/gemini/gemtext" +) + +func FuzzParse(f *testing.F) { + f.Fuzz(func(t *testing.T, input []byte) { + if _, err := gemtext.Parse(bytes.NewBuffer(input)); err != nil { + t.Errorf("Parse error: %s", err.Error()) + } + }) +} diff --git a/gemini/gemtext/htmlconv/convert.go b/gemini/gemtext/htmlconv/convert.go new file mode 100644 index 0000000..5028766 --- /dev/null +++ b/gemini/gemtext/htmlconv/convert.go @@ -0,0 +1,86 @@ +package htmlconv + +import ( + "html/template" + "io" + + "tildegit.org/tjp/gus/gemini/gemtext" + "tildegit.org/tjp/gus/gemini/gemtext/internal" +) + +// Convert writes HTML to a writer from the provided gemtext document. +// +// Templates can be provided to override the output for different line types. +// The templates supported are: +// - "header" is called before any lines and is passed the full Document. +// - "footer" is called after the lines and is passed the full Document. +// - "textline" is called once per line of text and is passed a gemtext.TextLine. +// - "linkline" is called once per link line and is passed an object which wraps +// a gemtext.LinkLine but also supports a ValidatedURL() method returning a +// string which html/template will always allow as href attributes. +// - "preformattedtextlines" is called once for a block of preformatted text and is +// passed a slice of gemtext.PreformattedTextLines. +// - "heading1line" is called once per h1 line and is passed a gemtext.Heading1Line. +// - "heading2line" is called once per h2 line and is passed a gemtext.Heading2Line. +// - "heading3line" is called once per h3 line and is passed a gemtext.Heading3Line. 
+// - "listitemlines" is called once for a block of contiguous list item lines and +// is passed a slice of gemtext.ListItemLines. +// - "quoteline" is passed once per blockquote line and is passed a gemtext.QuoteLine. +// +// There exist default implementations of each of these templates, so the "overrides" +// argument can be nil. +func Convert(wr io.Writer, doc gemtext.Document, overrides *template.Template) error { + if err := internal.ValidateLinks(doc); err != nil { + return err + } + + tmpl, err := baseTmpl.Clone() + if err != nil { + return err + } + + tmpl, err = internal.AddHTMLTemplates(tmpl, overrides) + if err != nil { + return err + } + + for _, item := range internal.RenderItems(doc) { + if err := tmpl.ExecuteTemplate(wr, item.Template, item.Object); err != nil { + return err + } + } + + return nil +} + +var baseTmpl = template.Must(template.New("htmlconv").Parse(` +{{ define "header" }}<html><body>{{ end }} +{{ define "textline" }}{{ if ne .String "\n" }}<p>{{ . }}</p>{{ end }}{{ end }} +{{ define "linkline" -}} + <p>=> <a href="{{ .ValidatedURL }}">{{ if eq .Label "" -}} + {{ .URL }} + {{- else -}} + {{ .Label }} + {{- end -}} + </a></p> +{{- end }} +{{ define "preformattedtextlines" -}} + <pre> + {{- range . -}} + {{ . }} + {{- end -}} + </pre> +{{- end }} +{{ define "heading1line" }}<h1>{{ .Body }}</h1>{{ end }} +{{ define "heading2line" }}<h2>{{ .Body }}</h2>{{ end }} +{{ define "heading3line" }}<h3>{{ .Body }}</h3>{{ end }} +{{ define "listitemlines" -}} + <ul> + {{- range . 
-}} + <li>{{ .Body }}</li> + {{- end -}} + </ul> +{{- end }} +{{ define "quoteline" }}<blockquote>{{ .Body }}</blockquote>{{ end }} +{{ define "footer" }}</body></html>{{ end }} +`)) diff --git a/gemini/gemtext/htmlconv/convert_test.go b/gemini/gemtext/htmlconv/convert_test.go new file mode 100644 index 0000000..641ffb8 --- /dev/null +++ b/gemini/gemtext/htmlconv/convert_test.go @@ -0,0 +1,46 @@ +package htmlconv_test + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "tildegit.org/tjp/gus/gemini/gemtext" + "tildegit.org/tjp/gus/gemini/gemtext/htmlconv" +) + +var gmiDoc = ` +# top-level header line + +## subtitle + +This is some non-blank regular text. + +* an +* unordered +* list + +=> gemini://google.com/ as if +=> https://google.com/ + +> this is a quote +> -tjp + +`[1:] + "```pre-formatted code\ndoc := gemtext.Parse(req.Body)\n```ignored closing alt-text\n" + +func TestConvert(t *testing.T) { + htmlDoc := ` +<html><body><h1>top-level header line</h1><h2>subtitle</h2><p>This is some non-blank regular text. 
+</p><ul><li>an</li><li>unordered</li><li>list</li></ul><p>=> <a href="gemini://google.com/">as if</a></p><p>=> <a href="https://google.com/">https://google.com/</a></p><blockquote> this is a quote</blockquote><blockquote> -tjp</blockquote><pre>doc := gemtext.Parse(req.Body) +</pre></body></html>`[1:] + + doc, err := gemtext.Parse(bytes.NewBufferString(gmiDoc)) + require.Nil(t, err) + + buf := &bytes.Buffer{} + require.Nil(t, htmlconv.Convert(buf, doc, nil)) + + assert.Equal(t, htmlDoc, buf.String()) +} diff --git a/gemini/gemtext/internal/templates.go b/gemini/gemtext/internal/templates.go new file mode 100644 index 0000000..08bc66c --- /dev/null +++ b/gemini/gemtext/internal/templates.go @@ -0,0 +1,150 @@ +package internal + +import ( + htemplate "html/template" + "net/url" + "text/template" + + "tildegit.org/tjp/gus/gemini/gemtext" +) + +var Renderers = map[gemtext.LineType]string{ + gemtext.LineTypeText: "textline", + gemtext.LineTypeLink: "linkline", + gemtext.LineTypeHeading1: "heading1line", + gemtext.LineTypeHeading2: "heading2line", + gemtext.LineTypeHeading3: "heading3line", + gemtext.LineTypeQuote: "quoteline", +} + +func AddAllTemplates(base *template.Template, additions *template.Template) (*template.Template, error) { + if additions == nil { + return base, nil + } + + tmpl := base + var err error + for _, addition := range additions.Templates() { + tmpl, err = tmpl.AddParseTree(addition.Name(), addition.Tree) + if err != nil { + return nil, err + } + } + + return tmpl, nil +} + +func AddHTMLTemplates(base *htemplate.Template, additions *htemplate.Template) (*htemplate.Template, error) { + if additions == nil { + return base, nil + } + + tmpl := base + var err error + for _, addition := range additions.Templates() { + tmpl, err = tmpl.AddParseTree(addition.Name(), addition.Tree) + if err != nil { + return nil, err + } + } + + return tmpl, nil +} + +func ValidateLinks(doc gemtext.Document) error { + for _, line := range doc { + if linkLine, ok := 
line.(gemtext.LinkLine); ok { + _, err := url.Parse(linkLine.URL()) + if err != nil { + return err + } + } + } + return nil +} + +type RenderItem struct { + Template string + Object any +} + +func RenderItems(doc gemtext.Document) []RenderItem { + out := make([]RenderItem, 0, len(doc)) + out = append(out, RenderItem{ + Template: "header", + Object: doc, + }) + + inUL := false + ulStart := 0 + inPF := false + pfStart := 0 + + for i, line := range doc { + switch line.Type() { + case gemtext.LineTypeListItem: + if !inUL { + inUL = true + ulStart = i + } + case gemtext.LineTypePreformatToggle: + if inUL { + inUL = false + out = append(out, RenderItem{ + Template: "listitemlines", + Object: doc[ulStart:i], + }) + } + if !inPF { + inPF = true + pfStart = i + } else { + inPF = false + out = append(out, RenderItem{ + Template: "preformattedtextlines", + Object: doc[pfStart+1 : i], + }) + } + case gemtext.LineTypePreformattedText: + default: + if inUL { + inUL = false + out = append(out, RenderItem{ + Template: "listitemlines", + Object: doc[ulStart:i], + }) + } + + if linkLine, ok := line.(gemtext.LinkLine); ok { + line = validatedLinkLine{linkLine} + } + + out = append(out, RenderItem{ + Template: Renderers[line.Type()], + Object: line, + }) + } + } + + if inUL { + out = append(out, RenderItem{ + Template: "listitemlines", + Object: doc[ulStart:], + }) + } + + out = append(out, RenderItem{ + Template: "footer", + Object: doc, + }) + + return out +} + +type validatedLinkLine struct { + gemtext.LinkLine +} + +func (vll validatedLinkLine) ValidatedURL() htemplate.URL { + return htemplate.URL(vll.URL()) +} diff --git a/gemini/gemtext/mdconv/convert.go b/gemini/gemtext/mdconv/convert.go new file mode 100644 index 0000000..c2f434d --- /dev/null +++ b/gemini/gemtext/mdconv/convert.go @@ -0,0 +1,78 @@ +package mdconv + +import ( + "io" + "text/template" + + "tildegit.org/tjp/gus/gemini/gemtext" + "tildegit.org/tjp/gus/gemini/gemtext/internal" +) + +// Convert writes markdown to a 
writer from the provided gemtext document. +// +// Templates can be provided to override the output for different line types. +// The templates supported are: +// - "header" is called before any lines and is passed the full Document. +// - "footer" is called after the lines and is passed the full Document. +// - "textline" is called once per line of text and is passed a gemtext.TextLine. +// - "linkline" is called once per link line and is passed a gemtext.LinkLine. +// - "preformattedtextlines" is called once for a block of preformatted text and is +// passed a slice of gemtext.PreformattedTextLines. +// - "heading1line" is called once per h1 line and is passed a gemtext.Heading1Line. +// - "heading2line" is called once per h2 line and is passed a gemtext.Heading2Line. +// - "heading3line" is called once per h3 line and is passed a gemtext.Heading3Line. +// - "listitemlines" is called once for a block of contiguous list item lines and +// is passed a slice of gemtext.ListItemLines. +// - "quoteline" is passed once per blockquote line and is passed a gemtext.QuoteLine. +// +// There exist default implementations of each of these templates, so the "overrides" +// argument can be nil. +func Convert(wr io.Writer, doc gemtext.Document, overrides *template.Template) error { + if err := internal.ValidateLinks(doc); err != nil { + return err + } + + tmpl, err := baseTmpl.Clone() + if err != nil { + return err + } + + tmpl, err = internal.AddAllTemplates(tmpl, overrides) + if err != nil { + return err + } + + for _, item := range internal.RenderItems(doc) { + if err := tmpl.ExecuteTemplate(wr, item.Template, item.Object); err != nil { + return err + } + } + + return nil +} + +var baseTmpl = template.Must(template.New("mdconv").Parse(` +{{ define "header" }}{{ end }} +{{ define "textline" }}{{ if ne .String "\n" }} +{{ . 
}}{{ end }}{{ end }} +{{ define "linkline" }} +=> [{{ if eq .Label "" }}{{ .URL }}{{ else }}{{ .Label }}{{ end }}]({{ .URL }}) +{{ end }} +{{ define "preformattedtextlines" }}` + "\n```\n" + `{{ range . }}{{ . }}{{ end }}` + "```\n" + `{{ end }} +{{ define "heading1line" }} +# {{ .Body }} +{{ end }} +{{ define "heading2line" }} +## {{ .Body }} +{{ end }} +{{ define "heading3line" }} +### {{ .Body }} +{{ end }} +{{ define "listitemlines" }} +{{ range . }}* {{ .Body }} +{{ end }}{{ end }} +{{ define "quoteline" }} +> {{ .Body }} +{{ end }} +{{ define "footer" }}{{ end }} +`)) diff --git a/gemini/gemtext/mdconv/convert_test.go b/gemini/gemtext/mdconv/convert_test.go new file mode 100644 index 0000000..c8fd53c --- /dev/null +++ b/gemini/gemtext/mdconv/convert_test.go @@ -0,0 +1,103 @@ +package mdconv_test + +import ( + "bytes" + "testing" + "text/template" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "tildegit.org/tjp/gus/gemini/gemtext" + "tildegit.org/tjp/gus/gemini/gemtext/mdconv" +) + +var gmiDoc = ` +# top-level header line + +## subtitle + +This is some non-blank regular text. + +* an +* unordered +* list + +=> gemini://google.com/ as if +=> https://google.com/ + +> this is a quote +> -tjp + +`[1:] + "```pre-formatted code\ndoc := gemtext.Parse(req.Body)\n```ignored closing alt-text\n" + +func TestConvert(t *testing.T) { + mdDoc := ` +# top-level header line + +## subtitle + +This is some non-blank regular text. 
+ +* an +* unordered +* list + +=> [as if](gemini://google.com/) + +=> [https://google.com/](https://google.com/) + +> this is a quote + +> -tjp + +` + "```\ndoc := gemtext.Parse(req.Body)\n```\n" + + doc, err := gemtext.Parse(bytes.NewBufferString(gmiDoc)) + require.Nil(t, err) + + buf := &bytes.Buffer{} + require.Nil(t, mdconv.Convert(buf, doc, nil)) + + assert.Equal(t, mdDoc, buf.String()) +} + +func TestConvertWithOverrides(t *testing.T) { + mdDoc := ` +# h1: top-level header line +text: +## h2: subtitle +text: +text: This is some non-blank regular text. +text: +* li: an +* li: unordered +* li: list +text: +=> link: [as if](gemini://google.com/) +=> link: [https://google.com/](https://google.com/) +text: +> quote: this is a quote +> quote: -tjp +text: +`[1:] + "```\npf: doc := gemtext.Parse(req.Body)\n```\n" + + overrides := template.Must(template.New("overrides").Parse((` + {{define "textline"}}text: {{.}}{{end}} + {{define "linkline"}}=> link: [{{if eq .Label ""}}{{.URL}}{{else}}{{.Label}}{{end}}]({{.URL}})` + "\n" + `{{end}} + {{define "preformattedtextlines"}}` + "```\n" + `{{range . }}pf: {{.}}{{end}}` + "```\n" + `{{end}} + {{define "heading1line"}}# h1: {{.Body}}` + "\n" + `{{end}} + {{define "heading2line"}}## h2: {{.Body}}` + "\n" + `{{end}} + {{define "heading3line"}}### h3: {{.Body}}` + "\n" + `{{end}} + {{define "listitemlines"}}{{range .}}* li: {{.Body}}` + "\n" + `{{end}}{{end}} + {{define "quoteline"}}> quote: {{.Body}}` + "\n" + `{{end}} + `)[1:])) + + doc, err := gemtext.Parse(bytes.NewBufferString(gmiDoc)) + require.Nil(t, err) + + buf := &bytes.Buffer{} + require.Nil(t, mdconv.Convert(buf, doc, overrides)) + + assert.Equal(t, mdDoc, buf.String()) +} diff --git a/gemini/gemtext/parse.go b/gemini/gemtext/parse.go new file mode 100644 index 0000000..7041fde --- /dev/null +++ b/gemini/gemtext/parse.go @@ -0,0 +1,49 @@ +package gemtext + +import ( + "bufio" + "io" +) + +// Parse parses the full contents of an io.Reader into a gemtext.Document. 
+func Parse(input io.Reader) (Document, error) { + rdr := bufio.NewReader(input) + + var lines []Line + inPFT := false + + for { + raw, err := rdr.ReadBytes('\n') + if err != io.EOF && err != nil { + return nil, err + } + + var line Line + + if inPFT && (len(raw) < 3 || raw[0] != '`' || raw[1] != '`' || raw[2] != '`') { + line = PreformattedTextLine{raw: raw} + } else { + line = ParseLine(raw) + } + + if line != nil && line.Type() == LineTypePreformatToggle { + if inPFT { + toggle := line.(PreformatToggleLine) + (&toggle).clearAlt() + line = toggle + } + + inPFT = !inPFT + } + + if line != nil { + lines = append(lines, line) + } + + if err == io.EOF { + break + } + } + + return Document(lines), nil +} diff --git a/gemini/gemtext/parse_line.go b/gemini/gemtext/parse_line.go new file mode 100644 index 0000000..39187a8 --- /dev/null +++ b/gemini/gemtext/parse_line.go @@ -0,0 +1,107 @@ +package gemtext + +import "bytes" + +// ParseLine parses a single line (including the trailing \n) into a gemtext.Line. 
+func ParseLine(line []byte) Line { + if len(line) == 0 { + return nil + } + + switch line[0] { + case '=': + if len(line) == 1 || line[1] != '>' { + break + } + return parseLinkLine(line) + case '`': + if len(line) < 3 || line[1] != '`' || line[2] != '`' { + break + } + return parsePreformatToggleLine(line) + case '#': + level := 1 + if len(line) > 1 && line[1] == '#' { + level += 1 + if len(line) > 2 && line[2] == '#' { + level += 1 + } + } + return parseHeadingLine(level, line) + case '*': + if len(line) == 1 || line[1] != ' ' { + break + } + return parseListItemLine(line) + case '>': + return parseQuoteLine(line) + } + + return TextLine{raw: line} +} + +func parseLinkLine(raw []byte) LinkLine { + line := LinkLine{raw: raw} + + // move past =>[<whitespace>] + raw = bytes.TrimLeft(raw[2:], " \t") + + // find the next space or tab + spIdx := bytes.IndexByte(raw, ' ') + tbIdx := bytes.IndexByte(raw, '\t') + idx := spIdx + if idx == -1 { + idx = tbIdx + } + if tbIdx >= 0 && tbIdx < idx { + idx = tbIdx + } + + if idx < 0 { + line.url = bytes.TrimRight(raw, "\r\n") + return line + } + + line.url = raw[:idx] + raw = raw[idx+1:] + + label := bytes.TrimRight(bytes.TrimLeft(raw, " \t"), "\r\n") + if len(label) > 0 { + line.label = label + } + + return line +} + +func parsePreformatToggleLine(raw []byte) PreformatToggleLine { + line := PreformatToggleLine{raw: raw} + + raw = bytes.TrimRight(raw[3:], "\r\n") + if len(raw) > 0 { + line.altText = raw + } + + return line +} + +func parseHeadingLine(level int, raw []byte) HeadingLine { + return HeadingLine{ + raw: raw, + lineType: LineTypeHeading1 - 1 + LineType(level), + body: bytes.TrimRight(bytes.TrimLeft(raw[level:], " \t"), "\r\n"), + } +} + +func parseListItemLine(raw []byte) ListItemLine { + return ListItemLine{ + raw: raw, + body: bytes.TrimRight(raw[2:], "\r\n"), + } +} + +func parseQuoteLine(raw []byte) QuoteLine { + return QuoteLine{ + raw: raw, + body: bytes.TrimRight(raw[1:], "\r\n"), + } +} diff --git 
a/gemini/gemtext/parse_line_test.go b/gemini/gemtext/parse_line_test.go new file mode 100644 index 0000000..a07fa3b --- /dev/null +++ b/gemini/gemtext/parse_line_test.go @@ -0,0 +1,271 @@ +package gemtext_test + +import ( + "testing" + + "tildegit.org/tjp/gus/gemini/gemtext" +) + +func TestParseLinkLine(t *testing.T) { + tests := []struct { + input string + url string + label string + }{ + { + input: "=> gemini.ctrl-c.club/~tjp/ home page\r\n", + url: "gemini.ctrl-c.club/~tjp/", + label: "home page", + }, + { + input: "=> gemi.dev/\n", + url: "gemi.dev/", + }, + { + input: "=> /gemlog/foobar 2023-01-13 - Foo Bar\n", + url: "/gemlog/foobar", + label: "2023-01-13 - Foo Bar", + }, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + line := gemtext.ParseLine([]byte(test.input)) + if line == nil { + t.Fatal("ParseLine() returned nil line") + } + if string(line.Raw()) != string(test.input) { + t.Error("Raw() does not match input") + } + + if line.Type() != gemtext.LineTypeLink { + t.Errorf("expected LineTypeLink, got %d", line.Type()) + } + link, ok := line.(gemtext.LinkLine) + if !ok { + t.Fatalf("expected a LinkLine, got %T", line) + } + + if link.URL() != test.url { + t.Errorf("expected url %q, got %q", test.url, link.URL()) + } + + if link.Label() != test.label { + t.Errorf("expected label %q, got %q", test.label, link.Label()) + } + }) + } +} + +func TestParsePreformatToggleLine(t *testing.T) { + tests := []struct { + input string + altText string + }{ + { + input: "```\n", + }, + { + input: "```some alt-text\r\n", + altText: "some alt-text", + }, + { + input: "``` leading space preserved\n", + altText: " leading space preserved", + }, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + line := gemtext.ParseLine([]byte(test.input)) + if line == nil { + t.Fatal("ParseLine() returned nil line") + } + if string(line.Raw()) != string(test.input) { + t.Error("Raw() does not match input") + } + + if line.Type() != 
gemtext.LineTypePreformatToggle { + t.Errorf("expected LineTypePreformatToggle, got %d", line.Type()) + } + toggle, ok := line.(gemtext.PreformatToggleLine) + if !ok { + t.Fatalf("expected a PreformatToggleLine, got %T", line) + } + + if toggle.AltText() != test.altText { + t.Errorf("expected alt-text %q, got %q", test.altText, toggle.AltText()) + } + }) + } +} + +func TestParseHeadingLine(t *testing.T) { + tests := []struct { + input string + lineType gemtext.LineType + body string + }{ + { + input: "# this is an H1\n", + lineType: gemtext.LineTypeHeading1, + body: "this is an H1", + }, + { + input: "## extra leading spaces\r\n", + lineType: gemtext.LineTypeHeading2, + body: "extra leading spaces", + }, + { + input: "##no leading space\n", + lineType: gemtext.LineTypeHeading2, + body: "no leading space", + }, + { + input: "#### there is no h4\n", + lineType: gemtext.LineTypeHeading3, + body: "# there is no h4", + }, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + line := gemtext.ParseLine([]byte(test.input)) + if line == nil { + t.Fatal("ParseLine() returned nil") + } + + if line.Type() != test.lineType { + t.Errorf("expected line type %d, got %d", test.lineType, line.Type()) + } + if string(line.Raw()) != test.input { + t.Error("line.Raw() does not match input") + } + + hdg, ok := line.(gemtext.HeadingLine) + if !ok { + t.Fatalf("expected HeadingLine, got a %T", line) + } + + if hdg.Body() != test.body { + t.Errorf("expected body %q, got %q", test.body, hdg.Body()) + } + }) + } +} + +func TestParseListItemLine(t *testing.T) { + tests := []struct { + input string + body string + }{ + { + input: "* this is a list item\r\n", + body: "this is a list item", + }, + { + input: "* more leading spaces\n", + body: " more leading spaces", + }, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + line := gemtext.ParseLine([]byte(test.input)) + if line == nil { + t.Fatal("ParseLine() returned nil") + } + + if 
line.Type() != gemtext.LineTypeListItem { + t.Errorf("expected LineTypeListItem, got %d", line.Type()) + } + if string(line.Raw()) != test.input { + t.Error("line.Raw() does not match input") + } + + li, ok := line.(gemtext.ListItemLine) + if !ok { + t.Fatalf("expected ListItemLine, got a %T", line) + } + + if li.Body() != test.body { + t.Errorf("expected body %q, got %q", test.body, li.Body()) + } + }) + } +} + +func TestParseQuoteLine(t *testing.T) { + tests := []struct { + input string + body string + }{ + { + input: ">a quote line\r\n", + body: "a quote line", + }, + { + input: "> with a leading space\n", + body: " with a leading space", + }, + { + input: "> more leading spaces\n", + body: " more leading spaces", + }, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + line := gemtext.ParseLine([]byte(test.input)) + if line == nil { + t.Fatal("ParseLine() returned nil") + } + + if line.Type() != gemtext.LineTypeQuote { + t.Errorf("expected LineTypeQuote, got %d", line.Type()) + } + if string(line.Raw()) != test.input { + t.Error("line.Raw() does not match input") + } + + qu, ok := line.(gemtext.QuoteLine) + if !ok { + t.Fatalf("expected QuoteLine , got a %T", line) + } + + if qu.Body() != test.body { + t.Errorf("expected body %q, got %q", test.body, qu.Body()) + } + }) + } +} + +func TestParseTextLine(t *testing.T) { + tests := []string { + "\n", + "simple text line\r\n", + " * an invalid list item\n", + "*another invalid list item\r\n", + } + + for _, test := range tests { + t.Run(test, func(t *testing.T) { + line := gemtext.ParseLine([]byte(test)) + if line == nil { + t.Fatal("ParseLine() returned nil") + } + + if line.Type() != gemtext.LineTypeText { + t.Errorf("expected LineTypeText, got %d", line.Type()) + } + if string(line.Raw()) != test { + t.Error("line.Raw() does not match input") + } + + _, ok := line.(gemtext.TextLine) + if !ok { + t.Fatalf("expected TextLine , got a %T", line) + } + }) + } +} diff --git 
a/gemini/gemtext/parse_test.go b/gemini/gemtext/parse_test.go new file mode 100644 index 0000000..d2860ff --- /dev/null +++ b/gemini/gemtext/parse_test.go @@ -0,0 +1,104 @@ +package gemtext_test + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "tildegit.org/tjp/gus/gemini/gemtext" +) + +func TestParse(t *testing.T) { + docBytes := []byte(` +# top-level header line + +## subtitle + +This is some non-blank regular text. + +* an +* unordered +* list + +=> gemini://google.com/ as if + +> this is a quote +> -tjp + +`[1:] + "```pre-formatted code\ndoc := gemtext.Parse(req.Body)\n```ignored closing alt-text\n") + + assertEmptyLine := func(t *testing.T, line gemtext.Line) { + assert.Equal(t, gemtext.LineTypeText, line.Type()) + assert.Equal(t, "\n", string(line.Raw())) + } + + doc, err := gemtext.Parse(bytes.NewBuffer(docBytes)) + require.Nil(t, err) + + require.Equal(t, 18, len(doc)) + + assert.Equal(t, gemtext.LineTypeHeading1, doc[0].Type()) + assert.Equal(t, "# top-level header line\n", string(doc[0].Raw())) + assert.Equal(t, "top-level header line", doc[0].(gemtext.HeadingLine).Body()) + + assertEmptyLine(t, doc[1]) + + assert.Equal(t, gemtext.LineTypeHeading2, doc[2].Type()) + assert.Equal(t, "## subtitle\n", string(doc[2].Raw())) + assert.Equal(t, "subtitle", doc[2].(gemtext.HeadingLine).Body()) + + assertEmptyLine(t, doc[3]) + + assert.Equal(t, gemtext.LineTypeText, doc[4].Type()) + assert.Equal(t, "This is some non-blank regular text.\n", string(doc[4].Raw())) + + assertEmptyLine(t, doc[5]) + + assert.Equal(t, gemtext.LineTypeListItem, doc[6].Type()) + assert.Equal(t, "an", doc[6].(gemtext.ListItemLine).Body()) + + assert.Equal(t, gemtext.LineTypeListItem, doc[7].Type()) + assert.Equal(t, "unordered", doc[7].(gemtext.ListItemLine).Body()) + + assert.Equal(t, gemtext.LineTypeListItem, doc[8].Type()) + assert.Equal(t, "list", doc[8].(gemtext.ListItemLine).Body()) + + assertEmptyLine(t, doc[9]) + + 
assert.Equal(t, gemtext.LineTypeLink, doc[10].Type()) + assert.Equal(t, "=> gemini://google.com/ as if\n", string(doc[10].Raw())) + assert.Equal(t, "gemini://google.com/", doc[10].(gemtext.LinkLine).URL()) + assert.Equal(t, "as if", doc[10].(gemtext.LinkLine).Label()) + + assertEmptyLine(t, doc[11]) + + assert.Equal(t, gemtext.LineTypeQuote, doc[12].Type()) + assert.Equal(t, "> this is a quote\n", string(doc[12].Raw())) + assert.Equal(t, " this is a quote", doc[12].(gemtext.QuoteLine).Body()) + + assert.Equal(t, gemtext.LineTypeQuote, doc[13].Type()) + assert.Equal(t, "> -tjp\n", string(doc[13].Raw())) + assert.Equal(t, " -tjp", doc[13].(gemtext.QuoteLine).Body()) + + assertEmptyLine(t, doc[14]) + + assert.Equal(t, gemtext.LineTypePreformatToggle, doc[15].Type()) + assert.Equal(t, "```pre-formatted code\n", string(doc[15].Raw())) + assert.Equal(t, "pre-formatted code", doc[15].(gemtext.PreformatToggleLine).AltText()) + + assert.Equal(t, gemtext.LineTypePreformattedText, doc[16].Type()) + assert.Equal(t, "doc := gemtext.Parse(req.Body)\n", string(doc[16].Raw())) + + assert.Equal(t, gemtext.LineTypePreformatToggle, doc[17].Type()) + assert.Equal(t, "```ignored closing alt-text\n", string(doc[17].Raw())) + assert.Equal(t, "", doc[17].(gemtext.PreformatToggleLine).AltText()) + + // ensure we can rebuild the original doc from all the line.Raw()s + buf := &bytes.Buffer{} + for _, line := range doc { + _, _ = buf.Write(line.Raw()) + } + assert.Equal(t, string(docBytes), buf.String()) +} diff --git a/gemini/gemtext/types.go b/gemini/gemtext/types.go new file mode 100644 index 0000000..440fed4 --- /dev/null +++ b/gemini/gemtext/types.go @@ -0,0 +1,184 @@ +package gemtext + +// LineType represents the different types of lines in a gemtext document. +type LineType int + +const ( + // LineTypeText is the default case when nothing else matches. + // + // It indicates that the line object is a TextLine. + LineTypeText LineType = iota + 1 + + // LineTypeLink is a link line. 
+ // + // =>[<ws>]<url>[<ws><label>][\r]\n + // + // The line is a LinkLine. + LineTypeLink + + // LineTypePreformatToggle switches the document between pre-formatted text or not. + // + // ```[<alt-text>][\r]\n + // + // The line object is a PreformatToggleLine. + LineTypePreformatToggle + + // LineTypePreformattedText is any line between two PreformatToggles. + // + // The line is a PreformattedTextLine. + LineTypePreformattedText + + // LineTypeHeading1 is a top-level heading. + // + // #[<ws>]body[\r]\n + // + // The line is a HeadingLine. + LineTypeHeading1 + + // LineTypeHeading2 is a second-level heading. + // + // ##[<ws>]body[\r]\n + // + // The line is a HeadingLine. + LineTypeHeading2 + + // LineTypeHeading3 is a third-level heading. + // + // ###[<ws>]<body>[\r]\n + // + // The line is a HeadingLine. + LineTypeHeading3 + + // LineTypeListItem is an unordered list item. + // + // * <body>[\r]\n + // + // The line object is a ListItemLine. + LineTypeListItem + + // LineTypeQuote is a quote line. + // + // ><body>[\r]\n + // + // The line object is a QuoteLine. + LineTypeQuote +) + +// Line is the interface implemented by all specific line types. +// +// Many of those concrete implementation types have additional useful fields, +// so it can be a good idea to cast these to their concrete types based on the +// return value of the Type() method. +type Line interface { + // Type returns the specific type of the gemtext line. + Type() LineType + + // Raw reproduces the original bytes from the source reader. + Raw() []byte + + // String represents the original bytes from the source reader as a string. + String() string +} + +// Document is the list of lines that make up a full text/gemini resource. +type Document []Line + +// TextLine is a line of LineTypeText. 
+type TextLine struct { + raw []byte +} + +func (tl TextLine) Type() LineType { return LineTypeText } +func (tl TextLine) Raw() []byte { return tl.raw } +func (tl TextLine) String() string { return string(tl.raw) } + +// LinkLine is a line of LineTypeLink. +type LinkLine struct { + raw []byte + url []byte + label []byte +} + +func (ll LinkLine) Type() LineType { return LineTypeLink } +func (ll LinkLine) Raw() []byte { return ll.raw } +func (ll LinkLine) String() string { return string(ll.raw) } + +// URL returns the original url portion of the line. +// +// It is not guaranteed to be a valid URL. +func (ll LinkLine) URL() string { return string(ll.url) } + +// Label returns the label portion of the line. +func (ll LinkLine) Label() string { return string(ll.label) } + +// PreformatToggleLine is a preformatted text toggle line. +type PreformatToggleLine struct { + raw []byte + altText []byte +} + +func (tl PreformatToggleLine) Type() LineType { return LineTypePreformatToggle } +func (tl PreformatToggleLine) Raw() []byte { return tl.raw } +func (tl PreformatToggleLine) String() string { return string(tl.raw) } + +// AltText returns the alt-text portion of the line. +// +// If the line was parsed as part of a full document by Parse(), +// and this is a *closing* toggle, any alt-text present will be +// stripped and this will be empty. If the line was parsed by +// ParseLine() no such correction is performed. +func (tl PreformatToggleLine) AltText() string { return string(tl.altText) } + +func (tl *PreformatToggleLine) clearAlt() { tl.altText = nil } + +// PreformattedTextLine represents a line between two toggles. +// +// It is never returned by ParseLine but can be part of a +// document parsed by Parse(). 
+type PreformattedTextLine struct { + raw []byte +} + +func (tl PreformattedTextLine) Type() LineType { return LineTypePreformattedText } +func (tl PreformattedTextLine) Raw() []byte { return tl.raw } +func (tl PreformattedTextLine) String() string { return string(tl.raw) } + +// HeadingLine is a line of LineTypeHeading[1,2,3]. +type HeadingLine struct { + raw []byte + lineType LineType + body []byte +} + +func (hl HeadingLine) Type() LineType { return hl.lineType } +func (hl HeadingLine) Raw() []byte { return hl.raw } +func (hl HeadingLine) String() string { return string(hl.raw) } + +// Body returns the portion of the line with the header text. +func (hl HeadingLine) Body() string { return string(hl.body) } + +// ListItemLine is a line of LineTypeListItem. +type ListItemLine struct { + raw []byte + body []byte +} + +func (li ListItemLine) Type() LineType { return LineTypeListItem } +func (li ListItemLine) Raw() []byte { return li.raw } +func (li ListItemLine) String() string { return string(li.raw) } + +// Body returns the text of the list item. +func (li ListItemLine) Body() string { return string(li.body) } + +// QuoteLine is a line of LineTypeQuote. +type QuoteLine struct { + raw []byte + body []byte +} + +func (ql QuoteLine) Type() LineType { return LineTypeQuote } +func (ql QuoteLine) Raw() []byte { return ql.raw } +func (ql QuoteLine) String() string { return string(ql.raw) } + +// Body returns the text of the quote. +func (ql QuoteLine) Body() string { return string(ql.body) } |