summary | refs | log | tree | commit | diff
path: root/gemtext
diff options
context:
space:
mode:
Diffstat (limited to 'gemtext')
-rw-r--r-- gemtext/fuzz_test.go 16
-rw-r--r-- gemtext/parse.go 109
-rw-r--r-- gemtext/parse_line.go 107
-rw-r--r-- gemtext/parse_line_test.go 24
-rw-r--r-- gemtext/parse_test.go 104
-rw-r--r-- gemtext/types.go 65
6 files changed, 275 insertions, 150 deletions
diff --git a/gemtext/fuzz_test.go b/gemtext/fuzz_test.go
new file mode 100644
index 0000000..dce0587
--- /dev/null
+++ b/gemtext/fuzz_test.go
@@ -0,0 +1,16 @@
+package gemtext_test
+
+import (
+ "bytes"
+ "testing"
+
+ "tildegit.org/tjp/gus/gemtext"
+)
+
+func FuzzParse(f *testing.F) {
+ f.Fuzz(func(t *testing.T, input []byte) {
+ if _, err := gemtext.Parse(bytes.NewBuffer(input)); err != nil {
+ t.Errorf("Parse error: %s", err.Error())
+ }
+ })
+}
diff --git a/gemtext/parse.go b/gemtext/parse.go
index 4a8c641..7041fde 100644
--- a/gemtext/parse.go
+++ b/gemtext/parse.go
@@ -2,7 +2,6 @@ package gemtext
import (
"bufio"
- "bytes"
"io"
)
@@ -22,12 +21,12 @@ func Parse(input io.Reader) (Document, error) {
var line Line
if inPFT && (len(raw) < 3 || raw[0] != '`' || raw[1] != '`' || raw[2] != '`') {
- line = PreformattedTextLine{raw: raw}
+ line = PreformattedTextLine{raw: raw}
} else {
line = ParseLine(raw)
}
- if line.Type() == LineTypePreformatToggle {
+ if line != nil && line.Type() == LineTypePreformatToggle {
if inPFT {
toggle := line.(PreformatToggleLine)
(&toggle).clearAlt()
@@ -48,107 +47,3 @@ func Parse(input io.Reader) (Document, error) {
return Document(lines), nil
}
-
-// ParseLine parses a single line (including the trailing \n) into a gemtext.Line.
-func ParseLine(line []byte) Line {
- if len(line) == 0 {
- return nil
- }
-
- switch line[0] {
- case '=':
- if len(line) == 1 || line[1] != '>' {
- break
- }
- return parseLinkLine(line)
- case '`':
- if len(line) < 3 || line[1] != '`' || line[2] != '`' {
- break
- }
- return parsePreformatToggleLine(line)
- case '#':
- level := 1
- if len(line) > 1 && line[1] == '#' {
- level += 1
- if len(line) > 2 && line[2] == '#' {
- level += 1
- }
- }
- return parseHeadingLine(level, line)
- case '*':
- if len(line) == 1 || line[1] != ' ' {
- break
- }
- return parseListItemLine(line)
- case '>':
- return parseQuoteLine(line)
- }
-
- return TextLine{raw: line}
-}
-
-func parseLinkLine(raw []byte) LinkLine {
- line := LinkLine{raw: raw}
-
- // move past =>[<whitespace>]
- raw = bytes.TrimLeft(raw[2:], " \t")
-
- // find the next space or tab
- spIdx := bytes.IndexByte(raw, ' ')
- tbIdx := bytes.IndexByte(raw, '\t')
- idx := spIdx
- if idx == -1 {
- idx = tbIdx
- }
- if tbIdx >= 0 && tbIdx < idx {
- idx = tbIdx
- }
-
- if idx < 0 {
- line.URL = bytes.TrimRight(raw, "\r\n")
- return line
- }
-
- line.URL = raw[:idx]
- raw = raw[idx+1:]
-
- label := bytes.TrimRight(bytes.TrimLeft(raw, " \t"), "\r\n")
- if len(label) > 0 {
- line.Label = label
- }
-
- return line
-}
-
-func parsePreformatToggleLine(raw []byte) PreformatToggleLine {
- line := PreformatToggleLine{raw: raw}
-
- raw = bytes.TrimRight(raw[3:], "\r\n")
- if len(raw) > 0 {
- line.AltText = raw
- }
-
- return line
-}
-
-func parseHeadingLine(level int, raw []byte) HeadingLine {
- return HeadingLine{
- raw: raw,
- lineType: LineTypeHeading1 - 1 + LineType(level),
- Body: bytes.TrimRight(bytes.TrimLeft(raw[level:], " \t"), "\r\n"),
- }
-}
-
-func parseListItemLine(raw []byte) ListItemLine {
- return ListItemLine{
- raw: raw,
- Body: bytes.TrimRight(raw[2:], "\r\n"),
- }
-}
-
-func parseQuoteLine(raw []byte) QuoteLine {
- return QuoteLine{
- raw: raw,
- Body: bytes.TrimRight(raw[1:], "\r\n"),
- }
-}
diff --git a/gemtext/parse_line.go b/gemtext/parse_line.go
new file mode 100644
index 0000000..39187a8
--- /dev/null
+++ b/gemtext/parse_line.go
@@ -0,0 +1,107 @@
+package gemtext
+
+import "bytes"
+
+// ParseLine parses a single line (including the trailing \n) into a gemtext.Line.
+func ParseLine(line []byte) Line {
+ if len(line) == 0 {
+ return nil
+ }
+
+ switch line[0] {
+ case '=':
+ if len(line) == 1 || line[1] != '>' {
+ break
+ }
+ return parseLinkLine(line)
+ case '`':
+ if len(line) < 3 || line[1] != '`' || line[2] != '`' {
+ break
+ }
+ return parsePreformatToggleLine(line)
+ case '#':
+ level := 1
+ if len(line) > 1 && line[1] == '#' {
+ level += 1
+ if len(line) > 2 && line[2] == '#' {
+ level += 1
+ }
+ }
+ return parseHeadingLine(level, line)
+ case '*':
+ if len(line) == 1 || line[1] != ' ' {
+ break
+ }
+ return parseListItemLine(line)
+ case '>':
+ return parseQuoteLine(line)
+ }
+
+ return TextLine{raw: line}
+}
+
+func parseLinkLine(raw []byte) LinkLine {
+ line := LinkLine{raw: raw}
+
+ // move past =>[<whitespace>]
+ raw = bytes.TrimLeft(raw[2:], " \t")
+
+ // find the next space or tab
+ spIdx := bytes.IndexByte(raw, ' ')
+ tbIdx := bytes.IndexByte(raw, '\t')
+ idx := spIdx
+ if idx == -1 {
+ idx = tbIdx
+ }
+ if tbIdx >= 0 && tbIdx < idx {
+ idx = tbIdx
+ }
+
+ if idx < 0 {
+ line.url = bytes.TrimRight(raw, "\r\n")
+ return line
+ }
+
+ line.url = raw[:idx]
+ raw = raw[idx+1:]
+
+ label := bytes.TrimRight(bytes.TrimLeft(raw, " \t"), "\r\n")
+ if len(label) > 0 {
+ line.label = label
+ }
+
+ return line
+}
+
+func parsePreformatToggleLine(raw []byte) PreformatToggleLine {
+ line := PreformatToggleLine{raw: raw}
+
+ raw = bytes.TrimRight(raw[3:], "\r\n")
+ if len(raw) > 0 {
+ line.altText = raw
+ }
+
+ return line
+}
+
+func parseHeadingLine(level int, raw []byte) HeadingLine {
+ return HeadingLine{
+ raw: raw,
+ lineType: LineTypeHeading1 - 1 + LineType(level),
+ body: bytes.TrimRight(bytes.TrimLeft(raw[level:], " \t"), "\r\n"),
+ }
+}
+
+func parseListItemLine(raw []byte) ListItemLine {
+ return ListItemLine{
+ raw: raw,
+ body: bytes.TrimRight(raw[2:], "\r\n"),
+ }
+}
+
+func parseQuoteLine(raw []byte) QuoteLine {
+ return QuoteLine{
+ raw: raw,
+ body: bytes.TrimRight(raw[1:], "\r\n"),
+ }
+}
diff --git a/gemtext/parse_line_test.go b/gemtext/parse_line_test.go
index 64c1bc7..0953103 100644
--- a/gemtext/parse_line_test.go
+++ b/gemtext/parse_line_test.go
@@ -46,12 +46,12 @@ func TestParseLinkLine(t *testing.T) {
t.Fatalf("expected a LinkLine, got %T", line)
}
- if string(link.URL) != test.url {
- t.Errorf("expected url %q, got %q", test.url, string(link.URL))
+ if link.URL() != test.url {
+ t.Errorf("expected url %q, got %q", test.url, link.URL())
}
- if string(link.Label) != test.label {
- t.Errorf("expected label %q, got %q", test.label, string(link.Label))
+ if link.Label() != test.label {
+ t.Errorf("expected label %q, got %q", test.label, link.Label())
}
})
}
@@ -93,8 +93,8 @@ func TestParsePreformatToggleLine(t *testing.T) {
t.Fatalf("expected a PreformatToggleLine, got %T", line)
}
- if string(toggle.AltText) != test.altText {
- t.Errorf("expected alt-text %q, got %q", test.altText, string(toggle.AltText))
+ if toggle.AltText() != test.altText {
+ t.Errorf("expected alt-text %q, got %q", test.altText, toggle.AltText())
}
})
}
@@ -147,8 +147,8 @@ func TestParseHeadingLine(t *testing.T) {
t.Fatalf("expected HeadingLine, got a %T", line)
}
- if string(hdg.Body) != test.body {
- t.Errorf("expected body %q, got %q", test.body, string(hdg.Body))
+ if hdg.Body() != test.body {
+ t.Errorf("expected body %q, got %q", test.body, hdg.Body())
}
})
}
@@ -188,8 +188,8 @@ func TestParseListItemLine(t *testing.T) {
t.Fatalf("expected ListItemLine, got a %T", line)
}
- if string(li.Body) != test.body {
- t.Errorf("expected body %q, got %q", test.body, string(li.Body))
+ if li.Body() != test.body {
+ t.Errorf("expected body %q, got %q", test.body, li.Body())
}
})
}
@@ -233,8 +233,8 @@ func TestParseQuoteLine(t *testing.T) {
t.Fatalf("expected QuoteLine , got a %T", line)
}
- if string(qu.Body) != test.body {
- t.Errorf("expected body %q, got %q", test.body, string(qu.Body))
+ if qu.Body() != test.body {
+ t.Errorf("expected body %q, got %q", test.body, qu.Body())
}
})
}
diff --git a/gemtext/parse_test.go b/gemtext/parse_test.go
new file mode 100644
index 0000000..bda5310
--- /dev/null
+++ b/gemtext/parse_test.go
@@ -0,0 +1,104 @@
+package gemtext_test
+
+import (
+ "bytes"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ "tildegit.org/tjp/gus/gemtext"
+)
+
+func TestParse(t *testing.T) {
+ docBytes := []byte(`
+# top-level header line
+
+## subtitle
+
+This is some non-blank regular text.
+
+* an
+* unordered
+* list
+
+=> gemini://google.com/ as if
+
+> this is a quote
+> -tjp
+
+`[1:] + "```pre-formatted code\ndoc := gemtext.Parse(req.Body)\n```ignored closing alt-text\n")
+
+ assertEmptyLine := func(t *testing.T, line gemtext.Line) {
+ assert.Equal(t, gemtext.LineTypeText, line.Type())
+ assert.Equal(t, "\n", string(line.Raw()))
+ }
+
+ doc, err := gemtext.Parse(bytes.NewBuffer(docBytes))
+ require.Nil(t, err)
+
+ require.Equal(t, 18, len(doc))
+
+ assert.Equal(t, gemtext.LineTypeHeading1, doc[0].Type())
+ assert.Equal(t, "# top-level header line\n", string(doc[0].Raw()))
+ assert.Equal(t, "top-level header line", doc[0].(gemtext.HeadingLine).Body())
+
+ assertEmptyLine(t, doc[1])
+
+ assert.Equal(t, gemtext.LineTypeHeading2, doc[2].Type())
+ assert.Equal(t, "## subtitle\n", string(doc[2].Raw()))
+ assert.Equal(t, "subtitle", doc[2].(gemtext.HeadingLine).Body())
+
+ assertEmptyLine(t, doc[3])
+
+ assert.Equal(t, gemtext.LineTypeText, doc[4].Type())
+ assert.Equal(t, "This is some non-blank regular text.\n", string(doc[4].Raw()))
+
+ assertEmptyLine(t, doc[5])
+
+ assert.Equal(t, gemtext.LineTypeListItem, doc[6].Type())
+ assert.Equal(t, "an", doc[6].(gemtext.ListItemLine).Body())
+
+ assert.Equal(t, gemtext.LineTypeListItem, doc[7].Type())
+ assert.Equal(t, "unordered", doc[7].(gemtext.ListItemLine).Body())
+
+ assert.Equal(t, gemtext.LineTypeListItem, doc[8].Type())
+ assert.Equal(t, "list", doc[8].(gemtext.ListItemLine).Body())
+
+ assertEmptyLine(t, doc[9])
+
+ assert.Equal(t, gemtext.LineTypeLink, doc[10].Type())
+ assert.Equal(t, "=> gemini://google.com/ as if\n", string(doc[10].Raw()))
+ assert.Equal(t, "gemini://google.com/", doc[10].(gemtext.LinkLine).URL())
+ assert.Equal(t, "as if", doc[10].(gemtext.LinkLine).Label())
+
+ assertEmptyLine(t, doc[11])
+
+ assert.Equal(t, gemtext.LineTypeQuote, doc[12].Type())
+ assert.Equal(t, "> this is a quote\n", string(doc[12].Raw()))
+ assert.Equal(t, " this is a quote", doc[12].(gemtext.QuoteLine).Body())
+
+ assert.Equal(t, gemtext.LineTypeQuote, doc[13].Type())
+ assert.Equal(t, "> -tjp\n", string(doc[13].Raw()))
+ assert.Equal(t, " -tjp", doc[13].(gemtext.QuoteLine).Body())
+
+ assertEmptyLine(t, doc[14])
+
+ assert.Equal(t, gemtext.LineTypePreformatToggle, doc[15].Type())
+ assert.Equal(t, "```pre-formatted code\n", string(doc[15].Raw()))
+ assert.Equal(t, "pre-formatted code", doc[15].(gemtext.PreformatToggleLine).AltText())
+
+ assert.Equal(t, gemtext.LineTypePreformattedText, doc[16].Type())
+ assert.Equal(t, "doc := gemtext.Parse(req.Body)\n", string(doc[16].Raw()))
+
+ assert.Equal(t, gemtext.LineTypePreformatToggle, doc[17].Type())
+ assert.Equal(t, "```ignored closing alt-text\n", string(doc[17].Raw()))
+ assert.Equal(t, "", doc[17].(gemtext.PreformatToggleLine).AltText())
+
+ // ensure we can rebuild the original doc from all the line.Raw()s
+ buf := &bytes.Buffer{}
+ for _, line := range doc {
+ _, _ = buf.Write(line.Raw())
+ }
+ assert.Equal(t, string(docBytes), buf.String())
+}
diff --git a/gemtext/types.go b/gemtext/types.go
index fb9352a..fefbece 100644
--- a/gemtext/types.go
+++ b/gemtext/types.go
@@ -91,39 +91,39 @@ func (tl TextLine) Raw() []byte { return tl.raw }
// LinkLine is a line of LineTypeLink.
type LinkLine struct {
raw []byte
-
- // URL is the original bytes of the url portion of the line.
- //
- // It is not guaranteed to be a valid URL.
- URL []byte
-
- // Label is the label portion of the line.
- //
- // If there was no label it will always be nil, never []byte{}.
- Label []byte
+ url []byte
+ label []byte
}
func (ll LinkLine) Type() LineType { return LineTypeLink }
func (ll LinkLine) Raw() []byte { return ll.raw }
+// URL returns the original url portion of the line.
+//
+// It is not guaranteed to be a valid URL.
+func (ll LinkLine) URL() string { return string(ll.url) }
+
+// Label returns the label portion of the line.
+func (ll LinkLine) Label() string { return string(ll.label) }
+
// PreformatToggleLine is a preformatted text toggle line.
type PreformatToggleLine struct {
raw []byte
-
- // AltText contains the alt-text portion of the line.
- //
- // It will either have len() > 0 or be nil.
- //
- // If the line was parsed as part of a full document by Parse(),
- // and this is a *closing* toggle, any alt-text present will be
- // stripped and this will be nil. If the line was parsed by
- // ParseLine() no such correction is performed.
- AltText []byte
+ altText []byte
}
func (tl PreformatToggleLine) Type() LineType { return LineTypePreformatToggle }
func (tl PreformatToggleLine) Raw() []byte { return tl.raw }
-func (tl *PreformatToggleLine) clearAlt() { tl.AltText = nil }
+
+// AltText returns the alt-text portion of the line.
+//
+// If the line was parsed as part of a full document by Parse(),
+// and this is a *closing* toggle, any alt-text present will be
+// stripped and this will be empty. If the line was parsed by
+// ParseLine() no such correction is performed.
+func (tl PreformatToggleLine) AltText() string { return string(tl.altText) }
+
+func (tl *PreformatToggleLine) clearAlt() { tl.altText = nil }
// PreformattedTextLine represents a line between two toggles.
//
@@ -140,32 +140,35 @@ func (tl PreformattedTextLine) Raw() []byte { return tl.raw }
type HeadingLine struct {
raw []byte
lineType LineType
-
- // Body is the portion of the line with the header text.
- Body []byte
+ body []byte
}
func (hl HeadingLine) Type() LineType { return hl.lineType }
func (hl HeadingLine) Raw() []byte { return hl.raw }
+// Body returns the portion of the line with the header text.
+func (hl HeadingLine) Body() string { return string(hl.body) }
+
// ListItemLine is a line of LineTypeListItem.
type ListItemLine struct {
raw []byte
-
- // Body is the text of the list item.
- Body []byte
+ body []byte
}
func (li ListItemLine) Type() LineType { return LineTypeListItem }
func (li ListItemLine) Raw() []byte { return li.raw }
+// Body returns the text of the list item.
+func (li ListItemLine) Body() string { return string(li.body) }
+
// QuoteLine is a line of LineTypeQuote.
type QuoteLine struct {
- raw []byte
-
- // Body is the text of the quote.
- Body []byte
+ raw []byte
+ body []byte
}
func (ql QuoteLine) Type() LineType { return LineTypeQuote }
func (ql QuoteLine) Raw() []byte { return ql.raw }
+
+// Body returns the text of the quote.
+func (ql QuoteLine) Body() string { return string(ql.body) }