From 0d904f9f10590d3a9117af27151f8c4fe2aea5ff Mon Sep 17 00:00:00 2001 From: tjpcc Date: Sat, 14 Jan 2023 09:57:16 -0700 Subject: Completed gemtext package. resolves #2 - fuzz testing - split out line parsing into a separate file - changed line type-specific public []byte fields to string accessor methods - added document parsing test for a stress test doc - added dependency on stretchr/testify --- gemtext/fuzz_test.go | 16 +++++++ gemtext/parse.go | 109 +-------------------------------------------- gemtext/parse_line.go | 107 ++++++++++++++++++++++++++++++++++++++++++++ gemtext/parse_line_test.go | 24 +++++----- gemtext/parse_test.go | 104 ++++++++++++++++++++++++++++++++++++++++++ gemtext/types.go | 65 ++++++++++++++------------- 6 files changed, 275 insertions(+), 150 deletions(-) create mode 100644 gemtext/fuzz_test.go create mode 100644 gemtext/parse_line.go create mode 100644 gemtext/parse_test.go (limited to 'gemtext') diff --git a/gemtext/fuzz_test.go b/gemtext/fuzz_test.go new file mode 100644 index 0000000..dce0587 --- /dev/null +++ b/gemtext/fuzz_test.go @@ -0,0 +1,16 @@ +package gemtext_test + +import ( + "bytes" + "testing" + + "tildegit.org/tjp/gus/gemtext" +) + +func FuzzParse(f *testing.F) { + f.Fuzz(func(t *testing.T, input []byte) { + if _, err := gemtext.Parse(bytes.NewBuffer(input)); err != nil { + t.Errorf("Parse error: %s", err.Error()) + } + }) +} diff --git a/gemtext/parse.go b/gemtext/parse.go index 4a8c641..7041fde 100644 --- a/gemtext/parse.go +++ b/gemtext/parse.go @@ -2,7 +2,6 @@ package gemtext import ( "bufio" - "bytes" "io" ) @@ -22,12 +21,12 @@ func Parse(input io.Reader) (Document, error) { var line Line if inPFT && (len(raw) < 3 || raw[0] != '`' || raw[1] != '`' || raw[2] != '`') { - line = PreformattedTextLine{raw: raw} + line = PreformattedTextLine{raw: raw} } else { line = ParseLine(raw) } - if line.Type() == LineTypePreformatToggle { + if line != nil && line.Type() == LineTypePreformatToggle { if inPFT { toggle := line.(PreformatToggleLine) (&toggle).clearAlt() @@ -48,107 +47,3 @@ func Parse(input io.Reader) (Document, error) { return Document(lines), nil } - -// ParseLine parses a single line (including the trailing \n) into a gemtext.Line. -func ParseLine(line []byte) Line { - if len(line) == 0 { - return nil - } - - switch line[0] { - case '=': - if len(line) == 1 || line[1] != '>' { - break - } - return parseLinkLine(line) - case '`': - if len(line) < 3 || line[1] != '`' || line[2] != '`' { - break - } - return parsePreformatToggleLine(line) - case '#': - level := 1 - if len(line) > 1 && line[1] == '#' { - level += 1 - if len(line) > 2 && line[2] == '#' { - level += 1 - } - } - return parseHeadingLine(level, line) - case '*': - if len(line) == 1 || line[1] != ' ' { - break - } - return parseListItemLine(line) - case '>': - return parseQuoteLine(line) - } - - return TextLine{raw: line} -} - -func parseLinkLine(raw []byte) LinkLine { - line := LinkLine{raw: raw} - - // move past =>[] - raw = bytes.TrimLeft(raw[2:], " \t") - - // find the next space or tab - spIdx := bytes.IndexByte(raw, ' ') - tbIdx := bytes.IndexByte(raw, '\t') - idx := spIdx - if idx == -1 { - idx = tbIdx - } - if tbIdx >= 0 && tbIdx < idx { - idx = tbIdx - } - - if idx < 0 { - line.URL = bytes.TrimRight(raw, "\r\n") - return line - } - - line.URL = raw[:idx] - raw = raw[idx+1:] - - label := bytes.TrimRight(bytes.TrimLeft(raw, " \t"), "\r\n") - if len(label) > 0 { - line.Label = label - } - - return line -} - -func parsePreformatToggleLine(raw []byte) PreformatToggleLine { - line := PreformatToggleLine{raw: raw} - - raw = bytes.TrimRight(raw[3:], "\r\n") - if len(raw) > 0 { - line.AltText = raw - } - - return line -} - -func parseHeadingLine(level int, raw []byte) HeadingLine { - return HeadingLine{ - raw: raw, - lineType: LineTypeHeading1 - 1 + LineType(level), - Body: bytes.TrimRight(bytes.TrimLeft(raw[level:], " \t"), "\r\n"), - } -} - -func parseListItemLine(raw []byte) ListItemLine { - return ListItemLine{ - raw: raw, - Body: bytes.TrimRight(raw[2:], "\r\n"), - } -} - -func parseQuoteLine(raw []byte) QuoteLine { - return QuoteLine{ - raw: raw, - Body: bytes.TrimRight(raw[1:], "\r\n"), - } -} diff --git a/gemtext/parse_line.go b/gemtext/parse_line.go new file mode 100644 index 0000000..39187a8 --- /dev/null +++ b/gemtext/parse_line.go @@ -0,0 +1,107 @@ +package gemtext + +import "bytes" + +// ParseLine parses a single line (including the trailing \n) into a gemtext.Line. +func ParseLine(line []byte) Line { + if len(line) == 0 { + return nil + } + + switch line[0] { + case '=': + if len(line) == 1 || line[1] != '>' { + break + } + return parseLinkLine(line) + case '`': + if len(line) < 3 || line[1] != '`' || line[2] != '`' { + break + } + return parsePreformatToggleLine(line) + case '#': + level := 1 + if len(line) > 1 && line[1] == '#' { + level += 1 + if len(line) > 2 && line[2] == '#' { + level += 1 + } + } + return parseHeadingLine(level, line) + case '*': + if len(line) == 1 || line[1] != ' ' { + break + } + return parseListItemLine(line) + case '>': + return parseQuoteLine(line) + } + + return TextLine{raw: line} +} + +func parseLinkLine(raw []byte) LinkLine { + line := LinkLine{raw: raw} + + // move past =>[] + raw = bytes.TrimLeft(raw[2:], " \t") + + // find the next space or tab + spIdx := bytes.IndexByte(raw, ' ') + tbIdx := bytes.IndexByte(raw, '\t') + idx := spIdx + if idx == -1 { + idx = tbIdx + } + if tbIdx >= 0 && tbIdx < idx { + idx = tbIdx + } + + if idx < 0 { + line.url = bytes.TrimRight(raw, "\r\n") + return line + } + + line.url = raw[:idx] + raw = raw[idx+1:] + + label := bytes.TrimRight(bytes.TrimLeft(raw, " \t"), "\r\n") + if len(label) > 0 { + line.label = label + } + + return line +} + +func parsePreformatToggleLine(raw []byte) PreformatToggleLine { + line := PreformatToggleLine{raw: raw} + + raw = bytes.TrimRight(raw[3:], "\r\n") + if len(raw) > 0 { + line.altText = raw + } + + return line +} + +func parseHeadingLine(level int, raw []byte) HeadingLine { + return HeadingLine{ + raw: raw, + lineType: LineTypeHeading1 - 1 + LineType(level), + body: bytes.TrimRight(bytes.TrimLeft(raw[level:], " \t"), "\r\n"), + } +} + +func parseListItemLine(raw []byte) ListItemLine { + return ListItemLine{ + raw: raw, + body: bytes.TrimRight(raw[2:], "\r\n"), + } +} + +func parseQuoteLine(raw []byte) QuoteLine { + return QuoteLine{ + raw: raw, + body: bytes.TrimRight(raw[1:], "\r\n"), + } +} diff --git a/gemtext/parse_line_test.go b/gemtext/parse_line_test.go index 64c1bc7..0953103 100644 --- a/gemtext/parse_line_test.go +++ b/gemtext/parse_line_test.go @@ -46,12 +46,12 @@ func TestParseLinkLine(t *testing.T) { t.Fatalf("expected a LinkLine, got %T", line) } - if string(link.URL) != test.url { - t.Errorf("expected url %q, got %q", test.url, string(link.URL)) + if link.URL() != test.url { + t.Errorf("expected url %q, got %q", test.url, link.URL()) } - if string(link.Label) != test.label { - t.Errorf("expected label %q, got %q", test.label, string(link.Label)) + if link.Label() != test.label { + t.Errorf("expected label %q, got %q", test.label, link.Label()) } }) } @@ -93,8 +93,8 @@ func TestParsePreformatToggleLine(t *testing.T) { t.Fatalf("expected a PreformatToggleLine, got %T", line) } - if string(toggle.AltText) != test.altText { - t.Errorf("expected alt-text %q, got %q", test.altText, string(toggle.AltText)) + if toggle.AltText() != test.altText { + t.Errorf("expected alt-text %q, got %q", test.altText, toggle.AltText()) } }) } @@ -147,8 +147,8 @@ func TestParseHeadingLine(t *testing.T) { t.Fatalf("expected HeadingLine, got a %T", line) } - if string(hdg.Body) != test.body { - t.Errorf("expected body %q, got %q", test.body, string(hdg.Body)) + if hdg.Body() != test.body { + t.Errorf("expected body %q, got %q", test.body, hdg.Body()) } }) } @@ -188,8 +188,8 @@ func TestParseListItemLine(t *testing.T) { t.Fatalf("expected ListItemLine, got a %T", line) } - if string(li.Body) != test.body { - t.Errorf("expected body %q, got %q", test.body, string(li.Body)) + if li.Body() != test.body { + t.Errorf("expected body %q, got %q", test.body, li.Body()) } }) } @@ -233,8 +233,8 @@ func TestParseQuoteLine(t *testing.T) { t.Fatalf("expected QuoteLine , got a %T", line) } - if string(qu.Body) != test.body { - t.Errorf("expected body %q, got %q", test.body, string(qu.Body)) + if qu.Body() != test.body { + t.Errorf("expected body %q, got %q", test.body, qu.Body()) } }) } diff --git a/gemtext/parse_test.go b/gemtext/parse_test.go new file mode 100644 index 0000000..bda5310 --- /dev/null +++ b/gemtext/parse_test.go @@ -0,0 +1,104 @@ +package gemtext_test + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "tildegit.org/tjp/gus/gemtext" +) + +func TestParse(t *testing.T) { + docBytes := []byte(` +# top-level header line + +## subtitle + +This is some non-blank regular text. + +* an +* unordered +* list + +=> gemini://google.com/ as if + +> this is a quote +> -tjp + +`[1:] + "```pre-formatted code\ndoc := gemtext.Parse(req.Body)\n```ignored closing alt-text\n") + + assertEmptyLine := func(t *testing.T, line gemtext.Line) { + assert.Equal(t, gemtext.LineTypeText, line.Type()) + assert.Equal(t, "\n", string(line.Raw())) + } + + doc, err := gemtext.Parse(bytes.NewBuffer(docBytes)) + require.Nil(t, err) + + require.Equal(t, 18, len(doc)) + + assert.Equal(t, gemtext.LineTypeHeading1, doc[0].Type()) + assert.Equal(t, "# top-level header line\n", string(doc[0].Raw())) + assert.Equal(t, "top-level header line", doc[0].(gemtext.HeadingLine).Body()) + + assertEmptyLine(t, doc[1]) + + assert.Equal(t, gemtext.LineTypeHeading2, doc[2].Type()) + assert.Equal(t, "## subtitle\n", string(doc[2].Raw())) + assert.Equal(t, "subtitle", doc[2].(gemtext.HeadingLine).Body()) + + assertEmptyLine(t, doc[3]) + + assert.Equal(t, gemtext.LineTypeText, doc[4].Type()) + assert.Equal(t, "This is some non-blank regular text.\n", string(doc[4].Raw())) + + assertEmptyLine(t, doc[5]) + + assert.Equal(t, gemtext.LineTypeListItem, doc[6].Type()) + assert.Equal(t, "an", doc[6].(gemtext.ListItemLine).Body()) + + assert.Equal(t, gemtext.LineTypeListItem, doc[7].Type()) + assert.Equal(t, "unordered", doc[7].(gemtext.ListItemLine).Body()) + + assert.Equal(t, gemtext.LineTypeListItem, doc[8].Type()) + assert.Equal(t, "list", doc[8].(gemtext.ListItemLine).Body()) + + assertEmptyLine(t, doc[9]) + + assert.Equal(t, gemtext.LineTypeLink, doc[10].Type()) + assert.Equal(t, "=> gemini://google.com/ as if\n", string(doc[10].Raw())) + assert.Equal(t, "gemini://google.com/", doc[10].(gemtext.LinkLine).URL()) + assert.Equal(t, "as if", doc[10].(gemtext.LinkLine).Label()) + + assertEmptyLine(t, doc[11]) + + assert.Equal(t, gemtext.LineTypeQuote, doc[12].Type()) + assert.Equal(t, "> this is a quote\n", string(doc[12].Raw())) + assert.Equal(t, " this is a quote", doc[12].(gemtext.QuoteLine).Body()) + + assert.Equal(t, gemtext.LineTypeQuote, doc[13].Type()) + assert.Equal(t, "> -tjp\n", string(doc[13].Raw())) + assert.Equal(t, " -tjp", doc[13].(gemtext.QuoteLine).Body()) + + assertEmptyLine(t, doc[14]) + + assert.Equal(t, gemtext.LineTypePreformatToggle, doc[15].Type()) + assert.Equal(t, "```pre-formatted code\n", string(doc[15].Raw())) + assert.Equal(t, "pre-formatted code", doc[15].(gemtext.PreformatToggleLine).AltText()) + + assert.Equal(t, gemtext.LineTypePreformattedText, doc[16].Type()) + assert.Equal(t, "doc := gemtext.Parse(req.Body)\n", string(doc[16].Raw())) + + assert.Equal(t, gemtext.LineTypePreformatToggle, doc[17].Type()) + assert.Equal(t, "```ignored closing alt-text\n", string(doc[17].Raw())) + assert.Equal(t, "", doc[17].(gemtext.PreformatToggleLine).AltText()) + + // ensure we can rebuild the original doc from all the line.Raw()s + buf := &bytes.Buffer{} + for _, line := range doc { + _, _ = buf.Write(line.Raw()) + } + assert.Equal(t, string(docBytes), buf.String()) +} diff --git a/gemtext/types.go b/gemtext/types.go index fb9352a..fefbece 100644 --- a/gemtext/types.go +++ b/gemtext/types.go @@ -91,39 +91,39 @@ func (tl TextLine) Raw() []byte { return tl.raw } // LinkLine is a line of LineTypeLink. type LinkLine struct { raw []byte - - // URL is the original bytes of the url portion of the line. - // - // It is not guaranteed to be a valid URL. - URL []byte - - // Label is the label portion of the line. - // - // If there was no label it will always be nil, never []byte{}. - Label []byte + url []byte + label []byte } func (ll LinkLine) Type() LineType { return LineTypeLink } func (ll LinkLine) Raw() []byte { return ll.raw } +// URL returns the original url portion of the line. +// +// It is not guaranteed to be a valid URL. +func (ll LinkLine) URL() string { return string(ll.url) } + +// Label returns the label portion of the line. +func (ll LinkLine) Label() string { return string(ll.label) } + // PreformatToggleLine is a preformatted text toggle line. type PreformatToggleLine struct { raw []byte - - // AltText contains the alt-text portion of the line. - // - // It will either have len() > 0 or be nil. - // - // If the line was parsed as part of a full document by Parse(), - // and this is a *closing* toggle, any alt-text present will be - // stripped and this will be nil. If the line was parsed by - // ParseLine() no such correction is performed. - AltText []byte + altText []byte } func (tl PreformatToggleLine) Type() LineType { return LineTypePreformatToggle } func (tl PreformatToggleLine) Raw() []byte { return tl.raw } -func (tl *PreformatToggleLine) clearAlt() { tl.AltText = nil } + +// AltText returns the alt-text portion of the line. +// +// If the line was parsed as part of a full document by Parse(), +// and this is a *closing* toggle, any alt-text present will be +// stripped and this will be empty. If the line was parsed by +// ParseLine() no such correction is performed. +func (tl PreformatToggleLine) AltText() string { return string(tl.altText) } + +func (tl *PreformatToggleLine) clearAlt() { tl.altText = nil } // PreformattedTextLine represents a line between two toggles. // @@ -140,32 +140,35 @@ func (tl PreformattedTextLine) Raw() []byte { return tl.raw } type HeadingLine struct { raw []byte lineType LineType - - // Body is the portion of the line with the header text. - Body []byte + body []byte } func (hl HeadingLine) Type() LineType { return hl.lineType } func (hl HeadingLine) Raw() []byte { return hl.raw } +// Body returns the portion of the line with the header text. +func (hl HeadingLine) Body() string { return string(hl.body) } + // ListItemLine is a line of LineTypeListItem. type ListItemLine struct { raw []byte - - // Body is the text of the list item. - Body []byte + body []byte } func (li ListItemLine) Type() LineType { return LineTypeListItem } func (li ListItemLine) Raw() []byte { return li.raw } +// Body returns the text of the list item. +func (li ListItemLine) Body() string { return string(li.body) } + // QuoteLine is a line of LineTypeQuote. type QuoteLine struct { - raw []byte - - // Body is the text of the quote. - Body []byte + raw []byte + body []byte } func (ql QuoteLine) Type() LineType { return LineTypeQuote } func (ql QuoteLine) Raw() []byte { return ql.raw } + +// Body returns the text of the quote. +func (ql QuoteLine) Body() string { return string(ql.body) } -- cgit v1.2.3