From aa23984bc21f99e00b8552e928d23ef02bb745f3 Mon Sep 17 00:00:00 2001 From: tjpcc Date: Fri, 13 Jan 2023 10:50:30 -0700 Subject: Initial gemtext package. Contains: - gemtext AST (Document and line types) - Parse from an io.Reader - ParseLine a []byte - doc comments on everything - ParseLine tests for every line type Still needs tests for Parse & Document. --- gemtext/parse.go | 154 ++++++++++++++++++++++++++ gemtext/parse_line_test.go | 271 +++++++++++++++++++++++++++++++++++++++++++++ gemtext/types.go | 171 ++++++++++++++++++++++++++++ 3 files changed, 596 insertions(+) create mode 100644 gemtext/parse.go create mode 100644 gemtext/parse_line_test.go create mode 100644 gemtext/types.go diff --git a/gemtext/parse.go b/gemtext/parse.go new file mode 100644 index 0000000..4a8c641 --- /dev/null +++ b/gemtext/parse.go @@ -0,0 +1,154 @@ +package gemtext + +import ( + "bufio" + "bytes" + "io" +) + +// Parse parses the full contents of an io.Reader into a gemtext.Document. +func Parse(input io.Reader) (Document, error) { + rdr := bufio.NewReader(input) + + var lines []Line + inPFT := false + + for { + raw, err := rdr.ReadBytes('\n') + if err != io.EOF && err != nil { + return nil, err + } + + var line Line + + if inPFT && (len(raw) < 3 || raw[0] != '`' || raw[1] != '`' || raw[2] != '`') { + line = PreformattedTextLine{raw: raw} + } else { + line = ParseLine(raw) + } + + if line.Type() == LineTypePreformatToggle { + if inPFT { + toggle := line.(PreformatToggleLine) + (&toggle).clearAlt() + line = toggle + } + + inPFT = !inPFT + } + + if line != nil { + lines = append(lines, line) + } + + if err == io.EOF { + break + } + } + + return Document(lines), nil +} + +// ParseLine parses a single line (including the trailing \n) into a gemtext.Line. +func ParseLine(line []byte) Line { + if len(line) == 0 { + return nil + } + + switch line[0] { + case '=': + if len(line) == 1 || line[1] != '>' { + break + } + return parseLinkLine(line) + case '`': + if len(line) < 3 || line[1] != '`' || line[2] != '`' { + break + } + return parsePreformatToggleLine(line) + case '#': + level := 1 + if len(line) > 1 && line[1] == '#' { + level += 1 + if len(line) > 2 && line[2] == '#' { + level += 1 + } + } + return parseHeadingLine(level, line) + case '*': + if len(line) == 1 || line[1] != ' ' { + break + } + return parseListItemLine(line) + case '>': + return parseQuoteLine(line) + } + + return TextLine{raw: line} +} + +func parseLinkLine(raw []byte) LinkLine { + line := LinkLine{raw: raw} + + // move past =>[] + raw = bytes.TrimLeft(raw[2:], " \t") + + // find the next space or tab + spIdx := bytes.IndexByte(raw, ' ') + tbIdx := bytes.IndexByte(raw, '\t') + idx := spIdx + if idx == -1 { + idx = tbIdx + } + if tbIdx >= 0 && tbIdx < idx { + idx = tbIdx + } + + if idx < 0 { + line.URL = bytes.TrimRight(raw, "\r\n") + return line + } + + line.URL = raw[:idx] + raw = raw[idx+1:] + + label := bytes.TrimRight(bytes.TrimLeft(raw, " \t"), "\r\n") + if len(label) > 0 { + line.Label = label + } + + return line +} + +func parsePreformatToggleLine(raw []byte) PreformatToggleLine { + line := PreformatToggleLine{raw: raw} + + raw = bytes.TrimRight(raw[3:], "\r\n") + if len(raw) > 0 { + line.AltText = raw + } + + return line +} + +func parseHeadingLine(level int, raw []byte) HeadingLine { + return HeadingLine{ + raw: raw, + lineType: LineTypeHeading1 - 1 + LineType(level), + Body: bytes.TrimRight(bytes.TrimLeft(raw[level:], " \t"), "\r\n"), + } +} + +func parseListItemLine(raw []byte) ListItemLine { + return ListItemLine{ + raw: raw, + Body: bytes.TrimRight(raw[2:], "\r\n"), + } +} + +func parseQuoteLine(raw []byte) QuoteLine { + return QuoteLine{ + raw: raw, + Body: bytes.TrimRight(raw[1:], "\r\n"), + } +} diff --git a/gemtext/parse_line_test.go b/gemtext/parse_line_test.go new file mode 100644 index 0000000..64c1bc7 --- /dev/null +++ b/gemtext/parse_line_test.go @@ -0,0 +1,271 @@ +package gemtext_test + +import ( + "testing" + + "tildegit.org/tjp/gus/gemtext" +) + +func TestParseLinkLine(t *testing.T) { + tests := []struct { + input string + url string + label string + }{ + { + input: "=> gemini.ctrl-c.club/~tjp/ home page\r\n", + url: "gemini.ctrl-c.club/~tjp/", + label: "home page", + }, + { + input: "=> gemi.dev/\n", + url: "gemi.dev/", + }, + { + input: "=> /gemlog/foobar 2023-01-13 - Foo Bar\n", + url: "/gemlog/foobar", + label: "2023-01-13 - Foo Bar", + }, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + line := gemtext.ParseLine([]byte(test.input)) + if line == nil { + t.Fatal("ParseLine() returned nil line") + } + if string(line.Raw()) != string(test.input) { + t.Error("Raw() does not match input") + } + + if line.Type() != gemtext.LineTypeLink { + t.Errorf("expected LineTypeLink, got %d", line.Type()) + } + link, ok := line.(gemtext.LinkLine) + if !ok { + t.Fatalf("expected a LinkLine, got %T", line) + } + + if string(link.URL) != test.url { + t.Errorf("expected url %q, got %q", test.url, string(link.URL)) + } + + if string(link.Label) != test.label { + t.Errorf("expected label %q, got %q", test.label, string(link.Label)) + } + }) + } +} + +func TestParsePreformatToggleLine(t *testing.T) { + tests := []struct { + input string + altText string + }{ + { + input: "```\n", + }, + { + input: "```some alt-text\r\n", + altText: "some alt-text", + }, + { + input: "``` leading space preserved\n", + altText: " leading space preserved", + }, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + line := gemtext.ParseLine([]byte(test.input)) + if line == nil { + t.Fatal("ParseLine() returned nil line") + } + if string(line.Raw()) != string(test.input) { + t.Error("Raw() does not match input") + } + + if line.Type() != gemtext.LineTypePreformatToggle { + t.Errorf("expected LineTypePreformatToggle, got %d", line.Type()) + } + toggle, ok := line.(gemtext.PreformatToggleLine) + if !ok { + t.Fatalf("expected a PreformatToggleLine, got %T", line) + } + + if string(toggle.AltText) != test.altText { + t.Errorf("expected alt-text %q, got %q", test.altText, string(toggle.AltText)) + } + }) + } +} + +func TestParseHeadingLine(t *testing.T) { + tests := []struct { + input string + lineType gemtext.LineType + body string + }{ + { + input: "# this is an H1\n", + lineType: gemtext.LineTypeHeading1, + body: "this is an H1", + }, + { + input: "## extra leading spaces\r\n", + lineType: gemtext.LineTypeHeading2, + body: "extra leading spaces", + }, + { + input: "##no leading space\n", + lineType: gemtext.LineTypeHeading2, + body: "no leading space", + }, + { + input: "#### there is no h4\n", + lineType: gemtext.LineTypeHeading3, + body: "# there is no h4", + }, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + line := gemtext.ParseLine([]byte(test.input)) + if line == nil { + t.Fatal("ParseLine() returned nil") + } + + if line.Type() != test.lineType { + t.Errorf("expected line type %d, got %d", test.lineType, line.Type()) + } + if string(line.Raw()) != test.input { + t.Error("line.Raw() does not match input") + } + + hdg, ok := line.(gemtext.HeadingLine) + if !ok { + t.Fatalf("expected HeadingLine, got a %T", line) + } + + if string(hdg.Body) != test.body { + t.Errorf("expected body %q, got %q", test.body, string(hdg.Body)) + } + }) + } +} + +func TestParseListItemLine(t *testing.T) { + tests := []struct { + input string + body string + }{ + { + input: "* this is a list item\r\n", + body: "this is a list item", + }, + { + input: "* more leading spaces\n", + body: " more leading spaces", + }, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + line := gemtext.ParseLine([]byte(test.input)) + if line == nil { + t.Fatal("ParseLine() returned nil") + } + + if line.Type() != gemtext.LineTypeListItem { + t.Errorf("expected LineTypeListItem, got %d", line.Type()) + } + if string(line.Raw()) != test.input { + t.Error("line.Raw() does not match input") + } + + li, ok := line.(gemtext.ListItemLine) + if !ok { + t.Fatalf("expected ListItemLine, got a %T", line) + } + + if string(li.Body) != test.body { + t.Errorf("expected body %q, got %q", test.body, string(li.Body)) + } + }) + } +} + +func TestParseQuoteLine(t *testing.T) { + tests := []struct { + input string + body string + }{ + { + input: ">a quote line\r\n", + body: "a quote line", + }, + { + input: "> with a leading space\n", + body: " with a leading space", + }, + { + input: "> more leading spaces\n", + body: " more leading spaces", + }, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + line := gemtext.ParseLine([]byte(test.input)) + if line == nil { + t.Fatal("ParseLine() returned nil") + } + + if line.Type() != gemtext.LineTypeQuote { + t.Errorf("expected LineTypeQuote, got %d", line.Type()) + } + if string(line.Raw()) != test.input { + t.Error("line.Raw() does not match input") + } + + qu, ok := line.(gemtext.QuoteLine) + if !ok { + t.Fatalf("expected QuoteLine , got a %T", line) + } + + if string(qu.Body) != test.body { + t.Errorf("expected body %q, got %q", test.body, string(qu.Body)) + } + }) + } +} + +func TestParseTextLine(t *testing.T) { + tests := []string { + "\n", + "simple text line\r\n", + " * an invalid list item\n", + "*another invalid list item\r\n", + } + + for _, test := range tests { + t.Run(test, func(t *testing.T) { + line := gemtext.ParseLine([]byte(test)) + if line == nil { + t.Fatal("ParseLine() returned nil") + } + + if line.Type() != gemtext.LineTypeText { + t.Errorf("expected LineTypeText, got %d", line.Type()) + } + if string(line.Raw()) != test { + t.Error("line.Raw() does not match input") + } + + _, ok := line.(gemtext.TextLine) + if !ok { + t.Fatalf("expected TextLine , got a %T", line) + } + }) + } +} diff --git a/gemtext/types.go b/gemtext/types.go new file mode 100644 index 0000000..fb9352a --- /dev/null +++ b/gemtext/types.go @@ -0,0 +1,171 @@ +package gemtext + +// LineType represents the different types of lines in a gemtext document. +type LineType int + +const ( + // LineTypeText is the default case when nothing else matches. + // + // It indicates that the line object is a TextLine. + LineTypeText LineType = iota + 1 + + // LineTypeLink is a link line. + // + // =>[][