summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gemtext/parse.go154
-rw-r--r--gemtext/parse_line_test.go271
-rw-r--r--gemtext/types.go171
3 files changed, 596 insertions, 0 deletions
diff --git a/gemtext/parse.go b/gemtext/parse.go
new file mode 100644
index 0000000..4a8c641
--- /dev/null
+++ b/gemtext/parse.go
@@ -0,0 +1,154 @@
+package gemtext
+
+import (
+ "bufio"
+ "bytes"
+ "io"
+)
+
+// Parse parses the full contents of an io.Reader into a gemtext.Document.
+func Parse(input io.Reader) (Document, error) {
+ rdr := bufio.NewReader(input)
+
+ var lines []Line
+ inPFT := false
+
+ for {
+ raw, err := rdr.ReadBytes('\n')
+ if err != io.EOF && err != nil {
+ return nil, err
+ }
+
+ var line Line
+
+ if inPFT && (len(raw) < 3 || raw[0] != '`' || raw[1] != '`' || raw[2] != '`') {
+ line = PreformattedTextLine{raw: raw}
+ } else {
+ line = ParseLine(raw)
+ }
+
+ if line.Type() == LineTypePreformatToggle {
+ if inPFT {
+ toggle := line.(PreformatToggleLine)
+ (&toggle).clearAlt()
+ line = toggle
+ }
+
+ inPFT = !inPFT
+ }
+
+ if line != nil {
+ lines = append(lines, line)
+ }
+
+ if err == io.EOF {
+ break
+ }
+ }
+
+ return Document(lines), nil
+}
+
+// ParseLine parses a single line (including the trailing \n) into a gemtext.Line.
+func ParseLine(line []byte) Line {
+ if len(line) == 0 {
+ return nil
+ }
+
+ switch line[0] {
+ case '=':
+ if len(line) == 1 || line[1] != '>' {
+ break
+ }
+ return parseLinkLine(line)
+ case '`':
+ if len(line) < 3 || line[1] != '`' || line[2] != '`' {
+ break
+ }
+ return parsePreformatToggleLine(line)
+ case '#':
+ level := 1
+ if len(line) > 1 && line[1] == '#' {
+ level += 1
+ if len(line) > 2 && line[2] == '#' {
+ level += 1
+ }
+ }
+ return parseHeadingLine(level, line)
+ case '*':
+ if len(line) == 1 || line[1] != ' ' {
+ break
+ }
+ return parseListItemLine(line)
+ case '>':
+ return parseQuoteLine(line)
+ }
+
+ return TextLine{raw: line}
+}
+
+func parseLinkLine(raw []byte) LinkLine {
+ line := LinkLine{raw: raw}
+
+ // move past =>[<whitespace>]
+ raw = bytes.TrimLeft(raw[2:], " \t")
+
+ // find the next space or tab
+ spIdx := bytes.IndexByte(raw, ' ')
+ tbIdx := bytes.IndexByte(raw, '\t')
+ idx := spIdx
+ if idx == -1 {
+ idx = tbIdx
+ }
+ if tbIdx >= 0 && tbIdx < idx {
+ idx = tbIdx
+ }
+
+ if idx < 0 {
+ line.URL = bytes.TrimRight(raw, "\r\n")
+ return line
+ }
+
+ line.URL = raw[:idx]
+ raw = raw[idx+1:]
+
+ label := bytes.TrimRight(bytes.TrimLeft(raw, " \t"), "\r\n")
+ if len(label) > 0 {
+ line.Label = label
+ }
+
+ return line
+}
+
+func parsePreformatToggleLine(raw []byte) PreformatToggleLine {
+ line := PreformatToggleLine{raw: raw}
+
+ raw = bytes.TrimRight(raw[3:], "\r\n")
+ if len(raw) > 0 {
+ line.AltText = raw
+ }
+
+ return line
+}
+
+func parseHeadingLine(level int, raw []byte) HeadingLine {
+ return HeadingLine{
+ raw: raw,
+ lineType: LineTypeHeading1 - 1 + LineType(level),
+ Body: bytes.TrimRight(bytes.TrimLeft(raw[level:], " \t"), "\r\n"),
+ }
+}
+
+func parseListItemLine(raw []byte) ListItemLine {
+ return ListItemLine{
+ raw: raw,
+ Body: bytes.TrimRight(raw[2:], "\r\n"),
+ }
+}
+
+func parseQuoteLine(raw []byte) QuoteLine {
+ return QuoteLine{
+ raw: raw,
+ Body: bytes.TrimRight(raw[1:], "\r\n"),
+ }
+}
diff --git a/gemtext/parse_line_test.go b/gemtext/parse_line_test.go
new file mode 100644
index 0000000..64c1bc7
--- /dev/null
+++ b/gemtext/parse_line_test.go
@@ -0,0 +1,271 @@
+package gemtext_test
+
+import (
+ "testing"
+
+ "tildegit.org/tjp/gus/gemtext"
+)
+
+func TestParseLinkLine(t *testing.T) {
+ tests := []struct {
+ input string
+ url string
+ label string
+ }{
+ {
+ input: "=> gemini.ctrl-c.club/~tjp/ home page\r\n",
+ url: "gemini.ctrl-c.club/~tjp/",
+ label: "home page",
+ },
+ {
+ input: "=> gemi.dev/\n",
+ url: "gemi.dev/",
+ },
+ {
+ input: "=> /gemlog/foobar 2023-01-13 - Foo Bar\n",
+ url: "/gemlog/foobar",
+ label: "2023-01-13 - Foo Bar",
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.input, func(t *testing.T) {
+ line := gemtext.ParseLine([]byte(test.input))
+ if line == nil {
+ t.Fatal("ParseLine() returned nil line")
+ }
+ if string(line.Raw()) != string(test.input) {
+ t.Error("Raw() does not match input")
+ }
+
+ if line.Type() != gemtext.LineTypeLink {
+ t.Errorf("expected LineTypeLink, got %d", line.Type())
+ }
+ link, ok := line.(gemtext.LinkLine)
+ if !ok {
+ t.Fatalf("expected a LinkLine, got %T", line)
+ }
+
+ if string(link.URL) != test.url {
+ t.Errorf("expected url %q, got %q", test.url, string(link.URL))
+ }
+
+ if string(link.Label) != test.label {
+ t.Errorf("expected label %q, got %q", test.label, string(link.Label))
+ }
+ })
+ }
+}
+
+func TestParsePreformatToggleLine(t *testing.T) {
+ tests := []struct {
+ input string
+ altText string
+ }{
+ {
+ input: "```\n",
+ },
+ {
+ input: "```some alt-text\r\n",
+ altText: "some alt-text",
+ },
+ {
+ input: "``` leading space preserved\n",
+ altText: " leading space preserved",
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.input, func(t *testing.T) {
+ line := gemtext.ParseLine([]byte(test.input))
+ if line == nil {
+ t.Fatal("ParseLine() returned nil line")
+ }
+ if string(line.Raw()) != string(test.input) {
+ t.Error("Raw() does not match input")
+ }
+
+ if line.Type() != gemtext.LineTypePreformatToggle {
+ t.Errorf("expected LineTypePreformatToggle, got %d", line.Type())
+ }
+ toggle, ok := line.(gemtext.PreformatToggleLine)
+ if !ok {
+ t.Fatalf("expected a PreformatToggleLine, got %T", line)
+ }
+
+ if string(toggle.AltText) != test.altText {
+ t.Errorf("expected alt-text %q, got %q", test.altText, string(toggle.AltText))
+ }
+ })
+ }
+}
+
+func TestParseHeadingLine(t *testing.T) {
+ tests := []struct {
+ input string
+ lineType gemtext.LineType
+ body string
+ }{
+ {
+ input: "# this is an H1\n",
+ lineType: gemtext.LineTypeHeading1,
+ body: "this is an H1",
+ },
+ {
+ input: "## extra leading spaces\r\n",
+ lineType: gemtext.LineTypeHeading2,
+ body: "extra leading spaces",
+ },
+ {
+ input: "##no leading space\n",
+ lineType: gemtext.LineTypeHeading2,
+ body: "no leading space",
+ },
+ {
+ input: "#### there is no h4\n",
+ lineType: gemtext.LineTypeHeading3,
+ body: "# there is no h4",
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.input, func(t *testing.T) {
+ line := gemtext.ParseLine([]byte(test.input))
+ if line == nil {
+ t.Fatal("ParseLine() returned nil")
+ }
+
+ if line.Type() != test.lineType {
+ t.Errorf("expected line type %d, got %d", test.lineType, line.Type())
+ }
+ if string(line.Raw()) != test.input {
+ t.Error("line.Raw() does not match input")
+ }
+
+ hdg, ok := line.(gemtext.HeadingLine)
+ if !ok {
+ t.Fatalf("expected HeadingLine, got a %T", line)
+ }
+
+ if string(hdg.Body) != test.body {
+ t.Errorf("expected body %q, got %q", test.body, string(hdg.Body))
+ }
+ })
+ }
+}
+
+func TestParseListItemLine(t *testing.T) {
+ tests := []struct {
+ input string
+ body string
+ }{
+ {
+ input: "* this is a list item\r\n",
+ body: "this is a list item",
+ },
+ {
+ input: "* more leading spaces\n",
+ body: " more leading spaces",
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.input, func(t *testing.T) {
+ line := gemtext.ParseLine([]byte(test.input))
+ if line == nil {
+ t.Fatal("ParseLine() returned nil")
+ }
+
+ if line.Type() != gemtext.LineTypeListItem {
+ t.Errorf("expected LineTypeListItem, got %d", line.Type())
+ }
+ if string(line.Raw()) != test.input {
+ t.Error("line.Raw() does not match input")
+ }
+
+ li, ok := line.(gemtext.ListItemLine)
+ if !ok {
+ t.Fatalf("expected ListItemLine, got a %T", line)
+ }
+
+ if string(li.Body) != test.body {
+ t.Errorf("expected body %q, got %q", test.body, string(li.Body))
+ }
+ })
+ }
+}
+
+func TestParseQuoteLine(t *testing.T) {
+ tests := []struct {
+ input string
+ body string
+ }{
+ {
+ input: ">a quote line\r\n",
+ body: "a quote line",
+ },
+ {
+ input: "> with a leading space\n",
+ body: " with a leading space",
+ },
+ {
+ input: "> more leading spaces\n",
+ body: " more leading spaces",
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.input, func(t *testing.T) {
+ line := gemtext.ParseLine([]byte(test.input))
+ if line == nil {
+ t.Fatal("ParseLine() returned nil")
+ }
+
+ if line.Type() != gemtext.LineTypeQuote {
+ t.Errorf("expected LineTypeQuote, got %d", line.Type())
+ }
+ if string(line.Raw()) != test.input {
+ t.Error("line.Raw() does not match input")
+ }
+
+ qu, ok := line.(gemtext.QuoteLine)
+ if !ok {
+ t.Fatalf("expected QuoteLine , got a %T", line)
+ }
+
+ if string(qu.Body) != test.body {
+ t.Errorf("expected body %q, got %q", test.body, string(qu.Body))
+ }
+ })
+ }
+}
+
+func TestParseTextLine(t *testing.T) {
+ tests := []string {
+ "\n",
+ "simple text line\r\n",
+ " * an invalid list item\n",
+ "*another invalid list item\r\n",
+ }
+
+ for _, test := range tests {
+ t.Run(test, func(t *testing.T) {
+ line := gemtext.ParseLine([]byte(test))
+ if line == nil {
+ t.Fatal("ParseLine() returned nil")
+ }
+
+ if line.Type() != gemtext.LineTypeText {
+ t.Errorf("expected LineTypeText, got %d", line.Type())
+ }
+ if string(line.Raw()) != test {
+ t.Error("line.Raw() does not match input")
+ }
+
+ _, ok := line.(gemtext.TextLine)
+ if !ok {
+ t.Fatalf("expected TextLine , got a %T", line)
+ }
+ })
+ }
+}
diff --git a/gemtext/types.go b/gemtext/types.go
new file mode 100644
index 0000000..fb9352a
--- /dev/null
+++ b/gemtext/types.go
@@ -0,0 +1,171 @@
+package gemtext
+
// LineType identifies the kind of a single line in a gemtext document.
//
// The constants start at 1, so the zero LineType is not a valid line type.
type LineType int

const (
	// LineTypeText is a plain text line, the fallback when a line matches
	// none of the other types.
	//
	// Lines of this type are TextLine values.
	LineTypeText LineType = iota + 1

	// LineTypeLink is a link line.
	//
	//	=>[<ws>]<url>[<ws><label>][\r]\n
	//
	// Lines of this type are LinkLine values.
	LineTypeLink

	// LineTypePreformatToggle turns preformatted mode on or off.
	//
	//	```[<alt-text>][\r]\n
	//
	// Lines of this type are PreformatToggleLine values.
	LineTypePreformatToggle

	// LineTypePreformattedText is any line between two preformat toggles.
	//
	// Lines of this type are PreformattedTextLine values.
	LineTypePreformattedText

	// LineTypeHeading1 is a top-level heading.
	//
	//	#[<ws>]<body>[\r]\n
	//
	// Lines of this type are HeadingLine values.
	LineTypeHeading1

	// LineTypeHeading2 is a second-level heading.
	//
	//	##[<ws>]<body>[\r]\n
	//
	// Lines of this type are HeadingLine values.
	LineTypeHeading2

	// LineTypeHeading3 is a third-level heading.
	//
	//	###[<ws>]<body>[\r]\n
	//
	// Lines of this type are HeadingLine values.
	LineTypeHeading3

	// LineTypeListItem is an unordered list item.
	//
	//	* <body>[\r]\n
	//
	// Lines of this type are ListItemLine values.
	LineTypeListItem

	// LineTypeQuote is a quote line.
	//
	//	><body>[\r]\n
	//
	// Lines of this type are QuoteLine values.
	LineTypeQuote
)
+
+// Line is the interface implemented by all specific line types.
+//
+// Many of those concrete implementation types have additional useful fields,
+// so it can be a good idea to cast these to their concrete types based on the
+// return value of the Type() method.
+type Line interface {
+ // Type returns the specific type of the gemtext line.
+ Type() LineType
+
+ // Raw reproduces the original bytes from the source reader.
+ Raw() []byte
+}
+
+// Document is the list of lines that make up a full text/gemini resource.
+type Document []Line
+
+// TextLine is a line of LineTypeText.
+type TextLine struct {
+ raw []byte
+}
+
+func (tl TextLine) Type() LineType { return LineTypeText }
+func (tl TextLine) Raw() []byte { return tl.raw }
+
+// LinkLine is a line of LineTypeLink.
+type LinkLine struct {
+ raw []byte
+
+ // URL is the original bytes of the url portion of the line.
+ //
+ // It is not guaranteed to be a valid URL.
+ URL []byte
+
+ // Label is the label portion of the line.
+ //
+ // If there was no label it will always be nil, never []byte{}.
+ Label []byte
+}
+
+func (ll LinkLine) Type() LineType { return LineTypeLink }
+func (ll LinkLine) Raw() []byte { return ll.raw }
+
+// PreformatToggleLine is a preformatted text toggle line.
+type PreformatToggleLine struct {
+ raw []byte
+
+ // AltText contains the alt-text portion of the line.
+ //
+ // It will either have len() > 0 or be nil.
+ //
+ // If the line was parsed as part of a full document by Parse(),
+ // and this is a *closing* toggle, any alt-text present will be
+ // stripped and this will be nil. If the line was parsed by
+ // ParseLine() no such correction is performed.
+ AltText []byte
+}
+
+func (tl PreformatToggleLine) Type() LineType { return LineTypePreformatToggle }
+func (tl PreformatToggleLine) Raw() []byte { return tl.raw }
+func (tl *PreformatToggleLine) clearAlt() { tl.AltText = nil }
+
+// PreformattedTextLine represents a line between two toggles.
+//
+// It is never returned by ParseLine but can be part of a
+// document parsed by Parse().
+type PreformattedTextLine struct {
+ raw []byte
+}
+
+func (tl PreformattedTextLine) Type() LineType { return LineTypePreformattedText }
+func (tl PreformattedTextLine) Raw() []byte { return tl.raw }
+
+// HeadingLine is a line of LineTypeHeading[1,2,3].
+type HeadingLine struct {
+ raw []byte
+ lineType LineType
+
+ // Body is the portion of the line with the header text.
+ Body []byte
+}
+
+func (hl HeadingLine) Type() LineType { return hl.lineType }
+func (hl HeadingLine) Raw() []byte { return hl.raw }
+
+// ListItemLine is a line of LineTypeListItem.
+type ListItemLine struct {
+ raw []byte
+
+ // Body is the text of the list item.
+ Body []byte
+}
+
+func (li ListItemLine) Type() LineType { return LineTypeListItem }
+func (li ListItemLine) Raw() []byte { return li.raw }
+
+// QuoteLine is a line of LineTypeQuote.
+type QuoteLine struct {
+ raw []byte
+
+ // Body is the text of the quote.
+ Body []byte
+}
+
+func (ql QuoteLine) Type() LineType { return LineTypeQuote }
+func (ql QuoteLine) Raw() []byte { return ql.raw }