From aa23984bc21f99e00b8552e928d23ef02bb745f3 Mon Sep 17 00:00:00 2001 From: tjpcc Date: Fri, 13 Jan 2023 10:50:30 -0700 Subject: Initial gemtext package. Contains: - gemtext AST (Document and line types) - Parse from an io.Reader - ParseLine a []byte - doc comments on everything - ParseLine tests for every line type Still needs tests for Parse & Document. --- gemtext/parse.go | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 gemtext/parse.go (limited to 'gemtext/parse.go') diff --git a/gemtext/parse.go b/gemtext/parse.go new file mode 100644 index 0000000..4a8c641 --- /dev/null +++ b/gemtext/parse.go @@ -0,0 +1,154 @@ +package gemtext + +import ( + "bufio" + "bytes" + "io" +) + +// Parse parses the full contents of an io.Reader into a gemtext.Document. +func Parse(input io.Reader) (Document, error) { + rdr := bufio.NewReader(input) + + var lines []Line + inPFT := false + + for { + raw, err := rdr.ReadBytes('\n') + if err != io.EOF && err != nil { + return nil, err + } + + var line Line + + if inPFT && (len(raw) < 3 || raw[0] != '`' || raw[1] != '`' || raw[2] != '`') { + line = PreformattedTextLine{raw: raw} + } else { + line = ParseLine(raw) + } + + if line.Type() == LineTypePreformatToggle { + if inPFT { + toggle := line.(PreformatToggleLine) + (&toggle).clearAlt() + line = toggle + } + + inPFT = !inPFT + } + + if line != nil { + lines = append(lines, line) + } + + if err == io.EOF { + break + } + } + + return Document(lines), nil +} + +// ParseLine parses a single line (including the trailing \n) into a gemtext.Line. +func ParseLine(line []byte) Line { + if len(line) == 0 { + return nil + } + + switch line[0] { + case '=': + if len(line) == 1 || line[1] != '>' { + break + } + return parseLinkLine(line) + case '`': + if len(line) < 3 || line[1] != '`' || line[2] != '`' { + break + } + return parsePreformatToggleLine(line) + case '#': + level := 1 + if len(line) > 1 && line[1] == '#' { + level += 1 + if len(line) > 2 && line[2] == '#' { + level += 1 + } + } + return parseHeadingLine(level, line) + case '*': + if len(line) == 1 || line[1] != ' ' { + break + } + return parseListItemLine(line) + case '>': + return parseQuoteLine(line) + } + + return TextLine{raw: line} +} + +func parseLinkLine(raw []byte) LinkLine { + line := LinkLine{raw: raw} + + // move past =>[] + raw = bytes.TrimLeft(raw[2:], " \t") + + // find the next space or tab + spIdx := bytes.IndexByte(raw, ' ') + tbIdx := bytes.IndexByte(raw, '\t') + idx := spIdx + if idx == -1 { + idx = tbIdx + } + if tbIdx >= 0 && tbIdx < idx { + idx = tbIdx + } + + if idx < 0 { + line.URL = bytes.TrimRight(raw, "\r\n") + return line + } + + line.URL = raw[:idx] + raw = raw[idx+1:] + + label := bytes.TrimRight(bytes.TrimLeft(raw, " \t"), "\r\n") + if len(label) > 0 { + line.Label = label + } + + return line +} + +func parsePreformatToggleLine(raw []byte) PreformatToggleLine { + line := PreformatToggleLine{raw: raw} + + raw = bytes.TrimRight(raw[3:], "\r\n") + if len(raw) > 0 { + line.AltText = raw + } + + return line +} + +func parseHeadingLine(level int, raw []byte) HeadingLine { + return HeadingLine{ + raw: raw, + lineType: LineTypeHeading1 - 1 + LineType(level), + Body: bytes.TrimRight(bytes.TrimLeft(raw[level:], " \t"), "\r\n"), + } +} + +func parseListItemLine(raw []byte) ListItemLine { + return ListItemLine{ + raw: raw, + Body: bytes.TrimRight(raw[2:], "\r\n"), + } +} + +func parseQuoteLine(raw []byte) QuoteLine { + return QuoteLine{ + raw: raw, + Body: bytes.TrimRight(raw[1:], "\r\n"), + } +} -- cgit v1.2.3