diff --git a/configure b/configure

index 407886859d8ca0b9cbcb085ecef89d004c68cde4..aca6e8a1eaa9a6271f03eb8863640d0d93cdf435 100755

--- a/configure

+++ b/configure

@@ -7,6 +7,7 @@ genrules gmni \

src/client.c \

src/escape.c \

src/gmni.c \

+ src/parser.c \

src/url.c

}

diff --git a/include/gmni.h b/include/gmni.h

index 4d46380f2ef13db72ebf2e1a3434865142b6bcbc..d78d1c8eb4e94507c363219573691e60d40e8ea8 100644

--- a/include/gmni.h

+++ b/include/gmni.h

@@ -103,4 +103,64 @@ // Returns the general response class (i.e. with the second digit set to zero)

// of the given Gemini status code.

enum gemini_status_class gemini_response_class(enum gemini_status status);

+enum gemini_tok { // Kind of line reported by gemini_parser_next

+ GEMINI_TEXT, // Any line that matches none of the prefixes below

+ GEMINI_LINK, // Line starting with "=>"

+ GEMINI_PREFORMATTED, // Line starting with "```"

+ GEMINI_HEADING, // Line starting with '#'

+ GEMINI_LIST_ITEM, // Line starting with '*'

+ GEMINI_QUOTE, // Line starting with '>'

+};

+

+struct gemini_token { // One parsed line; release with gemini_token_finish

+ enum gemini_tok token;

+

+ // The token field determines which of the union members is valid.

+ union {

+ char *text; // GEMINI_TEXT: the whole line, newline removed

+

+ struct {

+ char *text; // Label after the URL; NULL when the line has none

+ char *url; // Text after "=>" up to the next whitespace; empty if absent

+ } link;

+

+ struct {

+ char *text; // Parser stores the placeholder "<text>" for now (TODO)

+ char *alt_text; // May be NULL; parser stores "<alt-text>" for now (TODO)

+ } preformatted;

+

+ struct {

+ char *title; // Everything after the counted '#' markers, untrimmed

+ int level; // 1, 2, or 3

+ } heading;

+

+ char *list_item; // GEMINI_LIST_ITEM: text after the leading '*', untrimmed

+ char *quote_text; // GEMINI_QUOTE: text after the leading '>', untrimmed

+ };

+};

+

+struct gemini_parser { // Line-buffering state used by gemini_parser_next

+ BIO *f; // Input source; gemini_parser_init takes a reference on it

+ char *buf; // Bytes read from f but not yet returned; scanned as a C string

+ size_t bufsz; // Size used for buf; starts at BUFSIZ and doubles when full

+ size_t bufln; // Number of bytes currently held in buf

+};

+

+// Initializes a text/gemini parser which reads from the specified BIO.

+void gemini_parser_init(struct gemini_parser *p, BIO *f); // calls BIO_up_ref on f

+

+// Finishes this text/gemini parser and frees up its resources (not p itself).

+void gemini_parser_finish(struct gemini_parser *p); // no-op when p is NULL

+

+// Reads the next line from a text/gemini file and stores it in token.

+//

+// Returns 0 on success, 1 on EOF (token then still holds any remaining text,

+// possibly empty), and -1 on failure.

+// Caller must call gemini_token_finish before exiting or re-using the token

+// parameter.

+int gemini_parser_next(struct gemini_parser *p, struct gemini_token *token);

+

+// Must be called after gemini_parser_next to free up resources for the next token.

+void gemini_token_finish(struct gemini_token *token); // no-op when token is NULL

+

#endif

diff --git a/src/gmni.c b/src/gmni.c

index 6e27b2f859c692f37fb483184cda245a7e979cbe..75c6c5afb6e7f1f286d314d93f2b44ef8414afb3 100644

--- a/src/gmni.c

+++ b/src/gmni.c

@@ -233,9 +233,11 @@ case SHOW_HEADERS:

printf("%d %s\n", resp.status, resp.meta);

/* fallthrough */

case OMIT_HEADERS:

- if (resp.status / 10 != 2) {

+ if (gemini_response_class(resp.status) !=

+ GEMINI_STATUS_CLASS_SUCCESS) {

break;

}

+

char last;

char buf[BUFSIZ];

for (int n = 1; n > 0;) {

diff --git a/src/parser.c b/src/parser.c

new file mode 100644

index 0000000000000000000000000000000000000000..ffcc28767be7d638d18609764999d004790aa9a2

--- /dev/null

+++ b/src/parser.c

@@ -0,0 +1,144 @@

+#include <assert.h>

+#include <ctype.h>

+#include <openssl/bio.h>

+#include <stddef.h>

+#include <stdlib.h>

+#include <string.h>

+#include "gmni.h"

+

+// Prepares p to tokenize text/gemini data read from f.
+//
+// Allocates the line buffer (BUFSIZ bytes plus one for the NUL terminator),
+// starts it out as an empty string, and calls BIO_up_ref on f so the parser
+// holds its own reference. gemini_parser_finish undoes both.
+void
+gemini_parser_init(struct gemini_parser *p, BIO *f)
+{
+	p->f = f;
+	p->bufln = 0;
+	p->bufsz = BUFSIZ;
+	p->buf = malloc(p->bufsz + 1);
+	// Same out-of-memory policy as the realloc in gemini_parser_next:
+	// stop here rather than write through a NULL pointer on the next line.
+	assert(p->buf);
+	p->buf[0] = 0;
+	BIO_up_ref(p->f);
+}

+

+// Releases what the parser holds: calls BIO_free on its BIO and frees the
+// line buffer. The struct itself is not freed. Passing NULL does nothing.
+void
+gemini_parser_finish(struct gemini_parser *p)
+{
+	if (p != NULL) {
+		BIO_free(p->f);
+		free(p->buf);
+	}
+}

+

+// Reads one line from the parser's BIO and classifies it into *tok.
+//
+// The start of the line selects the token type: "=>" is a link, "```" is
+// preformatted (placeholder strings only for now), '#' is a heading, '*' is a
+// list item, '>' is a quote, and anything else is plain text. Every string
+// stored in tok is a strdup copy that gemini_token_finish releases.
+//
+// Returns 0 when a newline-terminated line was consumed, 1 when the BIO
+// reported end of input (tok then holds whatever unterminated text was left,
+// possibly an empty string), and -1 when BIO_read reports an error (tok is
+// left zeroed).
+int
+gemini_parser_next(struct gemini_parser *p, struct gemini_token *tok)
+{
+	memset(tok, 0, sizeof(*tok));
+
+	// Pull data from the BIO until the buffer holds a complete line.
+	int eof = 0;
+	while (!strchr(p->buf, '\n')) {
+		if (p->bufln == p->bufsz) {
+			// Keep one byte beyond bufsz for the NUL terminator, as
+			// gemini_parser_init does; without it the terminator
+			// written below lands one past the allocation.
+			char *buf = realloc(p->buf, p->bufsz * 2 + 1);
+			assert(buf);
+			p->buf = buf;
+			p->bufsz *= 2;
+		}
+
+		int n = BIO_read(p->f, &p->buf[p->bufln], p->bufsz - p->bufln);
+		if (n < 0) {
+			// Any negative value is an error (BIO_read may return
+			// -2 as well as -1); never add it to bufln.
+			return -1;
+		} else if (n == 0) {
+			eof = 1;
+			break;
+		}
+		p->bufln += n;
+		p->buf[p->bufln] = 0;
+	}
+
+	// TODO: Collapse multi-line text for the user-agent to wrap
+	char *end = strchr(p->buf, '\n');
+	if (end != NULL) {
+		*end = 0;
+	}
+
+	// TODO: Provide whitespace trimming helper function
+	// The <ctype.h> calls below cast to unsigned char: passing a negative
+	// plain char (any byte >= 0x80 where char is signed) is undefined.
+	if (strncmp(p->buf, "=>", 2) == 0) {
+		tok->token = GEMINI_LINK;
+		size_t i = 2;
+		while (isspace((unsigned char)p->buf[i])) {
+			++i;
+		}
+		const char *url = &p->buf[i];
+
+		// The URL ends at the first whitespace; whatever follows the
+		// whitespace run is the optional label.
+		for (; p->buf[i]; ++i) {
+			if (isspace((unsigned char)p->buf[i])) {
+				p->buf[i++] = 0;
+				while (isspace((unsigned char)p->buf[i])) {
+					++i;
+				}
+				if (p->buf[i]) {
+					tok->link.text = strdup(&p->buf[i]);
+				}
+				break;
+			}
+		}
+
+		tok->link.url = strdup(url);
+	} else if (strncmp(p->buf, "```", 3) == 0) {
+		tok->token = GEMINI_PREFORMATTED; // TODO
+		tok->preformatted.text = strdup("<text>");
+		tok->preformatted.alt_text = strdup("<alt-text>");
+	} else if (p->buf[0] == '#') {
+		tok->token = GEMINI_HEADING;
+		int level = 1;
+		while (level < 3 && p->buf[level] == '#') {
+			++level;
+		}
+		tok->heading.level = level;
+		tok->heading.title = strdup(&p->buf[level]);
+	} else if (p->buf[0] == '*') {
+		tok->token = GEMINI_LIST_ITEM;
+		tok->list_item = strdup(&p->buf[1]);
+	} else if (p->buf[0] == '>') {
+		tok->token = GEMINI_QUOTE;
+		tok->quote_text = strdup(&p->buf[1]);
+	} else {
+		tok->token = GEMINI_TEXT;
+		tok->text = strdup(p->buf);
+	}
+
+	// Drop the consumed line and keep any bytes already read past it.
+	if (end != NULL && end + 1 < p->buf + p->bufln) {
+		size_t len = end - p->buf + 1;
+		p->bufln -= len;
+		memmove(p->buf, end + 1, p->bufln);
+		// Re-terminate: the next call searches buf as a C string and
+		// must not see stale bytes from the line just removed.
+		p->buf[p->bufln] = 0;
+	} else {
+		p->buf[0] = 0;
+		p->bufln = 0;
+	}
+
+	return eof;
+}

+

+// Frees the heap strings owned by tok, selected by tok->token. The token
+// struct itself is not freed. Passing NULL does nothing.
+void
+gemini_token_finish(struct gemini_token *tok)
+{
+	if (tok == NULL) {
+		return;
+	}
+
+	const enum gemini_tok kind = tok->token;
+	if (kind == GEMINI_LINK) {
+		free(tok->link.text);
+		free(tok->link.url);
+	} else if (kind == GEMINI_PREFORMATTED) {
+		free(tok->preformatted.text);
+		free(tok->preformatted.alt_text);
+	} else if (kind == GEMINI_HEADING) {
+		free(tok->heading.title);
+	} else if (kind == GEMINI_LIST_ITEM) {
+		free(tok->list_item);
+	} else if (kind == GEMINI_QUOTE) {
+		free(tok->quote_text);
+	} else if (kind == GEMINI_TEXT) {
+		free(tok->text);
+	}
+}