Parsing an HTTP request

This sketch contains functions to correctly decode an HTTP request line coming from an HTML form. It does not manage HTTP headers or the body - it's purely about getting the URL out of the request, decoding the percent-escapes, and parsing the www-form-encoded parameters.

The parser uses the "in-place" or "destructive" method, blatting the delimiters with '\0' to break up the string. The sketch produces output demonstrating the effect that this has on the buffer.

This demo does not wrap its string literals in F() (which would keep them in flash and conserve RAM), because the strings are only there for demonstration.


/**
   This sketch demonstrates how to parse an http request line and
   its url into bits, including decoding the http-encoded url.

   The technique used is to break up the content "in-place".
   The delimiters in the buffer holding the content of the request are
   overwritten with '\0' to break up the string. The locations
   of the portions of the request are recorded in char pointers,
   which are C strings (little s, not big S).

   This code attempts to catch errors and buffer overruns.
*/


/**
   The structure of an HTTP request line is defined here
   https://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5
*/

/**
   The three parts of an HTTP request line, as filled in by
   decodeHttpRequest(). Every member points into the buffer that was
   handed to decodeHttpRequest(); nothing is copied.
*/
struct HttpRequestLine {
  char *method;        // start of the buffer, up to the first space
  char *request_uri;   // text between the first and the second space
  char *http_version;  // text after the second space
};

// Capacity of the pointer arrays in DecodedUri and HttpFormParam.
// splitUri() reports failure for a uri with more parameters than this.
const int MAX_URI_PARAMS = 10;

/**
   A request uri broken up at its '?' and '&' delimiters by splitUri().
   All pointers refer into the string that was passed to splitUri().
*/
struct DecodedUri {
  char *resource;               // the uri up to the first '?' (the whole uri if there is none)
  char *param[MAX_URI_PARAMS];  // the pieces that followed '?' / '&'; only the first nparam are set
  int nparam;                   // number of entries of param[] in use
};

/**
   Key/value view of the parameters of a DecodedUri, as filled in by
   decodeHttpFormParams(). key[i] and value[i] belong together and
   point into the parameter strings of the DecodedUri.
*/
struct HttpFormParam {
  char *key[MAX_URI_PARAMS];    // text before the first '=' of parameter i
  char *value[MAX_URI_PARAMS];  // text after that '='; an empty string when there is no '='
  int nparam;                   // number of key/value pairs in use
};

/**
   Splits an HTTP request line of the form
   "<method> <request-uri> <http-version>" into its three parts, in place.

   The two separating spaces in buf are overwritten with '\0' and the
   members of out are pointed at the start of each part, so no text is
   copied.

   @param buf  buffer holding the request line as a C string
   @param n    size of buf in bytes
   @param out  receives pointers into buf
   @return true when all three parts were found. false when buf is NULL,
           n is not positive, buf holds no '\0' within its first n bytes,
           or the line has fewer than two spaces. On a false return buf
           may already have been modified and out is not completely set.
*/
boolean decodeHttpRequest(char *buf, int n, HttpRequestLine &out) {
  // Make sure the string ends inside the buffer BEFORE any str*() call
  // touches it: strchr() on an unterminated buffer would search past
  // buf + n. Once this holds, every pointer strchr() returns below is
  // inside the buffer and each part is itself terminated.
  if (!buf || n <= 0 || !memchr(buf, '\0', n)) {
    return false;
  }

  out.method = buf;

  char *sep = strchr(buf, ' ');
  if (!sep) {
    return false;
  }
  *sep++ = '\0';
  out.request_uri = sep;

  sep = strchr(sep, ' ');
  if (!sep) {
    return false;
  }
  *sep++ = '\0';
  out.http_version = sep;

  return true;
}

/**
   Splits a request uri, in place, into the resource and its parameters.

   The first '?' and every '&' after it are overwritten with '\0';
   out.resource is set to buf and each out.param[] entry to the text
   that follows one of those delimiters.

   @param buf  the uri as a C string; it is modified
   @param out  receives the resource pointer, the parameter pointers and
               their count
   @return false when the uri holds more than MAX_URI_PARAMS parameters,
           true otherwise (including when there is no '?' at all)
*/
boolean splitUri(char *buf, DecodedUri &out) {
  out.nparam = 0;
  out.resource = buf;

  // the first delimiter is the '?', all the following ones are '&'
  char *delim = strchr(buf, '?');
  while (delim) {
    if (out.nparam >= MAX_URI_PARAMS) {
      // too many parameters
      return false;
    }

    *delim = '\0';
    char *piece = delim + 1;
    out.param[out.nparam] = piece;
    out.nparam++;

    delim = strchr(piece, '&');
  }

  return true;
}

/**
   Percent-decodes, in place, the resource and every parameter of a
   uri that has been split by splitUri().

   @param out  the split uri; the strings it points at are modified
   @return false as soon as one component fails to decode (the remaining
           components are then left alone), true when all decoded
*/
boolean decodeUri(DecodedUri &out) {
  boolean ok = decodeUri(out.resource);

  // stop at the first component that cannot be decoded
  for (int i = 0; ok && i < out.nparam; i++) {
    ok = decodeUri(out.param[i]);
  }

  return ok;
}

/** Returns the value (0-15) of one hexadecimal digit; c must satisfy isxdigit(). */
static unsigned char hexDigitValue(char c) {
  if (c <= '9') return c - '0';
  if (c <= 'F') return c - 'A' + 10;
  return c - 'a' + 10;
}

/**
   Decodes the percent-escapes of a C string in place: every "%XY",
   where X and Y are hexadecimal digits, is replaced by the single
   character with that code and the rest of the string is moved down.

   Note that "%00" decodes to a '\0' in the middle of the text, which
   cuts the string short for anything that reads it as a C string.

   The return type is spelled bool, the standard type that Arduino's
   boolean is an alias for.

   @param p  the string to decode; it is modified
   @return false when a '%' is not followed by two hexadecimal digits
           (escapes before that point have already been decoded),
           true otherwise
*/
bool decodeUri(char *p) {
  // number of characters from p up to (not including) the '\0'
  size_t n = strlen(p);

  while (*p) {
    if (*p != '%') {
      p++;
      n--;
      continue;
    }

    // isxdigit() is only defined for unsigned char values, hence the
    // casts. The || short-circuit keeps p[2] from being read when p[1]
    // is already the terminating '\0'.
    if (!isxdigit(static_cast<unsigned char>(p[1])) ||
        !isxdigit(static_cast<unsigned char>(p[2]))) {
      return false;
    }

    *p = static_cast<char>((hexDigitValue(p[1]) << 4) | hexDigitValue(p[2]));

    n -= 3;
    p++;
    // Source and destination overlap, so this has to be memmove();
    // memcpy() is undefined for overlapping ranges.
    memmove(p, p + 2, n + 1); // +1 to include the '\0'
  }

  return true;
}

/**
   Splits each parameter of a uri into an HTML form key and value, in
   place, and turns every '+' in both into a space.

   A parameter is split at its first '=', which is overwritten with
   '\0'. A parameter without '=' yields the whole text as key and an
   empty string as value.

   NOTE(review): demoDecode() percent-decodes the parameters before
   calling this function, so an escaped plus ("%2B") has already become
   a literal '+' by the time it gets here and is turned into a space as
   well. Form decoding normally translates '+' before percent-decoding;
   changing that order needs changes outside this function.

   @param in   the split uri; the parameter strings are modified
   @param out  receives the key and value pointers and their count
   @return always true
*/
boolean decodeHttpFormParams(DecodedUri &in, HttpFormParam &out) {
  out.nparam = in.nparam;

  for (int i = 0; i < out.nparam; i++) {
    char *key = in.param[i];
    char *value;

    char *eq = strchr(key, '=');
    if (eq) {
      *eq = '\0';
      value = eq + 1;
    }
    else {
      // a convenient way to get a '\0': point at the key's own terminator
      value = key + strlen(key);
    }

    // '+' stands for a space in form data, in the key as much as in the value
    for (char *c = key; *c; c++) {
      if (*c == '+') *c = ' ';
    }
    for (char *c = value; *c; c++) {
      if (*c == '+') *c = ' ';
    }

    out.key[i] = key;
    out.value[i] = value;
  }

  return true;
}

/////////////////////////////////////////////////////////////////////
// Everything below this point is just code for this particular demo

// The request lines that setup() feeds to demoDecode(), one after the
// other. The NULL entry marks the end of the list.
// NOTE(review): the entries are string literals held through non-const
// char pointers. demoDecode() copies each one into buf before parsing;
// confirm that nothing ever writes through these pointers.
char *sampleRequests[] =
{
  "POST /cgi-bin/process.cgi HTTP/1.0", //old school http
  "OPTIONS * HTTP/1.2", // http 1.2 doesn't exist yet
  "GET /pub/WWW/TheProject.html HTTP/1.1",
  "POST /myForm?data1=Some+data&data2=Other%20data&data3=What%20data%3F%3F%3F&data4=Data+%26%20More%21 HTTP/1.1",
  NULL
};

// Working buffer of the demo: demoDecode() copies a request line in
// here and the parsing functions then break it up in place.
char buf[120];

/**
   Opens the serial port at 9600 baud, prints a short countdown and
   then runs demoDecode() on every entry of sampleRequests.
*/
void setup() {
  Serial.begin(9600);
  while (!Serial) {
    // spin until Serial reports that it is ready
  }

  Serial.print("Beginning sketch in ");
  int countdown = 3;
  while (countdown > 0) {
    Serial.print(countdown);
    Serial.print(' ');
    delay(500);
    countdown--;
  }
  Serial.println(".");

  // the list ends at its NULL entry
  for (int i = 0; sampleRequests[i]; i++) {
    demoDecode(sampleRequests[i]);
  }
}

void demoDecode(char *req) {
  Serial.println();
  Serial.println("-----------------------------------------");
  Serial.print("Decoding ");
  Serial.println(req);

  // clear the buffer for demonstration purposes
  memset(buf, '\0', sizeof(buf));
  strncpy(buf, req, sizeof(buf));

  Serial.println("Initially, the buffer contains this:");
  displayln(buf, sizeof(buf));

  Serial.println();
  Serial.println("STEP 1 - split the request line into its parts");
  HttpRequestLine requestLine;

  if (!decodeHttpRequest(buf, sizeof(buf), requestLine)) {
    Serial.println("The contents of the buffer cannot be decoded.");
    return;
  }

  Serial.println("After parsing the request line, the buffer contains this:");
  displayln(buf, sizeof(buf));
  println("The HTTP method is ", requestLine.method);
  println("The request uri is ", requestLine.request_uri);
  println("The HTTP version is ", requestLine.http_version);

  if (strcmp(requestLine.http_version, "HTTP/1.0") == 0) {
    Serial.println("Immediatrely after this request line will come the HTTP body (if any)");
  }
  else if (strcmp(requestLine.http_version, "HTTP/1.1") == 0) {
    Serial.println("Immediately after this request line will come some HTTP headers, then a blank line, then the body (if aby)");
  }
  else  {
    Serial.println("Unrecognised HTTP version.");
    return;
  }

  Serial.println();
  Serial.println("STEP 2 - split the request uri into its parts");

  DecodedUri decodedUri;

  if (!splitUri(requestLine.request_uri, decodedUri)) {
    Serial.println("The uri cannot be split into parts.");
    return;
  }

  Serial.println("After splitting the uri, the buffer contains this:");
  displayln(buf, sizeof(buf));

  println("Reqested resource", decodedUri.resource);
  println("Number of parameters", decodedUri.nparam);
  for (int i = 0; i < decodedUri.nparam; i++) {
    Serial.print("Parameter ");
    Serial.print(i);
    Serial.print(": ");
    Serial.println(decodedUri.param[i]);
  }

  Serial.println();
  Serial.println("STEP 3 - decode the components");
  if (!decodeUri(decodedUri)) {
    Serial.println("The uri ccannot be decoded.");
    return;
  }

  Serial.println("After decoding the uri, the buffer contains this:");
  displayln(buf, sizeof(buf));
  println("Reqested resource", decodedUri.resource);
  println("Number of parameters", decodedUri.nparam);
  for (int i = 0; i < decodedUri.nparam; i++) {
    Serial.print("Parameter ");
    Serial.print(i);
    Serial.print(": ");
    Serial.println(decodedUri.param[i]);
  }

  Serial.println();
  Serial.println("STEP 4 - html form decoding");

  HttpFormParam httpFormParams;

  if (!decodeHttpFormParams(decodedUri, httpFormParams)) {
    Serial.println("The parameters do not appear to be HTTP form params.");
    return;
  }

  Serial.println("After decoding html form parameters, the buffer contains this:");
  displayln(buf, sizeof(buf));

  println("Number of parameters", httpFormParams.nparam);
  for (int i = 0; i < httpFormParams.nparam; i++) {
    println(httpFormParams.key[i], httpFormParams.value[i]);
  }

}

/**
   Prints "<title>: <content>" and a line break on Serial.

   Both parameters are pointers to const: the function only reads the
   text, and callers pass string literals, which must not be bound to a
   plain char pointer. Non-const char pointers are still accepted.

   @param title    label printed before the colon
   @param content  text printed after the colon
*/
void println(const char *title, const char *content) {
  Serial.print(title);
  Serial.print(": ");
  Serial.println(content);
}

/**
   Prints "<title>: <content>" and a line break on Serial, for an
   integer content.

   title is a pointer to const: the function only reads the text, and
   callers pass string literals, which must not be bound to a plain
   char pointer. Non-const char pointers are still accepted.

   @param title    label printed before the colon
   @param content  number printed after the colon
*/
void println(const char *title, int content) {
  Serial.print(title);
  Serial.print(": ");
  Serial.println(content);
}

/**
   Dumps the first n bytes of s on Serial in the format of display(),
   followed by a line break.
*/
void displayln(char *s, int n) {
  display(s, n);
  Serial.println();
}

void display(char *s, int n) {
  Serial.print('"');
  boolean inQuote = true;
  for (int i = 0; i < n; i++) {
    if (s[i]) {
      if (!inQuote) {
        Serial.print(" \"");
        inQuote = true;
      }
      Serial.print(s[i]);
    }
    else {
      if (inQuote) {
        Serial.print("\" ");
        inQuote = false;
      }
      Serial.print("0");
    }
  }
  if (inQuote) Serial.print("\"");
}

// Empty: all of the demo's work is done in setup().
void loop() {
  // put your main code here, to run repeatedly:
}

Share