Nmap Development mailing list archives

[NSE] Improved HTML conformance for http.parse_form()


From: nnposter () users sourceforge net
Date: Thu, 4 Sep 2014 18:10:25 +0000

The patch below improves HTML conformance of http.parse_form() and
http.grab_forms() by covering the following cases:

* Distinguishing between similarly named elements or attributes, such
  as form/formfoo or type/footype

* Legitimate space in the tag, such as </form(SPACE)>

* Legitimate space around attribute value, such as foo(SPACE)=(SPACE)"bar"

* Unquoted attribute syntax, such as foo=bar instead of foo="bar"

* Empty attribute syntax, such as foo instead of foo=""

* Presence of quotes in attribute values, such as foo='Joe "Wheel" Doe'


The patch fits on top of another patch for handling forms without
the action attribute. (See http://seclists.org/nmap-dev/2014/q3/384 and
http://seclists.org/nmap-dev/2014/q3/388 for details.)


Cheers,
nnposter


--- nselib/http.lua.orig        2014-08-29 13:14:44.689699000 -0600
+++ nselib/http.lua     2014-09-04 11:06:35.721499100 -0600
@@ -1880,8 +1880,8 @@
 function grab_forms(body)
   local forms = {}
   if not body then return forms end
-  local form_start_expr = '<%s*[Ff][Oo][Rr][Mm]'
-  local form_end_expr = '</%s*[Ff][Oo][Rr][Mm]>'
+  local form_start_expr = '<%s*[Ff][Oo][Rr][Mm][%s>]'
+  local form_end_expr = '</%s*[Ff][Oo][Rr][Mm][%s>]'
 
   local form_opening = string.find(body, form_start_expr)
   local forms = {}
@@ -1911,19 +1911,36 @@
 function parse_form(form)
   local parsed = {}
   local fields = {}
-  local form_action = string.match(form, '[Aa][Cc][Tt][Ii][Oo][Nn]=[\'"](.-)[\'"]')
+  local get_attr = function (html, name)
+                     local lhtml = html:lower()
+                     local lname = name:lower()
+                     -- try the attribute-value syntax first
+                     local _, pos = lhtml:find('%s' .. lname .. '%s*=%s*[^%s]')
+                     if not pos then
+                       -- try the empty attribute syntax and, if found,
+                       -- return zero-length string as its value; nil otherwise
+                       return lhtml:match('[^%s=]%s+' .. lname .. '[%s/>]') and "" or nil
+                     end
+                     local value
+                     _, value = html:match('^([\'"])(.-)%1', pos)
+                     if not value then
+                       value = html:match('^[^%s<>=\'"`]+', pos)
+                     end
+                     return value
+                   end
+  local form_action = get_attr(form, "action")
   if form_action then
     parsed["action"] = form_action
   end
 
   -- determine if the form is using get or post
-  local form_method = string.match(form, '[Mm][Ee][Tt][Hh][Oo][Dd]=[\'"](.-)[\'"]')
+  local form_method = get_attr(form, "method")
   if form_method then
     parsed["method"] = string.lower(form_method)
   end
 
   -- get the id of the form
-  local form_id = string.match(form, '[iI][dD]=[\'"](.-)[\'"]')
+  local form_id = get_attr(form, "id")
   if form_id then
     parsed["id"] = string.lower(form_id)
   end
@@ -1934,10 +1951,10 @@
   local input_value
 
   -- first find regular inputs
-  for f in string.gmatch(form, '<%s*[Ii][Nn][Pp][Uu][Tt].->') do
-    input_type = string.match(f, '[Tt][Yy][Pp][Ee]=[\'"](.-)[\'"]')
-    input_name = string.match(f, '[Nn][Aa][Mm][Ee]=[\'"](.-)[\'"]')
-    input_value = string.match(f, '[Vv][Aa][Ll][Uu][Ee]=[\'"](.-)[\'"]')
+  for f in string.gmatch(form, '<%s*[Ii][Nn][Pp][Uu][Tt]%f[%s/>].->') do
+    input_type = get_attr(f, "type")
+    input_name = get_attr(f, "name")
+    input_value = get_attr(f, "value")
     local next_field_index = #fields+1
     if input_name then
       fields[next_field_index] = {}
@@ -1952,8 +1969,8 @@
   end
 
   -- now search for textareas
-  for f in string.gmatch(form, '<%s*[Tt][Ee][Xx][Tt][Aa][Rr][Ee][Aa].->') do
-    input_name = string.match(f, '[Nn][Aa][Mm][Ee]=[\'"](.-)[\'"]')
+  for f in string.gmatch(form, '<%s*[Tt][Ee][Xx][Tt][Aa][Rr][Ee][Aa]%f[%s/>].->') do
+    input_name = get_attr(f, "name")
     local next_field_index = #fields+1
     if input_name then
       fields[next_field_index] = {}
_______________________________________________
Sent through the dev mailing list
http://nmap.org/mailman/listinfo/dev
Archived at http://seclists.org/nmap-dev/


Current thread: