Description: Fix heap-based buffer over-read in io::trim_chars (CVE-2018-13421)
Author: Ben Strasser <code@ben-strasser.net>
Origin: https://github.com/ben-strasser/fast-cpp-csv-parser/commit/8cf591aa7397f4372778cc927e184d28ee591093#diff-ef7f3342d40d9604321eba8dd7e59f5d
Bug: https://github.com/ben-strasser/fast-cpp-csv-parser/issues/67
Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=903247
Forwarded: no
Last-Update: 2018-07-08
---
This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
Index: trunk/csv.h
===================================================================
--- trunk.orig/csv.h
+++ trunk/csv.h
@@ -63,7 +63,7 @@ namespace io{
                                 return error_message_buffer;
                         }
 
-                        mutable char error_message_buffer[256];
+                        mutable char error_message_buffer[512];
                 };
 
                 const int max_file_name_length = 255;
@@ -74,8 +74,12 @@ namespace io{
                         }
                        
                         void set_file_name(const char*file_name){
-                                std::strncpy(this->file_name, file_name, max_file_name_length);
-                                this->file_name[max_file_name_length] = '\0';
+                                if(file_name != nullptr){
+                                        strncpy(this->file_name, file_name, error::max_file_name_length);
+                                        this->file_name[error::max_file_name_length] = '\0';
+                                }else{
+                                        this->file_name[0] = '\0';
+                                }
                         }
 
                         char file_name[max_file_name_length+1];
@@ -233,7 +237,7 @@ namespace io{
                         }
 
                         bool is_valid()const{
-                                return byte_source != 0;
+                                return byte_source != nullptr;
                         }
 
                         void start_read(char*arg_buffer, int arg_desired_byte_count){
@@ -259,7 +263,7 @@ namespace io{
                         }
 
                         ~AsynchronousReader(){
-                                if(byte_source != 0){
+                                if(byte_source != nullptr){
                                         {
                                                 std::unique_lock<std::mutex>guard(lock);
                                                 termination_requested = true;
@@ -293,7 +297,7 @@ namespace io{
                         }
 
                         bool is_valid()const{
-                                return byte_source != 0;
+                                return byte_source != nullptr;
                         }
 
                         void start_read(char*arg_buffer, int arg_desired_byte_count){
@@ -314,12 +318,12 @@ namespace io{
         class LineReader{
         private:
                 static const int block_len = 1<<24;
+                std::unique_ptr<char[]>buffer; // must be constructed before (and thus destructed after) the reader!
                 #ifdef CSV_IO_NO_THREAD
                 detail::SynchronousReader reader;
                 #else
                 detail::AsynchronousReader reader;
                 #endif
-                char*buffer;
                 int data_begin;
                 int data_end;
 
@@ -343,22 +347,17 @@ namespace io{
                 void init(std::unique_ptr<ByteSourceBase>byte_source){
                         file_line = 0;
 
-                        buffer = new char[3*block_len];
-                        try{
-                                data_begin = 0;
-                                data_end = byte_source->read(buffer, 2*block_len);
-
-                                // Ignore UTF-8 BOM
-                                if(data_end >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF')
-                                        data_begin = 3;
-
-                                if(data_end == 2*block_len){
-                                        reader.init(std::move(byte_source));
-                                        reader.start_read(buffer + 2*block_len, block_len);
-                                }
-                        }catch(...){
-                                delete[]buffer;
-                                throw;
+                        buffer = std::unique_ptr<char[]>(new char[3*block_len]);
+                        data_begin = 0;
+                        data_end = byte_source->read(buffer.get(), 2*block_len);
+
+                        // Ignore UTF-8 BOM
+                        if(data_end >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF')
+                                data_begin = 3;
+
+                        if(data_end == 2*block_len){
+                                reader.init(std::move(byte_source));
+                                reader.start_read(buffer.get() + 2*block_len, block_len);
                         }
                 }
 
@@ -422,8 +421,12 @@ namespace io{
                 }
 
                 void set_file_name(const char*file_name){
-                        strncpy(this->file_name, file_name, error::max_file_name_length);
-                        this->file_name[error::max_file_name_length] = '\0';
+                        if(file_name != nullptr){
+                                strncpy(this->file_name, file_name, error::max_file_name_length);
+                                this->file_name[error::max_file_name_length] = '\0';
+                        }else{
+                                this->file_name[0] = '\0';
+                        }
                 }
 
                 const char*get_truncated_file_name()const{
@@ -448,14 +451,14 @@ namespace io{
                         assert(data_end <= block_len*2);
 
                         if(data_begin >= block_len){
-                                std::memcpy(buffer, buffer+block_len, block_len);
+                                std::memcpy(buffer.get(), buffer.get()+block_len, block_len);
                                 data_begin -= block_len;
                                 data_end -= block_len;
                                 if(reader.is_valid())
                                 {
                                         data_end += reader.finish_read();
-                                        std::memcpy(buffer+block_len, buffer+2*block_len, block_len);
-                                        reader.start_read(buffer + 2*block_len, block_len);
+                                        std::memcpy(buffer.get()+block_len, buffer.get()+2*block_len, block_len);
+                                        reader.start_read(buffer.get() + 2*block_len, block_len);
                                 }
                         }
 
@@ -484,14 +487,10 @@ namespace io{
                         if(line_end != data_begin && buffer[line_end-1] == '\r')
                                 buffer[line_end-1] = '\0';
 
-                        char*ret = buffer + data_begin;
+                        char*ret = buffer.get() + data_begin;
                         data_begin = line_end+1;
                         return ret;
                 }
-
-                ~LineReader(){
-                        delete[] buffer;
-                }
         };
 
 
@@ -507,8 +506,12 @@ namespace io{
                         }
                        
                         void set_column_name(const char*column_name){
-                                std::strncpy(this->column_name, column_name, max_column_name_length);
-                                this->column_name[max_column_name_length] = '\0';
+                                if(column_name != nullptr){
+                                        std::strncpy(this->column_name, column_name, max_column_name_length);
+                                        this->column_name[max_column_name_length] = '\0';
+                                }else{
+                                        this->column_name[0] = '\0';
+                                }
                         }
 
                         char column_name[max_column_name_length+1];
@@ -523,8 +526,12 @@ namespace io{
                         }
                        
                         void set_column_content(const char*column_content){
-                                std::strncpy(this->column_content, column_content, max_column_content_length);
-                                this->column_content[max_column_content_length] = '\0';
+                                if(column_content != nullptr){
+                                        std::strncpy(this->column_content, column_content, max_column_content_length);
+                                        this->column_content[max_column_content_length] = '\0';
+                                }else{
+                                        this->column_content[0] = '\0';
+                                }
                         }
 
                         char column_content[max_column_content_length+1];
@@ -692,9 +699,9 @@ namespace io{
 
         public:
                 static void trim(char*&str_begin, char*&str_end){
-                        while(is_trim_char(*str_begin, trim_char_list...) && str_begin != str_end)
+                        while(str_begin != str_end && is_trim_char(*str_begin, trim_char_list...))
                                 ++str_begin;
-                        while(is_trim_char(*(str_end-1), trim_char_list...) && str_begin != str_end)
+                        while(str_begin != str_end && is_trim_char(*(str_end-1), trim_char_list...))
                                 --str_end;
                         *str_end = '\0';
                 }
@@ -786,7 +793,7 @@ namespace io{
                                         --col_end;
                                         char*out = col_begin;
                                         for(char*in = col_begin; in!=col_end; ++in){
-                                                if(*in == quote && *(in+1) == quote){
+                                                if(*in == quote && (in+1) != col_end && *(in+1) == quote){
                                                          ++in;
                                                 }
                                                 *out = *in;
@@ -1083,6 +1090,9 @@ namespace io{
 
                 template<class overflow_policy, class T>
                 void parse(char*col, T&x){
+                        // Mute unused variable compiler warning
+                        (void)col;
+                        (void)x;
                         // GCC evalutes "false" when reading the template and
                         // "sizeof(T)!=sizeof(T)" only when instantiating it. This is why
                         // this strange construct is used.
@@ -1131,9 +1141,9 @@ namespace io{
                                 column_names[i-1] = "col"+std::to_string(i);
                 }
 
-		char*next_line(){
-			return in.next_line();
-		}
+	char*next_line(){
+	    return in.next_line();
+	}
 
                 template<class ...ColNames>
                 void read_header(ignore_column ignore_policy, ColNames...cols){
@@ -1255,4 +1265,3 @@ namespace io{
         };
 }
 #endif
-
