From 44621bff1158300fc61486024b6c9ddad2508e29 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Fri, 10 Aug 2012 18:48:21 +0200 Subject: [PATCH] detect byte order mark in buffers, and write back --- src/file.cc | 27 +++++++++++++++++++++------ src/option_manager.cc | 1 + 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/file.cc b/src/file.cc index 943ceb772..c6dc755ef 100644 --- a/src/file.cc +++ b/src/file.cc @@ -92,14 +92,25 @@ Buffer* create_buffer_from_file(const String& filename) String content; char buf[256]; bool crlf = false; + bool bom = false; + bool at_file_begin = true; while (true) { ssize_t size = read(fd, buf, 256); if (size == -1 or size == 0) break; - ssize_t start = 0; - for (ssize_t pos = 0; pos < size+1; ++pos) + ssize_t pos = 0; + // detect utf-8 byte order mark + if (at_file_begin and size >= 3 and + buf[0] == '\xEF' and buf[1] == '\xBB' and buf[2] == '\xBF') + { + bom = true; + pos = 3; + } + ssize_t start = pos; + + while (pos < size+1) { if (buf[pos] == '\r' or pos == size) { @@ -109,14 +120,15 @@ Buffer* create_buffer_from_file(const String& filename) buffer->modify(Modification::make_insert(buffer->end(), String(buf+start, buf+pos))); start = pos+1; } + ++pos; } + at_file_begin = false; } close(fd); - if (crlf) - buffer->option_manager().set_option("eolformat", Option("crlf")); - else - buffer->option_manager().set_option("eolformat", Option("lf")); + OptionManager& option_manager = buffer->option_manager(); + option_manager.set_option("eolformat", Option(crlf ? "crlf" : "lf")); + option_manager.set_option("BOM", Option(bom ? "utf-8" : "no")); // it never happened, buffer always was like that buffer->reset_undo_data(); @@ -153,6 +165,9 @@ void write_buffer_to_file(const Buffer& buffer, const String& filename) if (fd == -1) throw file_access_error(filename, strerror(errno)); + if (buffer.option_manager()["BOM"].as_string() == "utf-8") + ::write(fd, "\xEF\xBB\xBF", 3); + for (size_t i = 0; i < buffer.line_count(); ++i) { // end of lines are written according to eolformat but always diff --git a/src/option_manager.cc b/src/option_manager.cc index c0c8ec62c..487fca6bd 100644 --- a/src/option_manager.cc +++ b/src/option_manager.cc @@ -95,6 +95,7 @@ GlobalOptionManager::GlobalOptionManager() { set_option("tabstop", Option(8)); set_option("eolformat", Option("lf")); + set_option("BOM", Option("no")); } }