fix: improve CSV parser error handling

This commit is contained in:
carla 2026-01-15 11:08:22 +01:00
parent 31cf07c071
commit 3bbe9895ee
2 changed files with 51 additions and 34 deletions

View file

@ -23,13 +23,9 @@ defmodule Mv.Membership.Import.CsvParser do
@spec parse(binary()) :: {:ok, row(), [numbered_row()]} | {:error, String.t()}
def parse(file_content) when is_binary(file_content) do
content =
file_content
|> strip_bom()
|> normalize_line_endings()
with :ok <- validate_content_not_empty(content),
:ok <- check_balanced_quotes(content),
with :ok <- validate_utf8(file_content),
content <- file_content |> strip_bom() |> normalize_line_endings(),
:ok <- validate_content_not_empty(content),
{:ok, header_record, data_records} <- extract_header_and_data(content),
:ok <- validate_header_not_empty(header_record),
{:ok, headers, rows} <- parse_csv_records(header_record, data_records) do
@ -41,6 +37,15 @@ defmodule Mv.Membership.Import.CsvParser do
# Fallback clause: any argument not handled by an earlier clause is rejected
# with a generic error tuple.
def parse(_other) do
  {:error, "Invalid CSV content"}
end
# Returns :ok when `content` is well-formed UTF-8 (as judged by
# String.valid?/1); otherwise returns an error tuple with a user-facing message.
@spec validate_utf8(binary()) :: :ok | {:error, String.t()}
defp validate_utf8(content) do
  case String.valid?(content) do
    true -> :ok
    false -> {:error, "CSV must be valid UTF-8"}
  end
end
@spec validate_content_not_empty(binary()) :: :ok | {:error, String.t()}
defp validate_content_not_empty(content) do
if String.trim(content) == "" do
@ -157,10 +162,10 @@ defmodule Mv.Membership.Import.CsvParser do
try do
rows =
data_records
|> Enum.reduce([], fn {line_no, record}, acc ->
|> Enum.reduce_while([], fn {line_no, record}, acc ->
case String.trim(record) do
"" ->
acc
{:cont, acc}
_ ->
parsed = parser.parse_string(ensure_trailing_newline(record), skip_headers: false)
@ -168,20 +173,23 @@ defmodule Mv.Membership.Import.CsvParser do
case parsed do
[row] when is_list(row) ->
if empty_row?(row) do
acc
{:cont, acc}
else
[{line_no, row} | acc]
{:cont, [{line_no, row} | acc]}
end
# empty row or unparsable row -> treat as skipped empty row
# unparsable row -> return error with line number
_ ->
acc
snippet = String.slice(record, 0, min(50, String.length(record)))
{:halt, {:error, "Failed to parse CSV data at line #{line_no}: #{inspect(snippet)}"}}
end
end
end)
|> Enum.reverse()
{:ok, rows}
case rows do
{:error, reason} -> {:error, reason}
rows -> {:ok, Enum.reverse(rows)}
end
rescue
e ->
{:error, "Failed to parse CSV: #{Exception.message(e)}"}
@ -193,21 +201,6 @@ defmodule Mv.Membership.Import.CsvParser do
Enum.all?(row, fn field -> String.trim(field) == "" end)
end
# Verify that the content contains an even number of quote bytes.
#
# This is a deliberately simple heuristic: every byte equal to @quote is
# counted. Because CSV escapes a quote as "", a well-formed file always has an
# even total, so an odd total is reported as unbalanced quoting.
@spec check_balanced_quotes(binary()) :: :ok | {:error, String.t()}
defp check_balanced_quotes(content) do
  # Walk the binary byte by byte and tally the bytes that equal @quote.
  quote_total =
    for <<byte <- content>>, byte == @quote, reduce: 0 do
      tally -> tally + 1
    end

  case rem(quote_total, 2) do
    0 -> :ok
    _odd -> {:error, "Unbalanced quotes in CSV file"}
  end
end
# --- Record splitting with correct line numbers (quote-aware) ---
#
# This splits the CSV into "records" separated by newline *outside quotes*,