mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2024-11-23 11:26:10 +03:00
Tweaks for Excel to Markdown conversion (#3022)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
parent
dc82f883f8
commit
b850e7c867
2
.gitmodules
vendored
2
.gitmodules
vendored
@ -16,4 +16,4 @@
|
||||
url = https://github.com/nomic-ai/DuckX.git
|
||||
[submodule "gpt4all-chat/deps/QXlsx"]
|
||||
path = gpt4all-chat/deps/QXlsx
|
||||
url = https://github.com/QtExcel/QXlsx.git
|
||||
url = https://github.com/nomic-ai/QXlsx.git
|
||||
|
@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
||||
- Change the error message when a message is too long ([#3004](https://github.com/nomic-ai/gpt4all/pull/3004))
|
||||
- Simplify chatmodel to get rid of unnecessary field and bump chat version ([#3016](https://github.com/nomic-ai/gpt4all/pull/3016))
|
||||
- Allow ChatLLM to have direct access to ChatModel for restoring state from text ([#3018](https://github.com/nomic-ai/gpt4all/pull/3018))
|
||||
- Improvements to XLSX conversion and UI fix ([#3022](https://github.com/nomic-ai/gpt4all/pull/3022))
|
||||
|
||||
### Fixed
|
||||
- Fix a crash when attempting to continue a chat loaded from disk ([#2995](https://github.com/nomic-ai/gpt4all/pull/2995))
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit fda6b806e2ceebd81c01cdded07ae84c94f5879c
|
||||
Subproject commit 29e81b369128525749dcb6516195b6b062eda955
|
@ -939,6 +939,7 @@ Rectangle {
|
||||
|
||||
Text {
|
||||
id: attachmentFileText
|
||||
width: 295
|
||||
height: 40
|
||||
text: modelData.file
|
||||
color: theme.textColor
|
||||
@ -947,6 +948,7 @@ Rectangle {
|
||||
font.pixelSize: theme.fontSizeMedium
|
||||
font.bold: true
|
||||
wrapMode: Text.WrapAnywhere
|
||||
elide: Qt.ElideRight
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1971,6 +1973,7 @@ Rectangle {
|
||||
|
||||
Text {
|
||||
id: attachmentFileText2
|
||||
width: 265
|
||||
height: 40
|
||||
text: model.file
|
||||
color: theme.textColor
|
||||
@ -1979,6 +1982,7 @@ Rectangle {
|
||||
font.pixelSize: theme.fontSizeMedium
|
||||
font.bold: true
|
||||
wrapMode: Text.WrapAnywhere
|
||||
elide: Qt.ElideRight
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -10,8 +10,10 @@
|
||||
#include <QDateTime>
|
||||
#include <QDebug>
|
||||
#include <QList>
|
||||
#include <QRegularExpression>
|
||||
#include <QString>
|
||||
#include <QStringList>
|
||||
#include <QStringView>
|
||||
#include <QVariant>
|
||||
#include <QtGlobal>
|
||||
#include <QtLogging>
|
||||
@ -33,7 +35,7 @@ static QString formatCellText(const QXlsx::Cell *cell)
|
||||
if (cell->isDateTime()) {
|
||||
// Handle DateTime
|
||||
QDateTime dateTime = cell->dateTime().toDateTime();
|
||||
cellText = dateTime.isValid() ? dateTime.toString("yyyy-MM-dd") : value.toString();
|
||||
cellText = dateTime.isValid() ? dateTime.toString(QStringView(u"yyyy-MM-dd")) : value.toString();
|
||||
} else {
|
||||
cellText = value.toString();
|
||||
}
|
||||
@ -41,23 +43,32 @@ static QString formatCellText(const QXlsx::Cell *cell)
|
||||
if (cellText.isEmpty())
|
||||
return QString();
|
||||
|
||||
// Apply Markdown and HTML formatting based on font styles
|
||||
QString formattedText = cellText;
|
||||
|
||||
if (format.fontBold() && format.fontItalic())
|
||||
formattedText = "***" + formattedText + "***";
|
||||
else if (format.fontBold())
|
||||
formattedText = "**" + formattedText + "**";
|
||||
else if (format.fontItalic())
|
||||
formattedText = "*" + formattedText + "*";
|
||||
// Escape special characters
|
||||
static QRegularExpression special(
|
||||
QStringLiteral(
|
||||
R"(()([\\`*_[\]<>()!|])|)" // special characters
|
||||
R"(^(\s*)(#+(?:\s|$))|)" // headings
|
||||
R"(^(\s*[0-9])(\.(?:\s|$))|)" // ordered lists ("1. a")
|
||||
R"(^(\s*)([+-](?:\s|$)))" // unordered lists ("- a")
|
||||
),
|
||||
QRegularExpression::MultilineOption
|
||||
);
|
||||
cellText.replace(special, uR"(\1\\2)"_s);
|
||||
cellText.replace(u'&', "&amp;"_L1);
|
||||
cellText.replace(u'<', "&lt;"_L1);
|
||||
cellText.replace(u'>', "&gt;"_L1);
|
||||
|
||||
// Apply Markdown formatting based on font styles
|
||||
if (format.fontUnderline())
|
||||
cellText = u"_%1_"_s.arg(cellText);
|
||||
if (format.fontBold())
|
||||
cellText = u"**%1**"_s.arg(cellText);
|
||||
if (format.fontItalic())
|
||||
cellText = u"*%1*"_s.arg(cellText);
|
||||
if (format.fontStrikeOut())
|
||||
formattedText = "~~" + formattedText + "~~";
|
||||
cellText = u"~~%1~~"_s.arg(cellText);
|
||||
|
||||
// Escape pipe characters to prevent Markdown table issues
|
||||
formattedText.replace("|", "\\|");
|
||||
|
||||
return formattedText;
|
||||
return cellText;
|
||||
}
|
||||
|
||||
static QString getCellValue(QXlsx::Worksheet *sheet, int row, int col)
|
||||
@ -124,44 +135,35 @@ QString XLSXToMD::toMarkdown(QIODevice *xlsxDevice)
|
||||
|
||||
if (firstRow > lastRow || firstCol > lastCol) {
|
||||
qWarning() << "Sheet" << sheetName << "is empty.";
|
||||
markdown += "*No data available.*\n\n";
|
||||
markdown += QStringView(u"*No data available.*\n\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
// Assume the first row is the header
|
||||
int headerRow = firstRow;
|
||||
auto appendRow = [&markdown](auto &list) { markdown += u"|%1|\n"_s.arg(list.join(u'|')); };
|
||||
|
||||
// Collect headers
|
||||
// Empty header
|
||||
static QString header(u' ');
|
||||
static QString separator(u'-');
|
||||
QStringList headers;
|
||||
for (int col = firstCol; col <= lastCol; ++col) {
|
||||
QString header = getCellValue(sheet, headerRow, col);
|
||||
headers << header;
|
||||
}
|
||||
|
||||
// Create Markdown header row
|
||||
QString headerRowMarkdown = "|" + headers.join("|") + "|";
|
||||
markdown += headerRowMarkdown + "\n";
|
||||
|
||||
// Create Markdown separator row
|
||||
QStringList separators;
|
||||
for (int i = 0; i < headers.size(); ++i)
|
||||
separators << "---";
|
||||
QString separatorRow = "|" + separators.join("|") + "|";
|
||||
markdown += separatorRow + "\n";
|
||||
for (int col = firstCol; col <= lastCol; ++col) {
|
||||
headers << header;
|
||||
separators << separator;
|
||||
}
|
||||
appendRow(headers);
|
||||
appendRow(separators);
|
||||
|
||||
// Iterate through data rows (starting from the row after header)
|
||||
for (int row = headerRow + 1; row <= lastRow; ++row) {
|
||||
// Iterate through data rows
|
||||
for (int row = firstRow; row <= lastRow; ++row) {
|
||||
QStringList rowData;
|
||||
for (int col = firstCol; col <= lastCol; ++col) {
|
||||
QString cellText = getCellValue(sheet, row, col);
|
||||
rowData << cellText;
|
||||
rowData << (cellText.isEmpty() ? u" "_s : cellText);
|
||||
}
|
||||
|
||||
QString dataRow = "|" + rowData.join("|") + "|";
|
||||
markdown += dataRow + "\n";
|
||||
appendRow(rowData);
|
||||
}
|
||||
|
||||
markdown += "\n"; // Add an empty line between sheets
|
||||
markdown += u'\n'; // Add an empty line between sheets
|
||||
}
|
||||
return markdown;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user