Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion frontend/src/components/business/DatasetFileTransfer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
selectedFilesMap,
onSelectedFilesChange,
onDatasetSelect,
datasetTypeFilter = DatasetType.TEXT,
datasetTypeFilter,
...props
}) => {
const [datasets, setDatasets] = React.useState<Dataset[]>([]);
Expand Down Expand Up @@ -85,6 +85,7 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
page: datasetPagination.current,
size: datasetPagination.pageSize,
keyword: datasetSearch,
// 仅在显式传入过滤类型时才按类型过滤;否则后端返回所有类型
type: datasetTypeFilter,
});
setDatasets(data.content.map(mapDataset) || []);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,16 +183,8 @@ export default function CreateAnnotationTask({
// 手动标注也支持跨数据集、精确到文件的选择
const selectedFiles = Object.values(selectedFilesMap) as any[];

const imageExtensions = [".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"];
const imageFileIds = selectedFiles
.filter((file) => {
const ext = file.fileName?.toLowerCase().match(/\.[^.]+$/)?.[0] || "";
return imageExtensions.includes(ext);
})
.map((file) => file.id);

if (imageFileIds.length === 0) {
message?.error?.("请至少选择一个图像文件");
if (selectedFiles.length === 0) {
message?.error?.("请至少选择一个文件");
setSubmitting(false);
return;
}
Expand All @@ -213,7 +205,7 @@ export default function CreateAnnotationTask({
description: values.description,
datasetId: effectiveDatasetId,
templateId: values.templateId,
fileIds: imageFileIds,
fileIds: selectedFiles.map((file) => file.id),
};

await createAnnotationTaskUsingPost(requestData);
Expand Down Expand Up @@ -326,8 +318,8 @@ export default function CreateAnnotationTask({
label: "手动标注",
children: (
<Form form={manualForm} layout="vertical">
{/* 选择数据集和图像文件(支持多数据集、多文件) */}
<Form.Item label="选择数据集和图像文件" required>
{/* 选择数据集和文件(支持多数据集、多文件) */}
<Form.Item label="选择数据集和文件" required>
<DatasetFileTransfer
open
selectedFilesMap={selectedFilesMap}
Expand All @@ -346,12 +338,11 @@ export default function CreateAnnotationTask({
manualForm.setFieldsValue({ name: defaultName });
}
}}
datasetTypeFilter={DatasetType.IMAGE}
/>
{selectedDataset && (
<div className="mt-2 p-2 bg-blue-50 rounded border border-blue-200 text-xs">
当前数据集:<span className="font-medium">{selectedDataset.name}</span> - 已选择
<span className="font-medium text-blue-600"> {imageFileCount} </span>个图像文件
<span className="font-medium text-blue-600"> {Object.keys(selectedFilesMap).length} </span>个文件
</div>
)}
</Form.Item>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ export default function AddDataDialog({ knowledgeBase, onDataAdded }) {
open={open}
selectedFilesMap={selectedFilesMap}
onSelectedFilesChange={setSelectedFilesMap}
datasetTypeFilter={DatasetType.TEXT}
/>
)}

Expand Down
3 changes: 2 additions & 1 deletion frontend/src/pages/SynthesisTask/CreateTask.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { useEffect, useState } from "react";
import type { Dataset, DatasetFile } from "@/pages/DataManagement/dataset.model";
import { DatasetType } from "@/pages/DataManagement/dataset.model";
import { Steps, Card, Select, Input, Button, Form, message, Tag, Tooltip, InputNumber } from "antd";
import { Eye, ArrowLeft, ArrowRight, Play, Search, Sparkles, Brain, Layers } from "lucide-react";
import { Link, useNavigate } from "react-router";
Expand Down Expand Up @@ -304,7 +305,7 @@ export default function SynthesisTaskCreate() {
<DatasetFileTransfer open selectedFilesMap={selectedMap} onSelectedFilesChange={setSelectedMap} onDatasetSelect={(dataset) => {
setSelectedDataset(dataset);
form.setFieldsValue({ sourceDataset: dataset?.id ?? "" });
}} />
}} datasetTypeFilter={DatasetType.TEXT} />
{selectedDataset && (
<div className="mt-4 p-3 bg-gray-50 rounded border text-xs text-gray-600">
当前数据集:<span className="font-medium text-gray-900">{selectedDataset.name}</span>
Expand Down
38 changes: 32 additions & 6 deletions runtime/datamate-python/app/module/annotation/service/sync.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Optional, List, Dict, Any, Tuple, Set
import os

from app.module.dataset import DatasetManagementService
from sqlalchemy import update, select
from app.db.models import DatasetFiles
Expand Down Expand Up @@ -52,14 +54,38 @@ def _determine_data_type(self, file_type: str) -> str:
def _build_task_data(self, file_info: Any, dataset_id: str) -> dict:
"""构建Label Studio任务数据"""
data_type = self._determine_data_type(file_info.fileType)

# 替换文件路径前缀
file_path = file_info.filePath.removeprefix(settings.dm_file_path_prefix)
file_path = settings.label_studio_file_path_prefix + file_path


# 默认仍然走 Label Studio 本地文件 URL
# 先替换文件路径前缀,构造 /data/local-files/?d=/... 形式
relative_path = file_info.filePath.removeprefix(settings.dm_file_path_prefix)
ls_file_url = settings.label_studio_file_path_prefix + relative_path

data_value: Any = ls_file_url

# 对于纯文本文件(例如 .txt),支持直接把文件内容写入到 data.text,
# 这样在 Label Studio 里会直接显示文本内容,而不是 URL。
if data_type == "text":
try:
_, ext = os.path.splitext(file_info.filePath)
ext = ext.lower()

# 目前只对 .txt 做内联,其他如 pdf/doc 仍然使用 URL
if ext == ".txt":
with open(file_info.filePath, "r", encoding="utf-8", errors="ignore") as f:
content = f.read()
if content:
data_value = content
except Exception as e:
# 读取失败时退回到原来的 URL 形式,避免中断同步流程
logger.warning(
"Failed to inline text content for file %s: %s",
getattr(file_info, "filePath", "<unknown>"),
str(e),
)

return {
"data": {
f"{data_type}": file_path,
f"{data_type}": data_value,
"file_path": file_info.filePath,
"file_id": file_info.id,
"original_name": file_info.originalName,
Expand Down
Loading