-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocs.py
More file actions
190 lines (182 loc) Β· 6.27 KB
/
docs.py
File metadata and controls
190 lines (182 loc) Β· 6.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# docs.py - single home for the API's documentation strings and metadata.

# Human-readable name of the API.
API_TITLE: str = "File Storage with Deduplication API"

# Version string advertised for the API.
API_VERSION: str = "1.0.0"
# Markdown-formatted overview of the service: features, architecture, and the
# deduplication pipeline. (Presumably passed to the FastAPI app as its
# description - confirm at the call site.)
#
# NOTE: this literal contains non-ASCII arrows and must be saved/read as UTF-8.
# The text had previously been mangled by a wrong decode: the arrows showed up
# as a Greek beta and the heading emoji as unrecoverable residue. The arrows
# are restored; the headings are now plain text.
API_DESCRIPTION = """
## File Storage with Intelligent Deduplication
A production-ready content-addressable storage system with automatic chunk-level deduplication.
### Key Features
- **Automatic Deduplication**: Identical chunks stored once across all files
- **Async Processing**: Background processing with Redis Queue
- **Streaming Downloads**: Memory-efficient file reconstruction
- **Image Gallery**: Built-in gallery with thumbnails
- **Safe Deletion**: Preserves chunks used by other files
- **Comprehensive Stats**: Track storage efficiency
### Architecture
- **FastAPI**: High-performance async API
- **PostgreSQL**: Metadata and reference counting
- **Redis + RQ**: Job queue and background workers
- **Chunk Storage**: SHA256-based sharded directories
### Deduplication Process
1. Upload → Temp storage → Queue
2. Worker splits file into chunks
3. SHA256 hash identifies duplicates
4. Only unique chunks stored; ref_count incremented
5. Files reconstructed by chunk order
"""
# Tag metadata: one {"name", "description"} entry per endpoint group, in
# display order. Built from (name, description) pairs to keep the table compact.
API_TAGS = [
    {"name": tag_name, "description": tag_blurb}
    for tag_name, tag_blurb in (
        ("Files", "Upload, download, list, search, delete files"),
        ("Images", "Display, gallery, thumbnails for images"),
        ("Statistics", "Deduplication metrics and storage efficiency"),
        ("Chunks", "Chunk-level operations and cleanup"),
        ("System", "Health checks and monitoring"),
    )
]
# Endpoint documentation templates.
#
# Maps an endpoint key to its "summary", "description" and
# "response_description" strings. Multi-line descriptions are Markdown and
# deliberately begin and end with a newline; they are written as explicit
# "\n"-terminated pieces so the dict can be indented without leaking
# indentation into the text.
#
# NOTE: the "upload" description contains a non-ASCII arrow, so this file must
# be saved/read as UTF-8. That arrow had previously been corrupted into a
# Greek beta by a wrong decode.
ENDPOINT_DOCS = {
    "upload": {
        "summary": "Upload a file",
        "description": (
            "\n"
            "Upload file → queued for deduplication.\n"
            "- **<50MB**: 'uploads' queue (1hr timeout)\n"
            "- **>50MB**: 'large-files' queue (2hr timeout)\n"
            "Returns file_id and job_id immediately.\n"
        ),
        "response_description": "File accepted, processing in background",
    },
    "list_files": {
        "summary": "List all files",
        "description": (
            "\n"
            "Paginated file list with chunk counts.\n"
            "- `skip`: Pagination offset\n"
            "- `limit`: Items per page (1-1000)\n"
            "- `sort_by`: created_at, filename, size\n"
            "- `order`: asc, desc\n"
        ),
        "response_description": "List of files with metadata",
    },
    "search_files": {
        "summary": "Search files",
        "description": (
            "\n"
            "Case-insensitive search by filename and size range.\n"
            "- `q`: Search term (partial matches)\n"
            "- `min_size`/`max_size`: Size filter in bytes\n"
        ),
        "response_description": "Search results",
    },
    "download": {
        "summary": "Download a file",
        "description": (
            "\n"
            "Stream file reconstructed from chunks.\n"
            "- Preserves original filename\n"
            "- Memory-efficient for large files\n"
            "- Sets Content-Length for progress bars\n"
        ),
        "response_description": "The requested file",
    },
    "stats": {
        "summary": "System statistics",
        "description": (
            "\n"
            "Deduplication metrics:\n"
            "- Files: count, total size\n"
            "- Chunks: unique, references, sharing\n"
            "- Storage: logical vs physical, savings\n"
        ),
        "response_description": "System statistics",
    },
    "health": {
        "summary": "Health check",
        "description": "Checks Redis, PostgreSQL, disk space",
        "response_description": "Health status",
    },
    "image_display": {
        "summary": "Display image",
        "description": "Render image directly in browser. Supports PNG, JPG, GIF, WebP",
        "response_description": "Image file",
    },
    "gallery": {
        "summary": "Image gallery",
        "description": "HTML gallery with modal view and pagination",
        "response_description": "HTML page",
    },
    "thumbnail": {
        "summary": "Image thumbnail",
        "description": "Generate resized thumbnail. Cache 24 hours.",
        "response_description": "Thumbnail image",
    },
    "list_images": {
        "summary": "List images (JSON)",
        "description": "JSON list of all images with URLs",
        "response_description": "Image list",
    },
    "delete_file": {
        "summary": "Delete file",
        "description": (
            "\n"
            "Safe deletion:\n"
            "- Only removes chunks with ref_count=0\n"
            "- Shared chunks preserved\n"
            "- Requires `force=true` for files with shared chunks\n"
        ),
        "response_description": "Deletion result",
    },
    "delete_batch": {
        "summary": "Delete multiple files",
        "description": "Batch delete with individual results",
        "response_description": "Batch deletion results",
    },
    "delete_preview": {
        "summary": "Preview deletion",
        "description": "Shows which files share chunks and what would be freed",
        "response_description": "Deletion preview",
    },
    "shared_chunks": {
        "summary": "Shared chunks analysis",
        "description": "Lists chunks this file shares with other files",
        "response_description": "Shared chunks details",
    },
    "unused_chunks": {
        "summary": "List unused chunks",
        "description": "Chunks with ref_count=0 ready for cleanup",
        "response_description": "Unused chunks list",
    },
    "cleanup_chunks": {
        "summary": "Cleanup unused chunks",
        "description": "Permanently delete chunks with ref_count=0",
        "response_description": "Cleanup results",
    },
}
# Response templates.
#
# Maps an endpoint group to {HTTP status code: response metadata}. The status
# codes are kept in their declared order within each group. (Presumably fed to
# a route's `responses=` argument - confirm at the call sites.)
RESPONSES = {
    "upload": {
        202: {
            "description": "File accepted, processing in background",
        },
        500: {
            "description": "Internal server error",
        },
    },
    "download": {
        200: {
            "description": "File downloaded successfully",
            # Advertises a raw binary body with no schema attached.
            "content": {
                "application/octet-stream": {},
            },
        },
        404: {
            "description": "File not found",
        },
        500: {
            "description": "Internal server error",
        },
    },
    "image": {
        200: {
            "description": "Image displayed successfully",
        },
        404: {
            "description": "File not found",
        },
        400: {
            "description": "File is not an image",
        },
    },
    "delete": {
        200: {
            "description": "File deleted successfully",
        },
        404: {
            "description": "File not found",
        },
        500: {
            "description": "Internal server error",
        },
    },
}