forked from dathere/datapusher-plus
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdot-env.template
171 lines (134 loc) · 6.4 KB
/
dot-env.template
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# To specify Datapusher+ settings, modify and copy this file to ".env"
# and put it in the working directory from which DP+ is started.
# e.g. in development mode, in the datapusher-plus/datapusher directory
# in production mode, in the /etc/ckan/datapusher-plus directory
#
# Note that DP+ settings can also be passed using environment variables
# e.g. export PII_SCREENING=True
# ============= DATABASE SETTINGS =============
# The connect string of the CKAN Datastore
# (replace YOURPASSWORD with the actual password)
WRITE_ENGINE_URL = 'postgresql://datapusher:YOURPASSWORD@localhost/datastore_default'
# The connect string of the Datapusher+ Job database
# (replace YOURPASSWORD with the actual password)
SQLALCHEMY_DATABASE_URI = 'postgresql://datapusher_jobs:YOURPASSWORD@localhost/datapusher_jobs'
# Read buffer size, in bytes, used when reading the CSV file for Postgres COPY
# default 64k = 65536
COPY_READBUFFER_SIZE = 65536
# =============== DOWNLOAD SETTINGS ==============
# Maximum content length, in bytes (approx. 25mb).
# This is ignored if PREVIEW_ROWS > 0
MAX_CONTENT_LENGTH = 25600000
# A Datapusher+ job is triggered automatically every time a resource is modified (even just its metadata)
# if its mimetype is one of the supported datapusher.formats.
# To ensure DP+ doesn't push an unchanged resource, it computes and stores the hash of the file.
# If the hash has not changed (i.e. the file has not been modified), it refrains from "re-pushing" it.
IGNORE_FILE_HASH = False
# In bytes. The resource is downloaded on a streaming basis, 16K at a time
CHUNK_SIZE = 16384
# In seconds. How long before a DP+ download times out
DOWNLOAD_TIMEOUT = 30
# Whether the SSL certificate is verified. This is set to False by default
# since externally hosted datasets may sometimes have expired/self-signed SSL certificates
SSL_VERIFY = False
# If this is not zero, the number of preview rows to push into the datastore.
# If zero, the entire file is pushed
PREVIEW_ROWS = 0
# NOTE(review): presumably the proxy to use when downloading resources,
# with an empty value meaning no proxy - confirm
DOWNLOAD_PROXY = ''
# =========== CKAN SERVICE PROVIDER SETTINGS ==========
# Address and port the service binds to
HOST = "0.0.0.0"
PORT = 8800
# turns on logger at Debug level
DEBUG = False
# If False, configures the logger for production
# i.e. logs to STDERR and LOG_FILE (autorotates after 68mb, with 5 backups),
# and emails errors to admins.
# If True, only turns on Debug if DEBUG = True
TESTING = False
# Sender address for error emails. This is a placeholder;
# replace it with a valid address for your installation
FROM_EMAIL = 'dp-plus-errors@domain.com'
# comma-delimited list of emails to send CKAN Service Provider errors to
ADMINS = ''
# Error logging
LOG_FILE = '/tmp/ckan_service.log'
# Also show log on STDERR
STDERR = True
# These settings are randomly generated by default
# only set these if you need to interface with the CKAN Service Provider API
# see https://ckan-service-provider.readthedocs.io/
# SECRET_KEY = "please replace me"
# USERNAME = "admin"
# PASSWORD = "changeme"
# number of days to keep job history
KEEP_JOBS_AGE = 60
# ============ QSV ANALYSIS SETTINGS ==========
# ---------- BINARY PATHS -------------
# qsv binary to use.
# Optionally, you can also use qsvdp_nightly.
# qsvdp is already very fast, but if you want even more speed,
# qsvdp_nightly is compiled/linked in such a way that it's even faster/smaller
# see https://github.com/jqnatividad/qsv/blob/master/docs/PERFORMANCE.md#nightly-release-builds
QSV_BIN = '/usr/local/bin/qsvdp'
# file binary to use. `file` is used to get file metadata to display on the log
# if qsv cannot open a spreadsheet file (probably because it's password-protected or corrupt)
FILE_BIN = '/usr/bin/file'
# Dates are parsed with an MDY preference by default.
# Set PREFER_DMY = True if date-parsing should prefer DMY instead
PREFER_DMY = False
# The zero-based index of the default sheet to export to CSV. 0 is the first sheet.
# Accepts negative numbers. -1 is the last sheet, -2 the 2nd to last sheet, etc.
DEFAULT_EXCEL_SHEET = 0
# Check if a file is sorted and has duplicates
SORT_AND_DUPE_CHECK = True
# Should CSVs be deduped? Note that deduping also
# sorts the CSV.
DEDUP = False
# --------- COLUMN HEADER NAME SAFENAMES SETTINGS --------
# Prefix to use if a column name is found to be "unsafe"
UNSAFE_PREFIX = unsafe_
# Comma-delimited list of additional case-insensitive reserved names
# that should be considered "unsafe". If a header name is found in the
# list, it will be prefixed with "reserved_"
RESERVED_COLNAMES = _id
# -------- SUMMARY STATS SETTINGS -----------
# Create a resource for calculated summary stats?
ADD_SUMMARY_STATS_RESOURCE = False
# Additional command line options to pass to qsv stats when creating
# summary stats. Set to `--everything` if you want to include all the stats,
# particularly when ADD_SUMMARY_STATS_RESOURCE is True
SUMMARY_STATS_OPTIONS = ''
# -------- AUTO INDEX SETTINGS ----------
# If AUTO_INDEX_THRESHOLD > 0 or AUTO_INDEX_DATES is true,
# create indices automatically based on a column's cardinality (number of unique values)
# - if a column's cardinality <= AUTO_INDEX_THRESHOLD, create an index for that column
# - if AUTO_INDEX_THRESHOLD = -1, index all columns regardless of their cardinality
AUTO_INDEX_THRESHOLD = 3
# For columns with cardinality equal to record_count (i.e. all values are unique), create a unique index
AUTO_UNIQUE_INDEX = True
# always index date fields?
AUTO_INDEX_DATES = True
# ------ AUTO ALIAS SETTINGS ----------
# Should an alias be automatically created?
# Aliases are easier to use than resource_ids, and can be used with the CKAN API where
# resource_ids are used. Aliases are also SQL views that are easier to use when querying
# the CKAN Datastore database.
# Aliases are created by concatenating "{resource_name}-{package_name}-{owner_org_name}",
# truncated at 55 characters.
AUTO_ALIAS = False
# Should aliases always be unique? In case of an alias name collision, a three-digit
# sequence number is appended.
AUTO_ALIAS_UNIQUE = False
# -------- PII SETTINGS -----------
# Screen for PII (Personally Identifiable Information)?
PII_SCREENING = False
# Stop scanning on first PII found
PII_QUICK_SCREEN = False
# Abort Datapusher+ job if PII is found
PII_FOUND_ABORT = True
# Create a resource where PII candidates are stored?
PII_SHOW_CANDIDATES = True
# The resource ID/alias of a text file that has the
# regex patterns to use for PII scanning.
# If this is not specified, the default PII scanning rules in
# default_pii_regexes.txt are used.
# Note that Rust regex syntax is used, NOT Python, as we use the
# qsv searchset command, which can scan for MULTIPLE regex patterns in
# one pass and is at least an order of magnitude faster than Python.
# https://docs.rs/regex/latest/regex/index.html#syntax
# You can test your regex at https://regex101.com using the Rust flavor
PII_REGEX_RESOURCE_ID_OR_ALIAS = ''