Attachment 'settings_example.xml'
Download 1 <?xml version="1.0" encoding="UTF-8"?>
2 <settings xmlns="http://www.netarkivet.dk/schemas/settings" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3 <common>
4 <!-- Common temporary directory for all applications. -->
5 <tempDir>./tests/commontempdir</tempDir>
6 <!-- FTP connection data-->
7 <remoteFile xsi:type="ftpremotefile">
8 <!-- The class to use for RemoteFile objects. -->
9 <class>dk.netarkivet.common.distribute.FTPRemoteFile</class>
10 <!-- The default FTP-server used -->
11 <serverName>localhost</serverName>
12 <!-- The default FTP-server port used -->
13 <serverPort>21</serverPort>
14 <!-- The default FTP username -->
15 <userName>exampleusername</userName>
16 <!-- The default FTP password -->
17 <userPassword>examplepassword</userPassword>
18 <!-- The number of times FTPRemoteFile should try before giving up
19 a copyTo operation. We augment FTP with checksum checks. -->
20 <retries>3</retries>
21 </remoteFile>
22 <!-- Connection data for JMS-->
23 <jms>
24 <!-- Selects the broker vendor to be used, e.g. ActiveMQ. -->
25 <class>SunMQ</class>
26 <!-- The JMS broker host contacted by the JMS connection -->
27 <broker>localhost</broker>
28 <!-- The port the JMS connection should use -->
29 <port>7676</port>
30 <!-- The name of the environment in which this code is running, e.g.
31 PROD, RELEASETEST, NHC,... Common prefix to all JMS channels
32 -->
33 <environmentName>DEV</environmentName>
34 </jms>
35 <http>
36 <!-- The *unique* (per host) port number that may or may not be
37 used to serve http, but is frequently used to identify
38 the process.-->
39 <port>8076</port>
40 </http>
41 <arcrepositoryClient xsi:type="jmsarcrepositoryclient">
42 <!-- The class that implements the ArcRepositoryClient. This class
43 will be instantiated by the ArcRepositoryClientFactory -->
44 <class>dk.netarkivet.archive.arcrepository.distribute.JMSArcRepositoryClient</class>
45 <!-- How many milliseconds we will wait before giving up on a
46 lookup request to the Arcrepository. Set to 1 minute to
47 make it possible to retrieve large records using FTP -->
48 <getTimeout>60000</getTimeout>
49 <!-- Number of times to try sending a store message before failing,
50 including the first attempt -->
51 <storeRetries>3</storeRetries>
52 <!-- Timeout in milliseconds before retrying when calling
53 ArcRepositoryClient.store() -->
54 <storeTimeout>3600000</storeTimeout>
55 </arcrepositoryClient>
56 <indexClient xsi:type="indexrequestclient">
57 <!-- The class instantiated to give access to indices. Will be
58 created by IndexClientFactory -->
59 <class>dk.netarkivet.archive.indexserver.distribute.IndexRequestClient</class>
60 <!-- The amount of time, in milliseconds, we should wait for replies
61 when issuing a call to generate an index over some jobs.
62 -->
63 <indexRequestTimeout>43200000</indexRequestTimeout>
64 </indexClient>
65 <!-- The name of the directory where cache data global to the entire
66 machine can be stored. Various kinds of caches should be stored in
67 subdirectories of this -->
68 <cacheDir>cache</cacheDir>
69 <!-- Error notification settings -->
70 <notifications>
71 <!-- Which class to instantiate to handle error notifications -->
72 <class>dk.netarkivet.common.utils.EMailNotifications</class>
73 <!-- The receiver of emails -->
74 <receiver>example@netarkivet.dk</receiver>
75 <!-- The stated sender of emails (and receiver of bounces)-->
76 <sender>example@netarkivet.dk</sender>
77 </notifications>
78 <!-- Settings for sending email. Currently mail is only used for email
79 notifications. -->
80 <mail>
81 <!-- The email server to use -->
82 <server>examplesmtpserver.netarkivet.dk</server>
83 </mail>
84 <!-- JMX logging settings -->
85 <jmx>
86 <!-- The port to connect to using JMX -->
87 <port>8100</port>
88 <!-- The RMI port used for communicating with beans -->
89 <rmiPort>8200</rmiPort>
90 <!-- The password file, containing information about who may
91 connect -->
92 <passwordFile>conf/jmxremote.password</passwordFile>
93 </jmx>
94 <!-- Settings for the web GUI -->
95 <webinterface>
96 <!-- Language settings -->
97 <language>
98 <!-- A locale the GUI is available as -->
99 <locale>da</locale>
100 <!-- Native name of the language for this locale -->
101 <name>Dansk</name>
102 </language>
103 <!-- Language settings -->
104 <language>
105 <!-- A locale the GUI is available as -->
106 <locale>en</locale>
107 <!-- Native name of the language for this locale -->
108 <name>English</name>
109 </language>
110 <siteSection>
111 <!-- A subclass of SiteSection that defines this part of the
112 web interface. -->
113 <class>dk.netarkivet.harvester.webinterface.DefinitionsSiteSection</class>
114 <!-- The directory or war-file containing the web application
115 for this site section.-->
116 <webapplication>webpages/HarvestDefinition</webapplication>
117 <!-- The URL path for this section of the web interface. -->
118 <deployPath>/HarvestDefinition</deployPath>
119 </siteSection>
120 <siteSection>
121 <!-- A subclass of SiteSection that defines this part of the
122 web interface. -->
123 <class>dk.netarkivet.harvester.webinterface.HistorySiteSection</class>
124 <!-- The directory or war-file containing the web application
125 for this site section.-->
126 <webapplication>webpages/History</webapplication>
127 <!-- The URL path for this section of the web interface. -->
128 <deployPath>/History</deployPath>
129 </siteSection>
130 <siteSection>
131 <!-- A subclass of SiteSection that defines this part of the
132 web interface. -->
133 <class>dk.netarkivet.archive.webinterface.BitPreservationSiteSection</class>
134 <!-- The directory or war-file containing the web application
135 for this site section.-->
136 <webapplication>webpages/BitPreservation</webapplication>
137 <!-- The URL path for this section of the web interface. -->
138 <deployPath>/BitPreservation</deployPath>
139 </siteSection>
140 <siteSection>
141 <!-- A subclass of SiteSection that defines this part of the
142 web interface. -->
143 <class>dk.netarkivet.viewerproxy.webinterface.QASiteSection</class>
144 <!-- The directory or war-file containing the web application
145 for this site section.-->
146 <webapplication>webpages/QA</webapplication>
147 <!-- The URL path for this section of the web interface. -->
148 <deployPath>/QA</deployPath>
149 </siteSection>
150 <siteSection>
151 <!-- A subclass of SiteSection that defines this part of the
152 web interface. -->
153 <class>dk.netarkivet.monitor.webinterface.StatusSiteSection</class>
154 <!-- The directory or war-file containing the web application
155 for this site section.-->
156 <webapplication>webpages/Status</webapplication>
157 <!-- The URL path for this section of the web interface. -->
158 <deployPath>/Status</deployPath>
159 </siteSection>
160 </webinterface>
161 </common>
162 <harvester>
163 <datamodel>
164 <domain>
165 <!-- Default seed list to use when new domains are created -->
166 <defaultSeedlist>defaultseeds</defaultSeedlist>
167 <!-- The name of a configuration that is created by default and
168 which is initially used for snapshot harvests-->
169 <defaultConfig>defaultconfig</defaultConfig>
170 <!-- Name of order xml template used for domains if nothing
171 else is specified (e.g. newly created configurations use this) -->
172 <defaultOrderxml>default_orderxml</defaultOrderxml>
173 <!-- Default download rate for domain configuration.
174 Not currently enforced. -->
175 <defaultMaxrate>100</defaultMaxrate>
176 <!-- This setting describes a regular expression used to
177 validate domains against. -->
178 <validDomainRegex>^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+|[^\0000-,.-/:-@\[-`{-\0177]+\.(ac|ad|ae|aero|af|ag|ai|al|am|an|ao|aq|ar|arpa|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|biz|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cat|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|com|coop|cr|cs|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|edu|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|info|int|io|iq|ir|is|it|je|jm|jo|jobs|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|mg|mh|mil|mk|ml|mm|mn|mo|mobi|mp|mq|mr|ms|mt|mu|museum|mv|mw|mx|my|mz|na|name|nc|ne|net|nf|ng|ni|nl|no|np|nr|nt|nu|nz|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|pro|ps|pt|pw|py|qa|re|ro|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|travel|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw))$</validDomainRegex>
179 </domain>
180 <database xsi:type="derbydatabase">
181 <!-- The full URL for connecting to the database.
182 If present and not empty, this URL must match the settings
183 baseDir and class.-->
184 <url>jdbc:derby:harvestdefinitionbasedir/fullhddb</url>
185 <!-- The class that handles DB-specific methods -->
186 <specificsclass>dk.netarkivet.harvester.datamodel.DerbyEmbeddedSpecifics</specificsclass>
187 <!-- The earliest time of day backup will be initiated, 0..24
188 hours. At a time shortly after this, a consistent backup
189 copy of the database will be created -->
190 <backupInitHour>3</backupInitHour>
191 </database>
192 </datamodel>
193 <scheduler>
194 <!-- Used when calculating expected size of a harvest of some
195 configuration during job-creation process. This defines how
196 great a possible factor we will permit a harvest to be larger
197 than the expectation, when basing the expectation on a previous
198 completed job. -->
199 <errorFactorPrevResult>10</errorFactorPrevResult>
200 <!-- Used when calculating expected size of a harvest of some
201 configuration during job-creation process. This defines how
202 great a possible factor we will permit a harvest to be larger
203 than the expectation, when basing the expectation on previous
204 uncompleted harvests or no harvest data at all. -->
205 <errorFactorBestGuess>20</errorFactorBestGuess>
206 <!-- How many bytes the average object is expected to be on domains
207 where we don't know any better. This number should grow over
208 time, as of end of 2005 empirical data shows 38000 -->
209 <expectedAverageBytesPerObject>38000</expectedAverageBytesPerObject>
210 <!-- Initial guess of #objects in an unknown domain -->
211 <maxDomainSize>5000</maxDomainSize>
212 <jobs><!-- One Job corresponds to a Heritrix run -->
213 <!-- The maximum allowed relative difference in expected number
214 of objects retrieved in a single job definition. Set to
215 MAX_LONG for no splitting -->
216 <maxRelativeSizeDifference>100</maxRelativeSizeDifference>
217 <!-- Size differences for jobs below this threshold are ignored,
218 regardless of the limits for the relative size difference.
219 Set to MAX_LONG for no splitting. -->
220 <minAbsoluteSizeDifference>2000</minAbsoluteSizeDifference>
221 <!-- When this limit is exceeded no more configurations may be
222 added to a job. Set to MAX_LONG for no splitting. -->
223 <maxTotalSize>2000000</maxTotalSize>
224 </jobs>
225 <!-- How many domain configurations we will process in one go before
226 making jobs out of them. This amount of domains will be stored
227 in memory at the same time. Set to MAX_LONG for no job
228 splitting. -->
229 <configChunkSize>10000</configChunkSize>
230 </scheduler>
231 <harvesting>
232 <!-- Each job gets a subdir of this dir. Job data is written and
233 Heritrix writes to that subdir-->
234 <serverDir>server</serverDir>
235 <!-- The directory in which data from old jobs is kept after
236 uploading. Each directory from serverDir will be moved to
237 here if any data remains, either due to failed uploads or
238 because no upload was attempted. -->
239 <oldjobsDir>oldjobs</oldjobsDir>
240 <!-- Pool to take jobs from -->
241 <queuePriority>HIGHPRIORITY</queuePriority>
242 <!-- When to stop Heritrix: timeout settings. -->
243 <heritrix>
244 <!-- The timeout setting for aborting a crawl based on
245 crawler-inactivity. If the crawler is inactive for this
246 amount of time the crawl will be aborted. The inactivity is
247 measured on crawlController.activeToeCount(). NOTE: the unit is
248 presumably seconds, like noresponseTimeout; please confirm. -->
249 <inactivityTimeout>1800</inactivityTimeout>
250 <!-- The timeout value (in seconds) used in HeritrixLauncher
251 for aborting crawl when no bytes are being received from
252 web servers. -->
253 <noresponseTimeout>1800</noresponseTimeout>
254 </heritrix>
255 <!-- The file used to signal that the harvest controller is running.
256 Sidekick starts HarvestController if this file is not present
257 -->
258 <isrunningFile>./hcsRunning.tmp</isrunningFile>
259 </harvesting>
260 </harvester>
261 <archive>
262 <arcrepository>
263 <!-- Absolute/relative path to where the "central list of files and
264 checksums" (admin.data) is written. Used by ArcRepository and
265 BitPreservation. -->
266 <baseDir>.</baseDir>
267 <!-- The names of all institutional bit archive locations in the
268 environment, e.g., "KB" and "SB". -->
269 <location>
270 <name>SB</name>
271 </location>
272 <location>
273 <name>KB</name>
274 </location>
275 <!-- Default bit archive to use for batch jobs (if none is specified) -->
276 <batchLocation>KB</batchLocation>
277 </arcrepository>
278 <bitarchive>
279 <!-- The minimum amount of bytes left *in any dir* that we will
280 allow a bitarchive machine to accept uploads with. When no
281 dir has more space than this, the bitarchive machine stops
282 listening for uploads. This value should at the very least
283 be greater than the largest ARC file you expect to receive.
284 -->
285 <minSpaceLeft>200000000</minSpaceLeft>
286 <!-- These are the directories where ARC files are stored
287 (in a subdir). If more than one is given, they are used from
288 one end. -->
289 <fileDir>m:\bitarchive</fileDir>
290 <fileDir>n:\bitarchive</fileDir>
291 <fileDir>o:\bitarchive</fileDir>
292 <fileDir>p:\bitarchive</fileDir>
293 <!-- The frequency in milliseconds of heartbeats that are sent by
294 each BitarchiveServer to the BitarchiveMonitor. -->
295 <heartbeatFrequency>1000</heartbeatFrequency>
296 <!-- If we haven't heard from a bit archive within this many
297 milliseconds, we don't expect it to be online and won't wait
298 for it to reply on a batch job. This number should be
299 significantly greater than heartbeatFrequency to account for
300 temporary network congestion. -->
301 <acceptableHeartbeatDelay>60000</acceptableHeartbeatDelay>
302 <!-- The BitarchiveMonitorServer will listen for BatchEndedMessages
303 for this many milliseconds before it decides that a batch job
304 is taking too long and returns just the replies it has
305 received at that point. -->
306 <batchMessageTimeout>1209600000</batchMessageTimeout>
307 <!-- For archiving applications, which bit archive are you part of?-->
308 <thisLocation>SB</thisLocation>
309 <!-- Credentials to enter in the GUI for "deleting" ARC files in
310 this bit archive -->
311 <thisCredentials>examplecredentials</thisCredentials>
312 <!-- When the length of a record exceeds this number, the contents of the record
313 will be transferred using a RemoteFile. Currently set to 10 MB
314 -->
315 <limitForRecordDatatransferInFile>10485760</limitForRecordDatatransferInFile>
316 </bitarchive>
317 <bitpreservation>
318 <!-- Absolute or relative path to dir containing results of
319 file-list-batch-jobs and checksumming batch jobs
320 for bit preservation-->
321 <baseDir>bitpreservation</baseDir>
322 </bitpreservation>
323 </archive>
324 <viewerproxy>
325 <!-- The name of the server used for viewerproxy links in the GUI. -->
326 <hostName>kb-dev-acs-001.kb.dk</hostName>
327 <!-- The main directory for the ViewerProxy, used for storing the Lucene
328 index for the jobs being viewed -->
329 <baseDir>viewerproxy</baseDir>
330 </viewerproxy>
331 <monitor>
332 <!-- The name of the application, e.g. "BitarchiveServerApplication".
333 The monitor puts this with each log message -->
334 <applicationName>NA</applicationName>
335 <logging>
336 <historySize>100</historySize>
337 </logging>
338 <!-- Log dir for the unused Monitor application, wherein logs used
339 for creating monitoring pages in the GUI are dropped.-->
340 <htmlLogsDir>./log/</htmlLogsDir>
341 </monitor>
342 <deploy>
343 <jmxMonitorRolePassword>JMX_MONITOR_ROLE_PASSWORD_PLACEHOLDER</jmxMonitorRolePassword>
344 <numberOfHosts>1</numberOfHosts>
345 <host1>
346 <name>localhost</name>
347 <jmxport>8100</jmxport>
348 </host1>
349 </deploy>
350 </settings>
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.