Attachment 'settings_example.xml'

Download

   1 <?xml version="1.0" encoding="UTF-8"?>
   2 <settings xmlns="http://www.netarkivet.dk/schemas/settings" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
   3     <common>
   4         <!-- Common temporary directory for all applications. -->
   5         <tempDir>./tests/commontempdir</tempDir>
   6         <!-- FTP connection data-->
   7         <remoteFile xsi:type="ftpremotefile">
   8             <!-- The class to use for RemoteFile objects. -->
   9             <class>dk.netarkivet.common.distribute.FTPRemoteFile</class>
  10             <!-- The default FTP-server used -->
  11             <serverName>localhost</serverName>
  12             <!-- The default FTP-server port used -->
  13             <serverPort>21</serverPort>
  14             <!-- The default FTP username -->
  15             <userName>exampleusername</userName>
  16             <!-- The default FTP password -->
  17             <userPassword>examplepassword</userPassword>
  18             <!-- The number of times FTPRemoteFile should try before giving up
  19                  a copyTo operation. We augment FTP with checksum checks. -->
  20             <retries>3</retries>
  21         </remoteFile>
  22         <!-- Connection data for JMS-->
  23         <jms>
  24             <!-- Selects the broker vendor to be used, e.g. ActiveMQ. -->
  25             <class>SunMQ</class>
  26             <!-- The JMS broker host contacted by the JMS connection -->
  27             <broker>localhost</broker>
  28             <!-- The port the JMS connection should use -->
  29             <port>7676</port>
  30             <!-- The name of the environment in which this code is running, e.g.
  31                  PROD, RELEASETEST, NHC,... Common prefix to all JMS channels
  32                   -->
  33             <environmentName>DEV</environmentName>
  34         </jms>
  35         <http>
  36             <!-- The *unique* (per host) port number that may or may not be
  37                  used to serve http, but is frequently used to identify
  38                  the process.-->
  39             <port>8076</port>
  40         </http>
  41         <arcrepositoryClient xsi:type="jmsarcrepositoryclient">
  42             <!-- The class that implements the ArcRepositoryClient.  This class
  43                  will be instantiated by the ArcRepositoryClientFactory -->
  44             <class>dk.netarkivet.archive.arcrepository.distribute.JMSArcRepositoryClient</class>
  45             <!-- How many milliseconds we will wait before giving up on a
  46                  lookup request to the Arcrepository. Set to 1 minute to
  47                  make it possible to retrieve large records using FTP -->
  48             <getTimeout>60000</getTimeout>
  49             <!-- Number of times to try sending a store message before failing,
  50                  including the first attempt -->
  51             <storeRetries>3</storeRetries>
  52             <!-- Timeout in milliseconds before retrying when calling
  53                  ArcRepositoryClient.store() -->
  54             <storeTimeout>3600000</storeTimeout>
  55         </arcrepositoryClient>
  56         <indexClient xsi:type="indexrequestclient">
  57             <!-- The class instantiated to give access to indices.  Will be
  58                  created by IndexClientFactory -->
  59             <class>dk.netarkivet.archive.indexserver.distribute.IndexRequestClient</class>
  60             <!-- The amount of time, in milliseconds, we should wait for replies
  61                  when issuing a call to generate an index over some jobs.
  62              -->
  63             <indexRequestTimeout>43200000</indexRequestTimeout>
  64         </indexClient>
  65         <!-- The name of the directory where cache data global to the entire
  66              machine can be stored.  Various kinds of caches should be stored in
  67              subdirectories of this -->
  68         <cacheDir>cache</cacheDir>
  69         <!-- Error notification settings -->
  70         <notifications>
  71             <!-- Which class to instantiate to handle error notifications -->
  72             <class>dk.netarkivet.common.utils.EMailNotifications</class>
  73             <!-- The receiver of emails -->
  74             <receiver>example@netarkivet.dk</receiver>
  75             <!-- The stated sender of emails  (and receiver of bounces)-->
  76             <sender>example@netarkivet.dk</sender>
  77         </notifications>
  78         <!-- Settings for sending email. Currently mail is only used for email
  79          notifications. -->
  80         <mail>
  81             <!-- The email server to use -->
  82             <server>examplesmtpserver.netarkivet.dk</server>
  83         </mail>
  84         <!-- JMX logging settings -->
  85         <jmx>
  86             <!-- The port to connect to using JMX -->
  87             <port>8100</port>
  88             <!-- The RMI port used for communicating with beans -->
  89             <rmiPort>8200</rmiPort>
  90             <!-- The password file, containing information about who may
  91             connect -->
  92             <passwordFile>conf/jmxremote.password</passwordFile>
  93         </jmx>
  94         <!-- Settings for the web GUI -->
  95         <webinterface>
  96             <!-- Language settings -->
  97             <language>
  98                 <!-- A locale the GUI is available as -->
  99                 <locale>da</locale>
 100                 <!-- Native name of the language for this locale -->
 101                 <name>Dansk</name>
 102             </language>
 103             <!-- Language settings -->
 104             <language>
 105                 <!-- A locale the GUI is available as -->
 106                 <locale>en</locale>
 107                 <!-- Native name of the language for this locale -->
 108                 <name>English</name>
 109             </language>
 110             <siteSection>
 111                 <!-- A subclass of SiteSection that defines this part of the
 112                      web interface. -->
 113                 <class>dk.netarkivet.harvester.webinterface.DefinitionsSiteSection</class>
 114                 <!-- The directory or war-file containing the web application
 115                      for this site section.-->
 116                 <webapplication>webpages/HarvestDefinition</webapplication>
 117                 <!-- The URL path for this section of the web interface. -->
 118                 <deployPath>/HarvestDefinition</deployPath>
 119             </siteSection>
 120             <siteSection>
 121                 <!-- A subclass of SiteSection that defines this part of the
 122                      web interface. -->
 123                 <class>dk.netarkivet.harvester.webinterface.HistorySiteSection</class>
 124                 <!-- The directory or war-file containing the web application
 125                      for this site section.-->
 126                 <webapplication>webpages/History</webapplication>
 127                 <!-- The URL path for this section of the web interface. -->
 128                 <deployPath>/History</deployPath>
 129             </siteSection>
 130             <siteSection>
 131                 <!-- A subclass of SiteSection that defines this part of the
 132                      web interface. -->
 133                 <class>dk.netarkivet.archive.webinterface.BitPreservationSiteSection</class>
 134                 <!-- The directory or war-file containing the web application
 135                      for this site section.-->
 136                 <webapplication>webpages/BitPreservation</webapplication>
 137                 <!-- The URL path for this section of the web interface. -->
 138                 <deployPath>/BitPreservation</deployPath>
 139             </siteSection>
 140             <siteSection>
 141                 <!-- A subclass of SiteSection that defines this part of the
 142                      web interface. -->
 143                 <class>dk.netarkivet.viewerproxy.webinterface.QASiteSection</class>
 144                 <!-- The directory or war-file containing the web application
 145                      for this site section.-->
 146                 <webapplication>webpages/QA</webapplication>
 147                 <!-- The URL path for this section of the web interface. -->
 148                 <deployPath>/QA</deployPath>
 149             </siteSection>
 150             <siteSection>
 151                 <!-- A subclass of SiteSection that defines this part of the
 152                      web interface. -->
 153                 <class>dk.netarkivet.monitor.webinterface.StatusSiteSection</class>
 154                 <!-- The directory or war-file containing the web application
 155                      for this site section.-->
 156                 <webapplication>webpages/Status</webapplication>
 157                 <!-- The URL path for this section of the web interface. -->
 158                 <deployPath>/Status</deployPath>
 159             </siteSection>
 160         </webinterface>
 161     </common>
 162     <harvester>
 163         <datamodel>
 164             <domain>
 165                 <!-- Default seed list to use when new domains are created -->
 166                 <defaultSeedlist>defaultseeds</defaultSeedlist>
 167                 <!-- The name of a configuration that is created by default and
 168                      which is initially used for snapshot harvests-->
 169                 <defaultConfig>defaultconfig</defaultConfig>
 170                 <!-- Name of order xml template used for domains if nothing
 171                  else is specified (e.g. newly created configurations use this) -->
 172                 <defaultOrderxml>default_orderxml</defaultOrderxml>
 173                 <!-- Default download rate for domain configuration.
 174                  Not currently enforced. -->
 175                 <defaultMaxrate>100</defaultMaxrate>
 176                 <!-- This setting describes a regular expression used to
 177                      validate domains against. -->
 178                 <validDomainRegex>^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+|[^\0000-,.-/:-@\[-`{-\0177]+\.(ac|ad|ae|aero|af|ag|ai|al|am|an|ao|aq|ar|arpa|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|biz|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cat|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|com|coop|cr|cs|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|edu|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|info|int|io|iq|ir|is|it|je|jm|jo|jobs|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|mg|mh|mil|mk|ml|mm|mn|mo|mobi|mp|mq|mr|ms|mt|mu|museum|mv|mw|mx|my|mz|na|name|nc|ne|net|nf|ng|ni|nl|no|np|nr|nt|nu|nz|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|pro|ps|pt|pw|py|qa|re|ro|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|travel|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw))$</validDomainRegex>
 179             </domain>
 180             <database xsi:type="derbydatabase">
 181                 <!-- The full URL for connecting to the database.
 182                      If present and not empty, this URL must match the settings
 183                      baseDir and class.-->
 184                 <url>jdbc:derby:harvestdefinitionbasedir/fullhddb</url>
 185                 <!-- The class that handles DB-specific methods -->
 186                 <specificsclass>dk.netarkivet.harvester.datamodel.DerbyEmbeddedSpecifics</specificsclass>
 187                 <!-- The earliest time of day backup will be initiated, 0..24
 188                      hours.  At a time shortly after this, a consistent backup
 189                      copy of the database will be created -->
 190                 <backupInitHour>3</backupInitHour>
 191             </database>
 192         </datamodel>
 193         <scheduler>
 194             <!-- Used when calculating expected size of a harvest of some
 195                  configuration during job-creation process. This defines how
 196                  great a possible factor we will permit a harvest to be larger
 197                  than the expectation, when basing the expectation on a previous
 198                  completed job. -->
 199             <errorFactorPrevResult>10</errorFactorPrevResult>
 200             <!-- Used when calculating expected size of a harvest of some
 201                  configuration during job-creation process.  This defines how
 202                  great a possible factor we will permit a harvest to be larger
 203                  than the expectation, when basing the expectation on previous
 204                  uncompleted harvests or no harvest data at all. -->
 205             <errorFactorBestGuess>20</errorFactorBestGuess>
 206             <!-- How many bytes the average object is expected to be on domains
 207                  where we don't know any better.  This number should grow over
 208                  time, as of end of 2005 empirical data shows 38000 -->
 209             <expectedAverageBytesPerObject>38000</expectedAverageBytesPerObject>
 210             <!-- Initial guess of #objects in an unknown domain -->
 211             <maxDomainSize>5000</maxDomainSize>
 212             <jobs><!-- One Job corresponds to a Heritrix run -->
 213                 <!-- The maximum allowed relative difference in expected number
 214                      of objects retrieved in a single job definition.  Set to
 215                      MAX_LONG for no splitting -->
 216                 <maxRelativeSizeDifference>100</maxRelativeSizeDifference>
 217                 <!-- Size differences for jobs below this threshold are ignored,
 218                      regardless of the limits for the relative size difference.
 219                      Set to MAX_LONG for no splitting. -->
 220                 <minAbsoluteSizeDifference>2000</minAbsoluteSizeDifference>
 221                 <!-- When this limit is exceeded no more configurations may be
 222                      added to a job. Set to MAX_LONG for no splitting. -->
 223                 <maxTotalSize>2000000</maxTotalSize>
 224             </jobs>
 225             <!-- How many domain configurations we will process in one go before
 226                  making jobs out of them.  This amount of domains will be stored
 227                  in memory at the same time.  Set to MAX_LONG for no job
 228                  splitting. -->
 229             <configChunkSize>10000</configChunkSize>
 230         </scheduler>
 231         <harvesting>
 232             <!-- Each job gets a subdir of this dir. Job data is written and
 233                  Heritrix writes to that subdir-->
 234             <serverDir>server</serverDir>
 235             <!-- The directory in which data from old jobs is kept after
 236                  uploading.  Each directory from serverDir will be moved to
 237                  here if any data remains, either due to failed uploads or
 238                  because no upload was attempted. -->
 239             <oldjobsDir>oldjobs</oldjobsDir>
 240             <!-- Pool to take jobs from -->
 241             <queuePriority>HIGHPRIORITY</queuePriority>
 242             <!-- When to stop Heritrix; timeouts are in seconds. -->
 243             <heritrix>
 244                 <!-- The timeout setting for aborting a crawl based on
 245                     crawler-inactivity. If the crawler is inactive for this
 246                     number of seconds the crawl will be aborted.
 247                     The inactivity is measured on the
 248                     crawlController.activeToeCount(). -->
 249                 <inactivityTimeout>1800</inactivityTimeout>
 250                 <!-- The timeout value (in seconds) used in HeritrixLauncher
 251                      for aborting crawl when no bytes are being received from
 252                      web servers. -->
 253                 <noresponseTimeout>1800</noresponseTimeout>
 254             </heritrix>
 255             <!-- The file used to signal that the harvest controller is running.
 256                  Sidekick starts HarvestController if this file is not present
 257                  -->
 258             <isrunningFile>./hcsRunning.tmp</isrunningFile>
 259         </harvesting>
 260     </harvester>
 261     <archive>
 262         <arcrepository>
 263             <!-- Absolute/relative path to where the "central list of files and
 264                  checksums" (admin.data) is written. Used by ArcRepository and
 265                  BitPreservation. -->
 266             <baseDir>.</baseDir>
 267             <!-- The names of all institutional bit archive locations in the
 268                  environment, e.g., "KB" and "SB". -->
 269             <location>
 270                 <name>SB</name>
 271             </location>
 272             <location>
 273                 <name>KB</name>
 274             </location>
 275             <!-- Default bit archive to use for batch jobs (if none is specified) -->
 276             <batchLocation>KB</batchLocation>
 277         </arcrepository>
 278         <bitarchive>
 279             <!-- The minimum amount of bytes left *in any dir* that we will
 280                  allow a bitarchive machine to accept uploads with.  When no
 281                  dir has more space than this, the bitarchive machine stops
 282                  listening for uploads.  This value should at the very least
 283                  be greater than the largest ARC file you expect to receive.
 284             -->
 285             <minSpaceLeft>200000000</minSpaceLeft>
 286             <!-- These are the directories where ARC files are stored
 287                  (in a subdir). If more than one is given, they are used from
 288                  one end. -->
 289             <fileDir>m:\bitarchive</fileDir>
 290             <fileDir>n:\bitarchive</fileDir>
 291             <fileDir>o:\bitarchive</fileDir>
 292             <fileDir>p:\bitarchive</fileDir>
 293             <!-- The frequency in milliseconds of heartbeats that are sent by
 294                  each BitarchiveServer to the BitarchiveMonitor. -->
 295             <heartbeatFrequency>1000</heartbeatFrequency>
 296             <!-- If we haven't heard from a bit archive within this many
 297                  milliseconds, we don't expect it to be online and won't wait
 298                  for it to reply to a batch job.  This number should be
 299                  significantly greater than heartbeatFrequency to account for
 300                  temporary network congestion. -->
 301             <acceptableHeartbeatDelay>60000</acceptableHeartbeatDelay>
 302             <!-- The BitarchiveMonitorServer will listen for BatchEndedMessages
 303                  for this many milliseconds before it decides that a batch job
 304                  is taking too long and returns just the replies it has
 305                  received at that point. -->
 306             <batchMessageTimeout>1209600000</batchMessageTimeout>
 307             <!-- For archiving applications, which bit archive are you part of?-->
 308             <thisLocation>SB</thisLocation>
 309             <!-- Credentials to enter in the GUI for "deleting" ARC files in
 310                  this bit archive -->
 311             <thisCredentials>examplecredentials</thisCredentials>
 312             <!-- When the length of a record exceeds this number, the contents of the record
 313                  will be transferred using a RemoteFile. Currently set to 10 MB
 314               -->
 315             <limitForRecordDatatransferInFile>10485760</limitForRecordDatatransferInFile>
 316         </bitarchive>
 317         <bitpreservation>
 318             <!-- Absolute or relative path to dir containing results of
 319                  file-list-batch-jobs and checksumming batch jobs
 320                  for bit preservation-->
 321             <baseDir>bitpreservation</baseDir>
 322         </bitpreservation>
 323     </archive>
 324     <viewerproxy>
 325         <!-- The name of the server used for viewerproxy links in the GUI. -->
 326         <hostName>kb-dev-acs-001.kb.dk</hostName>
 327         <!-- The main directory for the ViewerProxy, used for storing the Lucene
 328              index for the jobs being viewed -->
 329         <baseDir>viewerproxy</baseDir>
 330     </viewerproxy>
 331     <monitor>
 332         <!-- The name of the application, e.g. "BitarchiveServerApplication".
 333              The monitor puts this with each log message -->
 334         <applicationName>NA</applicationName>
 335         <logging>
 336             <historySize>100</historySize>
 337         </logging>
 338         <!-- Log dir for the unused Monitor application, wherein logs used
 339              for creating monitoring pages in the GUI are dropped. -->
 340         <htmlLogsDir>./log/</htmlLogsDir>
 341     </monitor>
 342     <deploy>
 343 <jmxMonitorRolePassword>JMX_MONITOR_ROLE_PASSWORD_PLACEHOLDER</jmxMonitorRolePassword>
 344 <numberOfHosts>1</numberOfHosts>
 345 <host1>
 346 <name>localhost</name>
 347 <jmxport>8100</jmxport>
 348 </host1>
 349 </deploy>
 350 </settings>

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2007-09-05 15:15:24, 100.5 KB) [[attachment:Installation-Discussions-Settings.ppt]]
  • [get | view] (2007-09-04 10:59:04, 19.2 KB) [[attachment:settings_example.xml]]
  • [get | view] (2007-09-04 11:06:49, 50.5 KB) [[attachment:settings_xsd.txt]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.