/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */
// Elasticsearch build plugins for YAML REST tests and YAML REST compatibility tests.
// NOTE(review): presumably these register the yamlRestTest / yamlRestCompatTest tasks that the
// FIPS guard at the end of this file disables -- confirm against the build-tools sources.
apply plugin: 'elasticsearch.internal-yaml-rest-test'
apply plugin: 'elasticsearch.yaml-rest-compat-test'

// Plugin descriptor: the plugin's entry-point class and its human-readable summary.
esplugin {
  classname = "org.elasticsearch.ingest.attachment.IngestAttachmentPlugin"
  description = "Ingest processor that uses Apache Tika to extract contents"
}

// this overwrites the 'versions' map from Elasticsearch itself, and that is intentional -- we want to manage our dependencies
// as we (and tika) demand, and are not interested in, say, having the same version of commons-codec as elasticsearch itself
// when updating tika, please review its parent pom: https://repo1.maven.org/maven2/org/apache/tika/tika-parent
// and manually update the transitive dependencies here
// Version pins used by the dependency declarations in this build script, looked up by key.
def versions = [
  tika: '2.9.3',
  pdfbox: '2.0.33',
  poi: '5.4.0',
  // poi dependency: https://repo1.maven.org/maven2/org/apache/poi/poi/
  sparsebitset: '1.3',
  mime4j: '0.8.12',
  commonsCodec: '1.18.0',
  slf4: '2.0.16',
  xz: '1.10',
  commonsIo: '2.18.0',
  // Deliberately the older "juniversalchardet:juniversalchardet" rather than the newer
  // "com.github.albfernandez:juniversalchardet", since the "com.github.albfernandez" fork
  // has some problems with Chinese.
  juniversalchardet: '1.0.3',
  tagsoup: '1.2.1',
  jempbox: '1.8.17',
  // poi-ooxml dependency: https://repo1.maven.org/maven2/org/apache/poi/poi-ooxml/
  xmlbeans: '5.3.0',
  commonsCollections4: '4.4',
  commonsCompress: '1.27.1',
  commonsLang3: '3.17.0',
  commonsMath3: '3.6.1'
]

// Test runtime classpath: commons-logging is excluded to avoid jar-hell (jcl-over-slf4j is used here instead)
configurations.testRuntimeClasspath {
  exclude(module: 'commons-logging')
  resolutionStrategy {
    // Pin commons-codec: the version used by POI potentially conflicts with the one pulled in by :test:framework
    force("commons-codec:commons-codec:${versions.commonsCodec}")
  }
}

// Test compile classpath: force the commons-codec version declared in the versions map
configurations.testCompileClasspath {
  resolutionStrategy {
    // The version used by POI potentially conflicts with the one pulled in by :test:framework
    force("commons-codec:commons-codec:${versions.commonsCodec}")
  }
}

// All libraries are declared on the 'api' configuration, with versions taken from the versions map.
dependencies {
  // slf4j takes over logging for all dependencies (including commons-logging callers via jcl-over-slf4j)
  api("org.slf4j:slf4j-api:${versions.slf4}")
  api("org.slf4j:jcl-over-slf4j:${versions.slf4}")

  // routing slf4j over log4j is the intended setup
  // TODO blocked on https://github.com/elastic/elasticsearch/issues/93714
  // api "org.apache.logging.log4j:log4j-slf4j2-impl:2.19.0"

  // until then, all slf4j logging is a no-op
  // workaround for https://github.com/elastic/elasticsearch/issues/93714
  api("org.slf4j:slf4j-nop:${versions.slf4}")

  // mandatory for tika
  api("org.apache.tika:tika-core:${versions.tika}")
  api("org.apache.tika:tika-langdetect:${versions.tika}")
  api("org.apache.tika:tika-langdetect-tika:${versions.tika}")
  api("org.apache.tika:tika-parser-html-module:${versions.tika}")
  api("org.apache.tika:tika-parser-microsoft-module:${versions.tika}")
  api("org.apache.tika:tika-parser-pdf-module:${versions.tika}")
  api("org.apache.tika:tika-parser-xml-module:${versions.tika}")
  api("org.apache.tika:tika-parser-text-module:${versions.tika}")
  api("org.apache.tika:tika-parser-miscoffice-module:${versions.tika}")
  api("org.apache.tika:tika-parser-apple-module:${versions.tika}")
  api("org.apache.tika:tika-parser-xmp-commons:${versions.tika}")
  api("org.apache.tika:tika-parser-zip-commons:${versions.tika}")
  api("org.tukaani:xz:${versions.xz}")
  api("commons-io:commons-io:${versions.commonsIo}")

  // character set detection
  api("com.googlecode.juniversalchardet:juniversalchardet:${versions.juniversalchardet}")

  // external parser libraries
  // HTML
  api("org.ccil.cowan.tagsoup:tagsoup:${versions.tagsoup}")
  // Adobe PDF
  api("org.apache.pdfbox:pdfbox:${versions.pdfbox}")
  api("org.apache.pdfbox:fontbox:${versions.pdfbox}")
  api("org.apache.pdfbox:jempbox:${versions.jempbox}")
  // OpenOffice
  // NOTE(review): the artifacts below are POI's "ooxml" modules, so this label may be meant as
  // Office Open XML rather than OpenOffice -- confirm
  api("org.apache.poi:poi-ooxml:${versions.poi}")
  api("org.apache.poi:poi-ooxml-lite:${versions.poi}")
  api("org.apache.poi:poi:${versions.poi}")
  api("commons-codec:commons-codec:${versions.commonsCodec}")
  api("org.apache.xmlbeans:xmlbeans:${versions.xmlbeans}")
  api("org.apache.commons:commons-collections4:${versions.commonsCollections4}")
  // MS Office
  api("org.apache.poi:poi-scratchpad:${versions.poi}")
  // Apple iWork
  api("org.apache.commons:commons-compress:${versions.commonsCompress}")
  // Outlook documents
  api("org.apache.james:apache-mime4j-core:${versions.mime4j}")
  api("org.apache.james:apache-mime4j-dom:${versions.mime4j}")
  // EPUB books
  api("org.apache.commons:commons-lang3:${versions.commonsLang3}")
  // Microsoft Word files with Visio diagrams
  api("org.apache.commons:commons-math3:${versions.commonsMath3}")
  // POI's dependency
  api("com.zaxxer:SparseBitSet:${versions.sparsebitset}")
}

// Names included in the restApi section of restResources
restResources {
  restApi {
    include('_common', 'cluster', 'nodes', 'ingest', 'index', 'get')
  }
}

// dependencyLicenses: map every name matching the 'from' pattern onto the single 'to' name
tasks.named('dependencyLicenses') {
  mapping(from: /apache-mime4j-.*/, to: 'apache-mime4j')
  mapping(from: /tika-langdetect-.*/, to: 'tika-langdetect')
}

// Add the contents of config/ingest-attachment to the plugin bundle, under 'config'
esplugin.bundleSpec.from('config/ingest-attachment') {
  into('config')
}

// forbiddenPatterns: file patterns excluded from the check (document formats and the CJK text samples)
tasks.named('forbiddenPatterns') {
  def excludedPatterns = [
    '**/*.doc',
    '**/*.docx',
    '**/*.pdf',
    '**/*.epub',
    '**/*.vsdx',
    '**/text-cjk-*.txt'
  ]
  // one exclude call per pattern, in the listed order
  excludedPatterns.each { pattern ->
    exclude(pattern)
  }
}

// thirdPartyAudit: ignoreMissingClasses() is called without naming any classes
// NOTE(review): presumably this ignores every missing-class finding -- confirm against the task's implementation
tasks.named('thirdPartyAudit') {
  ignoreMissingClasses()
}

// When the build runs in a FIPS JVM, the unit, YAML REST and YAML REST compatibility test tasks are all disabled
if (buildParams.inFipsJvm) {
  ['test', 'yamlRestTest', 'yamlRestCompatTest'].each { taskName ->
    tasks.named(taskName).configure { enabled = false }
  }
}
