Skip to content

Commit

Permalink
Merge pull request #814 from folio-org/release_7.0.7
Browse files Browse the repository at this point in the history
Release 7.0.7
  • Loading branch information
EthanFreestone authored Aug 23, 2024
2 parents 50acc4d + 8e0dbbe commit 5a34b7d
Show file tree
Hide file tree
Showing 10 changed files with 129 additions and 71 deletions.
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## 7.0.7 2024-08-23
* ERM-3310 Expose concurrent jobs as env var
* ERM-3308 Ingest process slows down dramatically on certain packages
* Tweaked processing log message for more accurate harvest performance numbers

## 7.0.6 2024-07-05
* ERM-3291 Fix permissions on /erm/validate/remoteKB in mod-agreements
* ERM-3289 Fix permission on /erm/validate/subscriptionAgreement in mod-agreements
Expand Down
2 changes: 1 addition & 1 deletion service/gradle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ gormVersion=8.0.3

# Application
appName=mod-agreements
appVersion=7.0.6
appVersion=7.0.7
dockerTagSuffix=
dockerRepo=folioci

Expand Down
5 changes: 2 additions & 3 deletions service/grails-app/conf/application.yml
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,8 @@ webtoolkit:
numbers:
fixedLocale: true # Can be a boolean for default locale or an IETF BCP 47 language string

concurrentJobsGlobal: "${CONCURRENT_JOBS_GLOBAL:3}"

# Environment specific overrides.
environments:
test:
Expand All @@ -220,7 +222,6 @@ environments:
aws_secret: "DIKU_AGG_SECRET_KEY"
aws_bucket: "diku-shared"
aws_access_key_id: "DIKU_AGG_ACCESS_KEY"
concurrentJobsGlobal: 2

dbGen:
grails:
Expand All @@ -247,14 +248,12 @@ environments:
aws_secret: "${aws.secret.access.key:none}"
aws_bucket: "${aws.bucket:none}"
aws_access_key_id: "${aws.access.key.id:none}"
concurrentJobsGlobal: 3

development:
logging:
config: classpath:logback-development.xml
level:
web: DEBUG
concurrentJobsGlobal: 2
kiwt:
# Filestore values from the minio config defined in the docker-compose file at ../tools/testing
filestore:
Expand Down
5 changes: 5 additions & 0 deletions service/grails-app/domain/org/olf/kb/CoverageStatement.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,9 @@ public class CoverageStatement extends AbstractCoverageStatement implements Vali
endVolume column:'cs_end_volume'
endIssue column:'cs_end_issue'
}

// TODO is this a good enough toString to be official rather than "toGoKBString"?
String toString() {
"v${startVolume ?: '*'}/i${startIssue ?: '*'}/${startDate ?: '*'} - v${endVolume ?: '*'}/i${endIssue ?: '*'}/${endDate ?: '*'}"
}
}
14 changes: 10 additions & 4 deletions service/grails-app/domain/org/olf/kb/ErmResource.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@ public class ErmResource extends ErmTitleList implements MultiTenant<ErmResource
Set<AlternateResourceName> alternateResourceNames

Set<TemplatedUrl> templatedUrls = []


// Do not store this value, only used to check whether or not beforeValidate should calculate coverage
// We wish to avoid this during ingest process, for example, where we do this manually based on
// external information
boolean doNotCalculateCoverage

static hasMany = [
coverage: CoverageStatement,
Expand Down Expand Up @@ -82,12 +86,12 @@ alternateResourceNames cascade: 'all-delete-orphan'
coverage (validator: CoverageStatement.STATEMENT_COLLECTION_VALIDATOR, sort:'startDate')
templatedUrls (bindable: false)
}

private validating = false
def beforeValidate() {
this.name = StringUtils.truncate(name)

if (!validating) {
if (!validating && !doNotCalculateCoverage) {
validating = true
// Attempt to avoid session locking
ErmResource.withSession {
Expand All @@ -97,13 +101,15 @@ alternateResourceNames cascade: 'all-delete-orphan'
normalizedName = StringUtils.normaliseWhitespaceAndCase(name)
validating = false
}

this.doNotCalculateCoverage = false // Unset afterwards in case of a string of saves
}

String toString() {
name
}

static transients = ['approvedIdentifierOccurrences']
static transients = ['approvedIdentifierOccurrences', 'doNotCalculateCoverage']

public Set<IdentifierOccurrence> getApprovedIdentifierOccurrences() {
identifiers
Expand Down
106 changes: 67 additions & 39 deletions service/grails-app/services/org/olf/CoverageService.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -127,24 +127,37 @@ public class CoverageService {
return (value?.trim()?.length() ?: 0) > 0 ? value : null
}

// FIXME this indicates a need to refactor the beforeValidate call which forces a calculate coverage on every save
public static void saveResourceWithoutCalculatingCoverage (ErmResource resource, boolean flush = true, boolean failOnError = true) {
resource.doNotCalculateCoverage = true;
resource.save(failOnError: failOnError, flush: flush)
}

/**
* Set coverage from schema
*/
public static void setCoverageFromSchema (final ErmResource resource, final Iterable<CoverageStatementSchema> coverage_statements) {
public static void setCoverageFromSchema (
final ErmResource resource,
final Iterable<CoverageStatementSchema> coverage_statements,
final boolean calculateCoverageAtEnd = true
) {
// resource is null for logging unless a gorm operation is applied to it... something something proxying something :p
//log.debug("CoverageService::setCoverageFromSchema(${resource}, ${coverage_statements}, ${calculateCoverageAtEnd})")

// ErmResource.withTransaction {

boolean changed = false
final Set<CoverageStatement> statements = []
try {

// Clear the existing coverage, or initialize to empty set.
if (resource.coverage) {
statements.addAll( resource.coverage.collect() )

resource.coverage.clear()
resource.save(failOnError: true) // Necessary to remove the orphans.
}

// I don't think we need to save here... On my head be it :p
//saveResourceWithoutCalculatingCoverage(resource, false, true)
}
for ( CoverageStatementSchema cs : coverage_statements ) {
/* Not using utilityService.checkValidBinding here
* because we have custom error logging behaviour
Expand All @@ -161,13 +174,14 @@ public class CoverageService {

resource.addToCoverage( new_cs )

if (!utilityService.checkValidBinding(resource)) {
resource.doNotCalculateCoverage = true // Validate is called inside utility service here -- don't trigger calculate coverage
// This will _already_ log out errors, add extra context around what a failure means in this case.
if (!utilityService.checkValidBinding(resource, "Coverage statements (${coverage_statements}) invalid. Coverage for ${resource} will be reset (${statements})")) {
throw new ValidationException('Adding coverage statement invalidates Resource', resource.errors)
}

resource.save()
saveResourceWithoutCalculatingCoverage(resource, false, false)
} else {

// Not valid coverage statement
cs.errors.allErrors.each { ObjectError error ->
/* TODO Perhaps in future we can extend checkValidBinding to this use case */
Expand All @@ -176,24 +190,36 @@ public class CoverageService {
log.error (messageSource.getMessage(error, LocaleContextHolder.locale))
}
}

// Throwing a ValildationException here we "reset" if even one coverageStatement is wrong.
// Without this we simply ignore the incorrect statement and try to continue...
throw new ValidationException('Coverage statement is incorrect', cs.errors)
}
}

// log.debug("New coverage saved")
changed = true
} catch (ValidationException e) {
log.error("Coverage changes to Resource ${resource.id} not saved")
}
// Don't bother erroring this to the user, the above validation errors will log through UtilityService
log.debug("Coverage changes to resource ${resource} not saved. \n${e.message}")
//e.printStackTrace() // Can turn off except for dev

// In this case we must RESET the coverage
// This shouldn't need to be a log error, as the validation error above comes from somewhere which ALREADY logs as error.
resource.coverage?.clear();

if (!changed) {
// Revert the coverage set.
if (!resource.coverage) resource.coverage = []
statements.each {
resource.addToCoverage( it )
}
}

resource.save(failOnError: true, flush:true) // Save.
// Save and flush (Don't propogate coverage changes yet)
// Flush is NECESSARY here so that changelistener lookups have access to all created coverageStatements
saveResourceWithoutCalculatingCoverage(resource)

if (calculateCoverageAtEnd) {
// If we want this to propogate down we need to run the change listener here specifically.
changeListener(resource);
// Again, if changeListener ends up doing more things
// we may need to separate out the coverage-only part
}
// }
}

Expand All @@ -204,27 +230,24 @@ public class CoverageService {
* @param pti The PlatformTitleInstance
*/
public static void calculateCoverage( final PlatformTitleInstance pti ) {

// log.debug 'Calculate coverage for PlatformTitleInstance {}', pti.id

// Use a sub query to select all the coverage statements linked to PCIs,
// linked to this pti
List<org.olf.dataimport.erm.CoverageStatement> allCoverage = CoverageStatement.createCriteria().list {
'in' 'resource.id', new DetachedCriteria(PackageContentItem).build {
readOnly (true)
eq 'pti.id', pti.id

projections {
property ('id')
}
}
}.collect{ CoverageStatement cs ->
List<org.olf.dataimport.erm.CoverageStatement> allCoverage = CoverageStatement.executeQuery(
"""
SELECT cs FROM CoverageStatement cs WHERE
cs.resource.id IN (
SELECT pci.id FROM PackageContentItem pci WHERE
pci.pti.id = :ptiId
)
""".toString(), [ptiId: pti.id]
).collect { CoverageStatement cs ->
new org.olf.dataimport.erm.CoverageStatement([
'startDate': cs.startDate,
'endDate': cs.endDate
])
}

allCoverage = collateCoverageStatements(allCoverage)

setCoverageFromSchema(pti, allCoverage)
Expand All @@ -240,18 +263,16 @@ public class CoverageService {
// Use a sub query to select all the coverage statements linked to PTIs,
// linked to this TI
// TitleInstance.withTransaction {
List<CoverageStatement> results = CoverageStatement.createCriteria().list {
'in' 'resource.id', new DetachedCriteria(PlatformTitleInstance, 'linked_ptis').build {
readOnly (true)
eq 'titleInstance.id', ti.id

projections {
property ('id')
}
}
}

List<org.olf.dataimport.erm.CoverageStatement> allCoverage = results.collect { CoverageStatement cs ->
List<org.olf.dataimport.erm.CoverageStatement> allCoverage = CoverageStatement.executeQuery(
"""
SELECT cs FROM CoverageStatement cs WHERE
cs.resource.id IN (
SELECT pti.id FROM PlatformTitleInstance pti WHERE
pti.titleInstance.id = :tiId
)
""".toString(), [tiId: ti.id]
).collect { CoverageStatement cs ->
new org.olf.dataimport.erm.CoverageStatement([
'startDate': cs.startDate,
'endDate': cs.endDate
Expand Down Expand Up @@ -421,23 +442,30 @@ public class CoverageService {
res instanceof TitleInstance ? res : null
}


// NOTE -- this is slightly misnamed. As of right now this is _only_ a change coverage listener.
// This means this whole method is not triggered when doNotCalculate coverage transient is
// set on the resource
public static void changeListener(ErmResource res) {

final PackageContentItem pci = asPCI(res)
if ( pci ) {
log.trace "PCI updated, regenerate PTI's coverage"
//log.debug "PCI updated, regenerate PTI's coverage"
calculateCoverage( pci.pti )
}

final PlatformTitleInstance pti = asPTI(res)
if ( pti ) {
log.trace "PTI updated regenerate TI's coverage"
//log.debug "PTI updated regenerate TI's coverage"
calculateCoverage( pti.titleInstance )
}

final TitleInstance ti = asTI(res)
if ( ti ) {
log.trace 'TI updated'
//log.debug("TI updated")
}
}

Expand Down
21 changes: 15 additions & 6 deletions service/grails-app/services/org/olf/PackageIngestService.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ class PackageIngestService implements DataBinder {
log.debug ("(Package in progress) processed ${result.titleCount} titles, average per title: ${result.averageTimePerTitle}s")
} */
}
long finishedTime = (System.currentTimeMillis()-result.startTime)/1000
long finishedTime = (System.currentTimeMillis()-result.startTime) // Don't divide by 1000 here, instead leave in ms for greater accuracy

// This removed logic is WRONG under pushKB because it's chunked -- ensure pushKB does not call full upsertPackage method
// At the end - Any PCIs that are currently live (Don't have a removedTimestamp) but whos lastSeenTimestamp is < result.updateTime
Expand Down Expand Up @@ -219,7 +219,7 @@ class PackageIngestService implements DataBinder {
// Need to pause long enough so that the timestamps are different
TimeUnit.MILLISECONDS.sleep(1)
if (result.titleCount > 0) {
log.debug ("Processed ${result.titleCount} titles in ${finishedTime} seconds (${finishedTime/result.titleCount}s average)")
log.debug ("Processed ${result.titleCount} titles in ${finishedTime/1000} seconds (${(finishedTime/result.titleCount)/1000}s average)")
log.info("Package titles summary::Processed/${result.titleCount}, Added/${result.newTitles}, Updated/${result.updatedTitles}, Removed/${result.removedTitles}, AccessStart/${result.updatedAccessStart}, AccessEnd/${result.updatedAccessEnd}")

// Log the counts too.
Expand Down Expand Up @@ -483,12 +483,20 @@ class PackageIngestService implements DataBinder {
pti = new PlatformTitleInstance(titleInstance:title,
platform:platform,
url:pc.url,
).save(failOnError: true)
);

// Ensure coverages don't change here... I _think_ this is only used during ingest where PCI coverage sweep will get triggered eventually
pti.doNotCalculateCoverage = true
pti.save(failOnError: true)
} else if (trustedSourceTI) {
// Update any PTI fields directly
if (pti.url != pc.url) {
pti.url = pc.url
}

// Ensure coverages don't change here... I _think_ this is only used during ingest where PCI coverage sweep will get triggered eventually
pti.doNotCalculateCoverage = true

pti.save(flush: true, failOnError: true)
}

Expand Down Expand Up @@ -598,8 +606,6 @@ class PackageIngestService implements DataBinder {
result.pciStatus = 'new'
}

pci.save(flush: true, failOnError: true)

// ADD PTI AND PCI ID TO RESULT
result.pciId = pci.id;

Expand All @@ -611,8 +617,11 @@ class PackageIngestService implements DataBinder {
// We define coverage to be a list in the exchange format, but sometimes it comes just as a JSON map. Convert that
// to the list of maps that coverageService.extend expects
Iterable<CoverageStatementSchema> cov = pc.coverage instanceof Iterable ? pc.coverage : [ pc.coverage ]
coverageService.setCoverageFromSchema (pci, cov)
// Ensure this doesn't trigger the calculateCoverage, that will happen on PCI save
coverageService.setCoverageFromSchema (pci, cov, false)
}

pci.save(failOnError: true, flush: true)
}
else {
throw new IngestException("Unable to identify platform from ${platform_url_to_use} and ${pc.platformName}");
Expand Down
Loading

0 comments on commit 5a34b7d

Please sign in to comment.