Skip to contents

Introduction

This vignette covers advanced features of the soilquality package, including:

  • Property selection strategies
  • Custom scoring rules
  • Creating and using AHP matrices
  • Advanced visualization options
  • Working with different data formats
library(soilquality)
data(soil_ucayali)

Property Selection Strategies

Choosing the right soil properties is crucial for meaningful SQI calculation. The package offers several approaches.

Strategy 1: Use Pre-defined Property Sets

The package includes curated property sets for common scenarios:

# View available property sets
names(soil_property_sets)
#> [1] "basic"         "standard"      "comprehensive" "physical"     
#> [5] "chemical"      "fertility"

# Compare different property sets
result_basic <- compute_sqi_properties(
  data = soil_ucayali,
  properties = soil_property_sets$basic,
  id_column = "SampleID"
)

# Use only fertility properties that exist in the dataset
fertility_available <- intersect(soil_property_sets$fertility, 
                                  names(soil_ucayali))
result_fertility <- compute_sqi_properties(
  data = soil_ucayali,
  properties = fertility_available,
  id_column = "SampleID"
)

# Compare mean SQI values
cat("Basic properties - Mean SQI:", 
    round(mean(result_basic$results$SQI), 3), "\n")
#> Basic properties - Mean SQI: 0.49
cat("Fertility properties - Mean SQI:", 
    round(mean(result_fertility$results$SQI), 3), "\n")
#> Fertility properties - Mean SQI: 0.508

Strategy 2: Domain-Specific Selection

Select properties based on your research question or management goal:

# For erosion risk assessment - focus on physical properties
erosion_props <- c("Sand", "Silt", "Clay", "BD", "OM")

result_erosion <- compute_sqi_properties(
  data = soil_ucayali,
  properties = erosion_props,
  id_column = "SampleID"
)

# For nutrient management - focus on fertility
nutrient_props <- c("pH", "OM", "N", "P", "K", "CEC")

result_nutrient <- compute_sqi_properties(
  data = soil_ucayali,
  properties = nutrient_props,
  id_column = "SampleID"
)

# Compare selected MDS
cat("Erosion assessment MDS:", paste(result_erosion$mds, collapse = ", "), "\n")
#> Erosion assessment MDS: Sand, Clay, BD, OM
cat("Nutrient management MDS:", paste(result_nutrient$mds, collapse = ", "), "\n")
#> Nutrient management MDS: N, K, P, pH, OM

Strategy 3: Data-Driven Selection

Let PCA automatically select the most informative properties:

# Use all available numeric properties
all_numeric <- names(soil_ucayali)[sapply(soil_ucayali, is.numeric)]

result_all <- compute_sqi_properties(
  data = soil_ucayali,
  properties = all_numeric,
  id_column = "SampleID"
)

# See which properties were selected by PCA
cat("PCA selected:", length(result_all$mds), "indicators from", 
    length(all_numeric), "properties\n")
#> PCA selected: 8 indicators from 14 properties
print(result_all$mds)
#> [1] "Sand" "Silt" "Ca"   "SOC"  "OM"   "pH"   "Mg"   "P"

Strategy 4: Correlation-Based Pre-screening

Remove highly correlated properties before analysis:

# Calculate correlation matrix
numeric_data <- soil_ucayali[, sapply(soil_ucayali, is.numeric)]
cor_matrix <- cor(numeric_data, use = "complete.obs")

# Find highly correlated pairs (|r| > 0.9)
high_cor <- which(abs(cor_matrix) > 0.9 & cor_matrix < 1, arr.ind = TRUE)

if (nrow(high_cor) > 0) {
  cat("Highly correlated property pairs:\n")
  for (i in 1:nrow(high_cor)) {
    if (high_cor[i, 1] < high_cor[i, 2]) {  # Avoid duplicates
      cat(sprintf("  %s - %s: r = %.3f\n",
                  rownames(cor_matrix)[high_cor[i, 1]],
                  colnames(cor_matrix)[high_cor[i, 2]],
                  cor_matrix[high_cor[i, 1], high_cor[i, 2]]))
    }
  }
}

# Remove one from each highly correlated pair
# For example, keep SOC but remove OM (they're highly correlated)
reduced_props <- setdiff(all_numeric, c("OM"))

result_reduced <- compute_sqi_properties(
  data = soil_ucayali,
  properties = reduced_props,
  id_column = "SampleID"
)

Custom Scoring Rules

The package provides flexible scoring functions to match different soil property behaviors.

Higher is Better

For properties where higher values indicate better soil quality:

# Organic matter, nutrients, CEC
scoring_rules <- list(
  OM = higher_better(),
  N = higher_better(),
  P = higher_better(),
  K = higher_better(),
  CEC = higher_better()
)

result_higher <- compute_sqi_properties(
  data = soil_ucayali,
  properties = names(scoring_rules),
  scoring_rules = scoring_rules,
  id_column = "SampleID"
)

Lower is Better

For properties where lower values indicate better soil quality:

# Bulk density, electrical conductivity
scoring_rules_lower <- list(
  BD = lower_better(),
  EC = lower_better()
)

result_lower <- compute_sqi_properties(
  data = soil_ucayali,
  properties = names(scoring_rules_lower),
  scoring_rules = scoring_rules_lower,
  id_column = "SampleID"
)

Optimum Range

For properties with an ideal target value:

# pH has an optimal range around neutral
scoring_rules_opt <- list(
  pH = optimum_range(optimal = 6.5, tolerance = 1.5),
  Sand = higher_better(),
  Clay = higher_better(),
  OM = higher_better()
)

result_opt <- compute_sqi_properties(
  data = soil_ucayali,
  properties = names(scoring_rules_opt),
  scoring_rules = scoring_rules_opt,
  id_column = "SampleID"
)

# Compare pH scoring with different optimal values
result_opt_7 <- compute_sqi_properties(
  data = soil_ucayali,
  properties = c("pH", "OM", "N", "P"),
  scoring_rules = list(
    pH = optimum_range(optimal = 7.0, tolerance = 1.0),
    OM = higher_better(),
    N = higher_better(),
    P = higher_better()
  ),
  id_column = "SampleID"
)

cat("Mean SQI with pH optimum 6.5:", 
    round(mean(result_opt$results$SQI), 3), "\n")
#> Mean SQI with pH optimum 6.5: 0.395
cat("Mean SQI with pH optimum 7.0:", 
    round(mean(result_opt_7$results$SQI), 3), "\n")
#> Mean SQI with pH optimum 7.0: 0.336

Threshold-Based Scoring

For properties with specific threshold values:

# Example: P availability with agronomic thresholds
# Low: <10, Medium: 10-20, High: 20-40, Very High: >40
scoring_rules_thresh <- list(
  P = threshold_scoring(
    thresholds = c(0, 10, 20, 40, 100),
    scores = c(0, 0.4, 0.7, 1.0, 1.0)
  ),
  K = threshold_scoring(
    thresholds = c(0, 50, 100, 200, 500),
    scores = c(0, 0.4, 0.7, 1.0, 1.0)
  ),
  OM = higher_better(),
  pH = optimum_range(optimal = 6.5, tolerance = 1.5)
)

result_thresh <- compute_sqi_properties(
  data = soil_ucayali,
  properties = names(scoring_rules_thresh),
  scoring_rules = scoring_rules_thresh,
  id_column = "SampleID"
)

Mixed Scoring Strategies

Combine different scoring approaches for comprehensive assessment:

# Realistic scoring for tropical agricultural soils
tropical_scoring <- list(
  pH = optimum_range(optimal = 6.0, tolerance = 1.5),
  OM = higher_better(),
  SOC = higher_better(),
  N = higher_better(),
  P = threshold_scoring(
    thresholds = c(0, 8, 15, 30, 100),
    scores = c(0.2, 0.5, 0.8, 1.0, 1.0)
  ),
  K = threshold_scoring(
    thresholds = c(0, 60, 120, 200, 500),
    scores = c(0.2, 0.5, 0.8, 1.0, 1.0)
  ),
  CEC = higher_better(),
  BD = lower_better(),
  EC = lower_better()
)

result_tropical <- compute_sqi_properties(
  data = soil_ucayali,
  properties = names(tropical_scoring),
  scoring_rules = tropical_scoring,
  id_column = "SampleID"
)

cat("Tropical scoring - Mean SQI:", 
    round(mean(result_tropical$results$SQI), 3), "\n")
#> Tropical scoring - Mean SQI: 0.493

Creating AHP Matrices

The Analytic Hierarchy Process (AHP) allows expert-based weighting of indicators.

Method 1: Interactive Creation

Create a pairwise comparison matrix interactively:

# This will prompt you for pairwise comparisons
indicators <- c("pH", "OM", "P", "K")
ahp_matrix <- create_ahp_matrix(indicators, mode = "interactive")

# Use the AHP matrix in SQI calculation
result_ahp <- compute_sqi_properties(
  data = soil_ucayali,
  properties = indicators,
  pairwise_matrix = ahp_matrix$matrix,
  id_column = "SampleID"
)

Method 2: Manual Matrix Creation

Create a pairwise comparison matrix manually:

# Define indicators
indicators <- c("pH", "OM", "N", "P")

# Create pairwise comparison matrix
# Rows are compared to columns
# Values: 1=Equal, 3=Moderate, 5=Strong, 7=Very Strong, 9=Extreme
pairwise <- matrix(c(
  1,   1/3, 1/5, 1/3,  # pH compared to others
  3,   1,   1/3, 1,    # OM compared to others
  5,   3,   1,   3,    # N compared to others
  3,   1,   1/3, 1     # P compared to others
), nrow = 4, byrow = TRUE)

colnames(pairwise) <- rownames(pairwise) <- indicators

# Calculate weights
ahp_result <- ahp_weights(pairwise, indicators = indicators)

cat("Indicator weights:\n")
#> Indicator weights:
print(round(ahp_result$weights, 3))
#>    pH    OM     N     P 
#> 0.078 0.200 0.522 0.200
cat("\nConsistency Ratio:", round(ahp_result$CR, 4), "\n")
#> 
#> Consistency Ratio: 0.0161

# Note: To use AHP weights in SQI calculation, the pairwise matrix must match
# the MDS indicators selected by PCA. For this example, we'll calculate SQI
# with equal weights and show the AHP weights separately.
result_manual_ahp <- compute_sqi_properties(
  data = soil_ucayali,
  properties = indicators,
  id_column = "SampleID"
)

cat("\nMDS indicators selected by PCA:", paste(result_manual_ahp$mds, collapse = ", "), "\n")
#> 
#> MDS indicators selected by PCA: N, pH, OM

Method 3: Importance Ratios

Use relative importance ratios (simpler than full pairwise matrix):

# Specify relative importance (e.g., N is 3x more important than pH)
indicators <- c("pH", "OM", "N", "P")
importance_ratios <- c(1, 2, 3, 2)  # Relative importance

# Convert to pairwise matrix
pairwise_from_ratios <- ratio_to_saaty(importance_ratios)
colnames(pairwise_from_ratios) <- rownames(pairwise_from_ratios) <- indicators

# Calculate weights
ahp_from_ratios <- ahp_weights(pairwise_from_ratios, indicators = indicators)

cat("Weights from importance ratios:\n")
#> Weights from importance ratios:
print(round(ahp_from_ratios$weights, 3))
#>    pH    OM     N     P 
#> 0.125 0.250 0.375 0.250

Advanced Visualization

Customizing Plot Appearance

# Calculate SQI for visualization examples
result_viz <- compute_sqi_properties(
  data = soil_ucayali,
  properties = c("pH", "OM", "N", "P", "K", "CEC"),
  id_column = "SampleID"
)

# Distribution plot
plot(result_viz, type = "distribution")

Comparing Multiple Scenarios

# Calculate SQI with different property sets
result_physical <- compute_sqi_properties(
  data = soil_ucayali,
  properties = soil_property_sets$physical,
  id_column = "SampleID"
)

# Use only chemical properties that exist in the dataset
chemical_available <- intersect(soil_property_sets$chemical, 
                                 names(soil_ucayali))
result_chemical <- compute_sqi_properties(
  data = soil_ucayali,
  properties = chemical_available,
  id_column = "SampleID"
)

# Compare distributions
par(mfrow = c(1, 2))
hist(result_physical$results$SQI, 
     main = "Physical Properties",
     xlab = "SQI", col = "lightblue", xlim = c(0, 1))
abline(v = mean(result_physical$results$SQI), col = "red", lwd = 2)

hist(result_chemical$results$SQI, 
     main = "Chemical Properties",
     xlab = "SQI", col = "lightgreen", xlim = c(0, 1))
abline(v = mean(result_chemical$results$SQI), col = "red", lwd = 2)

par(mfrow = c(1, 1))

PCA Biplot

Visualize relationships between samples and variables:

plot(result_viz, type = "biplot")

Indicator Contribution Analysis

Identify which indicators contribute most to SQI variation:

# Extract scored indicators and weights
scored_data <- result_viz$results[, paste0(result_viz$mds, "_scored")]
weights <- result_viz$weights

# Calculate weighted contribution of each indicator
contributions <- sweep(scored_data, 2, weights, "*")

# Plot contributions
boxplot(contributions, 
        main = "Weighted Indicator Contributions to SQI",
        ylab = "Weighted Score",
        las = 2,
        col = rainbow(length(result_viz$mds)))

Working with Different Data Formats

File-Based Workflow

# Save data to CSV
write.csv(soil_ucayali, "soil_data.csv", row.names = FALSE)

# Calculate SQI from file
result_from_file <- compute_sqi(
  input_csv = "soil_data.csv",
  id_column = "SampleID",
  output_csv = "sqi_results.csv"
)

# Results are automatically saved to output_csv

Data Frame Workflow

# Work directly with data frames (no file I/O)
result_df <- compute_sqi_df(
  df = soil_ucayali,
  id_column = "SampleID"
)

# Extract results
sqi_values <- result_df$results$SQI
names(sqi_values) <- result_df$results$SampleID

# View top 5 samples by SQI
head(sort(sqi_values, decreasing = TRUE), 5)
#>    UCY043    UCY005    UCY001    UCY050    UCY045 
#> 0.6927483 0.6061193 0.5852834 0.5773329 0.5691887

Batch Processing Multiple Datasets

# Process multiple sites or time points
sites <- c("site1.csv", "site2.csv", "site3.csv")

results_list <- lapply(sites, function(site) {
  compute_sqi(
    input_csv = site,
    id_column = "SampleID",
    properties = soil_property_sets$standard
  )
})

# Compare mean SQI across sites
mean_sqi <- sapply(results_list, function(r) mean(r$results$SQI))
names(mean_sqi) <- sites
print(mean_sqi)

Best Practices

1. Property Selection

  • Use at least 5-6 properties for robust PCA
  • Include properties from different categories (physical, chemical, biological)
  • Avoid highly correlated properties (|r| > 0.9)
  • Match properties to your management objectives

2. Scoring Rules

  • Use domain knowledge to select appropriate scoring functions
  • Validate scoring rules with local experts or literature
  • Consider regional differences in optimal values
  • Document your scoring rationale

3. AHP Weighting

  • Involve multiple experts for more reliable weights
  • Check Consistency Ratio (CR < 0.1)
  • Use equal weights if expert knowledge is unavailable
  • Sensitivity analysis: test how results change with different weights

4. Interpretation

  • Consider SQI in context of land use and management goals
  • Compare SQI across similar soil types or land uses
  • Use visualization to identify limiting factors
  • Validate results with field observations

Summary

This vignette demonstrated advanced features including:

  • Multiple property selection strategies
  • Custom scoring rules for different property behaviors
  • Creating AHP matrices for expert-based weighting
  • Advanced visualization and comparison techniques
  • Different data workflow options

For more information on AHP methodology, see vignette("ahp-matrices").

References

  • Karlen, D. L., Mausbach, M. J., Doran, J. W., Cline, R. G., Harris, R. F., & Schuman, G. E. (1997). Soil quality: A concept, definition, and framework for evaluation. Soil Science Society of America Journal, 61(1), 4-10.

  • Qi, Y., Darilek, J. L., Huang, B., Zhao, Y., Sun, W., & Gu, Z. (2009). Evaluating soil quality indices in an agricultural region of Jiangsu Province, China. Geoderma, 149(3-4), 325-334.