Refinement Forecasting & Trend Analysis

This notebook analyzes quality trends over time by:

- Loading git history to track refinement score changes
- Performing time series analysis
- Forecasting future quality improvements
- Projecting maintenance schedules for evergreen content

# Import required libraries
import os
import subprocess
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
import numpy as np
from sklearn.linear_model import LinearRegression
from pathlib import Path

1. Git History Analysis¶

Extract refinement score changes from git commit history.

def get_git_history(file_path):
    """Return the commit history of a single file as reported by ``git log``.

    Runs ``git log --follow`` for ``file_path`` and parses every output line
    into a short commit hash and a timestamp.

    Args:
        file_path: Path (``str`` or ``pathlib.Path``) of the file. A relative
            path is resolved against the current working directory before
            git is invoked.

    Returns:
        A list of ``{'commit': <first 7 hash chars>, 'date': <Timestamp>}``
        dicts in the order git printed them. An empty list is returned when
        git cannot be started, exits with an error, or prints no history.
    """
    try:
        # Resolve up front: git runs with its own working directory, so a
        # relative path would otherwise be interpreted against the wrong place.
        target = Path(file_path).resolve()
        result = subprocess.run(
            ['git', 'log', '--follow', '--format=%H|%ai', '--', str(target)],
            capture_output=True,
            text=True,
            # git locates the enclosing repository from any directory inside it
            cwd=target.parent,
        )
    except (OSError, subprocess.SubprocessError, TypeError, ValueError) as e:
        print(f"Error getting git history: {e}")
        return []

    if result.returncode != 0:
        # Report the failure instead of presenting it as "no commits"
        print(f"Error getting git history: {result.stderr.strip()}")
        return []

    commits = []
    try:
        for line in result.stdout.splitlines():
            # Split on the first '|' only; lines without it are not log records
            commit_hash, separator, date_str = line.partition('|')
            if not separator:
                continue
            commits.append({
                'commit': commit_hash[:7],
                'date': pd.to_datetime(date_str),
            })
    except ValueError as e:
        print(f"Error getting git history: {e}")
        return []
    return commits

# Example: look up the commit history of the J-Editorial framework index
base_dir = Path('../..')
index_file = base_dir.joinpath('j-editorial', 'index.md')
history = get_git_history(index_file)

print(f"Found {len(history)} commits for {index_file.name}")
if len(history) > 0:
    # The last entry is reported as the first commit and the first entry as
    # the last commit (git log prints the newest commit first).
    oldest_commit = history[-1]
    newest_commit = history[0]
    print(f"First commit: {oldest_commit['date'].strftime('%Y-%m-%d')}")
    print(f"Last commit: {newest_commit['date'].strftime('%Y-%m-%d')}")

Found 10 commits for index.md
First commit: 2025-11-25
Last commit: 2025-11-29

2. Simulated Quality Improvement Trends¶

Since we may not have extensive git history with refinement changes, we’ll create a realistic simulation based on typical content improvement patterns.

# Simulate refinement evolution for demonstration
# In practice, this would come from parsing historical frontmatter from git commits

def simulate_refinement_evolution(start_date, end_date, initial_refinement=0.40,
                                  target_refinement=0.85, noise_std=0.02, seed=None):
    """Simulate a refinement score that rises quickly and then levels off.

    The noise-free curve is a saturating exponential,
    ``initial + (target - initial) * (1 - exp(-3 * progress))``, where
    ``progress`` runs from 0 to 1 over the date range. Gaussian noise is
    added and the result is clipped to ``[0, 1]``.

    Args:
        start_date: First date of the simulated range.
        end_date: Last date of the simulated range.
        initial_refinement: Noise-free score at ``start_date``.
        target_refinement: Level the noise-free curve approaches.
        noise_std: Standard deviation of the added Gaussian noise; ``0``
            yields the smooth curve.
        seed: Optional seed for reproducible noise. When ``None`` the global
            NumPy random state is used, as before.

    Returns:
        DataFrame with a ``date`` column and a ``refinement`` column, one row
        per sample. There is roughly one sample per week, at most 20 and at
        least 2.

    Raises:
        ValueError: If ``end_date`` is earlier than ``start_date``.
    """
    if end_date < start_date:
        raise ValueError("end_date must not be earlier than start_date")

    days = (end_date - start_date).days
    # About one sample per week, capped at 20. Keep at least two samples so
    # that short ranges still produce a start and an end point instead of an
    # empty (or single-row) frame that breaks downstream indexing.
    n_points = max(2, min(days // 7, 20))
    dates = pd.date_range(start=start_date, end=end_date, periods=n_points)

    # Saturating exponential: fast at first, then plateaus
    progress = np.linspace(0, 1, n_points)
    refinement = initial_refinement + (target_refinement - initial_refinement) * (1 - np.exp(-3 * progress))

    # Add some noise; a dedicated generator is used only when a seed is given
    # so that unseeded calls keep drawing from the global NumPy state.
    if seed is None:
        noise = np.random.normal(0, noise_std, n_points)
    else:
        noise = np.random.default_rng(seed).normal(0, noise_std, n_points)
    refinement = np.clip(refinement + noise, 0, 1)

    return pd.DataFrame({'date': dates, 'refinement': refinement})

# Build simulated 90-day histories for the framework documents
end_date = datetime.now()
start_date = end_date - timedelta(days=90)

# (initial, target) refinement per document; dict order fixes the order in
# which the simulations (and their random draws) run.
_refinement_ranges = {
    'J-Editorial Framework': (0.50, 0.85),
    'Layer 1: Properties': (0.45, 0.80),
    'Layer 2: Dimensions': (0.40, 0.80),
    'Layer 3: Rules': (0.42, 0.80),
}
docs_evolution = {
    name: simulate_refinement_evolution(start_date, end_date, initial, target)
    for name, (initial, target) in _refinement_ranges.items()
}

# Summarize each simulated series: size, first score, last score, net change
for doc, df in docs_evolution.items():
    first_score = df['refinement'].iloc[0]
    last_score = df['refinement'].iloc[-1]
    print(f"\n{doc}: {len(df)} data points")
    print(f"  Start: {first_score:.2f}")
    print(f"  End: {last_score:.2f}")
    print(f"  Improvement: +{(last_score - first_score):.2f}")


J-Editorial Framework: 12 data points
  Start: 0.50
  End: 0.84
  Improvement: +0.34

Layer 1: Properties: 12 data points
  Start: 0.45
  End: 0.80
  Improvement: +0.34

Layer 2: Dimensions: 12 data points
  Start: 0.42
  End: 0.76
  Improvement: +0.34

Layer 3: Rules: 12 data points
  Start: 0.44
  End: 0.83
  Improvement: +0.39

# Interactive refinement evolution plot
fig = go.Figure()

# One line-and-marker trace per document, each drawn in its own color
colors = ['#e74c3c', '#3498db', '#2ecc71', '#f39c12']
hover_text = '<b>%{fullData.name}</b><br>Date: %{x|%Y-%m-%d}<br>Refinement: %{y:.2f}<extra></extra>'
for color, (doc, df) in zip(colors, docs_evolution.items()):
    trace = go.Scatter(
        x=df['date'],
        y=df['refinement'],
        mode='lines+markers',
        name=doc,
        line={'color': color, 'width': 3},
        marker={'size': 8, 'color': color, 'line': {'width': 1, 'color': 'white'}},
        hovertemplate=hover_text,
    )
    fig.add_trace(trace)

# Horizontal reference lines for the two quality gates
quality_gates = [
    (0.80, 'green', 'Public Quality Gate (0.80)'),
    (0.90, 'darkgreen', 'Published Quality Gate (0.90)'),
]
for gate_level, gate_color, gate_label in quality_gates:
    fig.add_hline(
        y=gate_level,
        line_dash='dash',
        line_color=gate_color,
        line_width=2,
        opacity=0.5,
        annotation_text=gate_label,
        annotation_position='right',
    )

fig.update_layout(
    title={'text': 'Content Quality Evolution Over Time', 'font': {'size': 20, 'family': 'Arial Black'}},
    xaxis_title='Date',
    yaxis_title='Refinement Score',
    height=600,
    hovermode='x unified',
    legend={'orientation': 'v', 'yanchor': 'bottom', 'y': 0.02, 'xanchor': 'right', 'x': 0.98},
    yaxis={'range': [0.3, 1.0]},
)

# Bare expression so the notebook renders the figure as the cell output
fig

4. Quality Ratchet Effect¶

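Check whether refinement scores only improve over time (never decrease). Note that the simulated data includes random noise, so small regressions can appear in the results below.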

# Count score increases and decreases between consecutive data points
print("QUALITY RATCHET ANALYSIS")
print("=" * 60)

for doc, df in docs_evolution.items():
    # Step-to-step change in refinement; the first row has no predecessor
    diffs = df['refinement'].diff().dropna()
    gains = diffs[diffs > 0]

    positive_changes = len(gains)
    negative_changes = diffs.lt(0).sum()
    no_changes = diffs.eq(0).sum()

    # Mean size of the upward steps only; 0 when there were none
    if positive_changes > 0:
        avg_improvement = gains.mean()
    else:
        avg_improvement = 0

    # The ratchet holds only if no step went down
    if negative_changes == 0:
        ratchet_status = '✓ YES'
    else:
        ratchet_status = '✗ NO'

    print(f"\n{doc}:")
    print(f"  Improvements: {positive_changes}")
    print(f"  Regressions: {negative_changes}")
    print(f"  No change: {no_changes}")
    print(f"  Avg improvement per edit: +{avg_improvement:.3f}")
    print(f"  Quality ratchet: {ratchet_status}")

print("\n" + "=" * 60)

QUALITY RATCHET ANALYSIS
============================================================


J-Editorial Framework:
  Improvements: 10
  Regressions: 1
  No change: 0
  Avg improvement per edit: +0.035
  Quality ratchet: ✗ NO

Layer 1: Properties:
  Improvements: 8
  Regressions: 3
  No change: 0
  Avg improvement per edit: +0.045
  Quality ratchet: ✗ NO

Layer 2: Dimensions:
  Improvements: 7
  Regressions: 4
  No change: 0
  Avg improvement per edit: +0.056
  Quality ratchet: ✗ NO

Layer 3: Rules:
  Improvements: 9
  Regressions: 2
  No change: 0
  Avg improvement per edit: +0.045
  Quality ratchet: ✗ NO

============================================================

5. Forecasting Future Quality¶

Use linear regression to project when documents will reach publication quality.

def forecast_time_to_target(df, target_refinement=0.90):
    """Estimate how many days remain until ``target_refinement`` is reached.

    A straight line is fitted to refinement versus whole days elapsed since
    the earliest date; its slope is the improvement velocity. The gap between
    the last row's score and the target is divided by that slope.

    Args:
        df: DataFrame with a datetime ``date`` column and a ``refinement``
            column. It is not modified.
        target_refinement: Score to reach.

    Returns:
        Tuple ``(days_to_target, velocity)``. ``days_to_target`` is ``0`` when
        the last score already meets the target and ``float('inf')`` when the
        fitted slope is not positive.
    """
    # Whole days since the first observation serve as the regression input
    elapsed_days = (df['date'] - df['date'].min()).dt.days

    model = LinearRegression()
    model.fit(elapsed_days.values.reshape(-1, 1), df['refinement'].values)
    velocity = model.coef_[0]

    latest_score = df['refinement'].iloc[-1]
    if latest_score >= target_refinement:
        return 0, velocity  # Already at target

    # Without a positive slope the target is never reached by extrapolation
    if not velocity > 0:
        return float('inf'), velocity

    remaining_gap = target_refinement - latest_score
    return remaining_gap / velocity, velocity

# Print a forecast to the publication threshold for every document
print("QUALITY FORECAST TO PUBLICATION THRESHOLD (0.90)")
print("=" * 60)

for doc, df in docs_evolution.items():
    days, velocity = forecast_time_to_target(df, target_refinement=0.90)
    current = df['refinement'].iloc[-1]

    print(f"\n{doc}:")
    print(f"  Current refinement: {current:.2f}")
    print(f"  Improvement velocity: +{velocity:.4f}/day")

    if current >= 0.90:
        print("  Status: ✓ Already publication-ready")
    elif not days < float('inf'):
        # No finite estimate: the fitted trend is not rising
        print("  Status: No improvement detected")
    else:
        target_date = datetime.now() + timedelta(days=days)
        print(f"  Days to 0.90: ~{int(days)} days")
        print(f"  Projected date: {target_date.strftime('%Y-%m-%d')}")

print("\n" + "=" * 60)

QUALITY FORECAST TO PUBLICATION THRESHOLD (0.90)
============================================================


J-Editorial Framework:
  Current refinement: 0.84
  Improvement velocity: +0.0034/day
  Days to 0.90: ~16 days
  Projected date: 2026-02-11

Layer 1: Properties:
  Current refinement: 0.80
  Improvement velocity: +0.0034/day
  Days to 0.90: ~30 days
  Projected date: 2026-02-25

Layer 2: Dimensions:
  Current refinement: 0.76
  Improvement velocity: +0.0036/day
  Days to 0.90: ~40 days
  Projected date: 2026-03-07

Layer 3: Rules:
  Current refinement: 0.83
  Improvement velocity: +0.0038/day
  Days to 0.90: ~19 days
  Projected date: 2026-02-14

============================================================

6. Forecast Visualization¶

#| label: maintenance-schedule
# Interactive quality forecast visualization
# Fits a straight line to one document's simulated history and extends it
# 60 days past the last observation.
doc_name = 'Layer 2: Dimensions'
df = docs_evolution[doc_name].copy()
df['days'] = (df['date'] - df['date'].min()).dt.days

# Fit model
X = df['days'].values.reshape(-1, 1)
y = df['refinement'].values
model = LinearRegression()
model.fit(X, y)

# Project 60 days into future
future_days = np.arange(0, df['days'].max() + 60, 1).reshape(-1, 1)
future_refinement = model.predict(future_days)
future_dates = df['date'].min() + pd.to_timedelta(future_days.flatten(), unit='D')

# Create interactive plot
fig = go.Figure()

# Add actual data points
fig.add_trace(go.Scatter(
    x=df['date'],
    y=df['refinement'],
    mode='markers',
    name='Actual',
    marker=dict(size=12, color='#3498db', line=dict(width=2, color='black')),
    hovertemplate='<b>Actual</b><br>Date: %{x|%Y-%m-%d}<br>Refinement: %{y:.2f}<extra></extra>'
))

# Add forecast line
fig.add_trace(go.Scatter(
    x=future_dates,
    y=future_refinement,
    mode='lines',
    name='Forecast',
    line=dict(color='#e74c3c', width=3, dash='dash'),
    hovertemplate='<b>Forecast</b><br>Date: %{x|%Y-%m-%d}<br>Refinement: %{y:.2f}<extra></extra>'
))

# Add quality gate lines
fig.add_hline(y=0.80, line_dash='dash', line_color='green', line_width=2, opacity=0.5,
              annotation_text='Public (0.80)', annotation_position='right')
fig.add_hline(y=0.90, line_dash='dash', line_color='darkgreen', line_width=2, opacity=0.5,
              annotation_text='Published (0.90)', annotation_position='right')

fig.update_layout(
    title=dict(text=f'Quality Forecast: {doc_name}', font=dict(size=20, family='Arial Black')),
    xaxis_title='Date',
    yaxis_title='Refinement Score',
    height=600,
    hovermode='x unified',
    legend=dict(orientation='v', yanchor='bottom', y=0.02, xanchor='right', x=0.98),
    yaxis=dict(range=[0.3, 1.0])
)

# Bare expression so the notebook renders the figure as the cell output
fig

7. Maintenance Schedule Recommendations¶

Project when evergreen content will need review based on drift patterns.

# Simulate maintenance schedule
# Read the clock once so every row is computed against the same instant
# (repeated datetime.now() calls could straddle midnight and disagree).
now = datetime.now()

docs = ['J-Editorial Framework', 'Layer 1: Properties', 'Layer 2: Dimensions', 'Layer 3: Rules']
last_edit = [now - timedelta(days=d) for d in [5, 10, 8, 12]]
review_frequency_days = 90  # Review evergreen docs every 90 days
review_soon_threshold_days = 30  # Flag documents with this many days or fewer left

maintenance_data = []
for doc, last in zip(docs, last_edit):
    days_since = (now - last).days
    days_until_review = review_frequency_days - days_since
    next_review = now + timedelta(days=days_until_review)

    maintenance_data.append({
        'Document': doc,
        'Last Edit': last.strftime('%Y-%m-%d'),
        'Days Since Edit': days_since,
        'Next Review': next_review.strftime('%Y-%m-%d'),
        'Days Until Review': days_until_review,
        'Status': '✓ Current' if days_until_review > review_soon_threshold_days else '⚠ Review Soon',
    })

maintenance_df = pd.DataFrame(maintenance_data)
# The cycle length in the title follows the configured value
print(f"\nMAINTENANCE SCHEDULE ({review_frequency_days}-Day Review Cycle)")
print("=" * 80)
print(maintenance_df.to_string(index=False))
print("\n" + "=" * 80)


MAINTENANCE SCHEDULE (90-Day Review Cycle)
================================================================================
             Document  Last Edit  Days Since Edit Next Review  Days Until Review    Status
J-Editorial Framework 2026-01-20                5  2026-04-20                 85 ✓ Current
  Layer 1: Properties 2026-01-15               10  2026-04-15                 80 ✓ Current
  Layer 2: Dimensions 2026-01-17                8  2026-04-17                 82 ✓ Current
       Layer 3: Rules 2026-01-13               12  2026-04-13                 78 ✓ Current

================================================================================

8. What-If Scenarios¶

# Scenario analysis: weeks needed to go from 0.60 to 0.90 at a given edit pace
print("WHAT-IF SCENARIOS")
print("=" * 60)

# NOTE(review): 'Quarterly Review' uses 0.33 edits/week, which is roughly one
# edit every three weeks rather than one per quarter - confirm which is meant.
scenarios = [
    {'name': 'Current Velocity', 'edits_per_week': 2, 'refinement_per_edit': 0.05},
    {'name': 'Increased Effort', 'edits_per_week': 3, 'refinement_per_edit': 0.05},
    {'name': 'Quarterly Review', 'edits_per_week': 0.33, 'refinement_per_edit': 0.10},
]

for scenario in scenarios:
    weekly_improvement = scenario['edits_per_week'] * scenario['refinement_per_edit']

    # No weekly gain means the threshold is never reached
    if weekly_improvement > 0:
        weeks_to_publication = (0.90 - 0.60) / weekly_improvement
    else:
        weeks_to_publication = float('inf')

    print(f"\nScenario: {scenario['name']}")
    print(f"  Edits per week: {scenario['edits_per_week']}")
    print(f"  Improvement per edit: +{scenario['refinement_per_edit']:.2f}")
    print(f"  Weekly improvement: +{weekly_improvement:.3f}")

    if not weeks_to_publication < float('inf'):
        print("  Time to publication: No improvement")
    else:
        print(f"  Time to publication (0.60 → 0.90): ~{int(weeks_to_publication)} weeks ({weeks_to_publication*7:.0f} days)")

print("\n" + "=" * 60)

WHAT-IF SCENARIOS
============================================================

Scenario: Current Velocity
  Edits per week: 2
  Improvement per edit: +0.05
  Weekly improvement: +0.100
  Time to publication (0.60 → 0.90): ~3 weeks (21 days)

Scenario: Increased Effort
  Edits per week: 3
  Improvement per edit: +0.05
  Weekly improvement: +0.150
  Time to publication (0.60 → 0.90): ~2 weeks (14 days)

Scenario: Quarterly Review
  Edits per week: 0.33
  Improvement per edit: +0.10
  Weekly improvement: +0.033
  Time to publication (0.60 → 0.90): ~9 weeks (64 days)

============================================================