Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Refinement Forecasting & Trend Analysis

This notebook analyzes quality trends over time by:

  1. Loading git history to track refinement score changes

  2. Performing time series analysis

  3. Forecasting future quality improvements

  4. Projecting maintenance schedules for evergreen content

# Import required libraries
import os
import subprocess
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
import numpy as np
from sklearn.linear_model import LinearRegression
from pathlib import Path

1. Git History Analysis

Extract refinement score changes from git commit history.

def get_git_history(file_path):
    """Return the commit history of one file as reported by ``git log --follow``.

    Args:
        file_path: Path (``str`` or ``Path``) of the file to inspect. A
            relative path is resolved against the current working directory
            before git is invoked.

    Returns:
        A list of dicts in git-log order, each with:
          * ``'commit'`` - the first 7 characters of the commit hash
          * ``'date'``   - the author date (``%ai``) parsed by ``pd.to_datetime``
        An empty list is returned (after printing a message) when git cannot
        be started, exits with a non-zero status, or emits an unparsable date.
    """
    # Resolve first: the subprocess runs with a different cwd, so a relative
    # path would otherwise be interpreted against the wrong directory.
    target = Path(file_path).resolve()

    try:
        result = subprocess.run(
            ['git', 'log', '--follow', '--format=%H|%ai', '--', str(target)],
            capture_output=True,
            text=True,
            # git locates the enclosing repository by walking up from cwd,
            # so the file's own directory is always a valid starting point.
            cwd=target.parent,
        )

        if result.returncode != 0:
            print(f"Error getting git history: {result.stderr.strip()}")
            return []

        commits = []
        for line in result.stdout.splitlines():
            commit_hash, separator, date_str = line.partition('|')
            if not separator:
                continue  # blank or unexpected line, nothing to parse
            commits.append({
                'commit': commit_hash[:7],
                'date': pd.to_datetime(date_str),
            })
        return commits
    except (OSError, ValueError) as e:
        # OSError: git missing or cwd does not exist; ValueError: bad date text
        print(f"Error getting git history: {e}")
        return []

# Example: look up the commit history of the J-Editorial framework index page
base_dir = Path('../..')
index_file = base_dir / 'j-editorial' / 'index.md'
history = get_git_history(index_file)

print(f"Found {len(history)} commits for {index_file.name}")
if len(history) > 0:
    # The cell labels the last list entry as the first commit and the
    # first list entry as the last (most recent) commit.
    earliest_commit = history[-1]
    latest_commit = history[0]
    print(f"First commit: {earliest_commit['date'].strftime('%Y-%m-%d')}")
    print(f"Last commit: {latest_commit['date'].strftime('%Y-%m-%d')}")
Found 6 commits for index.md
First commit: 2025-11-25
Last commit: 2025-11-26

Since we may not have extensive git history with refinement changes, we’ll create a realistic simulation based on typical content improvement patterns.

# Interactive refinement evolution plot
fig = go.Figure()

# One lines+markers trace per document, each drawn in its own colour
colors = ['#e74c3c', '#3498db', '#2ecc71', '#f39c12']
for color, (doc, df) in zip(colors, docs_evolution.items()):
    trace = go.Scatter(
        x=df['date'],
        y=df['refinement'],
        mode='lines+markers',
        name=doc,
        line={'color': color, 'width': 3},
        marker={'size': 8, 'color': color, 'line': {'width': 1, 'color': 'white'}},
        hovertemplate='<b>%{fullData.name}</b><br>Date: %{x|%Y-%m-%d}<br>Refinement: %{y:.2f}<extra></extra>',
    )
    fig.add_trace(trace)

# Add quality gate lines (same dashed style, different level/colour/label)
for gate_level, gate_color, gate_label in [
    (0.80, 'green', 'Public Quality Gate (0.80)'),
    (0.90, 'darkgreen', 'Published Quality Gate (0.90)'),
]:
    fig.add_hline(
        y=gate_level,
        line_dash='dash',
        line_color=gate_color,
        line_width=2,
        opacity=0.5,
        annotation_text=gate_label,
        annotation_position='right',
    )

fig.update_layout(
    title={'text': 'Content Quality Evolution Over Time',
           'font': {'size': 20, 'family': 'Arial Black'}},
    xaxis_title='Date',
    yaxis_title='Refinement Score',
    height=600,
    hovermode='x unified',
    legend={'orientation': 'v', 'yanchor': 'bottom', 'y': 0.02, 'xanchor': 'right', 'x': 0.98},
    yaxis={'range': [0.3, 1.0]},
)

# Bare expression so the notebook renders the figure as the cell output
fig
Loading...

4. Quality Ratchet Effect

Check whether refinement scores only improve over time (never decrease). Any regression between consecutive edits means the quality ratchet does not hold for that document.

# Calculate improvement velocity
print("QUALITY RATCHET ANALYSIS")
print("=" * 60)

for doc, df in docs_evolution.items():
    # Change between consecutive rows; the first row has no predecessor,
    # so its NaN difference is dropped.
    diffs = df['refinement'].diff().dropna()

    # Tally the direction of every step
    positive_changes = diffs.gt(0).sum()
    negative_changes = diffs.lt(0).sum()
    no_changes = diffs.eq(0).sum()

    # Mean size of the upward steps only; 0 when there were none
    if positive_changes > 0:
        avg_improvement = diffs[diffs > 0].mean()
    else:
        avg_improvement = 0

    print(f"\n{doc}:")
    print(f"  Improvements: {positive_changes}")
    print(f"  Regressions: {negative_changes}")
    print(f"  No change: {no_changes}")
    print(f"  Avg improvement per edit: +{avg_improvement:.3f}")
    # The ratchet holds only if no step ever went down
    print(f"  Quality ratchet: {'✓ YES' if negative_changes == 0 else '✗ NO'}")

print("\n" + "=" * 60)
QUALITY RATCHET ANALYSIS
============================================================

J-Editorial Framework:
  Improvements: 6
  Regressions: 5
  No change: 0
  Avg improvement per edit: +0.071
  Quality ratchet: ✗ NO

Layer 1: Properties:
  Improvements: 8
  Regressions: 3
  No change: 0
  Avg improvement per edit: +0.043
  Quality ratchet: ✗ NO

Layer 2: Dimensions:
  Improvements: 9
  Regressions: 2
  No change: 0
  Avg improvement per edit: +0.047
  Quality ratchet: ✗ NO

Layer 3: Rules:
  Improvements: 8
  Regressions: 3
  No change: 0
  Avg improvement per edit: +0.043
  Quality ratchet: ✗ NO

============================================================

5. Forecasting Future Quality

Use linear regression to project when documents will reach publication quality.

def forecast_time_to_target(df, target_refinement=0.90):
    """Forecast when a document will reach target quality.

    A straight line is fitted by ordinary least squares to refinement score
    versus whole days elapsed since the earliest observation. The slope of
    that line is the improvement velocity. The projection starts from the
    most recent observed score (not from the fitted line) and divides the
    remaining gap by the velocity.

    Args:
        df: DataFrame with a datetime ``'date'`` column and a numeric
            ``'refinement'`` column. Rows may be in any order; the frame is
            not modified.
        target_refinement: Score the document should reach.

    Returns:
        ``(days_to_target, velocity)`` where ``velocity`` is the fitted slope
        in refinement units per day and ``days_to_target`` is
          * ``0`` if the latest score already meets the target,
          * ``float('inf')`` if the slope is not positive,
          * otherwise the projected number of days (a float).

    Raises:
        ValueError: If ``df`` has no rows.
    """
    if len(df) == 0:
        raise ValueError("df must contain at least one observation")

    # Order chronologically so the last row really is the latest observation;
    # a stable sort keeps the caller's order for rows sharing a date.
    ordered = df.sort_values('date', kind='stable')

    # Convert dates to numeric (whole days since the first observation)
    elapsed_days = (ordered['date'] - ordered['date'].min()).dt.days.to_numpy(dtype=float)
    scores = ordered['refinement'].to_numpy(dtype=float)

    # Simple linear regression slope in closed form: cov(x, y) / var(x).
    # With a single observation, or all observations on the same day, there
    # is no spread in x and the slope is taken as zero.
    day_offsets = elapsed_days - elapsed_days.mean()
    spread = (day_offsets ** 2).sum()
    if spread > 0:
        velocity = float((day_offsets * (scores - scores.mean())).sum() / spread)
    else:
        velocity = 0.0

    current_refinement = scores[-1]
    if current_refinement >= target_refinement:
        return 0, velocity  # Already at target

    if velocity <= 0:
        # Flat or declining trend never reaches the target
        return float('inf'), velocity

    refinement_needed = target_refinement - current_refinement
    return float(refinement_needed / velocity), velocity

# Forecast for each document
print("QUALITY FORECAST TO PUBLICATION THRESHOLD (0.90)")
print("=" * 60)

for doc, df in docs_evolution.items():
    days, velocity = forecast_time_to_target(df, target_refinement=0.90)
    current = df.iloc[-1]['refinement']

    print(f"\n{doc}:")
    print(f"  Current refinement: {current:.2f}")
    # The '+' format flag emits the correct sign itself, so a declining
    # document prints "-0.0010/day" instead of "+-0.0010/day".
    print(f"  Improvement velocity: {velocity:+.4f}/day")

    if current >= 0.90:
        print("  Status: ✓ Already publication-ready")
    elif days < float('inf'):
        # The projected date is counted from the moment the cell runs
        target_date = datetime.now() + timedelta(days=days)
        print(f"  Days to 0.90: ~{int(days)} days")
        print(f"  Projected date: {target_date.strftime('%Y-%m-%d')}")
    else:
        # An infinite forecast means the fitted slope was not positive
        print("  Status: No improvement detected")

print("\n" + "=" * 60)
QUALITY FORECAST TO PUBLICATION THRESHOLD (0.90)
============================================================

J-Editorial Framework:
  Current refinement: 0.84
  Improvement velocity: +0.0033/day
  Days to 0.90: ~18 days
  Projected date: 2025-12-15

Layer 1: Properties:
  Current refinement: 0.76
  Improvement velocity: +0.0030/day
  Days to 0.90: ~44 days
  Projected date: 2026-01-10

Layer 2: Dimensions:
  Current refinement: 0.78
  Improvement velocity: +0.0038/day
  Days to 0.90: ~30 days
  Projected date: 2025-12-27

Layer 3: Rules:
  Current refinement: 0.77
  Improvement velocity: +0.0034/day
  Days to 0.90: ~39 days
  Projected date: 2026-01-05

============================================================

6. Forecast Visualization

#| label: maintenance-schedule
# Interactive quality forecast visualization
doc_name = 'Layer 2: Dimensions'
df = docs_evolution[doc_name].copy()
# Whole days elapsed since the document's first observation
df['days'] = (df['date'] - df['date'].min()).dt.days

# Fit model
X = df['days'].values.reshape(-1, 1)
y = df['refinement'].values
model = LinearRegression()
model.fit(X, y)

# Project 60 days into future
future_days = np.arange(0, df['days'].max() + 60, 1).reshape(-1, 1)
future_refinement = model.predict(future_days)
future_dates = df['date'].min() + pd.to_timedelta(future_days.flatten(), unit='D')

# Create interactive plot
fig = go.Figure()

# Add actual data points
fig.add_trace(go.Scatter(
    x=df['date'],
    y=df['refinement'],
    mode='markers',
    name='Actual',
    marker=dict(size=12, color='#3498db', line=dict(width=2, color='black')),
    hovertemplate='<b>Actual</b><br>Date: %{x|%Y-%m-%d}<br>Refinement: %{y:.2f}<extra></extra>'
))

# Add forecast line
fig.add_trace(go.Scatter(
    x=future_dates,
    y=future_refinement,
    mode='lines',
    name='Forecast',
    line=dict(color='#e74c3c', width=3, dash='dash'),
    hovertemplate='<b>Forecast</b><br>Date: %{x|%Y-%m-%d}<br>Refinement: %{y:.2f}<extra></extra>'
))

# Add quality gate lines
fig.add_hline(y=0.80, line_dash='dash', line_color='green', line_width=2, opacity=0.5,
              annotation_text='Public (0.80)', annotation_position='right')
fig.add_hline(y=0.90, line_dash='dash', line_color='darkgreen', line_width=2, opacity=0.5,
              annotation_text='Published (0.90)', annotation_position='right')

fig.update_layout(
    title=dict(text=f'Quality Forecast: {doc_name}', font=dict(size=20, family='Arial Black')),
    xaxis_title='Date',
    yaxis_title='Refinement Score',
    height=600,
    hovermode='x unified',
    legend=dict(orientation='v', yanchor='bottom', y=0.02, xanchor='right', x=0.98),
    yaxis=dict(range=[0.3, 1.0])
)

# Bare expression so the notebook renders the figure as the cell output
fig

7. Maintenance Schedule Recommendations

Project when evergreen content will next need review, using a fixed 90-day review cycle measured from each document's last edit.

# Simulate maintenance schedule
docs = ['J-Editorial Framework', 'Layer 1: Properties', 'Layer 2: Dimensions', 'Layer 3: Rules']

# Take a single timestamp for the whole cell: calling datetime.now() again
# for every derived value could straddle a date boundary and yield
# "Last Edit" / "Next Review" dates that are not exactly one cycle apart.
now = datetime.now()
last_edit = [now - timedelta(days=d) for d in [5, 10, 8, 12]]
review_frequency_days = 90  # Review evergreen docs every 90 days

maintenance_data = []
for doc, last in zip(docs, last_edit):
    days_since = (now - last).days
    # Remaining days in the current review cycle (negative once overdue)
    days_until_review = review_frequency_days - days_since
    next_review = now + timedelta(days=days_until_review)

    maintenance_data.append({
        'Document': doc,
        'Last Edit': last.strftime('%Y-%m-%d'),
        'Days Since Edit': days_since,
        'Next Review': next_review.strftime('%Y-%m-%d'),
        'Days Until Review': days_until_review,
        # Anything with 30 days or fewer left is flagged for attention
        'Status': '✓ Current' if days_until_review > 30 else '⚠ Review Soon',
    })

maintenance_df = pd.DataFrame(maintenance_data)
print("\nMAINTENANCE SCHEDULE (90-Day Review Cycle)")
print("=" * 80)
print(maintenance_df.to_string(index=False))
print("\n" + "=" * 80)

MAINTENANCE SCHEDULE (90-Day Review Cycle)
================================================================================
             Document  Last Edit  Days Since Edit Next Review  Days Until Review    Status
J-Editorial Framework 2025-11-21                5  2026-02-19                 85 ✓ Current
  Layer 1: Properties 2025-11-16               10  2026-02-14                 80 ✓ Current
  Layer 2: Dimensions 2025-11-18                8  2026-02-16                 82 ✓ Current
       Layer 3: Rules 2025-11-14               12  2026-02-12                 78 ✓ Current

================================================================================

8. What-If Scenarios

# Scenario analysis
print("WHAT-IF SCENARIOS")
print("=" * 60)

# Each scenario pairs an editing cadence with the gain expected per edit
scenarios = [
    dict(name='Current Velocity', edits_per_week=2, refinement_per_edit=0.05),
    dict(name='Increased Effort', edits_per_week=3, refinement_per_edit=0.05),
    dict(name='Quarterly Review', edits_per_week=0.33, refinement_per_edit=0.10),
]

for scenario in scenarios:
    weekly_improvement = scenario['edits_per_week'] * scenario['refinement_per_edit']

    # Weeks needed to close the gap from 0.60 to 0.90 at this weekly rate;
    # a non-positive rate never gets there.
    if weekly_improvement > 0:
        weeks_to_publication = (0.90 - 0.60) / weekly_improvement
    else:
        weeks_to_publication = float('inf')

    print(f"\nScenario: {scenario['name']}")
    print(f"  Edits per week: {scenario['edits_per_week']}")
    print(f"  Improvement per edit: +{scenario['refinement_per_edit']:.2f}")
    print(f"  Weekly improvement: +{weekly_improvement:.3f}")

    if weeks_to_publication < float('inf'):
        # Weeks are truncated to a whole number; days are rounded
        print(f"  Time to publication (0.60 → 0.90): ~{int(weeks_to_publication)} weeks ({weeks_to_publication*7:.0f} days)")
    else:
        print(f"  Time to publication: No improvement")

print("\n" + "=" * 60)
WHAT-IF SCENARIOS
============================================================

Scenario: Current Velocity
  Edits per week: 2
  Improvement per edit: +0.05
  Weekly improvement: +0.100
  Time to publication (0.60 → 0.90): ~3 weeks (21 days)

Scenario: Increased Effort
  Edits per week: 3
  Improvement per edit: +0.05
  Weekly improvement: +0.150
  Time to publication (0.60 → 0.90): ~2 weeks (14 days)

Scenario: Quarterly Review
  Edits per week: 0.33
  Improvement per edit: +0.10
  Weekly improvement: +0.033
  Time to publication (0.60 → 0.90): ~9 weeks (64 days)

============================================================