This notebook analyzes quality trends over time by:
- Loading git history to track refinement score changes
- Performing time series analysis
- Forecasting future quality improvements
- Projecting maintenance schedules for evergreen content
# Import required libraries
import os
import subprocess
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
import numpy as np
from sklearn.linear_model import LinearRegression
from pathlib import Path
1. Git History Analysis¶
Extract refinement score changes from git commit history.
def get_git_history(file_path):
    """Return the git commit history of a single file.

    Runs ``git log --follow`` for ``file_path`` and parses every
    ``<hash>|<author date>`` line that git prints.

    Args:
        file_path: Path (``str`` or ``pathlib.Path``) of the file. A relative
            path is interpreted against the current working directory.

    Returns:
        A list of ``{'commit': <first 7 hash characters>, 'date': <pandas
        Timestamp>}`` dicts in the order git printed them (``git log`` lists
        the newest commit first). The list is empty when git reports no
        commits, exits with an error, or cannot be started; failures are
        printed instead of raised.
    """
    try:
        # Make the path absolute (without following symlinks) so the pathspec
        # names the same file regardless of the directory git is started in.
        target = Path(os.path.abspath(file_path))
        result = subprocess.run(
            ['git', 'log', '--follow', '--format=%H|%ai', '--', str(target)],
            capture_output=True,
            text=True,
            # git locates the repository by walking up from cwd, so the
            # file's own directory works at any nesting depth.
            cwd=target.parent,
        )
        if result.returncode != 0:
            # Report git's own message instead of silently returning "no commits".
            print(f"Error getting git history: {result.stderr.strip()}")
            return []
        commits = []
        for line in result.stdout.splitlines():
            commit_hash, separator, date_str = line.partition('|')
            if not separator:
                continue  # blank or unexpected line
            commits.append({
                'commit': commit_hash[:7],
                'date': pd.to_datetime(date_str),
            })
        return commits
    except Exception as e:
        # Deliberately broad: this is a best-effort notebook helper, so a
        # missing git binary or an unparsable date must not abort the run.
        print(f"Error getting git history: {e}")
        return []
# Example: look up the commit history of the J-Editorial framework index page
base_dir = Path('../..')
index_file = base_dir / 'j-editorial' / 'index.md'
history = get_git_history(index_file)

print(f"Found {len(history)} commits for {index_file.name}")
if history:
    # The last list entry is reported as the first commit and the first
    # entry as the last commit.
    newest, oldest = history[0], history[-1]
    print(f"First commit: {oldest['date'].strftime('%Y-%m-%d')}")
    print(f"Last commit: {newest['date'].strftime('%Y-%m-%d')}")
# Cell output begins: Found 6 commits for index.md
First commit: 2025-11-25
Last commit: 2025-11-26
2. Simulated Quality Improvement Trends¶
Since we may not have extensive git history with refinement changes, we’ll create a realistic simulation based on typical content improvement patterns.
# Simulate refinement evolution for demonstration
# In practice, this would come from parsing historical frontmatter from git commits
def simulate_refinement_evolution(start_date, end_date, initial_refinement=0.40, target_refinement=0.85, seed=None):
    """Simulate a noisy refinement-score series between two dates.

    The noise-free curve is a saturating exponential: it rises quickly at
    first and flattens out, covering ``1 - exp(-3)`` (about 95%) of the gap
    between ``initial_refinement`` and ``target_refinement`` by ``end_date``.
    Gaussian noise with standard deviation 0.02 is added and the result is
    clipped to ``[0, 1]``.

    Args:
        start_date: First timestamp of the series.
        end_date: Last timestamp of the series; must not precede ``start_date``.
        initial_refinement: Noise-free score at ``start_date``.
        target_refinement: Score the curve approaches over time.
        seed: Optional seed for reproducible noise. When ``None`` the noise
            is drawn from NumPy's global random state.

    Returns:
        A DataFrame with columns ``'date'`` and ``'refinement'``. It has one
        row per full week in the range, capped at 20 rows and never fewer
        than 2, so the series always has a start and an end point.

    Raises:
        ValueError: If ``end_date`` is earlier than ``start_date``.
    """
    if end_date < start_date:
        raise ValueError("end_date must not be earlier than start_date")

    days = (end_date - start_date).days
    # Roughly weekly samples, capped at 20. The lower bound of 2 keeps short
    # ranges from producing an empty or single-row series.
    n_points = max(min(days // 7, 20), 2)
    dates = pd.date_range(start=start_date, end=end_date, periods=n_points)

    # Saturating exponential improvement (fast at first, then plateaus)
    progress = np.linspace(0, 1, len(dates))
    gap = target_refinement - initial_refinement
    refinement = initial_refinement + gap * (1 - np.exp(-3 * progress))

    # Add some noise
    if seed is None:
        noise = np.random.normal(0, 0.02, len(refinement))
    else:
        noise = np.random.default_rng(seed).normal(0, 0.02, len(refinement))
    refinement = np.clip(refinement + noise, 0, 1)

    return pd.DataFrame({'date': dates, 'refinement': refinement})
# Create simulated data for framework documents (the last 90 days)
end_date = datetime.now()
start_date = end_date - timedelta(days=90)
docs_evolution = {
    'J-Editorial Framework': simulate_refinement_evolution(start_date, end_date, 0.50, 0.85),
    'Layer 1: Properties': simulate_refinement_evolution(start_date, end_date, 0.45, 0.80),
    'Layer 2: Dimensions': simulate_refinement_evolution(start_date, end_date, 0.40, 0.80),
    'Layer 3: Rules': simulate_refinement_evolution(start_date, end_date, 0.42, 0.80),
}

# Summarise each series: number of points, first score, last score, net change
for doc, df in docs_evolution.items():
    first_score = df['refinement'].iloc[0]
    last_score = df['refinement'].iloc[-1]
    print(f"\n{doc}: {len(df)} data points")
    print(f" Start: {first_score:.2f}")
    print(f" End: {last_score:.2f}")
    # The '+' format flag prints the real sign, so a net drop shows as
    # "-0.05" rather than "+-0.05".
    print(f" Improvement: {last_score - first_score:+.2f}")
J-Editorial Framework: 12 data points
Start: 0.48
End: 0.84
Improvement: +0.36
Layer 1: Properties: 12 data points
Start: 0.46
End: 0.76
Improvement: +0.30
Layer 2: Dimensions: 12 data points
Start: 0.41
End: 0.78
Improvement: +0.37
Layer 3: Rules: 12 data points
Start: 0.45
End: 0.77
Improvement: +0.32
3. Refinement Trends Visualization¶
# Interactive refinement evolution plot: one line-and-marker trace per document
fig = go.Figure()
colors = ['#e74c3c', '#3498db', '#2ecc71', '#f39c12']
hover_text = '<b>%{fullData.name}</b><br>Date: %{x|%Y-%m-%d}<br>Refinement: %{y:.2f}<extra></extra>'

for (doc, df), color in zip(docs_evolution.items(), colors):
    trace = go.Scatter(
        x=df['date'],
        y=df['refinement'],
        mode='lines+markers',
        name=doc,
        line={'color': color, 'width': 3},
        marker={'size': 8, 'color': color, 'line': {'width': 1, 'color': 'white'}},
        hovertemplate=hover_text,
    )
    fig.add_trace(trace)

# Add quality gate lines: dashed horizontal markers at 0.80 and 0.90
quality_gates = (
    (0.80, 'green', 'Public Quality Gate (0.80)'),
    (0.90, 'darkgreen', 'Published Quality Gate (0.90)'),
)
for gate_level, gate_color, gate_label in quality_gates:
    fig.add_hline(
        y=gate_level,
        line_dash='dash',
        line_color=gate_color,
        line_width=2,
        opacity=0.5,
        annotation_text=gate_label,
        annotation_position='right',
    )

fig.update_layout(
    title={
        'text': 'Content Quality Evolution Over Time',
        'font': {'size': 20, 'family': 'Arial Black'},
    },
    xaxis_title='Date',
    yaxis_title='Refinement Score',
    height=600,
    hovermode='x unified',
    legend={'orientation': 'v', 'yanchor': 'bottom', 'y': 0.02, 'xanchor': 'right', 'x': 0.98},
    yaxis={'range': [0.3, 1.0]},
)

# A bare final expression lets the notebook render the figure.
fig
# Cell output placeholder: Loading...
4. Quality Ratchet Effect¶
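Check whether refinement scores only improve over time (never decrease). Because the simulated series include random noise, some step-to-step regressions are expected in the results below.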
# Quality ratchet check: classify each step between consecutive data points
# as an improvement, a regression, or no change
print("QUALITY RATCHET ANALYSIS")
print("=" * 60)

for doc, df in docs_evolution.items():
    # Step-to-step differences; the first row has no predecessor and is dropped
    diffs = df['refinement'].diff().dropna()
    gains = diffs[diffs > 0]

    positive_changes = len(gains)
    negative_changes = int((diffs < 0).sum())
    no_changes = int((diffs == 0).sum())

    if positive_changes > 0:
        avg_improvement = gains.mean()
    else:
        avg_improvement = 0

    # The ratchet holds only when no step decreased the score
    ratchet_mark = '✗ NO' if negative_changes else '✓ YES'

    print(f"\n{doc}:")
    print(f" Improvements: {positive_changes}")
    print(f" Regressions: {negative_changes}")
    print(f" No change: {no_changes}")
    print(f" Avg improvement per edit: +{avg_improvement:.3f}")
    print(f" Quality ratchet: {ratchet_mark}")

print("\n" + "=" * 60)
# Cell output begins: QUALITY RATCHET ANALYSIS
============================================================
J-Editorial Framework:
Improvements: 6
Regressions: 5
No change: 0
Avg improvement per edit: +0.071
Quality ratchet: ✗ NO
Layer 1: Properties:
Improvements: 8
Regressions: 3
No change: 0
Avg improvement per edit: +0.043
Quality ratchet: ✗ NO
Layer 2: Dimensions:
Improvements: 9
Regressions: 2
No change: 0
Avg improvement per edit: +0.047
Quality ratchet: ✗ NO
Layer 3: Rules:
Improvements: 8
Regressions: 3
No change: 0
Avg improvement per edit: +0.043
Quality ratchet: ✗ NO
============================================================
5. Forecasting Future Quality¶
Use linear regression to project when documents will reach publication quality.
def forecast_time_to_target(df, target_refinement=0.90):
    """Forecast how many days remain until a document reaches a target score.

    Fits an ordinary least-squares line to refinement versus elapsed days and
    extrapolates from the most recent observation at the fitted slope.

    Args:
        df: DataFrame with a datetime ``'date'`` column and a numeric
            ``'refinement'`` column. The last row is taken as the current
            state.
        target_refinement: Score to reach.

    Returns:
        A ``(days_to_target, slope)`` tuple. ``days_to_target`` is ``0`` when
        the last score already meets the target and ``float('inf')`` when the
        fitted slope is not positive. ``slope`` is the fitted change in
        refinement per day.

    Raises:
        ValueError: If ``df`` has no rows.
    """
    if df.empty:
        raise ValueError("df must contain at least one observation")

    # Work on a copy so the caller's frame does not gain a 'days' column
    df = df.copy()

    # Elapsed time in fractional days. Keeping the fraction preserves the
    # true spacing of timestamps that do not fall on whole-day boundaries.
    elapsed = df['date'] - df['date'].min()
    df['days'] = elapsed.dt.total_seconds() / 86400.0

    # Fit linear regression (scikit-learn expects a 2-D feature matrix)
    X = df['days'].to_numpy().reshape(-1, 1)
    y = df['refinement'].to_numpy()
    model = LinearRegression()
    model.fit(X, y)
    slope = model.coef_[0]

    current_refinement = df['refinement'].iloc[-1]
    if current_refinement >= target_refinement:
        return 0, slope  # Already at target

    if slope <= 0:
        return float('inf'), slope  # Flat or declining trend never gets there

    # Project forward from the latest score at the fitted daily rate
    return (target_refinement - current_refinement) / slope, slope
# Forecast for each document
# Single source of truth for the publication threshold used in the header,
# the forecast call, the readiness check and the per-document message.
publication_threshold = 0.90

print(f"QUALITY FORECAST TO PUBLICATION THRESHOLD ({publication_threshold:.2f})")
print("=" * 60)

for doc, df in docs_evolution.items():
    days, velocity = forecast_time_to_target(df, target_refinement=publication_threshold)
    current = df.iloc[-1]['refinement']

    print(f"\n{doc}:")
    print(f" Current refinement: {current:.2f}")
    # The '+' format flag prints the real sign, so a declining trend shows
    # as "-0.0010/day" rather than "+-0.0010/day".
    print(f" Improvement velocity: {velocity:+.4f}/day")

    if current >= publication_threshold:
        print(" Status: ✓ Already publication-ready")
    elif days < float('inf'):
        # The projection is counted forward from the moment the cell runs
        target_date = datetime.now() + timedelta(days=float(days))
        print(f" Days to {publication_threshold:.2f}: ~{int(days)} days")
        print(f" Projected date: {target_date.strftime('%Y-%m-%d')}")
    else:
        print(" Status: No improvement detected")

print("\n" + "=" * 60)
# Cell output begins: QUALITY FORECAST TO PUBLICATION THRESHOLD (0.90)
============================================================
J-Editorial Framework:
Current refinement: 0.84
Improvement velocity: +0.0033/day
Days to 0.90: ~18 days
Projected date: 2025-12-15
Layer 1: Properties:
Current refinement: 0.76
Improvement velocity: +0.0030/day
Days to 0.90: ~44 days
Projected date: 2026-01-10
Layer 2: Dimensions:
Current refinement: 0.78
Improvement velocity: +0.0038/day
Days to 0.90: ~30 days
Projected date: 2025-12-27
Layer 3: Rules:
Current refinement: 0.77
Improvement velocity: +0.0034/day
Days to 0.90: ~39 days
Projected date: 2026-01-05
============================================================
6. Forecast Visualization¶
#| label: maintenance-schedule
# NOTE(review): the label above reads "maintenance-schedule", yet this cell
# draws the quality forecast figure. Confirm the label is intended.
# Interactive quality forecast visualization
doc_name = 'Layer 2: Dimensions'
df = docs_evolution[doc_name].copy()
series_start = df['date'].min()
df['days'] = (df['date'] - series_start).dt.days

# Fit model
X = df['days'].values.reshape(-1, 1)
y = df['refinement'].values
model = LinearRegression()
model.fit(X, y)

# Project 60 days into future (one prediction per day from day 0)
future_days = np.arange(0, df['days'].max() + 60, 1).reshape(-1, 1)
future_refinement = model.predict(future_days)
future_dates = series_start + pd.to_timedelta(future_days.flatten(), unit='D')

# Create interactive plot
fig = go.Figure()

# Add actual data points
actual_trace = go.Scatter(
    x=df['date'],
    y=df['refinement'],
    mode='markers',
    name='Actual',
    marker={'size': 12, 'color': '#3498db', 'line': {'width': 2, 'color': 'black'}},
    hovertemplate='<b>Actual</b><br>Date: %{x|%Y-%m-%d}<br>Refinement: %{y:.2f}<extra></extra>',
)
fig.add_trace(actual_trace)

# Add forecast line
forecast_trace = go.Scatter(
    x=future_dates,
    y=future_refinement,
    mode='lines',
    name='Forecast',
    line={'color': '#e74c3c', 'width': 3, 'dash': 'dash'},
    hovertemplate='<b>Forecast</b><br>Date: %{x|%Y-%m-%d}<br>Refinement: %{y:.2f}<extra></extra>',
)
fig.add_trace(forecast_trace)

# Add quality gate lines
for gate_level, gate_color, gate_label in (
    (0.80, 'green', 'Public (0.80)'),
    (0.90, 'darkgreen', 'Published (0.90)'),
):
    fig.add_hline(
        y=gate_level,
        line_dash='dash',
        line_color=gate_color,
        line_width=2,
        opacity=0.5,
        annotation_text=gate_label,
        annotation_position='right',
    )

fig.update_layout(
    title={
        'text': f'Quality Forecast: {doc_name}',
        'font': {'size': 20, 'family': 'Arial Black'},
    },
    xaxis_title='Date',
    yaxis_title='Refinement Score',
    height=600,
    hovermode='x unified',
    legend={'orientation': 'v', 'yanchor': 'bottom', 'y': 0.02, 'xanchor': 'right', 'x': 0.98},
    yaxis={'range': [0.3, 1.0]},
)

# A bare final expression lets the notebook render the figure.
fig
# Section heading that follows this cell: 7. Maintenance Schedule Recommendations¶
Project when evergreen content will next need review, using a fixed 90-day review cycle counted from each document's last edit.
# Simulate maintenance schedule
docs = ['J-Editorial Framework', 'Layer 1: Properties', 'Layer 2: Dimensions', 'Layer 3: Rules']
review_frequency_days = 90  # Review evergreen docs every 90 days

# Take one clock reading so every row is measured against the same instant,
# even if the cell happens to run across midnight.
now = datetime.now()
last_edit = [now - timedelta(days=d) for d in [5, 10, 8, 12]]

maintenance_data = []
for doc, last in zip(docs, last_edit):
    days_since = (now - last).days
    days_until_review = review_frequency_days - days_since
    next_review = now + timedelta(days=days_until_review)
    maintenance_data.append({
        'Document': doc,
        'Last Edit': last.strftime('%Y-%m-%d'),
        'Days Since Edit': days_since,
        'Next Review': next_review.strftime('%Y-%m-%d'),
        'Days Until Review': days_until_review,
        # Anything due within 30 days (or already overdue) is flagged
        'Status': '✓ Current' if days_until_review > 30 else '⚠ Review Soon',
    })

maintenance_df = pd.DataFrame(maintenance_data)

# The banner is derived from the setting so it cannot drift out of sync
print(f"\nMAINTENANCE SCHEDULE ({review_frequency_days}-Day Review Cycle)")
print("=" * 80)
print(maintenance_df.to_string(index=False))
print("\n" + "=" * 80)
MAINTENANCE SCHEDULE (90-Day Review Cycle)
================================================================================
Document Last Edit Days Since Edit Next Review Days Until Review Status
J-Editorial Framework 2025-11-21 5 2026-02-19 85 ✓ Current
Layer 1: Properties 2025-11-16 10 2026-02-14 80 ✓ Current
Layer 2: Dimensions 2025-11-18 8 2026-02-16 82 ✓ Current
Layer 3: Rules 2025-11-14 12 2026-02-12 78 ✓ Current
================================================================================
8. What-If Scenarios¶
# Scenario analysis: weeks needed to raise a document from the starting score
# to the publication score at different editing cadences
print("WHAT-IF SCENARIOS")
print("=" * 60)

# Defined once so the formula and the printed message cannot disagree
start_refinement = 0.60
publication_refinement = 0.90

# NOTE(review): 0.33 edits per week is about one edit every three weeks; a
# quarterly cadence would be nearer 1/13 ≈ 0.08. Confirm the intended value.
scenarios = [
    {'name': 'Current Velocity', 'edits_per_week': 2, 'refinement_per_edit': 0.05},
    {'name': 'Increased Effort', 'edits_per_week': 3, 'refinement_per_edit': 0.05},
    {'name': 'Quarterly Review', 'edits_per_week': 0.33, 'refinement_per_edit': 0.10},
]

for scenario in scenarios:
    weekly_improvement = scenario['edits_per_week'] * scenario['refinement_per_edit']
    if weekly_improvement > 0:
        weeks_to_publication = (publication_refinement - start_refinement) / weekly_improvement
    else:
        weeks_to_publication = float('inf')

    print(f"\nScenario: {scenario['name']}")
    print(f" Edits per week: {scenario['edits_per_week']}")
    print(f" Improvement per edit: +{scenario['refinement_per_edit']:.2f}")
    print(f" Weekly improvement: +{weekly_improvement:.3f}")
    if weeks_to_publication < float('inf'):
        # Whole weeks are truncated; the day count is rounded
        print(f" Time to publication ({start_refinement:.2f} → {publication_refinement:.2f}): ~{int(weeks_to_publication)} weeks ({weeks_to_publication*7:.0f} days)")
    else:
        print(" Time to publication: No improvement")

print("\n" + "=" * 60)
# Cell output begins: WHAT-IF SCENARIOS
============================================================
Scenario: Current Velocity
Edits per week: 2
Improvement per edit: +0.05
Weekly improvement: +0.100
Time to publication (0.60 → 0.90): ~3 weeks (21 days)
Scenario: Increased Effort
Edits per week: 3
Improvement per edit: +0.05
Weekly improvement: +0.150
Time to publication (0.60 → 0.90): ~2 weeks (14 days)
Scenario: Quarterly Review
Edits per week: 0.33
Improvement per edit: +0.10
Weekly improvement: +0.033
Time to publication (0.60 → 0.90): ~9 weeks (64 days)
============================================================