code: 9ferno

ref: e81c54ba2ecc673a4d5f8aed0e9b52841fe07b0d
dir: /os/pc/vgasavage.c/

View raw version
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "../port/error.h"

#define	Image	IMAGE
#include <draw.h>
#include <memdraw.h>
#include <cursor.h>
#include "screen.h"

/*
 * PCI identifiers for the S3 chips this file knows by name.
 * PCIS3 is the vendor ID; every other constant is a device ID.
 * Only the device IDs named in savageinit's switch are accepted by
 * the driver; the rest are not referenced in the code visible here.
 */
enum {
	PCIS3		= 0x5333,		/* PCI VID */

	/* Savage family */
	SAVAGE3D	= 0x8A20,	/* PCI DID */
	SAVAGE3DMV	= 0x8A21,
	SAVAGE4		= 0x8A22,
	PROSAVAGEP	= 0x8A25,
	PROSAVAGEK	= 0x8A26,
	PROSAVAGE8	= 0x8D04,
	SAVAGEMXMV	= 0x8C10,
	SAVAGEMX	= 0x8C11,
	SAVAGEIXMV	= 0x8C12,
	SAVAGEIX	= 0x8C13,
	SUPERSAVAGEIXC16 = 0x8C2E,
	SAVAGE2000	= 0x9102,

	/* ViRGE family */
	VIRGE		= 0x5631,
	VIRGEGX2	= 0x8A10,
	VIRGEDXGX	= 0x8A01,
	VIRGEVX		= 0x883D,
	VIRGEMX		= 0x8C01,
	VIRGEMXP	= 0x8C03,

	AURORA64VPLUS	= 0x8812,
};

/*
 * Savage4 et al. acceleration.
 *
 * This is based only on the Savage4 documentation.
 * It is expected to work on other Savage cards as well,
 * but has not been tried.
 * 
 * There are five ways to access the 2D graphics engine registers:
 * 	- Old MMIO non-packed format
 *	- Old MMIO packed format
 *	- New MMIO non-packed format
 *	- New MMIO packed format
 *	- Burst Command Interface (BCI)
 *
 * Of these, the manual hints that the first three are deprecated,
 * and it does not document any of those three well enough to use.
 * 
 * I have tried for many hours with no success to understand the BCI
 * interface well enough to use it.  It is not well documented, and the
 * XFree86 driver seems to completely contradict what little documentation
 * there is.
 *
 * This leaves the packed new MMIO.
 * The manual contradicts itself here, claiming that the registers
 * start at 0x2008100 as well as at 0x0008100 from the base of the 
 * mmio segment.  Since the segment is only 512k, we assume that
 * the latter is the correct offset.
 *
 * According to the manual, only 16-bit reads of the 2D registers
 * are supported: 32-bit reads will return garbage in the upper word.
 * 32-bit writes must be enabled explicitly.
 * 
 * 32-bit reads of the status registers seem just fine.
 */

/*
 * 2D graphics engine registers for Savage4; others appear to be mostly the same.
 *
 * Names at the outer indentation level are byte offsets that the code
 * in this file adds to the base of the mmio segment.  Names indented
 * beneath a register are bit fields or field values for that register.
 *
 * Several offsets appear under more than one name, one per use
 * described in the comments: SubsystemStatus/SubsystemCtl (0x8504),
 * SourceY/RectY/StartY (0x8100), SourceX/RectX/StartX (0x8102),
 * DestY/AxialStep (0x8108), DestX/DiagonalStep (0x810A),
 * Height/MinorLength (0x8148) and Width/MajorLength (0x814A).
 */
enum {
	SubsystemStatus = 0x8504,	/* Subsystem Status: read only */
	  /* read only: whether we get interrupts on various events */
	  VsyncInt		= 1<<0,		/* vertical sync */
	  GeBusyInt		= 1<<1,		/* 2D graphics engine busy */
	  BfifoFullInt	= 1<<2,		/* BIU FIFO full */
	  BfifoEmptyInt	= 1<<3,		/* BIU FIFO empty */
	  CfifoFullInt	= 1<<4,		/* command FIFO full */
	  CfifoEmptyInt	= 1<<5,		/* command FIFO empty */
	  BciInt		= 1<<6, 	/* BCI */
	  LpbInt		= 1<<7, 	/* LPB */
	  CbHiInt		= 1<<16,	/* COB upper threshold */
	  CbLoInt		= 1<<17,	/* COB lower threshold */

	SubsystemCtl 	= 0x8504,	/* Subsystem Control: write only */
	  /* clear interrupts for various events */
	  VsyncClr		= 1<<0,
	  GeBusyClr		= 1<<1,
	  BfifoFullClr	= 1<<2,
	  BfifoEmptyClr	= 1<<3,
	  CfifoFullClr	= 1<<4,
	  CfifoEmptyClr	= 1<<5,
	  BciClr		= 1<<6,
	  LpbClr		= 1<<7,
	  CbHiClr		= 1<<16,
	  CbLoClr		= 1<<17,

	  /* enable interrupts for various events */
	  VsyncEna		= 1<<8,
	  Busy2DEna		= 1<<9,
	  BfifoFullEna	= 1<<10,
	  BfifoEmptyEna	= 1<<11,
	  CfifoFullEna	= 1<<12,
	  CfifoEmptyEna	= 1<<13,
	  SubsysBciEna	= 1<<14,
	  CbHiEna		= 1<<24,
	  CbLoEna		= 1<<25,

	  /* 2D graphics engine software reset */
	  GeSoftReset	= 1<<15,

	FifoStatus		= 0x8508,	/* FIFO status: read only */
	  CwbEmpty		= 1<<0,		/* command write buffer empty */
	  CrbEmpty		= 1<<1,		/* command read buffer empty */
	  CobEmpty		= 1<<2,		/* command overflow buffer empty */
	  CfifoEmpty	= 1<<3,		/* command FIFO empty */
	  CwbFull		= 1<<8,		/* command write buffer full */
	  CrbFull		= 1<<9,		/* command read buffer full */
	  CobFull		= 1<<10,	/* command overflow buffer full */
	  CfifoFull		= 1<<11,	/* command FIFO full */

	AdvFunCtl		= 0x850C,	/* Advanced Function Control: read/write */
	  GeEna			= 1<<0,	/* enable 2D/3D engine */
	  /*
	   * according to the manual, BigPixel should be
	   * set when bpp >= 8 (bpp != 4), and then CR50_5-4 are
	   * used to figure out bpp example.  however, it does bad things
	   * to the screen in 8bpp mode.
	   */
	  BigPixel		= 1<<2,		/* 8 or more bpp enhanced mode */
	  LaEna			= 1<<3,		/* linear addressing ena: or'ed with CR58_4 */
	  Mclk_2		= 0<<8,		/* 2D engine clock divide: MCLK/2 */
	  Mclk_4		= 1<<8,		/* " MCLK/4 */
	  Mclk			= 2<<8,		/* " MCLK */
	/* Mclk			= 3<<8,		/* " MCLK */
	  Ic33mhz		= 1<<16,	/* Internal clock 33 MHz (instead of 66) */

	WakeupReg		= 0x8510,	/* Wakeup: read/write */
	  WakeupBit		= 1<<0,	/* wake up: or'ed with 3C3_0 */

	/* coordinate registers: 16 bits each, Y at the lower offset of each pair */
	SourceY			= 0x8100,	/* UL corner of bitblt source */
	SourceX			= 0x8102,	/* " */
	RectY			= 0x8100,	/* UL corner of rectangle fill */
	RectX			= 0x8102,	/* " */
	DestY			= 0x8108,	/* UL corner of bitblt dest */
	DestX			= 0x810A,	/* " */
	Height			= 0x8148,	/* bitblt, image xfer rectangle height */
	Width			= 0x814A, 	/* bitblt, image xfer rectangle width */

	StartY			= 0x8100,	/* Line draw: first point*/
	StartX			= 0x8102,	/* " */
	/*
	 * For line draws, the following must be programmed:
	 * axial step constant = 2*min(|dx|,|dy|)
	 * diagonal step constant = 2*[min(|dx|,|dy|) - max(|dx|,|dy|)]
	 * error term = 2*min(|dx|,|dy|) - max(|dx|,|dy| - 1
	 *	[sic] when start X < end X
	 * error term = 2*min(|dx|,|dy|) - max(|dx|,|dy|
	 *  [sic] when start X >= end X
	 */
	AxialStep		= 0x8108,
	DiagonalStep	= 0x810A,
	LineError		= 0x8110,
	MinorLength		= 0x8148,	/* pixel count along minor axis */
	MajorLength		= 0x814A,	/* pixel count along major axis */

	DrawCmd			= 0x8118,	/* Drawing Command: write only */
	  CmdMagic		= 0<<1,
	  AcrossPlane	= 1<<1,		/* across the plane mode */
	  LastPixelOff	= 1<<2,		/* last pixel of line or vector draw not drawn */
	  Radial		= 1<<3,		/* enable radial direction (else axial) */
	  DoDraw		= 1<<4,		/* draw pixels (else only move current pos) */

	  DrawRight		= 1<<5,		/* axial drawing direction: left to right */
	  /* DrawLeft		= 0<<5, */
	  MajorY		= 1<<6,
	  /* MajorX		= 0<<6, */
	  DrawDown		= 1<<7,
	  /* DrawUp		= 0<<7, */
	  Degree0		= 0<<5,		/* drawing direction when Radial */
	  Degree45		= 1<<5,
		/* ... */
	  Degree315		= 7<<5,

	  UseCPUData	= 1<<8,

	  /* image write bus transfer width */
	  Bus8			= 0<<9,
	  Bus16			= 1<<9,
	  /*
	   * in Bus32 mode, doubleword bits beyond the image rect width are
	   * discarded.  each line starts on a new doubleword.
	   * Bus32AP is intended for across-the-plane mode and
	   * rounds to byte boundaries instead.
	   */
	  Bus32			= 2<<9,
	  Bus32AP		= 3<<9,

	  /* command type, bits 13 and up */
	  CmdNop		= 0<<13,	/* nop */
	  CmdLine		= 1<<13,	/* draw line */
	  CmdFill		= 2<<13,	/* fill rectangle */
	  CmdBitblt		= 6<<13,	/* bitblt */
	  CmdPatblt		= 7<<13,	/* 8x8 pattern blt */

	  /* which bitmap descriptor (global, primary, secondary) is the source */
	  SrcGBD		= 0<<16,
	  SrcPBD		= 1<<16,
	  SrcSBD		= 2<<16,

	  /* which bitmap descriptor is the destination */
	  DstGBD		= 0<<18,
	  DstPBD		= 1<<18,
	  DstSBD		= 2<<18,

	/* color sources, controls */
	BgColor			= 0x8120,	/* Background Color: read/write */
	FgColor			= 0x8124,	/* Foreground Color: read/write */
	BitplaneWmask	= 0x8128,	/* Bitplane Write Mask: read/write */
	BitplaneRmask	= 0x812C,	/* Bitplane Read Mask: read/write */
	CmpColor		= 0x8130,	/* Color Compare: read/write */
	BgMix			= 0x8134,
	FgMix			= 0x8136,
	  MixNew		= 7,
	  SrcBg			= 0<<5,
	  SrcFg			= 1<<5,
	  SrcCPU		= 2<<5,
	  SrcDisp		= 3<<5,

	/* clipping rectangle */
	TopScissors		= 0x8138,	/* Top Scissors: write only */
	LeftScissors	= 0x813A,	/* Left Scissors: write only */
	BottomScissors	= 0x813C,	/* Bottom Scissors: write only */
	RightScissors	= 0x813E,	/* Right Scissors: write only */

	/*
	 * Registers with Magic were indirectly accessed in older modes.
	 * It is not clear whether the Magic is necessary.
	 * In the older modes, writes to these registers were pipelined,
	 * so that you had to issue an engine command and wait for engine
	 * idle before reading a write back.  It is not clear if this is
	 * still the case either.
	 */
	PixCtl			= 0x8140,	/* Pixel Control: write only */
	  PixMagic		= 0xA<<12,
	  PixMixFg		= 0<<6,		/* foreground mix register always */
	  PixMixCPU		= 2<<6,		/* CPU data determines mix register */
	  PixMixDisp	= 3<<6,		/* display data determines mix register */

	MfMisc2Ctl		= 0x8142,	/* Multifunction Control Misc. 2: write only */
	  MfMisc2Magic	= 0xD<<12,
	  DstShift		= 0,		/* 3 bits: destination base address in MB */
	  SrcShift		= 4,		/* 3 bits: source base address in MB */
	  WaitFifoEmpty	= 2<<8,		/* wait for write FIFO empty between draws */

	MfMiscCtl		= 0x8144,	/* Multifunction Control Misc: write only */
	  MfMiscMagic	= 0xE<<12,
	  UseHighBits	= 1<<4,		/* select upper 16 bits for 32-bit reg access */
	  ClipInvert	= 1<<5,		/* only touch pixels outside clip rectangle */
	  SkipSame		= 0<<6,		/* ignore pixels with color CmpColor */
	  SkipDifferent	= 1<<7,		/* ignore pixels not color CmpColor */
	  CmpEna		= 1<<8,		/* enable color compare */
	  W32Ena		= 1<<9,		/* enable 32-bit register write */
	  ClipDis		= 1<<11,	/* disable clipping */

	/*
	 * The bitmap descriptor 1 registers contain the starting
	 * address of the bitmap (in bytes).
	 * The bitmap descriptor 2 registers contain stride (in pixels)
	 * in the lower 16 bits, depth (in bits) in the next 8 bits,
	 * and whether block write is disabled.
	 */
	GBD1			= 0x8168,	/* Global Bitmap Descriptor 1: read/write */
	GBD2			= 0x816C,	/* Global Bitmap Descriptor 2: read/write */
	  /* GBD2-only bits */
	  BDS64			= 1<<0,		/* bitmap descriptor size 64 bits */
	  GBDBciEna		= 1<<3,		/* BCI enable */
	  /* generic BD2 bits */
	  BlockWriteDis	= 1<<28,
	  StrideShift	= 0,
	  DepthShift	= 16,

	PBD1			= 0x8170,	/* Primary Bitmap Descriptor: read/write */
	PBD2			= 0x8174,
	SBD1			= 0x8178,	/* Secondary Bitmap Descriptor: read/write */
	SBD2			= 0x817C,
};

/* mastered data transfer registers */

/*
 * configuration/status registers
 *
 * Top-level names are byte offsets from the base of the mmio segment;
 * indented names are masks, shifts and bits within that register.
 * savagewaitidle polls either XStatus0 (using the "A" layout) or
 * AltStatus0, depending on the chip.
 */
enum {
	XStatus0			= 0x48C00,	/* Status Word 0: read only */
	  /* rev. A silicon differs from rev. B; use AltStatus0 */
	  CBEMaskA		= 0x1FFFF,	/* filled command buffer entries */
	  CBEShiftA		= 0,
	  BciIdleA		= 1<<17,	/* BCI idle */
	  Ge3IdleA		= 1<<18,	/* 3D engine idle */
	  Ge2IdleA		= 1<<19,	/* 2D engine idle */
	  McpIdleA		= 1<<20,	/* motion compensation processor idle */
	  MeIdleA		= 1<<22,	/* master engine idle */
	  PfPendA		= 1<<23,	/* page flip pending */

	  /* rev. B layout: same fields as above, shifted up */
	  CBEMaskB		= 0x1FFFFF,
	  CBEShiftB		= 0,
	  BciIdleB		= 1<<25,
	  Ge3IdleB		= 1<<26,
	  Ge2IdleB		= 1<<27,
	  McpIdleB		= 1<<28,
	  MeIdleB		= 1<<30,
	  PfPendB		= 1<<31,

	AltStatus0		= 0x48C60,	/* Alternate Status Word 0: read only */
	  CBEMask		= 0x1FFFF,
	  CBEShift		= 0,
	  /* the Savage4 manual says bits 17..23 for these, like Status0 */
	  /* empirically, they are bits 21..26 */
	  BciIdle		= 1<<21,
	  Ge3Idle		= 1<<22,
	  Ge2Idle		= 1<<23,
	  McpIdle		= 1<<24,
	  MeIdle		= 1<<25,
	  PfPend		= 1<<26,

	XStatus1			= 0x48C04,	/* Status Word 1: read only */
	  /* contains event tag 1, event tag 0, both 16 bits */

	XStatus2			= 0x48C08,	/* Status Word 2: read only */
	  ScanMask		= 0x3FF,	/* current scan line */
	  ScanShift		= 0,
	  /*
	   * NOTE(review): 0x7F100 is not a contiguous run of bits starting
	   * at bit VRTShift (11); this may be a typo for another mask.
	   * Check against the manual before using it.  Neither VRTMask nor
	   * VRTShift is referenced by the code visible in this file.
	   */
	  VRTMask		= 0x7F100,	/* vert retrace count */
	  VRTShift		= 11,

	CbThresh		= 0x48C10,	/* Command Buffer Thresholds: read/write */
	CobOff			= 0x48C14,	/* Command Overflow Buffer: read/write */

	CobPtr			= 0x48C18,	/* Command Overflow Buffer Pointers: read/write */
	  CobEna		= 1<<2,		/* command overflow buffer enable */
	  CobBciEna		= 1<<3,		/* BCI function enable */
	  CbeMask		= 0xFFFF8000,	/* no. of entries in command buffer */
	  CbeShift		= 15,

	AltStatus1		= 0x48C64,	/* Alternate Status Word 1: read only */
	  /* contains current texture surface tag, vertex buffer tag */

};

/*
 * Record of engine-idle timeouts, written only by savagewaitidle.
 *
 * tostatw is used as a 16-entry ring indexed by idletimeout&15.
 * Each timeout stores two words: the status value read after the
 * polling loop gave up, then the address it was read from.
 * idletimeout is therefore incremented twice per timeout and counts
 * stored words, not timeouts.
 */
struct {
	ulong idletimeout;
	ulong tostatw[16];
} savagestats;

/* upper bound on status-register polls in savagewaitidle before giving up */
enum {
	Maxloop = 1<<20
};

/*
 * Poll the chip's status word until the 2D engine is idle and no
 * command buffer entries remain, giving up after Maxloop reads.
 *
 * Which status word is read, and which bits matter, depends on
 * scr->id.  For an id not listed here the function returns at once
 * without reading any register.
 *
 * A timeout is not reported to the caller; it only leaves two words
 * in savagestats.tostatw (the status value, then the register address).
 */
static void
savagewaitidle(VGAscr *scr)
{
	long tries;
	ulong off, watch, idle;
	ulong *status;

	switch(scr->id){
	case SAVAGE4:
	case PROSAVAGEP:
	case PROSAVAGEK:
	case PROSAVAGE8:
		/* engine idle and FIFO empty, from the alternate status word */
		off = AltStatus0;
		idle = Ge2Idle;
		watch = CBEMask | Ge2Idle;
		break;
	/* SAVAGEMX and SAVAGEIX (non-MV) are not handled: untested */
	case SAVAGEMXMV:
	case SAVAGEIXMV:
	case SUPERSAVAGEIXC16:
		/* engine idle and FIFO empty, from status word 0 */
		off = XStatus0;
		idle = Ge2IdleA;
		watch = CBEMaskA | Ge2IdleA;
		break;
	default:
		/*
		 * Nothing sensible to poll.  Printing is not an option
		 * because print would end up calling back into here.
		 * savageinit is meant to reject such chips up front.
		 */
		return;
	}
	status = (ulong*)((uchar*)scr->mmio + off);

	tries = 0;
	while(tries < Maxloop){
		if((*status & watch) == idle)
			return;
		tries++;
	}

	/* timed out: log what the status word says now, and where it lives */
	savagestats.tostatw[savagestats.idletimeout++&15] = *status;
	savagestats.tostatw[savagestats.idletimeout++&15] = (ulong)status;
}

/*
 * Fill rectangle r with the pixel value sval using the 2D engine,
 * then wait for the engine to go idle.  Always returns 1.
 *
 * The width and height registers are loaded with the rectangle's
 * extent minus one.
 *
 * NOTE(review): BgMix (0x8134) and FgMix (0x8136) are two bytes apart
 * but both are stored as ulong, so each store also covers the offset
 * that follows it (FgMix, then TopScissors).  This matches what the
 * driver has always done; confirm against the manual before changing.
 */
static int
savagefill(VGAscr *scr, Rectangle r, ulong sval)
{
	uchar *regs;
	ulong cmd;
	int w, h;

	regs = (uchar*)scr->mmio;
	w = Dx(r)-1;
	h = Dy(r)-1;
	cmd = CmdMagic | DoDraw | CmdFill | DrawRight | DrawDown;

	/* both colors and both mixes select the same solid value */
	*(ulong*)(regs+FgColor) = sval;
	*(ulong*)(regs+BgColor) = sval;
	*(ulong*)(regs+BgMix) = SrcFg|MixNew;
	*(ulong*)(regs+FgMix) = SrcFg|MixNew;

	/* geometry */
	*(ushort*)(regs+RectY) = r.min.y;
	*(ushort*)(regs+RectX) = r.min.x;
	*(ushort*)(regs+Width) = w;
	*(ushort*)(regs+Height) = h;

	/* writing the command starts the fill */
	*(ulong*)(regs+DrawCmd) = cmd;
	savagewaitidle(scr);
	return 1;
}

/*
 * Copy the screen rectangle sr onto r with a 2D engine bitblt,
 * then wait for the engine to go idle.  Always returns 1.
 *
 * The blit extent is taken from r.  On each axis the copy starts at
 * the min edge and moves forward when r is at or before sr, and
 * otherwise starts at the last pixel (max-1) and moves backward --
 * presumably so that overlapping source and destination copy correctly.
 */
static int
savagescroll(VGAscr *scr, Rectangle r, Rectangle sr)
{
	uchar *regs;
	ulong op;
	Point from, to;

	op = CmdMagic | DoDraw | CmdBitblt | SrcPBD | DstGBD;

	/* horizontal direction */
	if(r.min.x > sr.min.x){
		/* right to left, beginning at the last column */
		to.x = r.max.x-1;
		from.x = sr.max.x-1;
	}else{
		op |= DrawRight;
		to.x = r.min.x;
		from.x = sr.min.x;
	}

	/* vertical direction */
	if(r.min.y > sr.min.y){
		/* bottom to top, beginning at the last row */
		to.y = r.max.y-1;
		from.y = sr.max.y-1;
	}else{
		op |= DrawDown;
		to.y = r.min.y;
		from.y = sr.min.y;
	}

	regs = (uchar*)scr->mmio;

	*(ushort*)(regs+SourceX) = from.x;
	*(ushort*)(regs+SourceY) = from.y;
	*(ushort*)(regs+DestX) = to.x;
	*(ushort*)(regs+DestY) = to.y;
	*(ushort*)(regs+Width) = Dx(r)-1;
	*(ushort*)(regs+Height) = Dy(r)-1;
	/* take pixel data from display memory for both mixes */
	*(ulong*)(regs+BgMix) = SrcDisp|MixNew;
	*(ulong*)(regs+FgMix) = SrcDisp|MixNew;
	/* writing the command starts the blit */
	*(ulong*)(regs+DrawCmd) = op;
	savagewaitidle(scr);
	return 1;
}

/*
 * Blank (blank != 0) or unblank (blank == 0) the display through the
 * VGA sequencer registers.  The VGAscr argument is unused.
 */
static void
savageblank(VGAscr*, int blank)
{
	uchar seqD;
	int seq8, lcd;

	/*
	 * Monitor side (DPMS).
	 * Sequencer register 8 gets bits 0x06 set first -- presumably
	 * to unlock the extended sequencer registers; TODO confirm.
	 * Register 0xD keeps only its low two bits, with 0x50 added
	 * when blanking.
	 */
	seq8 = vgaxi(Seqx, 8) | 0x06;
	vgaxo(Seqx, 8, seq8);
	seqD = vgaxi(Seqx, 0xD);
	seqD &= 0x03;
	if(blank)
		seqD |= 0x50;
	vgaxo(Seqx, 0xD, seqD);

	/*
	 * LCD side: bit 0x10 of sequencer register 0x31 is
	 * cleared to blank and set to unblank.
	 */
	lcd = vgaxi(Seqx, 0x31);
	if(blank)
		lcd &= ~0x10;
	else
		lcd |= 0x10;
	vgaxo(Seqx, 0x31, lcd);
}


/*
 * Prepare the Savage 2D engine for use and install the accelerated
 * fill, scroll and blank routines in scr.
 *
 * Nothing is installed (only a message is printed) when scr->id is
 * not one of the chips listed below or when scr->mmio is nil.
 *
 * The register writes below are order-sensitive; see the comment on
 * the second GBD write.  Do not reorder them without testing on
 * hardware.
 */
void
savageinit(VGAscr *scr)
{
	uchar *mmio;
	ulong bd;

	/* if you add chip IDs here be sure to update savagewaitidle */
	switch(scr->id){
	case SAVAGE4:
	case PROSAVAGEP:
	case PROSAVAGEK:
	case PROSAVAGE8:
	case SAVAGEIXMV:
	case SUPERSAVAGEIXC16:
	case SAVAGEMXMV:
		break;
	default:
		print("unknown savage %.4lux\n", scr->id);
		return;
	}

	mmio = (uchar*)scr->mmio;
	if(mmio == nil) {
		print("savageinit: no mmio\n");
		return;
	}

	/* 2D graphics engine software reset */
	/* assert the reset bit, hold it for delay(2), then release it */
	*(ushort*)(mmio+SubsystemCtl) = GeSoftReset;
	delay(2);
	*(ushort*)(mmio+SubsystemCtl) = 0;
	savagewaitidle(scr);

	/* disable BCI as much as possible */
	/* 16-bit read-modify-write of the low half of each register */
	*(ushort*)(mmio+CobPtr) &= ~CobBciEna;
	*(ushort*)(mmio+GBD2) &= ~GBDBciEna;
	savagewaitidle(scr);

	/* enable 32-bit writes, disable clipping */
	*(ushort*)(mmio+MfMiscCtl) = MfMiscMagic|W32Ena|ClipDis;
	savagewaitidle(scr);

	/* enable all read, write planes */
	*(ulong*)(mmio+BitplaneRmask) = ~0;
	*(ulong*)(mmio+BitplaneWmask) = ~0;
	savagewaitidle(scr);

	/* turn on linear access, 2D engine */
	*(ulong*)(mmio+AdvFunCtl) |= GeEna|LaEna;
	savagewaitidle(scr);

	/* set bitmap descriptors */
	/*
	 * One descriptor value is shared by all three bitmaps:
	 * screen depth, screen width as the stride, block write
	 * disabled, plus BDS64.  Every bitmap starts at offset 0.
	 */
	bd = (scr->gscreen->depth<<DepthShift) |
		(Dx(scr->gscreen->r)<<StrideShift) | BlockWriteDis
		| BDS64;

	*(ulong*)(mmio+GBD1) = 0;
	*(ulong*)(mmio+GBD2) = bd;

	*(ulong*)(mmio+PBD1) = 0;
	*(ulong*)(mmio+PBD2) = bd;

	*(ulong*)(mmio+SBD1) = 0;
	*(ulong*)(mmio+SBD2) = bd;

	/*
	 * For some reason, the GBD needs to get programmed twice,
	 * once before the PBD, SBD, and once after.
	 * This empirically makes it get set right.
	 * I would like to better understand the ugliness
	 * going on here.
	 */
	*(ulong*)(mmio+GBD1) = 0;
	*(ulong*)(mmio+GBD2) = bd;
	/* and the upper 16 bits of GBD2 once more, as a 16-bit store */
	*(ushort*)(mmio+GBD2+2) = bd>>16;
	savagewaitidle(scr);

	/* hand the accelerated routines to the generic VGA code */
	scr->fill = savagefill;
	scr->scroll = savagescroll;
	scr->blank = savageblank;
	/* hwblank is defined outside this file; presumably a global blanking flag -- TODO confirm */
	hwblank = 0;
}