MAME SVN History

199869 Revisions

r31038 Thursday 19th June, 2014 at 11:44:56 UTC by Couriersud
Moved solver templates into separate header files.

[src/emu/netlist/analog]

nld_bjt.h nld_ms_direct.h* nld_ms_direct1.h* nld_ms_direct2.h* nld_ms_gauss_seidel.h* nld_solver.c nld_solver.h nld_twoterm.h

trunk/src/emu/netlist/analog/nld_ms_direct.h
r0	r31038
	1	/*
	2	* nld_ms_direct.h
	3	*
	4	*/
	5
	6	#ifndef NLD_MS_DIRECT_H_
	7	#define NLD_MS_DIRECT_H_
	8
	9	#include "nld_solver.h"
	10
	11	template <int m_N, int _storage_N>
	12	class netlist_matrix_solver_direct_t: public netlist_matrix_solver_t
	13	{
	14	public:
	15
	16	netlist_matrix_solver_direct_t(const netlist_solver_parameters_t &params, int size);
	17
	18	virtual ~netlist_matrix_solver_direct_t();
	19
	20	ATTR_COLD virtual void vsetup(netlist_analog_net_t::list_t &nets);
	21	ATTR_COLD virtual void reset() { netlist_matrix_solver_t::reset(); }
	22
	23	ATTR_HOT inline const int N() const { if (m_N == 0) return m_dim; else return m_N; }
	24
	25	ATTR_HOT inline int vsolve_non_dynamic();
	26
	27	protected:
	28	ATTR_COLD virtual void add_term(int net_idx, netlist_terminal_t *term);
	29
	30	ATTR_HOT virtual double vsolve();
	31
	32	ATTR_HOT int solve_non_dynamic();
	33	ATTR_HOT void build_LE();
	34	ATTR_HOT void gauss_LE(double (* RESTRICT x));
	35	ATTR_HOT double delta(const double (* RESTRICT V));
	36	ATTR_HOT void store(const double (* RESTRICT V), const bool store_RHS);
	37
	38	/* bring the whole system to the current time
	39	* Don't schedule a new calculation time. The recalculation has to be
	40	* triggered by the caller after the netlist element was changed.
	41	*/
	42	ATTR_HOT double compute_next_timestep();
	43
	44	double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
	45	double m_RHS[_storage_N];
	46	double m_last_RHS[_storage_N]; // right hand side - contains currents
	47	double m_Vdelta[_storage_N];
	48	double m_last_V[_storage_N];
	49
	50	terms_t **m_terms;
	51	terms_t *m_rails_temp;
	52
	53	private:
	54	vector_ops_t *m_row_ops[_storage_N + 1];
	55
	56	int m_dim;
	57	double m_lp_fact;
	58	};
	59
	60	// ----------------------------------------------------------------------------------------
	61	// netlist_matrix_solver_direct
	62	// ----------------------------------------------------------------------------------------
	63
	64	template <int m_N, int _storage_N>
	65	netlist_matrix_solver_direct_t<m_N, _storage_N>::~netlist_matrix_solver_direct_t()
	66	{
	67	for (int k=0; k<_storage_N; k++)
	68	{
	69	//delete[] m_A[k];
	70	}
	71	//delete[] m_last_RHS;
	72	//delete[] m_RHS;
	73	delete[] m_terms;
	74	delete[] m_rails_temp;
	75	//delete[] m_row_ops;
	76
	77	}
	78
	79	template <int m_N, int _storage_N>
	80	ATTR_HOT double netlist_matrix_solver_direct_t<m_N, _storage_N>::compute_next_timestep()
	81	{
	82	double new_solver_timestep = m_params.m_max_timestep;
	83
	84	if (m_params.m_dynamic)
	85	{
	86	/*
	87	* FIXME: We should extend the logic to use either all nets or
	88	* only output nets.
	89	*/
	90	#if 0
	91	for (netlist_analog_output_t * const *p = m_inps.first(); p != NULL; p = m_inps.next(p))
	92	{
	93	netlist_analog_net_t n = (p)->m_proxied_net;
	94	#else
	95	for (int k = 0; k < N(); k++)
	96	{
	97	netlist_analog_net_t *n = m_nets[k];
	98	#endif
	99	const double DD_n = (n->m_cur_Analog - m_last_V[k]);
	100	const double hn = current_timestep();
	101
	102	double DD2 = (DD_n / hn - n->m_DD_n_m_1 / n->m_h_n_m_1) / (hn + n->m_h_n_m_1);
	103	double new_net_timestep;
	104
	105	n->m_h_n_m_1 = hn;
	106	n->m_DD_n_m_1 = DD_n;
	107	if (fabs(DD2) > 1e-50) // avoid div-by-zero
	108	new_net_timestep = sqrt(m_params.m_lte / fabs(0.5*DD2));
	109	else
	110	new_net_timestep = m_params.m_max_timestep;
	111
	112	if (new_net_timestep < new_solver_timestep)
	113	new_solver_timestep = new_net_timestep;
	114	}
	115	if (new_solver_timestep < m_params.m_min_timestep)
	116	new_solver_timestep = m_params.m_min_timestep;
	117	}
	118	//if (new_solver_timestep > 10.0 * hn)
	119	// new_solver_timestep = 10.0 * hn;
	120	return new_solver_timestep;
	121	}
	122
	123	template <int m_N, int _storage_N>
	124	ATTR_COLD void netlist_matrix_solver_direct_t<m_N, _storage_N>::add_term(int k, netlist_terminal_t *term)
	125	{
	126	if (term->m_otherterm->net().isRailNet())
	127	{
	128	m_rails_temp[k].add(term, -1);
	129	}
	130	else
	131	{
	132	int ot = get_net_idx(&term->m_otherterm->net());
	133	if (ot>=0)
	134	{
	135	m_terms[k]->add(term, ot);
	136	SOLVER_VERBOSE_OUT(("Net %d Term %s %f %f\n", k, terms[i]->name().cstr(), terms[i]->m_gt, terms[i]->m_go));
	137	}
	138	/* Should this be allowed ? */
	139	else // if (ot<0)
	140	{
	141	m_rails_temp[k].add(term, ot);
	142	netlist().error("found term with missing othernet %s\n", term->name().cstr());
	143	}
	144	}
	145	}
	146
	147
	148	template <int m_N, int _storage_N>
	149	ATTR_COLD void netlist_matrix_solver_direct_t<m_N, _storage_N>::vsetup(netlist_analog_net_t::list_t &nets)
	150	{
	151
	152	if (m_dim < nets.count())
	153	netlist().error("Dimension %d less than %d", m_dim, nets.count());
	154
	155	for (int k = 0; k < N(); k++)
	156	{
	157	m_terms[k]->clear();
	158	m_rails_temp[k].clear();
	159	}
	160
	161	netlist_matrix_solver_t::setup(nets);
	162
	163	for (int k = 0; k < N(); k++)
	164	{
	165	m_terms[k]->m_railstart = m_terms[k]->count();
	166	for (int i = 0; i < m_rails_temp[k].count(); i++)
	167	this->m_terms[k]->add(m_rails_temp[k].terms()[i], m_rails_temp[k].net_other()[i]);
	168
	169	m_rails_temp[k].clear(); // no longer needed
	170	m_terms[k]->set_pointers();
	171	}
	172
	173	#if 1
	174
	175	/* Sort in descending order by number of connected matrix voltages.
	176	* The idea is, that for Gauss-Seidel algo the first voltage computed
	177	* depends on the greatest number of previous voltages thus taking into
	178	* account the maximum amout of information.
	179	*
	180	* This actually improves performance on popeye slightly. Average
	181	* GS computations reduce from 2.509 to 2.370
	182	*
	183	* Smallest to largest : 2.613
	184	* Unsorted : 2.509
	185	* Largest to smallest : 2.370
	186	*
	187	* Sorting as a general matrix pre-conditioning is mentioned in
	188	* literature but I have found no articles about Gauss Seidel.
	189	*
	190	*/
	191
	192
	193	for (int k = 0; k < N() / 2; k++)
	194	for (int i = 0; i < N() - 1; i++)
	195	{
	196	if (m_terms[i]->m_railstart < m_terms[i+1]->m_railstart)
	197	{
	198	std::swap(m_terms[i],m_terms[i+1]);
	199	m_nets.swap(i, i+1);
	200	}
	201	}
	202
	203	for (int k = 0; k < N(); k++)
	204	{
	205	int *other = m_terms[k]->net_other();
	206	for (int i = 0; i < m_terms[k]->count(); i++)
	207	if (other[i] != -1)
	208	other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net());
	209	}
	210
	211	#endif
	212
	213	}
	214
	215	template <int m_N, int _storage_N>
	216	ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE()
	217	{
	218	#if 0
	219	for (int k=0; k < N(); k++)
	220	for (int i=0; i < N(); i++)
	221	m_A[k][i] = 0.0;
	222	#endif
	223
	224	for (int k = 0; k < N(); k++)
	225	{
	226	for (int i=0; i < N(); i++)
	227	m_A[k][i] = 0.0;
	228
	229	double rhsk = 0.0;
	230	double akk = 0.0;
	231	{
	232	const int terms_count = m_terms[k]->count();
	233	const double * RESTRICT gt = m_terms[k]->gt();
	234	const double * RESTRICT go = m_terms[k]->go();
	235	const double * RESTRICT Idr = m_terms[k]->Idr();
	236	#if VECTALT
	237
	238	for (int i = 0; i < terms_count; i++)
	239	{
	240	rhsk = rhsk + Idr[i];
	241	akk = akk + gt[i];
	242	}
	243	#else
	244	m_terms[k]->ops()->sum2(Idr, gt, rhsk, akk);
	245	#endif
	246	double * const * RESTRICT other_cur_analog = m_terms[k]->other_curanalog();
	247	for (int i = m_terms[k]->m_railstart; i < terms_count; i++)
	248	{
	249	//rhsk = rhsk + go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog();
	250	rhsk = rhsk + go[i] * *other_cur_analog[i];
	251	}
	252	}
	253	#if 0
	254	/*
	255	* Matrix preconditioning with 1.0 / Akk
	256	*
	257	* will save a number of calculations during elimination
	258	*
	259	*/
	260	akk = 1.0 / akk;
	261	m_RHS[k] = rhsk * akk;
	262	m_A[k][k] += 1.0;
	263	{
	264	const int *net_other = m_terms[k]->net_other();
	265	const double *go = m_terms[k]->go();
	266	const int railstart = m_terms[k]->m_railstart;
	267
	268	for (int i = 0; i < railstart; i++)
	269	{
	270	m_A[k][net_other[i]] += -go[i] * akk;
	271	}
	272	}
	273	#else
	274	m_RHS[k] = rhsk;
	275	m_A[k][k] += akk;
	276	{
	277	const int * RESTRICT net_other = m_terms[k]->net_other();
	278	const double * RESTRICT go = m_terms[k]->go();
	279	const int railstart = m_terms[k]->m_railstart;
	280
	281	for (int i = 0; i < railstart; i++)
	282	{
	283	m_A[k][net_other[i]] += -go[i];
	284	}
	285	}
	286	#endif
	287	}
	288	}
	289
	290	template <int m_N, int _storage_N>
	291	ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::gauss_LE(
	292	double (* RESTRICT x))
	293	{
	294	#if 0
	295	for (int i = 0; i < N(); i++)
	296	{
	297	for (int k = 0; k < N(); k++)
	298	printf("%f ", m_A[i][k]);
	299	printf("\| %f = %f \n", x[i], m_RHS[i]);
	300	}
	301	printf("\n");
	302	#endif
	303
	304	const int kN = N();
	305
	306	for (int i = 0; i < kN; i++) {
	307	// FIXME: use a parameter to enable pivoting?
	308	if (USE_PIVOT_SEARCH)
	309	{
	310	/* Find the row with the largest first value */
	311	int maxrow = i;
	312	for (int j = i + 1; j < kN; j++)
	313	{
	314	if (fabs(m_A[j][i]) > fabs(m_A[maxrow][i]))
	315	maxrow = j;
	316	}
	317
	318	if (maxrow != i)
	319	{
	320	/* Swap the maxrow and ith row */
	321	for (int k = i; k < kN; k++) {
	322	std::swap(m_A[i][k], m_A[maxrow][k]);
	323	}
	324	std::swap(m_RHS[i], m_RHS[maxrow]);
	325	}
	326	}
	327
	328	/* FIXME: Singular matrix? */
	329	const double f = 1.0 / m_A[i][i];
	330
	331	/* Eliminate column i from row j */
	332
	333	for (int j = i + 1; j < kN; j++)
	334	{
	335	const double f1 = - m_A[j][i] * f;
	336	if (f1 != 0.0)
	337	{
	338	#if 0 && VECTALT
	339	for (int k = i + 1; k < kN; k++)
	340	m_A[j][k] += m_A[i][k] * f1;
	341	#else
	342	// addmult gives some performance increase here...
	343	m_row_ops[kN - (i + 1)]->addmult(&m_A[j][i+1], &m_A[i][i+1], f1) ;
	344	#endif
	345	m_RHS[j] += m_RHS[i] * f1;
	346	}
	347	}
	348	}
	349	/* back substitution */
	350	for (int j = kN - 1; j >= 0; j--)
	351	{
	352	double tmp = 0;
	353
	354	for (int k = j + 1; k < kN; k++)
	355	tmp += m_A[j][k] * x[k];
	356
	357	x[j] = (m_RHS[j] - tmp) / m_A[j][j];
	358	}
	359	#if 0
	360	printf("Solution:\n");
	361	for (int i = 0; i < N(); i++)
	362	{
	363	for (int k = 0; k < N(); k++)
	364	printf("%f ", m_A[i][k]);
	365	printf("\| %f = %f \n", x[i], m_RHS[i]);
	366	}
	367	printf("\n");
	368	#endif
	369
	370	}
	371
	372	template <int m_N, int _storage_N>
	373	ATTR_HOT double netlist_matrix_solver_direct_t<m_N, _storage_N>::delta(
	374	const double (* RESTRICT V))
	375	{
	376	double cerr = 0;
	377	double cerr2 = 0;
	378	for (int i = 0; i < this->N(); i++)
	379	{
	380	const double e = (V[i] - this->m_nets[i]->m_cur_Analog);
	381	const double e2 = (m_RHS[i] - this->m_last_RHS[i]);
	382	cerr = (fabs(e) > cerr ? fabs(e) : cerr);
	383	cerr2 = (fabs(e2) > cerr2 ? fabs(e2) : cerr2);
	384	}
	385	// FIXME: Review
	386	return cerr + cerr2*100000.0;
	387	}
	388
	389	template <int m_N, int _storage_N>
	390	ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::store(
	391	const double (* RESTRICT V), const bool store_RHS)
	392	{
	393	for (int i = 0; i < this->N(); i++)
	394	{
	395	this->m_nets[i]->m_cur_Analog = V[i];
	396	}
	397	if (store_RHS)
	398	{
	399	for (int i = 0; i < this->N(); i++)
	400	{
	401	this->m_last_RHS[i] = m_RHS[i];
	402	}
	403	}
	404	}
	405
	406	template <int m_N, int _storage_N>
	407	ATTR_HOT double netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve()
	408	{
	409	solve_base<netlist_matrix_solver_direct_t>(this);
	410	return this->compute_next_timestep();
	411	}
	412
	413
	414	template <int m_N, int _storage_N>
	415	ATTR_HOT int netlist_matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic()
	416	{
	417	double new_v[_storage_N] = { 0.0 };
	418
	419	this->gauss_LE(new_v);
	420
	421	if (this->is_dynamic())
	422	{
	423	double err = delta(new_v);
	424
	425	store(new_v, true);
	426
	427	if (err > this->m_params.m_accuracy)
	428	{
	429	return 2;
	430	}
	431	return 1;
	432	}
	433	store(new_v, false); // ==> No need to store RHS
	434	return 1;
	435	}
	436
	437	template <int m_N, int _storage_N>
	438	ATTR_HOT inline int netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic()
	439	{
	440	this->build_LE();
	441
	442	return this->solve_non_dynamic();
	443	}
	444
	445	template <int m_N, int _storage_N>
	446	netlist_matrix_solver_direct_t<m_N, _storage_N>::netlist_matrix_solver_direct_t(const netlist_solver_parameters_t &params, int size)
	447	: netlist_matrix_solver_t(params)
	448	, m_dim(size)
	449	, m_lp_fact(0)
	450	{
	451	m_terms = new terms_t *[N()];
	452	m_rails_temp = new terms_t[N()];
	453
	454	for (int k = 0; k < N(); k++)
	455	{
	456	m_terms[k] = new terms_t;
	457	m_row_ops[k] = vector_ops_t::create_ops(k);
	458	}
	459	m_row_ops[N()] = vector_ops_t::create_ops(N());
	460	}
	461
	462
	463
	464	#endif /* NLD_MS_DIRECT_H_ */

Property changes on: trunk/src/emu/netlist/analog/nld_ms_direct.h
Added: svn:mime-type + text/plain Added: svn:eol-style + native

trunk/src/emu/netlist/analog/nld_ms_direct1.h
r0	r31038
	1	/*
	2	* nld_ms_direct1.h
	3	*
	4	*/
	5
	6	#ifndef NLD_MS_DIRECT1_H_
	7	#define NLD_MS_DIRECT1_H_
	8
	9	#include "nld_solver.h"
	10	#include "nld_ms_direct.h"
	11
	12	class ATTR_ALIGNED(64) netlist_matrix_solver_direct1_t: public netlist_matrix_solver_direct_t<1,1>
	13	{
	14	public:
	15
	16	netlist_matrix_solver_direct1_t(const netlist_solver_parameters_t &params)
	17	: netlist_matrix_solver_direct_t<1, 1>(params, 1)
	18	{}
	19	ATTR_HOT inline int vsolve_non_dynamic();
	20	protected:
	21	ATTR_HOT virtual double vsolve();
	22	private:
	23	};
	24
	25	// ----------------------------------------------------------------------------------------
	26	// netlist_matrix_solver - Direct1
	27	// ----------------------------------------------------------------------------------------
	28
	29	ATTR_HOT double netlist_matrix_solver_direct1_t::vsolve()
	30	{
	31	solve_base<netlist_matrix_solver_direct1_t>(this);
	32	return this->compute_next_timestep();
	33	}
	34
	35	ATTR_HOT inline int netlist_matrix_solver_direct1_t::vsolve_non_dynamic()
	36	{
	37
	38	netlist_analog_net_t *net = m_nets[0];
	39	this->build_LE();
	40	//NL_VERBOSE_OUT(("%f %f\n", new_val, m_RHS[0] / m_A[0][0]);
	41
	42	double new_val = m_RHS[0] / m_A[0][0];
	43
	44	double e = (new_val - net->m_cur_Analog);
	45	double cerr = fabs(e);
	46
	47	net->m_cur_Analog = new_val;
	48
	49	if (is_dynamic() && (cerr > m_params.m_accuracy))
	50	{
	51	return 2;
	52	}
	53	else
	54	return 1;
	55
	56	}
	57
	58
	59
	60	#endif /* NLD_MS_DIRECT1_H_ */

Property changes on: trunk/src/emu/netlist/analog/nld_ms_direct1.h
Added: svn:mime-type + text/plain Added: svn:eol-style + native

trunk/src/emu/netlist/analog/nld_solver.h
r31037	r31038
11	11
12	12	//#define ATTR_ALIGNED(N) __attribute__((aligned(N)))
13	13	#define ATTR_ALIGNED(N) ATTR_ALIGN
14		//#undef RESTRICT
15		//#define RESTRICT
16	14
	15	#define USE_PIVOT_SEARCH (0)
	16	#define VECTALT 1
	17	#define USE_GABS 0
	18	#define USE_MATRIX_GS 0
	19	// savings are eaten up by effort
	20	#define USE_LINEAR_PREDICTION (0)
17	21
18	22	// ----------------------------------------------------------------------------------------
19	23	// Macros
r31037	r31038
37	41	double m_lte;
38	42	double m_min_timestep;
39	43	double m_max_timestep;
	44	double m_sor;
40	45	bool m_dynamic;
41	46	int m_gs_loops;
42	47	int m_nr_loops;
r31037	r31038
54	59
55	60	virtual ~vector_ops_t() {}
56	61
57		ATTR_ALIGNED(64) double * RESTRICT m_V;
58
59	62	virtual const double sum(const double * v) = 0;
60	63	virtual void sum2(const double * RESTRICT v1, const double * RESTRICT v2, double & RESTRICT s1, double & RESTRICT s2) = 0;
61	64	virtual void addmult(double * RESTRICT v1, const double * RESTRICT v2, const double &mult) = 0;
r31037	r31038
63	66
64	67	virtual const double sumabs(const double * v) = 0;
65	68
	69	static vector_ops_t *create_ops(const int size);
	70
66	71	protected:
67	72	int m_dim;
68	73
r31037	r31038
150	155	NETLIST_PREVENT_COPYING(terms_t)
151	156
152	157	public:
153		ATTR_COLD terms_t() {}
	158	ATTR_COLD terms_t() : m_railstart(0), m_ops(NULL)
	159	{}
154	160
155	161	ATTR_COLD void clear()
156	162	{
r31037	r31038
178	184	private:
179	185	plinearlist_t<netlist_terminal_t *> m_term;
180	186	plinearlist_t<int> m_net_other;
181		plinearlist_t<double> m_gt;
182	187	plinearlist_t<double> m_go;
	188	plinearlist_t<double> m_gt;
183	189	plinearlist_t<double> m_Idr;
184	190	plinearlist_t<double *> m_other_curanalog;
185	191	vector_ops_t * m_ops;
r31037	r31038
191	197	typedef plinearlist_t<netlist_matrix_solver_t *> list_t;
192	198	typedef netlist_core_device_t::list_t dev_list_t;
193	199
194		ATTR_COLD netlist_matrix_solver_t();
	200	ATTR_COLD netlist_matrix_solver_t(const netlist_solver_parameters_t &params);
195	201	ATTR_COLD virtual ~netlist_matrix_solver_t();
196	202
197	203	ATTR_COLD virtual void vsetup(netlist_analog_net_t::list_t &nets) = 0;
r31037	r31038
215	221	ATTR_COLD virtual void start();
216	222	ATTR_COLD virtual void reset();
217	223
218		netlist_solver_parameters_t m_params;
219
220	224	ATTR_COLD int get_net_idx(netlist_net_t *net);
221	225	ATTR_COLD virtual void log_stats() {};
222	226
r31037	r31038
234	238	plinearlist_t<netlist_analog_output_t *> m_inps;
235	239
236	240	int m_calculations;
	241	const netlist_solver_parameters_t &m_params;
237	242
238	243	ATTR_HOT inline const double current_timestep() { return m_cur_ts; }
239	244	private:
r31037	r31038
252	257
253	258	};
254	259
255		template <int m_N, int _storage_N>
256		class netlist_matrix_solver_direct_t: public netlist_matrix_solver_t
257		{
258		public:
259	260
260		netlist_matrix_solver_direct_t(int size);
261	261
262		virtual ~netlist_matrix_solver_direct_t();
263
264		ATTR_COLD virtual void vsetup(netlist_analog_net_t::list_t &nets);
265		ATTR_COLD virtual void reset() { netlist_matrix_solver_t::reset(); }
266
267		ATTR_HOT inline const int N() const { if (m_N == 0) return m_dim; else return m_N; }
268
269		ATTR_HOT inline int vsolve_non_dynamic();
270
271		protected:
272		ATTR_COLD virtual void add_term(int net_idx, netlist_terminal_t *term);
273
274		ATTR_HOT virtual double vsolve();
275
276		ATTR_HOT int solve_non_dynamic();
277		ATTR_HOT void build_LE();
278		ATTR_HOT void gauss_LE(double (* RESTRICT x));
279		ATTR_HOT double delta(const double (* RESTRICT V));
280		ATTR_HOT void store(const double (* RESTRICT V), const bool store_RHS);
281
282		/* bring the whole system to the current time
283		* Don't schedule a new calculation time. The recalculation has to be
284		* triggered by the caller after the netlist element was changed.
285		*/
286		ATTR_HOT double compute_next_timestep();
287
288		double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
289		double m_RHS[_storage_N];
290		double m_last_RHS[_storage_N]; // right hand side - contains currents
291		double m_Vdelta[_storage_N];
292		double m_last_V[_storage_N];
293
294		terms_t **m_terms;
295
296		terms_t *m_rails_temp;
297
298		private:
299		vector_ops_t *m_row_ops[_storage_N + 1];
300
301		int m_dim;
302		double m_lp_fact;
303		};
304
305		template <int m_N, int _storage_N>
306		class ATTR_ALIGNED(64) netlist_matrix_solver_gauss_seidel_t: public netlist_matrix_solver_direct_t<m_N, _storage_N>
307		{
308		public:
309
310		netlist_matrix_solver_gauss_seidel_t(int size)
311		: netlist_matrix_solver_direct_t<m_N, _storage_N>(size)
312		, m_lp_fact(0)
313		, m_gs_fail(0)
314		, m_gs_total(0)
315		{}
316
317		virtual ~netlist_matrix_solver_gauss_seidel_t() {}
318
319		ATTR_COLD virtual void log_stats();
320
321		ATTR_HOT inline int vsolve_non_dynamic();
322		protected:
323		ATTR_HOT virtual double vsolve();
324
325		private:
326		double m_lp_fact;
327		int m_gs_fail;
328		int m_gs_total;
329
330		};
331
332		class ATTR_ALIGNED(64) netlist_matrix_solver_direct1_t: public netlist_matrix_solver_direct_t<1,1>
333		{
334		public:
335
336		netlist_matrix_solver_direct1_t()
337		: netlist_matrix_solver_direct_t<1, 1>(1)
338		{}
339		ATTR_HOT inline int vsolve_non_dynamic();
340		protected:
341		ATTR_HOT virtual double vsolve();
342		private:
343		};
344
345		class ATTR_ALIGNED(64) netlist_matrix_solver_direct2_t: public netlist_matrix_solver_direct_t<2,2>
346		{
347		public:
348
349		netlist_matrix_solver_direct2_t()
350		: netlist_matrix_solver_direct_t<2, 2>(2)
351		{}
352		ATTR_HOT inline int vsolve_non_dynamic();
353		protected:
354		ATTR_HOT virtual double vsolve();
355		private:
356		};
357
358	262	class ATTR_ALIGNED(64) NETLIB_NAME(solver) : public netlist_device_t
359	263	{
360	264	public:
r31037	r31038
381	285	netlist_param_double_t m_accuracy;
382	286	netlist_param_double_t m_gmin;
383	287	netlist_param_double_t m_lte;
	288	netlist_param_double_t m_sor;
384	289	netlist_param_logic_t m_dynamic;
385	290	netlist_param_double_t m_min_timestep;
386	291

trunk/src/emu/netlist/analog/nld_ms_direct2.h
r0	r31038
	1	/*
	2	* nld_ms_direct1.h
	3	*
	4	*/
	5
	6	#ifndef NLD_MS_DIRECT2_H_
	7	#define NLD_MS_DIRECT2_H_
	8
	9	#include "nld_solver.h"
	10	#include "nld_ms_direct.h"
	11
	12
	13
	14	class ATTR_ALIGNED(64) netlist_matrix_solver_direct2_t: public netlist_matrix_solver_direct_t<2,2>
	15	{
	16	public:
	17
	18	netlist_matrix_solver_direct2_t(const netlist_solver_parameters_t &params)
	19	: netlist_matrix_solver_direct_t<2, 2>(params, 2)
	20	{}
	21	ATTR_HOT inline int vsolve_non_dynamic();
	22	protected:
	23	ATTR_HOT virtual double vsolve();
	24	private:
	25	};
	26
	27	// ----------------------------------------------------------------------------------------
	28	// netlist_matrix_solver - Direct2
	29	// ----------------------------------------------------------------------------------------
	30
	31	ATTR_HOT double netlist_matrix_solver_direct2_t::vsolve()
	32	{
	33	solve_base<netlist_matrix_solver_direct2_t>(this);
	34	return this->compute_next_timestep();
	35	}
	36
	37	ATTR_HOT inline int netlist_matrix_solver_direct2_t::vsolve_non_dynamic()
	38	{
	39
	40	build_LE();
	41
	42	const double a = m_A[0][0];
	43	const double b = m_A[0][1];
	44	const double c = m_A[1][0];
	45	const double d = m_A[1][1];
	46
	47	double new_val[2];
	48	new_val[1] = (a * m_RHS[1] - c * m_RHS[0]) / (a * d - b * c);
	49	new_val[0] = (m_RHS[0] - b * new_val[1]) / a;
	50
	51	if (is_dynamic())
	52	{
	53	double err = this->delta(new_val);
	54	store(new_val, true);
	55	if (err > m_params.m_accuracy )
	56	return 2;
	57	else
	58	return 1;
	59	}
	60	store(new_val, false);
	61	return 1;
	62	}
	63
	64
	65
	66	#endif /* NLD_MS_DIRECT2_H_ */

Property changes on: trunk/src/emu/netlist/analog/nld_ms_direct2.h
Added: svn:mime-type + text/plain Added: svn:eol-style + native

trunk/src/emu/netlist/analog/nld_bjt.h
r31037	r31038
98	98
99	99	NETLIB_UPDATE_TERMINALS()
100	100	{
101		double m = (is_qtype( BJT_NPN) ? 1 : -1);
	101	const double m = (is_qtype( BJT_NPN) ? 1 : -1);
102	102
103		int new_state = (m_RB.deltaV() * m > m_V ) ? 1 : 0;
	103	const int new_state = (m_RB.deltaV() * m > m_V ) ? 1 : 0;
104	104	if (m_state_on ^ new_state)
105	105	{
	106	#if 0
106	107	double gb = m_gB;
107	108	double gc = m_gC;
108	109	double v = m_V * m;
r31037	r31038
113	114	v = 0;
114	115	gc = netlist().gmin();
115	116	}
	117	#else
	118	const double gb = new_state ? m_gB : netlist().gmin();
	119	const double gc = new_state ? m_gC : netlist().gmin();
	120	const double v = new_state ? m_V * m : 0;
	121	#endif
116	122	m_RB.set(gb, v, 0.0);
117	123	m_RC.set(gc, 0.0, 0.0);
118	124	//m_RB.update_dev();
r31037	r31038
163	169
164	170	NETLIB_UPDATE_TERMINALS()
165	171	{
166		double polarity = (qtype() == BJT_NPN ? 1.0 : -1.0);
	172	const double polarity = (qtype() == BJT_NPN ? 1.0 : -1.0);
167	173
168	174	m_gD_BE.update_diode(-m_D_EB.deltaV() * polarity);
169	175	m_gD_BC.update_diode(-m_D_CB.deltaV() * polarity);
170	176
171		double gee = m_gD_BE.G();
172		double gcc = m_gD_BC.G();
173		double gec = m_alpha_r * gcc;
174		double gce = m_alpha_f * gee;
175		double sIe = -m_gD_BE.I() + m_alpha_r * m_gD_BC.I();
176		double sIc = m_alpha_f * m_gD_BE.I() - m_gD_BC.I();
177		double Ie = (sIe + gee * m_gD_BE.Vd() - gec * m_gD_BC.Vd()) * polarity;
178		double Ic = (sIc - gce * m_gD_BE.Vd() + gcc * m_gD_BC.Vd()) * polarity;
179		//double Ie = sIe + gee * -m_D_EB.deltaV() - gec * -m_D_CB.deltaV();
180		//double Ic = sIc - gce * -m_D_EB.deltaV() + gcc * -m_D_CB.deltaV();
181		//printf("EB %f sIe %f sIc %f\n", m_D_BE.deltaV(), sIe, sIc);
	177	const double gee = m_gD_BE.G();
	178	const double gcc = m_gD_BC.G();
	179	const double gec = m_alpha_r * gcc;
	180	const double gce = m_alpha_f * gee;
	181	const double sIe = -m_gD_BE.I() + m_alpha_r * m_gD_BC.I();
	182	const double sIc = m_alpha_f * m_gD_BE.I() - m_gD_BC.I();
	183	const double Ie = (sIe + gee * m_gD_BE.Vd() - gec * m_gD_BC.Vd()) * polarity;
	184	const double Ic = (sIc - gce * m_gD_BE.Vd() + gcc * m_gD_BC.Vd()) * polarity;
182	185
183	186	m_D_EB.set_mat(gee, gec - gee, gce - gee, gee - gec, Ie, -Ie);
184	187	m_D_CB.set_mat(gcc, gce - gcc, gec - gcc, gcc - gce, Ic, -Ic);

trunk/src/emu/netlist/analog/nld_twoterm.h
r31037	r31038
103	103	m_N.set( G, G, ( -V) * G + I);
104	104	}
105	105
106		ATTR_HOT inline double deltaV()
	106	ATTR_HOT inline double deltaV() const
107	107	{
108	108	return m_P.net().as_analog().Q_Analog() - m_N.net().as_analog().Q_Analog();
109	109	}
r31037	r31038
172	172
173	173	ATTR_HOT void step_time(const double st)
174	174	{
175		double G = m_C.Value() / st;
176		double I = -G * deltaV();
	175	const double G = m_C.Value() / st;
	176	const double I = -G * deltaV();
177	177	set(G, 0.0, I);
178	178	}
179	179

trunk/src/emu/netlist/analog/nld_solver.c
r31037	r31038
3	3	*
4	4	*/
5	5
6		#include <algorithm>
7
8		#include "nld_solver.h"
9		#include "nld_twoterm.h"
10		#include "../nl_lists.h"
11
12		#if HAS_OPENMP
13		#include "omp.h"
14		#endif
15
16		#define USE_PIVOT_SEARCH (0)
17		#define VECTALT 1
18		#define USE_GABS 0
19		#define USE_MATRIX_GS 0
20		//#define SORP 1.059
21		#define SORP 1.059
22		// savings are eaten up by effort
23		#define USE_LINEAR_PREDICTION (0)
24
25		#define SOLVER_VERBOSE_OUT(x) do {} while (0)
26		//#define SOLVER_VERBOSE_OUT(x) printf x
27
28		/* Commented out for now. Relatively low number of terminals / nes makes
29		* the vectorizations this enables pretty expensive
	6	/* Commented out for now. Relatively low number of terminals / nets make
	7	* the vectorizations fast-math enables pretty expensive
30	8	*/
31	9
32	10	#if 0
r31037	r31038
35	13	#pragma GCC optimize "-funswitch-loops"
36	14	#pragma GCC optimize "-fvariable-expansion-in-unroller"
37	15	#pragma GCC optimize "-funsafe-loop-optimizations"
	16	#pragma GCC optimize "-fvect-cost-model"
	17	#pragma GCC optimize "-fvariable-expansion-in-unroller"
38	18	#pragma GCC optimize "-ftree-loop-if-convert-stores"
39	19	#pragma GCC optimize "-ftree-loop-distribution"
40	20	#pragma GCC optimize "-ftree-loop-im"
41	21	#pragma GCC optimize "-ftree-loop-ivcanon"
42	22	#pragma GCC optimize "-fivopts"
43	23	#pragma GCC optimize "-ftree-parallelize-loops=4"
44		#pragma GCC optimize "-fvect-cost-model"
45		#pragma GCC optimize "-fvariable-expansion-in-unroller"
46	24	#endif
47	25
48		static vector_ops_t *create_ops(const int size)
	26	#define SOLVER_VERBOSE_OUT(x) do {} while (0)
	27	//#define SOLVER_VERBOSE_OUT(x) printf x
	28
	29	#include <algorithm>
	30	#include "nld_solver.h"
	31	#include "nld_ms_direct.h"
	32	#include "nld_ms_direct1.h"
	33	#include "nld_ms_direct2.h"
	34	#include "nld_ms_gauss_seidel.h"
	35	#include "nld_twoterm.h"
	36	#include "../nl_lists.h"
	37
	38	#if HAS_OPENMP
	39	#include "omp.h"
	40	#endif
	41
	42	vector_ops_t *vector_ops_t::create_ops(const int size)
49	43	{
50	44	switch (size)
51	45	{
r31037	r31038
98	92	m_other_curanalog[i] = &m_term[i]->m_otherterm->net().as_analog().m_cur_Analog;
99	93	}
100	94
101		m_ops = create_ops(m_gt.count());
	95	m_ops = vector_ops_t::create_ops(m_gt.count());
102	96	}
103	97
104	98	// ----------------------------------------------------------------------------------------
105	99	// netlist_matrix_solver
106	100	// ----------------------------------------------------------------------------------------
107	101
108		ATTR_COLD netlist_matrix_solver_t::netlist_matrix_solver_t()
109		: m_calculations(0), m_cur_ts(0)
	102	ATTR_COLD netlist_matrix_solver_t::netlist_matrix_solver_t(const netlist_solver_parameters_t &params)
	103	: m_calculations(0), m_params(params), m_cur_ts(0)
110	104	{
111	105	}
112	106
r31037	r31038
197	191	}
198	192	}
199	193
200		// ----------------------------------------------------------------------------------------
201		// netlist_matrix_solver_direct
202		// ----------------------------------------------------------------------------------------
203	194
204		template <int m_N, int _storage_N>
205		netlist_matrix_solver_direct_t<m_N, _storage_N>::netlist_matrix_solver_direct_t(int size)
206		: netlist_matrix_solver_t()
207		, m_dim(size)
208		, m_lp_fact(0)
209		{
210		m_terms = new terms_t *[N()];
211		m_rails_temp = new terms_t[N()];
212
213		for (int k = 0; k < N(); k++)
214		{
215		m_terms[k] = new terms_t;
216		m_row_ops[k] = create_ops(k);
217		}
218		m_row_ops[N()] = create_ops(N());
219		}
220
221		template <int m_N, int _storage_N>
222		netlist_matrix_solver_direct_t<m_N, _storage_N>::~netlist_matrix_solver_direct_t()
223		{
224		for (int k=0; k<_storage_N; k++)
225		{
226		//delete[] m_A[k];
227		}
228		//delete[] m_last_RHS;
229		//delete[] m_RHS;
230		delete[] m_terms;
231		delete[] m_rails_temp;
232		//delete[] m_row_ops;
233
234		}
235
236		template <int m_N, int _storage_N>
237		ATTR_HOT double netlist_matrix_solver_direct_t<m_N, _storage_N>::compute_next_timestep()
238		{
239		double new_solver_timestep = m_params.m_max_timestep;
240
241		if (m_params.m_dynamic)
242		{
243		/*
244		* FIXME: We should extend the logic to use either all nets or
245		* only output nets.
246		*/
247		#if 0
248		for (netlist_analog_output_t * const *p = m_inps.first(); p != NULL; p = m_inps.next(p))
249		{
250		netlist_analog_net_t n = (p)->m_proxied_net;
251		#else
252		for (int k = 0; k < N(); k++)
253		{
254		netlist_analog_net_t *n = m_nets[k];
255		#endif
256		const double DD_n = (n->m_cur_Analog - m_last_V[k]);
257		const double hn = current_timestep();
258
259		double DD2 = (DD_n / hn - n->m_DD_n_m_1 / n->m_h_n_m_1) / (hn + n->m_h_n_m_1);
260		double new_net_timestep;
261
262		n->m_h_n_m_1 = hn;
263		n->m_DD_n_m_1 = DD_n;
264		if (fabs(DD2) > 1e-50) // avoid div-by-zero
265		new_net_timestep = sqrt(m_params.m_lte / fabs(0.5*DD2));
266		else
267		new_net_timestep = m_params.m_max_timestep;
268
269		if (new_net_timestep < new_solver_timestep)
270		new_solver_timestep = new_net_timestep;
271		}
272		if (new_solver_timestep < m_params.m_min_timestep)
273		new_solver_timestep = m_params.m_min_timestep;
274		}
275		//if (new_solver_timestep > 10.0 * hn)
276		// new_solver_timestep = 10.0 * hn;
277		return new_solver_timestep;
278		}
279
280	195	ATTR_HOT void netlist_matrix_solver_t::update_inputs()
281	196	{
282	197	// avoid recursive calls. Inputs are updated outside this call
r31037	r31038
386	301	return next_time_step;
387	302	}
388	303
389		template <int m_N, int _storage_N>
390		void netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::log_stats()
391		{
392		#if 1
393		printf("==============================================\n");
394		printf("Solver %s\n", this->name().cstr());
395		printf(" ==> %d nets\n", this->N()); //, ((groups[i].first())->m_core_terms.first())->name().cstr());
396		printf(" has %s elements\n", this->is_dynamic() ? "dynamic" : "no dynamic");
397		printf(" has %s elements\n", this->is_timestep() ? "timestep" : "no timestep");
398		printf(" %10d invocations (%6d Hz) %10d gs fails (%6.2f%%) %6.3f average\n",
399		this->m_calculations,
400		this->m_calculations * 10 / (int) (this->netlist().time().as_double() * 10.0),
401		this->m_gs_fail,
402		100.0 * (double) this->m_gs_fail / (double) this->m_calculations,
403		(double) this->m_gs_total / (double) this->m_calculations);
404		#endif
405		}
406	304
407	305	// ----------------------------------------------------------------------------------------
408	306	// netlist_matrix_solver - Direct base
r31037	r31038
416	314	return -1;
417	315	}
418	316
419		template <int m_N, int _storage_N>
420		ATTR_COLD void netlist_matrix_solver_direct_t<m_N, _storage_N>::add_term(int k, netlist_terminal_t *term)
421		{
422		if (term->m_otherterm->net().isRailNet())
423		{
424		m_rails_temp[k].add(term, -1);
425		}
426		else
427		{
428		int ot = get_net_idx(&term->m_otherterm->net());
429		if (ot>=0)
430		{
431		m_terms[k]->add(term, ot);
432		SOLVER_VERBOSE_OUT(("Net %d Term %s %f %f\n", k, terms[i]->name().cstr(), terms[i]->m_gt, terms[i]->m_go));
433		}
434		/* Should this be allowed ? */
435		else // if (ot<0)
436		{
437		m_rails_temp[k].add(term, ot);
438		netlist().error("found term with missing othernet %s\n", term->name().cstr());
439		}
440		}
441		}
442	317
443	318
444		template <int m_N, int _storage_N>
445		ATTR_COLD void netlist_matrix_solver_direct_t<m_N, _storage_N>::vsetup(netlist_analog_net_t::list_t &nets)
446		{
447	319
448		if (m_dim < nets.count())
449		netlist().error("Dimension %d less than %d", m_dim, nets.count());
450	320
451		for (int k = 0; k < N(); k++)
452		{
453		m_terms[k]->clear();
454		m_rails_temp[k].clear();
455		}
456	321
457		netlist_matrix_solver_t::setup(nets);
458	322
459		for (int k = 0; k < N(); k++)
460		{
461		m_terms[k]->m_railstart = m_terms[k]->count();
462		for (int i = 0; i < m_rails_temp[k].count(); i++)
463		this->m_terms[k]->add(m_rails_temp[k].terms()[i], m_rails_temp[k].net_other()[i]);
464
465		m_rails_temp[k].clear(); // no longer needed
466		m_terms[k]->set_pointers();
467		}
468
469		#if 1
470
471		/* Sort in descending order by number of connected matrix voltages.
472		* The idea is, that for Gauss-Seidel algo the first voltage computed
473		* depends on the greatest number of previous voltages thus taking into
474		* account the maximum amout of information.
475		*
476		* This actually improves performance on popeye slightly. Average
477		* GS computations reduce from 2.509 to 2.370
478		*
479		* Smallest to largest : 2.613
480		* Unsorted : 2.509
481		* Largest to smallest : 2.370
482		*
483		* Sorting as a general matrix pre-conditioning is mentioned in
484		* literature but I have found no articles about Gauss Seidel.
485		*
486		*/
487
488
489		for (int k = 0; k < N() / 2; k++)
490		for (int i = 0; i < N() - 1; i++)
491		{
492		if (m_terms[i]->m_railstart < m_terms[i+1]->m_railstart)
493		{
494		std::swap(m_terms[i],m_terms[i+1]);
495		m_nets.swap(i, i+1);
496		}
497		}
498
499		for (int k = 0; k < N(); k++)
500		{
501		int *other = m_terms[k]->net_other();
502		for (int i = 0; i < m_terms[k]->count(); i++)
503		if (other[i] != -1)
504		other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net());
505		}
506
507		#endif
508
509		}
510
511		template <int m_N, int _storage_N>
512		ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE()
513		{
514		#if 0
515		for (int k=0; k < N(); k++)
516		for (int i=0; i < N(); i++)
517		m_A[k][i] = 0.0;
518		#endif
519
520		for (int k = 0; k < N(); k++)
521		{
522		for (int i=0; i < N(); i++)
523		m_A[k][i] = 0.0;
524
525		double rhsk = 0.0;
526		double akk = 0.0;
527		{
528		const int terms_count = m_terms[k]->count();
529		const double * RESTRICT gt = m_terms[k]->gt();
530		const double * RESTRICT go = m_terms[k]->go();
531		const double * RESTRICT Idr = m_terms[k]->Idr();
532		#if VECTALT
533
534		for (int i = 0; i < terms_count; i++)
535		{
536		rhsk = rhsk + Idr[i];
537		akk = akk + gt[i];
538		}
539		#else
540		m_terms[k]->ops()->sum2(Idr, gt, rhsk, akk);
541		#endif
542		double * const * RESTRICT other_cur_analog = m_terms[k]->other_curanalog();
543		for (int i = m_terms[k]->m_railstart; i < terms_count; i++)
544		{
545		//rhsk = rhsk + go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog();
546		rhsk = rhsk + go[i] * *other_cur_analog[i];
547		}
548		}
549		#if 0
550		/*
551		* Matrix preconditioning with 1.0 / Akk
552		*
553		* will save a number of calculations during elimination
554		*
555		*/
556		akk = 1.0 / akk;
557		m_RHS[k] = rhsk * akk;
558		m_A[k][k] += 1.0;
559		{
560		const int *net_other = m_terms[k]->net_other();
561		const double *go = m_terms[k]->go();
562		const int railstart = m_terms[k]->m_railstart;
563
564		for (int i = 0; i < railstart; i++)
565		{
566		m_A[k][net_other[i]] += -go[i] * akk;
567		}
568		}
569		#else
570		m_RHS[k] = rhsk;
571		m_A[k][k] += akk;
572		{
573		const int * RESTRICT net_other = m_terms[k]->net_other();
574		const double * RESTRICT go = m_terms[k]->go();
575		const int railstart = m_terms[k]->m_railstart;
576
577		for (int i = 0; i < railstart; i++)
578		{
579		m_A[k][net_other[i]] += -go[i];
580		}
581		}
582		#endif
583		}
584		}
585
586		template <int m_N, int _storage_N>
587		ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::gauss_LE(
588		double (* RESTRICT x))
589		{
590		#if 0
591		for (int i = 0; i < N(); i++)
592		{
593		for (int k = 0; k < N(); k++)
594		printf("%f ", m_A[i][k]);
595		printf("\| %f = %f \n", x[i], m_RHS[i]);
596		}
597		printf("\n");
598		#endif
599
600		const int kN = N();
601
602		for (int i = 0; i < kN; i++) {
603		// FIXME: use a parameter to enable pivoting?
604		if (USE_PIVOT_SEARCH)
605		{
606		/* Find the row with the largest first value */
607		int maxrow = i;
608		for (int j = i + 1; j < kN; j++)
609		{
610		if (fabs(m_A[j][i]) > fabs(m_A[maxrow][i]))
611		maxrow = j;
612		}
613
614		if (maxrow != i)
615		{
616		/* Swap the maxrow and ith row */
617		for (int k = i; k < kN; k++) {
618		std::swap(m_A[i][k], m_A[maxrow][k]);
619		}
620		std::swap(m_RHS[i], m_RHS[maxrow]);
621		}
622		}
623
624		/* FIXME: Singular matrix? */
625		const double f = 1.0 / m_A[i][i];
626
627		/* Eliminate column i from row j */
628
629		for (int j = i + 1; j < kN; j++)
630		{
631		const double f1 = - m_A[j][i] * f;
632		if (f1 != 0.0)
633		{
634		#if 0 && VECTALT
635		for (int k = i + 1; k < kN; k++)
636		m_A[j][k] += m_A[i][k] * f1;
637		#else
638		// addmult gives some performance increase here...
639		m_row_ops[kN - (i + 1)]->addmult(&m_A[j][i+1], &m_A[i][i+1], f1) ;
640		#endif
641		m_RHS[j] += m_RHS[i] * f1;
642		}
643		}
644		}
645		/* back substitution */
646		for (int j = kN - 1; j >= 0; j--)
647		{
648		double tmp = 0;
649
650		for (int k = j + 1; k < kN; k++)
651		tmp += m_A[j][k] * x[k];
652
653		x[j] = (m_RHS[j] - tmp) / m_A[j][j];
654		}
655		#if 0
656		printf("Solution:\n");
657		for (int i = 0; i < N(); i++)
658		{
659		for (int k = 0; k < N(); k++)
660		printf("%f ", m_A[i][k]);
661		printf("\| %f = %f \n", x[i], m_RHS[i]);
662		}
663		printf("\n");
664		#endif
665
666		}
667
668		template <int m_N, int _storage_N>
669		ATTR_HOT double netlist_matrix_solver_direct_t<m_N, _storage_N>::delta(
670		const double (* RESTRICT V))
671		{
672		double cerr = 0;
673		double cerr2 = 0;
674		for (int i = 0; i < this->N(); i++)
675		{
676		const double e = (V[i] - this->m_nets[i]->m_cur_Analog);
677		const double e2 = (m_RHS[i] - this->m_last_RHS[i]);
678		cerr = (fabs(e) > cerr ? fabs(e) : cerr);
679		cerr2 = (fabs(e2) > cerr2 ? fabs(e2) : cerr2);
680		}
681		// FIXME: Review
682		return cerr + cerr2*100000.0;
683		}
684
685		template <int m_N, int _storage_N>
686		ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::store(
687		const double (* RESTRICT V), const bool store_RHS)
688		{
689		for (int i = 0; i < this->N(); i++)
690		{
691		this->m_nets[i]->m_cur_Analog = V[i];
692		}
693		if (store_RHS)
694		{
695		for (int i = 0; i < this->N(); i++)
696		{
697		this->m_last_RHS[i] = m_RHS[i];
698		}
699		}
700		}
701
702		template <int m_N, int _storage_N>
703		ATTR_HOT double netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve()
704		{
705		solve_base<netlist_matrix_solver_direct_t>(this);
706		return this->compute_next_timestep();
707		}
708
709
710		template <int m_N, int _storage_N>
711		ATTR_HOT int netlist_matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic()
712		{
713		double new_v[_storage_N] = { 0.0 };
714
715		this->gauss_LE(new_v);
716
717		if (this->is_dynamic())
718		{
719		double err = delta(new_v);
720
721		store(new_v, true);
722
723		if (err > this->m_params.m_accuracy)
724		{
725		return 2;
726		}
727		return 1;
728		}
729		store(new_v, false); // ==> No need to store RHS
730		return 1;
731		}
732
733		template <int m_N, int _storage_N>
734		ATTR_HOT inline int netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic()
735		{
736		this->build_LE();
737
738		return this->solve_non_dynamic();
739		}
740
741
742	323	// ----------------------------------------------------------------------------------------
743		// netlist_matrix_solver - Direct1
744		// ----------------------------------------------------------------------------------------
745
746		ATTR_HOT double netlist_matrix_solver_direct1_t::vsolve()
747		{
748		solve_base<netlist_matrix_solver_direct1_t>(this);
749		return this->compute_next_timestep();
750		}
751
752		ATTR_HOT inline int netlist_matrix_solver_direct1_t::vsolve_non_dynamic()
753		{
754
755		netlist_analog_net_t *net = m_nets[0];
756		this->build_LE();
757		//NL_VERBOSE_OUT(("%f %f\n", new_val, m_RHS[0] / m_A[0][0]);
758
759		double new_val = m_RHS[0] / m_A[0][0];
760
761		double e = (new_val - net->m_cur_Analog);
762		double cerr = fabs(e);
763
764		net->m_cur_Analog = new_val;
765
766		if (is_dynamic() && (cerr > m_params.m_accuracy))
767		{
768		return 2;
769		}
770		else
771		return 1;
772
773		}
774
775
776
777		// ----------------------------------------------------------------------------------------
778		// netlist_matrix_solver - Direct2
779		// ----------------------------------------------------------------------------------------
780
781		ATTR_HOT double netlist_matrix_solver_direct2_t::vsolve()
782		{
783		solve_base<netlist_matrix_solver_direct2_t>(this);
784		return this->compute_next_timestep();
785		}
786
787		ATTR_HOT inline int netlist_matrix_solver_direct2_t::vsolve_non_dynamic()
788		{
789
790		build_LE();
791
792		const double a = m_A[0][0];
793		const double b = m_A[0][1];
794		const double c = m_A[1][0];
795		const double d = m_A[1][1];
796
797		double new_val[2];
798		new_val[1] = (a * m_RHS[1] - c * m_RHS[0]) / (a * d - b * c);
799		new_val[0] = (m_RHS[0] - b * new_val[1]) / a;
800
801		if (is_dynamic())
802		{
803		double err = this->delta(new_val);
804		store(new_val, true);
805		if (err > m_params.m_accuracy )
806		return 2;
807		else
808		return 1;
809		}
810		store(new_val, false);
811		return 1;
812		}
813
814		// ----------------------------------------------------------------------------------------
815		// netlist_matrix_solver - Gauss - Seidel
816		// ----------------------------------------------------------------------------------------
817
818		template <int m_N, int _storage_N>
819		ATTR_HOT double netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::vsolve()
820		{
821		/*
822		* enable linear prediction on first newton pass
823		*/
824
825		if (USE_LINEAR_PREDICTION)
826		for (int k = 0; k < this->N(); k++)
827		{
828		this->m_last_V[k] = this->m_nets[k]->m_cur_Analog;
829		this->m_nets[k]->m_cur_Analog = this->m_nets[k]->m_cur_Analog + this->m_Vdelta[k] * this->current_timestep() * m_lp_fact;
830		}
831		else
832		for (int k = 0; k < this->N(); k++)
833		{
834		this->m_last_V[k] = this->m_nets[k]->m_cur_Analog;
835		}
836
837		this->solve_base(this);
838
839		if (USE_LINEAR_PREDICTION)
840		{
841		double sq = 0;
842		double sqo = 0;
843		for (int k = 0; k < this->N(); k++)
844		{
845		netlist_analog_net_t *n = this->m_nets[k];
846		double nv = (n->m_cur_Analog - this->m_last_V[k]) / this->current_timestep();
847		sq += nv * nv;
848		sqo += this->m_Vdelta[k] * this->m_Vdelta[k];
849		this->m_Vdelta[k] = nv;
850		}
851		if (sqo > 1e-90)
852		m_lp_fact = sqrt(sq/sqo);
853		else
854		m_lp_fact = 0.0;
855		if (m_lp_fact > 2.0)
856		m_lp_fact = 2.0;
857		//printf("fact %f\n", fact);
858		}
859
860
861		return this->compute_next_timestep();
862		}
863
864		template <int m_N, int _storage_N>
865		ATTR_HOT inline int netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::vsolve_non_dynamic()
866		{
867		/* The matrix based code looks a lot nicer but actually is 30% slower than
868		* the optimized code which works directly on the data structures.
869		* Need something like that for gaussian elimination as well.
870		*/
871
872		#if USE_MATRIX_GS
873		static double ws = 1.0;
874		ATTR_ALIGN double new_v[_storage_N] = { 0.0 };
875		const int iN = this->N();
876
877		bool resched = false;
878
879		int resched_cnt = 0;
880
881		this->build_LE();
882
883		{
884		double frob;
885		frob = 0;
886		for (int k = 0; k < iN; k++)
887		{
888		new_v[k] = this->m_nets[k]->m_cur_Analog;
889		for (int i = 0; i < iN; i++)
890		{
891		frob += this->m_A[k][i] * this->m_A[k][i];
892		}
893
894		}
895		double frobA = sqrt(frob /(iN));
896		if (1 &&frobA < 1.0)
897		//ws = 2.0 / (1.0 + sqrt(1.0-frobA));
898		ws = 2.0 / (2.0 - frobA);
899		else
900		ws = 1.0;
901		ws = 0.9;
902		}
903
904		// Frobenius norm for (D-L)^(-1)U
905		//double frobU;
906		//double frobL;
907		//double norm;
908		do {
909		resched = false;
910		double cerr = 0.0;
911		//frobU = 0;
912		//frobL = 0;
913		//norm = 0;
914
915		for (int k = 0; k < iN; k++)
916		{
917		double Idrive = 0;
918		//double norm_t = 0;
919		// Reduction loops need -ffast-math
920		for (int i = 0; i < iN; i++)
921		Idrive += this->m_A[k][i] * new_v[i];
922
923		for (int i = 0; i < iN; i++)
924		{
925		//if (i < k) frobL += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] /this-> m_A[k][k];
926		//if (i > k) frobU += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] / this->m_A[k][k];
927		//norm_t += fabs(this->m_A[k][i]);
928		}
929
930		//if (norm_t > norm) norm = norm_t;
931		const double new_val = (1.0-ws) * new_v[k] + ws * (this->m_RHS[k] - Idrive + this->m_A[k][k] * new_v[k]) / this->m_A[k][k];
932
933		const double e = fabs(new_val - new_v[k]);
934		cerr = (e > cerr ? e : cerr);
935		new_v[k] = new_val;
936		}
937
938		if (cerr > this->m_params.m_accuracy)
939		{
940		resched = true;
941		}
942		resched_cnt++;
943		//ATTR_UNUSED double frobUL = sqrt((frobU + frobL) / (double) (iN) / (double) (iN));
944		} while (resched && (resched_cnt < this->m_params.m_gs_loops));
945		//printf("Frobenius %f %f %f %f %f\n", sqrt(frobU), sqrt(frobL), frobUL, frobA, norm);
946		//printf("Omega Estimate1 %f %f\n", 2.0 / (1.0 + sqrt(1-frobUL)), 2.0 / (1.0 + sqrt(1-frobA)) ); // printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) ));
947		//printf("Omega Estimate2 %f %f\n", 2.0 / (2.0 - frobUL), 2.0 / (2.0 - frobA) ); // printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) ));
948
949
950		this->store(new_v, false);
951
952		this->m_gs_total += resched_cnt;
953		if (resched)
954		{
955		//this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS");
956		this->m_gs_fail++;
957		int tmp = netlist_matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic();
958		this->m_calculations++;
959		return tmp;
960		}
961		else {
962		this->m_calculations++;
963
964		return resched_cnt;
965		}
966
967		#else
968		const int iN = this->N();
969		bool resched = false;
970		int resched_cnt = 0;
971
972		/* ideally, we could get an estimate for the spectral radius of
973		* Inv(D - L) * U
974		*
975		* and estimate using
976		*
977		* omega = 2.0 / (1.0 + sqrt(1-rho))
978		*/
979
980		const double ws = SORP; //1.045; //2.0 / (1.0 + /sin/(3.14159 * 5.5 / (double) (m_nets.count()+1)));
981
982		ATTR_ALIGN double w[_storage_N];
983		ATTR_ALIGN double one_m_w[_storage_N];
984		ATTR_ALIGN double RHS[_storage_N];
985		ATTR_ALIGN double new_V[_storage_N];
986
987		for (int k = 0; k < iN; k++)
988		{
989		new_V[k] = this->m_nets[k]->m_cur_Analog;
990		}
991		for (int k = 0; k < iN; k++)
992		{
993		double gtot_t = 0.0;
994		double gabs_t = 0.0;
995		double RHS_t = 0.0;
996
997		{
998		const int term_count = this->m_terms[k]->count();
999		const double * RESTRICT gt = this->m_terms[k]->gt();
1000		const double * RESTRICT go = this->m_terms[k]->go();
1001		const double * RESTRICT Idr = this->m_terms[k]->Idr();
1002		#if VECTALT
1003		for (int i = 0; i < term_count; i++)
1004		{
1005		gtot_t += gt[i];
1006		if (USE_GABS) gabs_t += fabs(go[i]);
1007		RHS_t += Idr[i];
1008		}
1009		#else
1010		if (USE_GABS)
1011		this->m_terms[k]->ops()->sum2a(gt, Idr, go, gtot_t, RHS_t, gabs_t);
1012		else
1013		this->m_terms[k]->ops()->sum2(gt, Idr, gtot_t, RHS_t);
1014		#endif
1015		double * const *other_cur_analog = this->m_terms[k]->other_curanalog();
1016		for (int i = this->m_terms[k]->m_railstart; i < term_count; i++)
1017		//RHS_t += go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog();
1018		RHS_t += go[i] * *other_cur_analog[i];
1019		}
1020
1021		RHS[k] = RHS_t;
1022
1023		//if (fabs(gabs_t - fabs(gtot_t)) > 1e-20)
1024		// printf("%d %e abs: %f tot: %f\n",k, gabs_t / gtot_t -1.0, gabs_t, gtot_t);
1025
1026		gabs_t *= 0.5; // avoid rounding issues
1027		if (!USE_GABS \|\| gabs_t <= gtot_t)
1028		{
1029		w[k] = ws / gtot_t;
1030		one_m_w[k] = 1.0 - ws;
1031		}
1032		else
1033		{
1034		//printf("abs: %f tot: %f\n", gabs_t, gtot_t);
1035		w[k] = 1.0 / (gtot_t + gabs_t);
1036		one_m_w[k] = 1.0 - 1.0 * gtot_t / (gtot_t + gabs_t);
1037		}
1038
1039		}
1040
1041		do {
1042		resched = false;
1043		//double cerr = 0.0;
1044
1045		for (int k = 0; k < iN; k++)
1046		{
1047		const int * RESTRICT net_other = this->m_terms[k]->net_other();
1048		const int railstart = this->m_terms[k]->m_railstart;
1049		const double * RESTRICT go = this->m_terms[k]->go();
1050
1051		double Idrive = 0.0;
1052		for (int i = 0; i < railstart; i++)
1053		Idrive = Idrive + go[i] * new_V[net_other[i]];
1054
1055		//double new_val = (net->m_cur_Analog * gabs[k] + iIdr) / (gtot[k]);
1056		const double new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k];
1057
1058		resched = resched \|\| (fabs(new_val - new_V[k]) > this->m_params.m_accuracy);
1059		new_V[k] = new_val;
1060		}
1061
1062		resched_cnt++;
1063		} while (resched && (resched_cnt < this->m_params.m_gs_loops));
1064
1065		for (int k = 0; k < iN; k++)
1066		this->m_nets[k]->m_cur_Analog = new_V[k];
1067
1068		this->m_gs_total += resched_cnt;
1069
1070		if (resched)
1071		{
1072		//this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS");
1073		this->m_gs_fail++;
1074		int tmp = netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic();
1075		this->m_calculations++;
1076		return tmp;
1077		}
1078		else {
1079		this->m_calculations++;
1080
1081		return resched_cnt;
1082		}
1083		#endif
1084		}
1085
1086		// ----------------------------------------------------------------------------------------
1087	324	// solver
1088	325	// ----------------------------------------------------------------------------------------
1089	326
r31037	r31038
1099	336
1100	337	register_param("ACCURACY", m_accuracy, 1e-7);
1101	338	register_param("GS_LOOPS", m_gs_loops, 9); // Gauss-Seidel loops
1102		register_param("GS_THRESHOLD", m_gs_threshold, 5); // below this value, gaussian elimination is used
	339	register_param("GS_THRESHOLD", m_gs_threshold, 5); // below this value, gaussian elimination is used
1103	340	register_param("NR_LOOPS", m_nr_loops, 25); // Newton-Raphson loops
1104	341	register_param("PARALLEL", m_parallel, 0);
	342	register_param("SOR_FACTOR", m_sor, 1.059);
1105	343	register_param("GMIN", m_gmin, NETLIST_GMIN_DEFAULT);
1106	344	register_param("DYNAMIC_TS", m_dynamic, 0);
1107	345	register_param("LTE", m_lte, 5e-5); // diff/timestep
r31037	r31038
1190	428	netlist_matrix_solver_t * NETLIB_NAME(solver)::create_solver(int size, const int gs_threshold, const bool use_specific)
1191	429	{
1192	430	if (use_specific && m_N == 1)
1193		return new netlist_matrix_solver_direct1_t();
	431	return new netlist_matrix_solver_direct1_t(m_params);
1194	432	else if (use_specific && m_N == 2)
1195		return new netlist_matrix_solver_direct2_t();
	433	return new netlist_matrix_solver_direct2_t(m_params);
1196	434	else
1197	435	{
1198	436	if (size >= gs_threshold)
1199		return new netlist_matrix_solver_gauss_seidel_t<m_N,_storage_N>(size);
	437	return new netlist_matrix_solver_gauss_seidel_t<m_N,_storage_N>(m_params, size);
1200	438	else
1201		return new netlist_matrix_solver_direct_t<m_N, _storage_N>(size);
	439	return new netlist_matrix_solver_direct_t<m_N, _storage_N>(m_params, size);
1202	440	}
1203	441	}
1204	442
r31037	r31038
1214	452	m_params.m_nr_loops = m_nr_loops.Value();
1215	453	m_params.m_nt_sync_delay = m_sync_delay.Value();
1216	454	m_params.m_lte = m_lte.Value();
	455	m_params.m_sor = m_sor.Value();
	456
1217	457	m_params.m_min_timestep = m_min_timestep.Value();
1218	458	m_params.m_dynamic = (m_dynamic.Value() == 1 ? true : false);
1219	459	m_params.m_max_timestep = netlist_time::from_hz(m_freq.Value()).as_double();
r31037	r31038
1302	542	break;
1303	543	}
1304	544
1305		ms->m_params = m_params;
1306
1307	545	register_sub(*ms, pstring::sprintf("Solver %d",m_mat_solvers.count()));
1308	546
1309	547	ms->vsetup(groups[i]);

trunk/src/emu/netlist/analog/nld_ms_gauss_seidel.h
r0	r31038
	1	/*
	2	* nld_ms_direct1.h
	3	*
	4	*/
	5
	6	#ifndef NLD_MS_GAUSS_SEIDEL_H_
	7	#define NLD_MS_GAUSS_SEIDEL_H_
	8
	9	#include <cmath>
	10
	11	#include "nld_solver.h"
	12	#include "nld_ms_direct.h"
	13
	14	template <int m_N, int _storage_N>
	15	class ATTR_ALIGNED(64) netlist_matrix_solver_gauss_seidel_t: public netlist_matrix_solver_direct_t<m_N, _storage_N>
	16	{
	17	public:
	18
	19	netlist_matrix_solver_gauss_seidel_t(const netlist_solver_parameters_t &params, int size)
	20	: netlist_matrix_solver_direct_t<m_N, _storage_N>(params, size)
	21	, m_lp_fact(0)
	22	, m_gs_fail(0)
	23	, m_gs_total(0)
	24	{}
	25
	26	virtual ~netlist_matrix_solver_gauss_seidel_t() {}
	27
	28	ATTR_COLD virtual void log_stats();
	29
	30	ATTR_HOT inline int vsolve_non_dynamic();
	31	protected:
	32	ATTR_HOT virtual double vsolve();
	33
	34	private:
	35	double m_lp_fact;
	36	int m_gs_fail;
	37	int m_gs_total;
	38
	39	};
	40
	41	// ----------------------------------------------------------------------------------------
	42	// netlist_matrix_solver - Gauss - Seidel
	43	// ----------------------------------------------------------------------------------------
	44
	45	template <int m_N, int _storage_N>
	46	void netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::log_stats()
	47	{
	48	#if 1
	49	printf("==============================================\n");
	50	printf("Solver %s\n", this->name().cstr());
	51	printf(" ==> %d nets\n", this->N()); //, ((groups[i].first())->m_core_terms.first())->name().cstr());
	52	printf(" has %s elements\n", this->is_dynamic() ? "dynamic" : "no dynamic");
	53	printf(" has %s elements\n", this->is_timestep() ? "timestep" : "no timestep");
	54	printf(" %10d invocations (%6d Hz) %10d gs fails (%6.2f%%) %6.3f average\n",
	55	this->m_calculations,
	56	this->m_calculations * 10 / (int) (this->netlist().time().as_double() * 10.0),
	57	this->m_gs_fail,
	58	100.0 * (double) this->m_gs_fail / (double) this->m_calculations,
	59	(double) this->m_gs_total / (double) this->m_calculations);
	60	#endif
	61	}
	62
	63	template <int m_N, int _storage_N>
	64	ATTR_HOT double netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::vsolve()
	65	{
	66	/*
	67	* enable linear prediction on first newton pass
	68	*/
	69
	70	if (USE_LINEAR_PREDICTION)
	71	for (int k = 0; k < this->N(); k++)
	72	{
	73	this->m_last_V[k] = this->m_nets[k]->m_cur_Analog;
	74	this->m_nets[k]->m_cur_Analog = this->m_nets[k]->m_cur_Analog + this->m_Vdelta[k] * this->current_timestep() * m_lp_fact;
	75	}
	76	else
	77	for (int k = 0; k < this->N(); k++)
	78	{
	79	this->m_last_V[k] = this->m_nets[k]->m_cur_Analog;
	80	}
	81
	82	this->solve_base(this);
	83
	84	if (USE_LINEAR_PREDICTION)
	85	{
	86	double sq = 0;
	87	double sqo = 0;
	88	const double rez_cts = 1.0 / this->current_timestep();
	89	for (int k = 0; k < this->N(); k++)
	90	{
	91	const netlist_analog_net_t *n = this->m_nets[k];
	92	const double nv = (n->m_cur_Analog - this->m_last_V[k]) * rez_cts ;
	93	sq += nv * nv;
	94	sqo += this->m_Vdelta[k] * this->m_Vdelta[k];
	95	this->m_Vdelta[k] = nv;
	96	}
	97	if (sqo > 1e-90)
	98	m_lp_fact = std::min(sqrt(sq/sqo), 2.0);
	99	else
	100	m_lp_fact = 0.0;
	101	}
	102
	103
	104	return this->compute_next_timestep();
	105	}
	106
	107	template <int m_N, int _storage_N>
	108	ATTR_HOT inline int netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::vsolve_non_dynamic()
	109	{
	110	/* The matrix based code looks a lot nicer but actually is 30% slower than
	111	* the optimized code which works directly on the data structures.
	112	* Need something like that for gaussian elimination as well.
	113	*/
	114
	115	#if 0 \|\| USE_MATRIX_GS
	116	static double ws = 1.0;
	117	ATTR_ALIGN double new_v[_storage_N] = { 0.0 };
	118	const int iN = this->N();
	119
	120	bool resched = false;
	121
	122	int resched_cnt = 0;
	123
	124	this->build_LE();
	125
	126	{
	127	double frob;
	128	frob = 0;
	129	double rmin = 1e99, rmax = -1e99;
	130	for (int k = 0; k < iN; k++)
	131	{
	132	new_v[k] = this->m_nets[k]->m_cur_Analog;
	133	double s=0.0;
	134	for (int i = 0; i < iN; i++)
	135	{
	136	frob += this->m_A[k][i] * this->m_A[k][i];
	137	s = s + fabs(this->m_A[k][i]);
	138	}
	139
	140	if (s<rmin)
	141	rmin = s;
	142	if (s>rmax)
	143	rmax = s;
	144	}
	145	//if (fabs(rmax) > 0.01)
	146	// printf("rmin %f rmax %f\n", rmin, rmax);
	147	#if 0
	148	double frobA = sqrt(frob /(iN));
	149	if (1 &&frobA < 1.0)
	150	//ws = 2.0 / (1.0 + sqrt(1.0-frobA));
	151	ws = 2.0 / (2.0 - frobA);
	152	else
	153	ws = 1.0;
	154	ws = 0.9;
	155	#else
	156	// calculate an estimate for rho.
	157	// This is based on the Perron–Frobenius theorem for positive matrices.
	158	// No mathematical proof here. The following estimates the
	159	// optimal relaxation parameter pretty well. Unfortunately, the
	160	// overhead is bigger than the gain. Consequently the fast GS below
	161	// uses a fixed GS. One can however use this here to determine a
	162	// suitable parameter.
	163	double rm = (rmax + rmin) * 0.5;
	164	if (rm < 1.0)
	165	ws = 2.0 / (1.0 + sqrt(1.0-rm));
	166	else
	167	ws = 1.0;
	168	#endif
	169	}
	170
	171	// Frobenius norm for (D-L)^(-1)U
	172	//double frobU;
	173	//double frobL;
	174	//double norm;
	175	do {
	176	resched = false;
	177	double cerr = 0.0;
	178	//frobU = 0;
	179	//frobL = 0;
	180	//norm = 0;
	181
	182	for (int k = 0; k < iN; k++)
	183	{
	184	double Idrive = 0;
	185	//double norm_t = 0;
	186	// Reduction loops need -ffast-math
	187	for (int i = 0; i < iN; i++)
	188	Idrive += this->m_A[k][i] * new_v[i];
	189
	190	for (int i = 0; i < iN; i++)
	191	{
	192	//if (i < k) frobL += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] /this-> m_A[k][k];
	193	//if (i > k) frobU += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] / this->m_A[k][k];
	194	//norm_t += fabs(this->m_A[k][i]);
	195	}
	196
	197	//if (norm_t > norm) norm = norm_t;
	198	const double new_val = (1.0-ws) * new_v[k] + ws * (this->m_RHS[k] - Idrive + this->m_A[k][k] * new_v[k]) / this->m_A[k][k];
	199
	200	const double e = fabs(new_val - new_v[k]);
	201	cerr = (e > cerr ? e : cerr);
	202	new_v[k] = new_val;
	203	}
	204
	205	if (cerr > this->m_params.m_accuracy)
	206	{
	207	resched = true;
	208	}
	209	resched_cnt++;
	210	//ATTR_UNUSED double frobUL = sqrt((frobU + frobL) / (double) (iN) / (double) (iN));
	211	} while (resched && (resched_cnt < this->m_params.m_gs_loops));
	212	//printf("Frobenius %f %f %f %f %f\n", sqrt(frobU), sqrt(frobL), frobUL, frobA, norm);
	213	//printf("Omega Estimate1 %f %f\n", 2.0 / (1.0 + sqrt(1-frobUL)), 2.0 / (1.0 + sqrt(1-frobA)) ); // printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) ));
	214	//printf("Omega Estimate2 %f %f\n", 2.0 / (2.0 - frobUL), 2.0 / (2.0 - frobA) ); // printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) ));
	215
	216
	217	this->store(new_v, false);
	218
	219	this->m_gs_total += resched_cnt;
	220	if (resched)
	221	{
	222	//this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS");
	223	this->m_gs_fail++;
	224	int tmp = netlist_matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic();
	225	this->m_calculations++;
	226	return tmp;
	227	}
	228	else {
	229	this->m_calculations++;
	230
	231	return resched_cnt;
	232	}
	233
	234	#else
	235	const int iN = this->N();
	236	bool resched = false;
	237	int resched_cnt = 0;
	238
	239	/* ideally, we could get an estimate for the spectral radius of
	240	* Inv(D - L) * U
	241	*
	242	* and estimate using
	243	*
	244	* omega = 2.0 / (1.0 + sqrt(1-rho))
	245	*/
	246
	247	const double ws = this->m_params.m_sor; //1.045; //2.0 / (1.0 + /sin/(3.14159 * 5.5 / (double) (m_nets.count()+1)));
	248	//const double ws = 2.0 / (1.0 + sin(3.14159 * 4 / (double) (this->N())));
	249
	250	ATTR_ALIGN double w[_storage_N];
	251	ATTR_ALIGN double one_m_w[_storage_N];
	252	ATTR_ALIGN double RHS[_storage_N];
	253	ATTR_ALIGN double new_V[_storage_N];
	254
	255	for (int k = 0; k < iN; k++)
	256	{
	257	double gtot_t = 0.0;
	258	double gabs_t = 0.0;
	259	double RHS_t = 0.0;
	260
	261	new_V[k] = this->m_nets[k]->m_cur_Analog;
	262
	263	{
	264	const int term_count = this->m_terms[k]->count();
	265	const double * const RESTRICT gt = this->m_terms[k]->gt();
	266	const double * const RESTRICT go = this->m_terms[k]->go();
	267	const double * const RESTRICT Idr = this->m_terms[k]->Idr();
	268	const double * const *other_cur_analog = this->m_terms[k]->other_curanalog();
	269	#if VECTALT
	270	for (int i = 0; i < term_count; i++)
	271	{
	272	gtot_t = gtot_t + gt[i];
	273	RHS_t = RHS_t + Idr[i];
	274	}
	275	if (USE_GABS)
	276	for (int i = 0; i < term_count; i++)
	277	gabs_t = gabs_t + fabs(go[i]);
	278	#else
	279	if (USE_GABS)
	280	this->m_terms[k]->ops()->sum2a(gt, Idr, go, gtot_t, RHS_t, gabs_t);
	281	else
	282	this->m_terms[k]->ops()->sum2(gt, Idr, gtot_t, RHS_t);
	283	#endif
	284	for (int i = this->m_terms[k]->m_railstart; i < term_count; i++)
	285	RHS_t = RHS_t + go[i] * *other_cur_analog[i];
	286	}
	287
	288	RHS[k] = RHS_t;
	289
	290	//if (fabs(gabs_t - fabs(gtot_t)) > 1e-20)
	291	// printf("%d %e abs: %f tot: %f\n",k, gabs_t / gtot_t -1.0, gabs_t, gtot_t);
	292
	293	gabs_t *= 0.5; // avoid rounding issues
	294	if (!USE_GABS \|\| gabs_t <= gtot_t)
	295	{
	296	w[k] = ws / gtot_t;
	297	one_m_w[k] = 1.0 - ws;
	298	}
	299	else
	300	{
	301	//printf("abs: %f tot: %f\n", gabs_t, gtot_t);
	302	w[k] = 1.0 / (gtot_t + gabs_t);
	303	one_m_w[k] = 1.0 - 1.0 * gtot_t / (gtot_t + gabs_t);
	304	}
	305
	306	}
	307
	308	const double accuracy = this->m_params.m_accuracy;
	309
	310	do {
	311	resched = false;
	312
	313	for (int k = 0; k < iN; k++)
	314	{
	315	const int * RESTRICT net_other = this->m_terms[k]->net_other();
	316	const int railstart = this->m_terms[k]->m_railstart;
	317	const double * RESTRICT go = this->m_terms[k]->go();
	318
	319	double Idrive = 0.0;
	320	for (int i = 0; i < railstart; i++)
	321	Idrive = Idrive + go[i] * new_V[net_other[i]];
	322
	323	//double new_val = (net->m_cur_Analog * gabs[k] + iIdr) / (gtot[k]);
	324	const double new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k];
	325
	326	resched = resched \|\| (std::abs(new_val - new_V[k]) > accuracy);
	327	new_V[k] = new_val;
	328	}
	329
	330	resched_cnt++;
	331	} while (resched && (resched_cnt < this->m_params.m_gs_loops));
	332
	333	for (int k = 0; k < iN; k++)
	334	this->m_nets[k]->m_cur_Analog = new_V[k];
	335
	336	this->m_gs_total += resched_cnt;
	337	this->m_calculations++;
	338
	339	if (resched)
	340	{
	341	//this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS");
	342	this->m_gs_fail++;
	343	return netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic();
	344	}
	345	else {
	346	return resched_cnt;
	347	}
	348	#endif
	349	}
	350
	351
	352	#endif /* NLD_MS_GAUSS_SEIDEL_H_ */

Property changes on: trunk/src/emu/netlist/analog/nld_ms_gauss_seidel.h
Added: svn:mime-type + text/plain Added: svn:eol-style + native

199869 Revisions